diff --git a/.devops/cpu.Dockerfile b/.devops/cpu.Dockerfile index 522ee8147d..aa2aa03120 100644 --- a/.devops/cpu.Dockerfile +++ b/.devops/cpu.Dockerfile @@ -14,9 +14,9 @@ WORKDIR /app COPY . . RUN if [ "$TARGETARCH" = "amd64" ]; then \ - cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \ + cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \ elif [ "$TARGETARCH" = "arm64" ]; then \ - cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \ + cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \ else \ echo "Unsupported architecture"; \ exit 1; \ diff --git a/.devops/cuda.Dockerfile b/.devops/cuda.Dockerfile index 974dd78a8b..8ae57d2e28 100644 --- a/.devops/cuda.Dockerfile +++ b/.devops/cuda.Dockerfile @@ -1,6 +1,6 @@ ARG UBUNTU_VERSION=22.04 # This needs to generally match the container host's environment. -ARG CUDA_VERSION=12.6.0 +ARG CUDA_VERSION=12.4.0 # Target the CUDA build image ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} @@ -21,7 +21,7 @@ COPY . . RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \ export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \ fi && \ - cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ + cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ cmake --build build --config Release -j$(nproc) RUN mkdir -p /app/lib && \ diff --git a/.devops/intel.Dockerfile b/.devops/intel.Dockerfile index af783f5e99..091e1dc5d8 100644 --- a/.devops/intel.Dockerfile +++ b/.devops/intel.Dockerfile @@ -17,7 +17,7 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \ && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \ fi && \ echo "Building with dynamic libs" && \ - cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \ + cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${OPT_SYCL_F16} && \ cmake --build build --config Release -j$(nproc) RUN mkdir -p /app/lib && \ diff --git a/.devops/llama-cli-cann.Dockerfile b/.devops/llama-cli-cann.Dockerfile index 02dce501ce..0eb1af87cb 100644 --- a/.devops/llama-cli-cann.Dockerfile +++ b/.devops/llama-cli-cann.Dockerfile @@ -1,4 +1,4 @@ -ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8 +ARG ASCEND_VERSION=8.1.RC1.alpha001-910b-openeuler22.03-py3.10 FROM ascendai/cann:$ASCEND_VERSION AS build @@ -6,7 +6,7 @@ WORKDIR /app COPY . . 
-RUN yum install -y gcc g++ cmake make +RUN yum install -y gcc g++ cmake make libcurl-devel ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH} diff --git a/.devops/llama-cpp-cuda.srpm.spec b/.devops/llama-cpp-cuda.srpm.spec index 7425d3a9d7..3bbf4a4def 100644 --- a/.devops/llama-cpp-cuda.srpm.spec +++ b/.devops/llama-cpp-cuda.srpm.spec @@ -17,10 +17,10 @@ Version: %( date "+%%Y%%m%%d" ) Release: 1%{?dist} Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL) License: MIT -Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz +Source0: https://github.com/ggml-org/llama.cpp/archive/refs/heads/master.tar.gz BuildRequires: coreutils make gcc-c++ git cuda-toolkit Requires: cuda-toolkit -URL: https://github.com/ggerganov/llama.cpp +URL: https://github.com/ggml-org/llama.cpp %define debug_package %{nil} %define source_date_epoch_from_changelog 0 diff --git a/.devops/llama-cpp.srpm.spec b/.devops/llama-cpp.srpm.spec index 4d55600898..45902dcf89 100644 --- a/.devops/llama-cpp.srpm.spec +++ b/.devops/llama-cpp.srpm.spec @@ -18,10 +18,10 @@ Version: %( date "+%%Y%%m%%d" ) Release: 1%{?dist} Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL) License: MIT -Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz +Source0: https://github.com/ggml-org/llama.cpp/archive/refs/heads/master.tar.gz BuildRequires: coreutils make gcc-c++ git libstdc++-devel Requires: libstdc++ -URL: https://github.com/ggerganov/llama.cpp +URL: https://github.com/ggml-org/llama.cpp %define debug_package %{nil} %define source_date_epoch_from_changelog 0 diff --git a/.devops/musa.Dockerfile b/.devops/musa.Dockerfile index bfd7fc1c17..261a2823a0 100644 --- a/.devops/musa.Dockerfile +++ b/.devops/musa.Dockerfile @@ -1,6 +1,6 @@ ARG UBUNTU_VERSION=22.04 # This needs to generally match the container host's environment. -ARG MUSA_VERSION=rc3.1.0 +ARG MUSA_VERSION=rc3.1.1 # Target the MUSA build image ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION} @@ -35,7 +35,7 @@ COPY . . RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \ export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \ fi && \ - cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ + cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ cmake --build build --config Release -j$(nproc) RUN mkdir -p /app/lib && \ diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 043c4364b9..6e8050a499 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -133,12 +133,12 @@ effectiveStdenv.mkDerivation (finalAttrs: { --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";" ''; - # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015, + # With PR#6015 https://github.com/ggml-org/llama.cpp/pull/6015, # `default.metallib` may be compiled with Metal compiler from XCode # and we need to escape sandbox on MacOS to access Metal compiler. 
# `xcrun` is used find the path of the Metal compiler, which is varible # and not on $PATH - # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion + # see https://github.com/ggml-org/llama.cpp/pull/6118 for discussion __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders; nativeBuildInputs = @@ -220,7 +220,7 @@ effectiveStdenv.mkDerivation (finalAttrs: { broken = (useMetalKit && !effectiveStdenv.isDarwin); description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}"; - homepage = "https://github.com/ggerganov/llama.cpp/"; + homepage = "https://github.com/ggml-org/llama.cpp/"; license = lib.licenses.mit; # Accommodates `nix run` and `lib.getExe` diff --git a/.devops/rocm.Dockerfile b/.devops/rocm.Dockerfile index a8088ea00d..a1b34723a4 100644 --- a/.devops/rocm.Dockerfile +++ b/.devops/rocm.Dockerfile @@ -11,14 +11,14 @@ ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-co FROM ${BASE_ROCM_DEV_CONTAINER} AS build # Unless otherwise specified, we make a fat build. -# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878 +# List from https://github.com/ggml-org/llama.cpp/pull/1087#issuecomment-1682807878 # This is mostly tied to rocBLAS supported archs. # gfx803, gfx900, gfx1032, gfx1101, gfx1102,not officialy supported # gfx906 is deprecated #check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html -#ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102' -ARG ROCM_DOCKER_ARCH=gfx1100 +ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102' +#ARG ROCM_DOCKER_ARCH=gfx1100 # Set nvcc architectured ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH} @@ -40,7 +40,7 @@ WORKDIR /app COPY . . RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \ - cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON \ + cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON \ && cmake --build build --config Release -j$(nproc) RUN mkdir -p /app/lib \ diff --git a/.devops/vulkan.Dockerfile b/.devops/vulkan.Dockerfile index 9064f38385..f8f3072e95 100644 --- a/.devops/vulkan.Dockerfile +++ b/.devops/vulkan.Dockerfile @@ -16,7 +16,7 @@ WORKDIR /app COPY . . 
-RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \ +RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON && \ cmake --build build --config Release -j$(nproc) RUN mkdir -p /app/lib && \ diff --git a/.editorconfig b/.editorconfig index eac38a15f1..5d63d0a51e 100644 --- a/.editorconfig +++ b/.editorconfig @@ -40,3 +40,11 @@ indent_style = tab [examples/cvector-generator/*.txt] trim_trailing_whitespace = unset insert_final_newline = unset + +[models/templates/*.jinja] +indent_style = unset +indent_size = unset +end_of_line = unset +charset = unset +trim_trailing_whitespace = unset +insert_final_newline = unset diff --git a/.github/ISSUE_TEMPLATE/020-enhancement.yml b/.github/ISSUE_TEMPLATE/020-enhancement.yml index 02dd4f575a..cee1446f5a 100644 --- a/.github/ISSUE_TEMPLATE/020-enhancement.yml +++ b/.github/ISSUE_TEMPLATE/020-enhancement.yml @@ -6,7 +6,7 @@ body: - type: markdown attributes: value: | - [Please post your idea first in Discussion if there is not yet a consensus for this enhancement request. This will help to keep this issue tracker focused on enhancements that the community has agreed needs to be implemented.](https://github.com/ggerganov/llama.cpp/discussions/categories/ideas) + [Please post your idea first in Discussion if there is not yet a consensus for this enhancement request. This will help to keep this issue tracker focused on enhancements that the community has agreed needs to be implemented.](https://github.com/ggml-org/llama.cpp/discussions/categories/ideas) - type: checkboxes id: prerequisites @@ -16,11 +16,11 @@ body: options: - label: I am running the latest code. Mention the version if possible as well. required: true - - label: I carefully followed the [README.md](https://github.com/ggerganov/llama.cpp/blob/master/README.md). + - label: I carefully followed the [README.md](https://github.com/ggml-org/llama.cpp/blob/master/README.md). required: true - label: I searched using keywords relevant to my issue to make sure that I am creating a new issue that is not already open (or closed). required: true - - label: I reviewed the [Discussions](https://github.com/ggerganov/llama.cpp/discussions), and have a new and useful enhancement to share. + - label: I reviewed the [Discussions](https://github.com/ggml-org/llama.cpp/discussions), and have a new and useful enhancement to share. 
required: true - type: textarea diff --git a/.github/ISSUE_TEMPLATE/030-research.yml b/.github/ISSUE_TEMPLATE/030-research.yml index 18975dbbfd..e774550d59 100644 --- a/.github/ISSUE_TEMPLATE/030-research.yml +++ b/.github/ISSUE_TEMPLATE/030-research.yml @@ -6,7 +6,7 @@ body: - type: markdown attributes: value: | - Don't forget to check for any [duplicate research issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3A%22research+%F0%9F%94%AC%22) + Don't forget to check for any [duplicate research issue tickets](https://github.com/ggml-org/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3A%22research+%F0%9F%94%AC%22) - type: checkboxes id: research-stage diff --git a/.github/ISSUE_TEMPLATE/040-refactor.yml b/.github/ISSUE_TEMPLATE/040-refactor.yml index b6e6ab36de..2fe94e26c6 100644 --- a/.github/ISSUE_TEMPLATE/040-refactor.yml +++ b/.github/ISSUE_TEMPLATE/040-refactor.yml @@ -6,8 +6,8 @@ body: - type: markdown attributes: value: | - Don't forget to [check for existing refactor issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3Arefactoring) in case it's already covered. - Also you may want to check [Pull request refactor label as well](https://github.com/ggerganov/llama.cpp/pulls?q=is%3Aopen+is%3Apr+label%3Arefactoring) for duplicates too. + Don't forget to [check for existing refactor issue tickets](https://github.com/ggml-org/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3Arefactoring) in case it's already covered. + Also you may want to check [Pull request refactor label as well](https://github.com/ggml-org/llama.cpp/pulls?q=is%3Aopen+is%3Apr+label%3Arefactoring) for duplicates too. - type: textarea id: background-description diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index eb8c4b472d..0d246533c9 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,11 +1,11 @@ blank_issues_enabled: true contact_links: - name: Got an idea? - url: https://github.com/ggerganov/llama.cpp/discussions/categories/ideas + url: https://github.com/ggml-org/llama.cpp/discussions/categories/ideas about: Pop it there. It may then become an enhancement ticket. - name: Got a question? - url: https://github.com/ggerganov/llama.cpp/discussions/categories/q-a + url: https://github.com/ggml-org/llama.cpp/discussions/categories/q-a about: Ask a question there! - name: Want to contribute? 
- url: https://github.com/ggerganov/llama.cpp/wiki/contribute + url: https://github.com/ggml-org/llama.cpp/wiki/contribute about: Head to the contribution guide page of the wiki for areas you can help with diff --git a/.github/actions/windows-setup-curl/action.yml b/.github/actions/windows-setup-curl/action.yml new file mode 100644 index 0000000000..5d76da3d79 --- /dev/null +++ b/.github/actions/windows-setup-curl/action.yml @@ -0,0 +1,25 @@ +name: 'Windows - Setup CURL' +description: 'Composite action, to be reused in other workflow' +inputs: + curl_version: + description: 'CURL version' + required: false + default: '8.6.0_6' +outputs: + curl_path: + description: "Path to the downloaded libcurl" + value: ${{ steps.get_libcurl.outputs.curl_path }} + +runs: + using: "composite" + steps: + - name: libCURL + id: get_libcurl + shell: powershell + env: + CURL_VERSION: ${{ inputs.curl_version }} + run: | + curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip" + mkdir $env:RUNNER_TEMP/libcurl + tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl + echo "curl_path=$env:RUNNER_TEMP/libcurl" >> $env:GITHUB_OUTPUT diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index d9f5bdc235..d0bdd73c44 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1 +1 @@ -*Make sure to read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR* +*Make sure to read the [contributing guidelines](https://github.com/ggml-org/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR* diff --git a/.github/workflows/bench.yml.disabled b/.github/workflows/bench.yml.disabled index 1c8787ef78..75d2714792 100644 --- a/.github/workflows/bench.yml.disabled +++ b/.github/workflows/bench.yml.disabled @@ -1,5 +1,5 @@ # TODO: there have been some issues with the workflow, so disabling for now -# https://github.com/ggerganov/llama.cpp/issues/7893 +# https://github.com/ggml-org/llama.cpp/issues/7893 # # Benchmark name: Benchmark @@ -57,17 +57,7 @@ jobs: if: | inputs.gpu-series == 'Standard_NC4as_T4_v3' - || ( - github.event_name == 'schedule' - && github.ref_name == 'master' - && github.repository_owner == 'ggerganov' - ) || github.event_name == 'pull_request_target' - || ( - github.event_name == 'push' - && github.event.ref == 'refs/heads/master' - && github.repository_owner == 'ggerganov' - ) steps: - name: Clone id: checkout @@ -114,7 +104,6 @@ jobs: cmake -B build \ -DGGML_NATIVE=OFF \ -DLLAMA_BUILD_SERVER=ON \ - -DLLAMA_CURL=ON \ -DLLAMA_CUBLAS=ON \ -DCUDAToolkit_ROOT=/usr/local/cuda \ -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \ diff --git a/.github/workflows/build-linux-cross.yml b/.github/workflows/build-linux-cross.yml new file mode 100644 index 0000000000..e8639913ea --- /dev/null +++ b/.github/workflows/build-linux-cross.yml @@ -0,0 +1,124 @@ +name: Build on Linux using cross-compiler +on: + workflow_dispatch: + workflow_call: + +jobs: + ubuntu-latest-riscv64-cpu-cross: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Setup Riscv + run: | + sudo dpkg --add-architecture riscv64 + sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \ + /etc/apt/sources.list /etc/apt/apt-mirrors.txt + sudo apt-get clean + sudo apt-get update + sudo apt-get install -y --no-install-recommends \ + build-essential \ + 
gcc-14-riscv64-linux-gnu \ + g++-14-riscv64-linux-gnu \ + libcurl4-openssl-dev:riscv64 + + - name: Build + run: | + cmake -B build -DCMAKE_BUILD_TYPE=Release \ + -DGGML_OPENMP=OFF \ + -DLLAMA_BUILD_EXAMPLES=ON \ + -DLLAMA_BUILD_TESTS=OFF \ + -DCMAKE_SYSTEM_NAME=Linux \ + -DCMAKE_SYSTEM_PROCESSOR=riscv64 \ + -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ + -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \ + -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ + -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \ + -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ + -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ + -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH + + cmake --build build --config Release -j $(nproc) + + ubuntu-latest-riscv64-vulkan-cross: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Riscv + run: | + sudo dpkg --add-architecture riscv64 + sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \ + /etc/apt/sources.list /etc/apt/apt-mirrors.txt + sudo apt-get clean + sudo apt-get update + sudo apt-get install -y --no-install-recommends \ + build-essential \ + glslc \ + gcc-14-riscv64-linux-gnu \ + g++-14-riscv64-linux-gnu \ + libvulkan-dev:riscv64 \ + libcurl4-openssl-dev:riscv64 + + - name: Build + run: | + cmake -B build -DCMAKE_BUILD_TYPE=Release \ + -DGGML_VULKAN=ON \ + -DGGML_OPENMP=OFF \ + -DLLAMA_BUILD_EXAMPLES=ON \ + -DLLAMA_BUILD_TESTS=OFF \ + -DCMAKE_SYSTEM_NAME=Linux \ + -DCMAKE_SYSTEM_PROCESSOR=riscv64 \ + -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ + -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \ + -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ + -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \ + -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ + -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ + -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH + + cmake --build build --config Release -j $(nproc) + + ubuntu-latest-arm64-vulkan-cross: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Arm64 + run: | + sudo dpkg --add-architecture arm64 + sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \ + /etc/apt/sources.list /etc/apt/apt-mirrors.txt + sudo apt-get clean + sudo apt-get update + sudo apt-get install -y --no-install-recommends \ + build-essential \ + glslc \ + crossbuild-essential-arm64 \ + libvulkan-dev:arm64 \ + libcurl4-openssl-dev:arm64 + + - name: Build + run: | + cmake -B build -DCMAKE_BUILD_TYPE=Release \ + -DGGML_VULKAN=ON \ + -DGGML_OPENMP=OFF \ + -DLLAMA_BUILD_EXAMPLES=ON \ + -DLLAMA_BUILD_TESTS=OFF \ + -DCMAKE_SYSTEM_NAME=Linux \ + -DCMAKE_SYSTEM_PROCESSOR=aarch64 \ + -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc \ + -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++ \ + -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ + -DCMAKE_FIND_ROOT_PATH=/usr/lib/aarch64-linux-gnu \ + -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ + -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ + -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH + + cmake --build build --config Release -j $(nproc) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index cd8422f8a2..32c8b7717f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -10,10 +10,10 @@ on: push: branches: - master - paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal'] + paths: ['.github/workflows/build.yml', 
'.github/workflows/build-linux-cross.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp'] pull_request: types: [opened, synchronize, reopened] - paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal'] + paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp'] concurrency: group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} @@ -43,26 +43,30 @@ jobs: with: fetch-depth: 0 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: macOS-latest-cmake-arm64 + evict-old-files: 1d + - name: Dependencies id: depends continue-on-error: true run: | brew update + brew install curl - name: Build id: cmake_build run: | sysctl -a - mkdir build - cd build - cmake .. \ + cmake -B build \ -DCMAKE_BUILD_RPATH="@loader_path" \ -DLLAMA_FATAL_WARNINGS=ON \ - -DLLAMA_CURL=ON \ -DGGML_METAL_USE_BF16=ON \ -DGGML_METAL_EMBED_LIBRARY=ON \ -DGGML_RPC=ON - cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) - name: Test id: cmake_test @@ -88,7 +92,6 @@ jobs: if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} run: | cp LICENSE ./build/bin/ - cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/* - name: Upload artifacts @@ -108,22 +111,28 @@ jobs: with: fetch-depth: 0 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: macOS-latest-cmake-x64 + evict-old-files: 1d + - name: Dependencies id: depends continue-on-error: true run: | brew update + brew install curl - name: Build id: cmake_build run: | sysctl -a # Metal is disabled due to intermittent failures with Github runners not having a GPU: - # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313 + # https://github.com/ggml-org/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313 cmake -B build \ -DCMAKE_BUILD_RPATH="@loader_path" \ -DLLAMA_FATAL_WARNINGS=ON \ - -DLLAMA_CURL=ON \ -DGGML_METAL=OFF \ -DGGML_RPC=ON cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) @@ -152,7 +161,6 @@ jobs: if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} run: | cp LICENSE ./build/bin/ - cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/* - name: Upload artifacts @@ -163,7 +171,15 @@ jobs: name: llama-bin-macos-x64.zip ubuntu-cpu-cmake: - runs-on: ubuntu-22.04 + strategy: + matrix: + include: + - build: 'x64' + os: ubuntu-22.04 + - build: 'arm64' + os: ubuntu-22.04-arm + + runs-on: ${{ matrix.os }} steps: - name: Clone @@ -172,6 +188,12 @@ jobs: with: fetch-depth: 0 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-cpu-cmake + evict-old-files: 1d + - name: Dependencies id: depends run: | @@ -181,13 +203,10 @@ jobs: - name: Build id: cmake_build run: | - mkdir build - cd build - cmake .. 
\ + cmake -B build \ -DLLAMA_FATAL_WARNINGS=ON \ - -DLLAMA_CURL=ON \ -DGGML_RPC=ON - cmake --build . --config Release -j $(nproc) + cmake --build build --config Release -j $(nproc) - name: Test id: cmake_test @@ -224,15 +243,14 @@ jobs: if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} run: | cp LICENSE ./build/bin/ - cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp - zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/* + zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip ./build/bin/* - name: Upload artifacts if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} uses: actions/upload-artifact@v4 with: - path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip - name: llama-bin-ubuntu-x64.zip + path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip + name: llama-bin-ubuntu-${{ matrix.build }}.zip ubuntu-latest-cmake-sanitizer: runs-on: ubuntu-latest @@ -249,36 +267,68 @@ jobs: id: checkout uses: actions/checkout@v4 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-latest-cmake-sanitizer-${{ matrix.sanitizer }} + evict-old-files: 1d + - name: Dependencies id: depends run: | sudo apt-get update - sudo apt-get install build-essential + sudo apt-get install build-essential libcurl4-openssl-dev - name: Build id: cmake_build if: ${{ matrix.sanitizer != 'THREAD' }} run: | - mkdir build - cd build - cmake .. \ + cmake -B build \ -DLLAMA_FATAL_WARNINGS=ON \ -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} - cmake --build . --config ${{ matrix.build_type }} -j $(nproc) + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) - name: Build (no OpenMP) id: cmake_build_no_openmp if: ${{ matrix.sanitizer == 'THREAD' }} run: | - mkdir build - cd build - cmake .. \ + cmake -B build \ -DLLAMA_FATAL_WARNINGS=ON \ -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ -DGGML_OPENMP=OFF - cmake --build . --config ${{ matrix.build_type }} -j $(nproc) + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose --timeout 900 + + ubuntu-latest-llguidance: + runs-on: ubuntu-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential libcurl4-openssl-dev + + - name: Build + id: cmake_build + run: | + mkdir build + cd build + cmake .. \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DLLAMA_LLGUIDANCE=ON + cmake --build . --config Release -j $(nproc) - name: Test id: cmake_test @@ -296,20 +346,24 @@ jobs: id: checkout uses: actions/checkout@v4 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-latest-cmake-rpc + evict-old-files: 1d + - name: Dependencies id: depends run: | sudo apt-get update - sudo apt-get install build-essential + sudo apt-get install build-essential libcurl4-openssl-dev - name: Build id: cmake_build run: | - mkdir build - cd build - cmake .. \ + cmake -B build \ -DGGML_RPC=ON - cmake --build . 
--config Release -j $(nproc) + cmake --build build --config Release -j $(nproc) - name: Test id: cmake_test @@ -324,6 +378,14 @@ jobs: - name: Clone id: checkout uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-22-cmake-vulkan + evict-old-files: 1d - name: Dependencies id: depends @@ -331,22 +393,48 @@ jobs: wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list sudo apt-get update -y - sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk + sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk libcurl4-openssl-dev - name: Build id: cmake_build run: | - mkdir build - cd build - cmake .. \ + cmake -B build \ -DGGML_VULKAN=ON - cmake --build . --config Release -j $(nproc) + cmake --build build --config Release -j $(nproc) - name: Test id: cmake_test run: | cd build - ctest -L main --verbose --timeout 900 + # This is using llvmpipe and runs slower than other backends + ctest -L main --verbose --timeout 2700 + + - name: Determine tag name + id: tag + shell: bash + run: | + BUILD_NUMBER="$(git rev-list --count HEAD)" + SHORT_HASH="$(git rev-parse --short=7 HEAD)" + if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then + echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT + else + SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') + echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT + fi + + - name: Pack artifacts + id: pack_artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + run: | + cp LICENSE ./build/bin/ + zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip ./build/bin/* + + - name: Upload artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + uses: actions/upload-artifact@v4 + with: + path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip + name: llama-bin-ubuntu-vulkan-x64.zip ubuntu-22-cmake-hip: runs-on: ubuntu-22.04 @@ -361,13 +449,20 @@ jobs: id: depends run: | sudo apt-get update - sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev + sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev libcurl4-openssl-dev + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-22-cmake-hip + evict-old-files: 1d - name: Build with native CMake HIP support id: cmake_build run: | cmake -B build -S . \ -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \ + -DGGML_HIP_ROCWMMA_FATTN=ON \ -DGGML_HIP=ON cmake --build build --config Release -j $(nproc) @@ -377,12 +472,13 @@ jobs: cmake -B build2 -S . 
\ -DCMAKE_C_COMPILER=hipcc \ -DCMAKE_CXX_COMPILER=hipcc \ + -DGGML_HIP_ROCWMMA_FATTN=ON \ -DGGML_HIP=ON cmake --build build2 --config Release -j $(nproc) ubuntu-22-cmake-musa: runs-on: ubuntu-22.04 - container: mthreads/musa:rc3.1.0-devel-ubuntu22.04 + container: mthreads/musa:rc3.1.1-devel-ubuntu22.04 steps: - name: Clone @@ -395,6 +491,12 @@ jobs: apt-get update apt-get install -y build-essential git cmake libcurl4-openssl-dev + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-22-cmake-musa + evict-old-files: 1d + - name: Build with native CMake MUSA support id: cmake_build run: | @@ -423,7 +525,7 @@ jobs: shell: bash run: | sudo apt update - sudo apt install intel-oneapi-compiler-dpcpp-cpp + sudo apt install intel-oneapi-compiler-dpcpp-cpp libcurl4-openssl-dev - name: install oneAPI MKL library shell: bash @@ -434,17 +536,21 @@ jobs: id: checkout uses: actions/checkout@v4 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-22-cmake-sycl + evict-old-files: 1d + - name: Build id: cmake_build run: | source /opt/intel/oneapi/setvars.sh - mkdir build - cd build - cmake .. \ + cmake -B build \ -DGGML_SYCL=ON \ -DCMAKE_C_COMPILER=icx \ -DCMAKE_CXX_COMPILER=icpx - cmake --build . --config Release -j $(nproc) + cmake --build build --config Release -j $(nproc) ubuntu-22-cmake-sycl-fp16: runs-on: ubuntu-22.04 @@ -467,7 +573,7 @@ jobs: shell: bash run: | sudo apt update - sudo apt install intel-oneapi-compiler-dpcpp-cpp + sudo apt install intel-oneapi-compiler-dpcpp-cpp libcurl4-openssl-dev - name: install oneAPI MKL library shell: bash @@ -478,18 +584,26 @@ jobs: id: checkout uses: actions/checkout@v4 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-22-cmake-sycl-fp16 + evict-old-files: 1d + - name: Build id: cmake_build run: | source /opt/intel/oneapi/setvars.sh - mkdir build - cd build - cmake .. \ + cmake -B build \ -DGGML_SYCL=ON \ -DCMAKE_C_COMPILER=icx \ -DCMAKE_CXX_COMPILER=icpx \ -DGGML_SYCL_F16=ON - cmake --build . --config Release -j $(nproc) + cmake --build build --config Release -j $(nproc) + +# Disabled for now due to sporadic issue syncing. +# build-linux-cross: +# uses: ./.github/workflows/build-linux-cross.yml macOS-latest-cmake-ios: runs-on: macos-latest @@ -499,6 +613,12 @@ jobs: id: checkout uses: actions/checkout@v4 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: macOS-latest-cmake-ios + evict-old-files: 1d + - name: Dependencies id: depends continue-on-error: true @@ -509,22 +629,57 @@ jobs: id: cmake_build run: | sysctl -a - mkdir build - cd build - cmake -G Xcode .. \ + cmake -B build -G Xcode \ -DGGML_METAL_USE_BF16=ON \ -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_BUILD_COMMON=OFF \ -DLLAMA_BUILD_EXAMPLES=OFF \ -DLLAMA_BUILD_TESTS=OFF \ -DLLAMA_BUILD_SERVER=OFF \ -DCMAKE_SYSTEM_NAME=iOS \ -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build . 
--config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO macOS-latest-cmake-tvos: runs-on: macos-latest + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: macOS-latest-cmake-tvos + evict-old-files: 1d + + - name: Dependencies + id: depends + continue-on-error: true + run: | + brew update + + - name: Build + id: cmake_build + run: | + sysctl -a + cmake -B build -G Xcode \ + -DGGML_METAL_USE_BF16=ON \ + -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_BUILD_COMMON=OFF \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF \ + -DCMAKE_SYSTEM_NAME=tvOS \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ + -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO + + macOS-latest-cmake-visionos: + runs-on: macos-latest + steps: - name: Clone id: checkout @@ -540,18 +695,17 @@ jobs: id: cmake_build run: | sysctl -a - mkdir build - cd build - cmake -G Xcode .. \ + cmake -B build -G Xcode \ -DGGML_METAL_USE_BF16=ON \ -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_BUILD_COMMON=OFF \ -DLLAMA_BUILD_EXAMPLES=OFF \ -DLLAMA_BUILD_TESTS=OFF \ -DLLAMA_BUILD_SERVER=OFF \ - -DCMAKE_SYSTEM_NAME=tvOS \ - -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ + -DCMAKE_SYSTEM_NAME=visionOS \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=1.0 \ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO macOS-latest-swift: runs-on: macos-latest @@ -565,6 +719,12 @@ jobs: id: checkout uses: actions/checkout@v4 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: macOS-latest-swift + evict-old-files: 1d + - name: Dependencies id: depends continue-on-error: true @@ -575,22 +735,20 @@ jobs: id: cmake_build run: | sysctl -a - mkdir build - cd build - cmake -G Xcode .. \ + cmake -B build -G Xcode \ -DGGML_METAL_USE_BF16=ON \ -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_CURL=OFF \ -DLLAMA_BUILD_EXAMPLES=OFF \ -DLLAMA_BUILD_TESTS=OFF \ -DLLAMA_BUILD_SERVER=OFF \ -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" - cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) - sudo cmake --install . 
--config Release + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) - name: xcodebuild for swift package id: xcodebuild run: | - xcodebuild -scheme llama-Package -destination "${{ matrix.destination }}" + ./build-xcframework.sh windows-msys2: runs-on: windows-latest @@ -606,6 +764,13 @@ jobs: - name: Clone uses: actions/checkout@v4 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: windows-msys2 + variant: sccache + evict-old-files: 1d + - name: Setup ${{ matrix.sys }} uses: msys2/setup-msys2@v2 with: @@ -641,7 +806,7 @@ jobs: env: OPENBLAS_VERSION: 0.3.23 SDE_VERSION: 9.33.0-2024-01-07 - VULKAN_VERSION: 1.3.261.1 + VULKAN_VERSION: 1.4.309.0 strategy: matrix: @@ -674,6 +839,13 @@ jobs: with: fetch-depth: 0 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: windows-latest-cmake-${{ matrix.build }} + variant: sccache + evict-old-files: 1d + - name: Clone Kompute submodule id: clone_kompute if: ${{ matrix.build == 'kompute-x64' }} @@ -713,26 +885,31 @@ jobs: run: | git clone https://github.com/KhronosGroup/OpenCL-Headers cd OpenCL-Headers - mkdir build && cd build - cmake .. ` + cmake -B build ` -DBUILD_TESTING=OFF ` -DOPENCL_HEADERS_BUILD_TESTING=OFF ` -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF ` -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release" - cmake --build . --target install + cmake --build build --target install git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader cd OpenCL-ICD-Loader - mkdir build-arm64-release && cd build-arm64-release - cmake .. ` + cmake -B build-arm64-release ` -A arm64 ` -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" ` -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release" - cmake --build . --target install --config release + cmake --build build-arm64-release --target install --config release + + - name: libCURL + id: get_libcurl + uses: ./.github/actions/windows-setup-curl - name: Build id: cmake_build + env: + CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} run: | - cmake -S . -B build ${{ matrix.defines }} + cmake -S . 
-B build ${{ matrix.defines }} ` + -DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include" cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} - name: Add libopenblas.dll @@ -792,9 +969,10 @@ jobs: - name: Pack artifacts id: pack_artifacts if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + env: + CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} run: | - Copy-Item LICENSE .\build\bin\Release\llama.cpp.txt - Copy-Item .\examples\run\linenoise.cpp\LICENSE .\build\bin\Release\linenoise.cpp.txt + Copy-Item $env:CURL_PATH\bin\libcurl-x64.dll .\build\bin\Release\libcurl-x64.dll 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip .\build\bin\Release\* - name: Upload artifacts @@ -812,13 +990,21 @@ jobs: - name: Clone id: checkout uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Install dependencies env: DEBIAN_FRONTEND: noninteractive run: | apt update - apt install -y cmake build-essential ninja-build libgomp1 git + apt install -y cmake build-essential ninja-build libgomp1 git libcurl4-openssl-dev + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-latest-cmake-cuda + evict-old-files: 1d - name: Build with CMake run: | @@ -846,6 +1032,13 @@ jobs: with: fetch-depth: 0 + - name: Install ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }} + variant: sccache + evict-old-files: 1d + - name: Install Cuda Toolkit 11.7 if: ${{ matrix.cuda == '11.7' }} run: | @@ -902,26 +1095,28 @@ jobs: echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 - - name: Install ccache - uses: hendrikmuhs/ccache-action@v1.2 - with: - key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }} - - name: Install Ninja id: install_ninja run: | choco install ninja + - name: libCURL + id: get_libcurl + uses: ./.github/actions/windows-setup-curl + - name: Build id: cmake_build shell: cmd + env: + CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} run: | call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat" cmake -S . 
-B build -G "Ninja Multi-Config" ^ -DLLAMA_BUILD_SERVER=ON ^ -DGGML_NATIVE=OFF ^ -DGGML_CUDA=ON ^ - -DGGML_RPC=ON + -DGGML_RPC=ON ^ + -DCURL_LIBRARY="%CURL_PATH%/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="%CURL_PATH%/include" set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1 cmake --build build --config Release -j %NINJA_JOBS% -t ggml cmake --build build --config Release @@ -942,7 +1137,10 @@ jobs: - name: Pack artifacts id: pack_artifacts if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + env: + CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} run: | + cp $env:CURL_PATH\bin\libcurl-x64.dll .\build\bin\Release\libcurl-x64.dll 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip .\build\bin\Release\* - name: Upload artifacts @@ -986,10 +1184,19 @@ jobs: with: fetch-depth: 0 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: windows-latest-cmake-sycl + variant: sccache + evict-old-files: 1d + - name: Install run: | scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL + # TODO: add libcurl support ; we will also need to modify win-build-sycl.bat to accept user-specified args + - name: Build id: cmake_build run: examples/sycl/win-build-sycl.bat @@ -1049,6 +1256,11 @@ jobs: id: checkout uses: actions/checkout@v4 + - name: Clone rocWMMA repository + id: clone_rocwmma + run: | + git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1 + - name: Install id: depends run: | @@ -1065,23 +1277,34 @@ jobs: & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version - name: Install ccache - uses: hendrikmuhs/ccache-action@v1.2 + uses: hendrikmuhs/ccache-action@v1.2.16 with: key: ${{ github.job }} + evict-old-files: 1d + + - name: libCURL + id: get_libcurl + uses: ./.github/actions/windows-setup-curl - name: Build id: cmake_build + env: + CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} run: | $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path) $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}" cmake -G "Unix Makefiles" -B build -S . 
` -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" ` -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" ` + -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/rocwmma/library/include/" ` -DCMAKE_BUILD_TYPE=Release ` -DGGML_HIP=ON ` - -DGGML_RPC=ON + -DGGML_HIP_ROCWMMA_FATTN=ON ` + -DGGML_RPC=ON ` + -DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include" cmake --build build -j ${env:NUMBER_OF_PROCESSORS} + # TODO: reuse windows-latest-cmake-hip instead of duplicating this job windows-latest-cmake-hip-release: if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} runs-on: windows-latest @@ -1097,6 +1320,17 @@ jobs: with: fetch-depth: 0 + - name: Clone rocWMMA repository + id: clone_rocwmma + run: | + git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: windows-latest-cmake-hip-release + evict-old-files: 1d + - name: Install id: depends run: | @@ -1112,18 +1346,27 @@ jobs: run: | & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version + - name: libCURL + id: get_libcurl + uses: ./.github/actions/windows-setup-curl + - name: Build id: cmake_build + env: + CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} run: | $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path) $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}" cmake -G "Unix Makefiles" -B build -S . ` -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" ` -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" ` + -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/rocwmma/library/include/" ` -DCMAKE_BUILD_TYPE=Release ` -DAMDGPU_TARGETS=${{ matrix.gpu_target }} ` + -DGGML_HIP_ROCWMMA_FATTN=ON ` -DGGML_HIP=ON ` - -DGGML_RPC=ON + -DGGML_RPC=ON ` + -DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include" cmake --build build -j ${env:NUMBER_OF_PROCESSORS} md "build\bin\rocblas\library\" cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\" @@ -1145,7 +1388,10 @@ jobs: - name: Pack artifacts id: pack_artifacts + env: + CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} run: | + cp $env:CURL_PATH\bin\libcurl-x64.dll .\build\bin\libcurl-x64.dll 7z a llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip .\build\bin\* - name: Upload artifacts @@ -1160,32 +1406,58 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Build id: cmake_build run: | sysctl -a - mkdir build - cd build - cmake -G Xcode .. \ + cmake -B build -G Xcode \ -DGGML_METAL_USE_BF16=ON \ -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_CURL=OFF \ -DLLAMA_BUILD_EXAMPLES=OFF \ -DLLAMA_BUILD_TESTS=OFF \ -DLLAMA_BUILD_SERVER=OFF \ -DCMAKE_SYSTEM_NAME=iOS \ -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO - sudo cmake --install . 
--config Release + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO - name: xcodebuild for swift package id: xcodebuild run: | - xcodebuild -scheme llama-Package -destination 'generic/platform=iOS' + ./build-xcframework.sh - name: Build Xcode project - run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build + run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build + + - name: Determine tag name + id: tag + shell: bash + run: | + BUILD_NUMBER="$(git rev-list --count HEAD)" + SHORT_HASH="$(git rev-parse --short=7 HEAD)" + if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then + echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT + else + SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') + echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT + fi + + - name: Pack artifacts + id: pack_artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + run: | + zip --symlinks -r llama-${{ steps.tag.outputs.name }}-xcframework.zip build-apple/llama.xcframework + + - name: Upload artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + uses: actions/upload-artifact@v4 + with: + path: llama-${{ steps.tag.outputs.name }}-xcframework.zip + name: llama-${{ steps.tag.outputs.name }}-xcframework android-build: runs-on: ubuntu-latest @@ -1194,6 +1466,12 @@ jobs: - name: Clone uses: actions/checkout@v4 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: android-build + evict-old-files: 1d + - name: Set up JDK uses: actions/setup-java@v3 with: @@ -1218,8 +1496,10 @@ jobs: needs: - ubuntu-cpu-cmake + - ubuntu-22-cmake-vulkan - windows-latest-cmake - windows-2019-cmake-cuda + - windows-latest-cmake-sycl - windows-latest-cmake-hip-release - macOS-latest-cmake-arm64 - macOS-latest-cmake-x64 @@ -1231,6 +1511,12 @@ jobs: with: fetch-depth: 0 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: release + evict-old-files: 1d + - name: Determine tag name id: tag shell: bash @@ -1481,16 +1767,17 @@ jobs: if: ${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'Ascend NPU') }} defaults: run: - shell: bash -el {0} - runs-on: ubuntu-24.04-arm + shell: bash -el {0} strategy: matrix: + arch: [x86, aarch64] cann: - - '8.0.rc3.beta1-910b-openeuler22.03-py3.10' + - '8.1.RC1.alpha001-910b-openeuler22.03-py3.10' device: - 'ascend910b3' build: - 'Release' + runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }} container: ascendai/cann:${{ matrix.cann }} steps: - name: Checkout @@ -1499,7 +1786,7 @@ jobs: - name: Dependencies run: | yum update -y - yum install -y git gcc gcc-c++ make cmake + yum install -y git gcc gcc-c++ make cmake libcurl-devel - name: Build run: | diff --git a/.github/workflows/close-issue.yml b/.github/workflows/close-issue.yml index f63860d141..276a217d45 100644 --- a/.github/workflows/close-issue.yml +++ b/.github/workflows/close-issue.yml @@ -17,7 +17,7 @@ jobs: steps: - uses: actions/stale@v5 with: - exempt-issue-labels: "refactor,help wanted,good first 
issue,research,bug" + exempt-issue-labels: "refactor,help wanted,good first issue,research,bug,roadmap" days-before-issue-stale: 30 days-before-issue-close: 14 stale-issue-label: "stale" diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 6955a7dc82..114cbf83ed 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -36,13 +36,13 @@ jobs: matrix: config: # Multi-stage build - - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: false} - - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false} - - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false} - - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false} - - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false} + - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: false } + - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false } + - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true } + - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false } + - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false } # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete - #- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: true } + #- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: true } steps: - name: Check out the repo uses: actions/checkout@v4 @@ -51,6 +51,8 @@ jobs: - name: Set up QEMU uses: docker/setup-qemu-action@v3 + with: + image: tonistiigi/binfmt:qemu-v7.0.0-28 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index 368dbdbe5d..0b0f300aa4 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -11,7 +11,7 @@ jobs: steps: - uses: actions/checkout@v4 with: - repository: "ggerganov/llama.cpp" + repository: "ggml-org/llama.cpp" - uses: actions/labeler@v5 with: configuration-path: '.github/labeler.yml' diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index ed1c357a57..6c9b513227 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -81,13 +81,36 @@ jobs: with: node-version: '22.11.0' + - name: WebUI - Install dependencies + id: webui_lint + run: | + cd examples/server/webui + npm ci + + - name: WebUI - Check code format + id: webui_format + run: | + git config --global --add safe.directory $(realpath .) 
+ cd examples/server/webui + git status + + npm run format + git status + modified_files="$(git status -s)" + echo "Modified files: ${modified_files}" + if [ -n "${modified_files}" ]; then + echo "Files do not follow coding style. To fix: npm run format" + echo "${modified_files}" + exit 1 + fi + - name: Verify bundled index.html id: verify_server_index_html run: | git config --global --add safe.directory $(realpath .) cd examples/server/webui git status - npm ci + npm run build git status modified_files="$(git status -s)" @@ -106,7 +129,6 @@ jobs: cmake -B build \ -DGGML_NATIVE=OFF \ -DLLAMA_BUILD_SERVER=ON \ - -DLLAMA_CURL=ON \ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ -DGGML_OPENMP=OFF ; @@ -119,7 +141,6 @@ jobs: cmake -B build \ -DGGML_NATIVE=OFF \ -DLLAMA_BUILD_SERVER=ON \ - -DLLAMA_CURL=ON \ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ; cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server @@ -131,13 +152,14 @@ jobs: cmake -B build \ -DGGML_NATIVE=OFF \ -DLLAMA_BUILD_SERVER=ON \ - -DLLAMA_CURL=ON \ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ; cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server - name: Tests id: server_integration_tests if: ${{ matrix.sanitizer == '' }} + env: + GITHUB_ACTIONS: "true" run: | cd examples/server/tests ./tests.sh @@ -170,17 +192,14 @@ jobs: - name: libCURL id: get_libcurl - env: - CURL_VERSION: 8.6.0_6 - run: | - curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip" - mkdir $env:RUNNER_TEMP/libcurl - tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl + uses: ./.github/actions/windows-setup-curl - name: Build id: cmake_build + env: + CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} run: | - cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include" + cmake -B build -DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include" cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target llama-server - name: Python setup @@ -196,8 +215,10 @@ jobs: - name: Copy Libcurl id: prepare_libcurl + env: + CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} run: | - cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll + cp $env:CURL_PATH/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll - name: Tests id: server_integration_tests @@ -205,7 +226,7 @@ jobs: run: | cd examples/server/tests $env:PYTHONIOENCODING = ":replace" - pytest -v -x + pytest -v -x -m "not slow" - name: Slow tests id: server_integration_tests_slow diff --git a/.gitignore b/.gitignore index 694f36e042..2c67ad7f7c 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,8 @@ lcov-report/ tags .build/ build* +release +debug !build-info.cmake !build-info.cpp.in !build-info.sh @@ -98,6 +100,7 @@ examples/server/*.css.hpp examples/server/*.html.hpp examples/server/*.js.hpp examples/server/*.mjs.hpp +examples/server/*.gz.hpp !build_64.sh !examples/*.bat !examples/*/*.kts diff --git a/AUTHORS b/AUTHORS index 2eb60806ad..0af9f44ad4 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,4 +1,4 @@ -# date: Thu Nov 28 20:46:15 EET 2024 +# date: Sat Mar 8 18:23:52 EET 2025 # this file is auto-generated by scripts/gen-authors.sh 0cc4m @@ -8,10 +8,12 @@ 3ooabkhxtn 
<31479382+3ooabkhxtn@users.noreply.github.com> 44670 <44670@users.noreply.github.com> 65a <10104049+65a@users.noreply.github.com> +708-145 <40387547+708-145@users.noreply.github.com> AN Long AT Aarni Koskela Aaron Miller +Aaron Teo <57927438+taronaeo@users.noreply.github.com> Aaryaman Vasishta Abheek Gulati Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> @@ -20,21 +22,27 @@ Adithya Balaji AdithyanI Adrian Adrian Hesketh +Adrian Kretz +Adrien Gallouët +Adrien Gallouët Ahmad Tameem <113388789+Tameem-10xE@users.noreply.github.com> Ahmet Zeer AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com> AidanBeltonS Aisuko Akarshan Biswas +Akarshan Biswas Akarshan Biswas Al Mochkin <14274697+amochkin@users.noreply.github.com> Albert Jin Alberto <57916483+albbus-stack@users.noreply.github.com> Alberto Cabrera Pérez Alberto Cabrera Pérez +Aleksei Nikiforov <103434461+AlekseiNikiforovIBM@users.noreply.github.com> Alex Alex Azarov Alex Azarov +Alex Brooks Alex Klinkhamer Alex Klinkhamer Alex Nguyen @@ -55,6 +63,7 @@ Ananta Bastola Anas Ahouzi <112881240+aahouzi@users.noreply.github.com> András Salamon Andreas (Andi) Kunar +Andreas Kieslinger <47689530+aendk@users.noreply.github.com> Andrei Andrew Canis Andrew Downing @@ -64,6 +73,7 @@ Andrew Minh Nguyen <40281306+amqdn@users.noreply.github.com> Andy Salerno Andy Tai Anthony Van de Gejuchte +Antoine Viallon Antonis Makropoulos Arik Poznanski Armen Kaleshian @@ -80,6 +90,7 @@ Atsushi Tatsuma Austin <77757836+teleprint-me@users.noreply.github.com> AustinMroz BADR +BB-fat <45072480+BB-fat@users.noreply.github.com> Bach Le Bailey Chittle <39804642+bachittle@users.noreply.github.com> BarfingLemurs <128182951+BarfingLemurs@users.noreply.github.com> @@ -91,13 +102,18 @@ Ben Siraphob Ben Williams Benjamin Findley <39356821+Kartoffelsaft@users.noreply.github.com> Benjamin Lecaillon <84293038+blecaillon@users.noreply.github.com> +Benson Wong Bernat Vadell +Bernhard M. 
Wiedemann Bert Wagner +Billel Mokeddem Bingan <70050083+binganao@users.noreply.github.com> Bjarke Viksøe <164612031+bviksoe@users.noreply.github.com> +Bodhi <3882561+BodhiHu@users.noreply.github.com> Bodo Graumann Bono Lv Borislav Stanimirov +Borislav Stanimirov Branden Butler Brandon Squizzato <35474886+bsquizz@users.noreply.github.com> Brian @@ -117,9 +133,11 @@ Casey Primozic Casey Primozic CausalLM <148736309+CausalLM@users.noreply.github.com> Cebtenzzre +CentricStorm Chad Brewbaker Changyeon Kim Chao Jiang +Charles Duffy Charles Xu <63788048+chaxu01@users.noreply.github.com> Charles Xu Chen Xi @@ -131,12 +149,17 @@ Chris Kuehl Christian Demsar Christian Demsar Christian Falch <875252+chrfalch@users.noreply.github.com> +Christian Fillion +Christian Kastner Christian Kögler Christian Köhnenkamp Christian Zhou-Zheng <59622928+christianazinn@users.noreply.github.com> +Christopher Nielsen <62156882+mascguy@users.noreply.github.com> Clark Saben <76020733+csaben@users.noreply.github.com> +Clauszy Clint Herron Conrad Kramer +Corentin REGAL CrispStrobe <154636388+CrispStrobe@users.noreply.github.com> Csaba Kecskemeti Cuong Trinh Manh @@ -152,6 +175,7 @@ Daniel Hiltgen Daniel Illescas Romero Daniel Kleine <53251018+d-kleine@users.noreply.github.com> Daniele <57776841+daniandtheweb@users.noreply.github.com> +Danny Milosavljevic DannyDaemonic Dat Quoc Nguyen <2412555+datquocnguyen@users.noreply.github.com> Dave @@ -159,6 +183,7 @@ Dave Airlie Dave Airlie Dave Della Costa David Friehs +David Huang <1969802+hjc4869@users.noreply.github.com> David Kennedy David Pflug David Renshaw @@ -176,6 +201,7 @@ Dibakar Gope Didzis Gosko Diego Devesa Diogo Teles Sant'Anna +Djip007 <3705339+Djip007@users.noreply.github.com> Djip007 Don Mahurin DooWoong Lee (David) @@ -193,6 +219,7 @@ Edward Taylor Elaine Elbios <141279586+Elbios@users.noreply.github.com> Elton Kola +Emreerdog <34742675+Emreerdog@users.noreply.github.com> Engininja2 <139037756+Engininja2@users.noreply.github.com> Equim Eric Curtin @@ -223,6 +250,7 @@ Felix Finn Voorhees Firat FirstTimeEZ <179362031+FirstTimeEZ@users.noreply.github.com> +Florent BENOIT Folko-Ven <71110216+Folko-Ven@users.noreply.github.com> Foul-Tarnished <107711110+Foul-Tarnished@users.noreply.github.com> Francisco Melo <43780565+francis2tm@users.noreply.github.com> @@ -233,6 +261,7 @@ Fred Douglas <43351173+fredlas@users.noreply.github.com> Frederik Vogel Gabe Goodhart Gabe Goodhart +Gaetan Bisson GainLee Galunid Gary Linscott @@ -240,6 +269,7 @@ Gary Mulder Gavin Zhao Genkagaku.GPT Georgi Gerganov +Gian-Carlo Pascutto Gilad S Gilad S. 
<7817232+giladgd@users.noreply.github.com> Giuseppe Scrivano @@ -249,21 +279,27 @@ Guillaume "Vermeille" Sanchez Guillaume Wenzek Guoliang Hua <32868157+nbcsm@users.noreply.github.com> Guoteng <32697156+SolenoidWGT@users.noreply.github.com> +Guspan Tanadi <36249910+guspan-tanadi@users.noreply.github.com> Gustavo Rocha Dias <91472747+gustrd@users.noreply.github.com> Haggai Nuchi Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com> +Hale Chan Hamdoud Hakem <90524568+hamdoudhakem@users.noreply.github.com> +Han Yin HanishKVC Haohui Mai Haoxiang Fei Harald Fernengel Hatsune Miku <129688334+at8u@users.noreply.github.com> HatsuneMikuUwU33 <173229399+HatsuneMikuUwU33@users.noreply.github.com> +Haus1 Henk Poley Henri Vasserman Henrik Forstén +Henry Linjamäki Herman Semenov Hesen Peng +HimariO Hoang Nguyen Hong Bo PENG Hongyu Ouyang <96765450+casavaca@users.noreply.github.com> @@ -280,6 +316,7 @@ Icecream95 Ido S IgnacioFDM Igor Okulist +Ihar Hrachyshka Ikko Eltociear Ashimine Ilya Kurdyukov <59548320+ilyakurdyukov@users.noreply.github.com> Ionoclast Laboratories @@ -289,12 +326,15 @@ Ivan Ivan Filipov <159561759+vanaka11@users.noreply.github.com> Ivan Komarov Ivan Stepanov +JC <43374599+MrSMlT@users.noreply.github.com> +JFLFY2255 JH23X <165871467+JH23X@users.noreply.github.com> Jack Mousseau Jack Mousseau JackJollimore <130917767+JackJollimore@users.noreply.github.com> Jaeden Amero Jaemin Son +Jafar Uruç Jag Chadha Jakub N James A Capozzoli <157492257+jac-jim@users.noreply.github.com> @@ -305,6 +345,7 @@ Jan Ploski Jannis Schönleber Jared Van Bortel Jared Van Bortel +Jason C.H Jason McCartney Jason Stillerman Jean-Christophe Hoelt @@ -315,12 +356,14 @@ Jeffrey Morgan Jeffrey Quesnelle Jeroen Mostert Jesse Jojo Johnson +Jett Janiak Jeximo Jhen-Jie Hong Jiahao Li Jian Liao JidongZhang-THU <1119708529@qq.com> Jinwoo Jeong <33892306+williamjeong2@users.noreply.github.com> +Jinyang He Jiří Podivín <66251151+jpodivin@users.noreply.github.com> Jiří Sejkora Joan Fontanals @@ -343,6 +386,7 @@ Josh Ramer Joyce Juan Calderon-Perez <835733+gaby@users.noreply.github.com> Judd +Juk Armstrong <69222624+jukofyork@users.noreply.github.com> Julius Arkenberg Jun Hee Yoo Jun Jie <71215065+junnjiee16@users.noreply.github.com> @@ -357,6 +401,8 @@ Justine Tunney Juuso Alasuutari KASR Kamil Tomšík +Kante Yin +Karol Kontny <82021046+kkontny@users.noreply.github.com> Karsten Weiss Karthick Karthik Kumar Viswanathan <195178+guilt@users.noreply.github.com> @@ -376,6 +422,7 @@ Kolen Cheung Konstantin Herud Konstantin Zhuravlyov Kunshang Ji +Kyle Bruene Kyle Liang Kyle Mistele Kylin <56434533+KyL0N@users.noreply.github.com> @@ -394,6 +441,8 @@ Liu Jia LoganDark Loïc Carrère LostRuins <39025047+LostRuins@users.noreply.github.com> +LostRuins Concedo <39025047+LostRuins@users.noreply.github.com> +Lucas Moura Belo Luciano Luo Tian Lyle Dean @@ -423,6 +472,7 @@ MasterYi1024 <39848311+MasterYi1024@users.noreply.github.com> Mateusz Charytoniuk Matheus C. 
França Matheus Gabriel Alves Silva +Mathieu Baudier Mathieu Geli Mathieu Nayrolles Mathijs Henquet @@ -437,6 +487,7 @@ Matthew Tejo Matvey Soloviev Max Krasnyansky Max Krasnyansky +Maxim Evtush <154841002+maximevtush@users.noreply.github.com> Maxime <672982+maximegmd@users.noreply.github.com> Maximilian Winter Meng Zhang @@ -444,6 +495,7 @@ Meng, Hengyu Mengqing Cao Merrick Christensen Michael Coppola +Michael Engel Michael Francis Michael Hueschen Michael Kesper @@ -452,7 +504,9 @@ Michael Podvitskiy Michael Potter Michael de Gans Michaël de Vries +Michał Moskal Michał Tuszyński +Michelle Tan <41475767+MichelleTanPY@users.noreply.github.com> Mihai Mike Mikko Juola @@ -465,6 +519,7 @@ Miwa / Ensan <63481257+ensan-hcl@users.noreply.github.com> Mohammadreza Hendiani Mohammadreza Hendiani Molly Sophia +MoonRide303 <130458190+MoonRide303@users.noreply.github.com> MorganRO8 <47795945+MorganRO8@users.noreply.github.com> Murilo Santana Musab Gultekin @@ -477,6 +532,7 @@ Neo Zhang <14088817+arthw@users.noreply.github.com> Neo Zhang Neo Zhang Jianyu Neuman Vong +NeverLucky <92274250+nvrxq@users.noreply.github.com> Nexes the Old <124105151+Nexesenex@users.noreply.github.com> Nexesenex <124105151+Nexesenex@users.noreply.github.com> Niall Coates <1349685+Niall-@users.noreply.github.com> @@ -484,12 +540,17 @@ Nicholai Tukanov Nico Bosshard Nicolai Weitkemper Nicolás Pérez +Nicolò Scipione Nigel Bosch +Nikita Sarychev <42014488+sARY77@users.noreply.github.com> Niklas Korz NikolaiLyssogor <59844691+NikolaiLyssogor@users.noreply.github.com> +Nikolaos Pothitos Nikolas <127742645+nneubacher@users.noreply.github.com> Nindaleth +Nuno OSecret <135510162+OLSecret@users.noreply.github.com> +Oleksandr Kuvshynov <661042+okuvshynov@users.noreply.github.com> Oleksandr Nikitin Oleksii Maryshchenko Olivier Chafik @@ -499,11 +560,13 @@ PAB Pablo Duboue Pascal Patry Patrice Ferlet +Patrick Peng Paul Tsochantaris Pavel Zloi Pavol Rusnak Paweł Wodnicki <151604+32bitmicro@users.noreply.github.com> Pedro Cuenca +Peter Peter Sugihara Phil H <5756783+phiharri@users.noreply.github.com> Philip Taron @@ -514,6 +577,7 @@ Pieter Ouwerkerk Plamen Minev Prashant Vithule <119530321+Vithulep@users.noreply.github.com> Przemysław Pawełczyk +PureJourney Qin Yue Chen <71813199+chenqiny@users.noreply.github.com> Qingyou Meng Qu Zongfu <43257352+yancaoweidaode@users.noreply.github.com> @@ -529,11 +593,17 @@ Rand Xie Randall Fitzgerald Random Fly Reinforce-II +Rémy O +Rémy Oudompheng Ren Xuancheng Rene Leonhardt <65483435+reneleonhardt@users.noreply.github.com> +Reza Kakhki +Reza Rahemtola <49811529+RezaRahemtola@users.noreply.github.com> RhinoDevel +Riccardo Orlando Riceball LEE Rich Dougherty +Richard Richard Kiss Richard Roberson Rick G <26732651+TheFlipbook@users.noreply.github.com> @@ -544,10 +614,13 @@ Riley Stewart Rinne Rinne Robert Brisita <986796+rbrisita@users.noreply.github.com> +Robert Collins +Robert Ormandi <52251610+ormandi@users.noreply.github.com> Robert Sung-wook Shin Robey Holderith Robyn Roger Meier +Rohanjames1997 Roland <14355895+rbur0425@users.noreply.github.com> Romain Biessy Romain D <90720+Artefact2@users.noreply.github.com> @@ -559,7 +632,9 @@ Roni Ronny Brendel Ronsor Rowan Hart +Ruan <47767371+ruanych@users.noreply.github.com> Ruchira Hasaranga +Rudi Servo Ruixin Huang <18860020911@163.com> Rune <43761327+Rune-AI@users.noreply.github.com> RunningLeon @@ -568,6 +643,7 @@ Ryan Landay Ryder Wishart Ryuei Rőczey Barnabás <31726601+An0nie@users.noreply.github.com> +SAMI SRHMorris 
<69468379+SRHMorris@users.noreply.github.com> SXX SakuraUmi @@ -592,6 +668,8 @@ Shane A Shangning Xu <32517059+xushangning@users.noreply.github.com> Shankar Shanshan Shen <467638484@qq.com> +Shelby Jenkins <47464908+ShelbyJenkins@users.noreply.github.com> +Sheldon Robinson Shijie <821898965@qq.com> Shintarou Okada Shouzheng Liu <61452103+lshzh-ww@users.noreply.github.com> @@ -623,12 +701,14 @@ Steven Roussey Steward Garcia <57494570+FSSRepo@users.noreply.github.com> StrangeBytesDev <141275258+StrangeBytesDev@users.noreply.github.com> Suaj Carrot <72162667+SuajCarrot@users.noreply.github.com> +Sukriti Sharma SuperUserNameMan Sutou Kouhei Tai Duc Nguyen Taikono-Himazin Tameem <113388789+AhmadTameem@users.noreply.github.com> Tamotsu Takahashi +Tei Home Thái Hoàng Tâm <75922889+RoyalHeart@users.noreply.github.com> Thatcher Chamberlin Theia Vogel @@ -640,6 +720,7 @@ Tim Miller Tim Wang Timmy Knight Timothy Cronin <40186632+4imothy@users.noreply.github.com> +Ting Lou Ting Lou Ting Sun Tobias Lütke @@ -661,25 +742,36 @@ Uzo Nweke Vaibhav Srivastav Val Kharitonov Valentin Konovalov +Valentin Mamedov <45292985+Inf1delis@users.noreply.github.com> Valentyn Bezshapkin <61702053+valentynbez@users.noreply.github.com> Vali Malinoiu <0x4139@gmail.com> Victor Nogueira Victor Z. Peng Viet-Anh NGUYEN (Andrew) Vinesh Janarthanan <36610342+VJHack@users.noreply.github.com> +Vitali Lovich +Vivian Vlad Vladimir Vladimir Malyutin +Vladimir Vuksanovic <109677816+vvuksanovic@users.noreply.github.com> Vladimir Zorin VoidIsVoid <343750470@qq.com> Volodymyr Vitvitskyi <72226+signalpillar@users.noreply.github.com> +Wagner Bruna +Wang Qin <37098874+wangqin0@users.noreply.github.com> +Wang Ran (汪然) WangHaoranRobin <56047610+WangHaoranRobin@users.noreply.github.com> Weird Constructor +Weizhao Ouyang Welby Seely Wentai Zhang +Wilken Gottwalt <12194808+wgottwalt@users.noreply.github.com> WillCorticesAI <150854901+WillCorticesAI@users.noreply.github.com> William Tambellini +William Tambellini Willy Tarreau +Woof Dog <197125663+woof-dog@users.noreply.github.com> Wouter <9594229+DifferentialityDevelopment@users.noreply.github.com> Wu Jian Ping Wu Jian Ping @@ -692,6 +784,7 @@ Xie Yanbo Xingchen Song(宋星辰) Xinpeng Dou <81913537+Dou-Git@users.noreply.github.com> Xuan Son Nguyen +Xuan-Son Nguyen Yaiko Yann Follet <131855179+YannFollet@users.noreply.github.com> Yaroslav @@ -702,7 +795,9 @@ Yoshi Suhara Yoshi Suhara Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Yueh-Po Peng <94939112+y10ab1@users.noreply.github.com> +Yüg Yui +Yun Dou Yuri Khrustalev Yusuf Kağan Hanoğlu Yuval Peled <31162840+Yuval-Peled@users.noreply.github.com> @@ -714,18 +809,23 @@ Zhang Peiyuan Zheng.Deng <32841220+dengzheng-cloud@users.noreply.github.com> Zhenwei Jin <109658203+kylo5aby@users.noreply.github.com> Zhiyuan Li +Zhiyuan Li ZhouYuChen Ziad Ben Hadj-Alouane Ziang Wu <97337387+ZiangWu-77@users.noreply.github.com> Zsapi a-n-n-a-l-e-e <150648636+a-n-n-a-l-e-e@users.noreply.github.com> +a3sh <38979186+A3shTnT@users.noreply.github.com> adel boussaken afrideva <95653597+afrideva@users.noreply.github.com> +ag2s20150909 <19373730+ag2s20150909@users.noreply.github.com> agray3 akawrykow <142945436+akawrykow@users.noreply.github.com> +alek3y <44779186+alek3y@users.noreply.github.com> alexpinel <93524949+alexpinel@users.noreply.github.com> alonfaraj alwqx +amd-dwang amd-lalithnc amritahs-ibm andrijdavid @@ -737,6 +837,7 @@ arch-btw <57669023+arch-btw@users.noreply.github.com> arcrank ardfork <134447697+ardfork@users.noreply.github.com> 
arlo-phoenix <140345165+arlo-phoenix@users.noreply.github.com> +aryantandon01 <80969509+aryantandon01@users.noreply.github.com> at8u <129688334+at8u@users.noreply.github.com> automaticcat awatuna <23447591+awatuna@users.noreply.github.com> @@ -751,12 +852,16 @@ bryanSwk <93190252+bryanSwk@users.noreply.github.com> bsilvereagle bssrdf byte-6174 <88070277+byte-6174@users.noreply.github.com> +cduk <19917266+cduk@users.noreply.github.com> cebtenzzre chaihahaha chiranko <96988916+chiranko@users.noreply.github.com> clibdev <52199778+clibdev@users.noreply.github.com> clyang +cmdr2 +cmdr2 cocktailpeanut <121128867+cocktailpeanut@users.noreply.github.com> +codezjx coezbek comex compilade <113953597+compilade@users.noreply.github.com> @@ -774,20 +879,25 @@ deepdiffuser <112834445+deepdiffuser@users.noreply.github.com> devojony <61173062+devojony@users.noreply.github.com> ditsuke divinity76 +dm4 dm4 dotpy314 <33351922+dotpy314@users.noreply.github.com> drbh ds5t5 <145942675+ds5t5@users.noreply.github.com> dylan eastriver +ebraminio ebraminio eiery <19350831+eiery@users.noreply.github.com> eric8607242 fairydreaming <166155368+fairydreaming@users.noreply.github.com> fengerhu1 <2748250768@qq.com> +fj-y-saito <85871716+fj-y-saito@users.noreply.github.com> fraxy-v <65565042+fraxy-v@users.noreply.github.com> +fxzjshm <11426482+fxzjshm@users.noreply.github.com> github-actions[bot] gliptic +gn64 goerch grahameth <96447521+grahameth@users.noreply.github.com> gtygo @@ -809,13 +919,17 @@ hydai iSma iacore <74560659+iacore@users.noreply.github.com> icppWorld <124377669+icppWorld@users.noreply.github.com> +igardev <49397134+igardev@users.noreply.github.com> igarnier intelmatt <61025942+intelmatt@users.noreply.github.com> iohub +issixx <46835150+issixx@users.noreply.github.com> jacobi petrucciani <8117202+jpetrucciani@users.noreply.github.com> jaime-m-p <167997752+jaime-m-p@users.noreply.github.com> jameswu2014 <545426914@qq.com> +jason_w jdomke <28772296+jdomke@users.noreply.github.com> +jiahao su jiez <373447296@qq.com> jneem joecryptotoo <80373433+joecryptotoo@users.noreply.github.com> @@ -825,9 +939,11 @@ jon-chuang <9093549+jon-chuang@users.noreply.github.com> jp-x-g jukofyork <69222624+jukofyork@users.noreply.github.com> junchao-loongson <68935141+junchao-loongson@users.noreply.github.com> +junchao-zhao <68935141+junchao-loongson@users.noreply.github.com> jwj7140 <32943891+jwj7140@users.noreply.github.com> k.h.lai kaizau +kallewoof kalomaze <66376113+kalomaze@users.noreply.github.com> kang katsu560 <118887472+katsu560@users.noreply.github.com> @@ -835,6 +951,7 @@ kchro3 <62481661+kchro3@users.noreply.github.com> khimaros kiltyj klosax <131523366+klosax@users.noreply.github.com> +krystiancha kunal-vaishnavi <115581922+kunal-vaishnavi@users.noreply.github.com> kunnis kuronekosaiko @@ -847,6 +964,8 @@ ldwang le.chang leejet leo-pony +lexasub +lhez limitedAtonement liuwei-git <14815172+liuwei-git@users.noreply.github.com> lon <114724657+longregen@users.noreply.github.com> @@ -855,19 +974,25 @@ ltoniazzi <61414566+ltoniazzi@users.noreply.github.com> luoyu-intel m3ndax maddes8cht <55592906+maddes8cht@users.noreply.github.com> +magicse +mahorozte <41834471+mahorozte@users.noreply.github.com> makomk manikbhandari maor-ps <154728172+maor-ps@users.noreply.github.com> +mashdragon <122402293+mashdragon@users.noreply.github.com> matiaslin <45382001+matiaslin@users.noreply.github.com> +matt23654 matteo mdrokz mgroeber9110 <45620825+mgroeber9110@users.noreply.github.com> +midnight minarchist mj-shifu 
<77107165+mj-shifu@users.noreply.github.com> mmyjona momonga <115213907+mmnga@users.noreply.github.com> momonga <146910567+mmngays@users.noreply.github.com> moritzbrantner <31051084+moritzbrantner@users.noreply.github.com> +musoles <135031143+musoles@users.noreply.github.com> mzcu nanahi <130121847+na-na-hi@users.noreply.github.com> ngc92 <7938269+ngc92@users.noreply.github.com> @@ -884,18 +1009,23 @@ omahs <73983677+omahs@users.noreply.github.com> oobabooga <112222186+oobabooga@users.noreply.github.com> opparco ostix360 <55257054+ostix360@users.noreply.github.com> +pascal-lc <49066376+pascal-lc@users.noreply.github.com> pculliton +peidaqi pengxin99 perserk +petterreinholdtsen piDack <104877312+piDack@users.noreply.github.com> pmysl postmasters pudepiedj qingfengfenga <41416092+qingfengfenga@users.noreply.github.com> +qingy1337 qouoq qunash rabidcopy rankaiyx +redbeard rhjdvsgsgks <26178113+rhjdvsgsgks@users.noreply.github.com> rhuddleston rimoliga <53384203+rimoliga@users.noreply.github.com> @@ -906,12 +1036,14 @@ semidark serhii-nakon <57632032+serhii-nakon@users.noreply.github.com> sharpHL <132747147+sharpHL@users.noreply.github.com> shibe2 +simon886212 <37953122+simon886212@users.noreply.github.com> singularity <12184989+singularity-s0@users.noreply.github.com> sjinzh sjxx <63994076+ylsdamxssjxxdd@users.noreply.github.com> slaren <2141330+slaren@users.noreply.github.com> slaren snadampal <87143774+snadampal@users.noreply.github.com> +someone13574 <81528246+someone13574@users.noreply.github.com> standby24x7 staviq stduhpf @@ -922,19 +1054,23 @@ tarcey tc-mb <157115220+tc-mb@users.noreply.github.com> texmex76 <40733439+texmex76@users.noreply.github.com> thement <40525767+thement@users.noreply.github.com> +theraininsky <76763719+theraininsky@users.noreply.github.com> thewh1teagle <61390950+thewh1teagle@users.noreply.github.com> tjohnman toyer <2042519524@qq.com> tslmy +tv1wnd <55383215+tv1wnd@users.noreply.github.com> ubik2 uint256_t uint256_t unbounded uvos +uvos valiray <133289098+valiray@users.noreply.github.com> vb vik viric +vmobilis <75476228+vmobilis@users.noreply.github.com> vodkaslime <646329483@qq.com> vvhg1 <94630311+vvhg1@users.noreply.github.com> vxiiduu <73044267+vxiiduu@users.noreply.github.com> @@ -949,8 +1085,11 @@ wzy <32936898+Freed-Wu@users.noreply.github.com> xaedes xaedes xctan +xiaobing318 <71554036+xiaobing318@users.noreply.github.com> +xiaofei xloem <0xloem@gmail.com> yangli2 +ymcki <84055651+ymcki@users.noreply.github.com> yuiseki yuri@FreeBSD zakkor @@ -963,4 +1102,5 @@ zrm 杨朱 · Kiki 源文雨 <41315874+fumiama@users.noreply.github.com> 蕭澧邦 <45505768+shou692199@users.noreply.github.com> +谢乃闻 Нияз Гарифзянов <112617865+garrnizon@users.noreply.github.com> diff --git a/CMakeLists.txt b/CMakeLists.txt index 4c62d17880..de51c0a17b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,6 +29,8 @@ else() set(LLAMA_STANDALONE OFF) endif() +option(LLAMA_USE_SYSTEM_GGML "Use system libggml" OFF) + if (EMSCRIPTEN) set(BUILD_SHARED_LIBS_DEFAULT OFF) @@ -79,7 +81,8 @@ option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE}) option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE}) # 3rd party libs -option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF) +option(LLAMA_CURL "llama: use libcurl to download model from an URL" ON) +option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF) # Required for relocatable CMake package 
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake) @@ -144,7 +147,13 @@ endif() # 3rd-party # -if (NOT TARGET ggml) +if (LLAMA_USE_SYSTEM_GGML) + message(STATUS "Using system-provided libggml, skipping ggml build") + find_package(ggml REQUIRED) + add_library(ggml ALIAS ggml::ggml) +endif() + +if (NOT TARGET ggml AND NOT LLAMA_USE_SYSTEM_GGML) add_subdirectory(ggml) # ... otherwise assume ggml is added by a parent CMakeLists.txt endif() @@ -159,6 +168,11 @@ add_subdirectory(src) # utils, programs, examples and tests # +if (NOT LLAMA_BUILD_COMMON) + message(STATUS "LLAMA_BUILD_COMMON is OFF, disabling LLAMA_CURL") + set(LLAMA_CURL OFF) +endif() + if (LLAMA_BUILD_COMMON) add_subdirectory(common) endif() @@ -232,4 +246,21 @@ configure_file(cmake/llama.pc.in @ONLY) install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc" - DESTINATION lib/pkgconfig) + DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) + +# +# copy the license files +# + +# Check if running in GitHub Actions +if(DEFINED ENV{GITHUB_ACTIONS} AND "$ENV{GITHUB_ACTIONS}" STREQUAL "true") + message(STATUS "Running inside GitHub Actions - copying license files") + + # Copy all files from licenses/ to build/bin/ + file(GLOB LICENSE_FILES "${CMAKE_SOURCE_DIR}/licenses/*") + foreach(LICENSE_FILE ${LICENSE_FILES}) + get_filename_component(FILENAME ${LICENSE_FILE} NAME) + configure_file(${LICENSE_FILE} "${CMAKE_BINARY_DIR}/bin/${FILENAME}" COPYONLY) + endforeach() +endif() + diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8d411982b4..e68ff92445 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,10 +1,12 @@ # Pull requests (for contributors) +- llama.cpp uses the ggml tensor library for model evaluation. If you are unfamiliar with ggml, consider taking a look at the [examples in the ggml repository](https://github.com/ggml-org/ggml/tree/master/examples/). [simple](https://github.com/ggml-org/ggml/tree/master/examples/simple) shows the bare minimum for using ggml. [gpt-2](https://github.com/ggml-org/ggml/tree/master/examples/gpt-2) has minimal implementations for language model inference using GPT-2. [mnist](https://github.com/ggml-org/ggml/tree/master/examples/mnist) demonstrates how to train and evaluate a simple image classifier - Test your changes: - Execute [the full CI locally on your machine](ci/README.md) before publishing - Verify that the perplexity and the performance are not affected negatively by your changes (use `llama-perplexity` and `llama-bench`) - If you modified the `ggml` source, run the `test-backend-ops` tool to check whether different backend implementations of the `ggml` operators produce consistent results (this requires access to at least two different `ggml` backends) - If you modified a `ggml` operator or added a new one, add the corresponding test cases to `test-backend-ops` +- Create separate PRs for each feature or fix. Avoid combining unrelated changes in a single PR - Consider allowing write access to your branch for faster reviews, as reviewers can push commits directly - If your PR becomes stale, don't hesitate to ping the maintainers in the comments @@ -12,7 +14,7 @@ - Squash-merge PRs - Use the following format for the squashed commit title: ` : (#)`. 
For example: `utils : fix typo in utils.py (#1234)` -- Optionally pick a `` from here: https://github.com/ggerganov/llama.cpp/wiki/Modules +- Optionally pick a `` from here: https://github.com/ggml-org/llama.cpp/wiki/Modules - Consider adding yourself to [CODEOWNERS](CODEOWNERS) # Coding guidelines @@ -37,17 +39,17 @@ _(NOTE: this guideline is yet to be applied to the `llama.cpp` codebase. New code should follow this guideline.)_ -- Try to follow the existing patterns in the code (indentation, spaces, etc.). In case of doubt use `clang-format` to format the added code +- Try to follow the existing patterns in the code (indentation, spaces, etc.). In case of doubt use `clang-format` (from clang-tools v15+) to format the added code - For anything not covered in the current guidelines, refer to the [C++ Core Guidelines](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines) - Tensors store data in row-major order. We refer to dimension 0 as columns, 1 as rows, 2 as matrices -- Matrix multiplication is unconventional: [`C = ggml_mul_mat(ctx, A, B)`](https://github.com/ggerganov/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means $C^T = A B^T \Leftrightarrow C = B A^T.$ +- Matrix multiplication is unconventional: [`C = ggml_mul_mat(ctx, A, B)`](https://github.com/ggml-org/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means $C^T = A B^T \Leftrightarrow C = B A^T.$ ![matmul](media/matmul.png) # Naming guidelines - Use `snake_case` for function, variable and type names -- Naming usually optimizes for longest common prefix (see https://github.com/ggerganov/ggml/pull/302#discussion_r1243240963) +- Naming usually optimizes for longest common prefix (see https://github.com/ggml-org/ggml/pull/302#discussion_r1243240963) ```cpp // not OK @@ -122,4 +124,4 @@ The Github issues, PRs and discussions contain a lot of information that can be useful to get familiar with the codebase. For convenience, some of the more important information is referenced from Github projects: -https://github.com/ggerganov/llama.cpp/projects +https://github.com/ggml-org/llama.cpp/projects diff --git a/Makefile b/Makefile index 295522ba35..772993ada2 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ ifndef LLAMA_MAKEFILE -$(error The Makefile build is deprecated. Use the CMake build instead. For more details, see https://github.com/ggerganov/llama.cpp/blob/master/docs/build.md) +$(error The Makefile build is deprecated. Use the CMake build instead. For more details, see https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md) endif # Define the default target now so that it is always the first target @@ -52,6 +52,7 @@ TEST_TARGETS = \ tests/test-arg-parser \ tests/test-autorelease \ tests/test-backend-ops \ + tests/test-chat \ tests/test-chat-template \ tests/test-double-float \ tests/test-grammar-integration \ @@ -462,7 +463,7 @@ endif ifneq '' '$(findstring mingw,$(shell $(CC) -dumpmachine))' # The stack is only 16-byte aligned on Windows, so don't let gcc emit aligned moves. 
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412 - # https://github.com/ggerganov/llama.cpp/issues/2922 + # https://github.com/ggml-org/llama.cpp/issues/2922 MK_CFLAGS += -Xassembler -muse-unaligned-vector-move MK_CXXFLAGS += -Xassembler -muse-unaligned-vector-move @@ -595,7 +596,7 @@ ifdef GGML_RPC OBJ_GGML_EXT += ggml/src/ggml-rpc.o endif # GGML_RPC -OBJ_CUDA_TMPL = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu)) +OBJ_CUDA_TMPL = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-mma*.cu)) OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/mmq*.cu)) ifdef GGML_CUDA_FA_ALL_QUANTS @@ -679,6 +680,10 @@ ifdef GGML_CUDA_CCBIN MK_NVCCFLAGS += -ccbin $(GGML_CUDA_CCBIN) endif # GGML_CUDA_CCBIN +ifdef GGML_CUDA_NO_FA + MK_NVCCFLAGS += -DGGML_CUDA_NO_FA +endif # GGML_CUDA_NO_FA + ifdef GGML_CUDA_FA_ALL_QUANTS MK_NVCCFLAGS += -DGGML_CUDA_FA_ALL_QUANTS endif # GGML_CUDA_FA_ALL_QUANTS @@ -775,10 +780,6 @@ ifdef GGML_HIP MK_CPPFLAGS += -DGGML_USE_HIP -DGGML_USE_CUDA -ifdef GGML_HIP_UMA - MK_CPPFLAGS += -DGGML_HIP_UMA -endif # GGML_HIP_UMA - MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64 MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas @@ -799,6 +800,10 @@ ifdef GGML_CUDA_NO_PEER_COPY HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY endif # GGML_CUDA_NO_PEER_COPY +ifdef GGML_CUDA_NO_FA + HIPFLAGS += -DGGML_CUDA_NO_FA +endif # GGML_CUDA_NO_FA + OBJ_GGML_EXT += ggml/src/ggml-cuda/ggml-cuda.o OBJ_GGML_EXT += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu)) OBJ_GGML_EXT += $(OBJ_CUDA_TMPL) @@ -827,7 +832,7 @@ ifdef GGML_MUSA else MUSA_PATH ?= /opt/musa endif - MUSA_ARCHITECTURES ?= 21;22 + MUSA_ARCHITECTURES ?= 21;22;31 MK_CPPFLAGS += -DGGML_USE_MUSA -DGGML_USE_CUDA MK_LDFLAGS += -L$(MUSA_PATH)/lib -Wl,-rpath=$(MUSA_PATH)/lib @@ -846,7 +851,7 @@ ifdef GGML_MUSA CXX := $(MUSA_PATH)/bin/clang++ MCC := $(CCACHE) $(MUSA_PATH)/bin/mcc - MUSAFLAGS = -x musa -mtgpu + MUSAFLAGS = -fsigned-char -x musa -mtgpu MUSAFLAGS += $(foreach arch,$(subst ;, ,$(MUSA_ARCHITECTURES)),--cuda-gpu-arch=mp_$(arch)) ifdef GGML_CUDA_FORCE_MMQ @@ -875,6 +880,10 @@ ifdef GGML_CUDA_NO_PEER_COPY MUSAFLAGS += -DGGML_CUDA_NO_PEER_COPY endif # GGML_CUDA_NO_PEER_COPY +ifdef GGML_CUDA_NO_FA + MUSAFLAGS += -DGGML_CUDA_NO_FA +endif # GGML_CUDA_NO_FA + ifdef GGML_CUDA_FA_ALL_QUANTS MUSAFLAGS += -DGGML_CUDA_FA_ALL_QUANTS endif # GGML_CUDA_FA_ALL_QUANTS @@ -983,6 +992,7 @@ OBJ_COMMON = \ $(DIR_COMMON)/ngram-cache.o \ $(DIR_COMMON)/sampling.o \ $(DIR_COMMON)/speculative.o \ + $(DIR_COMMON)/chat.o \ $(DIR_COMMON)/build-info.o \ $(DIR_COMMON)/json-schema-to-grammar.o @@ -1076,8 +1086,8 @@ endif ifdef REMOVE_WARNING $(info !!! REMOVAL WARNING !!!) 
$(info The following LLAMA_ options have been removed and are no longer supported) -$(info - LLAMA_DISABLE_LOGS (https://github.com/ggerganov/llama.cpp/pull/9418)) -$(info - LLAMA_SERVER_VERBOSE (https://github.com/ggerganov/llama.cpp/pull/9418)) +$(info - LLAMA_DISABLE_LOGS (https://github.com/ggml-org/llama.cpp/pull/9418)) +$(info - LLAMA_SERVER_VERBOSE (https://github.com/ggml-org/llama.cpp/pull/9418)) $(info ) endif @@ -1361,6 +1371,8 @@ llama-server: \ examples/server/httplib.h \ examples/server/index.html.hpp \ examples/server/loading.html.hpp \ + common/chat.cpp \ + common/chat.h \ common/chat-template.hpp \ common/json.hpp \ common/minja.hpp \ @@ -1471,6 +1483,11 @@ tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp \ $(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) +tests/test-chat: tests/test-chat.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + tests/test-opt: tests/test-opt.cpp \ $(OBJ_GGML) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) diff --git a/Package.swift b/Package.swift deleted file mode 100644 index 01c996d242..0000000000 --- a/Package.swift +++ /dev/null @@ -1,19 +0,0 @@ -// swift-tools-version:5.5 - -import PackageDescription - -let package = Package( - name: "llama", - platforms: [ - .macOS(.v12), - .iOS(.v14), - .watchOS(.v4), - .tvOS(.v14) - ], - products: [ - .library(name: "llama", targets: ["llama"]), - ], - targets: [ - .systemLibrary(name: "llama", pkgConfig: "llama"), - ] -) diff --git a/README.md b/README.md index ff85367733..cf45f23cf4 100644 --- a/README.md +++ b/README.md @@ -3,25 +3,26 @@ ![llama](https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png) [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT) -[![Server](https://github.com/ggerganov/llama.cpp/actions/workflows/server.yml/badge.svg)](https://github.com/ggerganov/llama.cpp/actions/workflows/server.yml) +[![Server](https://github.com/ggml-org/llama.cpp/actions/workflows/server.yml/badge.svg)](https://github.com/ggml-org/llama.cpp/actions/workflows/server.yml) -[Roadmap](https://github.com/users/ggerganov/projects/7) / [Project status](https://github.com/ggerganov/llama.cpp/discussions/3471) / [Manifesto](https://github.com/ggerganov/llama.cpp/discussions/205) / [ggml](https://github.com/ggerganov/ggml) +[Roadmap](https://github.com/users/ggerganov/projects/7) / [Project status](https://github.com/ggml-org/llama.cpp/discussions/3471) / [Manifesto](https://github.com/ggml-org/llama.cpp/discussions/205) / [ggml](https://github.com/ggml-org/ggml) Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) in pure C/C++ ## Recent API changes -- [Changelog for `libllama` API](https://github.com/ggerganov/llama.cpp/issues/9289) -- [Changelog for `llama-server` REST API](https://github.com/ggerganov/llama.cpp/issues/9291) +- [Changelog for `libllama` API](https://github.com/ggml-org/llama.cpp/issues/9289) +- [Changelog for `llama-server` REST API](https://github.com/ggml-org/llama.cpp/issues/9291) ## Hot topics -- **How to use [MTLResidencySet](https://developer.apple.com/documentation/metal/mtlresidencyset?language=objc) to keep the GPU memory active?** https://github.com/ggerganov/llama.cpp/pull/11427 +- **How to use 
[MTLResidencySet](https://developer.apple.com/documentation/metal/mtlresidencyset?language=objc) to keep the GPU memory active?** https://github.com/ggml-org/llama.cpp/pull/11427 - **VS Code extension for FIM completions:** https://github.com/ggml-org/llama.vscode +- Universal [tool call support](./docs/function-calling.md) in `llama-server` https://github.com/ggml-org/llama.cpp/pull/9639 - Vim/Neovim plugin for FIM completions: https://github.com/ggml-org/llama.vim -- Introducing GGUF-my-LoRA https://github.com/ggerganov/llama.cpp/discussions/10123 -- Hugging Face Inference Endpoints now support GGUF out of the box! https://github.com/ggerganov/llama.cpp/discussions/9669 -- Hugging Face GGUF editor: [discussion](https://github.com/ggerganov/llama.cpp/discussions/9268) | [tool](https://huggingface.co/spaces/CISCai/gguf-editor) +- Introducing GGUF-my-LoRA https://github.com/ggml-org/llama.cpp/discussions/10123 +- Hugging Face Inference Endpoints now support GGUF out of the box! https://github.com/ggml-org/llama.cpp/discussions/9669 +- Hugging Face GGUF editor: [discussion](https://github.com/ggml-org/llama.cpp/discussions/9268) | [tool](https://huggingface.co/spaces/CISCai/gguf-editor) ---- @@ -38,7 +39,7 @@ range of hardware - locally and in the cloud. - Vulkan and SYCL backend support - CPU+GPU hybrid inference to partially accelerate models larger than the total VRAM capacity -The `llama.cpp` project is the main playground for developing new features for the [ggml](https://github.com/ggerganov/ggml) library. +The `llama.cpp` project is the main playground for developing new features for the [ggml](https://github.com/ggml-org/ggml) library.
Models @@ -58,23 +59,23 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo - [X] [Falcon](https://huggingface.co/models?search=tiiuae/falcon) - [X] [Chinese LLaMA / Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca) and [Chinese LLaMA-2 / Alpaca-2](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2) - [X] [Vigogne (French)](https://github.com/bofenghuang/vigogne) -- [X] [BERT](https://github.com/ggerganov/llama.cpp/pull/5423) +- [X] [BERT](https://github.com/ggml-org/llama.cpp/pull/5423) - [X] [Koala](https://bair.berkeley.edu/blog/2023/04/03/koala/) - [X] [Baichuan 1 & 2](https://huggingface.co/models?search=baichuan-inc/Baichuan) + [derivations](https://huggingface.co/hiyouga/baichuan-7b-sft) - [X] [Aquila 1 & 2](https://huggingface.co/models?search=BAAI/Aquila) -- [X] [Starcoder models](https://github.com/ggerganov/llama.cpp/pull/3187) +- [X] [Starcoder models](https://github.com/ggml-org/llama.cpp/pull/3187) - [X] [Refact](https://huggingface.co/smallcloudai/Refact-1_6B-fim) -- [X] [MPT](https://github.com/ggerganov/llama.cpp/pull/3417) -- [X] [Bloom](https://github.com/ggerganov/llama.cpp/pull/3553) +- [X] [MPT](https://github.com/ggml-org/llama.cpp/pull/3417) +- [X] [Bloom](https://github.com/ggml-org/llama.cpp/pull/3553) - [x] [Yi models](https://huggingface.co/models?search=01-ai/Yi) - [X] [StableLM models](https://huggingface.co/stabilityai) - [x] [Deepseek models](https://huggingface.co/models?search=deepseek-ai/deepseek) - [x] [Qwen models](https://huggingface.co/models?search=Qwen/Qwen) -- [x] [PLaMo-13B](https://github.com/ggerganov/llama.cpp/pull/3557) +- [x] [PLaMo-13B](https://github.com/ggml-org/llama.cpp/pull/3557) - [x] [Phi models](https://huggingface.co/models?search=microsoft/phi) -- [x] [PhiMoE](https://github.com/ggerganov/llama.cpp/pull/11003) +- [x] [PhiMoE](https://github.com/ggml-org/llama.cpp/pull/11003) - [x] [GPT-2](https://huggingface.co/gpt2) -- [x] [Orion 14B](https://github.com/ggerganov/llama.cpp/pull/5118) +- [x] [Orion 14B](https://github.com/ggml-org/llama.cpp/pull/5118) - [x] [InternLM2](https://huggingface.co/models?search=internlm2) - [x] [CodeShell](https://github.com/WisdomShell/codeshell) - [x] [Gemma](https://ai.google.dev/gemma) @@ -95,7 +96,8 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo - [x] [Bitnet b1.58 models](https://huggingface.co/1bitLLM) - [x] [Flan T5](https://huggingface.co/models?search=flan-t5) - [x] [Open Elm models](https://huggingface.co/collections/apple/openelm-instruct-models-6619ad295d7ae9f868b759ca) -- [x] [ChatGLM3-6b](https://huggingface.co/THUDM/chatglm3-6b) + [ChatGLM4-9b](https://huggingface.co/THUDM/glm-4-9b) +- [x] [ChatGLM3-6b](https://huggingface.co/THUDM/chatglm3-6b) + [ChatGLM4-9b](https://huggingface.co/THUDM/glm-4-9b) + [GLMEdge-1.5b](https://huggingface.co/THUDM/glm-edge-1.5b-chat) + [GLMEdge-4b](https://huggingface.co/THUDM/glm-edge-4b-chat) +- [x] [GLM-4-0414](https://huggingface.co/collections/THUDM/glm-4-0414-67f3cbcb34dd9d252707cb2e) - [x] [SmolLM](https://huggingface.co/collections/HuggingFaceTB/smollm-6695016cad7167254ce15966) - [x] [EXAONE-3.0-7.8B-Instruct](https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct) - [x] [FalconMamba Models](https://huggingface.co/collections/tiiuae/falconmamba-7b-66b9a580324dd1598b0f6d4a) @@ -104,6 +106,8 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo - [x] [RWKV-6](https://github.com/BlinkDL/RWKV-LM) - [x] 
[QRWKV-6](https://huggingface.co/recursal/QRWKV6-32B-Instruct-Preview-v0.1) - [x] [GigaChat-20B-A3B](https://huggingface.co/ai-sage/GigaChat-20B-A3B-instruct) +- [X] [Trillion-7B-preview](https://huggingface.co/trillionlabs/Trillion-7B-preview) +- [x] [Ling models](https://huggingface.co/collections/inclusionAI/ling-67c51c85b34a7ea0aba94c32) #### Multimodal @@ -116,6 +120,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo - [x] [Mini CPM](https://huggingface.co/models?search=MiniCPM) - [x] [Moondream](https://huggingface.co/vikhyatk/moondream2) - [x] [Bunny](https://github.com/BAAI-DCAI/Bunny) +- [x] [GLM-EDGE](https://huggingface.co/models?search=glm-edge) - [x] [Qwen2-VL](https://huggingface.co/collections/Qwen/qwen2-vl-66cee7455501d7126940800d)
@@ -134,6 +139,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo - Rust (more features): [edgenai/llama_cpp-rs](https://github.com/edgenai/llama_cpp-rs) - Rust (nicer API): [mdrokz/rust-llama.cpp](https://github.com/mdrokz/rust-llama.cpp) - Rust (more direct bindings): [utilityai/llama-cpp-rs](https://github.com/utilityai/llama-cpp-rs) +- Rust (automated build from crates.io): [ShelbyJenkins/llm_client](https://github.com/ShelbyJenkins/llm_client) - C#/.NET: [SciSharp/LLamaSharp](https://github.com/SciSharp/LLamaSharp) - C#/VB.NET (more features - community license): [LM-Kit.NET](https://docs.lm-kit.com/lm-kit-net/index.html) - Scala 3: [donderom/llm4s](https://github.com/donderom/llm4s) @@ -143,10 +149,11 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo - Zig: [deins/llama.cpp.zig](https://github.com/Deins/llama.cpp.zig) - Flutter/Dart: [netdur/llama_cpp_dart](https://github.com/netdur/llama_cpp_dart) - Flutter: [xuegao-tzx/Fllama](https://github.com/xuegao-tzx/Fllama) -- PHP (API bindings and features built on top of llama.cpp): [distantmagic/resonance](https://github.com/distantmagic/resonance) [(more info)](https://github.com/ggerganov/llama.cpp/pull/6326) +- PHP (API bindings and features built on top of llama.cpp): [distantmagic/resonance](https://github.com/distantmagic/resonance) [(more info)](https://github.com/ggml-org/llama.cpp/pull/6326) - Guile Scheme: [guile_llama_cpp](https://savannah.nongnu.org/projects/guile-llama-cpp) - Swift [srgtuszy/llama-cpp-swift](https://github.com/srgtuszy/llama-cpp-swift) - Swift [ShenghaiWang/SwiftLlama](https://github.com/ShenghaiWang/SwiftLlama) +- Delphi [Embarcadero/llama-cpp-delphi](https://github.com/Embarcadero/llama-cpp-delphi) @@ -161,6 +168,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo - [eva](https://github.com/ylsdamxssjxxdd/eva) (MIT) - [iohub/collama](https://github.com/iohub/coLLaMA) (Apache-2.0) - [janhq/jan](https://github.com/janhq/jan) (AGPL) +- [johnbean393/Sidekick](https://github.com/johnbean393/Sidekick) (MIT) - [KanTV](https://github.com/zhouwg/kantv?tab=readme-ov-file) (Apache-2.0) - [KodiBot](https://github.com/firatkiral/kodibot) (GPL) - [llama.vim](https://github.com/ggml-org/llama.vim) (MIT) @@ -186,6 +194,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo - [ramalama](https://github.com/containers/ramalama) (MIT) - [semperai/amica](https://github.com/semperai/amica) (MIT) - [withcatai/catai](https://github.com/withcatai/catai) (MIT) +- [Autopen](https://github.com/blackhole89/autopen) (GPL) @@ -208,7 +217,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo - [llama_cpp_canister](https://github.com/onicai/llama_cpp_canister) - llama.cpp as a smart contract on the Internet Computer, using WebAssembly - [llama-swap](https://github.com/mostlygeek/llama-swap) - transparent proxy that adds automatic model switching with llama-server - [Kalavai](https://github.com/kalavai-net/kalavai-client) - Crowdsource end to end LLM deployment at any scale - +- [llmaz](https://github.com/InftyAI/llmaz) - ☸️ Easy, advanced inference platform for large language models on Kubernetes.
@@ -231,6 +240,8 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
| [HIP](docs/build.md#hip) | AMD GPU |
| [Vulkan](docs/build.md#vulkan) | GPU |
| [CANN](docs/build.md#cann) | Ascend NPU |
+| [OpenCL](docs/backend/OPENCL.md) | Adreno GPU |
+| [RPC](https://github.com/ggml-org/llama.cpp/tree/master/examples/rpc) | All |

## Building the project

@@ -240,7 +251,7 @@ The project also includes many example programs and tools using the `llama` libr
- Clone this repository and build locally, see [how to build](docs/build.md)
- On MacOS or Linux, install `llama.cpp` via [brew, flox or nix](docs/install.md)
- Use a Docker image, see [documentation for Docker](docs/docker.md)
-- Download pre-built binaries from [releases](https://github.com/ggerganov/llama.cpp/releases)
+- Download pre-built binaries from [releases](https://github.com/ggml-org/llama.cpp/releases)

## Obtaining and quantizing models

@@ -249,18 +260,20 @@ The [Hugging Face](https://huggingface.co) platform hosts a [number of LLMs](htt
- [Trending](https://huggingface.co/models?library=gguf&sort=trending)
- [LLaMA](https://huggingface.co/models?sort=trending&search=llama+gguf)

-You can either manually download the GGUF file or directly use any `llama.cpp`-compatible models from Hugging Face by using this CLI argument: `-hf <user>/<model>[:quant]`
+You can either manually download the GGUF file or directly use any `llama.cpp`-compatible models from [Hugging Face](https://huggingface.co/) or other model hosting sites, such as [ModelScope](https://modelscope.cn/), by using this CLI argument: `-hf <user>/<model>[:quant]`.
+
+By default, the CLI downloads from Hugging Face; you can switch to another endpoint with the environment variable `MODEL_ENDPOINT`. For example, you may opt to download model checkpoints from ModelScope or other model-sharing communities by setting the environment variable, e.g. `MODEL_ENDPOINT=https://www.modelscope.cn/`.

After downloading a model, use the CLI tools to run it locally - see below.

-`llama.cpp` requires the model to be stored in the [GGUF](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md) file format. Models in other data formats can be converted to GGUF using the `convert_*.py` Python scripts in this repo.
+`llama.cpp` requires the model to be stored in the [GGUF](https://github.com/ggml-org/ggml/blob/master/docs/gguf.md) file format. Models in other data formats can be converted to GGUF using the `convert_*.py` Python scripts in this repo.
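As a quick illustration of the download options above (a minimal sketch; `<user>/<model>[:quant]` is a placeholder for any `llama.cpp`-compatible GGUF repository, not a specific model):

```console
# download (if not cached) and run a model directly from Hugging Face, the default endpoint
$ llama-cli -hf <user>/<model>[:quant]

# fetch the same model from ModelScope instead by overriding the endpoint
$ MODEL_ENDPOINT=https://www.modelscope.cn/ llama-cli -hf <user>/<model>[:quant]
```

The same `-hf` argument is also accepted by `llama-server`.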
The Hugging Face platform provides a variety of online tools for converting, quantizing and hosting models with `llama.cpp`: - Use the [GGUF-my-repo space](https://huggingface.co/spaces/ggml-org/gguf-my-repo) to convert to GGUF format and quantize model weights to smaller sizes -- Use the [GGUF-my-LoRA space](https://huggingface.co/spaces/ggml-org/gguf-my-lora) to convert LoRA adapters to GGUF format (more info: https://github.com/ggerganov/llama.cpp/discussions/10123) -- Use the [GGUF-editor space](https://huggingface.co/spaces/CISCai/gguf-editor) to edit GGUF meta data in the browser (more info: https://github.com/ggerganov/llama.cpp/discussions/9268) -- Use the [Inference Endpoints](https://ui.endpoints.huggingface.co/) to directly host `llama.cpp` in the cloud (more info: https://github.com/ggerganov/llama.cpp/discussions/9669) +- Use the [GGUF-my-LoRA space](https://huggingface.co/spaces/ggml-org/gguf-my-lora) to convert LoRA adapters to GGUF format (more info: https://github.com/ggml-org/llama.cpp/discussions/10123) +- Use the [GGUF-editor space](https://huggingface.co/spaces/CISCai/gguf-editor) to edit GGUF meta data in the browser (more info: https://github.com/ggml-org/llama.cpp/discussions/9268) +- Use the [Inference Endpoints](https://ui.endpoints.huggingface.co/) to directly host `llama.cpp` in the cloud (more info: https://github.com/ggml-org/llama.cpp/discussions/9669) To learn more about model quantization, [read this documentation](examples/quantize/README.md) @@ -422,7 +435,7 @@ To learn more about model quantization, [read this documentation](examples/quant
-[^1]: [examples/perplexity/README.md](examples/perplexity/README.md) +[^1]: [examples/perplexity/README.md](./examples/perplexity/README.md) [^2]: [https://huggingface.co/docs/transformers/perplexity](https://huggingface.co/docs/transformers/perplexity) ## [`llama-bench`](examples/llama-bench) @@ -483,9 +496,9 @@ To learn more about model quantization, [read this documentation](examples/quant - Collaborators can push to branches in the `llama.cpp` repo and merge PRs into the `master` branch - Collaborators will be invited based on contributions - Any help with managing issues, PRs and projects is very appreciated! -- See [good first issues](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) for tasks suitable for first contributions +- See [good first issues](https://github.com/ggml-org/llama.cpp/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) for tasks suitable for first contributions - Read the [CONTRIBUTING.md](CONTRIBUTING.md) for more information -- Make sure to read this: [Inference at the edge](https://github.com/ggerganov/llama.cpp/discussions/205) +- Make sure to read this: [Inference at the edge](https://github.com/ggml-org/llama.cpp/discussions/205) - A bit of backstory for those who are interested: [Changelog podcast](https://changelog.com/podcast/532) ## Other documentation @@ -500,7 +513,7 @@ To learn more about model quantization, [read this documentation](examples/quant - [Running on Docker](docs/docker.md) - [Build on Android](docs/android.md) - [Performance troubleshooting](docs/development/token_generation_performance_tips.md) -- [GGML tips & tricks](https://github.com/ggerganov/llama.cpp/wiki/GGML-Tips-&-Tricks) +- [GGML tips & tricks](https://github.com/ggml-org/llama.cpp/wiki/GGML-Tips-&-Tricks) #### Seminal papers and background on the models @@ -514,5 +527,47 @@ If your issue is with model generation quality, then please at least scan the fo - [Aligning language models to follow instructions](https://openai.com/research/instruction-following) - [Training language models to follow instructions with human feedback](https://arxiv.org/abs/2203.02155) -#### References +## XCFramework +The XCFramework is a precompiled version of the library for iOS, visionOS, tvOS, +and macOS. It can be used in Swift projects without the need to compile the +library from source. For example: +```swift +// swift-tools-version: 5.10 +// The swift-tools-version declares the minimum version of Swift required to build this package. +import PackageDescription + +let package = Package( + name: "MyLlamaPackage", + targets: [ + .executableTarget( + name: "MyLlamaPackage", + dependencies: [ + "LlamaFramework" + ]), + .binaryTarget( + name: "LlamaFramework", + url: "https://github.com/ggml-org/llama.cpp/releases/download/b5046/llama-b5046-xcframework.zip", + checksum: "c19be78b5f00d8d29a25da41042cb7afa094cbf6280a225abe614b03b20029ab" + ) + ] +) +``` +The above example is using an intermediate build `b5046` of the library. This can be modified +to use a different version by changing the URL and checksum. + +## Completions +Command-line completion is available for some environments. + +#### Bash Completion +```bash +$ build/bin/llama-cli --completion-bash > ~/.llama-completion.bash +$ source ~/.llama-completion.bash +``` +Optionally this can be added to your `.bashrc` or `.bash_profile` to load it +automatically. 
For example: +```console +$ echo "source ~/.llama-completion.bash" >> ~/.bashrc +``` + +## References diff --git a/SECURITY.md b/SECURITY.md index f4322c6ee4..6a1bb6c32c 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -62,6 +62,6 @@ Beware that none of the topics under [Using llama.cpp securely](#using-llamacpp- However, If you have discovered a security vulnerability in this project, please report it privately. **Do not disclose it as a public issue.** This gives us time to work with you to fix the issue before public exposure, reducing the chance that the exploit will be used before a patch is released. -Please disclose it as a private [security advisory](https://github.com/ggerganov/llama.cpp/security/advisories/new). +Please disclose it as a private [security advisory](https://github.com/ggml-org/llama.cpp/security/advisories/new). A team of volunteers on a reasonable-effort basis maintains this project. As such, please give us at least 90 days to work on a fix before public exposure. diff --git a/Sources/llama/llama.h b/Sources/llama/llama.h deleted file mode 100644 index 41725880ed..0000000000 --- a/Sources/llama/llama.h +++ /dev/null @@ -1,4 +0,0 @@ -#pragma once - -#include - diff --git a/Sources/llama/module.modulemap b/Sources/llama/module.modulemap deleted file mode 100644 index d010555b1c..0000000000 --- a/Sources/llama/module.modulemap +++ /dev/null @@ -1,5 +0,0 @@ -module llama [system] { - header "llama.h" - link "llama" - export * -} diff --git a/build-xcframework.sh b/build-xcframework.sh new file mode 100755 index 0000000000..97001b5f7f --- /dev/null +++ b/build-xcframework.sh @@ -0,0 +1,538 @@ +#!/bin/bash +# +# Options +IOS_MIN_OS_VERSION=16.4 +MACOS_MIN_OS_VERSION=13.3 +VISIONOS_MIN_OS_VERSION=1.0 +TVOS_MIN_OS_VERSION=16.4 + +BUILD_SHARED_LIBS=OFF +LLAMA_BUILD_EXAMPLES=OFF +LLAMA_BUILD_TESTS=OFF +LLAMA_BUILD_SERVER=OFF +GGML_METAL=ON +GGML_METAL_EMBED_LIBRARY=ON +GGML_BLAS_DEFAULT=ON +GGML_METAL_USE_BF16=ON +GGML_OPENMP=OFF + +COMMON_C_FLAGS="-Wno-macro-redefined -Wno-shorten-64-to-32 -Wno-unused-command-line-argument -g" +COMMON_CXX_FLAGS="-Wno-macro-redefined -Wno-shorten-64-to-32 -Wno-unused-command-line-argument -g" + +# Common options for all builds +COMMON_CMAKE_ARGS=( + -DCMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_REQUIRED=NO + -DCMAKE_XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY="" + -DCMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_ALLOWED=NO + -DCMAKE_XCODE_ATTRIBUTE_DEBUG_INFORMATION_FORMAT="dwarf-with-dsym" + -DCMAKE_XCODE_ATTRIBUTE_GCC_GENERATE_DEBUGGING_SYMBOLS=YES + -DCMAKE_XCODE_ATTRIBUTE_COPY_PHASE_STRIP=NO + -DCMAKE_XCODE_ATTRIBUTE_STRIP_INSTALLED_PRODUCT=NO + -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml + -DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS} + -DLLAMA_BUILD_EXAMPLES=${LLAMA_BUILD_EXAMPLES} + -DLLAMA_BUILD_TESTS=${LLAMA_BUILD_TESTS} + -DLLAMA_BUILD_SERVER=${LLAMA_BUILD_SERVER} + -DGGML_METAL_EMBED_LIBRARY=${GGML_METAL_EMBED_LIBRARY} + -DGGML_BLAS_DEFAULT=${GGML_BLAS_DEFAULT} + -DGGML_METAL=${GGML_METAL} + -DGGML_METAL_USE_BF16=${GGML_METAL_USE_BF16} + -DGGML_NATIVE=OFF + -DGGML_OPENMP=${GGML_OPENMP} +) + +XCODE_VERSION=$(xcodebuild -version 2>/dev/null | head -n1 | awk '{ print $2 }') +MAJOR_VERSION=$(echo $XCODE_VERSION | cut -d. -f1) +MINOR_VERSION=$(echo $XCODE_VERSION | cut -d. -f2) +echo "Detected Xcode version: $XCODE_VERSION" + +check_required_tool() { + local tool=$1 + local install_message=$2 + + if ! command -v $tool &> /dev/null; then + echo "Error: $tool is required but not found." + echo "$install_message" + exit 1 + fi +} +echo "Checking for required tools..." 
+check_required_tool "cmake" "Please install CMake 3.28.0 or later (brew install cmake)" +check_required_tool "xcodebuild" "Please install Xcode and Xcode Command Line Tools (xcode-select --install)" +check_required_tool "libtool" "Please install libtool which should be available with Xcode Command Line Tools (CLT). Make sure Xcode CLT is installed (xcode-select --install)" +check_required_tool "dsymutil" "Please install Xcode and Xcode Command Line Tools (xcode-select --install)" + +set -e + +## Clean up previous builds +rm -rf build-apple +rm -rf build-ios-sim +rm -rf build-ios-device +rm -rf build-macos +rm -rf build-visionos +rm -rf build-visionos-sim +rm -rf build-tvos-sim +rm -rf build-tvos-device + +# Setup the xcframework build directory structure +setup_framework_structure() { + local build_dir=$1 + local min_os_version=$2 + local platform=$3 # "ios", "macos", "visionos", or "tvos" + local framework_name="llama" + + echo "Creating ${platform}-style framework structure for ${build_dir}" + + if [[ "$platform" == "macos" ]]; then + # macOS versioned structure uses versioned directories + mkdir -p ${build_dir}/framework/${framework_name}.framework/Versions/A/Headers + mkdir -p ${build_dir}/framework/${framework_name}.framework/Versions/A/Modules + mkdir -p ${build_dir}/framework/${framework_name}.framework/Versions/A/Resources + + # Create symbolic links + ln -sf A ${build_dir}/framework/${framework_name}.framework/Versions/Current + ln -sf Versions/Current/Headers ${build_dir}/framework/${framework_name}.framework/Headers + ln -sf Versions/Current/Modules ${build_dir}/framework/${framework_name}.framework/Modules + ln -sf Versions/Current/Resources ${build_dir}/framework/${framework_name}.framework/Resources + ln -sf Versions/Current/${framework_name} ${build_dir}/framework/${framework_name}.framework/${framework_name} + + # Set header and module paths + local header_path=${build_dir}/framework/${framework_name}.framework/Versions/A/Headers/ + local module_path=${build_dir}/framework/${framework_name}.framework/Versions/A/Modules/ + else + # iOS/VisionOS/tvOS use a flat structure + mkdir -p ${build_dir}/framework/${framework_name}.framework/Headers + mkdir -p ${build_dir}/framework/${framework_name}.framework/Modules + + # Remove any existing structure to ensure clean build + rm -rf ${build_dir}/framework/${framework_name}.framework/Versions + + # Set header and module paths + local header_path=${build_dir}/framework/${framework_name}.framework/Headers/ + local module_path=${build_dir}/framework/${framework_name}.framework/Modules/ + fi + + # Copy all required headers (common for all platforms) + cp include/llama.h ${header_path} + cp ggml/include/ggml.h ${header_path} + cp ggml/include/ggml-alloc.h ${header_path} + cp ggml/include/ggml-backend.h ${header_path} + cp ggml/include/ggml-metal.h ${header_path} + cp ggml/include/ggml-cpu.h ${header_path} + cp ggml/include/ggml-blas.h ${header_path} + cp ggml/include/gguf.h ${header_path} + + # Create module map (common for all platforms) + cat > ${module_path}module.modulemap << EOF +framework module llama { + header "llama.h" + header "ggml.h" + header "ggml-alloc.h" + header "ggml-backend.h" + header "ggml-metal.h" + header "ggml-cpu.h" + header "ggml-blas.h" + header "gguf.h" + + link "c++" + link framework "Accelerate" + link framework "Metal" + link framework "Foundation" + + export * +} +EOF + + # Platform-specific settings for Info.plist + local platform_name="" + local sdk_name="" + local supported_platform="" + + case "$platform" 
in + "ios") + platform_name="iphoneos" + sdk_name="iphoneos${min_os_version}" + supported_platform="iPhoneOS" + local plist_path="${build_dir}/framework/${framework_name}.framework/Info.plist" + local device_family=' UIDeviceFamily + + 1 + 2 + ' + ;; + "macos") + platform_name="macosx" + sdk_name="macosx${min_os_version}" + supported_platform="MacOSX" + local plist_path="${build_dir}/framework/${framework_name}.framework/Versions/A/Resources/Info.plist" + local device_family="" + ;; + "visionos") + platform_name="xros" + sdk_name="xros${min_os_version}" + supported_platform="XRPlatform" + local plist_path="${build_dir}/framework/${framework_name}.framework/Info.plist" + local device_family="" + ;; + "tvos") + platform_name="appletvos" + sdk_name="appletvos${min_os_version}" + supported_platform="AppleTVOS" + local plist_path="${build_dir}/framework/${framework_name}.framework/Info.plist" + local device_family=' UIDeviceFamily + + 3 + ' + ;; + esac + + # Create Info.plist + cat > ${plist_path} << EOF + + + + + CFBundleDevelopmentRegion + en + CFBundleExecutable + llama + CFBundleIdentifier + org.ggml.llama + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + llama + CFBundlePackageType + FMWK + CFBundleShortVersionString + 1.0 + CFBundleVersion + 1 + MinimumOSVersion + ${min_os_version} + CFBundleSupportedPlatforms + + ${supported_platform} + ${device_family} + DTPlatformName + ${platform_name} + DTSDKName + ${sdk_name} + + +EOF +} + +# Create dynamic libraries from static libraries. +combine_static_libraries() { + local build_dir="$1" + local release_dir="$2" + local platform="$3" # "ios", "macos", "visionos", or "tvos" + local is_simulator="$4" + local base_dir="$(pwd)" + local framework_name="llama" + + # Determine output path based on platform + local output_lib="" + if [[ "$platform" == "macos" ]]; then + # macOS uses versioned structure + output_lib="${build_dir}/framework/${framework_name}.framework/Versions/A/${framework_name}" + else + # iOS, visionOS, and tvOS use a directory flat structure + output_lib="${build_dir}/framework/${framework_name}.framework/${framework_name}" + fi + + local libs=( + "${base_dir}/${build_dir}/src/${release_dir}/libllama.a" + "${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml.a" + "${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml-base.a" + "${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml-cpu.a" + "${base_dir}/${build_dir}/ggml/src/ggml-metal/${release_dir}/libggml-metal.a" + "${base_dir}/${build_dir}/ggml/src/ggml-blas/${release_dir}/libggml-blas.a" + ) + + # Create temporary directory for processing + local temp_dir="${base_dir}/${build_dir}/temp" + mkdir -p "${temp_dir}" + + # Since we have multiple architectures libtool will find object files that do not + # match the target architecture. We suppress these warnings. + libtool -static -o "${temp_dir}/combined.a" "${libs[@]}" 2> /dev/null + + # Determine SDK, architectures, and install_name based on platform and simulator flag. 
+ local sdk="" + local archs="" + local min_version_flag="" + local install_name="" + + case "$platform" in + "ios") + if [[ "$is_simulator" == "true" ]]; then + sdk="iphonesimulator" + archs="arm64 x86_64" + min_version_flag="-mios-simulator-version-min=${IOS_MIN_OS_VERSION}" + else + sdk="iphoneos" + archs="arm64" + min_version_flag="-mios-version-min=${IOS_MIN_OS_VERSION}" + fi + install_name="@rpath/llama.framework/llama" + ;; + "macos") + sdk="macosx" + archs="arm64 x86_64" + min_version_flag="-mmacosx-version-min=${MACOS_MIN_OS_VERSION}" + install_name="@rpath/llama.framework/Versions/Current/llama" + ;; + "visionos") + if [[ "$is_simulator" == "true" ]]; then + sdk="xrsimulator" + archs="arm64 x86_64" + min_version_flag="-mtargetos=xros${VISIONOS_MIN_OS_VERSION}-simulator" + else + sdk="xros" + archs="arm64" + min_version_flag="-mtargetos=xros${VISIONOS_MIN_OS_VERSION}" + fi + # Use flat structure for visionOS, same as iOS + install_name="@rpath/llama.framework/llama" + ;; + "tvos") + if [[ "$is_simulator" == "true" ]]; then + sdk="appletvsimulator" + archs="arm64 x86_64" + min_version_flag="-mtvos-simulator-version-min=${TVOS_MIN_OS_VERSION}" + else + sdk="appletvos" + archs="arm64" + min_version_flag="-mtvos-version-min=${TVOS_MIN_OS_VERSION}" + fi + install_name="@rpath/llama.framework/llama" + ;; + esac + + # Build architecture flags + local arch_flags="" + for arch in $archs; do + arch_flags+=" -arch $arch" + done + + # Create dynamic library + echo "Creating dynamic library for ${platform}." + xcrun -sdk $sdk clang++ -dynamiclib \ + -isysroot $(xcrun --sdk $sdk --show-sdk-path) \ + $arch_flags \ + $min_version_flag \ + -Wl,-force_load,"${temp_dir}/combined.a" \ + -framework Foundation -framework Metal -framework Accelerate \ + -install_name "$install_name" \ + -o "${base_dir}/${output_lib}" + + # Platform-specific post-processing for device builds + if [[ "$is_simulator" == "false" ]]; then + if command -v xcrun vtool &>/dev/null; then + case "$platform" in + "ios") + echo "Marking binary as a framework binary for iOS..." + xcrun vtool -set-build-version ios ${IOS_MIN_OS_VERSION} ${IOS_MIN_OS_VERSION} -replace \ + -output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}" + ;; + "visionos") + echo "Marking binary as a framework binary for visionOS..." + if [[ "$MAJOR_VERSION" -gt 16 ]] || [[ "$MAJOR_VERSION" -eq 16 && "$MINOR_VERSION" -gt 2 ]]; then + echo "Xcode version greater than 16.2, using visionOS." + VISION_OS_BUILD_VERSION="visionos" + else + echo "Xcode version less than or equal to 16.2, using xros." + VISION_OS_BUILD_VERSION="xros" + fi + xcrun vtool -set-build-version ${VISION_OS_BUILD_VERSION} ${VISIONOS_MIN_OS_VERSION} ${VISIONOS_MIN_OS_VERSION} -replace \ + -output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}" + ;; + "tvos") + echo "Marking binary as a framework binary for tvOS..." + xcrun vtool -set-build-version tvos ${TVOS_MIN_OS_VERSION} ${TVOS_MIN_OS_VERSION} -replace \ + -output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}" + ;; + esac + else + echo "Warning: vtool not found. Binary may not pass App Store validation." + fi + fi + + echo "Creating properly formatted dSYM..." 
+    # Create a separate directory for dSYMs for all platforms
+    mkdir -p "${base_dir}/${build_dir}/dSYMs"
+
+    # iOS, visionOS, and tvOS style dSYM (flat structure)
+    if [[ "$platform" == "ios" || "$platform" == "visionos" || "$platform" == "tvos" ]]; then
+        # Generate dSYM in the dSYMs directory
+        xcrun dsymutil "${base_dir}/${output_lib}" -o "${base_dir}/${build_dir}/dSYMs/llama.dSYM"
+
+        # Create a copy of the binary that will be stripped
+        cp "${base_dir}/${output_lib}" "${temp_dir}/binary_to_strip"
+
+        # Strip debug symbols from the copy
+        xcrun strip -S "${temp_dir}/binary_to_strip" -o "${temp_dir}/stripped_lib"
+
+        # Replace the original with the stripped version
+        mv "${temp_dir}/stripped_lib" "${base_dir}/${output_lib}"
+    else
+        # macOS style dSYM
+        # First strip debug info to a separate file
+        xcrun strip -S "${base_dir}/${output_lib}" -o "${temp_dir}/stripped_lib"
+
+        # Generate dSYM in the dSYMs directory
+        xcrun dsymutil "${base_dir}/${output_lib}" -o "${base_dir}/${build_dir}/dSYMs/llama.dSYM"
+
+        # Replace original binary with stripped version
+        mv "${temp_dir}/stripped_lib" "${base_dir}/${output_lib}"
+    fi
+
+    # Remove any automatically generated dSYM files in the framework structure as they will
+    # otherwise cause Invalid Bundle Structure validation errors.
+    if [ -d "${base_dir}/${output_lib}.dSYM" ]; then
+        echo "Removing generated dSYM file in framework structure: ${base_dir}/${output_lib}.dSYM"
+        rm -rf "${base_dir}/${output_lib}.dSYM"
+    fi
+
+    # Clean up
+    rm -rf "${temp_dir}"
+}
+
+echo "Building for iOS simulator..."
+cmake -B build-ios-sim -G Xcode \
+    "${COMMON_CMAKE_ARGS[@]}" \
+    -DCMAKE_OSX_DEPLOYMENT_TARGET=${IOS_MIN_OS_VERSION} \
+    -DIOS=ON \
+    -DCMAKE_SYSTEM_NAME=iOS \
+    -DCMAKE_OSX_SYSROOT=iphonesimulator \
+    -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
+    -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=iphonesimulator \
+    -DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
+    -DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
+    -DLLAMA_CURL=OFF \
+    -S .
+cmake --build build-ios-sim --config Release -- -quiet
+
+echo "Building for iOS devices..."
+cmake -B build-ios-device -G Xcode \
+    "${COMMON_CMAKE_ARGS[@]}" \
+    -DCMAKE_OSX_DEPLOYMENT_TARGET=${IOS_MIN_OS_VERSION} \
+    -DCMAKE_OSX_SYSROOT=iphoneos \
+    -DCMAKE_OSX_ARCHITECTURES="arm64" \
+    -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=iphoneos \
+    -DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
+    -DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
+    -DLLAMA_CURL=OFF \
+    -S .
+cmake --build build-ios-device --config Release -- -quiet
+
+echo "Building for macOS..."
+cmake -B build-macos -G Xcode \
+    "${COMMON_CMAKE_ARGS[@]}" \
+    -DCMAKE_OSX_DEPLOYMENT_TARGET=${MACOS_MIN_OS_VERSION} \
+    -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
+    -DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
+    -DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
+    -DLLAMA_CURL=OFF \
+    -S .
+cmake --build build-macos --config Release -- -quiet
+
+echo "Building for visionOS..."
+cmake -B build-visionos -G Xcode \
+    "${COMMON_CMAKE_ARGS[@]}" \
+    -DCMAKE_OSX_DEPLOYMENT_TARGET=${VISIONOS_MIN_OS_VERSION} \
+    -DCMAKE_OSX_ARCHITECTURES="arm64" \
+    -DCMAKE_SYSTEM_NAME=visionOS \
+    -DCMAKE_OSX_SYSROOT=xros \
+    -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xros \
+    -DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
+    -DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
+    -DLLAMA_CURL=OFF \
+    -S .
+cmake --build build-visionos --config Release -- -quiet
+
+echo "Building for visionOS simulator..."
+cmake -B build-visionos-sim -G Xcode \ + "${COMMON_CMAKE_ARGS[@]}" \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=${VISIONOS_MIN_OS_VERSION} \ + -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \ + -DCMAKE_SYSTEM_NAME=visionOS \ + -DCMAKE_OSX_SYSROOT=xrsimulator \ + -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xrsimulator \ + -DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \ + -DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \ + -DLLAMA_CURL=OFF \ + -S . +cmake --build build-visionos-sim --config Release -- -quiet + +# Add tvOS builds (might need the same u_int definitions as watchOS and visionOS) +echo "Building for tvOS simulator..." +cmake -B build-tvos-sim -G Xcode \ + "${COMMON_CMAKE_ARGS[@]}" \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=${TVOS_MIN_OS_VERSION} \ + -DCMAKE_SYSTEM_NAME=tvOS \ + -DCMAKE_OSX_SYSROOT=appletvsimulator \ + -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \ + -DGGML_METAL=ON \ + -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=appletvsimulator \ + -DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \ + -DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \ + -DLLAMA_CURL=OFF \ + -S . +cmake --build build-tvos-sim --config Release -- -quiet + +echo "Building for tvOS devices..." +cmake -B build-tvos-device -G Xcode \ + "${COMMON_CMAKE_ARGS[@]}" \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=${TVOS_MIN_OS_VERSION} \ + -DCMAKE_SYSTEM_NAME=tvOS \ + -DCMAKE_OSX_SYSROOT=appletvos \ + -DCMAKE_OSX_ARCHITECTURES="arm64" \ + -DGGML_METAL=ON \ + -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=appletvos \ + -DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \ + -DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \ + -DLLAMA_CURL=OFF \ + -S . +cmake --build build-tvos-device --config Release -- -quiet + +# Setup frameworks and copy binaries and headers +echo "Setting up framework structures..." +setup_framework_structure "build-ios-sim" ${IOS_MIN_OS_VERSION} "ios" +setup_framework_structure "build-ios-device" ${IOS_MIN_OS_VERSION} "ios" +setup_framework_structure "build-macos" ${MACOS_MIN_OS_VERSION} "macos" +setup_framework_structure "build-visionos" ${VISIONOS_MIN_OS_VERSION} "visionos" +setup_framework_structure "build-visionos-sim" ${VISIONOS_MIN_OS_VERSION} "visionos" +setup_framework_structure "build-tvos-sim" ${TVOS_MIN_OS_VERSION} "tvos" +setup_framework_structure "build-tvos-device" ${TVOS_MIN_OS_VERSION} "tvos" + +# Create dynamic libraries from static libraries +echo "Creating dynamic libraries from static libraries..." +combine_static_libraries "build-ios-sim" "Release-iphonesimulator" "ios" "true" +combine_static_libraries "build-ios-device" "Release-iphoneos" "ios" "false" +combine_static_libraries "build-macos" "Release" "macos" "false" +combine_static_libraries "build-visionos" "Release-xros" "visionos" "false" +combine_static_libraries "build-visionos-sim" "Release-xrsimulator" "visionos" "true" +combine_static_libraries "build-tvos-sim" "Release-appletvsimulator" "tvos" "true" +combine_static_libraries "build-tvos-device" "Release-appletvos" "tvos" "false" + +# Create XCFramework with correct debug symbols paths +echo "Creating XCFramework..." 
+xcodebuild -create-xcframework \
+    -framework $(pwd)/build-ios-sim/framework/llama.framework \
+    -debug-symbols $(pwd)/build-ios-sim/dSYMs/llama.dSYM \
+    -framework $(pwd)/build-ios-device/framework/llama.framework \
+    -debug-symbols $(pwd)/build-ios-device/dSYMs/llama.dSYM \
+    -framework $(pwd)/build-macos/framework/llama.framework \
+    -debug-symbols $(pwd)/build-macos/dSYMs/llama.dSYM \
+    -framework $(pwd)/build-visionos/framework/llama.framework \
+    -debug-symbols $(pwd)/build-visionos/dSYMs/llama.dSYM \
+    -framework $(pwd)/build-visionos-sim/framework/llama.framework \
+    -debug-symbols $(pwd)/build-visionos-sim/dSYMs/llama.dSYM \
+    -framework $(pwd)/build-tvos-device/framework/llama.framework \
+    -debug-symbols $(pwd)/build-tvos-device/dSYMs/llama.dSYM \
+    -framework $(pwd)/build-tvos-sim/framework/llama.framework \
+    -debug-symbols $(pwd)/build-tvos-sim/dSYMs/llama.dSYM \
+    -output $(pwd)/build-apple/llama.xcframework
diff --git a/ci/README.md b/ci/README.md
index 4064705190..ec3f443503 100644
--- a/ci/README.md
+++ b/ci/README.md
@@ -1,11 +1,11 @@
 # CI
 
-In addition to [Github Actions](https://github.com/ggerganov/llama.cpp/actions) `llama.cpp` uses a custom CI framework:
+In addition to [Github Actions](https://github.com/ggml-org/llama.cpp/actions) `llama.cpp` uses a custom CI framework:
 
 https://github.com/ggml-org/ci
 
 It monitors the `master` branch for new commits and runs the
-[ci/run.sh](https://github.com/ggerganov/llama.cpp/blob/master/ci/run.sh) script on dedicated cloud instances. This allows us
+[ci/run.sh](https://github.com/ggml-org/llama.cpp/blob/master/ci/run.sh) script on dedicated cloud instances. This allows us
 to execute heavier workloads compared to just using Github Actions. Also with time, the cloud instances will be scaled
 to cover various hardware architectures, including GPU and Apple Silicon instances.
 
@@ -26,4 +26,43 @@ GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
 # with SYCL support
 source /opt/intel/oneapi/setvars.sh
 GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+
+# with MUSA support
+GG_BUILD_MUSA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
 ```
+
+## Running MUSA CI in a Docker Container
+
+Assuming `$PWD` is the root of the `llama.cpp` repository, follow these steps to set up and run MUSA CI in a Docker container:
+
+### 1. Create a local directory to store cached models, configuration files and venv:
+
+```bash
+mkdir -p $HOME/llama.cpp/ci-cache
+```
+
+### 2. Create a local directory to store CI run results:
+
+```bash
+mkdir -p $HOME/llama.cpp/ci-results
+```
+
+### 3. Start a Docker container and run the CI:
+
+```bash
+docker run --privileged -it \
+    -v $HOME/llama.cpp/ci-cache:/ci-cache \
+    -v $HOME/llama.cpp/ci-results:/ci-results \
+    -v $PWD:/ws -w /ws \
+    mthreads/musa:rc3.1.1-devel-ubuntu22.04
+```
+
+Inside the container, execute the following commands:
+
+```bash
+apt update -y && apt install -y bc cmake ccache git python3.10-venv time unzip wget
+git config --global --add safe.directory /ws
+GG_BUILD_MUSA=1 bash ./ci/run.sh /ci-results /ci-cache
+```
+
+This setup ensures that the CI runs within an isolated Docker environment while maintaining cached files and results across runs.
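Because `ci/run.sh` (patched below) guards its model-download test stages with `GG_BUILD_LOW_PERF`, a resource-constrained MUSA host can reuse the exact same container setup and simply add that switch. This is a minimal sketch, assuming the mounts from step 3 above; the flag combination itself is illustrative and not part of this patch:

```bash
# Inside the container from step 3: same dependencies, but skip the heavier model-based stages.
apt update -y && apt install -y bc cmake ccache git python3.10-venv time unzip wget
git config --global --add safe.directory /ws
GG_BUILD_LOW_PERF=1 GG_BUILD_MUSA=1 bash ./ci/run.sh /ci-results /ci-cache
```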
diff --git a/ci/run.sh b/ci/run.sh index 77c32ce005..f463d7a8b2 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -16,6 +16,9 @@ # # with VULKAN support # GG_BUILD_VULKAN=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt # +# # with MUSA support +# GG_BUILD_MUSA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt +# if [ -z "$2" ]; then echo "usage: $0 " @@ -36,7 +39,7 @@ sd=`dirname $0` cd $sd/../ SRC=`pwd` -CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON" +CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=OFF" if [ ! -z ${GG_BUILD_METAL} ]; then CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON -DGGML_METAL_USE_BF16=ON" @@ -52,13 +55,24 @@ if [ ! -z ${GG_BUILD_SYCL} ]; then echo "source /opt/intel/oneapi/setvars.sh" exit 1 fi - + # Use only main GPU + export ONEAPI_DEVICE_SELECTOR="level_zero:0" + # Enable sysman for correct memory reporting + export ZES_ENABLE_SYSMAN=1 + # to circumvent precision issues on CPY operations + export SYCL_PROGRAM_COMPILE_OPTIONS="-cl-fp32-correctly-rounded-divide-sqrt" CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=1 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON" fi if [ ! -z ${GG_BUILD_VULKAN} ]; then CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=1" fi + +if [ ! -z ${GG_BUILD_MUSA} ]; then + # Use qy1 by default (MTT S80) + MUSA_ARCH=${MUSA_ARCH:-21} + CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_MUSA=ON -DMUSA_ARCHITECTURES=${MUSA_ARCH}" +fi ## helpers # download a file if it does not exist or if it is outdated @@ -352,10 +366,10 @@ function gg_run_open_llama_7b_v2 { (time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log - (time ./bin/llama-save-load-state--model ${model_q4_0} -ngl 10 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log - (time ./bin/llama-save-load-state--model ${model_q4_0} -ngl 10 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log - (time ./bin/llama-save-load-state--model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log - (time ./bin/llama-save-load-state--model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log function check_ppl { qnt="$1" @@ -808,7 +822,7 @@ export LLAMA_LOG_PREFIX=1 export LLAMA_LOG_TIMESTAMPS=1 if [ -z ${GG_BUILD_LOW_PERF} ]; then - # Create symlink: ./llama.cpp/models-mnt -> $MNT/models/models-mnt + # Create symlink: ./llama.cpp/models-mnt -> $MNT/models rm -rf ${SRC}/models-mnt mnt_models=${MNT}/models mkdir -p ${mnt_models} @@ -826,8 +840,10 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then fi ret=0 - -test $ret -eq 0 && gg_run ctest_debug +if [ -z ${GG_BUILD_SYCL} ]; then + # SYCL build breaks with debug build flags + test $ret -eq 0 && gg_run ctest_debug +fi test $ret -eq 0 && gg_run ctest_release if [ -z ${GG_BUILD_LOW_PERF} ]; then @@ -835,7 +851,9 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then test $ret -eq 0 && gg_run rerank_tiny if [ -z ${GG_BUILD_CLOUD} ] || [ ${GG_BUILD_EXTRA_TESTS_0} ]; then - test $ret -eq 0 && gg_run test_scripts_debug + if [ -z ${GG_BUILD_SYCL} ]; then + test $ret -eq 0 && gg_run test_scripts_debug + fi test $ret -eq 0 && 
gg_run test_scripts_release fi @@ -846,7 +864,9 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then test $ret -eq 0 && gg_run pythia_2_8b #test $ret -eq 0 && gg_run open_llama_7b_v2 fi - test $ret -eq 0 && gg_run ctest_with_model_debug + if [ -z ${GG_BUILD_SYCL} ]; then + test $ret -eq 0 && gg_run ctest_with_model_debug + fi test $ret -eq 0 && gg_run ctest_with_model_release fi fi diff --git a/cmake/common.cmake b/cmake/common.cmake index 0f54871e41..a5bb787f15 100644 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1,3 +1,5 @@ +include("ggml/cmake/common.cmake") + function(llama_add_compile_flags) if (LLAMA_FATAL_WARNINGS) if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") diff --git a/cmake/llama.pc.in b/cmake/llama.pc.in index 0b2b6bcfab..6fb58b5f68 100644 --- a/cmake/llama.pc.in +++ b/cmake/llama.pc.in @@ -1,10 +1,10 @@ prefix=@CMAKE_INSTALL_PREFIX@ -exec_prefix=${prefix} -libdir=${exec_prefix}/lib -includedir=${prefix}/include +exec_prefix=@CMAKE_INSTALL_PREFIX@ +libdir=@CMAKE_INSTALL_FULL_LIBDIR@ +includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ Name: llama Description: Port of Facebook's LLaMA model in C/C++ -Version: @PROJECT_VERSION@ -Libs: -L${libdir} -lggml -lggml-base -lllama +Version: @LLAMA_INSTALL_VERSION@ +Libs: -L${libdir} -lggml -lggml-base -lllama Cflags: -I${includedir} diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 24b7f8741a..43533fc86a 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -56,16 +56,19 @@ add_library(${TARGET} STATIC arg.cpp arg.h base64.hpp - chat-template.hpp + chat.cpp + chat.h common.cpp common.h console.cpp console.h json-schema-to-grammar.cpp json.hpp + llguidance.cpp log.cpp log.h - minja.hpp + minja/chat-template.hpp + minja/minja.hpp ngram-cache.cpp ngram-cache.h sampling.cpp @@ -82,13 +85,60 @@ set(LLAMA_COMMON_EXTRA_LIBS build_info) # Use curl to download model url if (LLAMA_CURL) - find_package(CURL REQUIRED) + find_package(CURL) + if (NOT CURL_FOUND) + message(FATAL_ERROR "Could NOT find CURL. 
Hint: to disable this feature, set -DLLAMA_CURL=OFF") + endif() target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL) include_directories(${CURL_INCLUDE_DIRS}) find_library(CURL_LIBRARY curl REQUIRED) set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARY}) endif () +if (LLAMA_LLGUIDANCE) + include(ExternalProject) + set(LLGUIDANCE_SRC ${CMAKE_BINARY_DIR}/llguidance/source) + set(LLGUIDANCE_PATH ${LLGUIDANCE_SRC}/target/release) + + # Set the correct library file extension based on platform + if (WIN32) + set(LLGUIDANCE_LIB_NAME "llguidance.lib") + # Add Windows-specific libraries + set(LLGUIDANCE_PLATFORM_LIBS + ws2_32 # Windows Sockets API + userenv # For GetUserProfileDirectoryW + ntdll # For NT functions + bcrypt # For BCryptGenRandom + ) + else() + set(LLGUIDANCE_LIB_NAME "libllguidance.a") + set(LLGUIDANCE_PLATFORM_LIBS "") + endif() + + ExternalProject_Add(llguidance_ext + GIT_REPOSITORY https://github.com/guidance-ai/llguidance + # v0.7.10: + GIT_TAG 0309d2a6bf40abda35344a362edc71e06d5009f8 + PREFIX ${CMAKE_BINARY_DIR}/llguidance + SOURCE_DIR ${LLGUIDANCE_SRC} + BUILD_IN_SOURCE TRUE + CONFIGURE_COMMAND "" + BUILD_COMMAND cargo build --release + INSTALL_COMMAND "" + BUILD_BYPRODUCTS ${LLGUIDANCE_PATH}/${LLGUIDANCE_LIB_NAME} ${LLGUIDANCE_PATH}/llguidance.h + UPDATE_COMMAND "" + ) + target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_LLGUIDANCE) + + add_library(llguidance STATIC IMPORTED) + set_target_properties(llguidance PROPERTIES IMPORTED_LOCATION ${LLGUIDANCE_PATH}/${LLGUIDANCE_LIB_NAME}) + add_dependencies(llguidance llguidance_ext) + + target_include_directories(${TARGET} PRIVATE ${LLGUIDANCE_PATH}) + # Add platform libraries to the main target + set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS}) +endif () + target_include_directories(${TARGET} PUBLIC .) 
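The `LLAMA_LLGUIDANCE` block above fetches the Rust library and builds it with `cargo build --release` through `ExternalProject`, so enabling it only works on a machine with a Rust toolchain installed. A minimal configure/build sketch (the build directory name and `-j` setting are illustrative, not part of this change):

```bash
# Sketch: opt into the optional llguidance grammar backend added above.
# Requires cargo on PATH; everything else follows the usual CMake flow.
cmake -B build -DLLAMA_LLGUIDANCE=ON
cmake --build build --config Release -j"$(nproc)"
```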
target_compile_features (${TARGET} PUBLIC cxx_std_17) target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads) diff --git a/common/arg.cpp b/common/arg.cpp index a6226a34b1..0b57f9da1e 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -1,11 +1,24 @@ +#include "gguf.h" // for reading GGUF splits #include "arg.h" +#include "common.h" #include "log.h" #include "sampling.h" +#include "chat.h" + +// fix problem with std::min and std::max +#if defined(_WIN32) +#define WIN32_LEAN_AND_MEAN +#ifndef NOMINMAX +# define NOMINMAX +#endif +#include +#endif #include #include #include +#include #include #include #include @@ -13,6 +26,14 @@ #include #include +//#define LLAMA_USE_CURL + +#if defined(LLAMA_USE_CURL) +#include +#include +#include +#endif + #include "json-schema-to-grammar.h" using json = nlohmann::ordered_json; @@ -124,47 +145,554 @@ std::string common_arg::to_string() { return ss.str(); } +// +// downloader +// + +struct common_hf_file_res { + std::string repo; // repo name with ":tag" removed + std::string ggufFile; + std::string mmprojFile; +}; + +#ifdef LLAMA_USE_CURL + +#ifdef __linux__ +#include +#elif defined(_WIN32) +# if !defined(PATH_MAX) +# define PATH_MAX MAX_PATH +# endif +#elif defined(_AIX) +#include +#else +#include +#endif +#define LLAMA_CURL_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083 + +// +// CURL utils +// + +using curl_ptr = std::unique_ptr; + +// cannot use unique_ptr for curl_slist, because we cannot update without destroying the old one +struct curl_slist_ptr { + struct curl_slist * ptr = nullptr; + ~curl_slist_ptr() { + if (ptr) { + curl_slist_free_all(ptr); + } + } +}; + +#define CURL_MAX_RETRY 3 +#define CURL_RETRY_DELAY_SECONDS 2 + +static bool curl_perform_with_retry(const std::string & url, CURL * curl, int max_attempts, int retry_delay_seconds) { + int remaining_attempts = max_attempts; + + while (remaining_attempts > 0) { + LOG_INF("%s: Trying to download from %s (attempt %d of %d)...\n", __func__ , url.c_str(), max_attempts - remaining_attempts + 1, max_attempts); + + CURLcode res = curl_easy_perform(curl); + if (res == CURLE_OK) { + return true; + } + + int exponential_backoff_delay = std::pow(retry_delay_seconds, max_attempts - remaining_attempts) * 1000; + LOG_WRN("%s: curl_easy_perform() failed: %s, retrying after %d milliseconds...\n", __func__, curl_easy_strerror(res), exponential_backoff_delay); + + remaining_attempts--; + std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay)); + } + + LOG_ERR("%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts); + + return false; +} + +// download one single file from remote URL to local path +static bool common_download_file_single(const std::string & url, const std::string & path, const std::string & bearer_token) { + // Initialize libcurl + curl_ptr curl(curl_easy_init(), &curl_easy_cleanup); + curl_slist_ptr http_headers; + if (!curl) { + LOG_ERR("%s: error initializing libcurl\n", __func__); + return false; + } + + bool force_download = false; + + // Set the URL, allow to follow http redirection + curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str()); + curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L); + + http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp"); + // Check if hf-token or bearer-token was specified + if (!bearer_token.empty()) { + std::string auth_header = "Authorization: Bearer " + bearer_token; + http_headers.ptr = curl_slist_append(http_headers.ptr, 
auth_header.c_str()); + } + curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr); + +#if defined(_WIN32) + // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of + // operating system. Currently implemented under MS-Windows. + curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA); +#endif + + // Check if the file already exists locally + auto file_exists = std::filesystem::exists(path); + + // If the file exists, check its JSON metadata companion file. + std::string metadata_path = path + ".json"; + nlohmann::json metadata; + std::string etag; + std::string last_modified; + + if (file_exists) { + // Try and read the JSON metadata file (note: stream autoclosed upon exiting this block). + std::ifstream metadata_in(metadata_path); + if (metadata_in.good()) { + try { + metadata_in >> metadata; + LOG_INF("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str()); + if (metadata.contains("url") && metadata.at("url").is_string()) { + auto previous_url = metadata.at("url").get(); + if (previous_url != url) { + LOG_ERR("%s: Model URL mismatch: %s != %s\n", __func__, url.c_str(), previous_url.c_str()); + return false; + } + } + if (metadata.contains("etag") && metadata.at("etag").is_string()) { + etag = metadata.at("etag"); + } + if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) { + last_modified = metadata.at("lastModified"); + } + } catch (const nlohmann::json::exception & e) { + LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what()); + return false; + } + } + } else { + LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str()); + } + + // Send a HEAD request to retrieve the etag and last-modified headers + struct common_load_model_from_url_headers { + std::string etag; + std::string last_modified; + }; + + common_load_model_from_url_headers headers; + + { + typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *); + auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t { + common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata; + + static std::regex header_regex("([^:]+): (.*)\r\n"); + static std::regex etag_regex("ETag", std::regex_constants::icase); + static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase); + + std::string header(buffer, n_items); + std::smatch match; + if (std::regex_match(header, match, header_regex)) { + const std::string & key = match[1]; + const std::string & value = match[2]; + if (std::regex_match(key, match, etag_regex)) { + headers->etag = value; + } else if (std::regex_match(key, match, last_modified_regex)) { + headers->last_modified = value; + } + } + return n_items; + }; + + curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 1L); // will trigger the HEAD verb + curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); // hide head request progress + curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast(header_callback)); + curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers); + + bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS); + if (!was_perform_successful) { + return false; + } + + long http_code = 0; + curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code); + if (http_code != 200) { + // HEAD not supported, we don't know if the file has changed + // force trigger 
downloading + force_download = true; + LOG_ERR("%s: HEAD invalid http status code received: %ld\n", __func__, http_code); + } + } + + bool should_download = !file_exists || force_download; + if (!should_download) { + if (!etag.empty() && etag != headers.etag) { + LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str()); + should_download = true; + } else if (!last_modified.empty() && last_modified != headers.last_modified) { + LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, last_modified.c_str(), headers.last_modified.c_str()); + should_download = true; + } + } + if (should_download) { + std::string path_temporary = path + ".downloadInProgress"; + if (file_exists) { + LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str()); + if (remove(path.c_str()) != 0) { + LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); + return false; + } + } + + // Set the output file + + struct FILE_deleter { + void operator()(FILE * f) const { + fclose(f); + } + }; + + std::unique_ptr outfile(fopen(path_temporary.c_str(), "wb")); + if (!outfile) { + LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path.c_str()); + return false; + } + + typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd); + auto write_callback = [](void * data, size_t size, size_t nmemb, void * fd) -> size_t { + return fwrite(data, size, nmemb, (FILE *)fd); + }; + curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 0L); + curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast(write_callback)); + curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, outfile.get()); + + // display download progress + curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 0L); + + // helper function to hide password in URL + auto llama_download_hide_password_in_url = [](const std::string & url) -> std::string { + std::size_t protocol_pos = url.find("://"); + if (protocol_pos == std::string::npos) { + return url; // Malformed URL + } + + std::size_t at_pos = url.find('@', protocol_pos + 3); + if (at_pos == std::string::npos) { + return url; // No password in URL + } + + return url.substr(0, protocol_pos + 3) + "********" + url.substr(at_pos); + }; + + // start the download + LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__, + llama_download_hide_password_in_url(url).c_str(), path.c_str(), headers.etag.c_str(), headers.last_modified.c_str()); + bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS); + if (!was_perform_successful) { + return false; + } + + long http_code = 0; + curl_easy_getinfo (curl.get(), CURLINFO_RESPONSE_CODE, &http_code); + if (http_code < 200 || http_code >= 400) { + LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code); + return false; + } + + // Causes file to be closed explicitly here before we rename it. + outfile.reset(); + + // Write the updated JSON metadata file. 
+ metadata.update({ + {"url", url}, + {"etag", headers.etag}, + {"lastModified", headers.last_modified} + }); + std::ofstream(metadata_path) << metadata.dump(4); + LOG_INF("%s: file metadata saved: %s\n", __func__, metadata_path.c_str()); + + if (rename(path_temporary.c_str(), path.c_str()) != 0) { + LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str()); + return false; + } + } + + return true; +} + +// download multiple files from remote URLs to local paths +// the input is a vector of pairs +static bool common_download_file_multiple(const std::vector> & urls, const std::string & bearer_token) { + // Prepare download in parallel + std::vector> futures_download; + for (auto const & item : urls) { + futures_download.push_back(std::async(std::launch::async, [bearer_token](const std::pair & it) -> bool { + return common_download_file_single(it.first, it.second, bearer_token); + }, item)); + } + + // Wait for all downloads to complete + for (auto & f : futures_download) { + if (!f.get()) { + return false; + } + } + + return true; +} + +static bool common_download_model( + const common_params_model & model, + const std::string & bearer_token) { + // Basic validation of the model.url + if (model.url.empty()) { + LOG_ERR("%s: invalid model url\n", __func__); + return false; + } + + if (!common_download_file_single(model.url, model.path, bearer_token)) { + return false; + } + + // check for additional GGUFs split to download + int n_split = 0; + { + struct gguf_init_params gguf_params = { + /*.no_alloc = */ true, + /*.ctx = */ NULL, + }; + auto * ctx_gguf = gguf_init_from_file(model.path.c_str(), gguf_params); + if (!ctx_gguf) { + LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, model.path.c_str()); + return false; + } + + auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT); + if (key_n_split >= 0) { + n_split = gguf_get_val_u16(ctx_gguf, key_n_split); + } + + gguf_free(ctx_gguf); + } + + if (n_split > 1) { + char split_prefix[PATH_MAX] = {0}; + char split_url_prefix[LLAMA_CURL_MAX_URL_LENGTH] = {0}; + + // Verify the first split file format + // and extract split URL and PATH prefixes + { + if (!llama_split_prefix(split_prefix, sizeof(split_prefix), model.path.c_str(), 0, n_split)) { + LOG_ERR("\n%s: unexpected model file name: %s n_split=%d\n", __func__, model.path.c_str(), n_split); + return false; + } + + if (!llama_split_prefix(split_url_prefix, sizeof(split_url_prefix), model.url.c_str(), 0, n_split)) { + LOG_ERR("\n%s: unexpected model url: %s n_split=%d\n", __func__, model.url.c_str(), n_split); + return false; + } + } + + std::vector> urls; + for (int idx = 1; idx < n_split; idx++) { + char split_path[PATH_MAX] = {0}; + llama_split_path(split_path, sizeof(split_path), split_prefix, idx, n_split); + + char split_url[LLAMA_CURL_MAX_URL_LENGTH] = {0}; + llama_split_path(split_url, sizeof(split_url), split_url_prefix, idx, n_split); + + if (std::string(split_path) == model.path) { + continue; // skip the already downloaded file + } + + urls.push_back({split_url, split_path}); + } + + // Download in parallel + common_download_file_multiple(urls, bearer_token); + } + + return true; +} + +/** + * Allow getting the HF file from the HF repo with tag (like ollama), for example: + * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4 + * - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M + * - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s + * Tag is optional, default to "latest" (meaning it checks for Q4_K_M first, then Q4, then if not found, return 
the first GGUF file in repo) + * + * Return pair of (with "repo" already having tag removed) + * + * Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files. + */ +static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & bearer_token) { + auto parts = string_split(hf_repo_with_tag, ':'); + std::string tag = parts.size() > 1 ? parts.back() : "latest"; + std::string hf_repo = parts[0]; + if (string_split(hf_repo, '/').size() != 2) { + throw std::invalid_argument("error: invalid HF repo format, expected /[:quant]\n"); + } + + // fetch model info from Hugging Face Hub API + curl_ptr curl(curl_easy_init(), &curl_easy_cleanup); + curl_slist_ptr http_headers; + std::string res_str; + + std::string model_endpoint = get_model_endpoint(); + + std::string url = model_endpoint + "v2/" + hf_repo + "/manifests/" + tag; + curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str()); + curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); + typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data); + auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t { + static_cast(data)->append((char * ) ptr, size * nmemb); + return size * nmemb; + }; + curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast(write_callback)); + curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_str); +#if defined(_WIN32) + curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA); +#endif + if (!bearer_token.empty()) { + std::string auth_header = "Authorization: Bearer " + bearer_token; + http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str()); + } + // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response + http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp"); + http_headers.ptr = curl_slist_append(http_headers.ptr, "Accept: application/json"); + curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr); + + CURLcode res = curl_easy_perform(curl.get()); + + if (res != CURLE_OK) { + throw std::runtime_error("error: cannot make GET request to HF API"); + } + + long res_code; + std::string ggufFile = ""; + std::string mmprojFile = ""; + curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code); + if (res_code == 200) { + // extract ggufFile.rfilename in json, using regex + { + std::regex pattern("\"ggufFile\"[\\s\\S]*?\"rfilename\"\\s*:\\s*\"([^\"]+)\""); + std::smatch match; + if (std::regex_search(res_str, match, pattern)) { + ggufFile = match[1].str(); + } + } + // extract mmprojFile.rfilename in json, using regex + { + std::regex pattern("\"mmprojFile\"[\\s\\S]*?\"rfilename\"\\s*:\\s*\"([^\"]+)\""); + std::smatch match; + if (std::regex_search(res_str, match, pattern)) { + mmprojFile = match[1].str(); + } + } + } else if (res_code == 401) { + throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token"); + } else { + throw std::runtime_error(string_format("error from HF API, response code: %ld, data: %s", res_code, res_str.c_str())); + } + + // check response + if (ggufFile.empty()) { + throw std::runtime_error("error: model does not have ggufFile"); + } + + return { hf_repo, ggufFile, mmprojFile }; +} + +#else + +static bool common_download_file_single(const 
std::string &, const std::string &, const std::string &) { + LOG_ERR("error: built without CURL, cannot download model from internet\n"); + return false; +} + +static bool common_download_file_multiple(const std::vector> &, const std::string &) { + LOG_ERR("error: built without CURL, cannot download model from the internet\n"); + return false; +} + +static bool common_download_model( + const common_params_model &, + const std::string &) { + LOG_ERR("error: built without CURL, cannot download model from the internet\n"); + return false; +} + +static struct common_hf_file_res common_get_hf_file(const std::string &, const std::string &) { + LOG_ERR("error: built without CURL, cannot download model from the internet\n"); + return {}; +} + +#endif // LLAMA_USE_CURL + // // utils // -static void common_params_handle_model_default( - std::string & model, - const std::string & model_url, - std::string & hf_repo, - std::string & hf_file, - const std::string & hf_token, - const std::string & model_default) { - if (!hf_repo.empty()) { - // short-hand to avoid specifying --hf-file -> default it to --model - if (hf_file.empty()) { - if (model.empty()) { - auto auto_detected = common_get_hf_file(hf_repo, hf_token); - if (auto_detected.first.empty() || auto_detected.second.empty()) { - exit(1); // built without CURL, error message already printed +static void common_params_handle_model( + struct common_params_model & model, + const std::string & bearer_token, + const std::string & model_path_default, + bool is_mmproj = false) { // TODO: move is_mmproj to an enum when we have more files? + // handle pre-fill default model path and url based on hf_repo and hf_file + { + if (!model.hf_repo.empty()) { + // short-hand to avoid specifying --hf-file -> default it to --model + if (model.hf_file.empty()) { + if (model.path.empty()) { + auto auto_detected = common_get_hf_file(model.hf_repo, bearer_token); + if (auto_detected.repo.empty() || auto_detected.ggufFile.empty()) { + exit(1); // built without CURL, error message already printed + } + model.hf_repo = auto_detected.repo; + model.hf_file = is_mmproj ? 
auto_detected.mmprojFile : auto_detected.ggufFile; + } else { + model.hf_file = model.path; } - hf_repo = auto_detected.first; - hf_file = auto_detected.second; - } else { - hf_file = model; } + + std::string model_endpoint = get_model_endpoint(); + model.url = model_endpoint + model.hf_repo + "/resolve/main/" + model.hf_file; + // make sure model path is present (for caching purposes) + if (model.path.empty()) { + // this is to avoid different repo having same file name, or same file name in different subdirs + std::string filename = model.hf_repo + "_" + model.hf_file; + // to make sure we don't have any slashes in the filename + string_replace_all(filename, "/", "_"); + model.path = fs_get_cache_file(filename); + } + + } else if (!model.url.empty()) { + if (model.path.empty()) { + auto f = string_split(model.url, '#').front(); + f = string_split(f, '?').front(); + model.path = fs_get_cache_file(string_split(f, '/').back()); + } + + } else if (model.path.empty()) { + model.path = model_path_default; } - // make sure model path is present (for caching purposes) - if (model.empty()) { - // this is to avoid different repo having same file name, or same file name in different subdirs - std::string filename = hf_repo + "_" + hf_file; - // to make sure we don't have any slashes in the filename - string_replace_all(filename, "/", "_"); - model = fs_get_cache_file(filename); + } + + // then, download it if needed + if (!model.url.empty()) { + bool ok = common_download_model(model, bearer_token); + if (!ok) { + LOG_ERR("error: failed to download model from %s\n", model.url.c_str()); + exit(1); } - } else if (!model_url.empty()) { - if (model.empty()) { - auto f = string_split(model_url, '#').front(); - f = string_split(f, '?').front(); - model = fs_get_cache_file(string_split(f, '/').back()); - } - } else if (model.empty()) { - model = model_default; } } @@ -299,10 +827,16 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context throw std::invalid_argument("error: --prompt-cache-all not supported in interactive mode yet\n"); } - // TODO: refactor model params in a common struct - common_params_handle_model_default(params.model, params.model_url, params.hf_repo, params.hf_file, params.hf_token, DEFAULT_MODEL_PATH); - common_params_handle_model_default(params.speculative.model, params.speculative.model_url, params.speculative.hf_repo, params.speculative.hf_file, params.hf_token, ""); - common_params_handle_model_default(params.vocoder.model, params.vocoder.model_url, params.vocoder.hf_repo, params.vocoder.hf_file, params.hf_token, ""); + common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH); + common_params_handle_model(params.speculative.model, params.hf_token, ""); + common_params_handle_model(params.vocoder.model, params.hf_token, ""); + + // allow --mmproj to be set from -hf + // assuming that mmproj is always in the same repo as text model + if (!params.model.hf_repo.empty() && ctx_arg.ex == LLAMA_EXAMPLE_LLAVA) { + params.mmproj.hf_repo = params.model.hf_repo; + } + common_params_handle_model(params.mmproj, params.hf_token, "", true); if (params.escape) { string_process_escapes(params.prompt); @@ -321,6 +855,10 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context params.kv_overrides.back().key[0] = 0; } + if (!params.tensor_buft_overrides.empty()) { + params.tensor_buft_overrides.push_back({nullptr, nullptr}); + } + if (params.reranking && params.embedding) { throw std::invalid_argument("error: either 
--embedding or --reranking can be specified, but not both"); } @@ -365,6 +903,112 @@ static void common_params_print_usage(common_params_context & ctx_arg) { print_options(specific_options); } +static void common_params_print_completion(common_params_context & ctx_arg) { + std::vector common_options; + std::vector sparam_options; + std::vector specific_options; + + for (auto & opt : ctx_arg.options) { + if (opt.is_sparam) { + sparam_options.push_back(&opt); + } else if (opt.in_example(ctx_arg.ex)) { + specific_options.push_back(&opt); + } else { + common_options.push_back(&opt); + } + } + + printf("_llama_completions() {\n"); + printf(" local cur prev opts\n"); + printf(" COMPREPLY=()\n"); + printf(" cur=\"${COMP_WORDS[COMP_CWORD]}\"\n"); + printf(" prev=\"${COMP_WORDS[COMP_CWORD-1]}\"\n\n"); + + printf(" opts=\""); + auto print_options = [](const std::vector & options) { + for (const common_arg * opt : options) { + for (const char * arg : opt->args) { + printf("%s ", arg); + } + } + }; + + print_options(common_options); + print_options(sparam_options); + print_options(specific_options); + printf("\"\n\n"); + + printf(" case \"$prev\" in\n"); + printf(" --model)\n"); + printf(" COMPREPLY=( $(compgen -f -X '!*.gguf' -- \"$cur\") $(compgen -d -- \"$cur\") )\n"); + printf(" return 0\n"); + printf(" ;;\n"); + printf(" --grammar-file)\n"); + printf(" COMPREPLY=( $(compgen -f -X '!*.gbnf' -- \"$cur\") $(compgen -d -- \"$cur\") )\n"); + printf(" return 0\n"); + printf(" ;;\n"); + printf(" --chat-template-file)\n"); + printf(" COMPREPLY=( $(compgen -f -X '!*.jinja' -- \"$cur\") $(compgen -d -- \"$cur\") )\n"); + printf(" return 0\n"); + printf(" ;;\n"); + printf(" *)\n"); + printf(" COMPREPLY=( $(compgen -W \"${opts}\" -- \"$cur\") )\n"); + printf(" return 0\n"); + printf(" ;;\n"); + printf(" esac\n"); + printf("}\n\n"); + + std::set executables = { + "llama-batched", + "llama-batched-bench", + "llama-bench", + "llama-cli", + "llama-convert-llama2c-to-ggml", + "llama-cvector-generator", + "llama-embedding", + "llama-eval-callback", + "llama-export-lora", + "llama-gbnf-validator", + "llama-gen-docs", + "llama-gguf", + "llama-gguf-hash", + "llama-gguf-split", + "llama-gritlm", + "llama-imatrix", + "llama-infill", + "llama-llava-cli", + "llama-llava-clip-quantize-cli", + "llama-lookahead", + "llama-lookup", + "llama-lookup-create", + "llama-lookup-merge", + "llama-lookup-stats", + "llama-minicpmv-cli", + "llama-parallel", + "llama-passkey", + "llama-perplexity", + "llama-q8dot", + "llama-quantize", + "llama-quantize-stats", + "llama-qwen2vl-cli", + "llama-retrieval", + "llama-run", + "llama-save-load-state", + "llama-server", + "llama-simple", + "llama-simple-chat", + "llama-speculative", + "llama-speculative-simple", + "llama-tokenize", + "llama-tts", + "llama-vdot" + }; + + for (const auto& exe : executables) { + printf("complete -F _llama_completions %s\n", exe.c_str()); + } +} + static std::vector parse_device_list(const std::string & value) { std::vector devices; auto dev_names = string_split(value, ','); @@ -426,6 +1070,10 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e } exit(0); } + if (ctx_arg.params.completion) { + common_params_print_completion(ctx_arg); + exit(0); + } } catch (const std::invalid_argument & ex) { fprintf(stderr, "%s\n", ex.what()); ctx_arg.params = params_org; @@ -494,6 +1142,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex exit(0); } )); + add_opt(common_arg( + {"--completion-bash"}, + "print 
source-able bash completion script for llama.cpp", + [](common_params & params) { + params.completion = true; + } + )); add_opt(common_arg( {"--verbose-prompt"}, string_format("print a verbose prompt before generation (default: %s)", params.verbose_prompt ? "true" : "false"), @@ -646,7 +1301,11 @@ common_params_context common_params_parser_init(common_params & params, llama_ex ).set_env("LLAMA_ARG_CTX_SIZE")); add_opt(common_arg( {"-n", "--predict", "--n-predict"}, "N", - string_format("number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)", params.n_predict), + string_format( + ex == LLAMA_EXAMPLE_MAIN || ex == LLAMA_EXAMPLE_INFILL + ? "number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)" + : "number of tokens to predict (default: %d, -1 = infinity)", + params.n_predict), [](common_params & params, int value) { params.n_predict = value; } @@ -674,7 +1333,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex )); add_opt(common_arg( {"--no-context-shift"}, - string_format("disables context shift on inifinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"), + string_format("disables context shift on infinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"), [](common_params & params) { params.ctx_shift = false; } @@ -695,13 +1354,18 @@ common_params_context common_params_parser_init(common_params & params, llama_ex ).set_env("LLAMA_ARG_FLASH_ATTN")); add_opt(common_arg( {"-p", "--prompt"}, "PROMPT", - ex == LLAMA_EXAMPLE_MAIN - ? "prompt to start generation with\nif -cnv is set, this will be used as system prompt" - : "prompt to start generation with", + "prompt to start generation with; for system message, use -sys", [](common_params & params, const std::string & value) { params.prompt = value; } ).set_excludes({LLAMA_EXAMPLE_SERVER})); + add_opt(common_arg( + {"-sys", "--system-prompt"}, "PROMPT", + "system prompt to use with model (if applicable, depending on chat template)", + [](common_params & params, const std::string & value) { + params.system_prompt = value; + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); add_opt(common_arg( {"--no-perf"}, string_format("disable internal libllama performance timings (default: %s)", params.no_perf ? 
"true" : "false"), @@ -726,6 +1390,20 @@ common_params_context common_params_parser_init(common_params & params, llama_ex } } ).set_excludes({LLAMA_EXAMPLE_SERVER})); + add_opt(common_arg( + {"-sysf", "--system-prompt-file"}, "FNAME", + "a file containing the system prompt (default: none)", + [](common_params & params, const std::string & value) { + std::ifstream file(value); + if (!file) { + throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); + } + std::copy(std::istreambuf_iterator(file), std::istreambuf_iterator(), back_inserter(params.system_prompt)); + if (!params.system_prompt.empty() && params.system_prompt.back() == '\n') { + params.system_prompt.pop_back(); + } + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); add_opt(common_arg( {"--in-file"}, "FNAME", "an input file (repeat to specify multiple files)", @@ -826,6 +1504,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.conversation_mode = COMMON_CONVERSATION_MODE_DISABLED; } ).set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"-st", "--single-turn"}, + "run conversation for a single turn only, then exit when done\n" + "will not be interactive if first turn is predefined with --prompt\n" + "(default: false)", + [](common_params & params) { + params.single_turn = true; + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); add_opt(common_arg( {"-i", "--interactive"}, string_format("run in interactive mode (default: %s)", params.interactive ? "true" : "false"), @@ -877,7 +1564,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex [](common_params & params) { params.warmup = false; } - ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER})); + ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_EMBEDDING})); add_opt(common_arg( {"--spm-infill"}, string_format( @@ -946,6 +1633,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.sampling.min_p = std::stof(value); } ).set_sparam()); + add_opt(common_arg( + {"--top-nsigma"}, "N", + string_format("top-n-sigma sampling (default: %.1f, -1.0 = disabled)", params.sampling.top_n_sigma), + [](common_params & params, const std::string & value) { + params.sampling.top_n_sigma = std::stof(value); + } + ).set_examples({LLAMA_EXAMPLE_MAIN}).set_sparam()); add_opt(common_arg( {"--xtc-probability"}, "N", string_format("xtc probability (default: %.1f, 0.0 = disabled)", (double)params.sampling.xtc_probability), @@ -1404,7 +2098,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex {"--mmproj"}, "FILE", "path to a multimodal projector file for LLaVA. see examples/llava/README.md", [](common_params & params, const std::string & value) { - params.mmproj = value; + params.mmproj.path = value; + } + ).set_examples({LLAMA_EXAMPLE_LLAVA})); + add_opt(common_arg( + {"--mmproj-url"}, "URL", + "URL to a multimodal projector file for LLaVA. 
see examples/llava/README.md", + [](common_params & params, const std::string & value) { + params.mmproj.url = value; } ).set_examples({LLAMA_EXAMPLE_LLAVA})); add_opt(common_arg( @@ -1445,7 +2146,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex "- isolate: only spawn threads on CPUs on the node that execution started on\n" "- numactl: use the CPU map provided by numactl\n" "if run without this previously, it is recommended to drop the system page cache before using this\n" - "see https://github.com/ggerganov/llama.cpp/issues/1437", + "see https://github.com/ggml-org/llama.cpp/issues/1437", [](common_params & params, const std::string & value) { /**/ if (value == "distribute" || value == "") { params.numa = GGML_NUMA_STRATEGY_DISTRIBUTE; } else if (value == "isolate") { params.numa = GGML_NUMA_STRATEGY_ISOLATE; } @@ -1465,18 +2166,66 @@ common_params_context common_params_parser_init(common_params & params, llama_ex {"--list-devices"}, "print list of available devices and exit", [](common_params &) { - printf("Available devices:\n"); + std::vector rpc_devices; + std::vector all_devices; for (size_t i = 0; i < ggml_backend_dev_count(); ++i) { auto * dev = ggml_backend_dev_get(i); if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU) { - size_t free, total; - ggml_backend_dev_memory(dev, &free, &total); - printf(" %s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024); + ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev); + if (ggml_backend_reg_name(reg) == std::string("RPC")) { + rpc_devices.push_back(dev); + } else { + all_devices.push_back(dev); + } } } + // insert RPC devices in front + all_devices.insert(all_devices.begin(), rpc_devices.begin(), rpc_devices.end()); + printf("Available devices:\n"); + for (size_t i = 0; i < all_devices.size(); ++i) { + auto * dev = all_devices[i]; + size_t free, total; + ggml_backend_dev_memory(dev, &free, &total); + printf(" %s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024); + } exit(0); } )); + add_opt(common_arg( + {"--override-tensor", "-ot"}, "=,...", + "override tensor buffer type", [](common_params & params, const std::string & value) { + /* static */ std::map buft_list; + if (buft_list.empty()) { + // enumerate all the devices and add their buffer types to the list + for (size_t i = 0; i < ggml_backend_dev_count(); ++i) { + auto * dev = ggml_backend_dev_get(i); + auto * buft = ggml_backend_dev_buffer_type(dev); + if (buft) { + buft_list[ggml_backend_buft_name(buft)] = buft; + } + } + } + + for (const auto & override : string_split(value, ',')) { + std::string::size_type pos = override.find('='); + if (pos == std::string::npos) { + throw std::invalid_argument("invalid value"); + } + std::string tensor_name = override.substr(0, pos); + std::string buffer_type = override.substr(pos + 1); + + if (buft_list.find(buffer_type) == buft_list.end()) { + printf("Available buffer types:\n"); + for (const auto & it : buft_list) { + printf(" %s\n", ggml_backend_buft_name(it.second)); + } + throw std::invalid_argument("unknown buffer type"); + } + // FIXME: this leaks memory + params.tensor_buft_overrides.push_back({strdup(tensor_name.c_str()), buft_list.at(buffer_type)}); + } + } + )); add_opt(common_arg( {"-ngl", "--gpu-layers", "--n-gpu-layers"}, "N", "number of layers to store in VRAM", @@ -1620,14 +2369,14 @@ common_params_context 
common_params_parser_init(common_params & params, llama_ex "or `--model-url` if set, otherwise %s)", DEFAULT_MODEL_PATH ), [](common_params & params, const std::string & value) { - params.model = value; + params.model.path = value; } ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}).set_env("LLAMA_ARG_MODEL")); add_opt(common_arg( {"-mu", "--model-url"}, "MODEL_URL", "model download url (default: unused)", [](common_params & params, const std::string & value) { - params.model_url = value; + params.model.url = value; } ).set_env("LLAMA_ARG_MODEL_URL")); add_opt(common_arg( @@ -1636,35 +2385,35 @@ common_params_context common_params_parser_init(common_params & params, llama_ex "example: unsloth/phi-4-GGUF:q4_k_m\n" "(default: unused)", [](common_params & params, const std::string & value) { - params.hf_repo = value; + params.model.hf_repo = value; } ).set_env("LLAMA_ARG_HF_REPO")); add_opt(common_arg( {"-hfd", "-hfrd", "--hf-repo-draft"}, "/[:quant]", "Same as --hf-repo, but for the draft model (default: unused)", [](common_params & params, const std::string & value) { - params.speculative.hf_repo = value; + params.speculative.model.hf_repo = value; } ).set_env("LLAMA_ARG_HFD_REPO")); add_opt(common_arg( {"-hff", "--hf-file"}, "FILE", "Hugging Face model file. If specified, it will override the quant in --hf-repo (default: unused)", [](common_params & params, const std::string & value) { - params.hf_file = value; + params.model.hf_file = value; } ).set_env("LLAMA_ARG_HF_FILE")); add_opt(common_arg( {"-hfv", "-hfrv", "--hf-repo-v"}, "/[:quant]", "Hugging Face model repository for the vocoder model (default: unused)", [](common_params & params, const std::string & value) { - params.vocoder.hf_repo = value; + params.vocoder.model.hf_repo = value; } ).set_env("LLAMA_ARG_HF_REPO_V")); add_opt(common_arg( {"-hffv", "--hf-file-v"}, "FILE", "Hugging Face model file for the vocoder model (default: unused)", [](common_params & params, const std::string & value) { - params.vocoder.hf_file = value; + params.vocoder.model.hf_file = value; } ).set_env("LLAMA_ARG_HF_FILE_V")); add_opt(common_arg( @@ -1715,18 +2464,11 @@ common_params_context common_params_parser_init(common_params & params, llama_ex ).set_examples({LLAMA_EXAMPLE_PASSKEY})); add_opt(common_arg( {"-o", "--output", "--output-file"}, "FNAME", - string_format("output file (default: '%s')", - ex == LLAMA_EXAMPLE_EXPORT_LORA - ? params.lora_outfile.c_str() - : ex == LLAMA_EXAMPLE_CVECTOR_GENERATOR - ? 
params.cvector_outfile.c_str() - : params.out_file.c_str()), + string_format("output file (default: '%s')", params.out_file.c_str()), [](common_params & params, const std::string & value) { params.out_file = value; - params.cvector_outfile = value; - params.lora_outfile = value; } - ).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA})); + ).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA, LLAMA_EXAMPLE_TTS})); add_opt(common_arg( {"-ofreq", "--output-frequency"}, "N", string_format("output the imatrix every N iterations (default: %d)", params.n_out_freq), @@ -1816,7 +2558,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex ).set_examples({LLAMA_EXAMPLE_EMBEDDING})); add_opt(common_arg( {"--host"}, "HOST", - string_format("ip address to listen (default: %s)", params.hostname.c_str()), + string_format("ip address to listen, or bind to an UNIX socket if the address ends with .sock (default: %s)", params.hostname.c_str()), [](common_params & params, const std::string & value) { params.hostname = value; } @@ -1962,6 +2704,17 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.use_jinja = true; } ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_JINJA")); + add_opt(common_arg( + {"--reasoning-format"}, "FORMAT", + "reasoning format (default: deepseek; allowed values: deepseek, none)\n" + "controls whether thought tags are extracted from the response, and in which format they're returned. 'none' leaves thoughts unparsed in `message.content`, 'deepseek' puts them in `message.reasoning_content` (for DeepSeek R1 & Command R7B only).\n" + "only supported for non-streamed responses", + [](common_params & params, const std::string & value) { + /**/ if (value == "deepseek") { params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; } + else if (value == "none") { params.reasoning_format = COMMON_REASONING_FORMAT_NONE; } + else { std::invalid_argument("invalid value"); } + } + ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_THINK")); add_opt(common_arg( {"--chat-template"}, "JINJA_TEMPLATE", string_format( @@ -2099,7 +2852,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex ).set_env("LLAMA_LOG_VERBOSITY")); add_opt(common_arg( {"--log-prefix"}, - "Enable prefx in log messages", + "Enable prefix in log messages", [](common_params &) { common_log_set_prefix(common_log_main(), true); } @@ -2280,7 +3033,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex {"-md", "--model-draft"}, "FNAME", "draft model for speculative decoding (default: unused)", [](common_params & params, const std::string & value) { - params.speculative.model = value; + params.speculative.model.path = value; } ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODEL_DRAFT")); @@ -2288,7 +3041,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex {"-mv", "--model-vocoder"}, "FNAME", "vocoder model for audio generation (default: unused)", [](common_params & params, const std::string & value) { - params.vocoder.model = value; + params.vocoder.model.path = value; } ).set_examples({LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_SERVER})); add_opt(common_arg( @@ -2298,18 +3051,153 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.vocoder.use_guide_tokens = true; 
} ).set_examples({LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_SERVER})); + add_opt(common_arg( + {"--tts-speaker-file"}, "FNAME", + "speaker file path for audio generation", + [](common_params & params, const std::string & value) { + params.vocoder.speaker_file = value; + } + ).set_examples({LLAMA_EXAMPLE_TTS})); // model-specific add_opt(common_arg( {"--tts-oute-default"}, string_format("use default OuteTTS models (note: can download weights from the internet)"), [](common_params & params) { - params.hf_repo = "OuteAI/OuteTTS-0.2-500M-GGUF"; - params.hf_file = "OuteTTS-0.2-500M-Q8_0.gguf"; - params.vocoder.hf_repo = "ggml-org/WavTokenizer"; - params.vocoder.hf_file = "WavTokenizer-Large-75-F16.gguf"; + params.model.hf_repo = "OuteAI/OuteTTS-0.2-500M-GGUF"; + params.model.hf_file = "OuteTTS-0.2-500M-Q8_0.gguf"; + params.vocoder.model.hf_repo = "ggml-org/WavTokenizer"; + params.vocoder.model.hf_file = "WavTokenizer-Large-75-F16.gguf"; } ).set_examples({LLAMA_EXAMPLE_TTS})); + add_opt(common_arg( + {"--embd-bge-small-en-default"}, + string_format("use default bge-small-en-v1.5 model (note: can download weights from the internet)"), + [](common_params & params) { + params.model.hf_repo = "ggml-org/bge-small-en-v1.5-Q8_0-GGUF"; + params.model.hf_file = "bge-small-en-v1.5-q8_0.gguf"; + params.pooling_type = LLAMA_POOLING_TYPE_NONE; + params.embd_normalize = 2; + params.n_ctx = 512; + params.verbose_prompt = true; + params.embedding = true; + } + ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER})); + + add_opt(common_arg( + {"--embd-e5-small-en-default"}, + string_format("use default e5-small-v2 model (note: can download weights from the internet)"), + [](common_params & params) { + params.model.hf_repo = "ggml-org/e5-small-v2-Q8_0-GGUF"; + params.model.hf_file = "e5-small-v2-q8_0.gguf"; + params.pooling_type = LLAMA_POOLING_TYPE_NONE; + params.embd_normalize = 2; + params.n_ctx = 512; + params.verbose_prompt = true; + params.embedding = true; + } + ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER})); + + add_opt(common_arg( + {"--embd-gte-small-default"}, + string_format("use default gte-small model (note: can download weights from the internet)"), + [](common_params & params) { + params.model.hf_repo = "ggml-org/gte-small-Q8_0-GGUF"; + params.model.hf_file = "gte-small-q8_0.gguf"; + params.pooling_type = LLAMA_POOLING_TYPE_NONE; + params.embd_normalize = 2; + params.n_ctx = 512; + params.verbose_prompt = true; + params.embedding = true; + } + ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER})); + + add_opt(common_arg( + {"--fim-qwen-1.5b-default"}, + string_format("use default Qwen 2.5 Coder 1.5B (note: can download weights from the internet)"), + [](common_params & params) { + params.model.hf_repo = "ggml-org/Qwen2.5-Coder-1.5B-Q8_0-GGUF"; + params.model.hf_file = "qwen2.5-coder-1.5b-q8_0.gguf"; + params.port = 8012; + params.n_gpu_layers = 99; + params.flash_attn = true; + params.n_ubatch = 1024; + params.n_batch = 1024; + params.n_ctx = 0; + params.n_cache_reuse = 256; + } + ).set_examples({LLAMA_EXAMPLE_SERVER})); + + add_opt(common_arg( + {"--fim-qwen-3b-default"}, + string_format("use default Qwen 2.5 Coder 3B (note: can download weights from the internet)"), + [](common_params & params) { + params.model.hf_repo = "ggml-org/Qwen2.5-Coder-3B-Q8_0-GGUF"; + params.model.hf_file = "qwen2.5-coder-3b-q8_0.gguf"; + params.port = 8012; + params.n_gpu_layers = 99; + params.flash_attn = true; + params.n_ubatch = 1024; + params.n_batch = 1024; + params.n_ctx = 0; + 
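                // Editorial note (not part of the patch): the --fim-qwen-* presets here are
                // llama-server presets (LLAMA_EXAMPLE_SERVER), e.g. `llama-server --fim-qwen-3b-default`.
                // As I read the common params, n_ctx = 0 means "use the model's default context size"
                // and n_cache_reuse sets the minimum chunk size for KV-cache reuse, which suits
                // fill-in-the-middle serving; these are assumptions, not changes made by this diff.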
params.n_cache_reuse = 256; + } + ).set_examples({LLAMA_EXAMPLE_SERVER})); + + add_opt(common_arg( + {"--fim-qwen-7b-default"}, + string_format("use default Qwen 2.5 Coder 7B (note: can download weights from the internet)"), + [](common_params & params) { + params.model.hf_repo = "ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF"; + params.model.hf_file = "qwen2.5-coder-7b-q8_0.gguf"; + params.port = 8012; + params.n_gpu_layers = 99; + params.flash_attn = true; + params.n_ubatch = 1024; + params.n_batch = 1024; + params.n_ctx = 0; + params.n_cache_reuse = 256; + } + ).set_examples({LLAMA_EXAMPLE_SERVER})); + + add_opt(common_arg( + {"--fim-qwen-7b-spec"}, + string_format("use Qwen 2.5 Coder 7B + 0.5B draft for speculative decoding (note: can download weights from the internet)"), + [](common_params & params) { + params.model.hf_repo = "ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF"; + params.model.hf_file = "qwen2.5-coder-7b-q8_0.gguf"; + params.speculative.model.hf_repo = "ggml-org/Qwen2.5-Coder-0.5B-Q8_0-GGUF"; + params.speculative.model.hf_file = "qwen2.5-coder-0.5b-q8_0.gguf"; + params.speculative.n_gpu_layers = 99; + params.port = 8012; + params.n_gpu_layers = 99; + params.flash_attn = true; + params.n_ubatch = 1024; + params.n_batch = 1024; + params.n_ctx = 0; + params.n_cache_reuse = 256; + } + ).set_examples({LLAMA_EXAMPLE_SERVER})); + + add_opt(common_arg( + {"--fim-qwen-14b-spec"}, + string_format("use Qwen 2.5 Coder 14B + 0.5B draft for speculative decoding (note: can download weights from the internet)"), + [](common_params & params) { + params.model.hf_repo = "ggml-org/Qwen2.5-Coder-14B-Q8_0-GGUF"; + params.model.hf_file = "qwen2.5-coder-14b-q8_0.gguf"; + params.speculative.model.hf_repo = "ggml-org/Qwen2.5-Coder-0.5B-Q8_0-GGUF"; + params.speculative.model.hf_file = "qwen2.5-coder-0.5b-q8_0.gguf"; + params.speculative.n_gpu_layers = 99; + params.port = 8012; + params.n_gpu_layers = 99; + params.flash_attn = true; + params.n_ubatch = 1024; + params.n_batch = 1024; + params.n_ctx = 0; + params.n_cache_reuse = 256; + } + ).set_examples({LLAMA_EXAMPLE_SERVER})); + return ctx_arg; } diff --git a/common/chat-template.hpp b/common/chat-template.hpp deleted file mode 100644 index 42ee0b6159..0000000000 --- a/common/chat-template.hpp +++ /dev/null @@ -1,268 +0,0 @@ -/* - Copyright 2024 Google LLC - - Use of this source code is governed by an MIT-style - license that can be found in the LICENSE file or at - https://opensource.org/licenses/MIT. -*/ -// SPDX-License-Identifier: MIT -#pragma once - -#include "minja.hpp" -#include -#include -#include - -using json = nlohmann::ordered_json; - -namespace minja { - -class chat_template { - public: - - private: - bool supports_tools_ = true; - // Meta-Llama-3.1-8B-Instruct's template expects arguments to be an object. - // Most other templates (and OpenAI's API) expect the arguments object to be stringified. 
- bool requires_object_arguments_ = false; - bool requires_typed_content_ = false; - bool supports_system_role_ = true; - bool supports_parallel_tool_calls_ = false; - std::string source_; - std::string bos_token_; - std::string eos_token_; - std::shared_ptr template_root_; - - std::string try_raw_render( - const nlohmann::ordered_json & messages, - const nlohmann::ordered_json & tools, - bool add_generation_prompt, - const nlohmann::ordered_json & extra_context = nlohmann::ordered_json()) const - { - try { - auto prompt = apply(messages, tools, add_generation_prompt, extra_context, /* adjust_inputs= */ false); - // fprintf(stderr, "Prompt: %s\n", prompt.c_str()); - return prompt; - } catch (const std::exception & e) { - // fprintf(stderr, "Error: %s\n", e.what()); - return ""; - } - } - - public: - chat_template(const std::string & source, const std::string & bos_token, const std::string & eos_token) - : source_(source), bos_token_(bos_token), eos_token_(eos_token) - { - template_root_ = minja::Parser::parse(source_, { - /* .trim_blocks = */ true, - /* .lstrip_blocks = */ true, - /* .keep_trailing_newline = */ false, - }); - supports_tools_ = source.find("tools") != std::string::npos; - - auto renders_string_arguments = - try_raw_render({ - { - {"role", "user"}, - {"content", "Hey"} - }, - { - {"role", "assistant"}, - {"tool_calls", json::array({ - { - {"id", "call_1___"}, - {"type", "function"}, - {"function", { - {"arguments", "{\"code\": \"print('Hello, World!')\"}"}, - {"name", "ipython"}, - }}, - }, - })}, - } - }, {}, false).find("{\"code\": \"print") != std::string::npos; - if (!renders_string_arguments) { - auto renders_object_arguments = - try_raw_render({ - { - {"role", "user"}, - {"content", "Hey"} - }, - { - {"role", "assistant"}, - {"tool_calls", json::array({ - { - {"id", "call_1___"}, - {"type", "function"}, - {"function", { - {"arguments", { - {"code", "print('Hello, World!')"}, - }}, - {"name", "ipython"}, - }}, - }, - })}, - } - }, {}, false).find("{\"code\": \"print") != std::string::npos; - requires_object_arguments_ = renders_object_arguments; - } - supports_parallel_tool_calls_ = source.find("tool_call_id") != std::string::npos; - - supports_system_role_ = try_raw_render({ - {{"role", "system"}, {"content", ""}}, - {{"role", "user"}, {"content", "Hey"}} - }, {}, false).find("") != std::string::npos; - - requires_typed_content_ = try_raw_render({{{"role", "user"}, {"content", "Hey"}}}, {}, false).find("Hey") == std::string::npos - && try_raw_render({{{"role", "user"}, {"content", {{{"type", "text"}, {"text", "Hey"}}}}}}, {}, false).find("Hey") != std::string::npos; - } - - const std::string & source() const { return source_; } - const std::string & bos_token() const { return bos_token_; } - const std::string & eos_token() const { return eos_token_; } - bool supports_tools() const { return supports_tools_; } - bool supports_parallel_tool_calls() const { return supports_parallel_tool_calls_; } - - std::string apply( - const nlohmann::ordered_json & messages, - const nlohmann::ordered_json & tools, - bool add_generation_prompt, - const nlohmann::ordered_json & extra_context = nlohmann::ordered_json(), - bool adjust_inputs = true) const - { - json actual_messages; - - // First, "fix" messages so they have a chance to be rendered correctly by the template - - if (adjust_inputs && (requires_object_arguments_ || !supports_system_role_ || !supports_tools_ || requires_typed_content_)) { - actual_messages = json::array(); - - auto add_message = [&](const json & msg) { - if 
(requires_typed_content_ && msg.contains("content") && !msg.at("content").is_null() && msg.at("content").is_string()) { - actual_messages.push_back({ - {"role", msg.at("role")}, - {"content", {{ - {"type", "text"}, - {"text", msg.at("content")}, - }}}, - }); - } else { - actual_messages.push_back(msg); - } - }; - - std::string pending_system; - auto flush_sys = [&]() { - if (!pending_system.empty()) { - add_message({ - {"role", "user"}, - {"content", pending_system}, - }); - pending_system.clear(); - } - }; - for (const auto & message_ : messages) { - auto message = message_; - if (!message.contains("role") || !message.contains("content")) { - throw std::runtime_error("message must have 'role' and 'content' fields: " + message.dump()); - } - std::string role = message.at("role"); - - if (message.contains("tool_calls")) { - if (requires_object_arguments_ || !supports_tools_) { - for (auto & tool_call : message.at("tool_calls")) { - if (tool_call["type"] == "function") { - auto & function = tool_call.at("function"); - std::string arguments = function.at("arguments"); - function["arguments"] = json::parse(arguments); - } - } - } - if (!supports_tools_) { - auto content = message.at("content"); - auto tool_calls = json::array(); - for (const auto & tool_call : message.at("tool_calls")) { - if (tool_call.at("type") != "function") { - continue; - } - const auto & function = tool_call.at("function"); - auto tc = json { - {"name", function.at("name")}, - {"arguments", function.at("arguments")}, - }; - if (tool_call.contains("id")) { - tc["id"] = tool_call["id"]; - } - tool_calls.push_back(tc); - } - auto obj = json { - {"tool_calls", tool_calls}, - }; - if (!content.is_null() && content != "") { - obj["content"] = content; - } - message["content"] = obj.dump(2); - message.erase("tool_calls"); - } - } - if (!supports_tools_ && role == "tool") { - message["role"] = "user"; - auto obj = json { - {"tool_response", { - {"tool", message.at("name")}, - {"content", message.at("content")}, - }}, - }; - if (message.contains("tool_call_id")) { - obj["tool_response"]["tool_call_id"] = message.at("tool_call_id"); - } - message["content"] = obj.dump(2); - message.erase("name"); - } - - if (!message["content"].is_null() && !supports_system_role_) { - std::string content = message.at("content"); - if (role == "system") { - if (!pending_system.empty()) pending_system += "\n"; - pending_system += content; - continue; - } else { - if (role == "user") { - if (!pending_system.empty()) { - message["content"] = pending_system + (content.empty() ? 
"" : "\n" + content); - pending_system.clear(); - } - } else { - flush_sys(); - } - } - } - add_message(message); - } - flush_sys(); - } else { - actual_messages = messages; - } - - auto context = minja::Context::make(json({ - {"messages", actual_messages}, - {"add_generation_prompt", add_generation_prompt}, - {"bos_token", bos_token_}, - {"eos_token", eos_token_}, - })); - - if (!tools.is_null()) { - auto tools_val = minja::Value(tools); - context->set("tools", tools_val); - } - if (!extra_context.is_null()) { - for (auto & kv : extra_context.items()) { - minja::Value val(kv.value()); - context->set(kv.key(), val); - } - } - - return template_root_->render(context); - } -}; - -} // namespace minja diff --git a/common/chat.cpp b/common/chat.cpp new file mode 100644 index 0000000000..bbc5f087cd --- /dev/null +++ b/common/chat.cpp @@ -0,0 +1,1779 @@ +#include "chat.h" +#include "json-schema-to-grammar.h" +#include "log.h" +#include "minja/chat-template.hpp" +#include "minja/minja.hpp" + +#include + +typedef minja::chat_template common_chat_template; + +struct common_chat_templates { + bool has_explicit_template; // Model had builtin template or template overridde was specified. + std::unique_ptr template_default; // always set (defaults to chatml) + std::unique_ptr template_tool_use; +}; + +struct templates_params { + json messages; + json tools; + common_chat_tool_choice tool_choice; + json json_schema; + bool parallel_tool_calls; + bool stream; + std::string grammar; + bool add_generation_prompt = true; + bool extract_reasoning = true; +}; + +common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice) { + if (tool_choice == "auto") { + return COMMON_CHAT_TOOL_CHOICE_AUTO; + } + if (tool_choice == "none") { + return COMMON_CHAT_TOOL_CHOICE_NONE; + } + if (tool_choice == "required") { + return COMMON_CHAT_TOOL_CHOICE_REQUIRED; + } + throw std::runtime_error("Invalid tool_choice: " + tool_choice); +} + +template <> +std::vector common_chat_msgs_parse_oaicompat(const json & messages) { + std::vector msgs; + + try { + + if (!messages.is_array()) { + throw std::runtime_error("Expected 'messages' to be an array, got " + messages.dump()); + } + + for (const auto & message : messages) { + if (!message.is_object()) { + throw std::runtime_error("Expected 'message' to be an object, got " + message.dump()); + } + + common_chat_msg msg; + if (!message.contains("role")) { + throw std::runtime_error("Missing 'role' in message: " + message.dump()); + } + msg.role = message.at("role"); + + auto has_content = message.contains("content"); + auto has_tool_calls = message.contains("tool_calls"); + if (has_content) { + const auto & content = message.at("content"); + if (content.is_string()) { + msg.content = content; + } else if (content.is_array()) { + for (const auto & part : content) { + if (!part.contains("type")) { + throw std::runtime_error("Missing content part type: " + part.dump()); + } + const auto & type = part.at("type"); + if (type != "text") { + throw std::runtime_error("Unsupported content part type: " + type.dump()); + } + common_chat_msg_content_part msg_part; + msg_part.type = type; + msg_part.text = part.at("text"); + msg.content_parts.push_back(msg_part); + } + } else if (!content.is_null()) { + throw std::runtime_error("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)"); + } + } + if (has_tool_calls) { + for (const auto & tool_call : message.at("tool_calls")) { + 
common_chat_tool_call tc; + if (!tool_call.contains("type")) { + throw std::runtime_error("Missing tool call type: " + tool_call.dump()); + } + const auto & type = tool_call.at("type"); + if (type != "function") { + throw std::runtime_error("Unsupported tool call type: " + tool_call.dump()); + } + if (!tool_call.contains("function")) { + throw std::runtime_error("Missing tool call function: " + tool_call.dump()); + } + const auto & fc = tool_call.at("function"); + if (!fc.contains("name")) { + throw std::runtime_error("Missing tool call name: " + tool_call.dump()); + } + tc.name = fc.at("name"); + tc.arguments = fc.at("arguments"); + if (tool_call.contains("id")) { + tc.id = tool_call.at("id"); + } + msg.tool_calls.push_back(tc); + } + } + if (!has_content && !has_tool_calls) { + throw std::runtime_error("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)"); + } + if (message.contains("reasoning_content")) { + msg.reasoning_content = message.at("reasoning_content"); + } + if (message.contains("name")) { + msg.tool_name = message.at("name"); + } + if (message.contains("tool_call_id")) { + msg.tool_call_id = message.at("tool_call_id"); + } + + msgs.push_back(msg); + } + } catch (const std::exception & e) { + throw std::runtime_error("Failed to parse messages: " + std::string(e.what()) + "; messages = " + messages.dump(2)); + } + + return msgs; +} + +template <> +json common_chat_msgs_to_json_oaicompat(const std::vector & msgs, bool concat_typed_text) { + json messages = json::array(); + for (const auto & msg : msgs) { + if (!msg.content.empty() && !msg.content_parts.empty()) { + throw std::runtime_error("Cannot specify both content and content_parts"); + } + json jmsg { + {"role", msg.role}, + }; + if (!msg.content.empty()) { + jmsg["content"] = msg.content; + } else if (!msg.content_parts.empty()) { + if (concat_typed_text) { + std::string text; + for (const auto & part : msg.content_parts) { + if (part.type != "text") { + LOG_WRN("Ignoring content part type: %s\n", part.type.c_str()); + continue; + } + if (!text.empty()) { + text += '\n'; + } + text += part.text; + } + jmsg["content"] = text; + } else { + auto & parts = jmsg["content"] = json::array(); + for (const auto & part : msg.content_parts) { + parts.push_back({ + {"type", part.type}, + {"text", part.text}, + }); + } + } + } else { + jmsg["content"] = json(); // null + } + if (!msg.reasoning_content.empty()) { + jmsg["reasoning_content"] = msg.reasoning_content; + } + if (!msg.tool_name.empty()) { + jmsg["name"] = msg.tool_name; + } + if (!msg.tool_call_id.empty()) { + jmsg["tool_call_id"] = msg.tool_call_id; + } + if (!msg.tool_calls.empty()) { + auto & tool_calls = jmsg["tool_calls"] = json::array(); + for (const auto & tool_call : msg.tool_calls) { + json tc { + {"type", "function"}, + {"function", { + {"name", tool_call.name}, + {"arguments", tool_call.arguments}, + }}, + }; + if (!tool_call.id.empty()) { + tc["id"] = tool_call.id; + } + tool_calls.push_back(tc); + } + } + messages.push_back(jmsg); + } + return messages; +} + +template <> +std::vector common_chat_msgs_parse_oaicompat(const std::string & messages) { + return common_chat_msgs_parse_oaicompat(json::parse(messages)); +} + +template <> +std::vector common_chat_tools_parse_oaicompat(const json & tools) { + std::vector result; + + try { + if (!tools.is_null()) { + if (!tools.is_array()) { + throw std::runtime_error("Expected 'tools' to be an array, got " + tools.dump()); + } + 
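            // Editorial note (illustrative example, not part of the patch): each entry is expected in the
            // OpenAI-style "function tool" shape that the loop below reads (type, function.name,
            // function.description, function.parameters), e.g.:
            //   { "type": "function",
            //     "function": { "name": "get_weather",
            //                   "description": "Look up the current weather",
            //                   "parameters": { "type": "object",
            //                                   "properties": { "location": { "type": "string" } },
            //                                   "required": ["location"] } } }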
for (const auto & tool : tools) { + if (!tool.contains("type")) { + throw std::runtime_error("Missing tool type: " + tool.dump()); + } + const auto & type = tool.at("type"); + if (!type.is_string() || type != "function") { + throw std::runtime_error("Unsupported tool type: " + tool.dump()); + } + if (!tool.contains("function")) { + throw std::runtime_error("Missing tool function: " + tool.dump()); + } + + const auto & function = tool.at("function"); + result.push_back({ + /* .name = */ function.at("name"), + /* .description = */ function.at("description"), + /* .parameters = */ function.at("parameters").dump(), + }); + } + } + } catch (const std::exception & e) { + throw std::runtime_error("Failed to parse tools: " + std::string(e.what()) + "; tools = " + tools.dump(2)); + } + + return result; +} + +template <> +std::vector common_chat_tools_parse_oaicompat(const std::string & tools) { + return common_chat_tools_parse_oaicompat(json::parse(tools)); +} + +template <> +json common_chat_tools_to_json_oaicompat(const std::vector & tools) { + if (tools.empty()) { + return json(); + } + + auto result = json::array(); + for (const auto & tool : tools) { + result.push_back({ + {"type", "function"}, + {"function", { + {"name", tool.name}, + {"description", tool.description}, + {"parameters", json::parse(tool.parameters)}, + }}, + }); + } + return result; +} + +bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) { + if (use_jinja) { + try { + common_chat_msg msg; + msg.role = "user"; + msg.content = "test"; + + auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl); + + common_chat_templates_inputs inputs; + inputs.messages = {msg}; + + common_chat_templates_apply(tmpls.get(), inputs); + return true; + } catch (const std::exception & e) { + LOG_ERR("%s: failed to apply template: %s\n", __func__, e.what()); + return false; + } + } + llama_chat_message chat[] = {{"user", "test"}}; + const int res = llama_chat_apply_template(tmpl.c_str(), chat, 1, true, nullptr, 0); + return res >= 0; +} + +std::string common_chat_format_single( + const struct common_chat_templates * tmpls, + const std::vector & past_msg, + const common_chat_msg & new_msg, + bool add_ass, + bool use_jinja) { + + common_chat_templates_inputs inputs; + inputs.use_jinja = use_jinja; + + std::string fmt_past_msg; + if (!past_msg.empty()) { + inputs.messages = past_msg; + inputs.add_generation_prompt = false; + fmt_past_msg = common_chat_templates_apply(tmpls, inputs).prompt; + } + std::ostringstream ss; + // if the past_msg ends with a newline, we must preserve it in the formatted version + if (add_ass && !fmt_past_msg.empty() && fmt_past_msg.back() == '\n') { + ss << "\n"; + }; + // format chat with new_msg + inputs.messages.push_back(new_msg); + inputs.add_generation_prompt = add_ass; + auto fmt_new_msg = common_chat_templates_apply(tmpls, inputs).prompt; + // get the diff part + ss << fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size()); + return ss.str(); +} + +std::string common_chat_format_example(const struct common_chat_templates * tmpls, bool use_jinja) { + common_chat_templates_inputs inputs; + inputs.use_jinja = use_jinja; + auto add_simple_msg = [&](auto role, auto content) { + common_chat_msg msg; + msg.role = role; + msg.content = content; + inputs.messages.push_back(msg); + }; + add_simple_msg("system", "You are a helpful assistant"); + add_simple_msg("user", "Hello"); + add_simple_msg("assistant", "Hi there"); + add_simple_msg("user", "How are you?"); + return 
common_chat_templates_apply(tmpls, inputs).prompt; +} + +#define CHATML_TEMPLATE_SRC \ + "{%- for message in messages -%}\n" \ + " {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' -}}\n" \ + "{%- endfor -%}\n" \ + "{%- if add_generation_prompt -%}\n" \ + " {{- '<|im_start|>assistant\n' -}}\n" \ + "{%- endif -%}" + +void common_chat_templates_free(struct common_chat_templates * tmpls) { + delete tmpls; +} + +bool common_chat_templates_was_explicit(const struct common_chat_templates * tmpls) { + return tmpls->has_explicit_template; +} + +const char * common_chat_templates_source(const struct common_chat_templates * tmpls, const char * variant) { + if (variant != nullptr) { + if (strcmp(variant, "tool_use") == 0) { + if (tmpls->template_tool_use) { + return tmpls->template_tool_use->source().c_str(); + } + return nullptr; + } else { + LOG_DBG("%s: unknown template variant: %s\n", __func__, variant); + } + } + return tmpls->template_default->source().c_str(); +} + +common_chat_templates_ptr common_chat_templates_init( + const struct llama_model * model, + const std::string & chat_template_override, + const std::string & bos_token_override, + const std::string & eos_token_override) +{ + std::string default_template_src; + std::string template_tool_use_src; + + bool has_explicit_template = !chat_template_override.empty(); + if (chat_template_override.empty()) { + GGML_ASSERT(model != nullptr); + const auto * str = llama_model_chat_template(model, /* name */ nullptr); + if (str) { + default_template_src = str; + has_explicit_template = true; + } + str = llama_model_chat_template(model, /* name */ "tool_use"); + if (str) { + template_tool_use_src = str; + has_explicit_template = true; + } + } else { + default_template_src = chat_template_override; + } + if (default_template_src.empty() || default_template_src == "chatml") { + if (!template_tool_use_src.empty()) { + default_template_src = template_tool_use_src; + } else { + default_template_src = CHATML_TEMPLATE_SRC; + } + } + std::string token_bos = bos_token_override; + std::string token_eos = eos_token_override; + if (model) { + const auto * vocab = llama_model_get_vocab(model); + const auto get_token = [&](llama_token token, const char * name, const char * jinja_variable_name) { + if (token == LLAMA_TOKEN_NULL) { + if (default_template_src.find(jinja_variable_name) != std::string::npos + || template_tool_use_src.find(jinja_variable_name) != std::string::npos) { + LOG_WRN("common_chat_templates_init: warning: vocab does not have a %s token, jinja template won't work as intended.\n", name); + } + return std::string(); + } + return common_token_to_piece(vocab, token, true); + }; + token_bos = get_token(llama_vocab_bos(vocab), "BOS", "bos_token"); + token_eos = get_token(llama_vocab_eos(vocab), "EOS", "eos_token"); + } + common_chat_templates_ptr tmpls(new common_chat_templates()); + tmpls->has_explicit_template = has_explicit_template; + try { + tmpls->template_default = std::make_unique(default_template_src, token_bos, token_eos); + } catch (const std::exception & e) { + LOG_ERR("%s: failed to parse chat template (defaulting to chatml): %s \n", __func__, e.what()); + tmpls->template_default = std::make_unique(CHATML_TEMPLATE_SRC, token_bos, token_eos); + } + if (!template_tool_use_src.empty()) { + try { + tmpls->template_tool_use = std::make_unique(template_tool_use_src, token_bos, token_eos); + } catch (const std::exception & e) { + LOG_ERR("%s: failed to parse tool use chat template (ignoring it): %s\n", __func__, 
e.what()); + } + } + return tmpls; +} + +std::string common_chat_format_name(common_chat_format format) { + switch (format) { + case COMMON_CHAT_FORMAT_CONTENT_ONLY: return "Content-only"; + case COMMON_CHAT_FORMAT_GENERIC: return "Generic"; + case COMMON_CHAT_FORMAT_MISTRAL_NEMO: return "Mistral Nemo"; + case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x"; + case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools"; + case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1"; + case COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING: return "DeepSeek R1 (extract reasoning)"; + case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2"; + case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2"; + case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1"; + case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro"; + case COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING: return "Hermes 2 Pro (extract reasoning)"; + case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B"; + case COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING: return "Command R7B (extract reasoning)"; + default: + throw std::runtime_error("Unknown chat format"); + } +} + +static bool parse_json(std::string::const_iterator & it, const std::string::const_iterator & end, json & out) { + // // https://json.nlohmann.me/features/parsing/sax_interface/ + struct json_error_locator : public nlohmann::json_sax { + std::size_t position; + bool found_error; + + json_error_locator() : position(0), found_error(false) {} + + bool parse_error(std::size_t position, const std::string &, const json::exception &) override { // NOLINT + this->position = position - 1; + this->found_error = true; + return false; + } + bool null() override { return true; } // NOLINT + bool boolean(bool) override { return true; } // NOLINT + bool number_integer(number_integer_t) override { return true; } // NOLINT + bool number_unsigned(number_unsigned_t) override { return true; } // NOLINT + bool number_float(number_float_t, const string_t &) override { return true; } // NOLINT + bool string(string_t &) override { return true; } // NOLINT + bool binary(binary_t &) override { return true; } // NOLINT + bool start_object(std::size_t) override { return true; } // NOLINT + bool key(string_t &) override { return true; } // NOLINT + bool end_object() override { return true; } + bool start_array(std::size_t) override { return true; } // NOLINT + bool end_array() override { return true; } + }; + json_error_locator err_loc; + json::sax_parse(it, end, &err_loc); + + std::string::const_iterator temptative_end; + if (err_loc.found_error) { + temptative_end = it + err_loc.position; + } else { + temptative_end = end; + } + std::string json_sub {it, temptative_end}; + try { + out = json::parse(json_sub); + it = temptative_end; + return true; + } catch (const std::exception &) { + return false; + } +} + +static bool parse_literal(std::string::const_iterator & it, const std::string::const_iterator & end, const std::string & expected) { + auto expected_it = expected.begin(); + auto tmp_it = it; + while (tmp_it != end && expected_it != expected.end() && *tmp_it == *expected_it) { + ++tmp_it; + ++expected_it; + } + if (expected_it == expected.end()) { + it = tmp_it; + return true; + } + return false; +} + +static std::optional parse_pattern(std::string::const_iterator & it, const std::string::const_iterator & end, const std::regex & expected) { + std::smatch match; + if (std::regex_match(it, end, 
match, expected)) { + it = match.suffix().first; + return match; + } + return std::nullopt; +} + +static void consume_spaces(std::string::const_iterator & it, const std::string::const_iterator & end) { + while (it != end && std::isspace(*it)) { + ++it; + } +} + +/** + * Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between. + * Aggregates the prefix, suffix and in-between text into the content. + */ +static common_chat_msg parse_json_tool_calls( + const std::string& input, + const std::optional & trigger_opt, + const std::regex & function_regex, + const std::regex & close_regex, + bool allow_raw_python = false) { + std::smatch match; + + common_chat_msg result; + result.role = "assistant"; + + + auto end = input.end(); + auto it = input.begin(); + + if (trigger_opt) { + if (!std::regex_search(it, end, match, *trigger_opt)) { + result.content = input; + return result; + } + result.content = match.prefix().str(); + it = match.suffix().first; + } + + while (it != end) { + std::sregex_iterator rend; + std::sregex_iterator rit(it, end, function_regex); + if (rit == rend) { + result.content += std::string(it, end); + break; + } + auto name = rit->str(1); + result.content += std::string(it, rit->prefix().second); + it = rit->suffix().first; + + json arguments; + if (parse_json(it, end, arguments)) { + if (!std::regex_search(it, end, match, close_regex)) { + throw std::runtime_error("Malformed input, missing closing pattern: " + input); + } + it = match.suffix().first; + result.tool_calls.push_back({name, arguments.is_string() ? arguments.get() : arguments.dump(), /* id= */ ""}); + } else { + if (allow_raw_python && name == "python") { + result.tool_calls.push_back({name, json({{"code", std::string(it, end)}}).dump(), /* id= */ ""}); + break; + } + throw std::runtime_error("Failed to parse json tool call arguments: " + input); + } + } + + if (!result.tool_calls.empty()) { + if (!string_strip(result.content).empty()) { + LOG_WRN("Content found with tool calls: %s\n", result.content.c_str()); + } + result.content = ""; + } + return result; +} + +static common_chat_tool_call process_tool_call(const json & tool_call) { + const auto & arguments = tool_call.at("arguments"); + return { + /* .name = */ tool_call.at("name"), + /* .arguments = */ arguments.is_string() ? arguments.get() : arguments.dump(), + /* .id = */ tool_call.contains("id") ? 
tool_call.at("id") : "", + }; +} +static common_chat_msg parse_prefixed_json_tool_call_array(const std::string& input, const std::string & prefix, size_t rstrip_prefix = 0) { + auto content_end = input.find(prefix); + size_t tc_start = std::string::npos; + + common_chat_msg result; + result.role = "assistant"; + if (content_end == std::string::npos) { + result.content = input; + } else { + tc_start = content_end + prefix.size() - rstrip_prefix; + result.content = input.substr(0, content_end); + auto tool_calls = json::parse(input.substr(tc_start)); + for (const auto & tool_call : tool_calls) { + result.tool_calls.emplace_back(process_tool_call(tool_call)); + } + } + return result; +} + +static void foreach_function(const json & tools, const std::function & fn) { + for (const auto & tool : tools) { + if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) { + LOG_INF("Skipping tool without function: %s", tool.dump(2).c_str()); + continue; + } + fn(tool); + } +} + +static std::string apply( + const common_chat_template & tmpl, + const nlohmann::ordered_json & messages, + const nlohmann::ordered_json & tools, + bool add_generation_prompt, + const nlohmann::ordered_json & extra_context = nlohmann::ordered_json()) +{ + minja::chat_template_inputs tmpl_inputs; + tmpl_inputs.messages = messages; + tmpl_inputs.tools = tools; + tmpl_inputs.add_generation_prompt = add_generation_prompt; + tmpl_inputs.extra_context = extra_context; + // TODO: add flag to control date/time, if only for testing purposes. + // tmpl_inputs.now = std::chrono::system_clock::now(); + + minja::chat_template_options tmpl_opts; + // To avoid double BOS / EOS tokens, we're manually removing begining / trailing tokens + // instead of using `chat_template_options.use_bos_token = false`, since these tokens + // may be needed inside the template / between messages too. + auto result = tmpl.apply(tmpl_inputs, tmpl_opts); + if (string_starts_with(result, tmpl.bos_token())) { + result = result.substr(tmpl.bos_token().size()); + } + if (string_ends_with(result, tmpl.eos_token())) { + result = result.substr(0, result.size() - tmpl.eos_token().size()); + } + return result; +} + +static common_chat_params common_chat_params_init_generic(const common_chat_template & tmpl, const struct templates_params & inputs) { + common_chat_params data; + + auto tool_call_schemas = json::array(); + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool.at("function"); + auto tool_schema = json { + {"type", "object"}, + {"properties", { + {"name", { + {"type", "string"}, + {"const", function.at("name")}, + }}, + {"arguments", function.at("parameters")}, + }}, + {"required", json::array({"name", "arguments"})}, + }; + if (function.contains("description")) { + tool_schema["description"] = function.at("description"); + } + if (inputs.parallel_tool_calls) { + tool_schema.at("properties")["id"] = { + {"type", "string"}, + {"minLength", 4}, + }; + tool_schema.at("required").push_back("id"); + } + tool_call_schemas.emplace_back(tool_schema); + }); + const auto tool_call = + inputs.parallel_tool_calls + ? json { + {"type", "object"}, + {"properties", { + {"tool_calls", { + {"type", "array"}, + {"items", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json { + {"anyOf", tool_call_schemas}, + }}, + {"minItems", 1}, + }}, + }}, + {"required", json::array({"tool_calls"})}, + } + : json { + {"type", "object"}, + {"properties", { + {"tool_call", tool_call_schemas.size() == 1 ? 
tool_call_schemas[0] : json { + {"anyOf", tool_call_schemas}, + }}, + }}, + {"required", json::array({"tool_call"})}, + }; + const auto schema = + inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED + ? json { + {"anyOf", json::array({ + tool_call, + { + {"type", "object"}, + {"properties", { + {"response", inputs.json_schema.is_null() + ? json {{"type", "string"}} + : inputs.json_schema + }, + }}, + {"required", json::array({"response"})}, + }, + })} + } + : tool_call; + + data.grammar_lazy = false; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + builder.add_schema("root", schema); + }); + + auto tweaked_messages = common_chat_template::add_system( + inputs.messages, + "Respond in JSON format, either with `tool_call` (a request to call tools) or with `response` reply to the user's request"); + + data.prompt = apply(tmpl, tweaked_messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt); + data.format = COMMON_CHAT_FORMAT_GENERIC; + return data; +} +static common_chat_msg common_chat_parse_generic(const std::string & input) { + json data = json::parse(input); + common_chat_msg result; + result.role = "assistant"; + if (data.contains("tool_calls")) { + for (const auto & tool_call : data.at("tool_calls")) { + result.tool_calls.push_back({ + tool_call.at("name"), + tool_call.at("arguments").dump(), + tool_call.contains("id") ? tool_call.at("id") : "", + }); + } + } else if (data.contains("tool_call")) { + result.tool_calls.push_back({ + data.at("tool_call").at("name"), + data.at("tool_call").at("arguments").dump(), + /* id= */ "", + }); + } else if (data.contains("response")) { + const auto & response = data.at("response"); + result.content = response.is_string() ? response.get() : response.dump(2); + } + return result; +} + +static common_chat_params common_chat_params_init_mistral_nemo(const common_chat_template & tmpl, const struct templates_params & inputs) { + common_chat_params data; + data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + auto schemas = json::array(); + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool.at("function"); + schemas.push_back({ + {"type", "object"}, + {"properties", { + // Important note: the model is probably trained to take a JSON stringified arguments value. + // It's hard to constrain that for now (while reusing the JSON schema conversion), so we're just expecting a plain object. + {"name", { + {"type", "string"}, + {"const", function.at("name")}, + }}, + {"arguments", function.at("parameters")}, + {"id", { + {"type", "string"}, + // Nemo's template expects a 9-character alphanumeric ID. + {"pattern", "^[a-zA-Z0-9]{9}$"}, + }}, + }}, + {"required", json::array({"name", "arguments", "id"})}, + }); + }); + auto schema = json { + {"type", "array"}, + {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}}, + {"minItems", 1}, + }; + if (!inputs.parallel_tool_calls) { + schema["maxItems"] = 1; + } + builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema)); + }); + data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"}); + data.preserved_tokens = { + "[TOOL_CALLS]", + }; + data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? 
json() : inputs.tools, inputs.add_generation_prompt); + data.format = COMMON_CHAT_FORMAT_MISTRAL_NEMO; + return data; +} +static common_chat_msg common_chat_parse_mistral_nemo(const std::string & input) { + return parse_prefixed_json_tool_call_array(input, "[TOOL_CALLS]"); +} + +static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct templates_params & inputs) { + common_chat_params data; + data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + auto schemas = json::array(); + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool.at("function"); + schemas.push_back({ + {"type", "object"}, + {"properties", { + {"tool_call_id", { + {"type", "string"}, + // Command-R's template expects an integer string. + {"pattern", "^[0-9]{1,10}$"}, + }}, + {"tool_name", { + {"type", "string"}, + {"const", function.at("name")}, + }}, + {"parameters", function.at("parameters")}, + }}, + {"required", json::array({"tool_call_id", "tool_name", "parameters"})}, + }); + }); + auto schema = json { + {"type", "array"}, + {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}}, + {"minItems", 1}, + }; + if (!inputs.parallel_tool_calls) { + schema["maxItems"] = 1; + } + builder.add_rule("root", "\"<|START_ACTION|>\" " + builder.add_schema("tool_calls", schema) + " \"<|END_ACTION|>\""); + }); + data.grammar_triggers.push_back({ + COMMON_GRAMMAR_TRIGGER_TYPE_WORD, + "<|START_ACTION|>", + }); + data.preserved_tokens = { + "<|START_ACTION|>", + "<|END_ACTION|>", + "<|START_RESPONSE|>", + "<|END_RESPONSE|>", + "<|START_THINKING|>", + "<|END_THINKING|>", + }; + auto adjusted_messages = json::array(); + for (const auto & msg : inputs.messages) { + auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string(); + auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array(); + if (has_reasoning_content && has_tool_calls) { + auto adjusted_message = msg; + adjusted_message["tool_plan"] = msg.at("reasoning_content"); + adjusted_message.erase("reasoning_content"); + adjusted_messages.push_back(adjusted_message); + } else { + adjusted_messages.push_back(msg); + } + } + data.prompt = apply(tmpl, adjusted_messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, {}); + data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING : COMMON_CHAT_FORMAT_COMMAND_R7B; + return data; +} +static common_chat_msg common_chat_parse_command_r7b(const std::string & input, bool extract_reasoning) { + static const std::regex thought_regex("(<\\|START_THINKING\\|>([\\s\\S]*?)<\\|END_THINKING\\|>)([\\s\\S]*)"); + static const std::regex action_regex("<\\|START_ACTION\\|>([\\s\\S]*?)<\\|END_ACTION\\|>"); + static const std::regex response_regex("(?:<\\|START_RESPONSE\\|>)?([\\s\\S]*?)<\\|END_RESPONSE\\|>"); + + std::smatch match; + + common_chat_msg result; + result.role = "assistant"; + + std::string rest = input; + + if (std::regex_match(rest, match, thought_regex)) { + if (extract_reasoning) { + result.reasoning_content = match[2].str(); + } else if (!match[2].str().empty()) { + // Let the unparsed thinking tags through in content only if their insides aren't empty. 
+ result.content = match[1].str(); + } + rest = match[3].str(); + } + if (std::regex_match(rest, match, action_regex)) { + auto actions_str = match[1].str(); + auto actions = json::parse(actions_str); + for (const auto & action : actions) { + result.tool_calls.push_back({ + /* .name = */ action.at("tool_name"), + /* .arguments = */ action.at("parameters").dump(), + /* .id = */ action.at("tool_call_id"), + }); + } + } else if (std::regex_match(rest, match, response_regex)) { + auto response = match[1].str(); + result.content += response; + } else { + result.content += rest; + } + return result; +} + +static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector & expected_properties) { + if (!parameters.is_object() || !parameters.contains("type") || parameters.at("type") != "object" || !parameters.contains("properties") || !parameters.contains("required")) { + throw std::runtime_error("Parameters of tool " + name + " must be an object w/ required properties"); + } + const auto & parameters_properties = parameters.at("properties"); + const auto & parameters_required = parameters.at("required"); + for (const auto & prop : expected_properties) { + if (!parameters_properties.contains(prop)) { + throw std::runtime_error("Parameters of tool " + name + " is missing property: " + prop); // NOLINT + } + if (std::find(parameters_required.begin(), parameters_required.end(), json(prop)) == parameters_required.end()) { + throw std::runtime_error("Parameters of tool " + name + " must have property marked as required: " + prop); // NOLINT + } + } + if (parameters_properties.size() != expected_properties.size()) { + throw std::runtime_error("Parameters of tool " + name + " must only have these properties:" + string_join(expected_properties, ", ")); + } +} + +static common_chat_params common_chat_params_init_llama_3_1_tool_calls(const common_chat_template & tmpl, const struct templates_params & inputs, bool allow_python_tag_builtin_tools) { + auto builtin_tools = json::array(); + common_chat_params data; + data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + std::vector tool_rules; + + auto handle_builtin_tool = [&](const std::string & name, const json & parameters) { + if (name == "wolfram_alpha" || name == "web_search" || name == "brave_search") { + // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py + // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py + expect_tool_parameters(name, parameters, {"query"}); + } else if (name == "python" || name == "code_interpreter") { + // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py + expect_tool_parameters(name, parameters, {"code"}); + } else { + return false; + } + + std::vector kvs; + for (const auto & [key, value] : parameters.at("properties").items()) { + kvs.push_back("\"" + key + "=\" " + builder.add_schema(name + "-args-" + key, value)); // NOLINT + } + + tool_rules.push_back( + builder.add_rule( + name + "-call", + "\"<|python_tag|>" + name + ".call(\" " + string_join(kvs, " \", \" ") + " \")\"")); + builtin_tools.push_back(name); + + return true; + }; + + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool.at("function"); + std::string name = 
function.at("name"); + auto parameters = function.at("parameters"); + builder.resolve_refs(parameters); + + // https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime + if (allow_python_tag_builtin_tools) { + handle_builtin_tool(name, parameters); + } + tool_rules.push_back( + builder.add_rule( + name + "-call", + "\"{\" space " + "( \"\\\"type\\\"\" space \":\" space \"\\\"function\\\"\" space \",\" space )? " + " \"\\\"name\\\"\" space \":\" space \"\\\"" + name + "\\\"\" space \",\" space " + " \"\\\"parameters\\\"\" space \":\" space " + builder.add_schema(name + "-args", parameters) + " " + "\"}\" space")); + }); + // Small models may hallucinate function names so we match anything (*at the start*) that looks like the JSON of a function call, regardless of the name. + data.grammar_triggers.push_back({ + COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START, + "\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"", // + name + "\"[\\s\\S]*", + }); + if (!builtin_tools.empty()) { + data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"}); + data.preserved_tokens.push_back("<|python_tag|>"); + } + // Allow a few empty lines on top of the usual constrained json schema space rule. + builder.add_rule("root", string_join(tool_rules, " | ")); + }); + data.additional_stops.push_back("<|eom_id|>"); + data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, { + {"tools_in_user_message", false}, + {"builtin_tools", builtin_tools.empty() ? json() : builtin_tools}, + }); + data.format = allow_python_tag_builtin_tools && !builtin_tools.empty() + ? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS + : COMMON_CHAT_FORMAT_LLAMA_3_X; + return data; +} +static common_chat_msg common_chat_parse_llama_3_1(const std::string & input, bool with_builtin_tools = false) { + // TODO: tighten & simplify the parser, don't accept leading text context. 
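    // Editorial sketch (illustrative values, not part of the patch): the regexes below target two
    // shapes of Llama 3.x tool-call output:
    //   1) generic JSON calls such as
    //        {"type": "function", "name": "get_weather", "parameters": {"location": "Paris"}}
    //   2) builtin-tool calls behind the python tag, such as
    //        <|python_tag|>brave_search.call(query="weather in Paris")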
+    static const std::regex function_regex(
+        "\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: ");
+    static const std::regex close_regex("\\}\\s*");
+    static const std::regex builtin_call_regex("<\\|python_tag\\|>\\s*([^.(]+)\\s*\\.\\s*call\\s*\\(\\s*([\\w]+)\\s*=\\s*([\\s\\S]*?)\\)");
+
+    if (with_builtin_tools) {
+        std::smatch match;
+        if (std::regex_match(input, match, builtin_call_regex)) {
+            try {
+                auto name = match[1].str();
+                auto arg_name = match[2].str();
+                auto arg_value_str = match[3].str();
+                auto arg_value = json::parse(arg_value_str);
+
+                common_chat_msg msg;
+                msg.role = "assistant";
+                msg.tool_calls.push_back({
+                    /* .name = */ name,
+                    /* .arguments = */ (json {
+                        {arg_name, arg_value},
+                    }).dump(),
+                    /* .id = */ "",
+                });
+                return msg;
+            } catch (const std::exception & e) {
+                LOG_WRN("Failed to parse builtin tool call arguments (%s): %s", e.what(), input.c_str());
+            }
+        }
+    }
+    return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
+}
+
+static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct templates_params & inputs) {
+    common_chat_params data;
+    if (inputs.tools.is_array() && !inputs.tools.empty()) {
+        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
+        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+            std::vector<std::string> tool_rules;
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                std::string name = function.at("name");
+                auto parameters = function.at("parameters");
+                builder.resolve_refs(parameters);
+                tool_rules.push_back(builder.add_rule(name + "-call",
+                    "\"<|tool▁call▁begin|>function<|tool▁sep|>" + name + "\\n"
+                    "```json\\n\" " + builder.add_schema(name + "-args", parameters) + " "
+                    "\"```<|tool▁call▁end|>\""));
+            });
+            // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
+            // so we accept common variants (then it's all constrained)
+            builder.add_rule("root",
+                "( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" | \"<|tool\\\\_calls\\\\_begin|>\" ) "
+                "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
+                "\"<|tool▁calls▁end|>\""
+                " space");
+            data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool▁calls▁begin|>"});
+            data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool_calls_begin|>"});
+            data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool calls begin|>"});
+            data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool\\_calls\\_begin|>"});
+            data.preserved_tokens = {
+                "<think>",
+                "</think>",
+                "<|tool▁calls▁begin|>",
+                "<|tool▁call▁begin|>",
+                "<|tool▁sep|>",
+                "<|tool▁call▁end|>",
+                "<|tool▁calls▁end|>",
+            };
+        });
+    }
+    auto prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
+
+    // Hacks to fix the official (broken) prompt.
+    // It is advisable to use --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja instead,
+    // until the official template is fixed.
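+    // Editorial note (added in this write-up, not part of the patch): concretely, the workaround
+    // below appends <|end▁of▁sentence|> (and <|Assistant|> when a generation prompt is requested)
+    // after trailing tool outputs, and rewrites the prompt so a tool-call section is closed with
+    // <|tool▁calls▁end|><|end▁of▁sentence|> before tool outputs or the next <|User|> turn.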
+    if (tmpl.source().find("{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}") != std::string::npos) {
+        // Don't leave the chat dangling after tool results
+        if (string_ends_with(prompt, "<|tool▁outputs▁end|>")) {
+            prompt += "<|end▁of▁sentence|>";
+            if (inputs.add_generation_prompt) {
+                prompt += "<|Assistant|>";
+            }
+        }
+        // Fix up tool call delta example added by Minja
+        prompt = std::regex_replace(
+            prompt,
+            std::regex("(<|tool▁call▁end|>)[\\s\\r\\n]*(<|tool▁outputs▁begin|>|<|User|>)"),
+            "$1<|tool▁calls▁end|><|end▁of▁sentence|>$2");
+    }
+    data.prompt = prompt;
+    data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING : COMMON_CHAT_FORMAT_DEEPSEEK_R1;
+    return data;
+}
+static common_chat_msg handle_think_tag_prelude(const std::string & input, bool extract_reasoning, const std::function<common_chat_msg(const std::string &)> & rest_parser) {
+    std::smatch match;
+    static const std::regex reasoning_content_regex("((?:<think>)?([\\s\\S\\r\\n]*?)</think>)?([\\s\\S\\r\\n]*)");
+    if (std::regex_match(input, match, reasoning_content_regex)) {
+        auto rest = match[3].str();
+        auto msg = rest_parser(rest);
+        auto reasoning_content = string_strip(match[2].str());
+        if (extract_reasoning) {
+            msg.reasoning_content = reasoning_content;
+        } else if (!reasoning_content.empty()) {
+            std::ostringstream content;
+            content << "<think>" << reasoning_content << "</think>" << msg.content;
+            msg.content = content.str();
+        }
+        return msg;
+    }
+    return rest_parser(input);
+}
+static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input, bool extract_reasoning) {
+    return handle_think_tag_prelude(input, extract_reasoning, [](const std::string & input) {
+        static const std::regex function_regex("<|tool▁call▁begin|>function<|tool▁sep|>([^\n]+)\n```json\n");
+        static const std::regex close_regex("```[\\s\\r\\n]*<|tool▁call▁end|>");
+        static const std::regex tool_calls_regex("[\\s\\r\\n]*(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>)([\\s\\S\\r\\n]*?)<|tool▁calls▁end|>");
+
+        common_chat_msg msg;
+        msg.role = "assistant";
+        std::smatch match;
+        if (std::regex_search(input, match, tool_calls_regex)) {
+            auto tool_calls = match[1].str();
+            auto msg2 = parse_json_tool_calls(tool_calls, std::nullopt, function_regex, close_regex);
+            msg.tool_calls = std::move(msg2.tool_calls);
+        } else {
+            msg.content = input;
+        }
+        return msg;
+    });
+}
+
+static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
+    LOG_DBG("%s\n", __func__);
+    common_chat_params data;
+    data.prompt = apply(tmpl, inputs.messages, /* tools= */ nullptr, inputs.add_generation_prompt, {
+        {"datetime", "Jan 29 2025 13:00:00 GMT"},
+        {"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))},
+    });
+    if (inputs.tools.is_array() && !inputs.tools.empty()) {
+        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+            auto schemas = json::array();
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                schemas.push_back({
+                    {"type", "object"},
+                    {"properties", {
+                        {"name", {
+                            {"type", "string"},
+                            {"const", function.at("name")},
+                        }},
+                        {"arguments", function.at("parameters")},
+                    }},
+                    {"required", json::array({"name", "arguments", "id"})},
+                });
+            });
+            auto schema = json {
+                {"type", "array"},
+                {"items", schemas.size() == 1 ?
schemas[0] : json {{"anyOf", schemas}}}, + {"minItems", 1}, + }; + if (!inputs.parallel_tool_calls) { + schema["maxItems"] = 1; + } + builder.add_rule("root", "\" functools\"? " + builder.add_schema("tool_calls", schema)); + }); + data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, " functools["}); + data.preserved_tokens = { + " functools[", + }; + data.format = COMMON_CHAT_FORMAT_FIREFUNCTION_V2; + } else { + data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; + } + return data; +} +static common_chat_msg common_chat_parse_firefunction_v2(const std::string & input) { + return parse_prefixed_json_tool_call_array(input, " functools[", /* rstrip_prefix= */ 1); +} + +static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl, const struct templates_params & inputs) { + // >>>all\nlet's call functions>>>fn1\n{"arg1": 1...}\n>>>fn2\n{"arg1": 1...}... + // Using ">>>f1\n", ">>>f2\n"... as trigger words for the grammar + common_chat_params data; + data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt); + data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2; + if (inputs.tools.is_array() && !inputs.tools.empty()) { + data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + std::vector first_tool_rules; + std::vector subsequent_tool_rules; + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool.at("function"); + std::string name = function.at("name"); + auto parameters = function.at("parameters"); + builder.resolve_refs(parameters); + auto args_rule = builder.add_schema(name + "-args", parameters); + first_tool_rules.push_back(builder.add_rule(name + "-call", "( \"assistant<|end_header_id|>\\n\" )? \"" + name + "\\n\" " + args_rule)); + subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>" + name + "\\n\" " + args_rule)); + data.grammar_triggers.push_back({ + COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START, + regex_escape(name + "\n"), + }); + data.grammar_triggers.push_back({ + COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START, + regex_escape("assistant<|end_header_id|>\n" + name + "\n"), + }); + data.grammar_triggers.push_back({ + COMMON_GRAMMAR_TRIGGER_TYPE_WORD, + regex_escape(">>>" + name + "\n"), + }); + data.grammar_triggers.push_back({ + COMMON_GRAMMAR_TRIGGER_TYPE_WORD, + ">>>assistant<|end_header_id|>\n" + name, + }); + }); + data.preserved_tokens = { + "<|end_header_id|>", + }; + auto first_rule = first_tool_rules.empty() ? 
"" : builder.add_rule("first_tool_call", string_join(first_tool_rules, " | ")) + " space"; + if (inputs.parallel_tool_calls) { + auto subsequent_rule = builder.add_rule("subsequent_tool_call", string_join(subsequent_tool_rules, " | ")) + " space"; + builder.add_rule("root", first_rule + " (" + subsequent_rule + ")*"); + } else { + builder.add_rule("root", first_rule); + } + + }); + } + return data; +} + +static common_chat_msg common_chat_parse_functionary_v3_2(const std::string & input) { + static const std::regex function_regex(R"((?:>>>)?(?:assistant<|end_header_id|>\n)?(\w+)\n)"); + static const std::regex close_regex(R"($|(?=>>>))"); + + std::string content; + auto it = input.begin(); + const auto end = input.end(); + + if (parse_literal(it, end, "all\n")) { + std::smatch match; + if (std::regex_search(it, end, match, function_regex)) { + auto fun_it = match.prefix().second; + content = std::string(it, fun_it); + it = fun_it; + } else { + common_chat_msg res; + res.role = "assistant"; + res.content = std::string(it, end); + return res; + } + } + // TODO: tighten & simplify. + try { + auto res = parse_json_tool_calls(std::string(it, end), std::nullopt, function_regex, close_regex, /* allow_raw_python= */ true); + res.content = content + res.content; + return res; + } catch (const std::exception & e) { + LOG_ERR("Failed to parse functionary v3.2 input: %s\n", e.what()); + common_chat_msg res; + res.role = "assistant"; + res.content = input; + return res; + } +} + +static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct templates_params & inputs) { + // https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt + common_chat_params data; + json tools = inputs.tools.is_null() ? 
inputs.tools : json::array(); + std::string python_code_argument_name; + auto has_raw_python = false; + + data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + std::vector tool_rules; + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool.at("function"); + const auto & parameters = function.at("parameters"); + std::string name = function.at("name"); + if (name == "python" || name == "ipython") { + if (!parameters.contains("type")) { + throw std::runtime_error("Missing type in python tool"); + } + has_raw_python = true; + const auto & type = parameters.at("type"); + if (type == "object") { + auto properties = parameters.at("properties"); + for (auto it = properties.begin(); it != properties.end(); ++it) { + if (it.value().at("type") == "string") { + if (!python_code_argument_name.empty()) { + throw std::runtime_error("Multiple string arguments found in python tool"); + } + python_code_argument_name = it.key(); + } + } + if (python_code_argument_name.empty()) { + throw std::runtime_error("No string argument found in python tool"); + } + } else if (type != "string") { + throw std::runtime_error("Invalid type in python tool: " + type.dump()); + } + } + tool_rules.push_back(builder.add_rule(name + "-call", "\"\" " + builder.add_schema(name + "-args", parameters) + " \"\" space")); + }); + if (has_raw_python) { + tool_rules.push_back(builder.add_rule("python-call", "\"<|python_tag|>\" .*")); + data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"}); + data.preserved_tokens.push_back("<|python_tag|>"); + } + auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space"; + builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call); + data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "([\s\S\n]*)$)"); + std::smatch match; + if (std::regex_search(input, match, python_tag_regex)) { + auto code = match[1].str(); + common_chat_msg msg; + msg.role = "assistant"; + msg.content = match.prefix().str(); + msg.tool_calls.push_back({ + /* .name = */ "python", + /* .arguments = */ (json {{"code", code}}).dump(), + /* .id = */ "", + }); + return msg; + } + static const std::regex function_regex(R"()"); + static const std::regex close_regex(R"()"); + // TODO: tighten & simplify. 
+ return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex); +} + +static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat_template & tmpl, const struct templates_params & inputs) { + common_chat_params data; + // (content)?({"name": "foo", "arguments": {"a": 1}})* + data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + std::vector tool_rules; + std::vector tool_call_alts; + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool.at("function"); + std::string name = function.at("name"); + auto parameters = function.at("parameters"); + builder.resolve_refs(parameters); + tool_rules.push_back(builder.add_schema(name + "-call", { + {"type", "object"}, + {"properties", json { + {"name", json {{"const", name}}}, + {"arguments", parameters}, + }}, + {"required", json::array({"name", "arguments"})}, + })); + tool_call_alts.push_back(builder.add_rule( + name + "-function-tag", + "\"\" space " + + builder.add_schema(name + "-args", parameters) + " " + "\"\" space")); + + data.grammar_triggers.push_back({ + COMMON_GRAMMAR_TRIGGER_TYPE_WORD, + "", + }); + auto escaped_name = regex_escape(name); + data.grammar_triggers.push_back({ + COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, + " alt_tags { + any_tool_call, + "\"\" space " + any_tool_call + " \"\"", + // The rest is just to accommodate common "good bad" outputs. + "\"\" space " + any_tool_call + " \"\"", + "\"\" space " + any_tool_call + " \"\"", + "\"\" space " + any_tool_call + " \"\"", + "\"\" space " + any_tool_call + " \"\"", + "\"\" space " + any_tool_call + " \"\"", + "\"\" space " + any_tool_call + " \"\"", + }; + auto wrappable_tool_call = builder.add_rule("wrappable_tool_call", "( " + string_join(alt_tags, " | ") + " ) space"); + tool_call_alts.push_back(wrappable_tool_call); + tool_call_alts.push_back( + "( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space "); + auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | ")); + builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call); + data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, ""}); + data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "|||)?\\s*\\{\\s*\"", //name\"\\s*:\\s*\"" + escaped_name + "\"", + }); + data.preserved_tokens = { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "```", + "```json", + "```xml", + }; + }); + + data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt); + data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING : COMMON_CHAT_FORMAT_HERMES_2_PRO; + return data; +} +static common_chat_msg common_chat_parse_hermes_2_pro(const std::string& input, bool extract_reasoning) { + return handle_think_tag_prelude(input, extract_reasoning, [](const std::string & input) { + static const std::regex open_regex( + "(?:" + "(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start) + "(" // match 2 (open_tag) + "|" + "|" + "|" + "|" + "|" + "|" + "|" + ")?" 
+ "(\\s*\\{\\s*\"name\"\\s*:[\\s\\S]*)" // match 3 (named tool call + rest) + ")" + "|" + "(?:]+)>" // match 4 (function name) + "|)" // match 5 (function name again) + "([\\s\\S]*)" // match 6 (function arguments + rest)})" + ); + + try { + common_chat_msg msg; + msg.role = "assistant"; + + std::string::const_iterator it = input.begin(); + const std::string::const_iterator end = input.end(); + std::smatch match; + + while (it != end) { + if (std::regex_search(it, end, match, open_regex)) { + // Add content before the match + msg.content += std::string(it, match[0].first); + + auto block_start = match[1].str(); + std::string block_end = block_start.empty() ? "" : "```"; + + auto open_tag = match[2].str(); + std::string close_tag; + + if (match[3].matched) { + close_tag = open_tag.empty() ? "" : ""; + // Start parsing from after the opening tags + auto json_it = match[6].first; + json arguments; + if (parse_json(json_it, end, arguments)) { + msg.tool_calls.emplace_back(process_tool_call({ + {"name", function_name}, + {"arguments", arguments}, + })); + it = json_it; // Move iterator past parsed JSON + + // Handle close tags + consume_spaces(it, end); + if (!close_tag.empty() && !parse_literal(it, end, close_tag)) { + throw std::runtime_error("Failed to parse closing tag"); + } + consume_spaces(it, end); + if (!block_end.empty() && !parse_literal(it, end, block_end)) { + throw std::runtime_error("Failed to parse block end"); + } + consume_spaces(it, end); + } else { + // Not a valid tool call, treat as content + msg.content += std::string(match[0].first, match[0].second); + it = match[0].second; + } + } + } else { + // Add remaining content + msg.content += std::string(it, end); + break; + } + } + return msg; + } catch (const std::exception & e) { + LOG_ERR("Failed to parse hermes 2 pro input: %s\n", e.what()); + common_chat_msg msg; + msg.role = "assistant"; + msg.content = input; + return msg; + } + }); +} + +static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) { + common_chat_params data; + data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt); + data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; + data.grammar_lazy = false; + if (!inputs.json_schema.is_null()) { + if (!inputs.grammar.empty()) { + throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both"); + } + data.grammar = json_schema_to_grammar(inputs.json_schema); + } else { + data.grammar = inputs.grammar; + } + return data; +} + +static common_chat_params common_chat_templates_apply_jinja( + const struct common_chat_templates * tmpls, + const struct common_chat_templates_inputs & inputs) +{ + templates_params params; + params.tools = common_chat_tools_to_json_oaicompat(inputs.tools); + const auto & tmpl = params.tools.is_array() && tmpls->template_tool_use + ? 
*tmpls->template_tool_use
+        : *tmpls->template_default;
+    const auto & src = tmpl.source();
+    const auto & caps = tmpl.original_caps();
+    params.messages = common_chat_msgs_to_json_oaicompat(inputs.messages, /* concat_text= */ !tmpl.original_caps().requires_typed_content);
+    params.add_generation_prompt = inputs.add_generation_prompt;
+    params.extract_reasoning = inputs.extract_reasoning;
+    params.tool_choice = inputs.tool_choice;
+    params.grammar = inputs.grammar;
+    if (!inputs.json_schema.empty()) {
+        params.json_schema = json::parse(inputs.json_schema);
+    }
+
+    if (inputs.parallel_tool_calls && !tmpl.original_caps().supports_parallel_tool_calls) {
+        LOG_DBG("Disabling parallel_tool_calls because the template does not support it\n");
+        params.parallel_tool_calls = false;
+    } else {
+        params.parallel_tool_calls = inputs.parallel_tool_calls;
+    }
+
+    if (params.tools.is_array()) {
+        if (params.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && !params.grammar.empty()) {
+            throw std::runtime_error("Cannot specify grammar with tools");
+        }
+        if (caps.supports_tool_calls && !caps.supports_tools) {
+            LOG_WRN("Template supports tool calls but does not natively describe tools. The fallback behaviour used may produce bad results, inspect prompt w/ --verbose & consider overriding the template.\n");
+        }
+    }
+
+    // DeepSeek R1: use handler in all cases except json schema (thinking / tools).
+    if (src.find("<|tool▁calls▁begin|>") != std::string::npos && params.json_schema.is_null()) {
+        return common_chat_params_init_deepseek_r1(tmpl, params);
+    }
+
+    // Command R7B: use handler in all cases except json schema (thinking / tools).
+    if (src.find("<|END_THINKING|><|START_ACTION|>") != std::string::npos && params.json_schema.is_null()) {
+        return common_chat_params_init_command_r7b(tmpl, params);
+    }
+
+    // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
+    if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null() && params.tools.is_array() && params.json_schema.is_null()) {
+        return common_chat_params_init_hermes_2_pro(tmpl, params);
+    }
+
+    // Use generic handler when mixing tools + JSON schema.
+    // TODO: support that mix in handlers below.
+    if ((params.tools.is_array() && params.json_schema.is_object())) {
+        return common_chat_params_init_generic(tmpl, params);
+    }
+
+    // Functionary prepends "all\n" to plain content outputs, so we use its handler in all cases.
+    if (src.find(">>>all") != std::string::npos) {
+        return common_chat_params_init_functionary_v3_2(tmpl, params);
+    }
+
+    // Firefunction v2 requires datetime and functions in the context even w/o tools, so we also use its handler in all cases.
+ if (src.find(" functools[") != std::string::npos) { + return common_chat_params_init_firefunction_v2(tmpl, params); + } + + // Plain handler (no tools) + if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) { + return common_chat_params_init_without_tools(tmpl, params); + } + + // Functionary v3.1 (w/ tools) + if (src.find("<|start_header_id|>") != std::string::npos + && src.find("ipython<|end_header_id|>") != std::string::npos) { + auto allow_python_tag_builtin_tools = src.find("<|python_tag|>") != std::string::npos; + return common_chat_params_init_llama_3_1_tool_calls(tmpl, params, allow_python_tag_builtin_tools); + } + + // Mistral Nemo (w/ tools) + if (src.find("[TOOL_CALLS]") != std::string::npos) { + return common_chat_params_init_mistral_nemo(tmpl, params); + } + + // Generic fallback + return common_chat_params_init_generic(tmpl, params); +} + +// Legacy template route (adhoc C++ implementation of known templates), forward to llama_chat_apply_template. +static common_chat_params common_chat_templates_apply_legacy( + const struct common_chat_templates * tmpls, + const struct common_chat_templates_inputs & inputs) +{ + int alloc_size = 0; + std::vector chat; + std::vector contents; + for (const auto & msg : inputs.messages) { + auto content = msg.content; + for (const auto & part : msg.content_parts) { + if (part.type != "text") { + LOG_WRN("Ignoring non-text content part: %s\n", part.type.c_str()); + continue; + } + if (!content.empty()) { + content += "\n";; + } + content += part.text; + } + contents.emplace_back(std::move(content)); + } + for (size_t i = 0; i < contents.size(); ++i) { + const auto & msg = inputs.messages[i]; + const auto & content = contents[i]; + chat.push_back({msg.role.c_str(), content.c_str()}); + alloc_size += (msg.role.size() + content.size()) * 1.25; + } + + std::vector buf(alloc_size); + + // run the first time to get the total output length + const auto & src = tmpls->template_default->source(); + int32_t res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(), buf.size()); + + // error: chat template is not supported + if (res < 0) { + // if the custom "tmpl" is not supported, we throw an error + // this is a bit redundant (for good), since we're not sure if user validated the custom template with llama_chat_verify_template() + throw std::runtime_error("this custom template is not supported"); + } + + // if it turns out that our buffer is too small, we resize it + if ((size_t) res > buf.size()) { + buf.resize(res); + res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(), buf.size()); + } + + common_chat_params params; + params.prompt = std::string(buf.data(), res); + if (!inputs.json_schema.empty()) { + params.grammar = json_schema_to_grammar(json::parse(inputs.json_schema)); + } else { + params.grammar = inputs.grammar; + } + return params; +} + +common_chat_params common_chat_templates_apply( + const struct common_chat_templates * tmpls, + const struct common_chat_templates_inputs & inputs) +{ + GGML_ASSERT(tmpls != nullptr); + return inputs.use_jinja + ? 
common_chat_templates_apply_jinja(tmpls, inputs) + : common_chat_templates_apply_legacy(tmpls, inputs); +} + +static common_chat_msg common_chat_parse_content_only(const std::string & input) { + common_chat_msg msg; + msg.role = "assistant"; + msg.content = input; + return msg; +} + +common_chat_msg common_chat_parse(const std::string & input, common_chat_format format) { + switch (format) { + case COMMON_CHAT_FORMAT_CONTENT_ONLY: + return common_chat_parse_content_only(input); + case COMMON_CHAT_FORMAT_GENERIC: + return common_chat_parse_generic(input); + case COMMON_CHAT_FORMAT_MISTRAL_NEMO: + return common_chat_parse_mistral_nemo(input); + case COMMON_CHAT_FORMAT_LLAMA_3_X: + return common_chat_parse_llama_3_1(input); + case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: + return common_chat_parse_llama_3_1(input, /* with_builtin_tools= */ true); + case COMMON_CHAT_FORMAT_DEEPSEEK_R1: + return common_chat_parse_deepseek_r1(input, /* extract_reasoning= */ false); + case COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING: + return common_chat_parse_deepseek_r1(input, /* extract_reasoning= */ true); + case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: + return common_chat_parse_functionary_v3_2(input); + case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: + return common_chat_parse_functionary_v3_1_llama_3_1(input); + case COMMON_CHAT_FORMAT_HERMES_2_PRO: + return common_chat_parse_hermes_2_pro(input, /* extract_reasoning= */ false); + case COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING: + return common_chat_parse_hermes_2_pro(input, /* extract_reasoning= */ true); + case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: + return common_chat_parse_firefunction_v2(input); + case COMMON_CHAT_FORMAT_COMMAND_R7B: + return common_chat_parse_command_r7b(input, /* extract_reasoning= */ false); + case COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING: + return common_chat_parse_command_r7b(input, /* extract_reasoning= */ true); + default: + throw std::runtime_error("Unsupported format: " + common_chat_format_name(format)); + } +} diff --git a/common/chat.h b/common/chat.h new file mode 100644 index 0000000000..9aad84e880 --- /dev/null +++ b/common/chat.h @@ -0,0 +1,135 @@ +// Chat support (incl. tool call grammar constraining & output parsing) w/ generic & custom template handlers. 
+
+#pragma once
+
+#include "common.h"
+#include <string>
+#include <vector>
+
+struct common_chat_templates;
+
+struct common_chat_tool_call {
+    std::string name;
+    std::string arguments;
+    std::string id;
+};
+
+struct common_chat_msg_content_part {
+    std::string type;
+    std::string text;
+};
+
+struct common_chat_msg {
+    std::string role;
+    std::string content;
+    std::vector<common_chat_msg_content_part> content_parts = {};
+    std::vector<common_chat_tool_call> tool_calls = {};
+    std::string reasoning_content;
+    std::string tool_name;
+    std::string tool_call_id;
+};
+
+struct common_chat_tool {
+    std::string name;
+    std::string description;
+    std::string parameters;
+};
+
+enum common_chat_tool_choice {
+    COMMON_CHAT_TOOL_CHOICE_AUTO,
+    COMMON_CHAT_TOOL_CHOICE_REQUIRED,
+    COMMON_CHAT_TOOL_CHOICE_NONE,
+};
+
+enum common_chat_format {
+    COMMON_CHAT_FORMAT_CONTENT_ONLY,
+    COMMON_CHAT_FORMAT_GENERIC,
+    COMMON_CHAT_FORMAT_MISTRAL_NEMO,
+    COMMON_CHAT_FORMAT_LLAMA_3_X,
+    COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
+    COMMON_CHAT_FORMAT_DEEPSEEK_R1,
+    COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING,
+    COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
+    COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
+    COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
+    COMMON_CHAT_FORMAT_HERMES_2_PRO,
+    COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING,
+    COMMON_CHAT_FORMAT_COMMAND_R7B,
+    COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING,
+
+    COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
+};
+
+struct common_chat_templates_inputs {
+    std::vector<common_chat_msg> messages;
+    std::string grammar;
+    std::string json_schema;
+    bool add_generation_prompt = true;
+    bool use_jinja = true;
+    // Parameters below only supported when use_jinja is true
+    std::vector<common_chat_tool> tools;
+    common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
+    bool parallel_tool_calls = false;
+    bool extract_reasoning = true;
+};
+
+struct common_chat_params {
+    common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
+    std::string prompt;
+    std::string grammar;
+    bool grammar_lazy = false;
+    std::vector<common_grammar_trigger> grammar_triggers;
+    std::vector<std::string> preserved_tokens;
+    std::vector<std::string> additional_stops;
+};
+
+// Check if the template supplied via "--chat-template" is supported or not.
Returns true if it's valid
+bool common_chat_verify_template(const std::string & tmpl, bool use_jinja);
+
+void common_chat_templates_free(struct common_chat_templates * tmpls);
+
+struct common_chat_templates_deleter { void operator()(common_chat_templates * tmpls) { common_chat_templates_free(tmpls); } };
+
+typedef std::unique_ptr<common_chat_templates, common_chat_templates_deleter> common_chat_templates_ptr;
+
+common_chat_templates_ptr common_chat_templates_init(
+    const struct llama_model * model,
+    const std::string & chat_template_override,
+    const std::string & bos_token_override = "",
+    const std::string & eos_token_override = "");
+
+bool common_chat_templates_was_explicit(const struct common_chat_templates * tmpls);
+const char * common_chat_templates_source(const struct common_chat_templates * tmpls, const char * variant = nullptr);
+
+
+struct common_chat_params common_chat_templates_apply(
+    const struct common_chat_templates * tmpls,
+    const struct common_chat_templates_inputs & inputs);
+
+// Format single message, while taking into account the position of that message in chat history
+std::string common_chat_format_single(
+    const struct common_chat_templates * tmpls,
+    const std::vector<common_chat_msg> & past_msg,
+    const common_chat_msg & new_msg,
+    bool add_ass,
+    bool use_jinja);
+
+// Returns an example of formatted chat
+std::string common_chat_format_example(
+    const struct common_chat_templates * tmpls,
+    bool use_jinja);
+
+std::string common_chat_format_name(common_chat_format format);
+common_chat_msg common_chat_parse(const std::string & input, common_chat_format format);
+
+common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
+
+// Parses a JSON array of messages in OpenAI's chat completion API format.
+// T can be std::string containing JSON or nlohmann::ordered_json
+template <class T> std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const T & messages);
+template <class T> T common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msgs, bool concat_typed_text = false);
+
+// Parses a JSON array of tools in OpenAI's chat completion tool call API format.
+// T can be std::string containing JSON or nlohmann::ordered_json +template std::vector common_chat_tools_parse_oaicompat(const T & tools); +template T common_chat_tools_to_json_oaicompat(const std::vector & tools); diff --git a/common/common.cpp b/common/common.cpp index 6dea8e3d25..94f545f815 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -7,12 +7,7 @@ #include "common.h" #include "log.h" -// Change JSON_ASSERT from assert() to GGML_ASSERT: -#define JSON_ASSERT GGML_ASSERT -#include "json.hpp" -#include "json-schema-to-grammar.h" #include "llama.h" -#include "chat-template.hpp" #include #include @@ -53,47 +48,11 @@ #include #include #endif -#if defined(LLAMA_USE_CURL) -#include -#include -#include -#endif #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data #endif -#if defined(LLAMA_USE_CURL) -#ifdef __linux__ -#include -#elif defined(_WIN32) -# if !defined(PATH_MAX) -# define PATH_MAX MAX_PATH -# endif -#else -#include -#endif -#define LLAMA_CURL_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083 - -// -// CURL utils -// - -using curl_ptr = std::unique_ptr; - -// cannot use unique_ptr for curl_slist, because we cannot update without destroying the old one -struct curl_slist_ptr { - struct curl_slist * ptr = nullptr; - ~curl_slist_ptr() { - if (ptr) { - curl_slist_free_all(ptr); - } - } -}; -#endif // LLAMA_USE_CURL - -using json = nlohmann::ordered_json; - // // CPU utils // @@ -484,6 +443,11 @@ void string_replace_all(std::string & s, const std::string & search, const std:: s = std::move(builder); } +std::string regex_escape(const std::string & s) { + static const std::regex special_chars("[.^$|()*+?\\[\\]{}\\\\]"); + return std::regex_replace(s, special_chars, "\\$0"); +} + std::string string_join(const std::vector & values, const std::string & separator) { std::ostringstream result; for (size_t i = 0; i < values.size(); ++i) { @@ -866,7 +830,7 @@ std::string fs_get_cache_directory() { if (getenv("LLAMA_CACHE")) { cache_directory = std::getenv("LLAMA_CACHE"); } else { -#ifdef __linux__ +#if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX) if (std::getenv("XDG_CACHE_HOME")) { cache_directory = std::getenv("XDG_CACHE_HOME"); } else { @@ -876,7 +840,9 @@ std::string fs_get_cache_directory() { cache_directory = std::getenv("HOME") + std::string("/Library/Caches/"); #elif defined(_WIN32) cache_directory = std::getenv("LOCALAPPDATA"); -#endif // __linux__ +#else +# error Unknown architecture +#endif cache_directory = ensure_trailing_slash(cache_directory); cache_directory += "llama.cpp"; } @@ -897,22 +863,14 @@ std::string fs_get_cache_file(const std::string & filename) { // // Model utils // + struct common_init_result common_init_from_params(common_params & params) { common_init_result iparams; auto mparams = common_model_params_to_llama(params); - llama_model * model = nullptr; - - if (!params.hf_repo.empty() && !params.hf_file.empty()) { - model = common_load_model_from_hf(params.hf_repo, params.hf_file, params.model, params.hf_token, mparams); - } else if (!params.model_url.empty()) { - model = common_load_model_from_url(params.model_url, params.model, params.hf_token, mparams); - } else { - model = llama_model_load_from_file(params.model.c_str(), mparams); - } - + llama_model * model = llama_model_load_from_file(params.model.path.c_str(), mparams); if (model == NULL) { - LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.c_str()); + LOG_ERR("%s: failed to load model '%s'\n", __func__, 
params.model.path.c_str()); return iparams; } @@ -947,13 +905,13 @@ struct common_init_result common_init_from_params(common_params & params) { llama_context * lctx = llama_init_from_model(model, cparams); if (lctx == NULL) { - LOG_ERR("%s: failed to create context with model '%s'\n", __func__, params.model.c_str()); + LOG_ERR("%s: failed to create context with model '%s'\n", __func__, params.model.path.c_str()); llama_model_free(model); return iparams; } - if (params.ctx_shift && !llama_kv_cache_can_shift(lctx)) { - LOG_WRN("%s: KV cache shifting is not supported for this model, disabling KV cache shifting\n", __func__); + if (params.ctx_shift && !llama_kv_self_can_shift(lctx)) { + LOG_WRN("%s: KV cache shifting is not supported for this context, disabling KV cache shifting\n", __func__); params.ctx_shift = false; } @@ -1030,6 +988,8 @@ struct common_init_result common_init_from_params(common_params & params) { if (params.warmup) { LOG_WRN("%s: warming up the model with an empty run - please wait ... (--no-warmup to disable)\n", __func__); + llama_set_warmup(lctx, true); + std::vector tmp; llama_token bos = llama_vocab_bos(vocab); llama_token eos = llama_vocab_eos(vocab); @@ -1057,9 +1017,10 @@ struct common_init_result common_init_from_params(common_params & params) { if (llama_model_has_decoder(model)) { llama_decode(lctx, llama_batch_get_one(tmp.data(), std::min(tmp.size(), (size_t) params.n_batch))); } - llama_kv_cache_clear(lctx); + llama_kv_self_clear(lctx); llama_synchronize(lctx); llama_perf_context_reset(lctx); + llama_set_warmup(lctx, false); } iparams.model.reset(model); @@ -1068,6 +1029,19 @@ struct common_init_result common_init_from_params(common_params & params) { return iparams; } +std::string get_model_endpoint() { + const char * model_endpoint_env = getenv("MODEL_ENDPOINT"); + // We still respect the use of environment-variable "HF_ENDPOINT" for backward-compatibility. + const char * hf_endpoint_env = getenv("HF_ENDPOINT"); + const char * endpoint_env = model_endpoint_env ? 
model_endpoint_env : hf_endpoint_env; + std::string model_endpoint = "https://huggingface.co/"; + if (endpoint_env) { + model_endpoint = endpoint_env; + if (model_endpoint.back() != '/') model_endpoint += '/'; + } + return model_endpoint; +} + void common_set_adapter_lora(struct llama_context * ctx, std::vector & lora) { llama_clear_adapter_lora(ctx); for (auto & la : lora) { @@ -1083,15 +1057,18 @@ struct llama_model_params common_model_params_to_llama(common_params & params) { if (!params.devices.empty()) { mparams.devices = params.devices.data(); } + if (params.n_gpu_layers != -1) { mparams.n_gpu_layers = params.n_gpu_layers; } + mparams.main_gpu = params.main_gpu; mparams.split_mode = params.split_mode; mparams.tensor_split = params.tensor_split; mparams.use_mmap = params.use_mmap; mparams.use_mlock = params.use_mlock; mparams.check_tensors = params.check_tensors; + if (params.kv_overrides.empty()) { mparams.kv_overrides = NULL; } else { @@ -1099,6 +1076,13 @@ struct llama_model_params common_model_params_to_llama(common_params & params) { mparams.kv_overrides = params.kv_overrides.data(); } + if (params.tensor_buft_overrides.empty()) { + mparams.tensor_buft_overrides = NULL; + } else { + GGML_ASSERT(params.tensor_buft_overrides.back().pattern == nullptr && "Tensor buffer overrides not terminated with empty pattern"); + mparams.tensor_buft_overrides = params.tensor_buft_overrides.data(); + } + return mparams; } @@ -1158,451 +1142,6 @@ struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_p return tpp; } -#ifdef LLAMA_USE_CURL - -#define CURL_MAX_RETRY 3 -#define CURL_RETRY_DELAY_SECONDS 2 - -static bool curl_perform_with_retry(const std::string & url, CURL * curl, int max_attempts, int retry_delay_seconds) { - int remaining_attempts = max_attempts; - - while (remaining_attempts > 0) { - LOG_INF("%s: Trying to download from %s (attempt %d of %d)...\n", __func__ , url.c_str(), max_attempts - remaining_attempts + 1, max_attempts); - - CURLcode res = curl_easy_perform(curl); - if (res == CURLE_OK) { - return true; - } - - int exponential_backoff_delay = std::pow(retry_delay_seconds, max_attempts - remaining_attempts) * 1000; - LOG_WRN("%s: curl_easy_perform() failed: %s, retrying after %d milliseconds...\n", __func__, curl_easy_strerror(res), exponential_backoff_delay); - - remaining_attempts--; - std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay)); - } - - LOG_ERR("%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts); - - return false; -} - -static bool common_download_file(const std::string & url, const std::string & path, const std::string & hf_token) { - // Initialize libcurl - curl_ptr curl(curl_easy_init(), &curl_easy_cleanup); - curl_slist_ptr http_headers; - if (!curl) { - LOG_ERR("%s: error initializing libcurl\n", __func__); - return false; - } - - bool force_download = false; - - // Set the URL, allow to follow http redirection - curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str()); - curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L); - - // Check if hf-token or bearer-token was specified - if (!hf_token.empty()) { - std::string auth_header = "Authorization: Bearer " + hf_token; - http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str()); - curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr); - } - -#if defined(_WIN32) - // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of - // operating system. Currently implemented under MS-Windows. 
- curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA); -#endif - - // Check if the file already exists locally - auto file_exists = std::filesystem::exists(path); - - // If the file exists, check its JSON metadata companion file. - std::string metadata_path = path + ".json"; - nlohmann::json metadata; - std::string etag; - std::string last_modified; - - if (file_exists) { - // Try and read the JSON metadata file (note: stream autoclosed upon exiting this block). - std::ifstream metadata_in(metadata_path); - if (metadata_in.good()) { - try { - metadata_in >> metadata; - LOG_INF("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str()); - if (metadata.contains("url") && metadata.at("url").is_string()) { - auto previous_url = metadata.at("url").get(); - if (previous_url != url) { - LOG_ERR("%s: Model URL mismatch: %s != %s\n", __func__, url.c_str(), previous_url.c_str()); - return false; - } - } - if (metadata.contains("etag") && metadata.at("etag").is_string()) { - etag = metadata.at("etag"); - } - if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) { - last_modified = metadata.at("lastModified"); - } - } catch (const nlohmann::json::exception & e) { - LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what()); - return false; - } - } - } else { - LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str()); - } - - // Send a HEAD request to retrieve the etag and last-modified headers - struct common_load_model_from_url_headers { - std::string etag; - std::string last_modified; - }; - - common_load_model_from_url_headers headers; - - { - typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *); - auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t { - common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata; - - static std::regex header_regex("([^:]+): (.*)\r\n"); - static std::regex etag_regex("ETag", std::regex_constants::icase); - static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase); - - std::string header(buffer, n_items); - std::smatch match; - if (std::regex_match(header, match, header_regex)) { - const std::string & key = match[1]; - const std::string & value = match[2]; - if (std::regex_match(key, match, etag_regex)) { - headers->etag = value; - } else if (std::regex_match(key, match, last_modified_regex)) { - headers->last_modified = value; - } - } - return n_items; - }; - - curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 1L); // will trigger the HEAD verb - curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); // hide head request progress - curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast(header_callback)); - curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers); - - bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS); - if (!was_perform_successful) { - return false; - } - - long http_code = 0; - curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code); - if (http_code != 200) { - // HEAD not supported, we don't know if the file has changed - // force trigger downloading - force_download = true; - LOG_ERR("%s: HEAD invalid http status code received: %ld\n", __func__, http_code); - } - } - - bool should_download = !file_exists || force_download; - if (!should_download) { - if (!etag.empty() && etag != headers.etag) { - 
LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str()); - should_download = true; - } else if (!last_modified.empty() && last_modified != headers.last_modified) { - LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, last_modified.c_str(), headers.last_modified.c_str()); - should_download = true; - } - } - if (should_download) { - std::string path_temporary = path + ".downloadInProgress"; - if (file_exists) { - LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str()); - if (remove(path.c_str()) != 0) { - LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); - return false; - } - } - - // Set the output file - - struct FILE_deleter { - void operator()(FILE * f) const { - fclose(f); - } - }; - - std::unique_ptr outfile(fopen(path_temporary.c_str(), "wb")); - if (!outfile) { - LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path.c_str()); - return false; - } - - typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd); - auto write_callback = [](void * data, size_t size, size_t nmemb, void * fd) -> size_t { - return fwrite(data, size, nmemb, (FILE *)fd); - }; - curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 0L); - curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast(write_callback)); - curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, outfile.get()); - - // display download progress - curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 0L); - - // helper function to hide password in URL - auto llama_download_hide_password_in_url = [](const std::string & url) -> std::string { - std::size_t protocol_pos = url.find("://"); - if (protocol_pos == std::string::npos) { - return url; // Malformed URL - } - - std::size_t at_pos = url.find('@', protocol_pos + 3); - if (at_pos == std::string::npos) { - return url; // No password in URL - } - - return url.substr(0, protocol_pos + 3) + "********" + url.substr(at_pos); - }; - - // start the download - LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__, - llama_download_hide_password_in_url(url).c_str(), path.c_str(), headers.etag.c_str(), headers.last_modified.c_str()); - bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS); - if (!was_perform_successful) { - return false; - } - - long http_code = 0; - curl_easy_getinfo (curl.get(), CURLINFO_RESPONSE_CODE, &http_code); - if (http_code < 200 || http_code >= 400) { - LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code); - return false; - } - - // Causes file to be closed explicitly here before we rename it. - outfile.reset(); - - // Write the updated JSON metadata file. 
- metadata.update({ - {"url", url}, - {"etag", headers.etag}, - {"lastModified", headers.last_modified} - }); - std::ofstream(metadata_path) << metadata.dump(4); - LOG_INF("%s: file metadata saved: %s\n", __func__, metadata_path.c_str()); - - if (rename(path_temporary.c_str(), path.c_str()) != 0) { - LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str()); - return false; - } - } - - return true; -} - -struct llama_model * common_load_model_from_url( - const std::string & model_url, - const std::string & local_path, - const std::string & hf_token, - const struct llama_model_params & params) { - // Basic validation of the model_url - if (model_url.empty()) { - LOG_ERR("%s: invalid model_url\n", __func__); - return NULL; - } - - if (!common_download_file(model_url, local_path, hf_token)) { - return NULL; - } - - // check for additional GGUFs split to download - int n_split = 0; - { - struct gguf_init_params gguf_params = { - /*.no_alloc = */ true, - /*.ctx = */ NULL, - }; - auto * ctx_gguf = gguf_init_from_file(local_path.c_str(), gguf_params); - if (!ctx_gguf) { - LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, local_path.c_str()); - return NULL; - } - - auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT); - if (key_n_split >= 0) { - n_split = gguf_get_val_u16(ctx_gguf, key_n_split); - } - - gguf_free(ctx_gguf); - } - - if (n_split > 1) { - char split_prefix[PATH_MAX] = {0}; - char split_url_prefix[LLAMA_CURL_MAX_URL_LENGTH] = {0}; - - // Verify the first split file format - // and extract split URL and PATH prefixes - { - if (!llama_split_prefix(split_prefix, sizeof(split_prefix), local_path.c_str(), 0, n_split)) { - LOG_ERR("\n%s: unexpected model file name: %s n_split=%d\n", __func__, local_path.c_str(), n_split); - return NULL; - } - - if (!llama_split_prefix(split_url_prefix, sizeof(split_url_prefix), model_url.c_str(), 0, n_split)) { - LOG_ERR("\n%s: unexpected model url: %s n_split=%d\n", __func__, model_url.c_str(), n_split); - return NULL; - } - } - - // Prepare download in parallel - std::vector> futures_download; - for (int idx = 1; idx < n_split; idx++) { - futures_download.push_back(std::async(std::launch::async, [&split_prefix, &split_url_prefix, &n_split, hf_token](int download_idx) -> bool { - char split_path[PATH_MAX] = {0}; - llama_split_path(split_path, sizeof(split_path), split_prefix, download_idx, n_split); - - char split_url[LLAMA_CURL_MAX_URL_LENGTH] = {0}; - llama_split_path(split_url, sizeof(split_url), split_url_prefix, download_idx, n_split); - - return common_download_file(split_url, split_path, hf_token); - }, idx)); - } - - // Wait for all downloads to complete - for (auto & f : futures_download) { - if (!f.get()) { - return NULL; - } - } - } - - return llama_model_load_from_file(local_path.c_str(), params); -} - -struct llama_model * common_load_model_from_hf( - const std::string & repo, - const std::string & remote_path, - const std::string & local_path, - const std::string & hf_token, - const struct llama_model_params & params) { - // construct hugging face model url: - // - // --repo ggml-org/models --file tinyllama-1.1b/ggml-model-f16.gguf - // https://huggingface.co/ggml-org/models/resolve/main/tinyllama-1.1b/ggml-model-f16.gguf - // - // --repo TheBloke/Mixtral-8x7B-v0.1-GGUF --file mixtral-8x7b-v0.1.Q4_K_M.gguf - // https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q4_K_M.gguf - // - - std::string model_url = "https://huggingface.co/"; - model_url += 
repo; - model_url += "/resolve/main/"; - model_url += remote_path; - - return common_load_model_from_url(model_url, local_path, hf_token, params); -} - -/** - * Allow getting the HF file from the HF repo with tag (like ollama), for example: - * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4 - * - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M - * - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s - * Tag is optional, default to "latest" (meaning it checks for Q4_K_M first, then Q4, then if not found, return the first GGUF file in repo) - * - * Return pair of (with "repo" already having tag removed) - * - * Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files. - */ -std::pair common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & hf_token) { - auto parts = string_split(hf_repo_with_tag, ':'); - std::string tag = parts.size() > 1 ? parts.back() : "latest"; - std::string hf_repo = parts[0]; - if (string_split(hf_repo, '/').size() != 2) { - throw std::invalid_argument("error: invalid HF repo format, expected /[:quant]\n"); - } - - // fetch model info from Hugging Face Hub API - json model_info; - curl_ptr curl(curl_easy_init(), &curl_easy_cleanup); - curl_slist_ptr http_headers; - std::string res_str; - std::string url = "https://huggingface.co/v2/" + hf_repo + "/manifests/" + tag; - curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str()); - curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); - typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data); - auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t { - static_cast(data)->append((char * ) ptr, size * nmemb); - return size * nmemb; - }; - curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast(write_callback)); - curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_str); -#if defined(_WIN32) - curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA); -#endif - if (!hf_token.empty()) { - std::string auth_header = "Authorization: Bearer " + hf_token; - http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str()); - } - // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response - http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp"); - http_headers.ptr = curl_slist_append(http_headers.ptr, "Accept: application/json"); - curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr); - - CURLcode res = curl_easy_perform(curl.get()); - - if (res != CURLE_OK) { - throw std::runtime_error("error: cannot make GET request to HF API"); - } - - long res_code; - curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code); - if (res_code == 200) { - model_info = json::parse(res_str); - } else if (res_code == 401) { - throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token"); - } else { - throw std::runtime_error(string_format("error from HF API, response code: %ld, data: %s", res_code, res_str.c_str())); - } - - // check response - if (!model_info.contains("ggufFile")) { - throw std::runtime_error("error: model does not have ggufFile"); - } - json & gguf_file = model_info.at("ggufFile"); - if (!gguf_file.contains("rfilename")) { - throw std::runtime_error("error: ggufFile does not have rfilename"); - } - - return 
std::make_pair(hf_repo, gguf_file.at("rfilename")); -} - -#else - -struct llama_model * common_load_model_from_url( - const std::string & /*model_url*/, - const std::string & /*local_path*/, - const std::string & /*hf_token*/, - const struct llama_model_params & /*params*/) { - LOG_WRN("%s: llama.cpp built without libcurl, downloading from an url not supported.\n", __func__); - return nullptr; -} - -struct llama_model * common_load_model_from_hf( - const std::string & /*repo*/, - const std::string & /*remote_path*/, - const std::string & /*local_path*/, - const std::string & /*hf_token*/, - const struct llama_model_params & /*params*/) { - LOG_WRN("%s: llama.cpp built without libcurl, downloading from Hugging Face not supported.\n", __func__); - return nullptr; -} - -std::pair common_get_hf_file(const std::string &, const std::string &) { - LOG_WRN("%s: llama.cpp built without libcurl, downloading from Hugging Face not supported.\n", __func__); - return std::make_pair("", ""); -} - -#endif // LLAMA_USE_CURL - // // Batch utils // @@ -1767,156 +1306,6 @@ std::string common_detokenize(const struct llama_vocab * vocab, const std::vecto return text; } -// -// Chat template utils -// - -bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) { - if (use_jinja) { - try { - auto chat_template = minja::chat_template(tmpl, "", ""); - chat_template.apply({{ - {"role", "user"}, - {"content", "test"}, - }}, json(), true); - return true; - } catch (const std::exception & e) { - LOG_ERR("%s: failed to apply template: %s\n", __func__, e.what()); - return false; - } - } - llama_chat_message chat[] = {{"user", "test"}}; - const int res = llama_chat_apply_template(tmpl.c_str(), chat, 1, true, nullptr, 0); - return res >= 0; -} - -std::string common_chat_apply_template( - const common_chat_template & tmpl, - const std::vector & msgs, - bool add_ass, - bool use_jinja) { - if (use_jinja) { - auto messages = json::array(); - for (const auto & msg : msgs) { - messages.push_back({{"role", msg.role}, {"content", msg.content}}); - } - return tmpl.apply(messages, /* tools= */ json(), add_ass); - } - - int alloc_size = 0; - std::vector chat; - for (const auto & msg : msgs) { - chat.push_back({msg.role.c_str(), msg.content.c_str()}); - alloc_size += (msg.role.size() + msg.content.size()) * 1.25; - } - - std::vector buf(alloc_size); - - // run the first time to get the total output length - int32_t res = llama_chat_apply_template(tmpl.source().c_str(), chat.data(), chat.size(), add_ass, buf.data(), buf.size()); - - // error: chat template is not supported - if (res < 0) { - // if the custom "tmpl" is not supported, we throw an error - // this is a bit redundant (for good), since we're not sure if user validated the custom template with llama_chat_verify_template() - throw std::runtime_error("this custom template is not supported"); - } - - // if it turns out that our buffer is too small, we resize it - if ((size_t) res > buf.size()) { - buf.resize(res); - res = llama_chat_apply_template(tmpl.source().c_str(), chat.data(), chat.size(), add_ass, buf.data(), buf.size()); - } - - std::string formatted_chat(buf.data(), res); - return formatted_chat; -} - -std::string common_chat_format_single( - const common_chat_template & tmpl, - const std::vector & past_msg, - const common_chat_msg & new_msg, - bool add_ass, - bool use_jinja) { - std::ostringstream ss; - auto fmt_past_msg = past_msg.empty() ? 
"" : common_chat_apply_template(tmpl, past_msg, false, use_jinja); - std::vector chat_new(past_msg); - // if the past_msg ends with a newline, we must preserve it in the formatted version - if (add_ass && !fmt_past_msg.empty() && fmt_past_msg.back() == '\n') { - ss << "\n"; - }; - // format chat with new_msg - chat_new.push_back(new_msg); - auto fmt_new_msg = common_chat_apply_template(tmpl, chat_new, add_ass, use_jinja); - // get the diff part - ss << fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size()); - return ss.str(); -} - -std::string common_chat_format_example(const common_chat_template & tmpl, bool use_jinja) { - std::vector msgs = { - {"system", "You are a helpful assistant"}, - {"user", "Hello"}, - {"assistant", "Hi there"}, - {"user", "How are you?"}, - }; - return common_chat_apply_template(tmpl, msgs, true, use_jinja); -} - -common_chat_templates common_chat_templates_from_model(const struct llama_model * model, const std::string & chat_template_override) -{ - auto vocab = llama_model_get_vocab(model); - std::string default_template_src = chat_template_override; - std::string template_tool_use_src = chat_template_override; - bool has_explicit_template = !chat_template_override.empty(); - if (chat_template_override.empty()) { - auto str = llama_model_chat_template(model, /* name */ nullptr); - if (str) { - default_template_src = str; - has_explicit_template = true; - } - str = llama_model_chat_template(model, /* name */ "tool_use"); - if (str) { - template_tool_use_src = str; - has_explicit_template = true; - } - } - if (default_template_src.empty() || default_template_src == "chatml") { - if (!template_tool_use_src.empty()) { - default_template_src = template_tool_use_src; - } else { - default_template_src = R"( - {%- for message in messages -%} - {{- "<|im_start|>" + message.role + "\n" + message.content + "<|im_end|>\n" -}} - {%- endfor -%} - {%- if add_generation_prompt -%} - {{- "<|im_start|>assistant\n" -}} - {%- endif -%} - )"; - } - } - const auto get_token = [&](llama_token token, const char * name, const char * jinja_variable_name) { - if (token == LLAMA_TOKEN_NULL) { - if (default_template_src.find(jinja_variable_name) != std::string::npos - || template_tool_use_src.find(jinja_variable_name) != std::string::npos) { - LOG_WRN("%s: warning: vocab does not have a %s token, jinja template won't work as intended.\n", __func__, name); - } - return std::string(); - } else { - return common_token_to_piece(vocab, token, true); - } - }; - auto token_bos = get_token(llama_vocab_bos(vocab), "BOS", "bos_token"); - auto token_eos = get_token(llama_vocab_eos(vocab), "EOS", "eos_token"); - return { - has_explicit_template, - std::make_unique(default_template_src, token_bos, token_eos), - template_tool_use_src.empty() - ? 
nullptr - : std::make_unique(template_tool_use_src, token_bos, token_eos) - }; -} - // // KV cache utils // @@ -2176,4 +1565,3 @@ common_control_vector_data common_control_vector_load(const std::vector #include #include #include @@ -109,6 +110,19 @@ enum common_conversation_mode { COMMON_CONVERSATION_MODE_AUTO = 2, }; +enum common_grammar_trigger_type { + COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN, + COMMON_GRAMMAR_TRIGGER_TYPE_WORD, + COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, + COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START, +}; + +struct common_grammar_trigger { + common_grammar_trigger_type type; + std::string value; + llama_token token = LLAMA_TOKEN_NULL; +}; + // sampling parameters struct common_params_sampling { uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler @@ -134,6 +148,7 @@ struct common_params_sampling { int32_t dry_allowed_length = 2; // tokens extending repetitions beyond this receive penalty int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size) int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 + float top_n_sigma = -1.00f;// -1.0 = disabled float mirostat_tau = 5.00f; // target entropy float mirostat_eta = 0.10f; // learning rate bool ignore_eos = false; @@ -154,7 +169,10 @@ struct common_params_sampling { COMMON_SAMPLER_TYPE_TEMPERATURE, }; - std::string grammar; // optional BNF-like grammar to constrain sampling + std::string grammar; // optional BNF-like grammar to constrain sampling + bool grammar_lazy = false; + std::vector grammar_triggers; // optional triggers (for lazy grammars) + std::set preserved_tokens; std::vector logit_bias; // logit biases to apply @@ -162,36 +180,42 @@ struct common_params_sampling { std::string print() const; }; +struct common_params_model { + std::string path = ""; // model local path // NOLINT + std::string url = ""; // model url to download // NOLINT + std::string hf_repo = ""; // HF repo // NOLINT + std::string hf_file = ""; // HF file // NOLINT +}; + struct common_params_speculative { std::vector devices; // devices to use for offloading int32_t n_ctx = 0; // draft context size int32_t n_max = 16; // maximum number of tokens to draft during speculative decoding - int32_t n_min = 5; // minimum number of draft tokens to use for speculative decoding + int32_t n_min = 0; // minimum number of draft tokens to use for speculative decoding int32_t n_gpu_layers = -1; // number of layers to store in VRAM for the draft model (-1 - use default) float p_split = 0.1f; // speculative decoding split probability - float p_min = 0.9f; // minimum speculative decoding probability (greedy) + float p_min = 0.75f; // minimum speculative decoding probability (greedy) struct cpu_params cpuparams; struct cpu_params cpuparams_batch; - std::string hf_repo = ""; // HF repo // NOLINT - std::string hf_file = ""; // HF file // NOLINT - - std::string model = ""; // draft model for speculative decoding // NOLINT - std::string model_url = ""; // model url to download // NOLINT + struct common_params_model model; }; struct common_params_vocoder { - std::string hf_repo = ""; // HF repo // NOLINT - std::string hf_file = ""; // HF file // NOLINT + struct common_params_model model; - std::string model = ""; // model path // NOLINT - std::string model_url = ""; // model url to download // NOLINT + std::string speaker_file = ""; // speaker file path // NOLINT bool use_guide_tokens = false; // enable guide tokens to improve TTS accuracy // NOLINT }; +enum common_reasoning_format { + 
COMMON_REASONING_FORMAT_NONE, + COMMON_REASONING_FORMAT_DEEPSEEK, // Extract thinking tag contents and return as `message.reasoning_content` +}; + struct common_params { int32_t n_predict = -1; // new tokens to predict int32_t n_ctx = 4096; // context size @@ -238,13 +262,12 @@ struct common_params { struct common_params_speculative speculative; struct common_params_vocoder vocoder; - std::string model = ""; // model path // NOLINT + struct common_params_model model; + std::string model_alias = ""; // model alias // NOLINT - std::string model_url = ""; // model url to download // NOLINT std::string hf_token = ""; // HF token // NOLINT - std::string hf_repo = ""; // HF repo // NOLINT - std::string hf_file = ""; // HF file // NOLINT std::string prompt = ""; // NOLINT + std::string system_prompt = ""; // NOLINT std::string prompt_file = ""; // store the external prompt file name // NOLINT std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state // NOLINT std::string input_prefix = ""; // string to prefix user inputs with // NOLINT @@ -256,6 +279,7 @@ struct common_params { std::vector in_files; // all input files std::vector antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts) std::vector kv_overrides; + std::vector tensor_buft_overrides; bool lora_init_without_apply = false; // only load lora to memory, but do not apply it to ctx (user can manually apply lora later using llama_adapter_lora_apply) std::vector lora_adapters; // lora adapter path with user defined scale @@ -282,6 +306,7 @@ struct common_params { bool kl_divergence = false; // compute KL divergence bool usage = false; // print usage + bool completion = false; // print source-able completion script bool use_color = false; // use color to distinguish generations and inputs bool special = false; // enable special token output bool interactive = false; // interactive mode @@ -308,13 +333,15 @@ struct common_params { bool warmup = true; // warmup run bool check_tensors = false; // validate tensor data + bool single_turn = false; // single turn chat conversation + ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO; // multimodal models (see examples/llava) - std::string mmproj = ""; // path to multimodal projector // NOLINT + struct common_params_model mmproj; std::vector image; // path to image file(s) // embedding @@ -336,6 +363,7 @@ struct common_params { std::string chat_template = ""; // NOLINT bool use_jinja = false; // NOLINT bool enable_chat_template = true; + common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; std::vector api_keys; @@ -373,8 +401,6 @@ struct common_params { int32_t i_pos = -1; // position of the passkey in the junk text // imatrix params - std::string out_file = "imatrix.dat"; // save the resulting imatrix to this file - int32_t n_out_freq = 10; // output the imatrix every n_out_freq iterations int32_t n_save_freq = 0; // save the imatrix every n_save_freq iterations int32_t i_chunk = 0; // start processing from this chunk @@ -386,16 +412,16 @@ struct common_params { int n_pca_batch = 100; int n_pca_iterations = 1000; dimre_method cvector_dimre_method = DIMRE_METHOD_PCA; - std::string cvector_outfile = "control_vector.gguf"; std::string cvector_positive_file = "examples/cvector-generator/positive.txt"; std::string cvector_negative_file = 
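With the flat model/model_url/hf_repo/hf_file strings folded into common_params_model, the main, draft (speculative) and vocoder models are all addressed through the same nested struct. A small sketch (paths and repo names are placeholders):

    common_params params;
    params.model.path    = "models/ggml-model-q4_0.gguf";   // local file, or ...
    params.model.hf_repo = "some-user/some-model-GGUF";     // ... resolved from Hugging Face
    params.model.hf_file = "model-q4_0.gguf";

    // The draft model for speculative decoding and the TTS vocoder use the same shape:
    params.speculative.model.path = "models/draft-model.gguf";
    params.vocoder.model.path     = "models/vocoder.gguf";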
"examples/cvector-generator/negative.txt"; bool spm_infill = false; // suffix/prefix/middle pattern for infill - std::string lora_outfile = "ggml-lora-merged-f16.gguf"; - // batched-bench params bool batched_bench_output_jsonl = false; + + // common params + std::string out_file; // output filename for all example programs }; // call once at the start of a program if it uses libcommon @@ -414,13 +440,13 @@ bool set_process_priority(enum ggml_sched_priority prio); // #ifdef __GNUC__ -#ifdef __MINGW32__ -#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__))) +# if defined(__MINGW32__) && !defined(__clang__) +# define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__))) +# else +# define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__))) +# endif #else -#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__))) -#endif -#else -#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) +# define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) #endif LLAMA_COMMON_ATTRIBUTE_FORMAT(1, 2) @@ -435,6 +461,8 @@ std::string string_repeat(const std::string & str, size_t n); void string_replace_all(std::string & s, const std::string & search, const std::string & replace); +std::string regex_escape(const std::string & s); + template static std::vector string_split(const std::string & str, char delim) { static_assert(!std::is_same::value, "Please use the specialized version for std::string"); @@ -512,26 +540,11 @@ struct llama_model_params common_model_params_to_llama ( common_params struct llama_context_params common_context_params_to_llama(const common_params & params); struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_params & params); -struct llama_model * common_load_model_from_url( - const std::string & model_url, - const std::string & local_path, - const std::string & hf_token, - const struct llama_model_params & params); - -struct llama_model * common_load_model_from_hf( - const std::string & repo, - const std::string & remote_path, - const std::string & local_path, - const std::string & hf_token, - const struct llama_model_params & params); - -std::pair common_get_hf_file( - const std::string & hf_repo_with_tag, - const std::string & hf_token); - // clear LoRA adapters from context, then apply new list of adapters void common_set_adapter_lora(struct llama_context * ctx, std::vector & lora); +std::string get_model_endpoint(); + // // Batch utils // @@ -598,54 +611,6 @@ std::string common_detokenize( const std::vector & tokens, bool special = true); -// -// Chat template utils -// - -// same with llama_chat_message, but uses std::string -struct common_chat_msg { - std::string role; - std::string content; -}; - -// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid -bool common_chat_verify_template(const std::string & tmpl, bool use_jinja); - -namespace minja { - class chat_template; -} - -typedef minja::chat_template common_chat_template; - -struct common_chat_templates { - bool has_explicit_template; // Model had builtin template or template overridde was specified. 
- std::unique_ptr template_default; // always set (defaults to chatml) - std::unique_ptr template_tool_use; -}; - -// CPP wrapper for llama_chat_apply_template -// If the built-in template is not supported, we default to chatml -// If the custom "tmpl" is not supported, we throw an error -std::string common_chat_apply_template( - const common_chat_template & tmpl, - const std::vector & chat, - bool add_ass, - bool use_jinja); - -// Format single message, while taking into account the position of that message in chat history -std::string common_chat_format_single( - const common_chat_template & tmpl, - const std::vector & past_msg, - const common_chat_msg & new_msg, - bool add_ass, - bool use_jinja); - -// Returns an example of formatted chat -std::string common_chat_format_example( - const common_chat_template & tmpl, bool use_jinja); - -common_chat_templates common_chat_templates_from_model(const struct llama_model * model, const std::string & chat_template_override); - // // KV cache utils // diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index 4d426b6bd1..9067982257 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -264,7 +264,7 @@ static void _build_min_max_int(int min_value, int max_value, std::stringstream & throw std::runtime_error("At least one of min_value or max_value must be set"); } -const std::string SPACE_RULE = "| \" \" | \"\\n\" [ \\t]{0,20}"; +const std::string SPACE_RULE = "| \" \" | \"\\n\"{1,2} [ \\t]{0,20}"; struct BuiltinRule { std::string content; @@ -343,7 +343,7 @@ static std::string format_literal(const std::string & literal) { class SchemaConverter { private: - friend std::string build_grammar(const std::function & cb); + friend std::string build_grammar(const std::function & cb, const common_grammar_options & options); std::function _fetch_json; bool _dotall; std::map _rules; @@ -990,17 +990,24 @@ public: } }; -std::string json_schema_to_grammar(const json & schema) { - return build_grammar([&](const llama_grammar_builder & callbacks) { +std::string json_schema_to_grammar(const json & schema, bool force_gbnf) { +#ifdef LLAMA_USE_LLGUIDANCE + if (!force_gbnf) { + return "%llguidance {}\nstart: %json " + schema.dump(); + } +#else + (void)force_gbnf; +#endif // LLAMA_USE_LLGUIDANCE + return build_grammar([&](const common_grammar_builder & callbacks) { auto copy = schema; callbacks.resolve_refs(copy); callbacks.add_schema("", copy); }); } -std::string build_grammar(const std::function & cb) { - SchemaConverter converter([&](const std::string &) { return json(); }, /* dotall= */ false); - llama_grammar_builder builder { +std::string build_grammar(const std::function & cb, const common_grammar_options & options) { + SchemaConverter converter([&](const std::string &) { return json(); }, options.dotall); + common_grammar_builder builder { /* .add_rule = */ [&](const std::string & name, const std::string & rule) { return converter._add_rule(name, rule); }, diff --git a/common/json-schema-to-grammar.h b/common/json-schema-to-grammar.h index 4f43ab3a52..4613f5d9f9 100644 --- a/common/json-schema-to-grammar.h +++ b/common/json-schema-to-grammar.h @@ -5,12 +5,17 @@ #define JSON_ASSERT GGML_ASSERT #include "json.hpp" -std::string json_schema_to_grammar(const nlohmann::ordered_json & schema); +std::string json_schema_to_grammar(const nlohmann::ordered_json & schema, + bool force_gbnf = false); -struct llama_grammar_builder { +struct common_grammar_builder { std::function add_rule; std::function 
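json_schema_to_grammar now takes an optional force_gbnf flag: when llguidance support is compiled in it emits a %llguidance stub that embeds the schema, otherwise (or when forced) it falls back to the GBNF conversion. A minimal calling sketch (the schema is illustrative):

    #include "json-schema-to-grammar.h"

    nlohmann::ordered_json schema = {
        {"type", "object"},
        {"properties", {{"answer", {{"type", "string"}}}}},
        {"required", {"answer"}},
    };

    // Prefers the llguidance path when built with LLAMA_USE_LLGUIDANCE.
    std::string grammar = json_schema_to_grammar(schema);

    // Force the GBNF conversion even when llguidance is available.
    std::string gbnf = json_schema_to_grammar(schema, /* force_gbnf= */ true);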
add_schema; std::function resolve_refs; }; -std::string build_grammar(const std::function & cb); +struct common_grammar_options { + bool dotall = false; +}; + +std::string build_grammar(const std::function & cb, const common_grammar_options & options = {}); diff --git a/common/llguidance.cpp b/common/llguidance.cpp new file mode 100644 index 0000000000..8bff89ea4a --- /dev/null +++ b/common/llguidance.cpp @@ -0,0 +1,253 @@ +#include "sampling.h" +#include "log.h" + +#ifdef LLAMA_USE_LLGUIDANCE + +# include "llguidance.h" +# include + +struct llama_sampler_llg { + const llama_vocab * vocab; + std::string grammar_kind; + std::string grammar_data; + LlgTokenizer * tokenizer; + LlgMatcher * grammar; +}; + +static LlgMatcher * llama_sampler_llg_new(LlgTokenizer * tokenizer, const char * grammar_kind, + const char * grammar_data) { + LlgConstraintInit cinit; + llg_constraint_init_set_defaults(&cinit, tokenizer); + const char * log_level = getenv("LLGUIDANCE_LOG_LEVEL"); + if (log_level && *log_level) { + cinit.log_stderr_level = atoi(log_level); + } + auto c = llg_new_matcher(&cinit, grammar_kind, grammar_data); + if (llg_matcher_get_error(c)) { + LOG_ERR("llg error: %s\n", llg_matcher_get_error(c)); + llg_free_matcher(c); + return nullptr; + } + + return c; +} + +static const char * llama_sampler_llg_name(const llama_sampler * /*smpl*/) { + return "llguidance"; +} + +static void llama_sampler_llg_accept_impl(llama_sampler * smpl, llama_token token) { + auto * ctx = (llama_sampler_llg *) smpl->ctx; + if (ctx->grammar) { + llg_matcher_consume_token(ctx->grammar, token); + } +} + +static void llama_sampler_llg_apply(llama_sampler * smpl, llama_token_data_array * cur_p) { + auto * ctx = (llama_sampler_llg *) smpl->ctx; + if (ctx->grammar) { + const uint32_t * mask = llg_matcher_get_mask(ctx->grammar); + if (mask == nullptr) { + if (llg_matcher_compute_mask(ctx->grammar) == 0) { + mask = llg_matcher_get_mask(ctx->grammar); + } else { + LOG_ERR("llg error: %s\n", llg_matcher_get_error(ctx->grammar)); + llg_free_matcher(ctx->grammar); + ctx->grammar = nullptr; + return; + } + } + + for (size_t i = 0; i < cur_p->size; ++i) { + auto token = cur_p->data[i].id; + if ((mask[token / 32] & (1 << (token % 32))) == 0) { + cur_p->data[i].logit = -INFINITY; + } + } + } +} + +static void llama_sampler_llg_reset(llama_sampler * smpl) { + auto * ctx = (llama_sampler_llg *) smpl->ctx; + if (ctx->grammar) { + llg_matcher_reset(ctx->grammar); + } +} + +static llama_sampler * llama_sampler_llg_clone(const llama_sampler * smpl) { + const auto * ctx = (const llama_sampler_llg *) smpl->ctx; + + auto * result = llama_sampler_init_llg(ctx->vocab, nullptr, nullptr); + + // copy the state + { + auto * result_ctx = (llama_sampler_llg *) result->ctx; + + if (ctx->grammar) { + result_ctx->grammar_kind = ctx->grammar_kind; + result_ctx->grammar_data = ctx->grammar_data; + result_ctx->grammar = llg_clone_matcher(ctx->grammar); + result_ctx->tokenizer = llg_clone_tokenizer(ctx->tokenizer); + } + } + + return result; +} + +static void llama_sampler_llg_free(llama_sampler * smpl) { + const auto * ctx = (llama_sampler_llg *) smpl->ctx; + + if (ctx->grammar) { + llg_free_matcher(ctx->grammar); + llg_free_tokenizer(ctx->tokenizer); + } + + delete ctx; +} + +static llama_sampler_i llama_sampler_llg_i = { + /* .name = */ llama_sampler_llg_name, + /* .accept = */ llama_sampler_llg_accept_impl, + /* .apply = */ llama_sampler_llg_apply, + /* .reset = */ llama_sampler_llg_reset, + /* .clone = */ llama_sampler_llg_clone, + /* .free = */ 
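llama_sampler_llg_apply above consults a packed bitmask computed by llguidance: token t is allowed when bit (t % 32) of word (t / 32) is set, and disallowed candidates have their logit forced to -INFINITY. A self-contained restatement of that bit test:

    #include <cstdint>

    // One bit per vocabulary entry, 32 tokens packed into each uint32_t word.
    static bool llg_token_allowed(const uint32_t * mask, int32_t token) {
        return (mask[token / 32] & (1u << (token % 32))) != 0;
    }

    // Inside the sampler this is used roughly as:
    //   if (!llg_token_allowed(mask, cur_p->data[i].id)) cur_p->data[i].logit = -INFINITY;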
llama_sampler_llg_free, +}; + +static size_t llama_sampler_llg_tokenize_fn(const void * user_data, const uint8_t * bytes, size_t bytes_len, + uint32_t * output_tokens, size_t output_tokens_len) { + const llama_vocab * vocab = (const llama_vocab *) user_data; + int r = 0; + try { + r = llama_tokenize(vocab, (const char *) bytes, bytes_len, (int32_t *) output_tokens, output_tokens_len, false, + true); + } catch (const std::exception & e) { + GGML_ABORT("llama_tokenize failed: %s\n", e.what()); + } + if (r < 0) { + return -r; + } + return r; +} + +static LlgTokenizer * llama_sampler_llg_new_tokenizer(const llama_vocab * vocab) { + // TODO store the tokenizer in the vocab somehow + static const llama_vocab * vocab_cache; + static LlgTokenizer * tokenizer_cache; + + if (vocab_cache == vocab) { + return llg_clone_tokenizer(tokenizer_cache); + } + + auto tok_eos = llama_vocab_eot(vocab); + if (tok_eos == LLAMA_TOKEN_NULL) { + tok_eos = llama_vocab_eos(vocab); + } + + size_t vocab_size = llama_vocab_n_tokens(vocab); + + auto token_lens = new uint32_t[vocab_size]; + // we typically have ~7 bytes per token; let's go on the safe side here + auto token_bytes_size = vocab_size * 16 + 1024 * 1024; + auto token_bytes = new uint8_t[token_bytes_size]; + + size_t offset = 0; + for (size_t i = 0; i < vocab_size; i++) { + size_t max_token = 1024; + if (token_bytes_size - offset < max_token) { + GGML_ABORT("token_bytes buffer too small\n"); + } + + llama_token token = i; + auto dp = (char *) token_bytes + offset; + auto size = llama_detokenize(vocab, &token, 1, dp, max_token, false, false); + if (size < 0) { + GGML_ABORT("llama_detokenize failed\n"); + } + if (size == 0) { + size = llama_detokenize(vocab, &token, 1, dp + 1, max_token - 1, false, true); + if (size < 0) { + GGML_ABORT("llama_detokenize failed\n"); + } + if (size != 0) { + *dp = '\xff'; // special token prefix marker + size += 1; + } + } + + token_lens[i] = size; + offset += size; + } + + LlgTokenizerInit tinit = { + /* .vocab_size = */ (uint32_t) vocab_size, + /* .tok_eos = */ (uint32_t) tok_eos, + /* .token_lens = */ token_lens, + /* .token_bytes = */ token_bytes, + /* .tokenizer_json = */ nullptr, + /* .tokenize_assumes_string = */ true, + /* .tokenize_fn = */ llama_sampler_llg_tokenize_fn, + /* .use_approximate_greedy_tokenize_fn = */ false, + /* .tokenize_user_data = */ vocab, + }; + + char error_buffer[1024]; + LlgTokenizer * tokenizer = llg_new_tokenizer(&tinit, error_buffer, sizeof(error_buffer)); + + delete[] token_bytes; + delete[] token_lens; + + if (tokenizer == nullptr) { + LOG_ERR("llg tokenizer error: %s\n", error_buffer); + return tokenizer; + } + + if (tokenizer_cache) { + llg_free_tokenizer(tokenizer_cache); + } + vocab_cache = vocab; + tokenizer_cache = tokenizer; + + return llg_clone_tokenizer(tokenizer_cache); +} + +llama_sampler * llama_sampler_init_llg(const llama_vocab * vocab, const char * grammar_kind, + const char * grammar_data) { + auto * ctx = new llama_sampler_llg; + + if (grammar_kind != nullptr && grammar_kind[0] != '\0') { + auto tokenizer = llama_sampler_llg_new_tokenizer(vocab); + *ctx = { + /* .vocab = */ vocab, + /* .grammar_kind = */ grammar_kind, + /* .grammar_data = */ grammar_data, + /* .tokenizer = */ tokenizer, + /* .grammar = */ llama_sampler_llg_new(tokenizer, grammar_kind, grammar_data), + }; + if (ctx->grammar) { + GGML_ASSERT(((size_t) llama_vocab_n_tokens(vocab) + 31) / 32 * 4 == + llg_matcher_get_mask_byte_size(ctx->grammar)); + } + } else { + *ctx = { + /* .vocab = */ vocab, + /* .grammar_kind = 
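The public entry point is llama_sampler_init_llg(vocab, grammar_kind, grammar_data); an empty kind produces an inert sampler, and the stub at the end of this file returns nullptr when llguidance is not compiled in. A hedged usage sketch — the "lark" kind string and the grammar text are assumptions for illustration, not taken from this diff:

    const llama_vocab * vocab = llama_model_get_vocab(model);   // model: a previously loaded llama_model *

    // Assumption: llguidance accepts a Lark-style grammar under the "lark" kind.
    llama_sampler * smpl = llama_sampler_init_llg(vocab, "lark", "start: \"yes\" | \"no\"");
    if (smpl == nullptr) {
        // llguidance was not enabled at build time (cmake -DLLAMA_LLGUIDANCE=ON).
    }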
*/ {}, + /* .grammar_data = */ {}, + /* .tokenizer = */ nullptr, + /* .grammar = */ nullptr, + }; + } + + return llama_sampler_init( + /* .iface = */ &llama_sampler_llg_i, + /* .ctx = */ ctx); +} + +#else + +llama_sampler * llama_sampler_init_llg(const llama_vocab *, const char *, const char *) { + LOG_WRN("llguidance (cmake -DLLAMA_LLGUIDANCE=ON) is not enabled"); + return nullptr; +} + +#endif // LLAMA_USE_LLGUIDANCE diff --git a/common/log.cpp b/common/log.cpp index 04c7c0ed10..52b31470c4 100644 --- a/common/log.cpp +++ b/common/log.cpp @@ -1,5 +1,6 @@ #include "log.h" +#include #include #include #include @@ -14,16 +15,6 @@ void common_log_set_verbosity_thold(int verbosity) { common_log_verbosity_thold = verbosity; } -#define LOG_COL_DEFAULT "\033[0m" -#define LOG_COL_BOLD "\033[1m" -#define LOG_COL_RED "\033[31m" -#define LOG_COL_GREEN "\033[32m" -#define LOG_COL_YELLOW "\033[33m" -#define LOG_COL_BLUE "\033[34m" -#define LOG_COL_MAGENTA "\033[35m" -#define LOG_COL_CYAN "\033[36m" -#define LOG_COL_WHITE "\033[37m" - static int64_t t_us() { return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); } @@ -206,6 +197,7 @@ public: vsnprintf(entry.msg.data(), entry.msg.size(), ss.str().c_str(), args_copy); } #endif + va_end(args_copy); } entry.level = level; diff --git a/common/log.h b/common/log.h index 66605cc69a..c56bb50d95 100644 --- a/common/log.h +++ b/common/log.h @@ -2,9 +2,20 @@ #include "ggml.h" // for ggml_log_level +#define LOG_CLR_TO_EOL "\033[K\r" +#define LOG_COL_DEFAULT "\033[0m" +#define LOG_COL_BOLD "\033[1m" +#define LOG_COL_RED "\033[31m" +#define LOG_COL_GREEN "\033[32m" +#define LOG_COL_YELLOW "\033[33m" +#define LOG_COL_BLUE "\033[34m" +#define LOG_COL_MAGENTA "\033[35m" +#define LOG_COL_CYAN "\033[36m" +#define LOG_COL_WHITE "\033[37m" + #ifndef __GNUC__ # define LOG_ATTRIBUTE_FORMAT(...) -#elif defined(__MINGW32__) +#elif defined(__MINGW32__) && !defined(__clang__) # define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__))) #else # define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__))) diff --git a/common/minja/chat-template.hpp b/common/minja/chat-template.hpp new file mode 100644 index 0000000000..237a7625cd --- /dev/null +++ b/common/minja/chat-template.hpp @@ -0,0 +1,537 @@ +/* + Copyright 2024 Google LLC + + Use of this source code is governed by an MIT-style + license that can be found in the LICENSE file or at + https://opensource.org/licenses/MIT. +*/ +// SPDX-License-Identifier: MIT +#pragma once + +#include "minja.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +using json = nlohmann::ordered_json; + +namespace minja { + +struct chat_template_caps { + bool supports_tools = false; + bool supports_tool_calls = false; + bool supports_tool_responses = false; + bool supports_system_role = false; + bool supports_parallel_tool_calls = false; + bool supports_tool_call_id = false; + // meta-llama/Llama-3.1-8B-Instruct expects arguments to be an object. + // Most other templates (and OpenAI's API) expect the arguments object to be stringified. 
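With the ANSI color macros (and LOG_CLR_TO_EOL) moved into log.h, any file that includes the logging header can colorize its output directly. A tiny sketch:

    #include "log.h"

    // Highlight part of a warning in yellow, then reset the terminal attributes.
    LOG_WRN("%sfalling back to CPU buffer%s\n", LOG_COL_YELLOW, LOG_COL_DEFAULT);

    // LOG_CLR_TO_EOL clears the current line first, handy when rewriting progress output.
    LOG_ERR("%sfailed after %d retries\n", LOG_CLR_TO_EOL, 3);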
+ bool requires_object_arguments = false; + // CohereForAI/c4ai-command-r-plus simple variant + bool requires_non_null_content = false; + // MiniMaxAI/MiniMax-Text-01 special + bool requires_typed_content = false; +}; + +struct chat_template_inputs { + nlohmann::ordered_json messages; + nlohmann::ordered_json tools; + bool add_generation_prompt = true; + nlohmann::ordered_json extra_context; + std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); +}; + +struct chat_template_options { + bool apply_polyfills = true; + bool use_bos_token = true; + bool use_eos_token = true; + bool define_strftime_now = true; + + bool polyfill_tools = true; + bool polyfill_tool_call_examples = true; + bool polyfill_tool_calls = true; + bool polyfill_tool_responses = true; + bool polyfill_system_role = true; + bool polyfill_object_arguments = true; + bool polyfill_typed_content = true; +}; + +class chat_template { + + private: + chat_template_caps caps_; + std::string source_; + std::string bos_token_; + std::string eos_token_; + std::shared_ptr template_root_; + std::string tool_call_example_; + + std::string try_raw_render( + const nlohmann::ordered_json & messages, + const nlohmann::ordered_json & tools, + bool add_generation_prompt, + const nlohmann::ordered_json & extra_context = nlohmann::ordered_json()) const + { + try { + chat_template_inputs inputs; + inputs.messages = messages; + inputs.tools = tools; + inputs.add_generation_prompt = add_generation_prompt; + inputs.extra_context = extra_context; + // Use fixed date for tests + inputs.now = std::chrono::system_clock::from_time_t(0); + + chat_template_options opts; + opts.apply_polyfills = false; + + auto prompt = apply(inputs, opts); + // fprintf(stderr, "try_raw_render: %s\n", prompt.c_str()); + return prompt; + } catch (const std::exception & e) { + // fprintf(stderr, "try_raw_render error: %s\n", e.what()); + return ""; + } + } + + public: + + chat_template(const std::string & source, const std::string & bos_token, const std::string & eos_token) + : source_(source), bos_token_(bos_token), eos_token_(eos_token) + { + template_root_ = minja::Parser::parse(source_, { + /* .trim_blocks = */ true, + /* .lstrip_blocks = */ true, + /* .keep_trailing_newline = */ false, + }); + + auto contains = [](const std::string & haystack, const std::string & needle) { + return haystack.find(needle) != std::string::npos; + }; + + const std::string user_needle = ""; + const std::string sys_needle = ""; + const json dummy_str_user_msg = {{"role", "user"}, {"content", user_needle}}; + const json dummy_typed_user_msg = {{"role", "user"}, {"content", json::array({{{"type", "text"}, {"text", user_needle}}})}}; + + caps_.requires_typed_content = + !contains(try_raw_render(json::array({dummy_str_user_msg}), {}, false), user_needle) + && contains(try_raw_render(json::array({dummy_typed_user_msg}), {}, false), user_needle); + + const auto dummy_user_msg = caps_.requires_typed_content + ? dummy_typed_user_msg + : dummy_str_user_msg; + const json needle_system_msg = { + {"role", "system"}, + {"content", caps_.requires_typed_content ? 
json::array({{{"type", "text"}, {"text", sys_needle}}}) : json(sys_needle)}, + }; + + caps_.supports_system_role = contains(try_raw_render({needle_system_msg, dummy_user_msg,}, {}, false), sys_needle); + + auto out = try_raw_render(json::array({ + dummy_user_msg + }), json::array({ + { + {"name", "some_tool"}, + {"type", "function"}, + {"function", { + {"name", "some_tool"}, + {"description", "Some tool."}, + {"parameters", { + {"type", "object"}, + {"properties", { + {"arg", { + {"type", "string"}, + {"description", "Some argument."}, + }}, + }}, + {"required", json::array({ "arg" })}, + }}, + }}, + }, + }), false); + caps_.supports_tools = contains(out, "some_tool"); + + auto make_tool_calls_msg = [&](const json & tool_calls) { + return json { + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", tool_calls}, + }; + }; + auto make_tool_call = [](const std::string & tool_name, const json & arguments) { + return json { + {"id", "call_1___"}, + {"type", "function"}, + {"function", { + {"arguments", arguments}, + {"name", tool_name}, + }}, + }; + }; + const json dummy_args_obj {{"argument_needle", "print('Hello, World!')"}}; + + // Note: the arguments are rendered in both cases, but may be double-escaped, which we don't want. + out = try_raw_render(json::array({ + dummy_user_msg, + make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj.dump())})), + }), {}, false); + auto tool_call_renders_str_arguments = contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':"); + out = try_raw_render(json::array({ + dummy_user_msg, + make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj)})), + }), {}, false); + auto tool_call_renders_obj_arguments = contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':"); + + caps_.supports_tool_calls = tool_call_renders_str_arguments || tool_call_renders_obj_arguments; + caps_.requires_object_arguments = !tool_call_renders_str_arguments && tool_call_renders_obj_arguments; + auto out_empty = try_raw_render(json::array({dummy_user_msg, {{"role", "assistant"}, {"content", ""}}}), {}, false); + auto out_null = try_raw_render(json::array({dummy_user_msg, {{"role", "assistant"}, {"content", nullptr}}}), {}, false); + caps_.requires_non_null_content = contains(out_empty, user_needle) && !contains(out_null, user_needle); + + if (caps_.supports_tool_calls) { + auto dummy_args = caps_.requires_object_arguments ? 
dummy_args_obj : json(dummy_args_obj.dump()); + auto tc1 = make_tool_call("test_tool1", dummy_args); + auto tc2 = make_tool_call("test_tool2", dummy_args); + auto out = try_raw_render(json::array({ + dummy_user_msg, + make_tool_calls_msg(json::array({tc1, tc2})), + }), {}, false); + caps_.supports_parallel_tool_calls = contains(out, "test_tool1") && contains(out, "test_tool2"); + + out = try_raw_render(json::array({ + dummy_user_msg, + make_tool_calls_msg(json::array({tc1})), + { + {"role", "tool"}, + {"name", "test_tool1"}, + {"content", "Some response!"}, + {"tool_call_id", "call_911_"}, + } + }), {}, false); + caps_.supports_tool_responses = contains(out, "Some response!"); + caps_.supports_tool_call_id = contains(out, "call_911_"); + } + + try { + if (!caps_.supports_tools) { + const json user_msg { + {"role", "user"}, + {"content", "Hey"}, + }; + const json args { + {"arg1", "some_value"}, + }; + const json tool_call_msg { + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + { + // TODO: detect if requires numerical id or fixed length == 6 like Nemo + {"id", "call_1___"}, + {"type", "function"}, + {"function", { + {"name", "tool_name"}, + {"arguments", (caps_.requires_object_arguments ? args : json(minja::Value(args).dump(-1, /* to_json= */ true)))}, + }}, + }, + })}, + }; + std::string prefix, full; + { + chat_template_inputs inputs; + inputs.messages = json::array({user_msg}); + inputs.add_generation_prompt = true; + prefix = apply(inputs); + } + { + chat_template_inputs inputs; + inputs.messages = json::array({user_msg, tool_call_msg}); + inputs.add_generation_prompt = false; + full = apply(inputs); + } + auto eos_pos_last = full.rfind(eos_token_); + if (eos_pos_last == prefix.size() - eos_token_.size() || + (full[full.size() - 1] == '\n' && (eos_pos_last == full.size() - eos_token_.size() - 1))) { + full = full.substr(0, eos_pos_last); + } + size_t common_prefix_length = 0; + for (size_t i = 0; i < prefix.size() && i < full.size(); ++i) { + if (prefix[i] != full[i]) { + break; + } + if (prefix[i] == '<') { + // DeepSeek R1's template (as of 20250209) adds a trailing if add_generation_prompt, + // but it removes thinking tags for past messages. + // The prefix and full strings diverge at vs. <|tool▁calls▁begin|>, we avoid consuming the leading <. 
+ continue; + } + common_prefix_length = i + 1; + } + auto example = full.substr(common_prefix_length); + if (example.find("tool_name") == std::string::npos && example.find("some_value") == std::string::npos) { + fprintf(stderr, "Failed to infer a tool call example (possible template bug)\n"); + } else { + tool_call_example_ = example; + } + } + } catch (const std::exception & e) { + fprintf(stderr, "Failed to generate tool call example: %s\n", e.what()); + } + } + + const std::string & source() const { return source_; } + const std::string & bos_token() const { return bos_token_; } + const std::string & eos_token() const { return eos_token_; } + const chat_template_caps & original_caps() const { return caps_; } + + // Deprecated, please use the form with chat_template_inputs and chat_template_options + std::string apply( + const nlohmann::ordered_json & messages, + const nlohmann::ordered_json & tools, + bool add_generation_prompt, + const nlohmann::ordered_json & extra_context = nlohmann::ordered_json(), + bool apply_polyfills = true) + { + fprintf(stderr, "[%s] Deprecated!\n", __func__); + chat_template_inputs inputs; + inputs.messages = messages; + inputs.tools = tools; + inputs.add_generation_prompt = add_generation_prompt; + inputs.extra_context = extra_context; + inputs.now = std::chrono::system_clock::now(); + + chat_template_options opts; + opts.apply_polyfills = apply_polyfills; + + return apply(inputs, opts); + } + + std::string apply( + const chat_template_inputs & inputs, + const chat_template_options & opts = chat_template_options()) const + { + json actual_messages; + + auto has_tools = inputs.tools.is_array() && !inputs.tools.empty(); + auto has_tool_calls = false; + auto has_tool_responses = false; + auto has_string_content = false; + for (const auto & message : inputs.messages) { + if (message.contains("tool_calls") && !message["tool_calls"].is_null()) { + has_tool_calls = true; + } + if (message.contains("role") && message["role"] == "tool") { + has_tool_responses = true; + } + if (message.contains("content") && message["content"].is_string()) { + has_string_content = true; + } + } + + auto polyfill_system_role = opts.polyfill_system_role && !caps_.supports_system_role; + auto polyfill_tools = opts.polyfill_tools && has_tools && !caps_.supports_tools; + auto polyfill_tool_call_example = polyfill_tools && opts.polyfill_tool_call_examples; + auto polyfill_tool_calls = opts.polyfill_tool_calls && has_tool_calls && !caps_.supports_tool_calls; + auto polyfill_tool_responses = opts.polyfill_tool_responses && has_tool_responses && !caps_.supports_tool_responses; + auto polyfill_object_arguments = opts.polyfill_object_arguments && has_tool_calls && caps_.requires_object_arguments; + auto polyfill_typed_content = opts.polyfill_typed_content && has_string_content && caps_.requires_typed_content; + + auto needs_polyfills = opts.apply_polyfills && (false + || polyfill_system_role + || polyfill_tools + || polyfill_tool_calls + || polyfill_tool_responses + || polyfill_object_arguments + || polyfill_typed_content + ); + + if (needs_polyfills) { + actual_messages = json::array(); + + auto add_message = [&](const json & msg) { + if (polyfill_typed_content && msg.contains("content") && !msg.at("content").is_null() && msg.at("content").is_string()) { + actual_messages.push_back({ + {"role", msg.at("role")}, + {"content", {{ + {"type", "text"}, + {"text", msg.at("content")}, + }}}, + }); + } else { + actual_messages.push_back(msg); + } + }; + + std::string pending_system; + auto flush_sys = 
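Once a chat_template has been constructed, the capabilities probed in the constructor are exposed through original_caps(), so callers can decide whether any polyfill is needed. A brief sketch (the template source is a placeholder; in practice it comes from the model's metadata):

    std::string tmpl_src = /* chat template text, e.g. from GGUF metadata */ "...";
    minja::chat_template tmpl(tmpl_src, /* bos_token= */ "", /* eos_token= */ "<|im_end|>");

    const auto & caps = tmpl.original_caps();
    if (!caps.supports_tools) {
        // Tool definitions will be injected into the system prompt by the polyfill path.
    }
    if (caps.requires_object_arguments) {
        // Tool-call arguments must stay JSON objects rather than stringified JSON.
    }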
[&]() { + if (!pending_system.empty()) { + add_message({ + {"role", "user"}, + {"content", pending_system}, + }); + pending_system.clear(); + } + }; + + json adjusted_messages; + if (polyfill_tools) { + adjusted_messages = add_system(inputs.messages, + "You can call any of the following tools to satisfy the user's requests: " + minja::Value(inputs.tools).dump(2, /* to_json= */ true) + + (!polyfill_tool_call_example || tool_call_example_.empty() ? "" : "\n\nExample tool call syntax:\n\n" + tool_call_example_ + "\n\n")); + } else { + adjusted_messages = inputs.messages; + } + + for (const auto & message_ : adjusted_messages) { + auto message = message_; + if (!message.contains("role") || !message.contains("content")) { + throw std::runtime_error("message must have 'role' and 'content' fields: " + message.dump()); + } + std::string role = message.at("role"); + + if (message.contains("tool_calls")) { + if (polyfill_object_arguments || polyfill_tool_calls) { + for (auto & tool_call : message.at("tool_calls")) { + if (tool_call["type"] == "function") { + auto & function = tool_call.at("function"); + auto & arguments = function.at("arguments"); + if (arguments.is_string()) { + try { + arguments = json::parse(arguments.get()); + } catch (const std::exception & ecvt) { + fprintf(stderr, "Failed to parse arguments: %s\n", ecvt.what()); + } + } + } + } + } + if (polyfill_tool_calls) { + auto content = message.at("content"); + auto tool_calls = json::array(); + for (const auto & tool_call : message.at("tool_calls")) { + if (tool_call.at("type") != "function") { + continue; + } + const auto & function = tool_call.at("function"); + auto tc = json { + {"name", function.at("name")}, + {"arguments", function.at("arguments")}, + }; + if (tool_call.contains("id")) { + tc["id"] = tool_call["id"]; + } + tool_calls.push_back(tc); + } + auto obj = json { + {"tool_calls", tool_calls}, + }; + if (!content.is_null() && !content.empty()) { + obj["content"] = content; + } + message["content"] = obj.dump(2); + message.erase("tool_calls"); + } + } + if (polyfill_tool_responses && role == "tool") { + message["role"] = "user"; + auto obj = json { + {"tool_response", json::object()}, + }; + if (message.contains("name")) { + obj["tool_response"]["tool"] = message.at("name"); + } + obj["tool_response"]["content"] = message.at("content"); + if (message.contains("tool_call_id")) { + obj["tool_response"]["tool_call_id"] = message.at("tool_call_id"); + } + message["content"] = obj.dump(2); + message.erase("name"); + } + + if (!message["content"].is_null() && polyfill_system_role) { + std::string content = message.at("content"); + if (role == "system") { + if (!pending_system.empty()) pending_system += "\n"; + pending_system += content; + continue; + } else { + if (role == "user") { + if (!pending_system.empty()) { + message["content"] = pending_system + (content.empty() ? "" : "\n" + content); + pending_system.clear(); + } + } else { + flush_sys(); + } + } + } + add_message(message); + } + flush_sys(); + } else { + actual_messages = inputs.messages; + } + + auto context = minja::Context::make(json({ + {"messages", actual_messages}, + {"add_generation_prompt", inputs.add_generation_prompt}, + })); + context->set("bos_token", opts.use_bos_token ? bos_token_ : ""); + context->set("eos_token", opts.use_eos_token ? 
eos_token_ : ""); + if (opts.define_strftime_now) { + auto now = inputs.now; + context->set("strftime_now", Value::callable([now](const std::shared_ptr &, minja::ArgumentsValue & args) { + args.expectArgs("strftime_now", {1, 1}, {0, 0}); + auto format = args.args[0].get(); + + auto time = std::chrono::system_clock::to_time_t(now); + auto local_time = *std::localtime(&time); + std::ostringstream ss; + ss << std::put_time(&local_time, format.c_str()); + return ss.str(); + })); + } + if (!inputs.tools.is_null()) { + context->set("tools", minja::Value(inputs.tools)); + } + if (!inputs.extra_context.is_null()) { + for (auto & kv : inputs.extra_context.items()) { + context->set(kv.key(), minja::Value(kv.value())); + } + } + + auto ret = template_root_->render(context); + // fprintf(stderr, "actual_messages: %s\n", actual_messages.dump(2).c_str()); + // fprintf(stderr, "apply: %s\n\n", ret.c_str()); + return ret; + } + + static nlohmann::ordered_json add_system(const nlohmann::ordered_json & messages, const std::string & system_prompt) { + json messages_with_system = messages; + + if (!messages_with_system.empty() && messages_with_system[0].at("role") == "system") { + std::string existing_system = messages_with_system.at(0).at("content"); + messages_with_system[0] = json { + {"role", "system"}, + {"content", existing_system + "\n\n" + system_prompt}, + }; + } else { + messages_with_system.insert(messages_with_system.begin(), json { + {"role", "system"}, + {"content", system_prompt}, + }); + } + return messages_with_system; + } +}; + +} // namespace minja diff --git a/common/minja.hpp b/common/minja/minja.hpp similarity index 86% rename from common/minja.hpp rename to common/minja/minja.hpp index 80bdd4b412..e52e792d84 100644 --- a/common/minja.hpp +++ b/common/minja/minja.hpp @@ -8,14 +8,26 @@ // SPDX-License-Identifier: MIT #pragma once +#include +#include +#include +#include +#include +#include #include -#include -#include -#include +#include +#include +#include #include -#include +#include #include +#include +#include +#include #include +#include +#include + #include using json = nlohmann::ordered_json; @@ -628,7 +640,7 @@ class Context : public std::enable_shared_from_this { if (parent_) return parent_->contains(key); return false; } - virtual void set(const Value & key, Value & value) { + virtual void set(const Value & key, const Value & value) { values_.set(key, value); } }; @@ -693,7 +705,7 @@ enum SpaceHandling { Keep, Strip, StripSpaces, StripNewline }; class TemplateToken { public: - enum class Type { Text, Expression, If, Else, Elif, EndIf, For, EndFor, Generation, EndGeneration, Set, EndSet, Comment, Macro, EndMacro, Filter, EndFilter }; + enum class Type { Text, Expression, If, Else, Elif, EndIf, For, EndFor, Generation, EndGeneration, Set, EndSet, Comment, Macro, EndMacro, Filter, EndFilter, Break, Continue }; static std::string typeToString(Type t) { switch (t) { @@ -714,6 +726,8 @@ public: case Type::EndFilter: return "endfilter"; case Type::Generation: return "generation"; case Type::EndGeneration: return "endgeneration"; + case Type::Break: return "break"; + case Type::Continue: return "continue"; } return "Unknown"; } @@ -729,51 +743,51 @@ public: struct TextTemplateToken : public TemplateToken { std::string text; - TextTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post, const std::string& t) : TemplateToken(Type::Text, location, pre, post), text(t) {} + TextTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post, const std::string& 
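Putting it together, rendering now goes through chat_template_inputs and chat_template_options instead of the deprecated positional apply(). A hedged end-to-end sketch (template text and messages are illustrative):

    using json = nlohmann::ordered_json;

    minja::chat_template tmpl(
        "{%- for m in messages %}<|{{ m.role }}|>\n{{ m.content }}\n"
        "{%- endfor %}{%- if add_generation_prompt %}<|assistant|>\n{%- endif %}",
        /* bos_token= */ "", /* eos_token= */ "");

    minja::chat_template_inputs in;
    in.messages = json::array({
        {{"role", "system"}, {"content", "You are concise."}},
        {{"role", "user"},   {"content", "Hello!"}},
    });
    in.add_generation_prompt = true;

    minja::chat_template_options opts;          // defaults: all polyfills enabled
    std::string prompt = tmpl.apply(in, opts);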
t) : TemplateToken(Type::Text, loc, pre, post), text(t) {} }; struct ExpressionTemplateToken : public TemplateToken { std::shared_ptr expr; - ExpressionTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post, std::shared_ptr && e) : TemplateToken(Type::Expression, location, pre, post), expr(std::move(e)) {} + ExpressionTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post, std::shared_ptr && e) : TemplateToken(Type::Expression, loc, pre, post), expr(std::move(e)) {} }; struct IfTemplateToken : public TemplateToken { std::shared_ptr condition; - IfTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post, std::shared_ptr && c) : TemplateToken(Type::If, location, pre, post), condition(std::move(c)) {} + IfTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post, std::shared_ptr && c) : TemplateToken(Type::If, loc, pre, post), condition(std::move(c)) {} }; struct ElifTemplateToken : public TemplateToken { std::shared_ptr condition; - ElifTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post, std::shared_ptr && c) : TemplateToken(Type::Elif, location, pre, post), condition(std::move(c)) {} + ElifTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post, std::shared_ptr && c) : TemplateToken(Type::Elif, loc, pre, post), condition(std::move(c)) {} }; struct ElseTemplateToken : public TemplateToken { - ElseTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::Else, location, pre, post) {} + ElseTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::Else, loc, pre, post) {} }; struct EndIfTemplateToken : public TemplateToken { - EndIfTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndIf, location, pre, post) {} + EndIfTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndIf, loc, pre, post) {} }; struct MacroTemplateToken : public TemplateToken { std::shared_ptr name; Expression::Parameters params; - MacroTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post, std::shared_ptr && n, Expression::Parameters && p) - : TemplateToken(Type::Macro, location, pre, post), name(std::move(n)), params(std::move(p)) {} + MacroTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post, std::shared_ptr && n, Expression::Parameters && p) + : TemplateToken(Type::Macro, loc, pre, post), name(std::move(n)), params(std::move(p)) {} }; struct EndMacroTemplateToken : public TemplateToken { - EndMacroTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndMacro, location, pre, post) {} + EndMacroTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndMacro, loc, pre, post) {} }; struct FilterTemplateToken : public TemplateToken { std::shared_ptr filter; - FilterTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post, std::shared_ptr && filter) - : TemplateToken(Type::Filter, location, pre, post), filter(std::move(filter)) {} + FilterTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post, std::shared_ptr && filter) + : TemplateToken(Type::Filter, loc, pre, post), filter(std::move(filter)) {} }; struct EndFilterTemplateToken : public TemplateToken { - EndFilterTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post) : 
TemplateToken(Type::EndFilter, location, pre, post) {} + EndFilterTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndFilter, loc, pre, post) {} }; struct ForTemplateToken : public TemplateToken { @@ -781,38 +795,54 @@ struct ForTemplateToken : public TemplateToken { std::shared_ptr iterable; std::shared_ptr condition; bool recursive; - ForTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post, const std::vector & vns, std::shared_ptr && iter, + ForTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post, const std::vector & vns, std::shared_ptr && iter, std::shared_ptr && c, bool r) - : TemplateToken(Type::For, location, pre, post), var_names(vns), iterable(std::move(iter)), condition(std::move(c)), recursive(r) {} + : TemplateToken(Type::For, loc, pre, post), var_names(vns), iterable(std::move(iter)), condition(std::move(c)), recursive(r) {} }; struct EndForTemplateToken : public TemplateToken { - EndForTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndFor, location, pre, post) {} + EndForTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndFor, loc, pre, post) {} }; struct GenerationTemplateToken : public TemplateToken { - GenerationTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::Generation, location, pre, post) {} + GenerationTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::Generation, loc, pre, post) {} }; struct EndGenerationTemplateToken : public TemplateToken { - EndGenerationTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndGeneration, location, pre, post) {} + EndGenerationTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndGeneration, loc, pre, post) {} }; struct SetTemplateToken : public TemplateToken { std::string ns; std::vector var_names; std::shared_ptr value; - SetTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post, const std::string & ns, const std::vector & vns, std::shared_ptr && v) - : TemplateToken(Type::Set, location, pre, post), ns(ns), var_names(vns), value(std::move(v)) {} + SetTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post, const std::string & ns, const std::vector & vns, std::shared_ptr && v) + : TemplateToken(Type::Set, loc, pre, post), ns(ns), var_names(vns), value(std::move(v)) {} }; struct EndSetTemplateToken : public TemplateToken { - EndSetTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndSet, location, pre, post) {} + EndSetTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndSet, loc, pre, post) {} }; struct CommentTemplateToken : public TemplateToken { std::string text; - CommentTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post, const std::string& t) : TemplateToken(Type::Comment, location, pre, post), text(t) {} + CommentTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post, const std::string& t) : TemplateToken(Type::Comment, loc, pre, post), text(t) {} +}; + +enum class LoopControlType { Break, Continue }; + +class LoopControlException : public std::runtime_error { +public: + LoopControlType control_type; + LoopControlException(const std::string & message, 
LoopControlType control_type) : std::runtime_error(message), control_type(control_type) {} + LoopControlException(LoopControlType control_type) + : std::runtime_error((control_type == LoopControlType::Continue ? "continue" : "break") + std::string(" outside of a loop")), + control_type(control_type) {} +}; + +struct LoopControlTemplateToken : public TemplateToken { + LoopControlType control_type; + LoopControlTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post, LoopControlType control_type) : TemplateToken(Type::Break, loc, pre, post), control_type(control_type) {} }; class TemplateNode { @@ -825,6 +855,12 @@ public: void render(std::ostringstream & out, const std::shared_ptr & context) const { try { do_render(out, context); + } catch (const LoopControlException & e) { + // TODO: make stack creation lazy. Only needed if it was thrown outside of a loop. + std::ostringstream err; + err << e.what(); + if (location_.source) err << error_location_suffix(*location_.source, location_.pos); + throw LoopControlException(err.str(), e.control_type); } catch (const std::exception & e) { std::ostringstream err; err << e.what(); @@ -844,8 +880,8 @@ public: class SequenceNode : public TemplateNode { std::vector> children; public: - SequenceNode(const Location & location, std::vector> && c) - : TemplateNode(location), children(std::move(c)) {} + SequenceNode(const Location & loc, std::vector> && c) + : TemplateNode(loc), children(std::move(c)) {} void do_render(std::ostringstream & out, const std::shared_ptr & context) const override { for (const auto& child : children) child->render(out, context); } @@ -854,7 +890,7 @@ public: class TextNode : public TemplateNode { std::string text; public: - TextNode(const Location & location, const std::string& t) : TemplateNode(location), text(t) {} + TextNode(const Location & loc, const std::string& t) : TemplateNode(loc), text(t) {} void do_render(std::ostringstream & out, const std::shared_ptr &) const override { out << text; } @@ -863,7 +899,7 @@ public: class ExpressionNode : public TemplateNode { std::shared_ptr expr; public: - ExpressionNode(const Location & location, std::shared_ptr && e) : TemplateNode(location), expr(std::move(e)) {} + ExpressionNode(const Location & loc, std::shared_ptr && e) : TemplateNode(loc), expr(std::move(e)) {} void do_render(std::ostringstream & out, const std::shared_ptr & context) const override { if (!expr) throw std::runtime_error("ExpressionNode.expr is null"); auto result = expr->evaluate(context); @@ -880,8 +916,8 @@ public: class IfNode : public TemplateNode { std::vector, std::shared_ptr>> cascade; public: - IfNode(const Location & location, std::vector, std::shared_ptr>> && c) - : TemplateNode(location), cascade(std::move(c)) {} + IfNode(const Location & loc, std::vector, std::shared_ptr>> && c) + : TemplateNode(loc), cascade(std::move(c)) {} void do_render(std::ostringstream & out, const std::shared_ptr & context) const override { for (const auto& branch : cascade) { auto enter_branch = true; @@ -897,6 +933,15 @@ public: } }; +class LoopControlNode : public TemplateNode { + LoopControlType control_type_; + public: + LoopControlNode(const Location & loc, LoopControlType control_type) : TemplateNode(loc), control_type_(control_type) {} + void do_render(std::ostringstream &, const std::shared_ptr &) const override { + throw LoopControlException(control_type_); + } +}; + class ForNode : public TemplateNode { std::vector var_names; std::shared_ptr iterable; @@ -905,9 +950,9 @@ class ForNode : public 
TemplateNode { bool recursive; std::shared_ptr else_body; public: - ForNode(const Location & location, std::vector && var_names, std::shared_ptr && iterable, + ForNode(const Location & loc, std::vector && var_names, std::shared_ptr && iterable, std::shared_ptr && condition, std::shared_ptr && body, bool recursive, std::shared_ptr && else_body) - : TemplateNode(location), var_names(var_names), iterable(std::move(iterable)), condition(std::move(condition)), body(std::move(body)), recursive(recursive), else_body(std::move(else_body)) {} + : TemplateNode(loc), var_names(var_names), iterable(std::move(iterable)), condition(std::move(condition)), body(std::move(body)), recursive(recursive), else_body(std::move(else_body)) {} void do_render(std::ostringstream & out, const std::shared_ptr & context) const override { // https://jinja.palletsprojects.com/en/3.0.x/templates/#for @@ -961,7 +1006,12 @@ public: loop.set("last", i == (n - 1)); loop.set("previtem", i > 0 ? filtered_items.at(i - 1) : Value()); loop.set("nextitem", i < n - 1 ? filtered_items.at(i + 1) : Value()); - body->render(out, loop_context); + try { + body->render(out, loop_context); + } catch (const LoopControlException & e) { + if (e.control_type == LoopControlType::Break) break; + if (e.control_type == LoopControlType::Continue) continue; + } } } }; @@ -987,8 +1037,8 @@ class MacroNode : public TemplateNode { std::shared_ptr body; std::unordered_map named_param_positions; public: - MacroNode(const Location & location, std::shared_ptr && n, Expression::Parameters && p, std::shared_ptr && b) - : TemplateNode(location), name(std::move(n)), params(std::move(p)), body(std::move(b)) { + MacroNode(const Location & loc, std::shared_ptr && n, Expression::Parameters && p, std::shared_ptr && b) + : TemplateNode(loc), name(std::move(n)), params(std::move(p)), body(std::move(b)) { for (size_t i = 0; i < params.size(); ++i) { const auto & name = params[i].first; if (!name.empty()) { @@ -1034,8 +1084,8 @@ class FilterNode : public TemplateNode { std::shared_ptr body; public: - FilterNode(const Location & location, std::shared_ptr && f, std::shared_ptr && b) - : TemplateNode(location), filter(std::move(f)), body(std::move(b)) {} + FilterNode(const Location & loc, std::shared_ptr && f, std::shared_ptr && b) + : TemplateNode(loc), filter(std::move(f)), body(std::move(b)) {} void do_render(std::ostringstream & out, const std::shared_ptr & context) const override { if (!filter) throw std::runtime_error("FilterNode.filter is null"); @@ -1057,8 +1107,8 @@ class SetNode : public TemplateNode { std::vector var_names; std::shared_ptr value; public: - SetNode(const Location & location, const std::string & ns, const std::vector & vns, std::shared_ptr && v) - : TemplateNode(location), ns(ns), var_names(vns), value(std::move(v)) {} + SetNode(const Location & loc, const std::string & ns, const std::vector & vns, std::shared_ptr && v) + : TemplateNode(loc), ns(ns), var_names(vns), value(std::move(v)) {} void do_render(std::ostringstream &, const std::shared_ptr & context) const override { if (!value) throw std::runtime_error("SetNode.value is null"); if (!ns.empty()) { @@ -1080,8 +1130,8 @@ class SetTemplateNode : public TemplateNode { std::string name; std::shared_ptr template_value; public: - SetTemplateNode(const Location & location, const std::string & name, std::shared_ptr && tv) - : TemplateNode(location), name(name), template_value(std::move(tv)) {} + SetTemplateNode(const Location & loc, const std::string & name, std::shared_ptr && tv) + : 
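The Break/Continue tokens and LoopControlException give templates Jinja-style {% break %} and {% continue %} inside {% for %}; ForNode catches the exception to leave or skip an iteration. A hedged sketch exercising it through the public parser (values are illustrative, and the exact whitespace of the output depends on the trim options):

    #include "minja/minja.hpp"

    auto tmpl = minja::Parser::parse(
        "{%- for x in xs -%}{%- if x == 3 %}{% break %}{% endif -%}{{ x }} {%- endfor -%}",
        { /* .trim_blocks = */ true, /* .lstrip_blocks = */ true, /* .keep_trailing_newline = */ false });

    auto ctx = minja::Context::make(json({{"xs", {1, 2, 3, 4}}}));
    std::string out = tmpl->render(ctx);   // renders 1 and 2, then stops at the break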
TemplateNode(loc), name(name), template_value(std::move(tv)) {} void do_render(std::ostringstream &, const std::shared_ptr & context) const override { if (!template_value) throw std::runtime_error("SetTemplateNode.template_value is null"); Value value { template_value->render(context) }; @@ -1094,8 +1144,8 @@ class IfExpr : public Expression { std::shared_ptr then_expr; std::shared_ptr else_expr; public: - IfExpr(const Location & location, std::shared_ptr && c, std::shared_ptr && t, std::shared_ptr && e) - : Expression(location), condition(std::move(c)), then_expr(std::move(t)), else_expr(std::move(e)) {} + IfExpr(const Location & loc, std::shared_ptr && c, std::shared_ptr && t, std::shared_ptr && e) + : Expression(loc), condition(std::move(c)), then_expr(std::move(t)), else_expr(std::move(e)) {} Value do_evaluate(const std::shared_ptr & context) const override { if (!condition) throw std::runtime_error("IfExpr.condition is null"); if (!then_expr) throw std::runtime_error("IfExpr.then_expr is null"); @@ -1112,16 +1162,16 @@ public: class LiteralExpr : public Expression { Value value; public: - LiteralExpr(const Location & location, const Value& v) - : Expression(location), value(v) {} + LiteralExpr(const Location & loc, const Value& v) + : Expression(loc), value(v) {} Value do_evaluate(const std::shared_ptr &) const override { return value; } }; class ArrayExpr : public Expression { std::vector> elements; public: - ArrayExpr(const Location & location, std::vector> && e) - : Expression(location), elements(std::move(e)) {} + ArrayExpr(const Location & loc, std::vector> && e) + : Expression(loc), elements(std::move(e)) {} Value do_evaluate(const std::shared_ptr & context) const override { auto result = Value::array(); for (const auto& e : elements) { @@ -1135,8 +1185,8 @@ public: class DictExpr : public Expression { std::vector, std::shared_ptr>> elements; public: - DictExpr(const Location & location, std::vector, std::shared_ptr>> && e) - : Expression(location), elements(std::move(e)) {} + DictExpr(const Location & loc, std::vector, std::shared_ptr>> && e) + : Expression(loc), elements(std::move(e)) {} Value do_evaluate(const std::shared_ptr & context) const override { auto result = Value::object(); for (const auto& [key, value] : elements) { @@ -1151,8 +1201,8 @@ public: class SliceExpr : public Expression { public: std::shared_ptr start, end; - SliceExpr(const Location & location, std::shared_ptr && s, std::shared_ptr && e) - : Expression(location), start(std::move(s)), end(std::move(e)) {} + SliceExpr(const Location & loc, std::shared_ptr && s, std::shared_ptr && e) + : Expression(loc), start(std::move(s)), end(std::move(e)) {} Value do_evaluate(const std::shared_ptr &) const override { throw std::runtime_error("SliceExpr not implemented"); } @@ -1162,8 +1212,8 @@ class SubscriptExpr : public Expression { std::shared_ptr base; std::shared_ptr index; public: - SubscriptExpr(const Location & location, std::shared_ptr && b, std::shared_ptr && i) - : Expression(location), base(std::move(b)), index(std::move(i)) {} + SubscriptExpr(const Location & loc, std::shared_ptr && b, std::shared_ptr && i) + : Expression(loc), base(std::move(b)), index(std::move(i)) {} Value do_evaluate(const std::shared_ptr & context) const override { if (!base) throw std::runtime_error("SubscriptExpr.base is null"); if (!index) throw std::runtime_error("SubscriptExpr.index is null"); @@ -1205,8 +1255,8 @@ public: enum class Op { Plus, Minus, LogicalNot, Expansion, ExpansionDict }; std::shared_ptr expr; Op op; - 
UnaryOpExpr(const Location & location, std::shared_ptr && e, Op o) - : Expression(location), expr(std::move(e)), op(o) {} + UnaryOpExpr(const Location & loc, std::shared_ptr && e, Op o) + : Expression(loc), expr(std::move(e)), op(o) {} Value do_evaluate(const std::shared_ptr & context) const override { if (!expr) throw std::runtime_error("UnaryOpExpr.expr is null"); auto e = expr->evaluate(context); @@ -1231,8 +1281,8 @@ private: std::shared_ptr right; Op op; public: - BinaryOpExpr(const Location & location, std::shared_ptr && l, std::shared_ptr && r, Op o) - : Expression(location), left(std::move(l)), right(std::move(r)), op(o) {} + BinaryOpExpr(const Location & loc, std::shared_ptr && l, std::shared_ptr && r, Op o) + : Expression(loc), left(std::move(l)), right(std::move(r)), op(o) {} Value do_evaluate(const std::shared_ptr & context) const override { if (!left) throw std::runtime_error("BinaryOpExpr.left is null"); if (!right) throw std::runtime_error("BinaryOpExpr.right is null"); @@ -1340,13 +1390,34 @@ struct ArgumentsExpression { } }; -static std::string strip(const std::string & s) { - auto start = s.find_first_not_of(" \t\n\r"); +static std::string strip(const std::string & s, const std::string & chars = "", bool left = true, bool right = true) { + auto charset = chars.empty() ? " \t\n\r" : chars; + auto start = left ? s.find_first_not_of(charset) : 0; if (start == std::string::npos) return ""; - auto end = s.find_last_not_of(" \t\n\r"); + auto end = right ? s.find_last_not_of(charset) : s.size() - 1; return s.substr(start, end - start + 1); } +static std::vector split(const std::string & s, const std::string & sep) { + std::vector result; + size_t start = 0; + size_t end = s.find(sep); + while (end != std::string::npos) { + result.push_back(s.substr(start, end - start)); + start = end + sep.length(); + end = s.find(sep, start); + } + result.push_back(s.substr(start)); + return result; +} + +static std::string capitalize(const std::string & s) { + if (s.empty()) return s; + auto result = s; + result[0] = std::toupper(result[0]); + return result; +} + static std::string html_escape(const std::string & s) { std::string result; result.reserve(s.size()); @@ -1368,8 +1439,8 @@ class MethodCallExpr : public Expression { std::shared_ptr method; ArgumentsExpression args; public: - MethodCallExpr(const Location & location, std::shared_ptr && obj, std::shared_ptr && m, ArgumentsExpression && a) - : Expression(location), object(std::move(obj)), method(std::move(m)), args(std::move(a)) {} + MethodCallExpr(const Location & loc, std::shared_ptr && obj, std::shared_ptr && m, ArgumentsExpression && a) + : Expression(loc), object(std::move(obj)), method(std::move(m)), args(std::move(a)) {} Value do_evaluate(const std::shared_ptr & context) const override { if (!object) throw std::runtime_error("MethodCallExpr.object is null"); if (!method) throw std::runtime_error("MethodCallExpr.method is null"); @@ -1422,8 +1493,29 @@ public: } else if (obj.is_string()) { auto str = obj.get(); if (method->get_name() == "strip") { - vargs.expectArgs("strip method", {0, 0}, {0, 0}); - return Value(strip(str)); + vargs.expectArgs("strip method", {0, 1}, {0, 0}); + auto chars = vargs.args.empty() ? "" : vargs.args[0].get(); + return Value(strip(str, chars)); + } else if (method->get_name() == "lstrip") { + vargs.expectArgs("lstrip method", {0, 1}, {0, 0}); + auto chars = vargs.args.empty() ? 
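These string helpers (strip with an optional character set, split, capitalize) back new Python-like methods on template strings — strip/lstrip/rstrip, split and capitalize — wired up in the method-call handling just below. A few illustrative expressions, assuming minja accepts single-quoted string literals as Jinja does (expected results in the comments):

    // Render tiny one-expression templates to see the new string methods at work.
    auto render = [](const std::string & src) {
        return minja::Parser::parse(src, {})->render(minja::Context::make(json::object()));
    };

    render("{{ '  hello  '.strip() }}");      // "hello"
    render("{{ 'xxhixx'.strip('x') }}");      // "hi"  (strip now takes an optional charset)
    render("{{ '  hi'.lstrip() }}");          // "hi"
    render("{{ 'a,b,c'.split(',') }}");       // ["a", "b", "c"]
    render("{{ 'llama'.capitalize() }}");     // "Llama"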
"" : vargs.args[0].get(); + return Value(strip(str, chars, /* left= */ true, /* right= */ false)); + } else if (method->get_name() == "rstrip") { + vargs.expectArgs("rstrip method", {0, 1}, {0, 0}); + auto chars = vargs.args.empty() ? "" : vargs.args[0].get(); + return Value(strip(str, chars, /* left= */ false, /* right= */ true)); + } else if (method->get_name() == "split") { + vargs.expectArgs("split method", {1, 1}, {0, 0}); + auto sep = vargs.args[0].get(); + auto parts = split(str, sep); + Value result = Value::array(); + for (const auto& part : parts) { + result.push_back(Value(part)); + } + return result; + } else if (method->get_name() == "capitalize") { + vargs.expectArgs("capitalize method", {0, 0}, {0, 0}); + return Value(capitalize(str)); } else if (method->get_name() == "endswith") { vargs.expectArgs("endswith method", {1, 1}, {0, 0}); auto suffix = vargs.args[0].get(); @@ -1446,8 +1538,8 @@ class CallExpr : public Expression { public: std::shared_ptr object; ArgumentsExpression args; - CallExpr(const Location & location, std::shared_ptr && obj, ArgumentsExpression && a) - : Expression(location), object(std::move(obj)), args(std::move(a)) {} + CallExpr(const Location & loc, std::shared_ptr && obj, ArgumentsExpression && a) + : Expression(loc), object(std::move(obj)), args(std::move(a)) {} Value do_evaluate(const std::shared_ptr & context) const override { if (!object) throw std::runtime_error("CallExpr.object is null"); auto obj = object->evaluate(context); @@ -1462,8 +1554,8 @@ public: class FilterExpr : public Expression { std::vector> parts; public: - FilterExpr(const Location & location, std::vector> && p) - : Expression(location), parts(std::move(p)) {} + FilterExpr(const Location & loc, std::vector> && p) + : Expression(loc), parts(std::move(p)) {} Value do_evaluate(const std::shared_ptr & context) const override { Value result; bool first = true; @@ -1754,7 +1846,7 @@ private: auto left = parseStringConcat(); if (!left) throw std::runtime_error("Expected left side of 'logical compare' expression"); - static std::regex compare_tok(R"(==|!=|<=?|>=?|in\b|is\b|not[\r\n\s]+in\b)"); + static std::regex compare_tok(R"(==|!=|<=?|>=?|in\b|is\b|not\s+in\b)"); static std::regex not_tok(R"(not\b)"); std::string op_str; while (!(op_str = consumeToken(compare_tok)).empty()) { @@ -2133,7 +2225,7 @@ private: using TemplateTokenIterator = TemplateTokenVector::const_iterator; std::vector parseVarNames() { - static std::regex varnames_regex(R"(((?:\w+)(?:[\r\n\s]*,[\r\n\s]*(?:\w+))*)[\r\n\s]*)"); + static std::regex varnames_regex(R"(((?:\w+)(?:\s*,\s*(?:\w+))*)\s*)"); std::vector group; if ((group = consumeTokenGroups(varnames_regex)).empty()) throw std::runtime_error("Expected variable names"); @@ -2156,13 +2248,13 @@ private: } TemplateTokenVector tokenize() { - static std::regex comment_tok(R"(\{#([-~]?)(.*?)([-~]?)#\})"); + static std::regex comment_tok(R"(\{#([-~]?)([\s\S]*?)([-~]?)#\})"); static std::regex expr_open_regex(R"(\{\{([-~])?)"); - static std::regex block_open_regex(R"(^\{%([-~])?[\s\n\r]*)"); - static std::regex block_keyword_tok(R"((if|else|elif|endif|for|endfor|generation|endgeneration|set|endset|block|endblock|macro|endmacro|filter|endfilter)\b)"); + static std::regex block_open_regex(R"(^\{%([-~])?\s*)"); + static std::regex block_keyword_tok(R"((if|else|elif|endif|for|endfor|generation|endgeneration|set|endset|block|endblock|macro|endmacro|filter|endfilter|break|continue)\b)"); static std::regex non_text_open_regex(R"(\{\{|\{%|\{#)"); - static std::regex 
expr_close_regex(R"([\s\n\r]*([-~])?\}\})"); - static std::regex block_close_regex(R"([\s\n\r]*([-~])?%\})"); + static std::regex expr_close_regex(R"(\s*([-~])?\}\})"); + static std::regex block_close_regex(R"(\s*([-~])?%\})"); TemplateTokenVector tokens; std::vector group; @@ -2246,7 +2338,7 @@ private: auto post_space = parseBlockClose(); tokens.push_back(std::make_unique(location, pre_space, post_space)); } else if (keyword == "set") { - static std::regex namespaced_var_regex(R"((\w+)[\s\n\r]*\.[\s\n\r]*(\w+))"); + static std::regex namespaced_var_regex(R"((\w+)\s*\.\s*(\w+))"); std::string ns; std::vector var_names; @@ -2291,10 +2383,18 @@ private: } else if (keyword == "endfilter") { auto post_space = parseBlockClose(); tokens.push_back(std::make_unique(location, pre_space, post_space)); + } else if (keyword == "break" || keyword == "continue") { + auto post_space = parseBlockClose(); + tokens.push_back(std::make_unique(location, pre_space, post_space, keyword == "break" ? LoopControlType::Break : LoopControlType::Continue)); } else { throw std::runtime_error("Unexpected block: " + keyword); } } else if (std::regex_search(it, end, match, non_text_open_regex)) { + if (!match.position()) { + if (match[0] != "{#") + throw std::runtime_error("Internal error: Expected a comment"); + throw std::runtime_error("Missing end of comment tag"); + } auto text_end = it + match.position(); text = std::string(it, text_end); it = text_end; @@ -2359,7 +2459,7 @@ private: auto text = text_token->text; if (post_space == SpaceHandling::Strip) { - static std::regex trailing_space_regex(R"((\s|\r|\n)+$)"); + static std::regex trailing_space_regex(R"(\s+$)"); text = std::regex_replace(text, trailing_space_regex, ""); } else if (options.lstrip_blocks && it != end) { auto i = text.size(); @@ -2369,10 +2469,10 @@ private: } } if (pre_space == SpaceHandling::Strip) { - static std::regex leading_space_regex(R"(^(\s|\r|\n)+)"); + static std::regex leading_space_regex(R"(^\s+)"); text = std::regex_replace(text, leading_space_regex, ""); } else if (options.trim_blocks && (it - 1) != begin && !dynamic_cast((*(it - 2)).get())) { - if (text.length() > 0 && text[0] == '\n') { + if (!text.empty() && text[0] == '\n') { text.erase(0, 1); } } @@ -2414,6 +2514,8 @@ private: children.emplace_back(std::make_shared(token->location, std::move(filter_token->filter), std::move(body))); } else if (dynamic_cast(token.get())) { // Ignore comments + } else if (auto ctrl_token = dynamic_cast(token.get())) { + children.emplace_back(std::make_shared(token->location, ctrl_token->control_type)); } else if (dynamic_cast(token.get()) || dynamic_cast(token.get()) || dynamic_cast(token.get()) @@ -2448,7 +2550,7 @@ public: TemplateTokenIterator begin = tokens.begin(); auto it = begin; TemplateTokenIterator end = tokens.end(); - return parser.parseTemplate(begin, it, end, /* full= */ true); + return parser.parseTemplate(begin, it, end, /* fully= */ true); } }; @@ -2487,7 +2589,7 @@ inline std::shared_ptr Context::builtins() { throw std::runtime_error(args.at("message").get()); })); globals.set("tojson", simple_function("tojson", { "value", "indent" }, [](const std::shared_ptr &, Value & args) { - return Value(args.at("value").dump(args.get("indent", -1), /* tojson= */ true)); + return Value(args.at("value").dump(args.get("indent", -1), /* to_json= */ true)); })); globals.set("items", simple_function("items", { "object" }, [](const std::shared_ptr &, Value & args) { auto items = Value::array(); @@ -2509,21 +2611,25 @@ inline std::shared_ptr 
Context::builtins() { globals.set("last", simple_function("last", { "items" }, [](const std::shared_ptr &, Value & args) { auto items = args.at("items"); if (!items.is_array()) throw std::runtime_error("object is not a list"); - if (items.size() == 0) return Value(); + if (items.empty()) return Value(); return items.at(items.size() - 1); })); globals.set("trim", simple_function("trim", { "text" }, [](const std::shared_ptr &, Value & args) { auto & text = args.at("text"); return text.is_null() ? text : Value(strip(text.get())); })); - globals.set("lower", simple_function("lower", { "text" }, [](const std::shared_ptr &, Value & args) { - auto text = args.at("text"); - if (text.is_null()) return text; - std::string res; - auto str = text.get(); - std::transform(str.begin(), str.end(), std::back_inserter(res), ::tolower); - return Value(res); - })); + auto char_transform_function = [](const std::string & name, const std::function & fn) { + return simple_function(name, { "text" }, [=](const std::shared_ptr &, Value & args) { + auto text = args.at("text"); + if (text.is_null()) return text; + std::string res; + auto str = text.get(); + std::transform(str.begin(), str.end(), std::back_inserter(res), fn); + return Value(res); + }); + }; + globals.set("lower", char_transform_function("lower", ::tolower)); + globals.set("upper", char_transform_function("upper", ::toupper)); globals.set("default", Value::callable([=](const std::shared_ptr &, ArgumentsValue & args) { args.expectArgs("default", {2, 3}, {0, 1}); auto & value = args.args[0]; @@ -2572,6 +2678,7 @@ inline std::shared_ptr Context::builtins() { })); globals.set("join", simple_function("join", { "items", "d" }, [](const std::shared_ptr &, Value & args) { auto do_join = [](Value & items, const std::string & sep) { + if (!items.is_array()) throw std::runtime_error("object is not iterable: " + items.dump()); std::ostringstream oss; auto first = true; for (size_t i = 0, n = items.size(); i < n; ++i) { @@ -2648,31 +2755,43 @@ inline std::shared_ptr Context::builtins() { return filter.call(context, actual_args); }); }; - // https://jinja.palletsprojects.com/en/3.0.x/templates/#jinja-filters.reject - globals.set("reject", Value::callable([=](const std::shared_ptr & context, ArgumentsValue & args) { - args.expectArgs("reject", {2, (std::numeric_limits::max)()}, {0, 0}); - auto & items = args.args[0]; - auto filter_fn = context->get(args.args[1]); - if (filter_fn.is_null()) throw std::runtime_error("Undefined filter: " + args.args[1].dump()); - - auto filter_args = Value::array(); - for (size_t i = 2, n = args.args.size(); i < n; i++) { - filter_args.push_back(args.args[i]); - } - auto filter = make_filter(filter_fn, filter_args); - - auto res = Value::array(); - for (size_t i = 0, n = items.size(); i < n; i++) { - auto & item = items.at(i); - ArgumentsValue filter_args; - filter_args.args.emplace_back(item); - auto pred_res = filter.call(context, filter_args); - if (!pred_res.to_bool()) { - res.push_back(item); + auto select_or_reject = [make_filter](bool is_select) { + return Value::callable([=](const std::shared_ptr & context, ArgumentsValue & args) { + args.expectArgs(is_select ? 
"select" : "reject", {2, (std::numeric_limits::max)()}, {0, 0}); + auto & items = args.args[0]; + if (items.is_null()) { + return Value::array(); } - } - return res; - })); + if (!items.is_array()) { + throw std::runtime_error("object is not iterable: " + items.dump()); + } + + auto filter_fn = context->get(args.args[1]); + if (filter_fn.is_null()) { + throw std::runtime_error("Undefined filter: " + args.args[1].dump()); + } + + auto filter_args = Value::array(); + for (size_t i = 2, n = args.args.size(); i < n; i++) { + filter_args.push_back(args.args[i]); + } + auto filter = make_filter(filter_fn, filter_args); + + auto res = Value::array(); + for (size_t i = 0, n = items.size(); i < n; i++) { + auto & item = items.at(i); + ArgumentsValue filter_args; + filter_args.args.emplace_back(item); + auto pred_res = filter.call(context, filter_args); + if (pred_res.to_bool() == (is_select ? true : false)) { + res.push_back(item); + } + } + return res; + }); + }; + globals.set("select", select_or_reject(/* is_select= */ true)); + globals.set("reject", select_or_reject(/* is_select= */ false)); globals.set("map", Value::callable([=](const std::shared_ptr & context, ArgumentsValue & args) { auto res = Value::array(); if (args.args.size() == 1 && @@ -2720,41 +2839,46 @@ inline std::shared_ptr Context::builtins() { if (!text.empty() && text.back() == '\n') out += "\n"; return out; })); - globals.set("selectattr", Value::callable([=](const std::shared_ptr & context, ArgumentsValue & args) { - args.expectArgs("selectattr", {2, (std::numeric_limits::max)()}, {0, 0}); - auto & items = args.args[0]; - if (items.is_null()) - return Value::array(); - auto attr_name = args.args[1].get(); + auto select_or_reject_attr = [](bool is_select) { + return Value::callable([=](const std::shared_ptr & context, ArgumentsValue & args) { + args.expectArgs(is_select ? 
"selectattr" : "rejectattr", {2, (std::numeric_limits::max)()}, {0, 0}); + auto & items = args.args[0]; + if (items.is_null()) + return Value::array(); + if (!items.is_array()) throw std::runtime_error("object is not iterable: " + items.dump()); + auto attr_name = args.args[1].get(); - bool has_test = false; - Value test_fn; - ArgumentsValue test_args {{Value()}, {}}; - if (args.args.size() >= 3) { - has_test = true; - test_fn = context->get(args.args[2]); - if (test_fn.is_null()) throw std::runtime_error("Undefined test: " + args.args[2].dump()); - for (size_t i = 3, n = args.args.size(); i < n; i++) { - test_args.args.emplace_back(args.args[i]); - } - test_args.kwargs = args.kwargs; - } - - auto res = Value::array(); - for (size_t i = 0, n = items.size(); i < n; i++) { - auto & item = items.at(i); - auto attr = item.get(attr_name); - if (has_test) { - test_args.args[0] = attr; - if (test_fn.call(context, test_args).to_bool()) { - res.push_back(item); + bool has_test = false; + Value test_fn; + ArgumentsValue test_args {{Value()}, {}}; + if (args.args.size() >= 3) { + has_test = true; + test_fn = context->get(args.args[2]); + if (test_fn.is_null()) throw std::runtime_error("Undefined test: " + args.args[2].dump()); + for (size_t i = 3, n = args.args.size(); i < n; i++) { + test_args.args.emplace_back(args.args[i]); } - } else { - res.push_back(attr); + test_args.kwargs = args.kwargs; } - } - return res; - })); + + auto res = Value::array(); + for (size_t i = 0, n = items.size(); i < n; i++) { + auto & item = items.at(i); + auto attr = item.get(attr_name); + if (has_test) { + test_args.args[0] = attr; + if (test_fn.call(context, test_args).to_bool() == (is_select ? true : false)) { + res.push_back(item); + } + } else { + res.push_back(attr); + } + } + return res; + }); + }; + globals.set("selectattr", select_or_reject_attr(/* is_select= */ true)); + globals.set("rejectattr", select_or_reject_attr(/* is_select= */ false)); globals.set("range", Value::callable([=](const std::shared_ptr &, ArgumentsValue & args) { std::vector startEndStep(3); std::vector param_set(3); @@ -2767,20 +2891,25 @@ inline std::shared_ptr Context::builtins() { auto v = arg.get(); startEndStep[i] = v; param_set[i] = true; - } } - for (auto & [name, value] : args.kwargs) { - size_t i; - if (name == "start") i = 0; - else if (name == "end") i = 1; - else if (name == "step") i = 2; - else throw std::runtime_error("Unknown argument " + name + " for function range"); + } + for (auto & [name, value] : args.kwargs) { + size_t i; + if (name == "start") { + i = 0; + } else if (name == "end") { + i = 1; + } else if (name == "step") { + i = 2; + } else { + throw std::runtime_error("Unknown argument " + name + " for function range"); + } - if (param_set[i]) { - throw std::runtime_error("Duplicate argument " + name + " for function range"); - } - startEndStep[i] = value.get(); - param_set[i] = true; + if (param_set[i]) { + throw std::runtime_error("Duplicate argument " + name + " for function range"); + } + startEndStep[i] = value.get(); + param_set[i] = true; } if (!param_set[1]) { throw std::runtime_error("Missing required argument 'end' for function range"); diff --git a/common/ngram-cache.cpp b/common/ngram-cache.cpp index a057ae45f5..d1a4d84c40 100644 --- a/common/ngram-cache.cpp +++ b/common/ngram-cache.cpp @@ -7,6 +7,7 @@ #include #include #include +#include void common_ngram_cache_update(common_ngram_cache & ngram_cache, int ngram_min, int ngram_max, std::vector & inp, int nnew, bool print_progress) { diff --git 
a/common/sampling.cpp b/common/sampling.cpp index 7241ac3212..1735b65018 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -4,6 +4,7 @@ #include #include +#include // the ring buffer works similarly to std::deque, but with a fixed capacity // TODO: deduplicate with llama-impl.h @@ -134,11 +135,11 @@ std::string common_params_sampling::print() const { snprintf(result, sizeof(result), "\trepeat_last_n = %d, repeat_penalty = %.3f, frequency_penalty = %.3f, presence_penalty = %.3f\n" "\tdry_multiplier = %.3f, dry_base = %.3f, dry_allowed_length = %d, dry_penalty_last_n = %d\n" - "\ttop_k = %d, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, typical_p = %.3f, temp = %.3f\n" + "\ttop_k = %d, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, typical_p = %.3f, top_n_sigma = %.3f, temp = %.3f\n" "\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f", penalty_last_n, penalty_repeat, penalty_freq, penalty_present, dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n, - top_k, top_p, min_p, xtc_probability, xtc_threshold, typ_p, temp, + top_k, top_p, min_p, xtc_probability, xtc_threshold, typ_p, top_n_sigma, temp, mirostat, mirostat_eta, mirostat_tau); return std::string(result); @@ -151,9 +152,70 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co lparams.no_perf = params.no_perf; + struct llama_sampler * grmr; + if (params.grammar.compare(0, 11, "%llguidance") == 0) { +#ifdef LLAMA_USE_LLGUIDANCE + grmr = llama_sampler_init_llg(vocab, "lark", params.grammar.c_str()); +#else + GGML_ABORT("llguidance (cmake -DLLAMA_LLGUIDANCE=ON) is not enabled"); +#endif // LLAMA_USE_LLGUIDANCE + } else { + std::vector patterns_at_start; + std::vector patterns_anywhere; + std::vector trigger_tokens; + for (const auto & trigger : params.grammar_triggers) { + switch (trigger.type) { + case COMMON_GRAMMAR_TRIGGER_TYPE_WORD: + { + const auto & word = trigger.value; + patterns_anywhere.push_back(regex_escape(word)); + break; + } + case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN: + case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START: + { + const auto & pattern = trigger.value; + (trigger.type == COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START ? patterns_at_start : patterns_anywhere).push_back(pattern); + break; + } + case COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN: + { + const auto token = trigger.token; + trigger_tokens.push_back(token); + break; + } + default: + GGML_ASSERT(false && "unknown trigger type"); + } + } + + std::vector trigger_patterns; + if (!patterns_at_start.empty()) { + trigger_patterns.push_back("^(" + string_join(patterns_at_start, "|") + ")[\\s\\S]*"); + } + if (!patterns_anywhere.empty()) { + trigger_patterns.push_back("^[\\s\\S]*?(" + string_join(patterns_anywhere, "|") + ")[\\s\\S]*"); + } + + std::vector trigger_patterns_c; + trigger_patterns_c.reserve(trigger_patterns.size()); + for (const auto & regex : trigger_patterns) { + trigger_patterns_c.push_back(regex.c_str()); + } + + grmr = params.grammar_lazy + ? 
llama_sampler_init_grammar_lazy_patterns(vocab, params.grammar.c_str(), "root", + trigger_patterns_c.data(), trigger_patterns_c.size(), + trigger_tokens.data(), trigger_tokens.size()) + : llama_sampler_init_grammar(vocab, params.grammar.c_str(), "root"); + if (!grmr) { + return nullptr; + } + } + auto * result = new common_sampler { /* .params = */ params, - /* .grmr = */ llama_sampler_init_grammar(vocab, params.grammar.c_str(), "root"), + /* .grmr = */ grmr, /* .chain = */ llama_sampler_chain_init(lparams), /* .prev = */ ring_buffer(std::max(32, params.n_prev)), /* .cur = */ {}, @@ -167,45 +229,51 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co params.logit_bias.data())); if (params.mirostat == 0) { - for (const auto & cnstr : params.samplers) { - switch (cnstr) { - case COMMON_SAMPLER_TYPE_DRY: - { - std::vector c_breakers; - c_breakers.reserve(params.dry_sequence_breakers.size()); - for (const auto & str : params.dry_sequence_breakers) { - c_breakers.push_back(str.c_str()); - } + if (params.top_n_sigma >= 0) { + llama_sampler_chain_add(result->chain, llama_sampler_init_top_k (params.top_k)); + llama_sampler_chain_add(result->chain, llama_sampler_init_temp (params.temp)); + llama_sampler_chain_add(result->chain, llama_sampler_init_top_n_sigma (params.top_n_sigma)); + } else { + for (const auto & cnstr : params.samplers) { + switch (cnstr) { + case COMMON_SAMPLER_TYPE_DRY: + { + std::vector c_breakers; + c_breakers.reserve(params.dry_sequence_breakers.size()); + for (const auto & str : params.dry_sequence_breakers) { + c_breakers.push_back(str.c_str()); + } - llama_sampler_chain_add(result->chain, llama_sampler_init_dry (vocab, llama_model_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size())); - } - break; - case COMMON_SAMPLER_TYPE_TOP_K: - llama_sampler_chain_add(result->chain, llama_sampler_init_top_k (params.top_k)); - break; - case COMMON_SAMPLER_TYPE_TOP_P: - llama_sampler_chain_add(result->chain, llama_sampler_init_top_p (params.top_p, params.min_keep)); - break; - case COMMON_SAMPLER_TYPE_MIN_P: - llama_sampler_chain_add(result->chain, llama_sampler_init_min_p (params.min_p, params.min_keep)); - break; - case COMMON_SAMPLER_TYPE_XTC: - llama_sampler_chain_add(result->chain, llama_sampler_init_xtc (params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed)); - break; - case COMMON_SAMPLER_TYPE_TYPICAL_P: - llama_sampler_chain_add(result->chain, llama_sampler_init_typical (params.typ_p, params.min_keep)); - break; - case COMMON_SAMPLER_TYPE_TEMPERATURE: - llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent)); - break; - case COMMON_SAMPLER_TYPE_INFILL: - llama_sampler_chain_add(result->chain, llama_sampler_init_infill (vocab)); - break; - case COMMON_SAMPLER_TYPE_PENALTIES: - llama_sampler_chain_add(result->chain, llama_sampler_init_penalties(params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present)); - break; - default: - GGML_ASSERT(false && "unknown sampler type"); + llama_sampler_chain_add(result->chain, llama_sampler_init_dry (vocab, llama_model_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size())); + } + break; + case COMMON_SAMPLER_TYPE_TOP_K: + llama_sampler_chain_add(result->chain, llama_sampler_init_top_k 
(params.top_k)); + break; + case COMMON_SAMPLER_TYPE_TOP_P: + llama_sampler_chain_add(result->chain, llama_sampler_init_top_p (params.top_p, params.min_keep)); + break; + case COMMON_SAMPLER_TYPE_MIN_P: + llama_sampler_chain_add(result->chain, llama_sampler_init_min_p (params.min_p, params.min_keep)); + break; + case COMMON_SAMPLER_TYPE_XTC: + llama_sampler_chain_add(result->chain, llama_sampler_init_xtc (params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed)); + break; + case COMMON_SAMPLER_TYPE_TYPICAL_P: + llama_sampler_chain_add(result->chain, llama_sampler_init_typical (params.typ_p, params.min_keep)); + break; + case COMMON_SAMPLER_TYPE_TEMPERATURE: + llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent)); + break; + case COMMON_SAMPLER_TYPE_INFILL: + llama_sampler_chain_add(result->chain, llama_sampler_init_infill (vocab)); + break; + case COMMON_SAMPLER_TYPE_PENALTIES: + llama_sampler_chain_add(result->chain, llama_sampler_init_penalties(params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present)); + break; + default: + GGML_ASSERT(false && "unknown sampler type"); + } } } llama_sampler_chain_add(result->chain, llama_sampler_init_dist(params.seed)); diff --git a/common/sampling.h b/common/sampling.h index 348911b188..2064421db4 100644 --- a/common/sampling.h +++ b/common/sampling.h @@ -102,3 +102,6 @@ std::string common_sampler_type_to_str(enum common_sampler_type cnstr); std::vector common_sampler_types_from_names(const std::vector & names, bool allow_alt_names); std::vector common_sampler_types_from_chars(const std::string & chars); + +llama_sampler * llama_sampler_init_llg(const llama_vocab * vocab, + const char * grammar_kind, const char * grammar_data); diff --git a/common/speculative.cpp b/common/speculative.cpp index 318e96ea35..ccad70fa9e 100644 --- a/common/speculative.cpp +++ b/common/speculative.cpp @@ -5,6 +5,7 @@ #include "sampling.h" #include +#include #define SPEC_VOCAB_MAX_SIZE_DIFFERENCE 128 #define SPEC_VOCAB_CHECK_START_TOKEN_ID 5 @@ -172,7 +173,7 @@ llama_tokens common_speculative_gen_draft( result.reserve(params.n_draft); if (reuse_n == 0) { - llama_kv_cache_clear(ctx); + llama_kv_self_clear(ctx); prompt.clear(); } else { @@ -191,14 +192,14 @@ llama_tokens common_speculative_gen_draft( } if (reuse_i > 0) { - llama_kv_cache_seq_rm (ctx, 0, 0, reuse_i); - llama_kv_cache_seq_add(ctx, 0, reuse_i, -1, -reuse_i); + llama_kv_self_seq_rm (ctx, 0, 0, reuse_i); + llama_kv_self_seq_add(ctx, 0, reuse_i, -1, -reuse_i); prompt.erase(prompt.begin(), prompt.begin() + reuse_i); } if (reuse_n < (int) prompt.size()) { - llama_kv_cache_seq_rm (ctx, 0, reuse_n, -1); + llama_kv_self_seq_rm (ctx, 0, reuse_n, -1); prompt.erase(prompt.begin() + reuse_n, prompt.end()); } @@ -252,11 +253,6 @@ llama_tokens common_speculative_gen_draft( // add drafted token for each sequence const llama_token id = cur_p->data[0].id; - // only collect very high-confidence draft tokens - if (cur_p->data[0].p < params.p_min) { - break; - } - common_sampler_accept(smpl, id, true); result.push_back(id); @@ -265,6 +261,11 @@ llama_tokens common_speculative_gen_draft( break; } + // only collect very high-confidence draft tokens + if (cur_p->data[0].p < params.p_min) { + break; + } + common_batch_add(batch, id, n_past + i + 1, { 0 }, true); // evaluate the drafted tokens on the draft model diff --git a/common/speculative.h b/common/speculative.h index 50ec034461..2b51a70ca1 100644 --- 
a/common/speculative.h +++ b/common/speculative.h @@ -9,7 +9,7 @@ struct common_speculative_params { int n_draft = 16; // max drafted tokens int n_reuse = 256; - float p_min = 0.9f; // min probabiliy required to accept a token in the draft + float p_min = 0.75f; // min probability required to accept a token in the draft }; struct common_speculative * common_speculative_init(struct llama_context * ctx_dft); diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 63b54a9cf6..89522dee8b 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -65,6 +65,7 @@ class Model: model_name: str | None metadata_override: Path | None dir_model_card: Path + remote_hf_model_id: str | None # subclasses should define this! model_arch: gguf.MODEL_ARCH @@ -73,7 +74,7 @@ class Model: use_temp_file: bool = False, eager: bool = False, metadata_override: Path | None = None, model_name: str | None = None, split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, - small_first_shard: bool = False, hparams: dict[str, Any] | None = None): + small_first_shard: bool = False, hparams: dict[str, Any] | None = None, remote_hf_model_id: str | None = None): if type(self) is Model: raise TypeError(f"{type(self).__name__!r} should not be directly instantiated") @@ -83,11 +84,24 @@ class Model: self.is_big_endian = is_big_endian self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE self.use_temp_file = use_temp_file - self.lazy = not eager - self.part_names = Model.get_model_part_names(self.dir_model, "model", ".safetensors") - self.is_safetensors = len(self.part_names) > 0 - if not self.is_safetensors: - self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin") + self.lazy = not eager or (remote_hf_model_id is not None) + self.remote_hf_model_id = remote_hf_model_id + if remote_hf_model_id is not None: + self.is_safetensors = True + + def get_remote_tensors() -> Iterator[tuple[str, Tensor]]: + logger.info(f"Using remote model with HuggingFace id: {remote_hf_model_id}") + remote_tensors = gguf.utility.SafetensorRemote.get_list_tensors_hf_model(remote_hf_model_id) + self.tensor_names = set(name for name in remote_tensors.keys()) + for name, remote_tensor in gguf.utility.SafetensorRemote.get_list_tensors_hf_model(remote_hf_model_id).items(): + yield (name, LazyTorchTensor.from_remote_tensor(remote_tensor)) + + self.get_tensors = get_remote_tensors + else: + self.part_names = Model.get_model_part_names(self.dir_model, "model", ".safetensors") + self.is_safetensors = len(self.part_names) > 0 + if not self.is_safetensors: + self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin") self.hparams = Model.load_hparams(self.dir_model) if hparams is None else hparams self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"]) self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count) @@ -180,7 +194,8 @@ class Model: extra = sorted(tensor_names_from_parts.difference(self.tensor_names)) missing_files = sorted(set(weight_map[n] for n in missing if n in weight_map)) if len(extra) == 0 and len(missing_files) > 0: - raise ValueError(f"Missing or incomplete model files: {missing_files}") + raise ValueError(f"Missing or incomplete model files: {missing_files}\n" + f"Missing tensors: {missing}") else: raise ValueError("Mismatch between weight map and model parts for tensor names:\n" f"Missing tensors: {missing}\n" @@ -392,6 +407,10 @@ class Model: self.metadata = 
gguf.Metadata.load(self.metadata_override, self.dir_model_card, self.model_name, total_params) + # If we are using HF model id, set the metadata name to the model id + if self.remote_hf_model_id: + self.metadata.name = self.remote_hf_model_id + # Fallback to model directory name if metadata name is still missing if self.metadata.name is None: self.metadata.name = self.dir_model.name @@ -528,6 +547,8 @@ class Model: reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items()} added_vocab = tokenizer.get_added_vocab() + added_tokens_decoder = tokenizer.added_tokens_decoder + for i in range(vocab_size): if i not in reverse_vocab: tokens.append(f"[PAD{i}]") @@ -537,13 +558,13 @@ class Model: if token in added_vocab: # The tokenizer in llama.cpp assumes the CONTROL and USER_DEFINED tokens are pre-normalized. # To avoid unexpected issues - we make sure to normalize non-normalized tokens - if not tokenizer.added_tokens_decoder[i].normalized: + if not added_tokens_decoder[i].normalized: previous_token = token token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False)) if previous_token != token: logger.info(f"{repr(previous_token)} is encoded and decoded back to {repr(token)} using AutoTokenizer") - if tokenizer.added_tokens_decoder[i].special or self.does_token_look_special(token): + if added_tokens_decoder[i].special or self.does_token_look_special(token): toktypes.append(gguf.TokenType.CONTROL) else: # NOTE: this was added for Gemma. @@ -558,7 +579,7 @@ class Model: # NOTE: this function is generated by convert_hf_to_gguf_update.py # do not modify it manually! - # ref: https://github.com/ggerganov/llama.cpp/pull/6920 + # ref: https://github.com/ggml-org/llama.cpp/pull/6920 # Marker: Start get_vocab_base_pre def get_vocab_base_pre(self, tokenizer) -> str: # encoding this string and hashing the resulting tokens would (hopefully) give us a unique identifier that @@ -648,7 +669,7 @@ class Model: if chkhsh == "7967bfa498ade6b757b064f31e964dddbb80f8f9a4d68d4ba7998fcf281c531a": # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-code res = "jina-v2-code" - if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b": + if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b" or chkhsh == "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516": # ref: https://huggingface.co/THUDM/glm-4-9b-chat res = "chatglm-bpe" if chkhsh == "7fc505bd3104ca1083b150b17d088b59534ede9bde81f0dd2090967d7fe52cee": @@ -699,6 +720,24 @@ class Model: if chkhsh == "b3f499bb4255f8ca19fccd664443283318f2fd2414d5e0b040fbdd0cc195d6c5": # ref: https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B res = "deepseek-r1-qwen" + if chkhsh == "ccc2ef013c104be7bae2965776d611e1d7a8a2a9c547dd93a682c9a9fc80352e": + # ref: https://huggingface.co/Xenova/gpt-4o + res = "gpt-4o" + if chkhsh == "7dec86086fcc38b66b7bc1575a160ae21cf705be7718b9d5598190d7c12db76f": + # ref: https://huggingface.co/UW/OLMo2-8B-SuperBPE-t180k + res = "superbpe" + if chkhsh == "1994ffd01900cfb37395608534236ecd63f2bd5995d6cb1004dda1af50240f15": + # ref: https://huggingface.co/trillionlabs/Trillion-7B-preview + res = "trillion" + if chkhsh == "96a5f08be6259352137b512d4157e333e21df7edd3fcd152990608735a65b224": + # ref: https://huggingface.co/inclusionAI/Ling-lite + res = "bailingmoe" + if chkhsh == "d353350c764d8c3b39c763113960e4fb4919bea5fbf208a0e3b22e8469dc7406": + # ref: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct + res = "llama4" + if chkhsh 
== "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2": + # ref: https://huggingface.co/THUDM/glm-4-9b-hf + res = "glm4" if res is None: logger.warning("\n") @@ -708,7 +747,7 @@ class Model: logger.warning("** - the model has not been added to convert_hf_to_gguf_update.py yet") logger.warning("** - the pre-tokenization config has changed upstream") logger.warning("** Check your model files and convert_hf_to_gguf_update.py and update them accordingly.") - logger.warning("** ref: https://github.com/ggerganov/llama.cpp/pull/6920") + logger.warning("** ref: https://github.com/ggml-org/llama.cpp/pull/6920") logger.warning("**") logger.warning(f"** chkhsh: {chkhsh}") logger.warning("**************************************************************************************") @@ -858,6 +897,9 @@ class Model: for token_id, token_data in added_tokens_decoder.items(): token_id = int(token_id) token: str = token_data["content"] + if token_id >= vocab_size: + logger.warning(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}') + continue if toktypes[token_id] != SentencePieceTokenTypes.UNUSED: if tokens[token_id] != token.encode("utf-8"): logger.warning(f'replacing token {token_id}: {tokens[token_id].decode("utf-8")!r} -> {token!r}') @@ -902,6 +944,40 @@ class Model: special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens)) special_vocab.add_to_gguf(self.gguf_writer) + def _set_vocab_rwkv_world(self): + assert (self.dir_model / "rwkv_vocab_v20230424.txt").is_file() + vocab_size = self.hparams.get("vocab_size", 65536) + + tokens: list[bytes] = [''.encode("utf-8")] + toktypes: list[int] = [gguf.TokenType.CONTROL] + + with open(self.dir_model / "rwkv_vocab_v20230424.txt", "r", encoding="utf-8") as f: + lines = f.readlines() + for line in lines: + parts = line.split(' ') + assert len(parts) >= 3 + token, token_len = ast.literal_eval(' '.join(parts[1:-1])), int(parts[-1]) + token = token.encode("utf-8") if isinstance(token, str) else token + assert isinstance(token, bytes) + assert len(token) == token_len + token_text: str = repr(token)[2:-1] # "b'\xff'" -> "\xff" + tokens.append(token_text.encode("utf-8")) + toktypes.append(gguf.TokenType.NORMAL) + remainder = vocab_size - len(tokens) + assert remainder >= 0 + for i in range(len(tokens), vocab_size): + tokens.append(f"[PAD{i}]".encode("utf-8")) + toktypes.append(gguf.TokenType.UNUSED) + + self.gguf_writer.add_tokenizer_model("rwkv") + self.gguf_writer.add_token_list(tokens) + self.gguf_writer.add_token_types(toktypes) + special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False) + special_vocab.chat_template = "rwkv-world" + # hack: Add '\n\n' as the EOT token to make it chat normally + special_vocab._set_special_token("eot", 261) + special_vocab.add_to_gguf(self.gguf_writer) + def _set_vocab_builtin(self, model_name: Literal["gpt-neox", "llama-spm"], vocab_size: int): tokenizer_path = Path(sys.path[0]) / "models" / f"ggml-vocab-{model_name}.gguf" logger.warning(f"Using tokenizer from '{os.path.relpath(tokenizer_path, os.getcwd())}'") @@ -1059,13 +1135,6 @@ class BloomModel(Model): tensors.append((self.map_tensor_name(name), data_torch)) - if name == "word_embeddings.weight": - assert self.tensor_names is not None - - # TODO: tie them at runtime, don't duplicate in the model file - if all(s not in self.tensor_names for s in ("lm_head.weight", "output.weight")): - tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch)) - return tensors @@ -1563,6 +1632,7 @@ class StableLMModel(Model): 
@Model.register("LLaMAForCausalLM", "LlamaForCausalLM", "MistralForCausalLM", "MixtralForCausalLM") class LlamaModel(Model): model_arch = gguf.MODEL_ARCH.LLAMA + undo_permute = True def set_vocab(self): try: @@ -1627,10 +1697,11 @@ class LlamaModel(Model): n_head = self.hparams["num_attention_heads"] n_kv_head = self.hparams.get("num_key_value_heads") - if name.endswith(("q_proj.weight", "q_proj.bias")): - data_torch = LlamaModel.permute(data_torch, n_head, n_head) - if name.endswith(("k_proj.weight", "k_proj.bias")): - data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head) + if self.undo_permute: + if name.endswith(("q_proj.weight", "q_proj.bias")): + data_torch = LlamaModel.permute(data_torch, n_head, n_head) + if name.endswith(("k_proj.weight", "k_proj.bias")): + data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head) # process the experts separately if name.find("block_sparse_moe.experts") != -1: @@ -1682,7 +1753,7 @@ class LlamaModel(Model): low_freq_wavelen = old_context_len / low_freq_factor high_freq_wavelen = old_context_len / high_freq_factor - assert low_freq_wavelen != high_freq_wavelen + # assert low_freq_wavelen != high_freq_wavelen # Errors for Llama4 rope_factors = [] for freq in freqs: @@ -1707,6 +1778,76 @@ class LlamaModel(Model): raise ValueError(f"Unprocessed experts: {experts}") +@Model.register("Llama4ForConditionalGeneration") +class Llama4Model(LlamaModel): + model_arch = gguf.MODEL_ARCH.LLAMA4 + has_vision: bool = False + undo_permute = False + + # TODO @ngxson : avoid duplicate this code everywhere by at least support "text_config" + # same with llama, but we need to merge the text_config into the root level of hparams + def __init__(self, *args, **kwargs): + hparams = kwargs["hparams"] if "hparams" in kwargs else Model.load_hparams(args[0]) + if "text_config" in hparams: + hparams = {**hparams, **hparams["text_config"]} + kwargs["hparams"] = hparams + super().__init__(*args, **kwargs) + if "vision_config" in hparams: + logger.info("Has vision encoder, but it will be ignored") + self.has_vision = True + # IMPORTANT: the normal "intermediate_size" is renamed to "intermediate_size_mlp", we need to undo this + self.hparams["intermediate_size_moe"] = self.hparams["intermediate_size"] + self.hparams["intermediate_size"] = self.hparams["intermediate_size_mlp"] + + def set_vocab(self): + self._set_vocab_gpt2() + self.gguf_writer.add_add_bos_token(True) + + def set_gguf_parameters(self): + super().set_gguf_parameters() + self.gguf_writer.add_interleave_moe_layer_step(self.hparams["interleave_moe_layer_step"]) + self.gguf_writer.add_expert_feed_forward_length(self.hparams["intermediate_size_moe"]) + + def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None): + # split the gate_up into gate and up + if "gate_up_proj" in name: + name_up = name.replace("gate_up_proj", "up_proj.weight") + name_gate = name.replace("gate_up_proj", "gate_proj.weight") + dim_half = data_torch.shape[-1] // 2 + gate_proj_weight, up_proj_weight = data_torch.transpose(-1, -2).split(dim_half, dim=-2) + return [ + (self.map_tensor_name(name_gate), gate_proj_weight), + (self.map_tensor_name(name_up), up_proj_weight) + ] + + if name.endswith("down_proj"): + name += ".weight" + data_torch = data_torch.transpose(-1, -2) + + if "multi_modal_projector" in name or "vision_model" in name: + return [] + return super().modify_tensors(data_torch, name, bid) + + +@Model.register("Mistral3ForConditionalGeneration") +class Mistral3Model(LlamaModel): + model_arch = gguf.MODEL_ARCH.LLAMA 
+ + # we need to merge the text_config into the root level of hparams + def __init__(self, *args, **kwargs): + hparams = kwargs["hparams"] if "hparams" in kwargs else Model.load_hparams(args[0]) + if "text_config" in hparams: + hparams = {**hparams, **hparams["text_config"]} + kwargs["hparams"] = hparams + super().__init__(*args, **kwargs) + + def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None): + name = name.replace("language_model.", "") + if "multi_modal_projector" in name or "vision_tower" in name: + return [] + return super().modify_tensors(data_torch, name, bid) + + @Model.register("DeciLMForCausalLM") class DeciModel(Model): model_arch = gguf.MODEL_ARCH.DECI @@ -2211,7 +2352,7 @@ class Qwen2Model(Model): self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"]) -@Model.register("Qwen2VLForConditionalGeneration") +@Model.register("Qwen2VLForConditionalGeneration", "Qwen2_5_VLForConditionalGeneration") class Qwen2VLModel(Model): model_arch = gguf.MODEL_ARCH.QWEN2VL @@ -2335,6 +2476,16 @@ class Qwen2MoeModel(Model): raise ValueError(f"Unprocessed experts: {experts}") +@Model.register("Qwen3ForCausalLM") +class Qwen3Model(Qwen2Model): + model_arch = gguf.MODEL_ARCH.QWEN3 + + +@Model.register("Qwen3MoeForCausalLM") +class Qwen3MoeModel(Qwen2MoeModel): + model_arch = gguf.MODEL_ARCH.QWEN3MOE + + @Model.register("GPT2LMHeadModel") class GPT2Model(Model): model_arch = gguf.MODEL_ARCH.GPT2 @@ -2364,10 +2515,6 @@ class GPT2Model(Model): tensors.append((new_name, data_torch)) - # note: GPT2 output is tied to (same as) wte in original model - if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD): - tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch)) - return tensors @@ -2512,7 +2659,8 @@ class Phi3MiniModel(Model): rms_eps = self.find_hparam(["rms_norm_eps"]) max_pos_embds = self.find_hparam(["n_positions", "max_position_embeddings"]) orig_max_pos_embds = self.find_hparam(["original_max_position_embeddings"]) - rope_dims = n_embd // n_head + rot_pct = self.hparams.get("partial_rotary_factor", 1.0) + rope_dims = int(rot_pct * n_embd) // n_head self.gguf_writer.add_context_length(max_pos_embds) self.gguf_writer.add_rope_scaling_orig_ctx_len(orig_max_pos_embds) @@ -2536,7 +2684,8 @@ class Phi3MiniModel(Model): n_head = self.find_hparam(["num_attention_heads", "n_head"]) max_pos_embds = self.find_hparam(["n_positions", "max_position_embeddings"]) orig_max_pos_embds = self.find_hparam(["original_max_position_embeddings"]) - rope_dims = n_embd // n_head + rot_pct = self.hparams.get("partial_rotary_factor", 1.0) + rope_dims = int(rot_pct * n_embd) // n_head # write rope scaling for long context (128k) model rope_scaling = self.find_hparam(['rope_scaling'], True) @@ -2565,7 +2714,7 @@ class Phi3MiniModel(Model): raise KeyError('Missing the required key rope_scaling.long_factor or rope_scaling_short_factor') if len(long_factors) != len(short_factors) or len(long_factors) != rope_dims / 2: - raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}') + raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}. 
long_factors = {len(long_factors)}, short_factors = {len(short_factors)}.') yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FACTORS_LONG), torch.tensor(long_factors, dtype=torch.float32)) yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT), torch.tensor(short_factors, dtype=torch.float32)) @@ -2695,21 +2844,26 @@ class CodeShellModel(Model): self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR) self.gguf_writer.add_rope_scaling_factor(1.0) + _has_tok_embd = False + def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: del bid # unused + output_name = self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT) + tok_embd_name = self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD) + new_name = self.map_tensor_name(name) - tensors: list[tuple[str, Tensor]] = [(new_name, data_torch)] + # assuming token_embd.weight is seen before output.weight + if not self._has_tok_embd and new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT): + # even though the tensor file(s) does not contain the word embeddings they are still in the weight map + if self.tensor_names and "transformer.wte.weight" in self.tensor_names: + logger.debug(f"{tok_embd_name} not found before {output_name}, assuming they are tied") + self.tensor_names.remove("transformer.wte.weight") + elif new_name == tok_embd_name: + self._has_tok_embd = True - if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD): - assert self.tensor_names is not None - - if all(s not in self.tensor_names for s in ("lm_head.weight", "output.weight")): - # copy tok_embd.weight to output.weight - tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch)) - - return tensors + return [(new_name, data_torch)] @Model.register("InternLM2ForCausalLM") @@ -2835,7 +2989,7 @@ class InternLM2Model(Model): if chat_eos_token_id is not None: # For the chat model, we replace the eos with '<|im_end|>'. 
# TODO: this is a hack, should be fixed - # https://github.com/ggerganov/llama.cpp/pull/6745#issuecomment-2067687048 + # https://github.com/ggml-org/llama.cpp/pull/6745#issuecomment-2067687048 special_vocab.special_token_ids["eos"] = chat_eos_token_id logger.warning(f"Replace eos:{old_eos} with a special token:{chat_eos_token_id}" " in chat mode so that the conversation can end normally.") @@ -3317,6 +3471,83 @@ class Gemma2Model(Model): return [(self.map_tensor_name(name), data_torch)] +@Model.register("Gemma3ForCausalLM", "Gemma3ForConditionalGeneration") +class Gemma3Model(Model): + model_arch = gguf.MODEL_ARCH.GEMMA3 + has_vision: bool = False + + # we need to merge the text_config into the root level of hparams + def __init__(self, *args, **kwargs): + hparams = kwargs["hparams"] if "hparams" in kwargs else Model.load_hparams(args[0]) + if "text_config" in hparams: + hparams = {**hparams, **hparams["text_config"]} + kwargs["hparams"] = hparams + super().__init__(*args, **kwargs) + if "vision_config" in hparams: + logger.info("Has vision encoder, but it will be ignored") + self.has_vision = True + + def write(self): + super().write() + if self.has_vision: + logger.info("NOTE: this script only convert the language model to GGUF") + logger.info(" for the vision model, please use gemma3_convert_encoder_to_gguf.py") + + def set_vocab(self): + self._set_vocab_sentencepiece() + + self.gguf_writer.add_add_space_prefix(False) + + def set_gguf_parameters(self): + hparams = self.hparams + block_count = hparams["num_hidden_layers"] + + # some default values are not specified in the hparams + self.gguf_writer.add_context_length(hparams.get("max_position_embeddings", 131072)) + self.gguf_writer.add_embedding_length(hparams["hidden_size"]) + self.gguf_writer.add_block_count(block_count) + self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"]) + self.gguf_writer.add_head_count(hparams.get("num_attention_heads", 8)) + self.gguf_writer.add_layer_norm_rms_eps(self.hparams.get("rms_norm_eps", 1e-6)) + self.gguf_writer.add_key_length(hparams.get("head_dim", 256)) + self.gguf_writer.add_value_length(hparams.get("head_dim", 256)) + self.gguf_writer.add_file_type(self.ftype) + self.gguf_writer.add_rope_freq_base(hparams.get("rope_theta", 1_000_000.0)) # for global layers + # both attn_logit_softcapping and final_logit_softcapping are removed in Gemma3 + assert hparams.get("attn_logit_softcapping") is None + assert hparams.get("final_logit_softcapping") is None + self.gguf_writer.add_sliding_window(hparams["sliding_window"]) + self.gguf_writer.add_head_count_kv(hparams.get("num_key_value_heads", 4)) + if hparams.get("rope_scaling") is not None: + assert hparams["rope_scaling"]["rope_type"] == "linear" + # important: this rope_scaling is only applied for global layers, and not used by 1B model + self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR) + self.gguf_writer.add_rope_scaling_factor(hparams["rope_scaling"]["factor"]) + + def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: + del bid # unused + + if name.startswith("language_model."): + name = name.replace("language_model.", "") + elif name.startswith("multi_modal_projector.") or name.startswith("vision_tower.") \ + or name.startswith("multimodal_projector.") or name.startswith("vision_model."): # this is for old HF model, should be removed later + # ignore vision tensors + return [] + + # remove OOV (out-of-vocabulary) rows in token_embd + if "embed_tokens.weight" in 
name: + vocab = self._create_vocab_sentencepiece() + tokens = vocab[0] + data_torch = data_torch[:len(tokens)] + + # ref code in Gemma3RMSNorm + # output = output * (1.0 + self.weight.float()) + if name.endswith("norm.weight"): + data_torch = data_torch + 1 + + return [(self.map_tensor_name(name), data_torch)] + + @Model.register("Starcoder2ForCausalLM") class StarCoder2Model(Model): model_arch = gguf.MODEL_ARCH.STARCODER2 @@ -3327,38 +3558,7 @@ class Rwkv6Model(Model): model_arch = gguf.MODEL_ARCH.RWKV6 def set_vocab(self): - assert (self.dir_model / "rwkv_vocab_v20230424.txt").is_file() - vocab_size = self.hparams.get("vocab_size", 65536) - - tokens: list[bytes] = [''.encode("utf-8")] - toktypes: list[int] = [gguf.TokenType.CONTROL] - - with open(self.dir_model / "rwkv_vocab_v20230424.txt", "r", encoding="utf-8") as f: - lines = f.readlines() - for line in lines: - parts = line.split(' ') - assert len(parts) >= 3 - token, token_len = ast.literal_eval(' '.join(parts[1:-1])), int(parts[-1]) - token = token.encode("utf-8") if isinstance(token, str) else token - assert isinstance(token, bytes) - assert len(token) == token_len - token_text: str = repr(token)[2:-1] # "b'\xff'" -> "\xff" - tokens.append(token_text.encode("utf-8")) - toktypes.append(gguf.TokenType.NORMAL) - remainder = vocab_size - len(tokens) - assert remainder >= 0 - for i in range(len(tokens), vocab_size): - tokens.append(f"[PAD{i}]".encode("utf-8")) - toktypes.append(gguf.TokenType.UNUSED) - - self.gguf_writer.add_tokenizer_model("rwkv") - self.gguf_writer.add_token_list(tokens) - self.gguf_writer.add_token_types(toktypes) - special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False) - special_vocab.chat_template = "rwkv-world" - # hack: Add '\n\n' as the EOT token to make it chat normally - special_vocab._set_special_token("eot", 261) - special_vocab.add_to_gguf(self.gguf_writer) + self._set_vocab_rwkv_world() def set_gguf_parameters(self): block_count = self.hparams["num_hidden_layers"] @@ -3444,8 +3644,8 @@ class RWKV6Qwen2Model(Rwkv6Model): head_size = hidden_size // num_attention_heads rms_norm_eps = self.hparams["rms_norm_eps"] intermediate_size = self.hparams["intermediate_size"] - time_mix_extra_dim = 64 if hidden_size >= 4096 else 32 - time_decay_extra_dim = 128 if hidden_size >= 4096 else 64 + time_mix_extra_dim = self.hparams.get("lora_rank_tokenshift", 64 if hidden_size >= 4096 else 32) + time_decay_extra_dim = self.hparams.get("lora_rank_decay", 128 if hidden_size >= 4096 else 64) # RWKV isn't context limited self.gguf_writer.add_context_length(1048576) @@ -3480,6 +3680,168 @@ class RWKV6Qwen2Model(Rwkv6Model): yield (new_name, data) +@Model.register("Rwkv7ForCausalLM", "RWKV7ForCausalLM") +class Rwkv7Model(Model): + model_arch = gguf.MODEL_ARCH.RWKV7 + + def set_vocab(self): + self._set_vocab_rwkv_world() + + def calc_lora_rank(self, hidden_size, exponent, multiplier): + return max(1, round(hidden_size ** exponent * multiplier / 32)) * 32 + + def set_gguf_parameters(self): + block_count = self.hparams["num_hidden_layers"] + try: + head_size = self.hparams["head_size"] + layer_norm_eps = self.hparams["layer_norm_epsilon"] + except KeyError: + head_size = self.hparams["head_dim"] + layer_norm_eps = self.hparams["norm_eps"] + hidden_size = self.hparams["hidden_size"] + intermediate_size = self.hparams["intermediate_size"] if self.hparams["intermediate_size"] is not None else (hidden_size * 4) + + # ICLR: In-Context-Learning-Rate + try: + lora_rank_decay = self.hparams["lora_rank_decay"] if 
self.hparams["lora_rank_decay"] is not None else self.calc_lora_rank(hidden_size, 0.5, 1.8) + lora_rank_iclr = self.hparams["lora_rank_iclr"] if self.hparams["lora_rank_iclr"] is not None else self.calc_lora_rank(hidden_size, 0.5, 1.8) + lora_rank_value_residual_mix = self.hparams["lora_rank_value_residual_mix"] if self.hparams["lora_rank_value_residual_mix"] is not None else self.calc_lora_rank(hidden_size, 0.5, 1.3) + lora_rank_gate = self.hparams["lora_rank_gate"] if self.hparams["lora_rank_gate"] is not None else self.calc_lora_rank(hidden_size, 0.8, 0.6) + except KeyError: + lora_rank_decay = self.hparams["decay_low_rank_dim"] if self.hparams["decay_low_rank_dim"] is not None else self.calc_lora_rank(hidden_size, 0.5, 1.8) + lora_rank_iclr = self.hparams["a_low_rank_dim"] if self.hparams["a_low_rank_dim"] is not None else self.calc_lora_rank(hidden_size, 0.5, 1.8) + lora_rank_value_residual_mix = self.hparams["v_low_rank_dim"] if self.hparams["v_low_rank_dim"] is not None else self.calc_lora_rank(hidden_size, 0.5, 1.3) + lora_rank_gate = self.hparams["gate_low_rank_dim"] if self.hparams["gate_low_rank_dim"] is not None else self.calc_lora_rank(hidden_size, 0.8, 0.6) + + # RWKV isn't context limited + self.gguf_writer.add_context_length(1048576) + self.gguf_writer.add_embedding_length(hidden_size) + self.gguf_writer.add_block_count(block_count) + self.gguf_writer.add_layer_norm_eps(layer_norm_eps) + self.gguf_writer.add_wkv_head_size(head_size) + self.gguf_writer.add_decay_lora_rank(lora_rank_decay) + self.gguf_writer.add_iclr_lora_rank(lora_rank_iclr) + self.gguf_writer.add_value_residual_mix_lora_rank(lora_rank_value_residual_mix) + self.gguf_writer.add_gate_lora_rank(lora_rank_gate) + self.gguf_writer.add_feed_forward_length(intermediate_size) + self.gguf_writer.add_file_type(self.ftype) + + # required by llama.cpp, unused + self.gguf_writer.add_head_count(0) + + lerp_weights: dict[int, dict[str, Tensor]] = {} + lora_needs_transpose: bool = True + + def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: + # unify tensor names here to make life easier + name = name.replace("blocks", "layers").replace("ffn", "feed_forward") + name = name.replace("self_attn", "attention").replace("attn", "attention") + name = name.replace("time_mixer.", "") + # lora layer names in fla-hub's impl + if "_lora.lora" in name: + self.lora_needs_transpose = False + name = name.replace("_lora.lora.0.weight", "1.weight") + name = name.replace("_lora.lora.2.weight", "2.weight") + name = name.replace("_lora.lora.2.bias", "0.weight") + + name = name.replace("feed_forward_norm", "ln2") + name = name.replace("g_norm", "ln_x") + + if "attention.v" in name and "value" not in self.map_tensor_name(name) and bid == 0: + # some models have dummy v0/v1/v2 on first layer while others don't + # ignore them all since they are not used + return + + wkv_has_gate = self.hparams.get("wkv_has_gate", True) + lerp_list = ["r", "w", "k", "v", "a", "g"] if wkv_has_gate else ["r", "w", "k", "v", "a"] + + if bid is not None and "attention.x_" in name: + if "attention.x_x" in name: + # already concatenated + new_name = f"blk.{bid}.time_mix_lerp_fused.weight" + data = data_torch.reshape(len(lerp_list), 1, 1, -1) + yield (new_name, data) + else: + try: + self.lerp_weights[bid][name] = data_torch + except KeyError: + self.lerp_weights[bid] = {name: data_torch} + if all(f"model.layers.{bid}.attention.x_{i}" in self.lerp_weights[bid].keys() for i in lerp_list): + new_name = 
f"blk.{bid}.time_mix_lerp_fused.weight" + data = torch.stack([self.lerp_weights[bid][f"model.layers.{bid}.attention.x_{i}"] for i in lerp_list], dim=0) + yield (new_name, data) + return + else: + data_torch = data_torch.squeeze() + new_name = self.map_tensor_name(name) + + if not (new_name.endswith(".weight") or new_name.endswith(".bias")): + new_name += ".weight" + + if self.lora_needs_transpose and any( + new_name.endswith(t) for t in [ + "time_mix_w1.weight", "time_mix_w2.weight", + "time_mix_a1.weight", "time_mix_a2.weight", + "time_mix_v1.weight", "time_mix_v2.weight", + "time_mix_g1.weight", "time_mix_g2.weight", + ] + ): + data_torch = data_torch.transpose(0, 1) + + if 'r_k' in new_name: + data_torch = data_torch.flatten() + + if bid == 0 and "time_mix_a" in new_name: + # dummy v0/v1/v2 on first layer + # easist way to make llama happy + yield (new_name.replace("time_mix_a", "time_mix_v"), data_torch) + + yield (new_name, data_torch) + + +@Model.register("RwkvHybridForCausalLM") +class ARwkv7Model(Rwkv7Model): + model_arch = gguf.MODEL_ARCH.ARWKV7 + + def set_vocab(self): + try: + self._set_vocab_sentencepiece() + except FileNotFoundError: + self._set_vocab_gpt2() + + def set_gguf_parameters(self): + block_count = self.hparams["num_hidden_layers"] + hidden_size = self.hparams["hidden_size"] + head_size = self.hparams["head_size"] + rms_norm_eps = self.hparams["rms_norm_eps"] + intermediate_size = self.hparams["intermediate_size"] + wkv_has_gate = self.hparams["wkv_has_gate"] + assert self.hparams["wkv_version"] == 7 + + # ICLR: In-Context-Learning-Rate + lora_rank_decay = 64 + lora_rank_iclr = 64 + lora_rank_value_residual_mix = 32 + lora_rank_gate = 128 if wkv_has_gate else 0 + + # RWKV isn't context limited + self.gguf_writer.add_context_length(1048576) + self.gguf_writer.add_embedding_length(hidden_size) + self.gguf_writer.add_block_count(block_count) + self.gguf_writer.add_layer_norm_rms_eps(rms_norm_eps) + self.gguf_writer.add_wkv_head_size(head_size) + self.gguf_writer.add_decay_lora_rank(lora_rank_decay) + self.gguf_writer.add_iclr_lora_rank(lora_rank_iclr) + self.gguf_writer.add_value_residual_mix_lora_rank(lora_rank_value_residual_mix) + self.gguf_writer.add_gate_lora_rank(lora_rank_gate) + self.gguf_writer.add_feed_forward_length(intermediate_size) + self.gguf_writer.add_file_type(self.ftype) + self.gguf_writer.add_token_shift_count(1) + + # required by llama.cpp, unused + self.gguf_writer.add_head_count(0) + + @Model.register("MambaForCausalLM", "MambaLMHeadModel", "FalconMambaForCausalLM") class MambaModel(Model): model_arch = gguf.MODEL_ARCH.MAMBA @@ -3534,8 +3896,6 @@ class MambaModel(Model): _tok_embd = None def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: - del bid # unused - output_name = self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT) tok_embd_name = self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD) @@ -3545,6 +3905,10 @@ class MambaModel(Model): logger.debug("A_log --> A ==> " + new_name) data_torch = -torch.exp(data_torch) + # [4 1 8192 1] -> [4 8192 1 1] + if self.match_model_tensor_name(new_name, gguf.MODEL_TENSOR.SSM_CONV1D, bid): + data_torch = data_torch.squeeze() + # assuming token_embd.weight is seen before output.weight if self._tok_embd is not None and new_name == output_name: if torch.equal(self._tok_embd, data_torch): @@ -4058,6 +4422,10 @@ class DeepseekV2Model(Model): self._set_vocab_gpt2() def set_gguf_parameters(self): + + # note: deepseek2 using MLA converts into MQA (ie: GQA with 1 
group) + self.hparams["num_key_value_heads"] = 1 + super().set_gguf_parameters() hparams = self.hparams @@ -4066,8 +4434,13 @@ class DeepseekV2Model(Model): if "q_lora_rank" in hparams and hparams["q_lora_rank"] is not None: self.gguf_writer.add_q_lora_rank(hparams["q_lora_rank"]) self.gguf_writer.add_kv_lora_rank(hparams["kv_lora_rank"]) - self.gguf_writer.add_key_length(hparams["qk_nope_head_dim"] + hparams["qk_rope_head_dim"]) - self.gguf_writer.add_value_length(hparams["v_head_dim"]) + + # note: deepseek2 using MLA converts into MQA with larger heads, then decompresses to MHA + self.gguf_writer.add_key_length(hparams["kv_lora_rank"] + hparams["qk_rope_head_dim"]) + self.gguf_writer.add_value_length(hparams["kv_lora_rank"]) + self.gguf_writer.add_key_length_mla(hparams["qk_nope_head_dim"] + hparams["qk_rope_head_dim"]) + self.gguf_writer.add_value_length_mla(hparams["v_head_dim"]) + self.gguf_writer.add_expert_feed_forward_length(hparams["moe_intermediate_size"]) self.gguf_writer.add_expert_count(hparams["n_routed_experts"]) self.gguf_writer.add_expert_shared_count(hparams["n_shared_experts"]) @@ -4136,6 +4509,26 @@ class DeepseekV2Model(Model): else: return [] + # note: MLA with the absorption optimization, needs these two split and k_b_proj transposed + if name.endswith("kv_b_proj.weight"): + name_kb = name.replace("kv_b_proj", "k_b_proj") + name_vb = name.replace("kv_b_proj", "v_b_proj") + + n_head_kv = self.hparams["num_key_value_heads"] + v_head_dim = self.hparams["v_head_dim"] + qk_nope_head_dim = self.hparams["qk_nope_head_dim"] + + assert data_torch.shape[0] == n_head_kv * (v_head_dim + qk_nope_head_dim) + + kv_b = data_torch.view(n_head_kv, v_head_dim + qk_nope_head_dim, data_torch.shape[-1]) + k_b, v_b = torch.split(kv_b, [qk_nope_head_dim, v_head_dim], dim=1) + k_b = k_b.transpose(1, 2) + + return [ + (self.map_tensor_name(name_kb), k_b), + (self.map_tensor_name(name_vb), v_b) + ] + return [(self.map_tensor_name(name), data_torch)] def prepare_tensors(self): @@ -4148,6 +4541,29 @@ class DeepseekV2Model(Model): raise ValueError(f"Unprocessed experts: {experts}") +@Model.register("PLMForCausalLM") +class PLMModel(Model): + model_arch = gguf.MODEL_ARCH.PLM + + def set_vocab(self): + self._set_vocab_gpt2() + + def set_gguf_parameters(self): + super().set_gguf_parameters() + hparams = self.hparams + self.gguf_writer.add_vocab_size(hparams["vocab_size"]) + self.gguf_writer.add_kv_lora_rank(hparams["kv_lora_rank"]) + self.gguf_writer.add_key_length(hparams["qk_nope_head_dim"] + hparams["qk_rope_head_dim"]) + self.gguf_writer.add_value_length(hparams["v_head_dim"]) + self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"]) + + def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: + return [(self.map_tensor_name(name), data_torch)] + + def prepare_tensors(self): + super().prepare_tensors() + + @Model.register("T5WithLMHeadModel") @Model.register("T5ForConditionalGeneration") @Model.register("MT5ForConditionalGeneration") @@ -4513,7 +4929,23 @@ class JaisModel(Model): self.gguf_writer.add_max_alibi_bias(self.max_alibi_bias) -@Model.register("ChatGLMModel", "ChatGLMForConditionalGeneration") +@Model.register("Glm4ForCausalLM") +class Glm4Model(Model): + model_arch = gguf.MODEL_ARCH.GLM4 + + def set_vocab(self): + self._set_vocab_gpt2() + + def set_gguf_parameters(self): + super().set_gguf_parameters() + if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]: + if 
self.hparams["rope_scaling"].get("type") == "yarn": + self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN) + self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"]) + self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"]) + + +@Model.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration") class ChatGLMModel(Model): model_arch = gguf.MODEL_ARCH.CHATGLM @@ -4619,47 +5051,15 @@ class ChatGLMModel(Model): from transformers import AutoTokenizer tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True) - vocab_size = hparams["padded_vocab_size"] + vocab_size = hparams.get("padded_vocab_size",hparams["vocab_size"]) assert max(tokenizer.get_vocab().values()) < vocab_size - tokpre = self.get_vocab_base_pre(tokenizer) - - merges = [] - vocab = {} - mergeable_ranks = tokenizer.mergeable_ranks - for token, rank in mergeable_ranks.items(): - vocab[ChatGLMModel.token_bytes_to_string(token)] = rank - if len(token) == 1: - continue - merged = ChatGLMModel.bpe(mergeable_ranks, token, max_rank=rank) - assert len(merged) >= 2 and len(merged) <= 7 - merges.append(' '.join(map(ChatGLMModel.token_bytes_to_string, merged))) - - # for this kind of tokenizer, added_vocab is not a subset of vocab, so they need to be combined - added_vocab = tokenizer.get_added_vocab() - reverse_vocab = {id_ : encoded_tok for encoded_tok, id_ in {**vocab, **added_vocab}.items()} - - for i in range(vocab_size): - if i not in reverse_vocab: - tokens.append(f"[PAD{i}]") - toktypes.append(gguf.TokenType.UNUSED) - elif reverse_vocab[i] in added_vocab: - tokens.append(reverse_vocab[i]) - if tokenizer.added_tokens_decoder[i].special: - toktypes.append(gguf.TokenType.CONTROL) - else: - toktypes.append(gguf.TokenType.USER_DEFINED) - else: - tokens.append(reverse_vocab[i]) - toktypes.append(gguf.TokenType.NORMAL) - + tokens, toktypes, tokpre = self.get_vocab_base() self.gguf_writer.add_tokenizer_model("gpt2") self.gguf_writer.add_tokenizer_pre(tokpre) self.gguf_writer.add_token_list(tokens) self.gguf_writer.add_token_types(toktypes) - - special_vocab = gguf.SpecialVocab(dir_model, load_merges=False) - special_vocab.merges = merges + special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True) # only add special tokens when they were not already loaded from config.json special_vocab._set_special_token("eos", tokenizer.get_added_vocab()["<|endoftext|>"]) special_vocab._set_special_token("eot", tokenizer.get_added_vocab()["<|user|>"]) @@ -4670,16 +5070,20 @@ class ChatGLMModel(Model): def set_gguf_parameters(self): n_embed = self.hparams.get("hidden_size", self.hparams.get("n_embed")) n_head = self.hparams.get("n_head", self.hparams.get("num_attention_heads")) - n_head_kv = self.hparams.get("multi_query_group_num", n_head) + n_head_kv = self.hparams.get("multi_query_group_num", self.hparams.get("num_key_value_heads", n_head)) self.gguf_writer.add_context_length(self.hparams.get("seq_length", n_embed)) self.gguf_writer.add_embedding_length(n_embed) - self.gguf_writer.add_feed_forward_length(self.hparams.get("ffn_hidden_size", 4 * n_embed)) - self.gguf_writer.add_block_count(self.hparams["num_layers"]) + self.gguf_writer.add_feed_forward_length(self.hparams.get("ffn_hidden_size", self.hparams.get("intermediate_size", 4 * n_embed))) + self.gguf_writer.add_block_count(self.hparams.get("num_layers", self.hparams["num_hidden_layers"])) self.gguf_writer.add_head_count(n_head) 
self.gguf_writer.add_head_count_kv(n_head_kv) - self.gguf_writer.add_layer_norm_rms_eps(self.hparams["layernorm_epsilon"]) + self.gguf_writer.add_layer_norm_rms_eps(self.hparams.get("layernorm_epsilon",1e-5)) self.gguf_writer.add_file_type(self.ftype) - self.gguf_writer.add_rope_dimension_count(64) + if "attention_dim" in self.hparams: + rope_dim = self.hparams["attention_dim"] + else: + rope_dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"] + self.gguf_writer.add_rope_dimension_count(int(rope_dim * self.hparams.get("partial_rotary_factor", 0.5))) self.gguf_writer.add_add_bos_token(False) rope_freq = 10000 if "rope_ratio" in self.hparams: @@ -4689,7 +5093,7 @@ class ChatGLMModel(Model): def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: del bid # unused - if name.endswith(".rotary_pos_emb.inv_freq"): + if name.endswith(".rotary_pos_emb.inv_freq") or name.startswith("model.vision."): return [] name = name.removeprefix("transformer.") @@ -4864,6 +5268,105 @@ class GraniteMoeModel(GraniteModel): return super().modify_tensors(data_torch, name, bid) +@Model.register("BailingMoeForCausalLM") +class BailingMoeModel(Model): + model_arch = gguf.MODEL_ARCH.BAILINGMOE + + def set_vocab(self): + self._set_vocab_gpt2() + + def set_gguf_parameters(self): + super().set_gguf_parameters() + hparams = self.hparams + rope_dim = hparams.get("head_dim") or hparams["hidden_size"] // hparams["num_attention_heads"] + + self.gguf_writer.add_rope_dimension_count(rope_dim) + self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE) + self.gguf_writer.add_leading_dense_block_count(hparams["first_k_dense_replace"]) + self.gguf_writer.add_vocab_size(hparams["vocab_size"]) + self.gguf_writer.add_expert_feed_forward_length(hparams["moe_intermediate_size"]) + self.gguf_writer.add_expert_weights_scale(1.0) + self.gguf_writer.add_expert_count(hparams["num_experts"]) + self.gguf_writer.add_expert_shared_count(hparams["num_shared_experts"]) + self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"]) + + _experts: list[dict[str, Tensor]] | None = None + + @staticmethod + def permute(weights: Tensor, n_head: int, n_head_kv: int | None): + if n_head_kv is not None and n_head != n_head_kv: + n_head = n_head_kv + return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:]) + .swapaxes(1, 2) + .reshape(weights.shape)) + + def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: + n_head = self.hparams["num_attention_heads"] + n_kv_head = self.hparams.get("num_key_value_heads") + n_embd = self.hparams["hidden_size"] + head_dim = self.hparams.get("head_dim") or n_embd // n_head + + output_name = self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT) + + if name.endswith("attention.dense.weight"): + return [(self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_OUT, bid), data_torch)] + elif name.endswith("query_key_value.weight"): + q, k, v = data_torch.split([n_head * head_dim, n_kv_head * head_dim, n_kv_head * head_dim], dim=-2) + + return [ + (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_Q, bid), BailingMoeModel.permute(q, n_head, n_head)), + (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_K, bid), BailingMoeModel.permute(k, n_head, n_kv_head)), + (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_V, bid), v) + ] + elif name.find("mlp.experts") != -1: + n_experts = self.hparams["num_experts"] + assert bid is not None + + tensors: list[tuple[str, Tensor]] = [] + + if 
self._experts is None: + self._experts = [{} for _ in range(self.block_count)] + + self._experts[bid][name] = data_torch + + if len(self._experts[bid]) >= n_experts * 3: + # merge the experts into a single 3d tensor + for w_name in ["down_proj", "gate_proj", "up_proj"]: + datas: list[Tensor] = [] + + for xid in range(n_experts): + ename = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight" + datas.append(self._experts[bid][ename]) + del self._experts[bid][ename] + + data_torch = torch.stack(datas, dim=0) + + merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight" + + new_name = self.map_tensor_name(merged_name) + + tensors.append((new_name, data_torch)) + + return tensors + + new_name = self.map_tensor_name(name) + + if new_name == output_name and self.hparams.get("norm_head"): + data_torch = data_torch.float() + data_torch /= torch.norm(data_torch, p=2, dim=0, keepdim=True) + 1e-7 + + return [(new_name, data_torch)] + + def prepare_tensors(self): + super().prepare_tensors() + + if self._experts is not None: + # flatten `list[dict[str, Tensor]]` into `list[str]` + experts = [k for d in self._experts for k in d.keys()] + if len(experts) > 0: + raise ValueError(f"Unprocessed experts: {experts}") + + @Model.register("ChameleonForConditionalGeneration") @Model.register("ChameleonForCausalLM") # obsolete class ChameleonModel(Model): @@ -4962,6 +5465,14 @@ class LazyTorchTensor(gguf.LazyBase): lazy = cls(meta=cls.meta_with_dtype_and_shape(dtype, shape), args=(st_slice,), func=lambda s: s[:]) return cast(torch.Tensor, lazy) + @classmethod + def from_remote_tensor(cls, remote_tensor: gguf.utility.RemoteTensor): + dtype = cls._dtype_str_map[remote_tensor.dtype] + shape = remote_tensor.shape + meta = cls.meta_with_dtype_and_shape(dtype, shape) + lazy = cls(meta=meta, args=(remote_tensor,), func=lambda r: torch.frombuffer(r.data(), dtype=dtype).reshape(shape)) + return cast(torch.Tensor, lazy) + @classmethod def __torch_function__(cls, func, types, args=(), kwargs=None): del types # unused @@ -5039,6 +5550,10 @@ def parse_args() -> argparse.Namespace: "--print-supported-models", action="store_true", help="Print the supported models" ) + parser.add_argument( + "--remote", action="store_true", + help="(Experimental) Read safetensors file remotely without downloading to disk. Config and tokenizer files will still be downloaded. To use this feature, you need to specify Hugging Face model repo name instead of a local directory. For example: 'HuggingFaceTB/SmolLM2-1.7B-Instruct'. 
Note: To access gated repo, set HF_TOKEN environment variable to your Hugging Face token.", + ) args = parser.parse_args() if not args.print_supported_models and args.model is None: @@ -5079,6 +5594,14 @@ def main() -> None: dir_model = args.model + if args.remote: + from huggingface_hub import snapshot_download + local_dir = snapshot_download( + repo_id=str(dir_model), + allow_patterns=["LICENSE", "*.json", "*.md", "*.txt", "tokenizer.model"]) + dir_model = Path(local_dir) + logger.info(f"Downloaded config and tokenizer to {local_dir}") + if not dir_model.is_dir(): logger.error(f'Error: {args.model} is not a directory') sys.exit(1) @@ -5100,6 +5623,9 @@ def main() -> None: if args.outfile is not None: fname_out = args.outfile + elif args.remote: + # if remote, use the model ID as the output file name + fname_out = Path("./" + str(args.model).replace("/", "-") + "-{ftype}.gguf") else: fname_out = dir_model @@ -5110,20 +5636,20 @@ def main() -> None: with torch.inference_mode(): output_type = ftype_map[args.outtype] model_architecture = hparams["architectures"][0] - try: model_class = Model.from_model_architecture(model_architecture) except NotImplementedError: logger.error(f"Model {model_architecture} is not supported") sys.exit(1) - model_instance = model_class(dir_model=dir_model, ftype=output_type, fname_out=fname_out, + model_instance = model_class(dir_model, output_type, fname_out, is_big_endian=args.bigendian, use_temp_file=args.use_temp_file, eager=args.no_lazy, metadata_override=args.metadata, model_name=args.model_name, split_max_tensors=args.split_max_tensors, split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run, - small_first_shard=args.no_tensor_first_split) + small_first_shard=args.no_tensor_first_split, + remote_hf_model_id=str(args.model) if args.remote else None) if args.vocab_only: logger.info("Exporting model vocab...") diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py index cea34413f4..160c9fe0e6 100755 --- a/convert_hf_to_gguf_update.py +++ b/convert_hf_to_gguf_update.py @@ -8,7 +8,7 @@ # provide the necessary information to llama.cpp via the GGUF header in order to implement # the same pre-tokenizer. 
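# Aside on the new --remote flag handled in convert_hf_to_gguf.py just above: when a Hugging
# Face repo id is passed instead of a local directory, only config/tokenizer files are fetched
# via snapshot_download (the safetensors are read remotely), and the default output name is
# derived from the repo id. A minimal sketch of that naming rule, using the repo id from the
# flag's own help text:
#
#     repo_id = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
#     fname_out = "./" + repo_id.replace("/", "-") + "-{ftype}.gguf"
#     # -> "./HuggingFaceTB-SmolLM2-1.7B-Instruct-{ftype}.gguf"; the literal "{ftype}" is left
#     # as a template field for the converter to fill in with the chosen output type.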
# -# ref: https://github.com/ggerganov/llama.cpp/pull/6920 +# ref: https://github.com/ggml-org/llama.cpp/pull/6920 # # Instructions: # @@ -109,6 +109,12 @@ models = [ {"name": "megrez", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Infinigence/Megrez-3B-Instruct"}, {"name": "deepseek-v3", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-V3"}, {"name": "deepseek-r1-qwen", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"}, + {"name": "gpt-4o", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Xenova/gpt-4o", }, + {"name": "superbpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/UW/OLMo2-8B-SuperBPE-t180k", }, + {"name": "trillion", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/trillionlabs/Trillion-7B-preview", }, + {"name": "bailingmoe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inclusionAI/Ling-lite", }, + {"name": "llama4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct", }, + {"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-hf", }, ] @@ -131,6 +137,10 @@ def download_model(model): files = ["config.json", "tokenizer.json", "tokenizer_config.json"] + if name == "gpt-4o": + # Xenova/gpt-4o is tokenizer-only, it does not contain config.json + files = ["tokenizer.json", "tokenizer_config.json"] + if tokt == TOKENIZER_TYPE.SPM: files.append("tokenizer.model") @@ -246,7 +256,7 @@ src_func = f""" logger.warning("** - the model has not been added to convert_hf_to_gguf_update.py yet") logger.warning("** - the pre-tokenization config has changed upstream") logger.warning("** Check your model files and convert_hf_to_gguf_update.py and update them accordingly.") - logger.warning("** ref: https://github.com/ggerganov/llama.cpp/pull/6920") + logger.warning("** ref: https://github.com/ggml-org/llama.cpp/pull/6920") logger.warning("**") logger.warning(f"** chkhsh: {{chkhsh}}") logger.warning("**************************************************************************************") diff --git a/convert_lora_to_gguf.py b/convert_lora_to_gguf.py index 6dea14a232..bdc991533b 100755 --- a/convert_lora_to_gguf.py +++ b/convert_lora_to_gguf.py @@ -395,7 +395,7 @@ if __name__ == '__main__': logger.error(f"Unexpected name '{name}': Not a lora_A or lora_B tensor") if ".embed_tokens.weight" in name or ".lm_head.weight" in name: logger.error("Embeddings is present in the adapter. 
This can be due to new tokens added during fine tuning") - logger.error("Please refer to https://github.com/ggerganov/llama.cpp/pull/9948") + logger.error("Please refer to https://github.com/ggml-org/llama.cpp/pull/9948") sys.exit(1) if base_name in tensor_map: @@ -419,7 +419,7 @@ if __name__ == '__main__': # some archs may have the same tensor for lm_head and output (tie word embeddings) # in this case, adapters targeting lm_head will fail when using llama-export-lora # therefore, we ignore them for now - # see: https://github.com/ggerganov/llama.cpp/issues/9065 + # see: https://github.com/ggml-org/llama.cpp/issues/9065 if name == "lm_head.weight" and len(dest) == 0: raise ValueError("lm_head is present in adapter, but is ignored in base model") for dest_name, dest_data in dest: diff --git a/docs/android.md b/docs/android.md index 47530c6c1d..d2a835653f 100644 --- a/docs/android.md +++ b/docs/android.md @@ -12,7 +12,7 @@ $ apt update && apt upgrade -y $ apt install git cmake ``` -Then, follow the [build instructions](https://github.com/ggerganov/llama.cpp/blob/master/docs/build.md), specifically for CMake. +Then, follow the [build instructions](https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md), specifically for CMake. Once the binaries are built, download your model of choice (e.g., from Hugging Face). It's recommended to place it in the `~/` directory for best performance: diff --git a/docs/cuda-fedora.md b/docs/backend/CUDA-FEDORA.md similarity index 52% rename from docs/cuda-fedora.md rename to docs/backend/CUDA-FEDORA.md index b993386c8b..1508faf776 100644 --- a/docs/cuda-fedora.md +++ b/docs/backend/CUDA-FEDORA.md @@ -1,23 +1,20 @@ # Setting Up CUDA on Fedora In this guide we setup [Nvidia CUDA](https://docs.nvidia.com/cuda/) in a toolbox container. This guide is applicable for: + - [Fedora Workstation](https://fedoraproject.org/workstation/) - [Atomic Desktops for Fedora](https://fedoraproject.org/atomic-desktops/) - [Fedora Spins](https://fedoraproject.org/spins) -- [Other Distributions](https://containertoolbx.org/distros/), including `Red Hat Enterprise Linux >= 8.`, `Arch Linux`, and `Ubuntu`. - +- [Other Distributions](https://containertoolbx.org/distros/), including `Red Hat Enterprise Linux >= 8.5`, `Arch Linux`, and `Ubuntu`. 
## Table of Contents - [Prerequisites](#prerequisites) -- [Monitoring NVIDIA CUDA Repositories](#monitoring-nvidia-cuda-repositories) -- [Using the Fedora 39 CUDA Repository](#using-the-fedora-39-cuda-repository) +- [Using the Fedora 41 CUDA Repository](#using-the-fedora-41-cuda-repository) - [Creating a Fedora Toolbox Environment](#creating-a-fedora-toolbox-environment) - [Installing Essential Development Tools](#installing-essential-development-tools) - [Adding the CUDA Repository](#adding-the-cuda-repository) -- [Installing `nvidia-driver-libs`](#installing-nvidia-driver-libs) -- [Manually Resolving Package Conflicts](#manually-resolving-package-conflicts) -- [Finalizing the Installation of `nvidia-driver-libs`](#finalizing-the-installation-of-nvidia-driver-libs) +- [Installing Nvidia Driver Libraries](#installing-nvidia-driver-libraries) - [Installing the CUDA Meta-Package](#installing-the-cuda-meta-package) - [Configuring the Environment](#configuring-the-environment) - [Verifying the Installation](#verifying-the-installation) @@ -29,44 +26,33 @@ In this guide we setup [Nvidia CUDA](https://docs.nvidia.com/cuda/) in a toolbox ## Prerequisites - **Toolbox Installed on the Host System** `Fedora Silverblue` and `Fedora Workstation` both have toolbox by default, other distributions may need to install the [toolbox package](https://containertoolbx.org/install/). -- **NVIDIA Drivers and Graphics Card installed on Host System (optional)** To run CUDA program, such as `llama.cpp`, the host should be setup to access your NVIDIA hardware. Fedora Hosts can use the [RPM Fusion Repository](https://rpmfusion.org/Howto/NVIDIA). +- **NVIDIA Drivers and Graphics Card installed on Host System (recommended)** To run CUDA program, such as `llama.cpp`, the host should be setup to access your NVIDIA hardware. Fedora Hosts can use the [RPM Fusion Repository](https://rpmfusion.org/Howto/NVIDIA). - **Internet connectivity** to download packages. -### Monitoring NVIDIA CUDA Repositories +### Using the Fedora 41 CUDA Repository -Before proceeding, it is advisable to check if NVIDIA has updated their CUDA repositories for your Fedora version. NVIDIA's repositories can be found at: +The latest release is 41. -- [Fedora 40 CUDA Repository](https://developer.download.nvidia.com/compute/cuda/repos/fedora40/x86_64/) - [Fedora 41 CUDA Repository](https://developer.download.nvidia.com/compute/cuda/repos/fedora41/x86_64/) -As of the latest update, these repositories do not contain the `cuda` meta-package or are missing essential components. - -### Using the Fedora 39 CUDA Repository - -Since the newer repositories are incomplete, we'll use the Fedora 39 repository: - -- [Fedora 39 CUDA Repository](https://developer.download.nvidia.com/compute/cuda/repos/fedora39/x86_64/) - -**Note:** Fedora 39 is no longer maintained, so we recommend using a toolbox environment to prevent system conflicts. +**Note:** We recommend using a toolbox environment to prevent system conflicts. ## Creating a Fedora Toolbox Environment -This guide focuses on Fedora hosts, but with small adjustments, it can work for other hosts. Using a Fedora 39 toolbox allows us to install the necessary packages without affecting the host system. +This guide focuses on Fedora hosts, but with small adjustments, it can work for other hosts. Using the Fedora Toolbox allows us to install the necessary packages without affecting the host system. **Note:** Toolbox is available for other systems, and even without Toolbox, it is possible to use Podman or Docker. 
-We do not recommend installing on the host system, as Fedora 39 is out-of-maintenance, and instead you should upgrade to a maintained version of Fedora for your host. - -1. **Create a Fedora 39 Toolbox:** +1. **Create a Fedora 41 Toolbox:** ```bash - toolbox create --image registry.fedoraproject.org/fedora-toolbox:39 --container fedora-toolbox-39-cuda + toolbox create --image registry.fedoraproject.org/fedora-toolbox:41 --container fedora-toolbox-41-cuda ``` 2. **Enter the Toolbox:** ```bash - toolbox enter --container fedora-toolbox-39-cuda + toolbox enter --container fedora-toolbox-41-cuda ``` Inside the toolbox, you have root privileges and can install packages without affecting the host system. @@ -79,13 +65,13 @@ We do not recommend installing on the host system, as Fedora 39 is out-of-mainte sudo dnf distro-sync ``` -2. **Install the Default Text Editor (Optional):** +2. **Install **Vim** the default text editor (Optional):** ```bash sudo dnf install vim-default-editor --allowerasing ``` - The `--allowerasing` flag resolves any package conflicts. + The `--allowerasing` flag will allow the removal of the conflicting `nano-default-editor` package. 3. **Install Development Tools and Libraries:** @@ -100,7 +86,7 @@ We do not recommend installing on the host system, as Fedora 39 is out-of-mainte Add the NVIDIA CUDA repository to your DNF configuration: ```bash -sudo dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/fedora39/x86_64/cuda-fedora39.repo +sudo dnf config-manager addrepo --from-repofile=https://developer.download.nvidia.com/compute/cuda/repos/fedora41/x86_64/cuda-fedora41.repo ``` After adding the repository, synchronize the package manager again: @@ -109,106 +95,77 @@ After adding the repository, synchronize the package manager again: sudo dnf distro-sync ``` -## Installing `nvidia-driver-libs` +## Installing Nvidia Driver Libraries -Attempt to install `nvidia-driver-libs`: +First, we need to detect if the host is supplying the [NVIDIA driver libraries into the toolbox](https://github.com/containers/toolbox/blob/main/src/pkg/nvidia/nvidia.go): ```bash -sudo dnf install nvidia-driver-libs +ls -la /usr/lib64/libcuda.so.1 +``` + +### If *`libcuda.so.1`* is missing: + +``` +ls: cannot access '/usr/lib64/libcuda.so.1': No such file or directory ``` **Explanation:** +The host dose not supply the CUDA drivers, **install them now:** -- `nvidia-driver-libs` contains necessary NVIDIA driver libraries required by CUDA. -- This step might fail due to conflicts with existing NVIDIA drivers on the host system. - -## Manually Resolving Package Conflicts - -If the installation fails due to conflicts, we'll manually download and install the required packages, excluding conflicting files. - -### 1. Download the `nvidia-driver-libs` RPM +#### Install the Nvidia Driver Libraries on Guest: ```bash -sudo dnf download --arch x86_64 nvidia-driver-libs +sudo dnf install nvidia-driver-cuda nvidia-driver-libs nvidia-driver-cuda-libs nvidia-persistenced ``` -You should see a file similar to: - +### If *`libcuda.so.1`* exists: ``` -nvidia-driver-libs-560.35.05-1.fc39.x86_64.rpm -``` - -### 2. Attempt to Install the RPM - -```bash -sudo dnf install nvidia-driver-libs-560.35.05-1.fc39.x86_64.rpm -``` - -**Expected Error:** - -Installation may fail with errors pointing to conflicts with `egl-gbm` and `egl-wayland`. - -**Note: It is important to carefully read the error messages to identify the exact paths that need to be excluded.** - -### 3. 
Download Dependencies - -```bash -sudo dnf download --arch x86_64 egl-gbm egl-wayland -``` - -### 4. Install `egl-gbm` with Excluded Paths - -Exclude conflicting files during installation: - -```bash -sudo rpm --install --verbose --hash \ - --excludepath=/usr/lib64/libnvidia-egl-gbm.so.1.1.2 \ - --excludepath=/usr/share/egl/egl_external_platform.d/15_nvidia_gbm.json \ - egl-gbm-1.1.2^20240919gitb24587d-3.fc39.x86_64.rpm +lrwxrwxrwx. 1 root root 21 Mar 24 11:26 /usr/lib64/libcuda.so.1 -> libcuda.so.570.133.07 ``` **Explanation:** +The host is supply the CUDA drivers, **we need to update the guest RPM Database accordingly:** -- The `--excludepath` option skips installing files that conflict with existing files. -- Adjust the paths based on the error messages you receive. +#### Update the Toolbox RPM Database to include the Host-Supplied Libraries: -### 5. Install `egl-wayland` with Excluded Paths +Note: we do not actually install the libraries, we just update the DB so that the guest system knows they are supplied by the host. + +##### 1. Download `nvidia-` parts that are supplied by the host RPM's (with dependencies) ```bash -sudo rpm --install --verbose --hash \ - --excludepath=/usr/share/egl/egl_external_platform.d/10_nvidia_wayland.json \ - egl-wayland-1.1.17^20241118giteeb29e1-5.fc39.x86_64.rpm +sudo dnf download --destdir=/tmp/nvidia-driver-libs --resolve --arch x86_64 nvidia-driver-cuda nvidia-driver-libs nvidia-driver-cuda-libs nvidia-persistenced ``` -### 6. Install `nvidia-driver-libs` with Excluded Paths +##### 2. Update the RPM database to assume the installation of these packages. ```bash -sudo rpm --install --verbose --hash \ - --excludepath=/usr/share/glvnd/egl_vendor.d/10_nvidia.json \ - --excludepath=/usr/share/nvidia/nvoptix.bin \ - nvidia-driver-libs-560.35.05-1.fc39.x86_64.rpm +sudo rpm --install --verbose --hash --justdb /tmp/nvidia-driver-libs/* ``` **Note:** -- Replace the paths with the ones causing conflicts in your installation if they differ. -- The `--verbose` and `--hash` options provide detailed output during installation. +- The `--justdb` option only updates the RPM database, without touching the filesystem elsewhere. -## Finalizing the Installation of `nvidia-driver-libs` +##### Check that the RPM Database has been correctly updated: + +**Note:** This is the same command as in the *"Install the Nvidia Driver Libraries on Guest"* for if *`libcuda.so.1`* was missing. -After manually installing the dependencies, run: ```bash -sudo dnf install nvidia-driver-libs +sudo dnf install nvidia-driver-cuda nvidia-driver-libs nvidia-driver-cuda-libs nvidia-persistenced ``` -You should receive a message indicating the package is already installed: +*(this time it will not install anything, as the database things that these packages are already installed)* ``` -Package nvidia-driver-libs-3:560.35.05-1.fc39.x86_64 is already installed. -Dependencies resolved. +Updating and loading repositories: +Repositories loaded. +Package "nvidia-driver-cuda-3:570.124.06-1.fc41.x86_64" is already installed. +Package "nvidia-driver-libs-3:570.124.06-1.fc41.x86_64" is already installed. +Package "nvidia-driver-cuda-libs-3:570.124.06-1.fc41.x86_64" is already installed. +Package "nvidia-persistenced-3:570.124.06-1.fc41.x86_64" is already installed. + Nothing to do. -Complete! ``` ## Installing the CUDA Meta-Package @@ -233,7 +190,7 @@ To use CUDA, add its binary directory to your system's `PATH`. 
**Explanation:** - - We add to `/etc/profile.d/` as the `/etc/` folder is unique to this particular container, and is not shared with other containers or the host system. + - We add to `/etc/profile.d/` as the `/etc/` folder is unique to this particular container, and is not shared with other containers or the host system. - The backslash `\` before `$PATH` ensures the variable is correctly written into the script. 2. **Make the Script Executable:** @@ -262,26 +219,33 @@ You should see output similar to: ``` nvcc: NVIDIA (R) Cuda compiler driver -Copyright (c) 2005-2024 NVIDIA Corporation -Built on Tue_Oct_29_23:50:19_PDT_2024 -Cuda compilation tools, release 12.6, V12.6.85 -Build cuda_12.6.r12.6/compiler.35059454_0 +Copyright (c) 2005-2025 NVIDIA Corporation +Built on Fri_Feb_21_20:23:50_PST_2025 +Cuda compilation tools, release 12.8, V12.8.93 +Build cuda_12.8.r12.8/compiler.35583870_0 ``` This output confirms that the CUDA compiler is accessible and indicates the installed version. ## Conclusion -You have successfully set up CUDA on Fedora within a toolbox environment using the Fedora 39 CUDA repository. By manually resolving package conflicts and configuring the environment, you can develop CUDA applications without affecting your host system. +You have successfully set up CUDA on Fedora within a toolbox environment using the Fedora 41 CUDA repository. By manually updating the RPM db and configuring the environment, you can develop CUDA applications without affecting your host system. ## Troubleshooting - **Installation Failures:** - - If you encounter errors during installation, carefully read the error messages. They often indicate conflicting files or missing dependencies. - - Use the `--excludepath` option with `rpm` to exclude conflicting files during manual installations. -- **Driver Conflicts:** - - Since the host system may already have NVIDIA drivers installed, conflicts can arise. Using the toolbox environment helps isolate these issues. + - If you encounter errors during installation, carefully read the error messages. They often indicate conflicting files or missing dependencies. + - You may use the `--excludepath` option with `rpm` to exclude conflicting files during manual RPM installations. + +- **Rebooting the Container:** + + - Sometimes there may be a bug in the NVIDIA driver host passthrough (such as missing a shared library). Rebooting the container may solve this issue: + + ```bash + # on the host system + podman container restart --all + ``` - **Environment Variables Not Set:** - If `nvcc` is not found after installation, ensure that `/usr/local/cuda/bin` is in your `PATH`. @@ -291,11 +255,13 @@ You have successfully set up CUDA on Fedora within a toolbox environment using t ## Additional Notes - **Updating CUDA in the Future:** + - Keep an eye on the official NVIDIA repositories for updates to your Fedora version. - When an updated repository becomes available, adjust your `dnf` configuration accordingly. - **Building `llama.cpp`:** - - With CUDA installed, you can follow these [build instructions for `llama.cpp`](https://github.com/ggerganov/llama.cpp/blob/master/docs/build.md) to compile it with CUDA support. + + - With CUDA installed, you can follow these [build instructions for `llama.cpp`](https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md) to compile it with CUDA support. - Ensure that any CUDA-specific build flags or paths are correctly set in your build configuration. 
- **Using the Toolbox Environment:** diff --git a/docs/backend/OPENCL.md b/docs/backend/OPENCL.md new file mode 100644 index 0000000000..07146f7102 --- /dev/null +++ b/docs/backend/OPENCL.md @@ -0,0 +1,209 @@ +# llama.cpp for OpenCL + +- [Background](#background) +- [OS](#os) +- [Hardware](#hardware) +- [DataType Supports](#datatype-supports) +- [Model Preparation](#model-preparation) +- [CMake Options](#cmake-options) +- [Android](#android) +- [Windows 11 Arm64](#windows-11-arm64) +- [Known Issue](#known-issues) +- [TODO](#todo) + +## Background + +OpenCL (Open Computing Language) is an open, royalty-free standard for cross-platform, parallel programming of diverse accelerators found in supercomputers, cloud servers, personal computers, mobile devices and embedded platforms. OpenCL specifies a programming language (based on C99) for programming these devices and application programming interfaces (APIs) to control the platform and execute programs on the compute devices. Similar to CUDA, OpenCL has been widely used to program GPUs and is supported by most GPU vendors. + +### Llama.cpp + OpenCL + +The llama.cpp OpenCL backend is designed to enable llama.cpp on **Qualcomm Adreno GPU** firstly via OpenCL. Thanks to the portabilty of OpenCL, the OpenCL backend can also run on certain Intel GPUs although the performance is not optimal. + +## OS + +| OS | Status | Verified | +|---------|---------|------------------------------------------------| +| Android | Support | Snapdragon 8 Gen 3, Snapdragon 8 Elite | +| Windows | Support | Windows 11 Arm64 with Snapdragon X Elite | +| Linux | Support | Ubuntu 22.04 WSL2 with Intel 12700H | + +## Hardware + +### Adreno GPU + +**Verified devices** + +| Adreno GPU | Status | +|:------------------------------------:|:-------:| +| Adreno 750 (Snapdragon 8 Gen 3) | Support | +| Adreno 830 (Snapdragon 8 Elite) | Support | +| Adreno X85 (Snapdragon X Elite) | Support | + +## DataType Supports + +| DataType | Status | +|:----------------------:|:--------------------------:| +| Q4_0 | Support | +| Q6_K | Support, but not optimized | + +## Model Preparation + +You can refer to the general [*Prepare and Quantize*](README.md#prepare-and-quantize) guide for model prepration. + +Currently we support `Q4_0` quantization and have optimize for it. To achieve best performance on Adreno GPU, add `--pure` to `llama-quantize`. For example, + +```sh +./llama-quantize --pure ggml-model-qwen2.5-3b-f16.gguf ggml-model-qwen-3b-Q4_0.gguf Q4_0 +``` + +Since `Q6_K` is also supported, `Q4_0` quantization without `--pure` will also work. However, the performance will be worse compared to pure `Q4_0` quantization. + +## CMake Options + +The OpenCL backend has the following CMake options that control the behavior of the backend. + +| CMake options | Default value | Description | +|:---------------------------------:|:--------------:|:------------------------------------------| +| `GGML_OPENCL_EMBED_KERNELS` | `ON` | Embed OpenCL kernels into the executable. | +| `GGML_OPENCL_USE_ADRENO_KERNELS` | `ON` | Use kernels optimized for Adreno. | + +## Android + +Ubuntu 22.04 is used for targeting Android. Make sure the following tools are accessible from command line, + +* Git +* CMake 3.29 +* Ninja +* Python3 + +### I. Setup Environment + +1. 
**Install NDK** + +```sh +cd ~ +wget https://dl.google.com/android/repository/commandlinetools-linux-8512546_latest.zip && \ +unzip commandlinetools-linux-8512546_latest.zip && \ +mkdir -p ~/android-sdk/cmdline-tools && \ +mv cmdline-tools latest && \ +mv latest ~/android-sdk/cmdline-tools/ && \ +rm -rf commandlinetools-linux-8512546_latest.zip + +yes | ~/android-sdk/cmdline-tools/latest/bin/sdkmanager "ndk;26.3.11579264" +``` + +2. **Install OpenCL Headers and Library** + +```sh +mkdir -p ~/dev/llm +cd ~/dev/llm + +git clone https://github.com/KhronosGroup/OpenCL-Headers && \ +cd OpenCL-Headers && \ +cp -r CL ~/android-sdk/ndk/26.3.11579264/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include + +cd ~/dev/llm + +git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader && \ +cd OpenCL-ICD-Loader && \ +mkdir build_ndk26 && cd build_ndk26 && \ +cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_TOOLCHAIN_FILE=$HOME/android-sdk/ndk/26.3.11579264/build/cmake/android.toolchain.cmake \ + -DOPENCL_ICD_LOADER_HEADERS_DIR=$HOME/android-sdk/ndk/26.3.11579264/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include \ + -DANDROID_ABI=arm64-v8a \ + -DANDROID_PLATFORM=24 \ + -DANDROID_STL=c++_shared && \ +ninja && \ +cp libOpenCL.so ~/android-sdk/ndk/26.3.11579264/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android +``` + +### II. Build llama.cpp + +```sh +cd ~/dev/llm + +git clone https://github.com/ggml-org/llama.cpp && \ +cd llama.cpp && \ +mkdir build-android && cd build-android + +cmake .. -G Ninja \ + -DCMAKE_TOOLCHAIN_FILE=$HOME/android-sdk/ndk/26.3.11579264/build/cmake/android.toolchain.cmake \ + -DANDROID_ABI=arm64-v8a \ + -DANDROID_PLATFORM=android-28 \ + -DBUILD_SHARED_LIBS=OFF \ + -DGGML_OPENCL=ON + +ninja +``` + +## Windows 11 Arm64 + +A Snapdragon X Elite device with Windows 11 Arm64 is used. Make sure the following tools are accessible from command line, + +* Git +* CMake 3.29 +* Clang 19 +* Ninja +* Visual Studio 2022 +* Powershell 7 + +Visual Studio provides necessary headers and libraries although it is not directly used for building. +Alternatively, Visual Studio Build Tools can be installed instead of the full Visual Studio. + +Powershell 7 is used for the following commands. +If an older version of Powershell is used, these commands may not work as they are. + +### I. Setup Environment + +1. **Install OpenCL Headers and Library** + +```powershell +mkdir -p ~/dev/llm + +cd ~/dev/llm +git clone https://github.com/KhronosGroup/OpenCL-Headers && cd OpenCL-Headers +mkdir build && cd build +cmake .. -G Ninja ` + -DBUILD_TESTING=OFF ` + -DOPENCL_HEADERS_BUILD_TESTING=OFF ` + -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF ` + -DCMAKE_INSTALL_PREFIX="$HOME/dev/llm/opencl" +cmake --build . --target install + +cd ~/dev/llm +git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader && cd OpenCL-ICD-Loader +mkdir build && cd build +cmake .. -G Ninja ` + -DCMAKE_BUILD_TYPE=Release ` + -DCMAKE_PREFIX_PATH="$HOME/dev/llm/opencl" ` + -DCMAKE_INSTALL_PREFIX="$HOME/dev/llm/opencl" +cmake --build . --target install +``` + +### II. Build llama.cpp + +```powershell + +mkdir -p ~/dev/llm +cd ~/dev/llm + +git clone https://github.com/ggml-org/llama.cpp && cd llama.cpp +mkdir build && cd build + +cmake .. 
-G Ninja ` + -DCMAKE_TOOLCHAIN_FILE="$HOME/dev/llm/llama.cpp/cmake/arm64-windows-llvm.cmake" ` + -DCMAKE_BUILD_TYPE=Release ` + -DCMAKE_PREFIX_PATH="$HOME/dev/llm/opencl" ` + -DBUILD_SHARED_LIBS=OFF ` + -DGGML_OPENCL=ON +ninja +``` + +## Known Issues + +- Currently OpenCL backend does not work on Adreno 6xx GPUs. + +## TODO + +- Optimization for Q6_K +- Support and optimization for Q4_K diff --git a/docs/backend/SYCL.md b/docs/backend/SYCL.md index 89ddbd669a..20aefec2f3 100644 --- a/docs/backend/SYCL.md +++ b/docs/backend/SYCL.md @@ -20,7 +20,7 @@ **oneAPI** is an open ecosystem and a standard-based specification, supporting multiple architectures including but not limited to intel CPUs, GPUs and FPGAs. The key components of the oneAPI ecosystem include: - **DPCPP** *(Data Parallel C++)*: The primary oneAPI SYCL implementation, which includes the icpx/icx Compilers. -- **oneAPI Libraries**: A set of highly optimized libraries targeting multiple domains *(e.g. oneMKL and oneDNN)*. +- **oneAPI Libraries**: A set of highly optimized libraries targeting multiple domains *(e.g. Intel oneMKL, oneMath and oneDNN)*. - **oneAPI LevelZero**: A high performance low level interface for fine-grained control over intel iGPUs and dGPUs. - **Nvidia & AMD Plugins**: These are plugins extending oneAPI's DPCPP support to SYCL on Nvidia and AMD GPU targets. @@ -36,12 +36,22 @@ The following release is verified with good quality: |Commit ID|Tag|Release|Verified Platform| Update date| |-|-|-|-|-| -|3bcd40b3c593d14261fb2abfabad3c0fb5b9e318|b4040 |[llama-b4040-bin-win-sycl-x64.zip](https://github.com/ggerganov/llama.cpp/releases/download/b4040/llama-b4040-bin-win-sycl-x64.zip) |Arc770/Linux/oneAPI 2024.1
MTL Arc GPU/Windows 11/oneAPI 2024.1| 2024-11-19| -|fb76ec31a9914b7761c1727303ab30380fd4f05c|b3038 |[llama-b3038-bin-win-sycl-x64.zip](https://github.com/ggerganov/llama.cpp/releases/download/b3038/llama-b3038-bin-win-sycl-x64.zip) |Arc770/Linux/oneAPI 2024.1
MTL Arc GPU/Windows 11/oneAPI 2024.1|| +|3bcd40b3c593d14261fb2abfabad3c0fb5b9e318|b4040 |[llama-b4040-bin-win-sycl-x64.zip](https://github.com/ggml-org/llama.cpp/releases/download/b4040/llama-b4040-bin-win-sycl-x64.zip) |Arc770/Linux/oneAPI 2024.1
MTL Arc GPU/Windows 11/oneAPI 2024.1| 2024-11-19| +|fb76ec31a9914b7761c1727303ab30380fd4f05c|b3038 |[llama-b3038-bin-win-sycl-x64.zip](https://github.com/ggml-org/llama.cpp/releases/download/b3038/llama-b3038-bin-win-sycl-x64.zip) |Arc770/Linux/oneAPI 2024.1
MTL Arc GPU/Windows 11/oneAPI 2024.1|| ## News +- 2025.2 + - Optimize MUL_MAT Q4_0 on Intel GPU for all dGPUs and built-in GPUs since MTL. Increase the performance of LLM (llama-2-7b.Q4_0.gguf) 21%-87% on Intel GPUs (MTL, ARL-H, Arc, Flex, PVC). + |GPU|Base tokens/s|Increased tokens/s|Percent| + |-|-|-|-| + |PVC 1550|39|73|+87%| + |Flex 170|39|50|+28%| + |Arc770|42|55|+30%| + |MTL|13|16|+23%| + |ARL-H|14|17|+21%| + - 2024.11 - Use syclcompat to improve the performance on some platforms. This requires to use oneAPI 2025.0 or newer. @@ -58,7 +68,7 @@ The following release is verified with good quality: - 2024.3 - Release binary files of Windows. - A blog is published: **Run LLM on all Intel GPUs Using llama.cpp**: [intel.com](https://www.intel.com/content/www/us/en/developer/articles/technical/run-llm-on-all-gpus-using-llama-cpp-artical.html) or [medium.com](https://medium.com/@jianyu_neo/run-llm-on-all-intel-gpus-using-llama-cpp-fd2e2dcbd9bd). - - New base line is ready: [tag b2437](https://github.com/ggerganov/llama.cpp/tree/b2437). + - New base line is ready: [tag b2437](https://github.com/ggml-org/llama.cpp/tree/b2437). - Support multiple cards: **--split-mode**: [none|layer]; not support [row], it's on developing. - Support to assign main GPU by **--main-gpu**, replace $GGML_SYCL_DEVICE. - Support detecting all GPUs with level-zero and same top **Max compute units**. @@ -97,8 +107,8 @@ SYCL backend supports Intel GPU Family: | Intel Data Center Max Series | Support | Max 1550, 1100 | | Intel Data Center Flex Series | Support | Flex 170 | | Intel Arc Series | Support | Arc 770, 730M, Arc A750 | -| Intel built-in Arc GPU | Support | built-in Arc GPU in Meteor Lake | -| Intel iGPU | Support | iGPU in 13700k, i5-1250P, i7-1260P, i7-1165G7 | +| Intel built-in Arc GPU | Support | built-in Arc GPU in Meteor Lake, Arrow Lake | +| Intel iGPU | Support | iGPU in 13700k,iGPU in 13400, i5-1250P, i7-1260P, i7-1165G7 | *Notes:* @@ -217,30 +227,19 @@ Upon a successful installation, SYCL is enabled for the available intel devices, **oneAPI Plugin**: In order to enable SYCL support on Nvidia GPUs, please install the [Codeplay oneAPI Plugin for Nvidia GPUs](https://developer.codeplay.com/products/oneapi/nvidia/download). User should also make sure the plugin version matches the installed base toolkit one *(previous step)* for a seamless "oneAPI on Nvidia GPU" setup. - -**oneMKL for cuBlas**: The current oneMKL releases *(shipped with the oneAPI base-toolkit)* do not contain the cuBLAS backend. A build from source of the upstream [oneMKL](https://github.com/oneapi-src/oneMKL) with the *cuBLAS* backend enabled is thus required to run it on Nvidia GPUs. +**oneDNN**: The current oneDNN releases *(shipped with the oneAPI base-toolkit)* do not include the NVIDIA backend. 
Therefore, oneDNN must be compiled from source to enable the NVIDIA target: ```sh -git clone https://github.com/oneapi-src/oneMKL -cd oneMKL -cmake -B buildWithCublas -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_CUBLAS_BACKEND=ON -DTARGET_DOMAINS=blas -cmake --build buildWithCublas --config Release +git clone https://github.com/oneapi-src/oneDNN.git +cd oneDNN +cmake -GNinja -Bbuild-nvidia -DDNNL_CPU_RUNTIME=DPCPP -DDNNL_GPU_RUNTIME=DPCPP -DDNNL_GPU_VENDOR=NVIDIA -DONEDNN_BUILD_GRAPH=OFF -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx +cmake --build build-nvidia --config Release ``` - **Adding support to AMD GPUs** **oneAPI Plugin**: In order to enable SYCL support on AMD GPUs, please install the [Codeplay oneAPI Plugin for AMD GPUs](https://developer.codeplay.com/products/oneapi/amd/download). As with Nvidia GPUs, the user should also make sure the plugin version matches the installed base toolkit. -**oneMKL for rocBlas**: The current oneMKL releases *(shipped with the oneAPI base-toolkit)* doesn't contain the rocBLAS backend. A build from source of the upstream [oneMKL](https://github.com/oneapi-src/oneMKL) with the *rocBLAS* backend enabled is thus required to run it on AMD GPUs. - -```sh -git clone https://github.com/oneapi-src/oneMKL -cd oneMKL -# Find your HIPTARGET with rocminfo, under the key 'Name:' -cmake -B buildWithrocBLAS -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_ROCBLAS_BACKEND=ON -DHIPTARGETS=${HIPTARGET} -DTARGET_DOMAINS=blas -cmake --build buildWithrocBLAS --config Release -``` - 3. **Verify installation and environment** In order to check the available SYCL devices on the machine, please use the `sycl-ls` command. @@ -303,37 +302,39 @@ cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx - cmake --build build --config Release -j -v ``` +It is possible to come across some precision issues when running tests that stem from using faster +instructions, which can be circumvented by setting the environment variable `SYCL_PROGRAM_COMPILE_OPTIONS` +as `-cl-fp32-correctly-rounded-divide-sqrt` + #### Nvidia GPU -```sh -# Export relevant ENV variables -export LD_LIBRARY_PATH=/path/to/oneMKL/buildWithCublas/lib:$LD_LIBRARY_PATH -export LIBRARY_PATH=/path/to/oneMKL/buildWithCublas/lib:$LIBRARY_PATH -export CPLUS_INCLUDE_DIR=/path/to/oneMKL/buildWithCublas/include:$CPLUS_INCLUDE_DIR -export CPLUS_INCLUDE_DIR=/path/to/oneMKL/include:$CPLUS_INCLUDE_DIR +The SYCL backend depends on [oneMath](https://github.com/uxlfoundation/oneMath) for Nvidia and AMD devices. +By default it is automatically built along with the project. A specific build can be provided by setting the CMake flag `-DoneMath_DIR=/path/to/oneMath/install/lib/cmake/oneMath`. 
+```sh # Build LLAMA with Nvidia BLAS acceleration through SYCL # Setting GGML_SYCL_DEVICE_ARCH is optional but can improve performance GGML_SYCL_DEVICE_ARCH=sm_80 # Example architecture # Option 1: Use FP32 (recommended for better performance in most cases) -cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx +cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DDNNL_DIR=/path/to/oneDNN/build-nvidia/install/lib/cmake/dnnl # Option 2: Use FP16 -cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON +cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON -DDNNL_DIR=/path/to/oneDNN/build-nvidia/install/lib/cmake/dnnl # build all binary cmake --build build --config Release -j -v ``` +It is possible to come across some precision issues when running tests that stem from using faster +instructions, which can be circumvented by passing the `-fno-fast-math` flag to the compiler. + #### AMD GPU -```sh -# Export relevant ENV variables -export LD_LIBRARY_PATH=/path/to/oneMKL/buildWithrocBLAS/lib:$LD_LIBRARY_PATH -export LIBRARY_PATH=/path/to/oneMKL/buildWithrocBLAS/lib:$LIBRARY_PATH -export CPLUS_INCLUDE_DIR=/path/to/oneMKL/buildWithrocBLAS/include:$CPLUS_INCLUDE_DIR +The SYCL backend depends on [oneMath](https://github.com/uxlfoundation/oneMath) for Nvidia and AMD devices. +By default it is automatically built along with the project. A specific build can be provided by setting the CMake flag `-DoneMath_DIR=/path/to/oneMath/install/lib/cmake/oneMath`. +```sh # Build LLAMA with rocBLAS acceleration through SYCL ## AMD @@ -424,13 +425,13 @@ Examples: - Use device 0: ```sh -ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m models/llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 -sm none -mg 0 +ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -no-cnv -m models/llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 -sm none -mg 0 ``` - Use multiple devices: ```sh -ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m models/llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 -sm layer +ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -no-cnv -m models/llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 -sm layer ``` *Notes:* @@ -474,6 +475,12 @@ b. Enable oneAPI running environment: "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 ``` +- if you are using Powershell, enable the runtime environment with the following: + +``` +cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell' +``` + c. Verify installation In the oneAPI command line, run the following to print the available SYCL devices: @@ -504,13 +511,13 @@ You could download the release package for Windows directly, which including bin Choose one of following methods to build from source code. -1. Script +#### 1. Script ```sh .\examples\sycl\win-build-sycl.bat ``` -2. CMake +#### 2. 
CMake On the oneAPI command line window, step into the llama.cpp main directory and run the following: @@ -539,13 +546,84 @@ cmake --preset x64-windows-sycl-debug cmake --build build-x64-windows-sycl-debug -j --target llama-cli ``` -3. Visual Studio +#### 3. Visual Studio -You can use Visual Studio to open llama.cpp folder as a CMake project. Choose the sycl CMake presets (`x64-windows-sycl-release` or `x64-windows-sycl-debug`) before you compile the project. +You have two options to use Visual Studio to build llama.cpp: +- As CMake Project using CMake presets. +- Creating a Visual Studio solution to handle the project. + +**Note**: + +All following commands are executed in PowerShell. + +##### - Open as a CMake Project + +You can use Visual Studio to open the `llama.cpp` folder directly as a CMake project. Before compiling, select one of the SYCL CMake presets: + +- `x64-windows-sycl-release` + +- `x64-windows-sycl-debug` *Notes:* +- For a minimal experimental setup, you can build only the inference executable using: -- In case of a minimal experimental setup, the user can build the inference executable only through `cmake --build build --config Release -j --target llama-cli`. + ```Powershell + cmake --build build --config Release -j --target llama-cli + ``` + +##### - Generating a Visual Studio Solution + +You can use Visual Studio solution to build and work on llama.cpp on Windows. You need to convert the CMake Project into a `.sln` file. + +If you want to use the Intel C++ Compiler for the entire `llama.cpp` project, run the following command: + +```Powershell +cmake -B build -G "Visual Studio 17 2022" -T "Intel C++ Compiler 2025" -A x64 -DGGML_SYCL=ON -DCMAKE_BUILD_TYPE=Release +``` + +If you prefer to use the Intel C++ Compiler only for `ggml-sycl`, ensure that `ggml` and its backend libraries are built as shared libraries ( i.e. `-DBUILD_SHARED_LIBRARIES=ON`, this is default behaviour): + +```Powershell +cmake -B build -G "Visual Studio 17 2022" -A x64 -DGGML_SYCL=ON -DCMAKE_BUILD_TYPE=Release \ + -DSYCL_INCLUDE_DIR="C:\Program Files (x86)\Intel\oneAPI\compiler\latest\include" \ + -DSYCL_LIBRARY_DIR="C:\Program Files (x86)\Intel\oneAPI\compiler\latest\lib" +``` + +If successful the build files have been written to: *path/to/llama.cpp/build* +Open the project file **build/llama.cpp.sln** with Visual Studio. + +Once the Visual Studio solution is created, follow these steps: + +1. Open the solution in Visual Studio. + +2. Right-click on `ggml-sycl` and select **Properties**. + +3. In the left column, expand **C/C++** and select **DPC++**. + +4. In the right panel, find **Enable SYCL Offload** and set it to `Yes`. + +5. Apply the changes and save. + + +*Navigation Path:* + +``` +Properties -> C/C++ -> DPC++ -> Enable SYCL Offload (Yes) +``` + +Now, you can build `llama.cpp` with the SYCL backend as a Visual Studio project. +To do it from menu: `Build -> Build Solution`. +Once it is completed, final results will be in **build/Release/bin** + +*Additional Note* + +- You can avoid specifying `SYCL_INCLUDE_DIR` and `SYCL_LIBRARY_DIR` in the CMake command by setting the environment variables: + + - `SYCL_INCLUDE_DIR_HINT` + + - `SYCL_LIBRARY_DIR_HINT` + +- Above instruction has been tested with Visual Studio 17 Community edition and oneAPI 2025.0. We expect them to work also with future version if the instructions are adapted accordingly. ### III. 
Run the inference @@ -619,13 +697,13 @@ Examples: - Use device 0: ``` -build\bin\llama-cli.exe -m models\llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:\nStep 1:" -n 400 -e -ngl 33 -s 0 -sm none -mg 0 +build\bin\llama-cli.exe -no-cnv -m models\llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:\nStep 1:" -n 400 -e -ngl 33 -s 0 -sm none -mg 0 ``` - Use multiple devices: ``` -build\bin\llama-cli.exe -m models\llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:\nStep 1:" -n 400 -e -ngl 33 -s 0 -sm layer +build\bin\llama-cli.exe -no-cnv -m models\llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:\nStep 1:" -n 400 -e -ngl 33 -s 0 -sm layer ``` @@ -650,8 +728,9 @@ use 1 SYCL GPUs: [0] with Max compute units:512 |--------------------|---------------------------------------|---------------------------------------------| | GGML_SYCL | ON (mandatory) | Enable build with SYCL code path.
FP32 path - recommended for better perforemance than FP16 on quantized model| | GGML_SYCL_TARGET | INTEL *(default)* \| NVIDIA \| AMD | Set the SYCL target device type. | -| GGML_SYCL_DEVICE_ARCH | Optional (except for AMD) | Set the SYCL device architecture, optional except for AMD. Setting the device architecture can improve the performance. See the table [--offload-arch](https://github.com/intel/llvm/blob/sycl/sycl/doc/design/OffloadDesign.md#--offload-arch) for a list of valid architectures. | +| GGML_SYCL_DEVICE_ARCH | Optional (except for AMD) | Set the SYCL device architecture, optional except for AMD. Setting the device architecture can improve the performance. See the table [--offload-arch](https://github.com/intel/llvm/blob/sycl/sycl/doc/design/OffloadDesign.md#--offload-arch) for a list of valid architectures. | | GGML_SYCL_F16 | OFF *(default)* \|ON *(optional)* | Enable FP16 build with SYCL code path. | +| GGML_SYCL_GRAPH | ON *(default)* \|OFF *(Optional)* | Enable build with [SYCL Graph extension](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc). | | CMAKE_C_COMPILER | `icx` *(Linux)*, `icx/cl` *(Windows)* | Set `icx` compiler for SYCL code path. | | CMAKE_CXX_COMPILER | `icpx` *(Linux)*, `icx` *(Windows)* | Set `icpx/icx` compiler for SYCL code path. | @@ -660,8 +739,11 @@ use 1 SYCL GPUs: [0] with Max compute units:512 | Name | Value | Function | |-------------------|------------------|---------------------------------------------------------------------------------------------------------------------------| | GGML_SYCL_DEBUG | 0 (default) or 1 | Enable log function by macro: GGML_SYCL_DEBUG | +| GGML_SYCL_DISABLE_OPT | 0 (default) or 1 | Disable optimize features based on Intel GPU type, to compare the performance increase | +| GGML_SYCL_DISABLE_GRAPH | 0 or 1 (default) | Disable running computations through SYCL Graphs feature. Disabled by default because graph performance isn't yet better than non-graph performance. | | ZES_ENABLE_SYSMAN | 0 (default) or 1 | Support to get free memory of GPU by sycl::aspect::ext_intel_free_memory.
+
## Known Issues

- `Split-mode:[row]` is not supported.
diff --git a/docs/build.md b/docs/build.md
index dd6495028b..c9027c0b58 100644
--- a/docs/build.md
+++ b/docs/build.md
@@ -3,7 +3,7 @@
**To get the Code:**

```bash
-git clone https://github.com/ggerganov/llama.cpp
+git clone https://github.com/ggml-org/llama.cpp
cd llama.cpp
```

@@ -46,7 +46,7 @@ cmake --build build --config Release
```

- Building for Windows (x86, x64 and arm64) with MSVC or clang as compilers:
-  - Install Visual Studio 2022, e.g. via the [Community Edition](https://visualstudio.microsoft.com/de/vs/community/). In the installer, select at least the following options (this also automatically installs the required additional tools like CMake,...):
+  - Install Visual Studio 2022, e.g. via the [Community Edition](https://visualstudio.microsoft.com/vs/community/). In the installer, select at least the following options (this also automatically installs the required additional tools like CMake,...):
    - Tab Workload: Desktop-development with C++
    - Tab Components (select quickly via search): C++-_CMake_ Tools for Windows, _Git_ for Windows, C++-_Clang_ Compiler for Windows, MS-Build Support for LLVM-Toolset (clang)
  - Please remember to always use a Developer Command Prompt / PowerShell for VS2022 for git, build, test
@@ -125,26 +125,73 @@ For detailed info, please refer to [llama.cpp for SYCL](./backend/SYCL.md).

## CUDA

-This provides GPU acceleration using an NVIDIA GPU. Make sure to have the CUDA toolkit installed. You can download it from your Linux distro's package manager (e.g. `apt install nvidia-cuda-toolkit`) or from the [NVIDIA developer site](https://developer.nvidia.com/cuda-downloads).
+This provides GPU acceleration using an NVIDIA GPU. Make sure to have the [CUDA toolkit](https://developer.nvidia.com/cuda-toolkit) installed.

-If you are using Fedora (using Fedora Workstation, or an 'Atomic' variant such as Silverblue), or would like to set up CUDA in a toolbox, please consider our [Fedora CUDA guide](./cuda-fedora.md). Unfortunately, the process is not as simple as one might expect.
+#### Download directly from NVIDIA
+You may find the official downloads here: [NVIDIA developer site](https://developer.nvidia.com/cuda-downloads).

-- Using `CMake`:
-  ```bash
-  cmake -B build -DGGML_CUDA=ON
-  cmake --build build --config Release
-  ```
+#### Compile and run inside a Fedora Toolbox Container
+We also have a [guide](./backend/CUDA-FEDORA.md) for setting up the CUDA toolkit in a Fedora [toolbox container](https://containertoolbx.org/).

-The environment variable [`CUDA_VISIBLE_DEVICES`](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars) can be used to specify which GPU(s) will be used.
+**Recommended for:**
+- ***Necessary*** for users of [Atomic Desktops for Fedora](https://fedoraproject.org/atomic-desktops/), such as [Silverblue](https://fedoraproject.org/atomic-desktops/silverblue/) and [Kinoite](https://fedoraproject.org/atomic-desktops/kinoite/).
+  - (there are no supported CUDA packages for these systems)
+- ***Necessary*** for users whose host is not a [Supported Nvidia CUDA Release Platform](https://developer.nvidia.com/cuda-downloads).
+  - (for example, you may have [Fedora 42 Beta](https://fedoramagazine.org/announcing-fedora-linux-42-beta/) as your host operating system)
+- ***Convenient*** for those running [Fedora Workstation](https://fedoraproject.org/workstation/) or [Fedora KDE Plasma Desktop](https://fedoraproject.org/spins/kde) who want to keep their host system clean.
+- *Optionally*, toolbox packages are available for: [Arch Linux](https://archlinux.org/), [Red Hat Enterprise Linux >= 8.5](https://www.redhat.com/en/technologies/linux-platforms/enterprise-linux), or [Ubuntu](https://ubuntu.com/download)
+
+
+### Compilation
+```bash
+cmake -B build -DGGML_CUDA=ON
+cmake --build build --config Release
+```
+
+### Override Compute Capability Specifications
+
+If `nvcc` cannot detect your GPU, you may get compile warnings such as:
+```text
+nvcc warning : Cannot find valid GPU for '-arch=native', default arch is used
+```
+
+To override the `native` GPU detection:
+
+#### 1. Take note of the `Compute Capability` of your NVIDIA devices: ["CUDA: Your GPU Compute Capability"](https://developer.nvidia.com/cuda-gpus).
+
+```text
+GeForce RTX 4090      8.9
+GeForce RTX 3080 Ti   8.6
+GeForce RTX 3070      8.6
+```
+
+#### 2. Manually list each distinct `Compute Capability` in the `CMAKE_CUDA_ARCHITECTURES` list.
+
+```bash
+cmake -B build -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES="86;89"
+```
+
+### Runtime CUDA environment variables
+
+You may set the [CUDA environment variables](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars) at runtime.
+
+```bash
+# Use `CUDA_VISIBLE_DEVICES` to hide the first compute device.
+CUDA_VISIBLE_DEVICES="-0" ./build/bin/llama-server --model /srv/models/llama.gguf
+```
+
+### Unified Memory

The environment variable `GGML_CUDA_ENABLE_UNIFIED_MEMORY=1` can be used to enable unified memory in Linux. This allows swapping to system RAM instead of crashing when the GPU VRAM is exhausted. In Windows this setting is available in the NVIDIA control panel as `System Memory Fallback`.

+### Performance Tuning
+
The following compilation options are also available to tweak performance:

| Option                        | Legal values           | Default | Description |
|-------------------------------|------------------------|---------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| GGML_CUDA_FORCE_MMQ           | Boolean                | false   | Force the use of custom matrix multiplication kernels for quantized models instead of FP16 cuBLAS even if there is no int8 tensor core implementation available (affects V100, RDNA3). MMQ kernels are enabled by default on GPUs with int8 tensor core support. With MMQ force enabled, speed for large batch sizes will be worse but VRAM consumption will be lower. |
+| GGML_CUDA_FORCE_MMQ           | Boolean                | false   | Force the use of custom matrix multiplication kernels for quantized models instead of FP16 cuBLAS even if there is no int8 tensor core implementation available (affects V100, CDNA and RDNA3+). MMQ kernels are enabled by default on GPUs with int8 tensor core support. With MMQ force enabled, speed for large batch sizes will be worse but VRAM consumption will be lower. |
| GGML_CUDA_FORCE_CUBLAS        | Boolean                | false   | Force the use of FP16 cuBLAS instead of custom matrix multiplication kernels for quantized models |
| GGML_CUDA_F16                 | Boolean                | false   | If enabled, use half-precision floating point arithmetic for the CUDA dequantization + mul mat vec kernels and for the q4_1 and q5_1 matrix matrix multiplication kernels. Can improve performance on relatively recent GPUs. |
| GGML_CUDA_PEER_MAX_BATCH_SIZE | Positive integer       | 128     | Maximum batch size for which to enable peer access between multiple GPUs. Peer access requires either Linux or NVLink. When using NVLink enabling peer access for larger batch sizes is potentially beneficial. |
@@ -152,21 +199,54 @@ The following compilation options are also available to tweak performance:

## MUSA

-This provides GPU acceleration using the MUSA cores of your Moore Threads MTT GPU. Make sure to have the MUSA SDK installed. You can download it from here: [MUSA SDK](https://developer.mthreads.com/sdk/download/musa).
+This provides GPU acceleration using a Moore Threads GPU. Make sure to have the [MUSA SDK](https://developer.mthreads.com/musa/musa-sdk) installed.

-- Using `CMake`:
+#### Download directly from Moore Threads

-  ```bash
-  cmake -B build -DGGML_MUSA=ON
+You may find the official downloads here: [Moore Threads developer site](https://developer.mthreads.com/sdk/download/musa).
+
+### Compilation
+
+```bash
+cmake -B build -DGGML_MUSA=ON
+cmake --build build --config Release
+```
+
+#### Override Compute Capability Specifications
+
+By default, all supported compute capabilities are enabled. To customize this behavior, you can specify the `MUSA_ARCHITECTURES` option in the CMake command:
+
+```bash
+cmake -B build -DGGML_MUSA=ON -DMUSA_ARCHITECTURES="21"
+cmake --build build --config Release
+```
+
+This configuration enables only compute capability `2.1` (MTT S80) during compilation, which can help reduce compilation time.
+
+#### Compilation options
+
+Most of the compilation options available for CUDA should also be available for MUSA, though they haven't been thoroughly tested yet.
+
+- For static builds, add `-DBUILD_SHARED_LIBS=OFF` and `-DCMAKE_POSITION_INDEPENDENT_CODE=ON`:
+  ```
+  cmake -B build -DGGML_MUSA=ON \
+    -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON
  cmake --build build --config Release
  ```

-The environment variable [`MUSA_VISIBLE_DEVICES`](https://docs.mthreads.com/musa-sdk/musa-sdk-doc-online/programming_guide/Z%E9%99%84%E5%BD%95/) can be used to specify which GPU(s) will be used.
+### Runtime MUSA environment variables
+
+You may set the [MUSA environment variables](https://docs.mthreads.com/musa-sdk/musa-sdk-doc-online/programming_guide/Z%E9%99%84%E5%BD%95/) at runtime.
+
+```bash
+# Use `MUSA_VISIBLE_DEVICES` to hide the first compute device.
+MUSA_VISIBLE_DEVICES="-0" ./build/bin/llama-server --model /srv/models/llama.gguf
+```
+
+### Unified Memory

The environment variable `GGML_CUDA_ENABLE_UNIFIED_MEMORY=1` can be used to enable unified memory in Linux. This allows swapping to system RAM instead of crashing when the GPU VRAM is exhausted.

-Most of the compilation options available for CUDA should also be available for MUSA, though they haven't been thoroughly tested yet.
-
## HIP

This provides GPU acceleration on HIP-supported AMD GPUs.
@@ -179,8 +259,12 @@ You can download it from your Linux distro's package manager or from here: [ROCm
cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release \
    && cmake --build build --config Release -- -j 16
```
-   On Linux it is also possible to use unified memory architecture (UMA) to share main memory between the CPU and integrated GPU by setting `-DGGML_HIP_UMA=ON`.
-   However, this hurts performance for non-integrated GPUs (but enables working with integrated GPUs).
+
+   To enhance flash attention performance on RDNA3+ or CDNA architectures, you can utilize the rocWMMA library by enabling the `-DGGML_HIP_ROCWMMA_FATTN=ON` option. This requires rocWMMA headers to be installed on the build system.
+
+   The rocWMMA library is included by default when installing the ROCm SDK using the `rocm` meta package provided by AMD. Alternatively, if you are not using the meta package, you can install the library using the `rocwmma-dev` or `rocwmma-devel` package, depending on your system's package manager.
+
+   As an alternative, you can manually install the library by cloning it from the official [GitHub repository](https://github.com/ROCm/rocWMMA), checking out the corresponding version tag (e.g. `rocm-6.2.4`), and setting `-DCMAKE_CXX_FLAGS="-I/library/include/"` in CMake. This also works under Windows, despite not being officially supported by AMD.

Note that if you get the following error:
```
@@ -210,6 +294,10 @@ You can download it from your Linux distro's package manager or from here: [ROCm
The environment variable [`HIP_VISIBLE_DEVICES`](https://rocm.docs.amd.com/en/latest/understand/gpu_isolation.html#hip-visible-devices) can be used to specify which GPU(s) will be used. If your GPU is not officially supported you can use the environment variable [`HSA_OVERRIDE_GFX_VERSION`] set to a similar GPU, for example 10.3.0 on RDNA2 (e.g. gfx1030, gfx1031, or gfx1035) or 11.0.0 on RDNA3.
+### Unified Memory
+
+On Linux it is possible to use unified memory architecture (UMA) to share main memory between the CPU and integrated GPU by setting the environment variable `GGML_CUDA_ENABLE_UNIFIED_MEMORY=1`. However, this hurts performance for non-integrated GPUs (but enables working with integrated GPUs).
+
## Vulkan

**Windows**

@@ -350,6 +438,116 @@ llama_new_context_with_model: CANN compute buffer size = 1260.81 MiB
For detailed info, such as model/device supports, CANN install, please refer to [llama.cpp for CANN](./backend/CANN.md).

+## Arm® KleidiAI™
+KleidiAI is a library of optimized microkernels for AI workloads, specifically designed for Arm CPUs. These microkernels enhance performance and can be enabled for use by the CPU backend.
+
+To enable KleidiAI, go to the llama.cpp directory and build using CMake:
+```bash
+cmake -B build -DGGML_CPU_KLEIDIAI=ON
+cmake --build build --config Release
+```
+You can verify that KleidiAI is being used by running:
+```bash
+./build/bin/llama-cli -m PATH_TO_MODEL -p "What is a car?"
+```
+If KleidiAI is enabled, the output will contain a line similar to:
+```
+load_tensors: CPU_KLEIDIAI model buffer size = 3474.00 MiB
+```
+KleidiAI's microkernels implement optimized tensor operations using Arm CPU features such as dotprod, int8mm and SME. llama.cpp selects the most efficient kernel based on runtime CPU feature detection. However, on platforms that support SME, you must manually enable SME microkernels by setting the environment variable `GGML_KLEIDIAI_SME=1`.
+
+Depending on your build target, other higher priority backends may be enabled by default. To ensure the CPU backend is used, you must disable the higher priority backends either at compile time, e.g. -DGGML_METAL=OFF, or during run-time using the command line option `--device none`.
+
+## OpenCL
+
+This provides GPU acceleration through OpenCL on recent Adreno GPUs.
+More information about the OpenCL backend can be found in [OPENCL.md](./backend/OPENCL.md).
+
+### Android
+
+Assume NDK is available in `$ANDROID_NDK`. First, install the OpenCL headers and ICD loader library if they are not already available:
+
+```sh
+mkdir -p ~/dev/llm
+cd ~/dev/llm
+
+git clone https://github.com/KhronosGroup/OpenCL-Headers && \
+cd OpenCL-Headers && \
+cp -r CL $ANDROID_NDK/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include
+
+cd ~/dev/llm
+
+git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader && \
+cd OpenCL-ICD-Loader && \
+mkdir build_ndk && cd build_ndk && \
+cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Release \
+  -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
+  -DOPENCL_ICD_LOADER_HEADERS_DIR=$ANDROID_NDK/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include \
+  -DANDROID_ABI=arm64-v8a \
+  -DANDROID_PLATFORM=24 \
+  -DANDROID_STL=c++_shared && \
+ninja && \
+cp libOpenCL.so $ANDROID_NDK/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android
+```
+
+Then build llama.cpp with OpenCL enabled:
+
+```sh
+cd ~/dev/llm
+
+git clone https://github.com/ggml-org/llama.cpp && \
+cd llama.cpp && \
+mkdir build-android && cd build-android
+
+cmake .. -G Ninja \
+  -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
+  -DANDROID_ABI=arm64-v8a \
+  -DANDROID_PLATFORM=android-28 \
+  -DBUILD_SHARED_LIBS=OFF \
+  -DGGML_OPENCL=ON
+
+ninja
+```
+
+### Windows Arm64
+
+First, install the OpenCL headers and ICD loader library if they are not already available:
+
+```powershell
+mkdir -p ~/dev/llm
+
+cd ~/dev/llm
+git clone https://github.com/KhronosGroup/OpenCL-Headers && cd OpenCL-Headers
+mkdir build && cd build
+cmake .. -G Ninja `
+  -DBUILD_TESTING=OFF `
+  -DOPENCL_HEADERS_BUILD_TESTING=OFF `
+  -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF `
+  -DCMAKE_INSTALL_PREFIX="$HOME/dev/llm/opencl"
+cmake --build . --target install
+
+cd ~/dev/llm
+git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader && cd OpenCL-ICD-Loader
+mkdir build && cd build
+cmake .. -G Ninja `
+  -DCMAKE_BUILD_TYPE=Release `
+  -DCMAKE_PREFIX_PATH="$HOME/dev/llm/opencl" `
+  -DCMAKE_INSTALL_PREFIX="$HOME/dev/llm/opencl"
+cmake --build . --target install
+```
+
+Then build llama.cpp with OpenCL enabled:
+
+```powershell
+cmake ..
-G Ninja ` + -DCMAKE_TOOLCHAIN_FILE="$HOME/dev/llm/llama.cpp/cmake/arm64-windows-llvm.cmake" ` + -DCMAKE_BUILD_TYPE=Release ` + -DCMAKE_PREFIX_PATH="$HOME/dev/llm/opencl" ` + -DBUILD_SHARED_LIBS=OFF ` + -DGGML_OPENCL=ON +ninja +``` + ## Android To read documentation for how to build on Android, [click here](./android.md) diff --git a/docs/development/HOWTO-add-model.md b/docs/development/HOWTO-add-model.md index 8fcd708113..78c6f76077 100644 --- a/docs/development/HOWTO-add-model.md +++ b/docs/development/HOWTO-add-model.md @@ -104,16 +104,16 @@ Note: to debug the inference graph: you can use [llama-eval-callback](/examples/ ## GGUF specification -https://github.com/ggerganov/ggml/blob/master/docs/gguf.md +https://github.com/ggml-org/ggml/blob/master/docs/gguf.md ## Resources -- YaRN RoPE scaling https://github.com/ggerganov/llama.cpp/pull/2268 -- support Baichuan serial models https://github.com/ggerganov/llama.cpp/pull/3009 -- support attention bias https://github.com/ggerganov/llama.cpp/pull/4283 -- Mixtral support https://github.com/ggerganov/llama.cpp/pull/4406 -- BERT embeddings https://github.com/ggerganov/llama.cpp/pull/5423 -- Grok-1 support https://github.com/ggerganov/llama.cpp/pull/6204 -- Command R Plus support https://github.com/ggerganov/llama.cpp/pull/6491 -- support arch DBRX https://github.com/ggerganov/llama.cpp/pull/6515 -- How to convert HuggingFace model to GGUF format https://github.com/ggerganov/llama.cpp/discussions/2948 +- YaRN RoPE scaling https://github.com/ggml-org/llama.cpp/pull/2268 +- support Baichuan serial models https://github.com/ggml-org/llama.cpp/pull/3009 +- support attention bias https://github.com/ggml-org/llama.cpp/pull/4283 +- Mixtral support https://github.com/ggml-org/llama.cpp/pull/4406 +- BERT embeddings https://github.com/ggml-org/llama.cpp/pull/5423 +- Grok-1 support https://github.com/ggml-org/llama.cpp/pull/6204 +- Command R Plus support https://github.com/ggml-org/llama.cpp/pull/6491 +- support arch DBRX https://github.com/ggml-org/llama.cpp/pull/6515 +- How to convert HuggingFace model to GGUF format https://github.com/ggml-org/llama.cpp/discussions/2948 diff --git a/docs/docker.md b/docs/docker.md index dac9a9ec16..343146dbd2 100644 --- a/docs/docker.md +++ b/docs/docker.md @@ -7,21 +7,21 @@ ## Images We have three Docker images available for this project: -1. `ghcr.io/ggerganov/llama.cpp:full`: This image includes both the main executable file and the tools to convert LLaMA models into ggml and convert into 4-bit quantization. (platforms: `linux/amd64`, `linux/arm64`) -2. `ghcr.io/ggerganov/llama.cpp:light`: This image only includes the main executable file. (platforms: `linux/amd64`, `linux/arm64`) -3. `ghcr.io/ggerganov/llama.cpp:server`: This image only includes the server executable file. (platforms: `linux/amd64`, `linux/arm64`) +1. `ghcr.io/ggml-org/llama.cpp:full`: This image includes both the main executable file and the tools to convert LLaMA models into ggml and convert into 4-bit quantization. (platforms: `linux/amd64`, `linux/arm64`) +2. `ghcr.io/ggml-org/llama.cpp:light`: This image only includes the main executable file. (platforms: `linux/amd64`, `linux/arm64`) +3. `ghcr.io/ggml-org/llama.cpp:server`: This image only includes the server executable file. (platforms: `linux/amd64`, `linux/arm64`) Additionally, there the following images, similar to the above: -- `ghcr.io/ggerganov/llama.cpp:full-cuda`: Same as `full` but compiled with CUDA support. 
(platforms: `linux/amd64`) -- `ghcr.io/ggerganov/llama.cpp:light-cuda`: Same as `light` but compiled with CUDA support. (platforms: `linux/amd64`) -- `ghcr.io/ggerganov/llama.cpp:server-cuda`: Same as `server` but compiled with CUDA support. (platforms: `linux/amd64`) -- `ghcr.io/ggerganov/llama.cpp:full-rocm`: Same as `full` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`) -- `ghcr.io/ggerganov/llama.cpp:light-rocm`: Same as `light` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`) -- `ghcr.io/ggerganov/llama.cpp:server-rocm`: Same as `server` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`) -- `ghcr.io/ggerganov/llama.cpp:full-musa`: Same as `full` but compiled with MUSA support. (platforms: `linux/amd64`) -- `ghcr.io/ggerganov/llama.cpp:light-musa`: Same as `light` but compiled with MUSA support. (platforms: `linux/amd64`) -- `ghcr.io/ggerganov/llama.cpp:server-musa`: Same as `server` but compiled with MUSA support. (platforms: `linux/amd64`) +- `ghcr.io/ggml-org/llama.cpp:full-cuda`: Same as `full` but compiled with CUDA support. (platforms: `linux/amd64`) +- `ghcr.io/ggml-org/llama.cpp:light-cuda`: Same as `light` but compiled with CUDA support. (platforms: `linux/amd64`) +- `ghcr.io/ggml-org/llama.cpp:server-cuda`: Same as `server` but compiled with CUDA support. (platforms: `linux/amd64`) +- `ghcr.io/ggml-org/llama.cpp:full-rocm`: Same as `full` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`) +- `ghcr.io/ggml-org/llama.cpp:light-rocm`: Same as `light` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`) +- `ghcr.io/ggml-org/llama.cpp:server-rocm`: Same as `server` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`) +- `ghcr.io/ggml-org/llama.cpp:full-musa`: Same as `full` but compiled with MUSA support. (platforms: `linux/amd64`) +- `ghcr.io/ggml-org/llama.cpp:light-musa`: Same as `light` but compiled with MUSA support. (platforms: `linux/amd64`) +- `ghcr.io/ggml-org/llama.cpp:server-musa`: Same as `server` but compiled with MUSA support. (platforms: `linux/amd64`) The GPU enabled images are not currently tested by CI beyond being built. They are not built with any variation from the ones in the Dockerfiles defined in [.devops/](../.devops/) and the GitHub Action defined in [.github/workflows/docker.yml](../.github/workflows/docker.yml). If you need different settings (for example, a different CUDA, ROCm or MUSA library, you'll need to build the images locally for now). @@ -32,25 +32,25 @@ The easiest way to download the models, convert them to ggml and optimize them i Replace `/path/to/models` below with the actual path where you downloaded the models. ```bash -docker run -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:full --all-in-one "/models/" 7B +docker run -v /path/to/models:/models ghcr.io/ggml-org/llama.cpp:full --all-in-one "/models/" 7B ``` On completion, you are ready to play! 
```bash -docker run -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:full --run -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 +docker run -v /path/to/models:/models ghcr.io/ggml-org/llama.cpp:full --run -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 ``` or with a light image: ```bash -docker run -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:light -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 +docker run -v /path/to/models:/models ghcr.io/ggml-org/llama.cpp:light -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 ``` or with a server image: ```bash -docker run -v /path/to/models:/models -p 8000:8000 ghcr.io/ggerganov/llama.cpp:server -m /models/7B/ggml-model-q4_0.gguf --port 8000 --host 0.0.0.0 -n 512 +docker run -v /path/to/models:/models -p 8000:8000 ghcr.io/ggml-org/llama.cpp:server -m /models/7B/ggml-model-q4_0.gguf --port 8000 --host 0.0.0.0 -n 512 ``` ## Docker With CUDA @@ -69,7 +69,7 @@ You may want to pass in some different `ARGS`, depending on the CUDA environment The defaults are: -- `CUDA_VERSION` set to `12.6.0` +- `CUDA_VERSION` set to `12.4.0` - `CUDA_DOCKER_ARCH` set to the cmake build default, which includes all the supported architectures The resulting images, are essentially the same as the non-CUDA images: @@ -104,7 +104,7 @@ You may want to pass in some different `ARGS`, depending on the MUSA environment The defaults are: -- `MUSA_VERSION` set to `rc3.1.0` +- `MUSA_VERSION` set to `rc3.1.1` The resulting images, are essentially the same as the non-MUSA images: diff --git a/docs/function-calling.md b/docs/function-calling.md new file mode 100644 index 0000000000..c3873c3fa6 --- /dev/null +++ b/docs/function-calling.md @@ -0,0 +1,394 @@ +# Function Calling + +[chat.h](../common/chat.h) (https://github.com/ggml-org/llama.cpp/pull/9639) adds support for [OpenAI-style function calling](https://platform.openai.com/docs/guides/function-calling) and is used in: +- `llama-server` when started w/ `--jinja` flag +- `llama-cli` (WIP: https://github.com/ggml-org/llama.cpp/pull/11556) + +## Universal support w/ Native & Generic handlers + +Function calling is supported for all models (see https://github.com/ggml-org/llama.cpp/pull/9639): + +- Native tool call formats supported: + - Llama 3.1 / 3.3 (including builtin tools support - tool names for `wolfram_alpha`, `web_search` / `brave_search`, `code_interpreter`), Llama 3.2 + - Functionary v3.1 / v3.2 + - Hermes 2/3, Qwen 2.5 + - Qwen 2.5 Coder (WIP: https://github.com/ggml-org/llama.cpp/pull/12034) + - Mistral Nemo + - Firefunction v2 + - Command R7B + - DeepSeek R1 (WIP / seems reluctant to call any tools?) + +- Generic tool call is supported when the template isn't recognized by native format handlers (you'll see `Chat format: Generic` in the logs). + - Use `--chat-template-file` to override the template when appropriate (see examples below) + - Generic support may consume more tokens and be less efficient than a model's native format. + +
+Show some common templates and which format handler they use + +| Template | Format | +|----------|--------| +| Almawave-Velvet-14B.jinja | Hermes 2 Pro | +| AtlaAI-Selene-1-Mini-Llama-3.1-8B.jinja | Llama 3.x | +| CohereForAI-aya-expanse-8b.jinja | Generic | +| CohereForAI-c4ai-command-r-plus-default.jinja | Generic | +| CohereForAI-c4ai-command-r-plus-rag.jinja | Generic | +| CohereForAI-c4ai-command-r-plus-tool_use.jinja | Generic | +| CohereForAI-c4ai-command-r7b-12-2024-default.jinja | Command R7B (extract reasoning) | +| CohereForAI-c4ai-command-r7b-12-2024-rag.jinja | Command R7B (extract reasoning) | +| CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja | Command R7B (extract reasoning) | +| CohereForAI-c4ai-command-r7b-12-2024.jinja | Generic | +| DavieLion-Llama-3.2-1B-SPIN-iter3.jinja | Generic | +| Delta-Vector-Rei-12B.jinja | Mistral Nemo | +| EpistemeAI-Mistral-Nemo-Instruct-12B-Philosophy-Math.jinja | Mistral Nemo | +| FlofloB-83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit.jinja | Hermes 2 Pro | +| FlofloB-test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit.jinja | Generic | +| HelpingAI-HAI-SER.jinja | Generic | +| HuggingFaceTB-SmolLM2-1.7B-Instruct.jinja | Generic | +| HuggingFaceTB-SmolLM2-135M-Instruct.jinja | Generic | +| HuggingFaceTB-SmolLM2-360M-Instruct.jinja | Generic | +| INSAIT-Institute-BgGPT-Gemma-2-27B-IT-v1.0.jinja | Generic | +| Ihor-Text2Graph-R1-Qwen2.5-0.5b.jinja | Hermes 2 Pro | +| Infinigence-Megrez-3B-Instruct.jinja | Generic | +| Josephgflowers-TinyLlama_v1.1_math_code-world-test-1.jinja | Generic | +| LGAI-EXAONE-EXAONE-3.5-2.4B-Instruct.jinja | Generic | +| LGAI-EXAONE-EXAONE-3.5-7.8B-Instruct.jinja | Generic | +| LatitudeGames-Wayfarer-12B.jinja | Generic | +| Magpie-Align-Llama-3-8B-Magpie-Align-v0.1.jinja | Generic | +| Magpie-Align-Llama-3.1-8B-Magpie-Align-v0.1.jinja | Generic | +| MaziyarPanahi-calme-3.2-instruct-78b.jinja | Generic | +| MiniMaxAI-MiniMax-Text-01.jinja | Generic | +| MiniMaxAI-MiniMax-VL-01.jinja | Generic | +| NaniDAO-deepseek-r1-qwen-2.5-32B-ablated.jinja | DeepSeek R1 (extract reasoning) | +| NexaAIDev-Octopus-v2.jinja | Generic | +| NousResearch-Hermes-2-Pro-Llama-3-8B-default.jinja | Generic | +| NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja | Hermes 2 Pro | +| NousResearch-Hermes-2-Pro-Mistral-7B-default.jinja | Generic | +| NousResearch-Hermes-2-Pro-Mistral-7B-tool_use.jinja | Hermes 2 Pro | +| NousResearch-Hermes-3-Llama-3.1-70B-default.jinja | Generic | +| NousResearch-Hermes-3-Llama-3.1-70B-tool_use.jinja | Hermes 2 Pro | +| NovaSky-AI-Sky-T1-32B-Flash.jinja | Hermes 2 Pro | +| NovaSky-AI-Sky-T1-32B-Preview.jinja | Hermes 2 Pro | +| OnlyCheeini-greesychat-turbo.jinja | Generic | +| Orenguteng-Llama-3.1-8B-Lexi-Uncensored-V2.jinja | Llama 3.x | +| OrionStarAI-Orion-14B-Chat.jinja | Generic | +| PowerInfer-SmallThinker-3B-Preview.jinja | Generic | +| PrimeIntellect-INTELLECT-1-Instruct.jinja | Generic | +| Qwen-QVQ-72B-Preview.jinja | Generic | +| Qwen-QwQ-32B-Preview.jinja | Hermes 2 Pro | +| Qwen-Qwen1.5-7B-Chat.jinja | Generic | +| Qwen-Qwen2-7B-Instruct.jinja | Generic | +| Qwen-Qwen2-VL-72B-Instruct.jinja | Generic | +| Qwen-Qwen2-VL-7B-Instruct.jinja | Generic | +| Qwen-Qwen2.5-0.5B.jinja | Hermes 2 Pro | +| Qwen-Qwen2.5-1.5B-Instruct.jinja | Hermes 2 Pro | +| Qwen-Qwen2.5-14B-Instruct-1M.jinja | Hermes 2 Pro | +| Qwen-Qwen2.5-14B.jinja | Hermes 2 Pro | +| Qwen-Qwen2.5-32B-Instruct.jinja | Hermes 2 Pro | +| Qwen-Qwen2.5-32B.jinja | Hermes 2 Pro | +| 
Qwen-Qwen2.5-3B-Instruct.jinja | Hermes 2 Pro | +| Qwen-Qwen2.5-72B-Instruct.jinja | Hermes 2 Pro | +| Qwen-Qwen2.5-7B-Instruct-1M.jinja | Hermes 2 Pro | +| Qwen-Qwen2.5-7B-Instruct.jinja | Hermes 2 Pro | +| Qwen-Qwen2.5-7B.jinja | Hermes 2 Pro | +| Qwen-Qwen2.5-Coder-32B-Instruct.jinja | Hermes 2 Pro | +| Qwen-Qwen2.5-Coder-7B-Instruct.jinja | Hermes 2 Pro | +| Qwen-Qwen2.5-Math-1.5B.jinja | Hermes 2 Pro | +| Qwen-Qwen2.5-Math-7B-Instruct.jinja | Hermes 2 Pro | +| Qwen-Qwen2.5-VL-3B-Instruct.jinja | Hermes 2 Pro | +| Qwen-Qwen2.5-VL-72B-Instruct.jinja | Hermes 2 Pro | +| Qwen-Qwen2.5-VL-7B-Instruct.jinja | Hermes 2 Pro | +| RWKV-Red-Team-ARWKV-7B-Preview-0.1.jinja | Hermes 2 Pro | +| SakanaAI-TinySwallow-1.5B-Instruct.jinja | Hermes 2 Pro | +| SakanaAI-TinySwallow-1.5B.jinja | Hermes 2 Pro | +| Sao10K-70B-L3.3-Cirrus-x1.jinja | Llama 3.x | +| SentientAGI-Dobby-Mini-Leashed-Llama-3.1-8B.jinja | Llama 3.x | +| SentientAGI-Dobby-Mini-Unhinged-Llama-3.1-8B.jinja | Llama 3.x | +| Steelskull-L3.3-Damascus-R1.jinja | Llama 3.x | +| Steelskull-L3.3-MS-Nevoria-70b.jinja | Llama 3.x | +| Steelskull-L3.3-Nevoria-R1-70b.jinja | Llama 3.x | +| THUDM-glm-4-9b-chat.jinja | Generic | +| THUDM-glm-edge-1.5b-chat.jinja | Generic | +| Tarek07-Progenitor-V1.1-LLaMa-70B.jinja | Llama 3.x | +| TheBloke-FusionNet_34Bx2_MoE-AWQ.jinja | Generic | +| TinyLlama-TinyLlama-1.1B-Chat-v1.0.jinja | Generic | +| UCLA-AGI-Mistral7B-PairRM-SPPO-Iter3.jinja | Generic | +| ValiantLabs-Llama3.1-8B-Enigma.jinja | Llama 3.x | +| abacusai-Fewshot-Metamath-OrcaVicuna-Mistral.jinja | Generic | +| ai21labs-AI21-Jamba-1.5-Large.jinja | Generic | +| allenai-Llama-3.1-Tulu-3-405B-SFT.jinja | Generic | +| allenai-Llama-3.1-Tulu-3-405B.jinja | Generic | +| allenai-Llama-3.1-Tulu-3-8B.jinja | Generic | +| arcee-ai-Virtuoso-Lite.jinja | Hermes 2 Pro | +| arcee-ai-Virtuoso-Medium-v2.jinja | Hermes 2 Pro | +| arcee-ai-Virtuoso-Small-v2.jinja | Hermes 2 Pro | +| avemio-GRAG-NEMO-12B-ORPO-HESSIAN-AI.jinja | Generic | +| bespokelabs-Bespoke-Stratos-7B.jinja | Hermes 2 Pro | +| bfuzzy1-acheron-m1a-llama.jinja | Generic | +| bofenghuang-vigogne-2-70b-chat.jinja | Generic | +| bytedance-research-UI-TARS-72B-DPO.jinja | Generic | +| bytedance-research-UI-TARS-7B-DPO.jinja | Generic | +| bytedance-research-UI-TARS-7B-SFT.jinja | Generic | +| carsenk-phi3.5_mini_exp_825_uncensored.jinja | Generic | +| cyberagent-DeepSeek-R1-Distill-Qwen-14B-Japanese.jinja | DeepSeek R1 (extract reasoning) | +| cyberagent-DeepSeek-R1-Distill-Qwen-32B-Japanese.jinja | DeepSeek R1 (extract reasoning) | +| databricks-dbrx-instruct.jinja | Generic | +| deepseek-ai-DeepSeek-Coder-V2-Instruct.jinja | Generic | +| deepseek-ai-DeepSeek-Coder-V2-Lite-Base.jinja | Generic | +| deepseek-ai-DeepSeek-Coder-V2-Lite-Instruct.jinja | Generic | +| deepseek-ai-DeepSeek-R1-Distill-Llama-70B.jinja | DeepSeek R1 (extract reasoning) | +| deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja | DeepSeek R1 (extract reasoning) | +| deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B.jinja | DeepSeek R1 (extract reasoning) | +| deepseek-ai-DeepSeek-R1-Distill-Qwen-14B.jinja | DeepSeek R1 (extract reasoning) | +| deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja | DeepSeek R1 (extract reasoning) | +| deepseek-ai-DeepSeek-R1-Distill-Qwen-7B.jinja | DeepSeek R1 (extract reasoning) | +| deepseek-ai-DeepSeek-R1-Zero.jinja | DeepSeek R1 (extract reasoning) | +| deepseek-ai-DeepSeek-R1.jinja | DeepSeek R1 (extract reasoning) | +| deepseek-ai-DeepSeek-V2-Lite.jinja | Generic | +| deepseek-ai-DeepSeek-V2.5.jinja | 
DeepSeek R1 (extract reasoning) | +| deepseek-ai-DeepSeek-V3.jinja | DeepSeek R1 (extract reasoning) | +| deepseek-ai-deepseek-coder-33b-instruct.jinja | Generic | +| deepseek-ai-deepseek-coder-6.7b-instruct.jinja | Generic | +| deepseek-ai-deepseek-coder-7b-instruct-v1.5.jinja | Generic | +| deepseek-ai-deepseek-llm-67b-chat.jinja | Generic | +| deepseek-ai-deepseek-llm-7b-chat.jinja | Generic | +| dicta-il-dictalm2.0-instruct.jinja | Generic | +| ehristoforu-Falcon3-8B-Franken-Basestruct.jinja | Hermes 2 Pro | +| fireworks-ai-llama-3-firefunction-v2.jinja | FireFunction v2 | +| godlikehhd-alpaca_data_sampled_ifd_new_5200.jinja | Hermes 2 Pro | +| godlikehhd-alpaca_data_score_max_0.7_2600.jinja | Hermes 2 Pro | +| google-gemma-2-27b-it.jinja | Generic | +| google-gemma-2-2b-it.jinja | Generic | +| google-gemma-2-2b-jpn-it.jinja | Generic | +| google-gemma-7b-it.jinja | Generic | +| huihui-ai-DeepSeek-R1-Distill-Llama-70B-abliterated.jinja | DeepSeek R1 (extract reasoning) | +| huihui-ai-DeepSeek-R1-Distill-Llama-8B-abliterated.jinja | DeepSeek R1 (extract reasoning) | +| huihui-ai-DeepSeek-R1-Distill-Qwen-14B-abliterated-v2.jinja | DeepSeek R1 (extract reasoning) | +| huihui-ai-DeepSeek-R1-Distill-Qwen-32B-abliterated.jinja | DeepSeek R1 (extract reasoning) | +| huihui-ai-DeepSeek-R1-Distill-Qwen-7B-abliterated-v2.jinja | DeepSeek R1 (extract reasoning) | +| huihui-ai-Qwen2.5-14B-Instruct-1M-abliterated.jinja | Hermes 2 Pro | +| ibm-granite-granite-3.1-8b-instruct.jinja | Generic | +| indischepartij-MiniCPM-3B-OpenHermes-2.5-v2.jinja | Generic | +| inflatebot-MN-12B-Mag-Mell-R1.jinja | Generic | +| jinaai-ReaderLM-v2.jinja | Generic | +| kms7530-chemeng_qwen-math-7b_24_1_100_1_nonmath.jinja | Hermes 2 Pro | +| knifeayumu-Cydonia-v1.3-Magnum-v4-22B.jinja | Mistral Nemo | +| langgptai-qwen1.5-7b-chat-sa-v0.1.jinja | Generic | +| lightblue-DeepSeek-R1-Distill-Qwen-7B-Japanese.jinja | DeepSeek R1 (extract reasoning) | +| mattshumer-Reflection-Llama-3.1-70B.jinja | Generic | +| meetkai-functionary-medium-v3.1.jinja | Functionary v3.1 Llama 3.1 | +| meetkai-functionary-medium-v3.2.jinja | Functionary v3.2 | +| meta-llama-Llama-2-7b-chat-hf.jinja | Generic | +| meta-llama-Llama-3.1-8B-Instruct.jinja | Llama 3.x | +| meta-llama-Llama-3.2-11B-Vision-Instruct.jinja | Llama 3.x | +| meta-llama-Llama-3.2-1B-Instruct.jinja | Llama 3.x | +| meta-llama-Llama-3.2-3B-Instruct.jinja | Llama 3.x | +| meta-llama-Llama-3.3-70B-Instruct.jinja | Llama 3.x | +| meta-llama-Meta-Llama-3-8B-Instruct.jinja | Generic | +| meta-llama-Meta-Llama-3.1-8B-Instruct.jinja | Llama 3.x | +| microsoft-Phi-3-medium-4k-instruct.jinja | Generic | +| microsoft-Phi-3-mini-4k-instruct.jinja | Generic | +| microsoft-Phi-3-small-8k-instruct.jinja | Generic | +| microsoft-Phi-3.5-mini-instruct.jinja | Generic | +| microsoft-Phi-3.5-vision-instruct.jinja | Generic | +| microsoft-phi-4.jinja | Generic | +| migtissera-Tess-3-Mistral-Nemo-12B.jinja | Generic | +| ministral-Ministral-3b-instruct.jinja | Generic | +| mistralai-Codestral-22B-v0.1.jinja | Generic | +| mistralai-Mistral-7B-Instruct-v0.1.jinja | Generic | +| mistralai-Mistral-7B-Instruct-v0.2.jinja | Generic | +| mistralai-Mistral-7B-Instruct-v0.3.jinja | Mistral Nemo | +| mistralai-Mistral-Large-Instruct-2407.jinja | Mistral Nemo | +| mistralai-Mistral-Large-Instruct-2411.jinja | Generic | +| mistralai-Mistral-Nemo-Instruct-2407.jinja | Mistral Nemo | +| mistralai-Mistral-Small-24B-Instruct-2501.jinja | Generic | +| mistralai-Mixtral-8x7B-Instruct-v0.1.jinja | Generic | +| 
mkurman-Qwen2.5-14B-DeepSeek-R1-1M.jinja | Hermes 2 Pro | +| mlabonne-AlphaMonarch-7B.jinja | Generic | +| mlx-community-Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32.jinja | Hermes 2 Pro | +| mlx-community-Qwen2.5-VL-7B-Instruct-8bit.jinja | Hermes 2 Pro | +| mobiuslabsgmbh-DeepSeek-R1-ReDistill-Qwen-1.5B-v1.1.jinja | DeepSeek R1 (extract reasoning) | +| netcat420-MFANNv0.20.jinja | Generic | +| netcat420-MFANNv0.24.jinja | Generic | +| netease-youdao-Confucius-o1-14B.jinja | Hermes 2 Pro | +| nvidia-AceMath-7B-RM.jinja | Hermes 2 Pro | +| nvidia-Eagle2-1B.jinja | Hermes 2 Pro | +| nvidia-Eagle2-9B.jinja | Hermes 2 Pro | +| nvidia-Llama-3.1-Nemotron-70B-Instruct-HF.jinja | Llama 3.x | +| onnx-community-DeepSeek-R1-Distill-Qwen-1.5B-ONNX.jinja | DeepSeek R1 (extract reasoning) | +| open-thoughts-OpenThinker-7B.jinja | Hermes 2 Pro | +| openchat-openchat-3.5-0106.jinja | Generic | +| pankajmathur-orca_mini_v6_8b.jinja | Generic | +| princeton-nlp-Mistral-7B-Base-SFT-RDPO.jinja | Generic | +| princeton-nlp-Mistral-7B-Instruct-DPO.jinja | Generic | +| princeton-nlp-Mistral-7B-Instruct-RDPO.jinja | Generic | +| prithivMLmods-Bellatrix-Tiny-1.5B-R1.jinja | Hermes 2 Pro | +| prithivMLmods-Bellatrix-Tiny-1B-R1.jinja | Llama 3.x | +| prithivMLmods-Bellatrix-Tiny-1B-v3.jinja | Generic | +| prithivMLmods-Bellatrix-Tiny-3B-R1.jinja | Llama 3.x | +| prithivMLmods-Blaze-14B-xElite.jinja | Generic | +| prithivMLmods-Calcium-Opus-14B-Elite2-R1.jinja | Hermes 2 Pro | +| prithivMLmods-Calme-Ties-78B.jinja | Generic | +| prithivMLmods-Calme-Ties2-78B.jinja | Generic | +| prithivMLmods-Calme-Ties3-78B.jinja | Generic | +| prithivMLmods-ChemQwen2-vL.jinja | Generic | +| prithivMLmods-GWQ2b.jinja | Generic | +| prithivMLmods-LatexMind-2B-Codec.jinja | Generic | +| prithivMLmods-Llama-3.2-6B-AlgoCode.jinja | Llama 3.x | +| prithivMLmods-Megatron-Opus-14B-Exp.jinja | Hermes 2 Pro | +| prithivMLmods-Megatron-Opus-14B-Stock.jinja | Hermes 2 Pro | +| prithivMLmods-Megatron-Opus-7B-Exp.jinja | Hermes 2 Pro | +| prithivMLmods-Omni-Reasoner-Merged.jinja | Hermes 2 Pro | +| prithivMLmods-Omni-Reasoner4-Merged.jinja | Hermes 2 Pro | +| prithivMLmods-Primal-Opus-14B-Optimus-v1.jinja | Hermes 2 Pro | +| prithivMLmods-QwQ-Math-IO-500M.jinja | Hermes 2 Pro | +| prithivMLmods-Qwen-7B-Distill-Reasoner.jinja | DeepSeek R1 (extract reasoning) | +| prithivMLmods-Qwen2.5-1.5B-DeepSeek-R1-Instruct.jinja | Hermes 2 Pro | +| prithivMLmods-Qwen2.5-14B-DeepSeek-R1-1M.jinja | Hermes 2 Pro | +| prithivMLmods-Qwen2.5-32B-DeepSeek-R1-Instruct.jinja | Hermes 2 Pro | +| prithivMLmods-Qwen2.5-7B-DeepSeek-R1-1M.jinja | Hermes 2 Pro | +| prithivMLmods-Triangulum-v2-10B.jinja | Hermes 2 Pro | +| qingy2024-Falcon3-2x10B-MoE-Instruct.jinja | Hermes 2 Pro | +| rubenroy-Zurich-14B-GCv2-5m.jinja | Hermes 2 Pro | +| rubenroy-Zurich-7B-GCv2-5m.jinja | Hermes 2 Pro | +| silma-ai-SILMA-Kashif-2B-Instruct-v1.0.jinja | Generic | +| simplescaling-s1-32B.jinja | Hermes 2 Pro | +| sometimesanotion-Lamarck-14B-v0.7.jinja | Hermes 2 Pro | +| sonthenguyen-zephyr-sft-bnb-4bit-DPO-mtbr-180steps.jinja | Generic | +| sthenno-tempesthenno-icy-0130.jinja | Generic | +| sumink-qwft.jinja | Hermes 2 Pro | +| teknium-OpenHermes-2.5-Mistral-7B.jinja | Generic | +| thirdeyeai-elevate360m.jinja | Generic | +| tiiuae-Falcon3-10B-Instruct.jinja | Hermes 2 Pro | +| unsloth-DeepSeek-R1-Distill-Llama-8B-unsloth-bnb-4bit.jinja | DeepSeek R1 (extract reasoning) | +| unsloth-DeepSeek-R1-Distill-Llama-8B.jinja | DeepSeek R1 (extract reasoning) | +| 
unsloth-DeepSeek-R1.jinja | DeepSeek R1 (extract reasoning) | +| unsloth-Mistral-Small-24B-Instruct-2501-unsloth-bnb-4bit.jinja | Generic | +| upstage-solar-pro-preview-instruct.jinja | Generic | +| whyhow-ai-PatientSeek.jinja | Generic | +| xwen-team-Xwen-72B-Chat.jinja | Hermes 2 Pro | +| xwen-team-Xwen-7B-Chat.jinja | Hermes 2 Pro | + +This table can be generated with: + +```bash +./build/bin/test-chat ../minja/build/tests/*.jinja 2>/dev/null +``` + +
+
+# Usage - need tool-aware Jinja template
+
+First, start a server with any model, but make sure it has a tools-enabled template: you can verify this by inspecting the `chat_template` or `chat_template_tool_use` properties in `http://localhost:8080/props`.
+
+Here are some models known to work (w/ chat template override when needed):
+
+```shell
+# Native support:
+
+llama-server --jinja -fa -hf bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M
+llama-server --jinja -fa -hf bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q6_K_L
+llama-server --jinja -fa -hf bartowski/Llama-3.3-70B-Instruct-GGUF:Q4_K_M
+
+# Native support for DeepSeek R1 works best w/ our template override (official template is buggy, although we do work around it)
+
+llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q6_K_L \
+  --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja
+
+llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M \
+  --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja
+
+# Native support requires the right template for these GGUFs:
+
+llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M \
+  --chat-template-file models/templates/meetkai-functionary-medium-v3.2.jinja
+
+llama-server --jinja -fa -hf bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M \
+  --chat-template-file models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja
+
+llama-server --jinja -fa -hf bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M \
+  --chat-template-file models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja
+
+llama-server --jinja -fa -hf bartowski/firefunction-v2-GGUF -hff firefunction-v2-IQ1_M.gguf \
+  --chat-template-file models/templates/fireworks-ai-llama-3-firefunction-v2.jinja
+
+llama-server --jinja -fa -hf bartowski/c4ai-command-r7b-12-2024-GGUF:Q6_K_L \
+  --chat-template-file models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja
+
+# Generic format support
+llama-server --jinja -fa -hf bartowski/phi-4-GGUF:Q4_0
+llama-server --jinja -fa -hf bartowski/gemma-2-2b-it-GGUF:Q8_0
+llama-server --jinja -fa -hf bartowski/c4ai-command-r-v01-GGUF:Q2_K
+```
+
+To get the official template from the original HuggingFace repos, you can use [scripts/get_chat_template.py](../scripts/get_chat_template.py) (see example invocations in [models/templates/README.md](../models/templates/README.md)).
+
+> [!TIP]
+> If there is no official `tool_use` Jinja template, you may want to set `--chat-template chatml` to use a default that works with many models (YMMV!), or write your own (e.g. we provide a custom [llama-cpp-deepseek-r1.jinja](../models/templates/llama-cpp-deepseek-r1.jinja) for DeepSeek R1 distills).
+
+Test in CLI (or with any library / software that can use OpenAI-compatible API backends):
+
+```bash
+curl http://localhost:8080/v1/chat/completions -d '{
+"model": "gpt-3.5-turbo",
+"tools": [
+  {
+    "type":"function",
+    "function":{
+      "name":"python",
+      "description":"Runs code in an ipython interpreter and returns the result of the execution after 60 seconds.",
+      "parameters":{
+        "type":"object",
+        "properties":{
+          "code":{
+            "type":"string",
+            "description":"The code to run in the ipython interpreter."
+          }
+        },
+        "required":["code"]
+      }
+    }
+  }
+],
+"messages": [
+  {
+    "role": "user",
+    "content": "Print a hello world message with python."
+  }
+]
+}'
+```
+
+Show output + +```json +{ +"choices": [ + { + "finish_reason": "tool", + "index": 0, + "message": { + "content": null, + "tool_calls": [ + { + "name": "python", + "arguments": "{\"code\":\" \\nprint(\\\"Hello, World!\\\")\"}" + } + ], + "role": "assistant" + } + } +], +"created": 1727287211, +"model": "gpt-3.5-turbo", +"object": "chat.completion", +"usage": { + "completion_tokens": 16, + "prompt_tokens": 44, + "total_tokens": 60 +}, +"id": "chatcmpl-Htbgh9feMmGM0LEH2hmQvwsCxq3c6Ni8" +} +``` + +
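+
+To close the loop after the model requests a tool call, the tool's result has to be sent back so the model can produce its final answer. The following is a minimal sketch of that second request, assuming the server accepts the usual OpenAI-style convention of appending the assistant's `tool_calls` message followed by a `role: "tool"` message carrying the execution result; the `id`/`tool_call_id` values are illustrative only, and the `tools` array from the first request is omitted here for brevity but would normally be resent as well:
+
+```bash
+# Hypothetical follow-up request: the client ran the Python code itself and now
+# feeds the result back so the model can answer in plain text.
+curl http://localhost:8080/v1/chat/completions -d '{
+"model": "gpt-3.5-turbo",
+"messages": [
+  { "role": "user", "content": "Print a hello world message with python." },
+  {
+    "role": "assistant",
+    "content": null,
+    "tool_calls": [
+      {
+        "id": "call_1",
+        "type": "function",
+        "function": { "name": "python", "arguments": "{\"code\":\"print(\\\"Hello, World!\\\")\"}" }
+      }
+    ]
+  },
+  { "role": "tool", "tool_call_id": "call_1", "name": "python", "content": "Hello, World!" }
+]
+}'
+```
+
+The response to this request should be an ordinary assistant message that summarizes the tool output instead of another tool call.
+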
diff --git a/docs/install.md b/docs/install.md
index 10a5685068..4971c18281 100644
--- a/docs/install.md
+++ b/docs/install.md
@@ -7,7 +7,14 @@ On Mac and Linux, the homebrew package manager can be used via
```sh
brew install llama.cpp
```
-The formula is automatically updated with new `llama.cpp` releases. More info: https://github.com/ggerganov/llama.cpp/discussions/7668
+The formula is automatically updated with new `llama.cpp` releases. More info: https://github.com/ggml-org/llama.cpp/discussions/7668
+
+## MacPorts
+
+```sh
+sudo port install llama.cpp
+```
+See also: https://ports.macports.org/port/llama.cpp/details/

## Nix
diff --git a/docs/llguidance.md b/docs/llguidance.md
new file mode 100644
index 0000000000..cda787b14d
--- /dev/null
+++ b/docs/llguidance.md
@@ -0,0 +1,53 @@
+# LLGuidance Support in llama.cpp
+
+[LLGuidance](https://github.com/guidance-ai/llguidance) is a library for constrained decoding (also called constrained sampling or structured outputs) for Large Language Models (LLMs). Initially developed as the backend for the [Guidance](https://github.com/guidance-ai/guidance) library, it can also be used independently.
+
+LLGuidance supports JSON Schemas and arbitrary context-free grammars (CFGs) written in a [variant](https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md) of Lark syntax. It is [very fast](https://github.com/guidance-ai/jsonschemabench/tree/main/maskbench) and has [excellent](https://github.com/guidance-ai/llguidance/blob/main/docs/json_schema.md) JSON Schema coverage, but requires the Rust compiler, which complicates the llama.cpp build process.
+
+## Building
+
+To enable LLGuidance support, build llama.cpp with the `LLAMA_LLGUIDANCE` option:
+
+```sh
+cmake -B build -DLLAMA_LLGUIDANCE=ON
+make -C build -j
+```
+
+On Windows, use `cmake --build build --config Release` instead of `make`.
+
+This requires the Rust compiler and the `cargo` tool to be [installed](https://www.rust-lang.org/tools/install).
+
+## Interface
+
+There are no new command-line arguments or modifications to `common_params`. When enabled, grammars starting with `%llguidance` are passed to LLGuidance instead of the [current](../grammars/README.md) llama.cpp grammars. Additionally, JSON Schema requests (e.g., using the `-j` argument in `llama-cli`) are also passed to LLGuidance.
+
+For your existing GBNF grammars, you can use the [gbnf_to_lark.py script](https://github.com/guidance-ai/llguidance/blob/main/python/llguidance/gbnf_to_lark.py) to convert them to the LLGuidance Lark-like format.
+
+## Performance
+
+Computing a "token mask" (i.e., the set of allowed tokens) for a llama3 tokenizer with 128k tokens takes, on average, 50μs of single-core CPU time for the [JSON Schema Bench](https://github.com/guidance-ai/jsonschemabench). The p99 time is 0.5ms, and the p100 time is 20ms. These results are due to the lexer/parser split and several [optimizations](https://github.com/guidance-ai/llguidance/blob/main/docs/optimizations.md).
+
+## JSON Schema
+
+LLGuidance adheres closely to the JSON Schema specification. For example:
+
+- `additionalProperties` defaults to `true`, unlike current grammars, though you can set `"additionalProperties": false` if needed.
+- Any whitespace is allowed.
+- The definition order in the `"properties": {}` object is maintained, regardless of whether properties are required (current grammars always put required properties first).
+
+Unsupported schemas result in an error message—no keywords are silently ignored.
+
+## Why Not Reuse GBNF Format?
+ +GBNF lacks the concept of a lexer. + +Most programming languages, including JSON, use a two-step process: a lexer (built with regular expressions) converts a byte stream into lexemes, which are then processed by a CFG parser. This approach is faster because lexers are cheaper to evaluate, and there is ~10x fewer lexemes than bytes. +LLM tokens often align with lexemes, so the parser is engaged in under 0.5% of tokens, with the lexer handling the rest. + +However, the user has to provide the distinction between lexemes and CFG symbols. In [Lark](https://github.com/lark-parser/lark), lexeme names are uppercase, while CFG symbols are lowercase. +The [gbnf_to_lark.py script](https://github.com/guidance-ai/llguidance/blob/main/scripts/gbnf_to_lark.py) can often take care of this automatically. +See [LLGuidance syntax docs](https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md#terminals-vs-rules) for more details. + +## Error Handling + +Errors are currently printed to `stderr`, and generation continues. Improved error handling may be added in the future. diff --git a/examples/batched-bench/batched-bench.cpp b/examples/batched-bench/batched-bench.cpp index 0659ab6f11..0f4019293d 100644 --- a/examples/batched-bench/batched-bench.cpp +++ b/examples/batched-bench/batched-bench.cpp @@ -38,7 +38,7 @@ int main(int argc, char ** argv) { llama_model_params model_params = common_model_params_to_llama(params); - llama_model * model = llama_model_load_from_file(params.model.c_str(), model_params); + llama_model * model = llama_model_load_from_file(params.model.path.c_str(), model_params); if (model == NULL) { fprintf(stderr , "%s: error: unable to load model\n" , __func__); @@ -132,7 +132,7 @@ int main(int argc, char ** argv) { const auto t_pp_start = ggml_time_us(); - llama_kv_cache_clear(ctx); + llama_kv_self_clear(ctx); if (!decode_helper(ctx, batch, ctx_params.n_batch)) { LOG_ERR("%s: llama_decode() failed\n", __func__); @@ -141,7 +141,7 @@ int main(int argc, char ** argv) { if (is_pp_shared) { for (int32_t i = 1; i < pl; ++i) { - llama_kv_cache_seq_cp(ctx, 0, i, -1, -1); + llama_kv_self_seq_cp(ctx, 0, i, -1, -1); } } diff --git a/examples/batched.swift/Sources/main.swift b/examples/batched.swift/Sources/main.swift index 371917b2ee..514989e340 100644 --- a/examples/batched.swift/Sources/main.swift +++ b/examples/batched.swift/Sources/main.swift @@ -31,6 +31,11 @@ defer { llama_model_free(model) } +guard let vocab = llama_model_get_vocab(model) else { + print("Failed to get vocab") + exit(1) +} + var tokens = tokenize(text: prompt, add_bos: true) let n_kv_req = UInt32(tokens.count) + UInt32((n_len - Int(tokens.count)) * n_parallel) @@ -41,7 +46,7 @@ context_params.n_batch = UInt32(max(n_len, n_parallel)) context_params.n_threads = 8 context_params.n_threads_batch = 8 -let context = llama_new_context_with_model(model, context_params) +let context = llama_init_from_model(model, context_params) guard context != nil else { print("Failed to initialize context") exit(1) @@ -111,7 +116,7 @@ if llama_decode(context, batch) != 0 { } for i in 1 ..< n_parallel { - llama_kv_cache_seq_cp(context, 0, Int32(i), 0, batch.n_tokens) + llama_kv_self_seq_cp(context, 0, Int32(i), 0, batch.n_tokens) } if n_parallel > 1 { @@ -141,7 +146,7 @@ while n_cur <= n_len { let new_token_id = llama_sampler_sample(smpl, context, i_batch[i]) // is it an end of stream? 
-> mark the stream as finished - if llama_vocab_is_eog(model, new_token_id) || n_cur == n_len { + if llama_vocab_is_eog(vocab, new_token_id) || n_cur == n_len { i_batch[i] = -1 // print("") if n_parallel > 1 { @@ -207,7 +212,7 @@ private func tokenize(text: String, add_bos: Bool) -> [llama_token] { let utf8Count = text.utf8.count let n_tokens = utf8Count + (add_bos ? 1 : 0) let tokens = UnsafeMutablePointer.allocate(capacity: n_tokens) - let tokenCount = llama_tokenize(model, text, Int32(utf8Count), tokens, Int32(n_tokens), add_bos, /*special tokens*/ false) + let tokenCount = llama_tokenize(vocab, text, Int32(utf8Count), tokens, Int32(n_tokens), add_bos, /*special tokens*/ false) var swiftTokens: [llama_token] = [] for i in 0 ..< tokenCount { swiftTokens.append(tokens[Int(i)]) @@ -218,12 +223,12 @@ private func tokenize(text: String, add_bos: Bool) -> [llama_token] { private func token_to_piece(token: llama_token, buffer: inout [CChar]) -> String? { var result = [CChar](repeating: 0, count: 8) - let nTokens = llama_token_to_piece(model, token, &result, Int32(result.count), 0, false) + let nTokens = llama_token_to_piece(vocab, token, &result, Int32(result.count), 0, false) if nTokens < 0 { let actualTokensCount = -Int(nTokens) result = .init(repeating: 0, count: actualTokensCount) let check = llama_token_to_piece( - model, + vocab, token, &result, Int32(result.count), diff --git a/examples/batched/batched.cpp b/examples/batched/batched.cpp index 21b95ef5e4..1a5de5928a 100644 --- a/examples/batched/batched.cpp +++ b/examples/batched/batched.cpp @@ -41,7 +41,7 @@ int main(int argc, char ** argv) { llama_model_params model_params = common_model_params_to_llama(params); - llama_model * model = llama_model_load_from_file(params.model.c_str(), model_params); + llama_model * model = llama_model_load_from_file(params.model.path.c_str(), model_params); if (model == NULL) { LOG_ERR("%s: error: unable to load model\n" , __func__); diff --git a/examples/cvector-generator/README.md b/examples/cvector-generator/README.md index be4dd5250f..6d5fd74ad8 100644 --- a/examples/cvector-generator/README.md +++ b/examples/cvector-generator/README.md @@ -3,9 +3,9 @@ This example demonstrates how to generate a control vector using gguf models. 
Related PRs: -- [Add support for control vectors](https://github.com/ggerganov/llama.cpp/pull/5970) -- (Issue) [Generate control vector using llama.cpp](https://github.com/ggerganov/llama.cpp/issues/6880) -- [Add cvector-generator example](https://github.com/ggerganov/llama.cpp/pull/7514) +- [Add support for control vectors](https://github.com/ggml-org/llama.cpp/pull/5970) +- (Issue) [Generate control vector using llama.cpp](https://github.com/ggml-org/llama.cpp/issues/6880) +- [Add cvector-generator example](https://github.com/ggml-org/llama.cpp/pull/7514) ## Examples diff --git a/examples/cvector-generator/cvector-generator.cpp b/examples/cvector-generator/cvector-generator.cpp index 413b71d34c..2a90715501 100644 --- a/examples/cvector-generator/cvector-generator.cpp +++ b/examples/cvector-generator/cvector-generator.cpp @@ -342,7 +342,7 @@ static bool cb_eval(struct ggml_tensor * t, bool ask, void * user_data) { } static bool get_hidden_layers(llama_context * ctx, std::vector & tokens) { - llama_kv_cache_clear(ctx); + llama_kv_self_clear(ctx); if (llama_decode(ctx, llama_batch_get_one(tokens.data(), tokens.size()))) { fprintf(stderr, "%s : failed to eval\n", __func__); return false; @@ -394,6 +394,8 @@ static int prepare_entries(common_params & params, train_context & ctx_train) { int main(int argc, char ** argv) { common_params params; + params.out_file = "control_vector.gguf"; + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_CVECTOR_GENERATOR, print_usage)) { return 1; } @@ -498,7 +500,7 @@ int main(int argc, char ** argv) { } // write output vectors to gguf - export_gguf(ctx_train.v_final, params.cvector_outfile, model_hint); + export_gguf(ctx_train.v_final, params.out_file, model_hint); llama_backend_free(); diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp index 38d22c90f8..6f08904159 100644 --- a/examples/embedding/embedding.cpp +++ b/examples/embedding/embedding.cpp @@ -4,6 +4,7 @@ #include "llama.h" #include +#include #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data @@ -37,7 +38,7 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * outpu const struct llama_model * model = llama_get_model(ctx); // clear previous kv_cache values (irrelevant for embeddings) - llama_kv_cache_clear(ctx); + llama_kv_self_clear(ctx); // run model LOG_INF("%s: n_tokens = %d, n_seq = %d\n", __func__, batch.n_tokens, n_seq); diff --git a/examples/export-lora/export-lora.cpp b/examples/export-lora/export-lora.cpp index 91238e4beb..24dc85cf27 100644 --- a/examples/export-lora/export-lora.cpp +++ b/examples/export-lora/export-lora.cpp @@ -413,20 +413,22 @@ static void print_usage(int, char ** argv) { int main(int argc, char ** argv) { common_params params; + params.out_file = "ggml-lora-merged-f16.gguf"; + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_EXPORT_LORA, print_usage)) { return 1; } g_verbose = (params.verbosity > 1); try { - lora_merge_ctx ctx(params.model, params.lora_adapters, params.lora_outfile, params.cpuparams.n_threads); + lora_merge_ctx ctx(params.model.path, params.lora_adapters, params.out_file, params.cpuparams.n_threads); ctx.run_merge(); } catch (const std::exception & err) { fprintf(stderr, "%s\n", err.what()); exit(EXIT_FAILURE); } - printf("done, output file is %s\n", params.lora_outfile.c_str()); + printf("done, output file is %s\n", params.out_file.c_str()); return 0; } diff --git a/examples/gbnf-validator/gbnf-validator.cpp 
b/examples/gbnf-validator/gbnf-validator.cpp index 17a0e27c44..a610e6a0b1 100644 --- a/examples/gbnf-validator/gbnf-validator.cpp +++ b/examples/gbnf-validator/gbnf-validator.cpp @@ -76,7 +76,7 @@ int main(int argc, char** argv) { grammar_str = buffer.str(); } - llama_grammar * grammar = llama_grammar_init_impl(nullptr, grammar_str.c_str(), "root"); + llama_grammar * grammar = llama_grammar_init_impl(nullptr, grammar_str.c_str(), "root", false, nullptr, 0, nullptr, 0); if (grammar == nullptr) { fprintf(stdout, "Failed to initialize llama_grammar\n"); return 1; diff --git a/examples/gguf-split/gguf-split.cpp b/examples/gguf-split/gguf-split.cpp index ef3ceb686f..30e771564e 100644 --- a/examples/gguf-split/gguf-split.cpp +++ b/examples/gguf-split/gguf-split.cpp @@ -408,8 +408,6 @@ static void gguf_merge(const split_params & split_params) { exit(EXIT_FAILURE); } - std::ofstream fout(split_params.output.c_str(), std::ios::binary); - fout.exceptions(std::ofstream::failbit); // fail fast on write errors auto * ctx_out = gguf_init_empty(); @@ -453,7 +451,6 @@ static void gguf_merge(const split_params & split_params) { gguf_free(ctx_gguf); ggml_free(ctx_meta); gguf_free(ctx_out); - fout.close(); exit(EXIT_FAILURE); } @@ -466,7 +463,6 @@ static void gguf_merge(const split_params & split_params) { gguf_free(ctx_gguf); ggml_free(ctx_meta); gguf_free(ctx_out); - fout.close(); exit(EXIT_FAILURE); } @@ -479,7 +475,6 @@ static void gguf_merge(const split_params & split_params) { gguf_free(ctx_gguf); ggml_free(ctx_meta); gguf_free(ctx_out); - fout.close(); exit(EXIT_FAILURE); } @@ -500,9 +495,11 @@ static void gguf_merge(const split_params & split_params) { fprintf(stderr, "\033[3Ddone\n"); } - - // placeholder for the meta data - { + std::ofstream fout; + if (!split_params.dry_run) { + fout.open(split_params.output.c_str(), std::ios::binary); + fout.exceptions(std::ofstream::failbit); // fail fast on write errors + // placeholder for the meta data auto meta_size = gguf_get_meta_size(ctx_out); ::zeros(fout, meta_size); } @@ -518,7 +515,9 @@ static void gguf_merge(const split_params & split_params) { ggml_free(ctx_metas[i]); } gguf_free(ctx_out); - fout.close(); + if (!split_params.dry_run) { + fout.close(); + } exit(EXIT_FAILURE); } fprintf(stderr, "%s: writing tensors %s ...", __func__, split_path); @@ -540,10 +539,11 @@ static void gguf_merge(const split_params & split_params) { auto offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i_tensor); f_input.seekg(offset); f_input.read((char *)read_data.data(), n_bytes); - - // write tensor data + padding - fout.write((const char *)read_data.data(), n_bytes); - zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes); + if (!split_params.dry_run) { + // write tensor data + padding + fout.write((const char *)read_data.data(), n_bytes); + zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes); + } } gguf_free(ctx_gguf); @@ -552,16 +552,15 @@ static void gguf_merge(const split_params & split_params) { fprintf(stderr, "\033[3Ddone\n"); } - { + if (!split_params.dry_run) { // go back to beginning of file and write the updated metadata fout.seekp(0); std::vector data(gguf_get_meta_size(ctx_out)); gguf_get_meta_data(ctx_out, data.data()); fout.write((const char *)data.data(), data.size()); - fout.close(); - gguf_free(ctx_out); } + gguf_free(ctx_out); fprintf(stderr, "%s: %s merged from %d split with %d tensors.\n", __func__, split_params.output.c_str(), n_split, total_tensors); diff --git a/examples/gritlm/gritlm.cpp 
b/examples/gritlm/gritlm.cpp index 72eb462574..539bc4d602 100644 --- a/examples/gritlm/gritlm.cpp +++ b/examples/gritlm/gritlm.cpp @@ -45,7 +45,7 @@ static std::vector> encode(llama_context * ctx, const std::ve } // clear previous kv_cache values (irrelevant for embeddings) - llama_kv_cache_clear(ctx); + llama_kv_self_clear(ctx); llama_set_embeddings(ctx, true); llama_set_causal_attn(ctx, false); @@ -102,7 +102,7 @@ static std::string generate(llama_context * ctx, llama_sampler * smpl, const std llama_token eos_token = llama_vocab_eos(vocab); - llama_kv_cache_clear(ctx); + llama_kv_self_clear(ctx); llama_set_embeddings(ctx, false); llama_set_causal_attn(ctx, true); @@ -168,7 +168,7 @@ int main(int argc, char * argv[]) { llama_backend_init(); - llama_model * model = llama_model_load_from_file(params.model.c_str(), mparams); + llama_model * model = llama_model_load_from_file(params.model.path.c_str(), mparams); // create generation context llama_context * ctx = llama_init_from_model(model, cparams); diff --git a/examples/imatrix/README.md b/examples/imatrix/README.md index 9c056986b3..9aa2b20347 100644 --- a/examples/imatrix/README.md +++ b/examples/imatrix/README.md @@ -1,7 +1,7 @@ # llama.cpp/examples/imatrix -Compute an importance matrix for a model and given text dataset. Can be used during quantization to enchance the quality of the quantized models. -More information is available here: https://github.com/ggerganov/llama.cpp/pull/4861 +Compute an importance matrix for a model and given text dataset. Can be used during quantization to enhance the quality of the quantized models. +More information is available here: https://github.com/ggml-org/llama.cpp/pull/4861 ## Usage diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index b5f3feb9f8..31b675e8f9 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -3,6 +3,7 @@ #include "log.h" #include "llama.h" +#include #include #include #include @@ -99,7 +100,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * const float * data = is_host ? 
(const float *) src1->data : m_src1_data.data(); // this has been adapted to the new format of storing merged experts in a single 3d tensor - // ref: https://github.com/ggerganov/llama.cpp/pull/6387 + // ref: https://github.com/ggml-org/llama.cpp/pull/6387 if (t->op == GGML_OP_MUL_MAT_ID) { // ids -> [n_experts_used, n_tokens] // src1 -> [cols, n_expert_used, n_tokens] @@ -205,9 +206,6 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * void IMatrixCollector::save_imatrix(int ncall) const { auto fname = m_params.out_file; - if (fname.empty()) { - fname = "imatrix.dat"; - } if (ncall > 0) { fname += ".at_"; @@ -497,7 +495,7 @@ static bool compute_imatrix(llama_context * ctx, const common_params & params) { const auto t_start = std::chrono::high_resolution_clock::now(); // clear the KV cache - llama_kv_cache_clear(ctx); + llama_kv_self_clear(ctx); llama_batch batch = llama_batch_init(n_batch, 0, 1); @@ -582,6 +580,8 @@ static bool compute_imatrix(llama_context * ctx, const common_params & params) { int main(int argc, char ** argv) { common_params params; + params.out_file = "imatrix.dat" ; + params.n_ctx = 512; params.logits_all = true; params.escape = false; diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp index 489a208b66..4e2f7b7270 100644 --- a/examples/infill/infill.cpp +++ b/examples/infill/infill.cpp @@ -332,8 +332,8 @@ int main(int argc, char ** argv) { LOG_DBG("context full, swapping: n_past = %d, n_left = %d, n_ctx = %d, n_keep = %d, n_discard = %d\n", n_past, n_left, n_ctx, params.n_keep, n_discard); - llama_kv_cache_seq_rm (ctx, 0, params.n_keep + 1 , params.n_keep + n_discard + 1); - llama_kv_cache_seq_add(ctx, 0, params.n_keep + 1 + n_discard, n_past, -n_discard); + llama_kv_self_seq_rm (ctx, 0, params.n_keep + 1 , params.n_keep + n_discard + 1); + llama_kv_self_seq_add(ctx, 0, params.n_keep + 1 + n_discard, n_past, -n_discard); n_past -= n_discard; diff --git a/examples/json_schema_to_grammar.py b/examples/json_schema_to_grammar.py index fc9f0097f5..55f94c0b0a 100755 --- a/examples/json_schema_to_grammar.py +++ b/examples/json_schema_to_grammar.py @@ -195,7 +195,7 @@ class BuiltinRule: self.deps = deps or [] # Constraining spaces to prevent model "running away". 
-SPACE_RULE = '| " " | "\\n" [ \\t]{0,20}' +SPACE_RULE = '| " " | "\\n"{1,2} [ \\t]{0,20}' PRIMITIVE_RULES = { 'boolean' : BuiltinRule('("true" | "false") space', []), diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index 4ac19ca86e..cbcbfcee86 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -876,8 +876,8 @@ static std::vector get_cmd_params_instances(const cmd_param struct test { static const std::string build_commit; static const int build_number; - static const std::string cpu_info; - static const std::string gpu_info; + const std::string cpu_info; + const std::string gpu_info; std::string model_filename; std::string model_type; uint64_t model_size; @@ -903,7 +903,10 @@ struct test { std::string test_time; std::vector samples_ns; - test(const cmd_params_instance & inst, const llama_model * lmodel, const llama_context * ctx) { + test(const cmd_params_instance & inst, const llama_model * lmodel, const llama_context * ctx) : + cpu_info(get_cpu_info()), + gpu_info(get_gpu_info()) { + model_filename = inst.model; char buf[128]; llama_model_desc(lmodel, buf, sizeof(buf)); @@ -1058,8 +1061,6 @@ struct test { const std::string test::build_commit = LLAMA_COMMIT; const int test::build_number = LLAMA_BUILD_NUMBER; -const std::string test::cpu_info = get_cpu_info(); -const std::string test::gpu_info = get_gpu_info(); struct printer { virtual ~printer() {} @@ -1577,7 +1578,7 @@ int main(int argc, char ** argv) { test t(inst, lmodel, ctx); - llama_kv_cache_clear(ctx); + llama_kv_self_clear(ctx); // cool off before the test if (params.delay) { @@ -1617,7 +1618,7 @@ int main(int argc, char ** argv) { } for (int i = 0; i < params.reps; i++) { - llama_kv_cache_clear(ctx); + llama_kv_self_clear(ctx); uint64_t t_start = get_time_ns(); diff --git a/examples/llama.android/llama/build.gradle.kts b/examples/llama.android/llama/build.gradle.kts index 28dbc19048..5bb6478022 100644 --- a/examples/llama.android/llama/build.gradle.kts +++ b/examples/llama.android/llama/build.gradle.kts @@ -18,6 +18,7 @@ android { } externalNativeBuild { cmake { + arguments += "-DLLAMA_CURL=OFF" arguments += "-DLLAMA_BUILD_COMMON=ON" arguments += "-DGGML_LLAMAFILE=OFF" arguments += "-DCMAKE_BUILD_TYPE=Release" diff --git a/examples/llama.android/llama/src/main/cpp/CMakeLists.txt b/examples/llama.android/llama/src/main/cpp/CMakeLists.txt index 2de496574f..6119fe09b0 100644 --- a/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +++ b/examples/llama.android/llama/src/main/cpp/CMakeLists.txt @@ -14,7 +14,7 @@ project("llama-android") #include(FetchContent) #FetchContent_Declare( # llama -# GIT_REPOSITORY https://github.com/ggerganov/llama.cpp +# GIT_REPOSITORY https://github.com/ggml-org/llama.cpp # GIT_TAG master #) diff --git a/examples/llama.android/llama/src/main/cpp/llama-android.cpp b/examples/llama.android/llama/src/main/cpp/llama-android.cpp index 2a73983a98..9654cd53cf 100644 --- a/examples/llama.android/llama/src/main/cpp/llama-android.cpp +++ b/examples/llama.android/llama/src/main/cpp/llama-android.cpp @@ -194,7 +194,7 @@ Java_android_llama_cpp_LLamaAndroid_bench_1model( } batch->logits[batch->n_tokens - 1] = true; - llama_kv_cache_clear(context); + llama_kv_self_clear(context); const auto t_pp_start = ggml_time_us(); if (llama_decode(context, *batch) != 0) { @@ -206,7 +206,7 @@ Java_android_llama_cpp_LLamaAndroid_bench_1model( LOGi("Benchmark text generation (tg)"); - llama_kv_cache_clear(context); + 
llama_kv_self_clear(context); const auto t_tg_start = ggml_time_us(); for (i = 0; i < tg; i++) { @@ -223,7 +223,7 @@ Java_android_llama_cpp_LLamaAndroid_bench_1model( const auto t_tg_end = ggml_time_us(); - llama_kv_cache_clear(context); + llama_kv_self_clear(context); const auto t_pp = double(t_pp_end - t_pp_start) / 1000000.0; const auto t_tg = double(t_tg_end - t_tg_start) / 1000000.0; @@ -361,7 +361,7 @@ Java_android_llama_cpp_LLamaAndroid_completion_1init( const auto tokens_list = common_tokenize(context, text, true, parse_special); auto n_ctx = llama_n_ctx(context); - auto n_kv_req = tokens_list.size() + (n_len - tokens_list.size()); + auto n_kv_req = tokens_list.size() + n_len; LOGi("n_len = %d, n_ctx = %d, n_kv_req = %d", n_len, n_ctx, n_kv_req); @@ -448,5 +448,5 @@ Java_android_llama_cpp_LLamaAndroid_completion_1loop( extern "C" JNIEXPORT void JNICALL Java_android_llama_cpp_LLamaAndroid_kv_1cache_1clear(JNIEnv *, jobject, jlong context) { - llama_kv_cache_clear(reinterpret_cast<llama_context *>(context)); + llama_kv_self_clear(reinterpret_cast<llama_context *>(context)); } diff --git a/examples/llama.swiftui/README.md b/examples/llama.swiftui/README.md index 96cf743d48..bd7ce37747 100644 --- a/examples/llama.swiftui/README.md +++ b/examples/llama.swiftui/README.md @@ -3,9 +3,24 @@ Local inference of llama.cpp on an iPhone. This is a sample app that can be used as a starting point for more advanced projects. -For usage instructions and performance stats, check the following discussion: https://github.com/ggerganov/llama.cpp/discussions/4508 +For usage instructions and performance stats, check the following discussion: https://github.com/ggml-org/llama.cpp/discussions/4508 -![image](https://github.com/ggerganov/llama.cpp/assets/1991296/2b40284f-8421-47a2-b634-74eece09a299) + +### Building +First, llama.cpp needs to be built and an XCFramework needs to be created. This can be done by running the following script from the llama.cpp project root: +```console +$ ./build-xcframework.sh +``` +Open the `llama.swiftui.xcodeproj` project in Xcode and you should be able to build and run the app on a simulator or a real device. + +To use the framework with a different project, add `build-apple/llama.xcframework` by dragging and dropping it into the project navigator, or by manually selecting the framework in the "Frameworks, Libraries, and Embedded Content" section of the project settings.
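As a quick sanity check that the embedded framework is usable from Swift, a minimal hypothetical snippet might look like the one below. It is not part of the sample app; it assumes the XCFramework's module is imported as `llama` (as the sample app's `LibLlama.swift` does) and only exercises backend setup and teardown without loading a model.

```swift
import llama  // module exposed by build-apple/llama.xcframework

// Smoke test: initialize and free the ggml/llama backend.
// If this compiles and runs, the framework is linked correctly.
llama_backend_init()
print("llama.cpp backend is available")
llama_backend_free()
```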
+ +![image](https://github.com/ggml-org/llama.cpp/assets/1991296/2b40284f-8421-47a2-b634-74eece09a299) Video demonstration: diff --git a/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift b/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift index 477c3e6f2e..f6e31abc93 100644 --- a/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift +++ b/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift @@ -24,6 +24,7 @@ func llama_batch_add(_ batch: inout llama_batch, _ id: llama_token, _ pos: llama actor LlamaContext { private var model: OpaquePointer private var context: OpaquePointer + private var vocab: OpaquePointer private var sampling: UnsafeMutablePointer private var batch: llama_batch private var tokens_list: [llama_token] @@ -47,6 +48,7 @@ actor LlamaContext { self.sampling = llama_sampler_chain_init(sparams) llama_sampler_chain_add(self.sampling, llama_sampler_init_temp(0.4)) llama_sampler_chain_add(self.sampling, llama_sampler_init_dist(1234)) + vocab = llama_model_get_vocab(model) } deinit { @@ -79,7 +81,7 @@ actor LlamaContext { ctx_params.n_threads = Int32(n_threads) ctx_params.n_threads_batch = Int32(n_threads) - let context = llama_new_context_with_model(model, ctx_params) + let context = llama_init_from_model(model, ctx_params) guard let context else { print("Could not load context!") throw LlamaError.couldNotInitializeContext @@ -151,7 +153,7 @@ actor LlamaContext { new_token_id = llama_sampler_sample(sampling, context, batch.n_tokens - 1) - if llama_vocab_is_eog(model, new_token_id) || n_cur == n_len { + if llama_vocab_is_eog(vocab, new_token_id) || n_cur == n_len { print("\n") is_done = true let new_token_str = String(cString: temporary_invalid_cchars + [0]) @@ -208,7 +210,7 @@ actor LlamaContext { } batch.logits[Int(batch.n_tokens) - 1] = 1 // true - llama_kv_cache_clear(context) + llama_kv_self_clear(context) let t_pp_start = DispatchTime.now().uptimeNanoseconds / 1000; @@ -221,7 +223,7 @@ actor LlamaContext { // bench text generation - llama_kv_cache_clear(context) + llama_kv_self_clear(context) let t_tg_start = DispatchTime.now().uptimeNanoseconds / 1000; @@ -240,7 +242,7 @@ actor LlamaContext { let t_tg_end = DispatchTime.now().uptimeNanoseconds / 1000; - llama_kv_cache_clear(context) + llama_kv_self_clear(context) let t_pp = Double(t_pp_end - t_pp_start) / 1000000.0 let t_tg = Double(t_tg_end - t_tg_start) / 1000000.0 @@ -290,14 +292,14 @@ actor LlamaContext { func clear() { tokens_list.removeAll() temporary_invalid_cchars.removeAll() - llama_kv_cache_clear(context) + llama_kv_self_clear(context) } private func tokenize(text: String, add_bos: Bool) -> [llama_token] { let utf8Count = text.utf8.count let n_tokens = utf8Count + (add_bos ? 
1 : 0) + 1 let tokens = UnsafeMutablePointer.allocate(capacity: n_tokens) - let tokenCount = llama_tokenize(model, text, Int32(utf8Count), tokens, Int32(n_tokens), add_bos, false) + let tokenCount = llama_tokenize(vocab, text, Int32(utf8Count), tokens, Int32(n_tokens), add_bos, false) var swiftTokens: [llama_token] = [] for i in 0...allocate(capacity: Int(-nTokens)) @@ -324,7 +326,7 @@ actor LlamaContext { defer { newResult.deallocate() } - let nNewTokens = llama_token_to_piece(model, token, newResult, -nTokens, 0, false) + let nNewTokens = llama_token_to_piece(vocab, token, newResult, -nTokens, 0, false) let bufferPointer = UnsafeBufferPointer(start: newResult, count: Int(nNewTokens)) return Array(bufferPointer) } else { diff --git a/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj b/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj index ff3d108b2a..6f08fe220a 100644 --- a/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj +++ b/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj @@ -7,7 +7,6 @@ objects = { /* Begin PBXBuildFile section */ - 1809696D2D05A39F00400EE8 /* llama in Frameworks */ = {isa = PBXBuildFile; productRef = 1809696C2D05A39F00400EE8 /* llama */; }; 549479CB2AC9E16000E0F78B /* Metal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 549479CA2AC9E16000E0F78B /* Metal.framework */; }; 79E1D9CD2B4CD16E005F8E46 /* InputButton.swift in Sources */ = {isa = PBXBuildFile; fileRef = 79E1D9CC2B4CD16E005F8E46 /* InputButton.swift */; }; 7FA3D2B32B2EA2F600543F92 /* DownloadButton.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7FA3D2B22B2EA2F600543F92 /* DownloadButton.swift */; }; @@ -18,9 +17,25 @@ 8A3F84242AC4C891005E2EE8 /* models in Resources */ = {isa = PBXBuildFile; fileRef = 8A3F84232AC4C891005E2EE8 /* models */; }; 8A907F332AC7138A006146EA /* LibLlama.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A907F322AC7134E006146EA /* LibLlama.swift */; }; 8A9F7C4D2AC332EE008AE1EA /* LlamaState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */; }; + DD84C9FD2D747FED007778EC /* llama.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = DD84C9FC2D747FED007778EC /* llama.xcframework */; }; + DD84C9FE2D747FED007778EC /* llama.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = DD84C9FC2D747FED007778EC /* llama.xcframework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; }; F1FE20E22B465ECA00B45541 /* LoadCustomButton.swift in Sources */ = {isa = PBXBuildFile; fileRef = F1FE20E12B465EC900B45541 /* LoadCustomButton.swift */; }; /* End PBXBuildFile section */ +/* Begin PBXCopyFilesBuildPhase section */ + DD84C9FF2D747FED007778EC /* Embed Frameworks */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = ""; + dstSubfolderSpec = 10; + files = ( + DD84C9FE2D747FED007778EC /* llama.xcframework in Embed Frameworks */, + ); + name = "Embed Frameworks"; + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXCopyFilesBuildPhase section */ + /* Begin PBXFileReference section */ 549479CA2AC9E16000E0F78B /* Metal.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Metal.framework; path = System/Library/Frameworks/Metal.framework; sourceTree = SDKROOT; }; 79E1D9CC2B4CD16E005F8E46 /* InputButton.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = InputButton.swift; sourceTree = ""; }; @@ -33,6 +48,7 @@ 8A3F84232AC4C891005E2EE8 /* models */ = 
{isa = PBXFileReference; lastKnownFileType = folder; name = models; path = llama.swiftui/Resources/models; sourceTree = ""; }; 8A907F322AC7134E006146EA /* LibLlama.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibLlama.swift; sourceTree = ""; }; 8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LlamaState.swift; sourceTree = ""; }; + DD84C9FC2D747FED007778EC /* llama.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = llama.xcframework; path = "../../build-apple/llama.xcframework"; sourceTree = ""; }; DF2D2FE72B4A59BE00FCB72D /* llama.cpp */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = llama.cpp; path = ../..; sourceTree = ""; }; F1FE20E12B465EC900B45541 /* LoadCustomButton.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LoadCustomButton.swift; sourceTree = ""; }; /* End PBXFileReference section */ @@ -42,9 +58,9 @@ isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; files = ( - 1809696D2D05A39F00400EE8 /* llama in Frameworks */, 549479CB2AC9E16000E0F78B /* Metal.framework in Frameworks */, 8A39BE0A2AC7601100BFEB40 /* Accelerate.framework in Frameworks */, + DD84C9FD2D747FED007778EC /* llama.xcframework in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -86,6 +102,7 @@ 8A39BE082AC7601000BFEB40 /* Frameworks */ = { isa = PBXGroup; children = ( + DD84C9FC2D747FED007778EC /* llama.xcframework */, 549479CA2AC9E16000E0F78B /* Metal.framework */, 8A39BE092AC7601000BFEB40 /* Accelerate.framework */, ); @@ -144,6 +161,7 @@ 8A1C836F2AC328BD0096AF73 /* Sources */, 8A1C83702AC328BD0096AF73 /* Frameworks */, 8A1C83712AC328BD0096AF73 /* Resources */, + DD84C9FF2D747FED007778EC /* Embed Frameworks */, ); buildRules = ( ); @@ -151,7 +169,6 @@ ); name = llama.swiftui; packageProductDependencies = ( - 1809696C2D05A39F00400EE8 /* llama */, ); productName = llama.swiftui; productReference = 8A1C83732AC328BD0096AF73 /* llama.swiftui.app */; @@ -427,13 +444,6 @@ defaultConfigurationName = Release; }; /* End XCConfigurationList section */ - -/* Begin XCSwiftPackageProductDependency section */ - 1809696C2D05A39F00400EE8 /* llama */ = { - isa = XCSwiftPackageProductDependency; - productName = llama; - }; -/* End XCSwiftPackageProductDependency section */ }; rootObject = 8A1C836B2AC328BD0096AF73 /* Project object */; } diff --git a/examples/llama.swiftui/llama.swiftui/UI/ContentView.swift b/examples/llama.swiftui/llama.swiftui/UI/ContentView.swift index 30c2dc4310..1c3cd9d2ef 100644 --- a/examples/llama.swiftui/llama.swiftui/UI/ContentView.swift +++ b/examples/llama.swiftui/llama.swiftui/UI/ContentView.swift @@ -124,15 +124,26 @@ struct ContentView: View { } } }.sheet(isPresented: $showingHelp) { // Sheet for help modal - VStack(alignment: .leading) { + NavigationView { VStack(alignment: .leading) { - Text("1. Make sure the model is in GGUF Format") - .padding() - Text("2. Copy the download link of the quantized model") - .padding() + VStack(alignment: .leading) { + Text("1. Make sure the model is in GGUF Format") + .padding() + Text("2. 
Copy the download link of the quantized model") + .padding() + } + Spacer() } - Spacer() - } + .navigationTitle("Help") + .navigationBarTitleDisplayMode(.inline) + .toolbar { + ToolbarItem(placement: .navigationBarTrailing) { + Button("Done") { + showingHelp = false + } + } + } + } } } } diff --git a/examples/llama.vim b/examples/llama.vim index 57eb2a9772..af3fd3935d 100644 --- a/examples/llama.vim +++ b/examples/llama.vim @@ -39,7 +39,7 @@ " " :call llama#init() " -" more info: https://github.com/ggerganov/llama.cpp/pull/9787 +" more info: https://github.com/ggml-org/llama.cpp/pull/9787 " " colors (adjust to your liking) diff --git a/examples/llava/CMakeLists.txt b/examples/llava/CMakeLists.txt index 3ce0d60c80..2d5061de46 100644 --- a/examples/llava/CMakeLists.txt +++ b/examples/llava/CMakeLists.txt @@ -1,3 +1,5 @@ +# llava (legacy) + add_library(llava OBJECT llava.cpp llava.h @@ -22,12 +24,41 @@ if (BUILD_SHARED_LIBS) install(TARGETS llava_shared LIBRARY) endif() +# mtmd + +add_library(mtmd OBJECT + mtmd.cpp + mtmd.h + clip.cpp + clip.h + clip-impl.h + ) + +target_link_libraries(mtmd PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT}) + +target_include_directories(mtmd PUBLIC .) +target_include_directories(mtmd PRIVATE ../..) +target_include_directories(mtmd PRIVATE ../../common) # for stb_image.h + +target_compile_features(mtmd PRIVATE cxx_std_17) + +add_library(mtmd_static STATIC $) +if (BUILD_SHARED_LIBS) + set_target_properties(mtmd PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_compile_definitions(mtmd PRIVATE LLAMA_SHARED LLAMA_BUILD) + add_library(mtmd_shared SHARED $) + target_link_libraries(mtmd_shared PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT}) + install(TARGETS mtmd_shared LIBRARY) +endif() + if (NOT MSVC) target_compile_options(llava PRIVATE -Wno-cast-qual) # stb_image.h + target_compile_options(mtmd PRIVATE -Wno-cast-qual) # stb_image.h endif() if(TARGET BUILD_INFO) add_dependencies(llava BUILD_INFO) + add_dependencies(mtmd BUILD_INFO) endif() set(TARGET llama-llava-cli) @@ -50,3 +81,17 @@ set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-qwen2vl-cli) install(TARGETS ${TARGET} RUNTIME) target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) + +set(TARGET llama-gemma3-cli) +add_executable(${TARGET} gemma3-cli.cpp) +set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-gemma3-cli) +install(TARGETS ${TARGET} RUNTIME) +target_link_libraries(${TARGET} PRIVATE common mtmd ${CMAKE_THREAD_LIBS_INIT}) +target_compile_features(${TARGET} PRIVATE cxx_std_17) + +set(TARGET llama-llava-clip-quantize-cli) +add_executable(${TARGET} clip-quantize-cli.cpp) +set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-llava-clip-quantize-cli) +install(TARGETS ${TARGET} RUNTIME) +target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT}) +target_compile_features(${TARGET} PRIVATE cxx_std_17) diff --git a/examples/llava/README-gemma3.md b/examples/llava/README-gemma3.md new file mode 100644 index 0000000000..3c25ee2583 --- /dev/null +++ b/examples/llava/README-gemma3.md @@ -0,0 +1,50 @@ +# Gemma 3 vision + +> [!IMPORTANT] +> +> This is very experimental, only used for demo purpose. 
+ +## Quick start + +You can use a pre-quantized model from [ggml-org](https://huggingface.co/ggml-org)'s Hugging Face account + +```bash +# build +cmake -B build +cmake --build build --target llama-gemma3-cli + +# alternatively, install from brew (MacOS) +brew install llama.cpp + +# run it +llama-gemma3-cli -hf ggml-org/gemma-3-4b-it-GGUF +llama-gemma3-cli -hf ggml-org/gemma-3-12b-it-GGUF +llama-gemma3-cli -hf ggml-org/gemma-3-27b-it-GGUF + +# note: 1B model does not support vision +``` + +## How to get mmproj.gguf? + +```bash +cd gemma-3-4b-it +python ../llama.cpp/examples/llava/gemma3_convert_encoder_to_gguf.py . + +# output file is mmproj.gguf +``` + +## How to run it? + +What you need: +- The text model GGUF, which can be converted using `convert_hf_to_gguf.py` +- The mmproj file from the step above +- An image file + +```bash +# build +cmake -B build +cmake --build build --target llama-gemma3-cli + +# run it +./build/bin/llama-gemma3-cli -m {text_model}.gguf --mmproj mmproj.gguf --image your_image.jpg +``` diff --git a/examples/llava/README-glmedge.md b/examples/llava/README-glmedge.md new file mode 100644 index 0000000000..603d014745 --- /dev/null +++ b/examples/llava/README-glmedge.md @@ -0,0 +1,43 @@ +# GLMV-EDGE + +Currently this implementation supports [glm-edge-v-2b](https://huggingface.co/THUDM/glm-edge-v-2b) and [glm-edge-v-5b](https://huggingface.co/THUDM/glm-edge-v-5b). + +## Usage +Build with cmake or run `make llama-llava-cli` to build it. + +After building, run: `./llama-llava-cli` to see the usage. For example: + +```sh +./llama-llava-cli -m model_path/ggml-model-f16.gguf --mmproj model_path/mmproj-model-f16.gguf --image img_path/image.jpg -p "<|system|>\n system prompt <|user|>\n prompt <|assistant|>\n" +``` + +**note**: A lower temperature like 0.1 is recommended for better quality. Add `--temp 0.1` to the command to do so. +**note**: For GPU offloading, make sure to use the `-ngl` flag as usual. + +## GGUF conversion + +1. Clone a GLMV-EDGE model ([2B](https://huggingface.co/THUDM/glm-edge-v-2b) or [5B](https://huggingface.co/THUDM/glm-edge-v-5b)). For example: + +```sh +git clone https://huggingface.co/THUDM/glm-edge-v-5b or https://huggingface.co/THUDM/glm-edge-v-2b +``` + +2. Use `glmedge-surgery.py` to split the GLMV-EDGE model into its LLM and multimodal projector constituents: + +```sh +python ./examples/llava/glmedge-surgery.py -m ../model_path +``` + +3. Use `glmedge-convert-image-encoder-to-gguf.py` to convert the GLMV-EDGE image encoder to GGUF: + +```sh +python ./examples/llava/glmedge-convert-image-encoder-to-gguf.py -m ../model_path --llava-projector ../model_path/glm.projector --output-dir ../model_path +``` + +4. Use `examples/convert_hf_to_gguf.py` to convert the LLM part of GLMV-EDGE to GGUF: + +```sh +python convert_hf_to_gguf.py ../model_path +``` + +Now both the LLM part and the image encoder are in the `model_path` directory. diff --git a/examples/llava/README-granitevision.md b/examples/llava/README-granitevision.md new file mode 100644 index 0000000000..f08a21cc17 --- /dev/null +++ b/examples/llava/README-granitevision.md @@ -0,0 +1,190 @@ +# Granite Vision + +Download the model and point your `GRANITE_MODEL` environment variable to the path. + +```bash +$ git clone https://huggingface.co/ibm-granite/granite-vision-3.2-2b +$ export GRANITE_MODEL=./granite-vision-3.2-2b +``` + + +### 1. Running llava surgery v2.
+First, we need to run the llava surgery script as shown below: + +`python llava_surgery_v2.py -C -m $GRANITE_MODEL` + +You should see two new files (`llava.clip` and `llava.projector`) written into your model's directory, as shown below. + +```bash +$ ls $GRANITE_MODEL | grep -i llava +llava.clip +llava.projector +``` + +We should see that the projector and visual encoder get split out into the llava files. Quick check to make sure they aren't empty: +```python +import os +import torch + +MODEL_PATH = os.getenv("GRANITE_MODEL") +if not MODEL_PATH: + raise ValueError("env var GRANITE_MODEL is unset!") + +encoder_tensors = torch.load(os.path.join(MODEL_PATH, "llava.clip")) +projector_tensors = torch.load(os.path.join(MODEL_PATH, "llava.projector")) + +assert len(encoder_tensors) > 0 +assert len(projector_tensors) > 0 +``` + +If you actually inspect the `.keys()` of the loaded tensors, you should see a lot of `vision_model` tensors in the `encoder_tensors`, and 5 tensors (`'multi_modal_projector.linear_1.bias'`, `'multi_modal_projector.linear_1.weight'`, `'multi_modal_projector.linear_2.bias'`, `'multi_modal_projector.linear_2.weight'`, `'image_newline'`) in the multimodal `projector_tensors`. + + +### 2. Creating the Visual Component GGUF +Next, create a new directory to hold the visual components, and copy the llava.clip/projector files, as shown below. + +```bash +$ ENCODER_PATH=$PWD/visual_encoder +$ mkdir $ENCODER_PATH + +$ cp $GRANITE_MODEL/llava.clip $ENCODER_PATH/pytorch_model.bin +$ cp $GRANITE_MODEL/llava.projector $ENCODER_PATH/ +``` + +Now, we need to write a config for the visual encoder. In order to convert the model, be sure to use the correct `image_grid_pinpoints`, as these may vary based on the model. You can find the `image_grid_pinpoints` in `$GRANITE_MODEL/config.json`. + +```json +{ + "_name_or_path": "siglip-model", + "architectures": [ + "SiglipVisionModel" + ], + "image_grid_pinpoints": [ + [384,384], + [384,768], + [384,1152], + [384,1536], + [384,1920], + [384,2304], + [384,2688], + [384,3072], + [384,3456], + [384,3840], + [768,384], + [768,768], + [768,1152], + [768,1536], + [768,1920], + [1152,384], + [1152,768], + [1152,1152], + [1536,384], + [1536,768], + [1920,384], + [1920,768], + [2304,384], + [2688,384], + [3072,384], + [3456,384], + [3840,384] + ], + "mm_patch_merge_type": "spatial_unpad", + "hidden_size": 1152, + "image_size": 384, + "intermediate_size": 4304, + "model_type": "siglip_vision_model", + "num_attention_heads": 16, + "num_hidden_layers": 27, + "patch_size": 14, + "layer_norm_eps": 1e-6, + "hidden_act": "gelu_pytorch_tanh", + "projection_dim": 0, + "vision_feature_layer": [-24, -20, -12, -1] +} +``` + +At this point you should have something like this: +```bash +$ ls $ENCODER_PATH +config.json llava.projector pytorch_model.bin +``` + +Now convert the components to GGUF; Note that we also override the image mean/std dev to `[.5,.5,.5]` since we use the SigLIP visual encoder - in the transformers model, you can find these numbers in the `preprocessor_config.json`. +```bash +$ python convert_image_encoder_to_gguf.py \ + -m $ENCODER_PATH \ + --llava-projector $ENCODER_PATH/llava.projector \ + --output-dir $ENCODER_PATH \ + --clip-model-is-vision \ + --clip-model-is-siglip \ + --image-mean 0.5 0.5 0.5 \ + --image-std 0.5 0.5 0.5 +``` + +This will create the first GGUF file at `$ENCODER_PATH/mmproj-model-f16.gguf`; we will refer to the absolute path of this file as the `$VISUAL_GGUF_PATH.` + + +### 3. Creating the LLM GGUF. 
+The granite vision model contains a granite LLM as its language model. For now, the easiest way to get a GGUF for the LLM is by loading the composite model in `transformers` and exporting the LLM so that it can be directly converted with the normal conversion path. + +First, set `LLM_EXPORT_PATH` to the path where the `transformers` LLM should be exported. +```bash +$ export LLM_EXPORT_PATH=$PWD/granite_vision_llm +``` + +```python +import os +import transformers + +MODEL_PATH = os.getenv("GRANITE_MODEL") +if not MODEL_PATH: + raise ValueError("env var GRANITE_MODEL is unset!") + +LLM_EXPORT_PATH = os.getenv("LLM_EXPORT_PATH") +if not LLM_EXPORT_PATH: + raise ValueError("env var LLM_EXPORT_PATH is unset!") + +tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_PATH) + +# NOTE: granite vision support was added to transformers very recently (4.49); +# if you get size mismatches, your version is too old. +# If you are running with an older version, set `ignore_mismatched_sizes=True` +# as shown below; it won't be loaded correctly, but the LLM part of the model that +# we are exporting will be loaded correctly. +model = transformers.AutoModelForImageTextToText.from_pretrained(MODEL_PATH, ignore_mismatched_sizes=True) + +tokenizer.save_pretrained(LLM_EXPORT_PATH) +model.language_model.save_pretrained(LLM_EXPORT_PATH) +``` + +Now you can convert the exported LLM to GGUF with the normal converter in the root of the llama.cpp project. +```bash +$ LLM_GGUF_PATH=$LLM_EXPORT_PATH/granite_llm.gguf +... +$ python convert_hf_to_gguf.py --outfile $LLM_GGUF_PATH $LLM_EXPORT_PATH +``` + + +### 4. Quantization +If you want to quantize the LLM, you can do so with `llama-quantize` as you would any other LLM. For example: +```bash +$ ./build/bin/llama-quantize $LLM_EXPORT_PATH/granite_llm.gguf $LLM_EXPORT_PATH/granite_llm_q4_k_m.gguf Q4_K_M +$ LLM_GGUF_PATH=$LLM_EXPORT_PATH/granite_llm_q4_k_m.gguf +``` + +Note that currently you cannot quantize the visual encoder because granite vision models use SigLIP as the visual encoder, which has tensor dimensions that are not divisible by 32. + + +### 5. Running the Model in llama.cpp +Build llama.cpp normally; you should have a target binary named `llama-llava-cli`, to which you pass both GGUF files. As an example, we pass the llama.cpp banner. + +```bash +$ ./build/bin/llama-llava-cli -m $LLM_GGUF_PATH \ + --mmproj $VISUAL_GGUF_PATH \ + --image ./media/llama0-banner.png \ + -c 16384 \ + -p "<|system|>\nA chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\n<|user|>\n<image>\\nWhat does the text in this image say?\n<|assistant|>\n" \ + --temp 0 +``` + +Sample output: `The text in the image reads "LLAMA C++ Can it run DOOM Llama?"` diff --git a/examples/llava/README-minicpmo2.6.md b/examples/llava/README-minicpmo2.6.md index 8713a43d64..48c4232383 100644 --- a/examples/llava/README-minicpmo2.6.md +++ b/examples/llava/README-minicpmo2.6.md @@ -5,13 +5,25 @@ Currently, this readme only supports minicpm-omni's image capabilities, and we w Download [MiniCPM-o-2_6](https://huggingface.co/openbmb/MiniCPM-o-2_6) PyTorch model from huggingface to "MiniCPM-o-2_6" folder.
+ +### Build llama.cpp +Readme modification time: 20250206 + +If there are differences in usage, please refer to the official build [documentation](https://github.com/ggerganov/llama.cpp/blob/master/docs/build.md) + Clone llama.cpp: ```bash -git clone git@github.com:OpenBMB/llama.cpp.git +git clone https://github.com/ggerganov/llama.cpp cd llama.cpp -git checkout minicpm-omni ``` +Build llama.cpp using `CMake`: +```bash +cmake -B build +cmake --build build --config Release +``` + + ### Usage of MiniCPM-o 2.6 Convert PyTorch model to gguf files (You can also download the converted [gguf](https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf) by us) @@ -22,25 +34,15 @@ python ./examples/llava/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM- python ./convert_hf_to_gguf.py ../MiniCPM-o-2_6/model # quantize int4 version -./llama-quantize ../MiniCPM-o-2_6/model/ggml-model-f16.gguf ../MiniCPM-o-2_6/model/ggml-model-Q4_K_M.gguf Q4_K_M +./build/bin/llama-quantize ../MiniCPM-o-2_6/model/ggml-model-f16.gguf ../MiniCPM-o-2_6/model/ggml-model-Q4_K_M.gguf Q4_K_M ``` -Build llama.cpp using `CMake`: -https://github.com/ggerganov/llama.cpp/blob/master/docs/build.md - -```bash -cmake -B build -cmake --build build --config Release -``` Inference on Linux or Mac -``` +```bash # run f16 version -./llama-minicpmv-cli -m ../MiniCPM-o-2_6/model/ggml-model-f16.gguf --mmproj ../MiniCPM-o-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?" +./build/bin/llama-minicpmv-cli -m ../MiniCPM-o-2_6/model/ggml-model-f16.gguf --mmproj ../MiniCPM-o-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?" # run quantized int4 version -./llama-minicpmv-cli -m ../MiniCPM-o-2_6/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-o-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?" - -# or run in interactive mode -./llama-minicpmv-cli -m ../MiniCPM-o-2_6/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-o-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -i +./build/bin/llama-minicpmv-cli -m ../MiniCPM-o-2_6/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-o-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?" ``` diff --git a/examples/llava/README-minicpmv2.5.md b/examples/llava/README-minicpmv2.5.md index 1c8498ff9e..6bfe7abd16 100644 --- a/examples/llava/README-minicpmv2.5.md +++ b/examples/llava/README-minicpmv2.5.md @@ -4,13 +4,26 @@ Download [MiniCPM-Llama3-V-2_5](https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5) PyTorch model from huggingface to "MiniCPM-Llama3-V-2_5" folder. 
+ +### Build llama.cpp +Readme modification time: 20250206 + +If there are differences in usage, please refer to the official build [documentation](https://github.com/ggerganov/llama.cpp/blob/master/docs/build.md) + Clone llama.cpp: ```bash -git clone https://github.com/ggerganov/llama.cpp +git clone https://github.com/ggml-org/llama.cpp cd llama.cpp ``` -### Usage +Build llama.cpp using `CMake`: +```bash +cmake -B build +cmake --build build --config Release +``` + + +### Usage of MiniCPM-Llama3-V 2.5 Convert PyTorch model to gguf files (You can also download the converted [gguf](https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-gguf) by us) @@ -20,80 +33,15 @@ python ./examples/llava/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM- python ./convert_hf_to_gguf.py ../MiniCPM-Llama3-V-2_5/model # quantize int4 version -./llama-quantize ../MiniCPM-Llama3-V-2_5/model/model-8B-F16.gguf ../MiniCPM-Llama3-V-2_5/model/ggml-model-Q4_K_M.gguf Q4_K_M +./build/bin/llama-quantize ../MiniCPM-Llama3-V-2_5/model/model-8B-F16.gguf ../MiniCPM-Llama3-V-2_5/model/ggml-model-Q4_K_M.gguf Q4_K_M ``` -Build for Linux or Mac - -```bash -make -make llama-minicpmv-cli -``` Inference on Linux or Mac -``` +```bash # run f16 version -./llama-minicpmv-cli -m ../MiniCPM-Llama3-V-2_5/model/model-8B-F16.gguf --mmproj ../MiniCPM-Llama3-V-2_5/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?" +./build/bin/llama-minicpmv-cli -m ../MiniCPM-Llama3-V-2_5/model/model-8B-F16.gguf --mmproj ../MiniCPM-Llama3-V-2_5/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?" # run quantized int4 version -./llama-minicpmv-cli -m ../MiniCPM-Llama3-V-2_5/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-Llama3-V-2_5/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?" - -# or run in interactive mode -./llama-minicpmv-cli -m ../MiniCPM-Llama3-V-2_5/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-Llama3-V-2_5/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -i -``` - -### Android - -#### Build on Android device using Termux -We found that build on Android device would bring better runtime performance, so we recommend to build on device. - -[Termux](https://github.com/termux/termux-app#installation) is a terminal app on Android device (no root required). - -Install tools in Termux: -``` -apt update && apt upgrade -y -apt install git make cmake -``` - -It's recommended to move your model inside the `~/` directory for best performance: -``` -cd storage/downloads -mv model.gguf ~/ -``` - -#### Building the Project using Android NDK -Obtain the [Android NDK](https://developer.android.com/ndk) and then build with CMake. - -Execute the following commands on your computer to avoid downloading the NDK to your mobile. Alternatively, you can also do this in Termux: - -```bash -mkdir build-android -cd build-android -export NDK=/your_ndk_path -cmake -DCMAKE_TOOLCHAIN_FILE=$NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=android-23 -DCMAKE_C_FLAGS=-march=armv8.4a+dotprod .. -make -``` - -Install [termux](https://github.com/termux/termux-app#installation) on your device and run `termux-setup-storage` to get access to your SD card (if Android 11+ then run the command twice). 
- -Finally, copy these built `llama` binaries and the model file to your device storage. Because the file permissions in the Android sdcard cannot be changed, you can copy the executable files to the `/data/data/com.termux/files/home/bin` path, and then execute the following commands in Termux to add executable permission: - -(Assumed that you have pushed the built executable files to the /sdcard/llama.cpp/bin path using `adb push`) -``` -$cp -r /sdcard/llama.cpp/bin /data/data/com.termux/files/home/ -$cd /data/data/com.termux/files/home/bin -$chmod +x ./* -``` - -Download models and push them to `/sdcard/llama.cpp/`, then move it to `/data/data/com.termux/files/home/model/` - -``` -$mv /sdcard/llama.cpp/ggml-model-Q4_K_M.gguf /data/data/com.termux/files/home/model/ -$mv /sdcard/llama.cpp/mmproj-model-f16.gguf /data/data/com.termux/files/home/model/ -``` - -Now, you can start chatting: -``` -$cd /data/data/com.termux/files/home/bin -$./llama-minicpmv-cli -m ../model/ggml-model-Q4_K_M.gguf --mmproj ../model/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?" +./build/bin/llama-minicpmv-cli -m ../MiniCPM-Llama3-V-2_5/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-Llama3-V-2_5/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?" ``` diff --git a/examples/llava/README-minicpmv2.6.md b/examples/llava/README-minicpmv2.6.md index c4be5e5dd6..2df39cdbac 100644 --- a/examples/llava/README-minicpmv2.6.md +++ b/examples/llava/README-minicpmv2.6.md @@ -4,13 +4,25 @@ Download [MiniCPM-V-2_6](https://huggingface.co/openbmb/MiniCPM-V-2_6) PyTorch model from huggingface to "MiniCPM-V-2_6" folder. + +### Build llama.cpp +Readme modification time: 20250206 + +If there are differences in usage, please refer to the official build [documentation](https://github.com/ggerganov/llama.cpp/blob/master/docs/build.md) + Clone llama.cpp: ```bash -git clone git@github.com:OpenBMB/llama.cpp.git +git clone https://github.com/ggerganov/llama.cpp cd llama.cpp -git checkout minicpmv-main ``` +Build llama.cpp using `CMake`: +```bash +cmake -B build +cmake --build build --config Release +``` + + ### Usage of MiniCPM-V 2.6 Convert PyTorch model to gguf files (You can also download the converted [gguf](https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf) by us) @@ -21,87 +33,15 @@ python ./examples/llava/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM- python ./convert_hf_to_gguf.py ../MiniCPM-V-2_6/model # quantize int4 version -./llama-quantize ../MiniCPM-V-2_6/model/ggml-model-f16.gguf ../MiniCPM-V-2_6/model/ggml-model-Q4_K_M.gguf Q4_K_M +./build/bin/llama-quantize ../MiniCPM-V-2_6/model/ggml-model-f16.gguf ../MiniCPM-V-2_6/model/ggml-model-Q4_K_M.gguf Q4_K_M ``` -Build for Linux or Mac - -```bash -make -make llama-minicpmv-cli -``` Inference on Linux or Mac -``` +```bash # run f16 version -./llama-minicpmv-cli -m ../MiniCPM-V-2_6/model/ggml-model-f16.gguf --mmproj ../MiniCPM-V-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?" +./build/bin/llama-minicpmv-cli -m ../MiniCPM-V-2_6/model/ggml-model-f16.gguf --mmproj ../MiniCPM-V-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?" 
# run quantized int4 version -./llama-minicpmv-cli -m ../MiniCPM-V-2_6/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-V-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?" - -# or run in interactive mode -./llama-minicpmv-cli -m ../MiniCPM-V-2_6/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-V-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -i -``` - -### Video -Install FFmpeg -``` -brew install ffmpeg -brew install pkg-config -``` - -### Android - -#### Build on Android device using Termux -We found that build on Android device would bring better runtime performance, so we recommend to build on device. - -[Termux](https://github.com/termux/termux-app#installation) is a terminal app on Android device (no root required). - -Install tools in Termux: -``` -apt update && apt upgrade -y -apt install git make cmake -``` - -It's recommended to move your model inside the `~/` directory for best performance: -``` -cd storage/downloads -mv model.gguf ~/ -``` - -#### Building the Project using Android NDK -Obtain the [Android NDK](https://developer.android.com/ndk) and then build with CMake. - -Execute the following commands on your computer to avoid downloading the NDK to your mobile. Alternatively, you can also do this in Termux: - -```bash -mkdir build-android -cd build-android -export NDK=/your_ndk_path -cmake -DCMAKE_TOOLCHAIN_FILE=$NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=android-23 -DCMAKE_C_FLAGS=-march=armv8.4a+dotprod .. -make -``` - -Install [termux](https://github.com/termux/termux-app#installation) on your device and run `termux-setup-storage` to get access to your SD card (if Android 11+ then run the command twice). - -Finally, copy these built `llama` binaries and the model file to your device storage. Because the file permissions in the Android sdcard cannot be changed, you can copy the executable files to the `/data/data/com.termux/files/home/bin` path, and then execute the following commands in Termux to add executable permission: - -(Assumed that you have pushed the built executable files to the /sdcard/llama.cpp/bin path using `adb push`) -``` -$cp -r /sdcard/llama.cpp/bin /data/data/com.termux/files/home/ -$cd /data/data/com.termux/files/home/bin -$chmod +x ./* -``` - -Download models and push them to `/sdcard/llama.cpp/`, then move it to `/data/data/com.termux/files/home/model/` - -``` -$mv /sdcard/llama.cpp/ggml-model-Q4_K_M.gguf /data/data/com.termux/files/home/model/ -$mv /sdcard/llama.cpp/mmproj-model-f16.gguf /data/data/com.termux/files/home/model/ -``` - -Now, you can start chatting: -``` -$cd /data/data/com.termux/files/home/bin -$./llama-minicpmv-cli -m ../model/ggml-model-Q4_K_M.gguf --mmproj ../model/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?" +./build/bin/llama-minicpmv-cli -m ../MiniCPM-V-2_6/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-V-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?" ``` diff --git a/examples/llava/README-quantize.md b/examples/llava/README-quantize.md new file mode 100644 index 0000000000..b931513ab7 --- /dev/null +++ b/examples/llava/README-quantize.md @@ -0,0 +1,44 @@ +# Quantizing CLIP Visual Projector + +This is the tool for quantizing the CLIP visual projector model. 
Quantization reduces the precision of the model's weights, which can significantly decrease the model size and improve inference speed, often with minimal impact on performance. + +## Usage + +To quantize a CLIP visual projector model, use the following command: + +```sh +./bin/llama-llava-clip-quantize-cli /path/to/ggml-model-f32.gguf /path/to/ggml-model-quantized.gguf <type> +``` + +After the quantization, the visual projector can be used freely with the existing LLAVA cli (LLaVA, Qwen2VL, etc.). + +### Arguments + +- `/path/to/ggml-model-f32.gguf`: The path to the input model file in FP32 or FP16 format. +- `/path/to/ggml-model-quantized.gguf`: The path where the quantized model will be saved. +- `<type>`: The quantization type to apply. This should be an integer corresponding to one of the quantization types defined in the `enum ggml_type`. + +### Quantization Types + +The following quantization types are supported, based on the `enum ggml_type` definition: + +- `2` - `q4_0`: 4-bit quantization with a single scale value. +- `3` - `q4_1`: 4-bit quantization with a separate scale value for each block. +- `6` - `q5_0`: 5-bit quantization with a single scale value. +- `7` - `q5_1`: 5-bit quantization with a separate scale value for each block. +- `8` - `q8_0`: 8-bit quantization with a single scale value. + +### Example + +To quantize a model using the `q4_0` quantization type, you would run: + +```sh +./bin/llama-llava-clip-quantize-cli /path/to/ggml-model-f32.gguf /path/to/ggml-model-quantized.gguf 2 +``` + +This command will generate a quantized model at `/path/to/ggml-model-quantized.gguf` using the `q4_0` quantization method. + +## Notes + +- Quantization can lead to a loss in model accuracy, depending on the chosen quantization type. It is recommended to evaluate the quantized model's performance on your specific task to ensure it meets your requirements. +- The quantized model will typically be smaller in size and faster to run, making it more suitable for deployment in resource-constrained environments. diff --git a/examples/llava/README.md b/examples/llava/README.md index 0124513617..0e3c320320 100644 --- a/examples/llava/README.md +++ b/examples/llava/README.md @@ -101,8 +101,27 @@ python ./examples/convert_legacy_llama.py ../llava-v1.6-vicuna-7b/ --skip-unknow ``` **note** llava-1.6 needs more context than llava-1.5, at least 3000 is needed (just run it at -c 4096) + **note** llava-1.6 greatly benefits from batched prompt processing (defaults work) +**note** if the language model in step `6)` is incompatible with the legacy conversion script, the easiest way to handle the LLM conversion is to load the model in `transformers` and export only the LLM from the LLaVA-NeXT model. + +```python +import os +import transformers + +model_path = ... +llm_export_path = ... + +tokenizer = transformers.AutoTokenizer.from_pretrained(model_path) +model = transformers.AutoModelForImageTextToText.from_pretrained(model_path) + +tokenizer.save_pretrained(llm_export_path) +model.language_model.save_pretrained(llm_export_path) +``` + +Then, you can convert the LLM using the `convert_hf_to_gguf.py` script, which handles more LLM architectures.
+ ## llava-cli templating and llava-1.6 prompting llava-1.5 models all use the same vicuna prompt, here you can just add your image question like `-p "Provide a full description."` diff --git a/examples/llava/clip-impl.h b/examples/llava/clip-impl.h new file mode 100644 index 0000000000..4d7340a56b --- /dev/null +++ b/examples/llava/clip-impl.h @@ -0,0 +1,344 @@ +#include "ggml.h" +#include "gguf.h" +#include "clip.h" + +#include "clip.h" + +#include +#include +#include +#include +#include +#include +#include + +// Internal header for clip.cpp + +#define KEY_FTYPE "general.file_type" +#define KEY_NAME "general.name" +#define KEY_DESCRIPTION "general.description" +#define KEY_HAS_TEXT_ENC "clip.has_text_encoder" +#define KEY_HAS_VIS_ENC "clip.has_vision_encoder" +#define KEY_HAS_LLAVA_PROJ "clip.has_llava_projector" +#define KEY_HAS_MINICPMV_PROJ "clip.has_minicpmv_projector" +#define KEY_HAS_GLM_PROJ "clip.has_glm_projector" +#define KEY_MINICPMV_VERSION "clip.minicpmv_version" +#define KEY_HAS_QWEN2VL_MERGER "clip.has_qwen2vl_merger" +#define KEY_USE_GELU "clip.use_gelu" +#define KEY_USE_SILU "clip.use_silu" +#define KEY_N_EMBD "clip.%s.embedding_length" +#define KEY_N_FF "clip.%s.feed_forward_length" +#define KEY_N_BLOCK "clip.%s.block_count" +#define KEY_N_HEAD "clip.%s.attention.head_count" +#define KEY_LAYER_NORM_EPS "clip.%s.attention.layer_norm_epsilon" +#define KEY_PROJ_DIM "clip.%s.projection_dim" +#define KEY_TOKENS "tokenizer.ggml.tokens" +#define KEY_N_POSITIONS "clip.text.context_length" +#define KEY_IMAGE_SIZE "clip.vision.image_size" +#define KEY_PATCH_SIZE "clip.vision.patch_size" +#define KEY_IMAGE_MEAN "clip.vision.image_mean" +#define KEY_IMAGE_STD "clip.vision.image_std" +#define KEY_PROJ_TYPE "clip.projector_type" +#define KEY_FEATURE_LAYER "clip.vision.feature_layer" + +#define KEY_MM_PATCH_MERGE_TYPE "clip.vision.mm_patch_merge_type" +#define KEY_IMAGE_GRID_PINPOINTS "clip.vision.image_grid_pinpoints" +#define KEY_IMAGE_CROP_RESOLUTION "clip.vision.image_crop_resolution" + + +// +// tensor name constants +// + +#define TN_TOKEN_EMBD "%s.token_embd.weight" +#define TN_POS_EMBD "%s.position_embd.weight" +#define TN_CLASS_EMBD "v.class_embd" +#define TN_PATCH_EMBD "v.patch_embd.weight" // not rename tensor with ".0" postfix for backwrad compat +#define TN_PATCH_EMBD_1 "v.patch_embd.weight.1" +#define TN_PATCH_BIAS "v.patch_embd.bias" +#define TN_ATTN_K "%s.blk.%d.attn_k.%s" +#define TN_ATTN_Q "%s.blk.%d.attn_q.%s" +#define TN_ATTN_V "%s.blk.%d.attn_v.%s" +#define TN_ATTN_OUTPUT "%s.blk.%d.attn_out.%s" +#define TN_FFN_DOWN "%s.blk.%d.ffn_down.%s" +#define TN_FFN_UP "%s.blk.%d.ffn_up.%s" +#define TN_LN_1 "%s.blk.%d.ln1.%s" +#define TN_LN_2 "%s.blk.%d.ln2.%s" +#define TN_LN_PRE "%s.pre_ln.%s" +#define TN_LN_POST "%s.post_ln.%s" +#define TN_TEXT_PROJ "text_projection.weight" +#define TN_VIS_PROJ "visual_projection.weight" +#define TN_LLAVA_PROJ "mm.%d.%s" +#define TN_MVLM_PROJ_MLP "mm.model.mlp.%d.%s" +#define TN_MVLM_PROJ_BLOCK "mm.model.mb_block.%d.block.%d.%s" +#define TN_MVLM_PROJ_PEG "mm.model.peg.%d.%s" +#define TN_IMAGE_NEWLINE "model.image_newline" +#define TN_MM_INP_PROJ "mm.input_projection.weight" // gemma3 +#define TN_MM_SOFT_EMB_N "mm.soft_emb_norm.weight" // gemma3 + +// mimicpmv +#define TN_MINICPMV_POS_EMBD_K "resampler.pos_embed_k" +#define TN_MINICPMV_QUERY "resampler.query" +#define TN_MINICPMV_PROJ "resampler.proj.weight" +#define TN_MINICPMV_KV_PROJ "resampler.kv.weight" +#define TN_MINICPMV_ATTN "resampler.attn.%s.%s" +#define TN_MINICPMV_LN 
"resampler.ln_%s.%s" + +#define TN_GLM_ADAPER_CONV "adapter.conv.%s" +#define TN_GLM_ADAPTER_LINEAR "adapter.linear.linear.%s" +#define TN_GLM_ADAPTER_NORM_1 "adapter.linear.norm1.%s" +#define TN_GLM_ADAPTER_D_H_2_4H "adapter.linear.dense_h_to_4h.%s" +#define TN_GLM_ADAPTER_GATE "adapter.linear.gate.%s" +#define TN_GLM_ADAPTER_D_4H_2_H "adapter.linear.dense_4h_to_h.%s" +#define TN_GLM_BOI_W "adapter.boi" +#define TN_GLM_EOI_W "adapter.eoi" + +enum projector_type { + PROJECTOR_TYPE_MLP, + PROJECTOR_TYPE_MLP_NORM, + PROJECTOR_TYPE_LDP, + PROJECTOR_TYPE_LDPV2, + PROJECTOR_TYPE_RESAMPLER, + PROJECTOR_TYPE_GLM_EDGE, + PROJECTOR_TYPE_MERGER, + PROJECTOR_TYPE_GEMMA3, + PROJECTOR_TYPE_UNKNOWN, +}; + +static std::map PROJECTOR_TYPE_NAMES = { + { PROJECTOR_TYPE_MLP, "mlp" }, + { PROJECTOR_TYPE_LDP, "ldp" }, + { PROJECTOR_TYPE_LDPV2, "ldpv2"}, + { PROJECTOR_TYPE_RESAMPLER, "resampler"}, + { PROJECTOR_TYPE_GLM_EDGE, "adapter"}, + { PROJECTOR_TYPE_MERGER, "qwen2vl_merger"}, + { PROJECTOR_TYPE_GEMMA3, "gemma3"}, +}; + +static projector_type clip_projector_type_from_string(const std::string & str) { + for (const auto & pair : PROJECTOR_TYPE_NAMES) { + if (pair.second == str) { + return pair.first; + } + } + return PROJECTOR_TYPE_UNKNOWN; +} + +// RGB uint8 image +struct clip_image_u8 { + int nx; + int ny; + + std::vector buf; +}; + +// RGB float32 image (NHWC) +// Memory layout: RGBRGBRGB... +struct clip_image_f32 { + int nx; + int ny; + + std::vector buf; +}; + +// +// logging +// + +static void clip_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) { + (void) level; + (void) user_data; + fputs(text, stderr); + fflush(stderr); +} + +struct clip_logger_state { + ggml_log_level verbosity_thold; + ggml_log_callback log_callback; + void * log_callback_user_data; +}; + +extern struct clip_logger_state g_logger_state; + +static void clip_log_internal_v(enum ggml_log_level level, const char * format, va_list args) { + if (format == NULL) { + return; + } + va_list args_copy; + va_copy(args_copy, args); + char buffer[128]; + int len = vsnprintf(buffer, 128, format, args); + if (len < 128) { + g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data); + } else { + char * buffer2 = (char *) calloc(len + 1, sizeof(char)); + vsnprintf(buffer2, len + 1, format, args_copy); + buffer2[len] = 0; + g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data); + free(buffer2); + } + va_end(args_copy); +} + +static void clip_log_internal(enum ggml_log_level level, const char * format, ...) { + va_list args; + va_start(args, format); + clip_log_internal_v(level, format, args); + va_end(args); +} + +#define LOG_TMPL(level, ...) \ + do { \ + if ((level) >= g_logger_state.verbosity_thold) { \ + clip_log_internal((level), __VA_ARGS__); \ + } \ + } while (0) +#define LOG_INF(...) LOG_TMPL(GGML_LOG_LEVEL_INFO, __VA_ARGS__) +#define LOG_WRN(...) LOG_TMPL(GGML_LOG_LEVEL_WARN, __VA_ARGS__) +#define LOG_ERR(...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) +#define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__) +#define LOG_CNT(...) 
+
+//
+// cpp wrappers
+//
+
+// wrapper for clip_image_size
+struct clip_image_size_deleter {
+    void operator()(clip_image_size * val) { clip_image_size_free(val); }
+};
+typedef std::unique_ptr<clip_image_size, clip_image_size_deleter> clip_image_size_ptr;
+
+// wrapper for clip_image_u8
+struct clip_image_u8_deleter {
+    void operator()(clip_image_u8 * val) { clip_image_u8_free(val); }
+};
+typedef std::unique_ptr<clip_image_u8, clip_image_u8_deleter> clip_image_u8_ptr;
+
+// wrapper for clip_image_f32
+struct clip_image_f32_deleter {
+    void operator()(clip_image_f32 * val) { clip_image_f32_free(val); }
+};
+typedef std::unique_ptr<clip_image_f32, clip_image_f32_deleter> clip_image_f32_ptr;
+
+struct clip_image_u8_batch {
+    std::vector<clip_image_u8_ptr> entries;
+};
+
+struct clip_image_f32_batch {
+    std::vector<clip_image_f32_ptr> entries;
+};
+
+//
+// common utils
+//
+
+static std::string string_format(const char * fmt, ...) {
+    va_list ap;
+    va_list ap2;
+    va_start(ap, fmt);
+    va_copy(ap2, ap);
+    int size = vsnprintf(NULL, 0, fmt, ap);
+    GGML_ASSERT(size >= 0 && size < INT_MAX); // NOLINT
+    std::vector<char> buf(size + 1);
+    int size2 = vsnprintf(buf.data(), size + 1, fmt, ap2);
+    GGML_ASSERT(size2 == size);
+    va_end(ap2);
+    va_end(ap);
+    return std::string(buf.data(), buf.size());
+}
+
+static void string_replace_all(std::string & s, const std::string & search, const std::string & replace) {
+    if (search.empty()) {
+        return;
+    }
+    std::string builder;
+    builder.reserve(s.length());
+    size_t pos = 0;
+    size_t last_pos = 0;
+    while ((pos = s.find(search, last_pos)) != std::string::npos) {
+        builder.append(s, last_pos, pos - last_pos);
+        builder.append(replace);
+        last_pos = pos + search.length();
+    }
+    builder.append(s, last_pos, std::string::npos);
+    s = std::move(builder);
+}
+
+// split string by a `std::string delim` instead of `char delim`
+static std::vector<std::string> string_split_str(std::string s, const std::string & delimiter) {
+    std::vector<std::string> tokens;
+    size_t pos = 0;
+    std::string token;
+    while ((pos = s.find(delimiter)) != std::string::npos) {
+        token = s.substr(0, pos);
+        tokens.push_back(token);
+        s.erase(0, pos + delimiter.length());
+    }
+    tokens.push_back(s);
+    return tokens;
+}
+
+//
+// gguf utils
+//
+
+static std::string gguf_data_to_str(enum gguf_type type, const void * data, int i) {
+    switch (type) {
+        case GGUF_TYPE_UINT8:   return std::to_string(((const uint8_t  *)data)[i]);
+        case GGUF_TYPE_INT8:    return std::to_string(((const int8_t   *)data)[i]);
+        case GGUF_TYPE_UINT16:  return std::to_string(((const uint16_t *)data)[i]);
+        case GGUF_TYPE_INT16:   return std::to_string(((const int16_t  *)data)[i]);
+        case GGUF_TYPE_UINT32:  return std::to_string(((const uint32_t *)data)[i]);
+        case GGUF_TYPE_INT32:   return std::to_string(((const int32_t  *)data)[i]);
+        case GGUF_TYPE_UINT64:  return std::to_string(((const uint64_t *)data)[i]);
+        case GGUF_TYPE_INT64:   return std::to_string(((const int64_t  *)data)[i]);
+        case GGUF_TYPE_FLOAT32: return std::to_string(((const float    *)data)[i]);
+        case GGUF_TYPE_FLOAT64: return std::to_string(((const double   *)data)[i]);
+        case GGUF_TYPE_BOOL:    return ((const bool *)data)[i] ? "true" : "false";
+        default:                return string_format("unknown type %d", type);
+    }
+}
+
+static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
+    const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
+
+    switch (type) {
+        case GGUF_TYPE_STRING:
+            return gguf_get_val_str(ctx_gguf, i);
+        case GGUF_TYPE_ARRAY:
+            {
+                const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i);
+                int arr_n = gguf_get_arr_n(ctx_gguf, i);
+                const void * data = arr_type == GGUF_TYPE_STRING ?
nullptr : gguf_get_arr_data(ctx_gguf, i); + std::stringstream ss; + ss << "["; + for (int j = 0; j < arr_n; j++) { + if (arr_type == GGUF_TYPE_STRING) { + std::string val = gguf_get_arr_str(ctx_gguf, i, j); + // escape quotes + string_replace_all(val, "\\", "\\\\"); + string_replace_all(val, "\"", "\\\""); + ss << '"' << val << '"'; + } else if (arr_type == GGUF_TYPE_ARRAY) { + ss << "???"; + } else { + ss << gguf_data_to_str(arr_type, data, j); + } + if (j < arr_n - 1) { + ss << ", "; + } + } + ss << "]"; + return ss.str(); + } + default: + return gguf_data_to_str(type, gguf_get_val_data(ctx_gguf, i), 0); + } +} + +// +// API used internally with mtmd +// + +projector_type clip_get_projector_type(const struct clip_ctx * ctx); diff --git a/examples/llava/clip-quantize-cli.cpp b/examples/llava/clip-quantize-cli.cpp new file mode 100644 index 0000000000..566506954e --- /dev/null +++ b/examples/llava/clip-quantize-cli.cpp @@ -0,0 +1,59 @@ +#include "arg.h" +#include "base64.hpp" +#include "log.h" +#include "common.h" +#include "sampling.h" +#include "clip.h" +#include "llava.h" +#include "llama.h" +#include "ggml.h" + +static void print_usage(int argc, char ** argv) { + (void) argc; + + fprintf(stderr, "usage: %s /path/to/ggml-model-f32.gguf /path/to/ggml-model-quantized.gguf type\n", argv[0]); + fprintf(stderr, " type = 2 - q4_0\n"); + fprintf(stderr, " type = 3 - q4_1\n"); + fprintf(stderr, " type = 6 - q5_0\n"); + fprintf(stderr, " type = 7 - q5_1\n"); + fprintf(stderr, " type = 8 - q8_0\n"); +} + +int main(int argc, char ** argv) { + if (argc != 4) { + print_usage(argc, argv); + return 1; + } + + const std::string fname_inp = argv[1]; + const std::string fname_out = argv[2]; + + const int itype = atoi(argv[3]); + + const int64_t t_main_start_us = ggml_time_us(); + + int64_t t_quantize_us = 0; + + // load the model + { + const int64_t t_start_us = ggml_time_us(); + + if (!clip_model_quantize(fname_inp.c_str(), fname_out.c_str(), itype)) { + fprintf(stderr, "%s: failed to quantize model from '%s'\n", __func__, fname_inp.c_str()); + return 1; + } + + t_quantize_us = ggml_time_us() - t_start_us; + } + + // report timing + { + const int64_t t_main_end_us = ggml_time_us(); + + printf("\n"); + printf("%s: quantize time = %8.2f ms\n", __func__, t_quantize_us / 1000.0f); + printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us) / 1000.0f); + } + + return 0; +} diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp index 24073c5a9b..759706156d 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp @@ -3,32 +3,14 @@ // I'll gradually clean and extend it // Note: Even when using identical normalized image inputs (see normalize_image_u8_to_f32()) we have a significant difference in resulting embeddings compared to pytorch #include "clip.h" +#include "clip-impl.h" #include "ggml.h" +#include "ggml-cpp.h" #include "ggml-cpu.h" #include "ggml-alloc.h" #include "ggml-backend.h" #include "gguf.h" -//#ifdef GGML_USE_CUDA -//#include "ggml-cuda.h" -//#endif -// -//#ifdef GGML_USE_SYCL -//#include "ggml-sycl.h" -//#endif -// -//#ifdef GGML_USE_METAL -//#include "ggml-metal.h" -//#endif -// -//#ifdef GGML_USE_CANN -//#include "ggml-cann.h" -//#endif -// -//#ifdef GGML_USE_VULKAN -//#include "ggml-vulkan.h" -//#endif - #define STB_IMAGE_IMPLEMENTATION #include "stb_image.h" @@ -40,272 +22,17 @@ #include #include #include +#include #include #include #include #include +#include -#if defined(LLAVA_LOG_OFF) -# define LOG_INF(...) -# define LOG_WRN(...) 
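The quantize CLI above is a thin wrapper around `clip_model_quantize()`; the accepted type ids (2, 3, 6, 7, 8) match the `ggml_type` values of the listed quantization formats. A minimal programmatic sketch with placeholder file names:

```cpp
#include <cstdio>
#include "clip.h"

// illustrative only: quantize an f32 CLIP/mmproj GGUF to q8_0 (type id 8),
// mirroring what clip-quantize-cli does; the file names are placeholders
int main() {
    const char * fname_inp = "mmproj-model-f32.gguf";
    const char * fname_out = "mmproj-model-q8_0.gguf";

    if (!clip_model_quantize(fname_inp, fname_out, /* itype */ 8)) {
        fprintf(stderr, "failed to quantize '%s'\n", fname_inp);
        return 1;
    }
    return 0;
}
```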
-# define LOG_ERR(...) -# define LOG_DBG(...) -#else // defined(LLAVA_LOG_OFF) -# define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0) -# define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0) -# define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0) -# define LOG_DBG(...) do { fprintf(stdout, __VA_ARGS__); } while (0) -#endif // defined(LLAVA_LOG_OFF) +struct clip_logger_state g_logger_state = {GGML_LOG_LEVEL_CONT, clip_log_callback_default, NULL}; //#define CLIP_DEBUG_FUNCTIONS -// RGB uint8 image -struct clip_image_u8 { - int nx; - int ny; - - std::vector buf; -}; - -// RGB float32 image (NHWC) -// Memory layout: RGBRGBRGB... -struct clip_image_f32 { - int nx; - int ny; - - std::vector buf; -}; - -static std::string format(const char * fmt, ...) { - va_list ap; - va_list ap2; - va_start(ap, fmt); - va_copy(ap2, ap); - int size = vsnprintf(NULL, 0, fmt, ap); - GGML_ASSERT(size >= 0 && size < INT_MAX); // NOLINT - std::vector buf(size + 1); - int size2 = vsnprintf(buf.data(), size + 1, fmt, ap2); - GGML_ASSERT(size2 == size); - va_end(ap2); - va_end(ap); - return std::string(buf.data(), buf.size()); -} - -// -// key constants -// - -#define KEY_FTYPE "general.file_type" -#define KEY_NAME "general.name" -#define KEY_DESCRIPTION "general.description" -#define KEY_HAS_TEXT_ENC "clip.has_text_encoder" -#define KEY_HAS_VIS_ENC "clip.has_vision_encoder" -#define KEY_HAS_LLAVA_PROJ "clip.has_llava_projector" -#define KEY_HAS_MINICPMV_PROJ "clip.has_minicpmv_projector" -#define KEY_MINICPMV_VERSION "clip.minicpmv_version" -#define KEY_HAS_QWEN2VL_MERGER "clip.has_qwen2vl_merger" -#define KEY_USE_GELU "clip.use_gelu" -#define KEY_USE_SILU "clip.use_silu" -#define KEY_N_EMBD "clip.%s.embedding_length" -#define KEY_N_FF "clip.%s.feed_forward_length" -#define KEY_N_BLOCK "clip.%s.block_count" -#define KEY_N_HEAD "clip.%s.attention.head_count" -#define KEY_LAYER_NORM_EPS "clip.%s.attention.layer_norm_epsilon" -#define KEY_PROJ_DIM "clip.%s.projection_dim" -#define KEY_TOKENS "tokenizer.ggml.tokens" -#define KEY_N_POSITIONS "clip.text.context_length" -#define KEY_IMAGE_SIZE "clip.vision.image_size" -#define KEY_PATCH_SIZE "clip.vision.patch_size" -#define KEY_IMAGE_MEAN "clip.vision.image_mean" -#define KEY_IMAGE_STD "clip.vision.image_std" -#define KEY_PROJ_TYPE "clip.projector_type" - -#define KEY_MM_PATCH_MERGE_TYPE "clip.vision.mm_patch_merge_type" -#define KEY_IMAGE_GRID_PINPOINTS "clip.vision.image_grid_pinpoints" -#define KEY_IMAGE_CROP_RESOLUTION "clip.vision.image_crop_resolution" - - -// -// tensor name constants -// - -#define TN_TOKEN_EMBD "%s.token_embd.weight" -#define TN_POS_EMBD "%s.position_embd.weight" -#define TN_CLASS_EMBD "v.class_embd" -#define TN_PATCH_EMBD "v.patch_embd.weight" // not rename tensor with ".0" postfix for backwrad compat -#define TN_PATCH_EMBD_1 "v.patch_embd.weight.1" -#define TN_PATCH_BIAS "v.patch_embd.bias" -#define TN_ATTN_K "%s.blk.%d.attn_k.%s" -#define TN_ATTN_Q "%s.blk.%d.attn_q.%s" -#define TN_ATTN_V "%s.blk.%d.attn_v.%s" -#define TN_ATTN_OUTPUT "%s.blk.%d.attn_out.%s" -#define TN_FFN_DOWN "%s.blk.%d.ffn_down.%s" -#define TN_FFN_UP "%s.blk.%d.ffn_up.%s" -#define TN_LN_1 "%s.blk.%d.ln1.%s" -#define TN_LN_2 "%s.blk.%d.ln2.%s" -#define TN_LN_PRE "%s.pre_ln.%s" -#define TN_LN_POST "%s.post_ln.%s" -#define TN_TEXT_PROJ "text_projection.weight" -#define TN_VIS_PROJ "visual_projection.weight" -#define TN_LLAVA_PROJ "mm.%d.%s" -#define TN_MVLM_PROJ_MLP "mm.model.mlp.%d.%s" -#define TN_MVLM_PROJ_BLOCK 
"mm.model.mb_block.%d.block.%d.%s" -#define TN_MVLM_PROJ_PEG "mm.model.peg.%d.%s" -#define TN_IMAGE_NEWLINE "model.image_newline" - -#define TN_MINICPMV_POS_EMBD_K "resampler.pos_embed_k" -#define TN_MINICPMV_QUERY "resampler.query" -#define TN_MINICPMV_PROJ "resampler.proj.weight" -#define TN_MINICPMV_KV_PROJ "resampler.kv.weight" -#define TN_MINICPMV_ATTN "resampler.attn.%s.%s" -#define TN_MINICPMV_LN "resampler.ln_%s.%s" - - -enum projector_type { - PROJECTOR_TYPE_MLP, - PROJECTOR_TYPE_MLP_NORM, - PROJECTOR_TYPE_LDP, - PROJECTOR_TYPE_LDPV2, - PROJECTOR_TYPE_RESAMPLER, - PROJECTOR_TYPE_MERGER, - PROJECTOR_TYPE_UNKNOWN, -}; - -static std::map PROJECTOR_TYPE_NAMES = { - { PROJECTOR_TYPE_MLP, "mlp" }, - { PROJECTOR_TYPE_LDP, "ldp" }, - { PROJECTOR_TYPE_LDPV2, "ldpv2"}, - { PROJECTOR_TYPE_RESAMPLER, "resampler"}, - { PROJECTOR_TYPE_MERGER, "qwen2vl_merger"}, -}; - - -// -// utilities to get data from a gguf file -// - -static int get_key_idx(const gguf_context * ctx, const char * key) { - int i = gguf_find_key(ctx, key); - if (i == -1) { - LOG_ERR("key %s not found in file\n", key); - throw std::runtime_error(format("Missing required key: %s", key)); - } - - return i; -} - -static uint32_t get_u32(const gguf_context * ctx, const std::string & key) { - const int i = get_key_idx(ctx, key.c_str()); - - return gguf_get_val_u32(ctx, i); -} - -static float get_f32(const gguf_context * ctx, const std::string & key) { - const int i = get_key_idx(ctx, key.c_str()); - - return gguf_get_val_f32(ctx, i); -} - -static struct ggml_tensor * get_tensor(struct ggml_context * ctx, const std::string & name) { - struct ggml_tensor * cur = ggml_get_tensor(ctx, name.c_str()); - if (!cur) { - throw std::runtime_error(format("%s: unable to find tensor %s\n", __func__, name.c_str())); - } - - return cur; -} - -static std::string get_ftype(int ftype) { - return ggml_type_name(static_cast(ftype)); -} - -static std::string gguf_data_to_str(enum gguf_type type, const void * data, int i) { - switch (type) { - case GGUF_TYPE_UINT8: return std::to_string(((const uint8_t *)data)[i]); - case GGUF_TYPE_INT8: return std::to_string(((const int8_t *)data)[i]); - case GGUF_TYPE_UINT16: return std::to_string(((const uint16_t *)data)[i]); - case GGUF_TYPE_INT16: return std::to_string(((const int16_t *)data)[i]); - case GGUF_TYPE_UINT32: return std::to_string(((const uint32_t *)data)[i]); - case GGUF_TYPE_INT32: return std::to_string(((const int32_t *)data)[i]); - case GGUF_TYPE_UINT64: return std::to_string(((const uint64_t *)data)[i]); - case GGUF_TYPE_INT64: return std::to_string(((const int64_t *)data)[i]); - case GGUF_TYPE_FLOAT32: return std::to_string(((const float *)data)[i]); - case GGUF_TYPE_FLOAT64: return std::to_string(((const double *)data)[i]); - case GGUF_TYPE_BOOL: return ((const bool *)data)[i] ? 
"true" : "false"; - default: return format("unknown type %d", type); - } -} - -static void replace_all(std::string & s, const std::string & search, const std::string & replace) { - if (search.empty()) { - return; - } - std::string builder; - builder.reserve(s.length()); - size_t pos = 0; - size_t last_pos = 0; - while ((pos = s.find(search, last_pos)) != std::string::npos) { - builder.append(s, last_pos, pos - last_pos); - builder.append(replace); - last_pos = pos + search.length(); - } - builder.append(s, last_pos, std::string::npos); - s = std::move(builder); -} - -static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) { - const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i); - - switch (type) { - case GGUF_TYPE_STRING: - return gguf_get_val_str(ctx_gguf, i); - case GGUF_TYPE_ARRAY: - { - const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i); - int arr_n = gguf_get_arr_n(ctx_gguf, i); - const void * data = arr_type == GGUF_TYPE_STRING ? nullptr : gguf_get_arr_data(ctx_gguf, i); - std::stringstream ss; - ss << "["; - for (int j = 0; j < arr_n; j++) { - if (arr_type == GGUF_TYPE_STRING) { - std::string val = gguf_get_arr_str(ctx_gguf, i, j); - // escape quotes - replace_all(val, "\\", "\\\\"); - replace_all(val, "\"", "\\\""); - ss << '"' << val << '"'; - } else if (arr_type == GGUF_TYPE_ARRAY) { - ss << "???"; - } else { - ss << gguf_data_to_str(arr_type, data, j); - } - if (j < arr_n - 1) { - ss << ", "; - } - } - ss << "]"; - return ss.str(); - } - default: - return gguf_data_to_str(type, gguf_get_val_data(ctx_gguf, i), 0); - } -} - -static void print_tensor_info(const ggml_tensor * tensor, const char * prefix = "") { - size_t tensor_size = ggml_nbytes(tensor); - LOG_INF("%s: n_dims = %d, name = %s, tensor_size=%zu, shape:[%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 "], type = %s\n", - prefix, ggml_n_dims(tensor), tensor->name, tensor_size, - tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3], ggml_type_name(tensor->type)); -} - -static projector_type clip_projector_type_from_string(const std::string & name) { - for (const auto & kv : PROJECTOR_TYPE_NAMES) { // NOLINT - if (kv.second == name) { - return kv.first; - } - } - return PROJECTOR_TYPE_UNKNOWN; -} - #ifdef CLIP_DEBUG_FUNCTIONS static void clip_image_write_image_to_ppm(const clip_image_u8& img, const std::string& filename) { std::ofstream file(filename, std::ios::binary); @@ -419,6 +146,11 @@ static void clip_image_convert_f32_to_u8(const clip_image_f32& src, clip_image_u // clip layers // +enum patch_merge_type { + PATCH_MERGE_FLAT, + PATCH_MERGE_SPATIAL_UNPAD, +}; + struct clip_hparams { int32_t image_size; int32_t patch_size; @@ -428,54 +160,55 @@ struct clip_hparams { int32_t n_head; int32_t n_layer; + patch_merge_type mm_patch_merge_type = PATCH_MERGE_FLAT; + float eps; - char mm_patch_merge_type[32] = "flat"; // spatial_unpad or flat (default) - - int32_t image_grid_pinpoints[32]; + std::vector image_grid_pinpoints; int32_t image_crop_resolution; + std::unordered_set vision_feature_layer; }; struct clip_layer { // attention - struct ggml_tensor * k_w; - struct ggml_tensor * k_b; - struct ggml_tensor * q_w; - struct ggml_tensor * q_b; - struct ggml_tensor * v_w; - struct ggml_tensor * v_b; + struct ggml_tensor * k_w = nullptr; + struct ggml_tensor * k_b = nullptr; + struct ggml_tensor * q_w = nullptr; + struct ggml_tensor * q_b = nullptr; + struct ggml_tensor * v_w = nullptr; + struct ggml_tensor * v_b = nullptr; - struct ggml_tensor * o_w; - struct ggml_tensor * o_b; + 
struct ggml_tensor * o_w = nullptr; + struct ggml_tensor * o_b = nullptr; // layernorm 1 - struct ggml_tensor * ln_1_w; - struct ggml_tensor * ln_1_b; + struct ggml_tensor * ln_1_w = nullptr; + struct ggml_tensor * ln_1_b = nullptr; // ff - struct ggml_tensor * ff_i_w; - struct ggml_tensor * ff_i_b; + struct ggml_tensor * ff_i_w = nullptr; + struct ggml_tensor * ff_i_b = nullptr; - struct ggml_tensor * ff_o_w; - struct ggml_tensor * ff_o_b; + struct ggml_tensor * ff_o_w = nullptr; + struct ggml_tensor * ff_o_b = nullptr; // layernorm 2 - struct ggml_tensor * ln_2_w; - struct ggml_tensor * ln_2_b; + struct ggml_tensor * ln_2_w = nullptr; + struct ggml_tensor * ln_2_b = nullptr; }; struct clip_vision_model { struct clip_hparams hparams; // embeddings - struct ggml_tensor * class_embedding; - struct ggml_tensor * patch_embeddings_0; - struct ggml_tensor * patch_embeddings_1; // second Conv2D kernel when we decouple Conv3D along temproal dimension (Qwen2VL) - struct ggml_tensor * patch_bias; - struct ggml_tensor * position_embeddings; + struct ggml_tensor * class_embedding = nullptr; + struct ggml_tensor * patch_embeddings_0 = nullptr; + struct ggml_tensor * patch_embeddings_1 = nullptr; // second Conv2D kernel when we decouple Conv3D along temproal dimension (Qwen2VL) + struct ggml_tensor * patch_bias = nullptr; + struct ggml_tensor * position_embeddings = nullptr; - struct ggml_tensor * pre_ln_w; - struct ggml_tensor * pre_ln_b; + struct ggml_tensor * pre_ln_w = nullptr; + struct ggml_tensor * pre_ln_b = nullptr; std::vector layers; @@ -485,74 +218,84 @@ struct clip_vision_model { struct ggml_tensor * projection; // LLaVA projection - struct ggml_tensor * mm_0_w = NULL; - struct ggml_tensor * mm_0_b = NULL; - struct ggml_tensor * mm_2_w = NULL; - struct ggml_tensor * mm_2_b = NULL; + struct ggml_tensor * mm_0_w = nullptr; + struct ggml_tensor * mm_0_b = nullptr; + struct ggml_tensor * mm_2_w = nullptr; + struct ggml_tensor * mm_2_b = nullptr; - struct ggml_tensor * image_newline = NULL; + struct ggml_tensor * image_newline = nullptr; // Yi type models with mlp+normalization projection - struct ggml_tensor * mm_1_w = NULL; // Yi type models have 0, 1, 3, 4 - struct ggml_tensor * mm_1_b = NULL; - struct ggml_tensor * mm_3_w = NULL; - struct ggml_tensor * mm_3_b = NULL; - struct ggml_tensor * mm_4_w = NULL; - struct ggml_tensor * mm_4_b = NULL; + struct ggml_tensor * mm_1_w = nullptr; // Yi type models have 0, 1, 3, 4 + struct ggml_tensor * mm_1_b = nullptr; + struct ggml_tensor * mm_3_w = nullptr; + struct ggml_tensor * mm_3_b = nullptr; + struct ggml_tensor * mm_4_w = nullptr; + struct ggml_tensor * mm_4_b = nullptr; + + //GLMV-Edge projection + struct ggml_tensor * mm_model_adapter_conv_w = nullptr; + struct ggml_tensor * mm_model_adapter_conv_b = nullptr; + struct ggml_tensor * boi_w = nullptr; + struct ggml_tensor * eoi_w = nullptr; // MobileVLM projection - struct ggml_tensor * mm_model_mlp_1_w; - struct ggml_tensor * mm_model_mlp_1_b; - struct ggml_tensor * mm_model_mlp_3_w; - struct ggml_tensor * mm_model_mlp_3_b; - struct ggml_tensor * mm_model_block_1_block_0_0_w; - struct ggml_tensor * mm_model_block_1_block_0_1_w; - struct ggml_tensor * mm_model_block_1_block_0_1_b; - struct ggml_tensor * mm_model_block_1_block_1_fc1_w; - struct ggml_tensor * mm_model_block_1_block_1_fc1_b; - struct ggml_tensor * mm_model_block_1_block_1_fc2_w; - struct ggml_tensor * mm_model_block_1_block_1_fc2_b; - struct ggml_tensor * mm_model_block_1_block_2_0_w; - struct ggml_tensor * 
mm_model_block_1_block_2_1_w; - struct ggml_tensor * mm_model_block_1_block_2_1_b; - struct ggml_tensor * mm_model_block_2_block_0_0_w; - struct ggml_tensor * mm_model_block_2_block_0_1_w; - struct ggml_tensor * mm_model_block_2_block_0_1_b; - struct ggml_tensor * mm_model_block_2_block_1_fc1_w; - struct ggml_tensor * mm_model_block_2_block_1_fc1_b; - struct ggml_tensor * mm_model_block_2_block_1_fc2_w; - struct ggml_tensor * mm_model_block_2_block_1_fc2_b; - struct ggml_tensor * mm_model_block_2_block_2_0_w; - struct ggml_tensor * mm_model_block_2_block_2_1_w; - struct ggml_tensor * mm_model_block_2_block_2_1_b; + struct ggml_tensor * mm_model_mlp_1_w = nullptr; + struct ggml_tensor * mm_model_mlp_1_b = nullptr; + struct ggml_tensor * mm_model_mlp_3_w = nullptr; + struct ggml_tensor * mm_model_mlp_3_b = nullptr; + struct ggml_tensor * mm_model_block_1_block_0_0_w = nullptr; + struct ggml_tensor * mm_model_block_1_block_0_1_w = nullptr; + struct ggml_tensor * mm_model_block_1_block_0_1_b = nullptr; + struct ggml_tensor * mm_model_block_1_block_1_fc1_w = nullptr; + struct ggml_tensor * mm_model_block_1_block_1_fc1_b = nullptr; + struct ggml_tensor * mm_model_block_1_block_1_fc2_w = nullptr; + struct ggml_tensor * mm_model_block_1_block_1_fc2_b = nullptr; + struct ggml_tensor * mm_model_block_1_block_2_0_w = nullptr; + struct ggml_tensor * mm_model_block_1_block_2_1_w = nullptr; + struct ggml_tensor * mm_model_block_1_block_2_1_b = nullptr; + struct ggml_tensor * mm_model_block_2_block_0_0_w = nullptr; + struct ggml_tensor * mm_model_block_2_block_0_1_w = nullptr; + struct ggml_tensor * mm_model_block_2_block_0_1_b = nullptr; + struct ggml_tensor * mm_model_block_2_block_1_fc1_w = nullptr; + struct ggml_tensor * mm_model_block_2_block_1_fc1_b = nullptr; + struct ggml_tensor * mm_model_block_2_block_1_fc2_w = nullptr; + struct ggml_tensor * mm_model_block_2_block_1_fc2_b = nullptr; + struct ggml_tensor * mm_model_block_2_block_2_0_w = nullptr; + struct ggml_tensor * mm_model_block_2_block_2_1_w = nullptr; + struct ggml_tensor * mm_model_block_2_block_2_1_b = nullptr; // MobileVLM_V2 projection - struct ggml_tensor * mm_model_mlp_0_w; - struct ggml_tensor * mm_model_mlp_0_b; - struct ggml_tensor * mm_model_mlp_2_w; - struct ggml_tensor * mm_model_mlp_2_b; - struct ggml_tensor * mm_model_peg_0_w; - struct ggml_tensor * mm_model_peg_0_b; + struct ggml_tensor * mm_model_mlp_0_w = nullptr; + struct ggml_tensor * mm_model_mlp_0_b = nullptr; + struct ggml_tensor * mm_model_mlp_2_w = nullptr; + struct ggml_tensor * mm_model_mlp_2_b = nullptr; + struct ggml_tensor * mm_model_peg_0_w = nullptr; + struct ggml_tensor * mm_model_peg_0_b = nullptr; // MINICPMV projection - struct ggml_tensor * mm_model_pos_embed_k; - struct ggml_tensor * mm_model_query; - struct ggml_tensor * mm_model_proj; - struct ggml_tensor * mm_model_kv_proj; - struct ggml_tensor * mm_model_attn_q_w; - struct ggml_tensor * mm_model_attn_q_b; - struct ggml_tensor * mm_model_attn_k_w; - struct ggml_tensor * mm_model_attn_k_b; - struct ggml_tensor * mm_model_attn_v_w; - struct ggml_tensor * mm_model_attn_v_b; - struct ggml_tensor * mm_model_attn_o_w; - struct ggml_tensor * mm_model_attn_o_b; - struct ggml_tensor * mm_model_ln_q_w; - struct ggml_tensor * mm_model_ln_q_b; - struct ggml_tensor * mm_model_ln_kv_w; - struct ggml_tensor * mm_model_ln_kv_b; - struct ggml_tensor * mm_model_ln_post_w; - struct ggml_tensor * mm_model_ln_post_b; + struct ggml_tensor * mm_model_pos_embed_k = nullptr; + struct ggml_tensor * mm_model_query = nullptr; 
+ struct ggml_tensor * mm_model_proj = nullptr; + struct ggml_tensor * mm_model_kv_proj = nullptr; + struct ggml_tensor * mm_model_attn_q_w = nullptr; + struct ggml_tensor * mm_model_attn_q_b = nullptr; + struct ggml_tensor * mm_model_attn_k_w = nullptr; + struct ggml_tensor * mm_model_attn_k_b = nullptr; + struct ggml_tensor * mm_model_attn_v_w = nullptr; + struct ggml_tensor * mm_model_attn_v_b = nullptr; + struct ggml_tensor * mm_model_attn_o_w = nullptr; + struct ggml_tensor * mm_model_attn_o_b = nullptr; + struct ggml_tensor * mm_model_ln_q_w = nullptr; + struct ggml_tensor * mm_model_ln_q_b = nullptr; + struct ggml_tensor * mm_model_ln_kv_w = nullptr; + struct ggml_tensor * mm_model_ln_kv_b = nullptr; + struct ggml_tensor * mm_model_ln_post_w = nullptr; + struct ggml_tensor * mm_model_ln_post_b = nullptr; + + // gemma3 + struct ggml_tensor * mm_input_proj_w = nullptr; + struct ggml_tensor * mm_soft_emb_norm_w = nullptr; }; struct clip_ctx { @@ -560,38 +303,218 @@ struct clip_ctx { bool has_vision_encoder = false; bool has_llava_projector = false; bool has_minicpmv_projector = false; + bool has_glm_projector = false; bool has_qwen2vl_merger = false; int minicpmv_version = 2; struct clip_vision_model vision_model; projector_type proj_type = PROJECTOR_TYPE_MLP; + int32_t max_feature_layer; // unused in newer models like gemma3 float image_mean[3]; float image_std[3]; bool use_gelu = false; bool use_silu = false; - int32_t ftype = 1; - bool has_class_embedding = true; - bool has_pre_norm = true; - bool has_post_norm = false; - bool has_patch_bias = false; - - struct gguf_context * ctx_gguf; - struct ggml_context * ctx_data; + gguf_context_ptr ctx_gguf; + ggml_context_ptr ctx_data; std::vector buf_compute_meta; - // memory buffers to evaluate the model - ggml_backend_buffer_t params_buffer = NULL; + std::vector backend_ptrs; + std::vector backend_buft; - ggml_backend_t backend = NULL; - ggml_gallocr_t compute_alloc = NULL; + ggml_backend_t backend; + ggml_backend_t backend_cpu; + ggml_backend_buffer_ptr buf; - struct clip_image_size * load_image_size; + ggml_backend_sched_ptr sched; + + clip_image_size load_image_size; + + clip_ctx(clip_context_params & ctx_params) { + backend_cpu = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr); + backend = ctx_params.use_gpu + ? 
ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, nullptr) + : nullptr; + + if (backend) { + LOG_INF("%s: CLIP using %s backend\n", __func__, ggml_backend_name(backend)); + backend_ptrs.push_back(backend); + backend_buft.push_back(ggml_backend_get_default_buffer_type(backend)); + } else { + backend = backend_cpu; + LOG_INF("%s: CLIP using CPU backend\n", __func__); + } + + backend_ptrs.push_back(backend_cpu); + backend_buft.push_back(ggml_backend_get_default_buffer_type(backend_cpu)); + + sched.reset( + ggml_backend_sched_new(backend_ptrs.data(), backend_buft.data(), backend_ptrs.size(), 8192, false) + ); + } + + ~clip_ctx() { + ggml_backend_free(backend); + if (backend != backend_cpu) { + ggml_backend_free(backend_cpu); + } + } }; -static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32_batch * imgs, struct clip_image_size * load_image_size, bool is_inf = false) { +static ggml_cgraph * clip_image_build_graph_siglip(clip_ctx * ctx, const clip_image_f32_batch & imgs) { + const auto & model = ctx->vision_model; + const auto & hparams = model.hparams; + + const int image_size = hparams.image_size; + int image_size_width = image_size; + int image_size_height = image_size; + + const int patch_size = hparams.patch_size; + const int num_patches = ((image_size_width / patch_size) * (image_size_height / patch_size)); + const int hidden_size = hparams.hidden_size; + const int n_head = hparams.n_head; + const int d_head = hidden_size / n_head; + const int n_layer = hparams.n_layer; + const float eps = hparams.eps; + + GGML_ASSERT(imgs.entries.size() == 1); // batch_size == 1 + + struct ggml_init_params params = { + /*.mem_size =*/ ctx->buf_compute_meta.size(), + /*.mem_buffer =*/ ctx->buf_compute_meta.data(), + /*.no_alloc =*/ true, + }; + + ggml_context_ptr ctx0_ptr(ggml_init(params)); + auto ctx0 = ctx0_ptr.get(); + + struct ggml_cgraph * gf = ggml_new_graph(ctx0); + + // input raw + struct ggml_tensor * inp_raw = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, image_size_width, image_size_height, 3); + ggml_set_name(inp_raw, "inp_raw"); + ggml_set_input(inp_raw); + + struct ggml_tensor * inp = ggml_conv_2d(ctx0, model.patch_embeddings_0, inp_raw, patch_size, patch_size, 0, 0, 1, 1); + inp = ggml_reshape_2d(ctx0, inp, num_patches, hidden_size); + inp = ggml_cont(ctx0, ggml_transpose(ctx0, inp)); + inp = ggml_add(ctx0, inp, model.patch_bias); + + // position embeddings + struct ggml_tensor * embeddings = ggml_add(ctx0, inp, model.position_embeddings); + + // loop over layers + for (int il = 0; il < n_layer; il++) { + struct ggml_tensor * cur = embeddings; // embeddings = residual, cur = hidden_states + + // layernorm1 + { + cur = ggml_norm(ctx0, cur, eps); + cur = ggml_add(ctx0, ggml_mul(ctx0, cur, model.layers[il].ln_1_w), model.layers[il].ln_1_b); + } + + // self-attention + { + + struct ggml_tensor * Q = + ggml_add(ctx0, ggml_mul_mat(ctx0, model.layers[il].q_w, cur), model.layers[il].q_b); + + Q = ggml_reshape_3d(ctx0, Q, d_head, n_head, num_patches); + Q = ggml_cont(ctx0, ggml_permute(ctx0, Q, 0, 2, 1, 3)); + + struct ggml_tensor * K = + ggml_add(ctx0, ggml_mul_mat(ctx0, model.layers[il].k_w, cur), model.layers[il].k_b); + + K = ggml_reshape_3d(ctx0, K, d_head, n_head, num_patches); + K = ggml_cont(ctx0, ggml_permute(ctx0, K, 0, 2, 1, 3)); + + struct ggml_tensor * V = + ggml_add(ctx0, ggml_mul_mat(ctx0, model.layers[il].v_w, cur), model.layers[il].v_b); + + V = ggml_reshape_3d(ctx0, V, d_head, n_head, num_patches); + V = ggml_cont(ctx0, ggml_permute(ctx0, V, 1, 2, 0, 3)); 
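+            // note: ggml_soft_max_ext() below applies the 1.0f/sqrtf(d_head) factor to the
+            // Q*K^T logits via its scale argument, and the trailing 0.0f disables the ALiBi
+            // max_bias, so this block computes the usual softmax(Q*K^T / sqrt(d_head)) * V
+            // attention without a separate ggml_scale() node.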
+ + struct ggml_tensor * KQ = ggml_mul_mat(ctx0, K, Q); + KQ = ggml_soft_max_ext(ctx0, KQ, nullptr, 1.0f / sqrtf((float)d_head), 0.0f); + + struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V, KQ); + KQV = ggml_reshape_3d(ctx0, KQV, d_head, num_patches, n_head); + KQV = ggml_permute(ctx0, KQV, 0, 2, 1, 3); + + cur = ggml_cont_2d(ctx0, KQV, hidden_size, num_patches); + } + + // attention output + cur = ggml_add(ctx0, ggml_mul_mat(ctx0, model.layers[il].o_w, cur), model.layers[il].o_b); + + // re-add the layer input, e.g., residual + cur = ggml_add(ctx0, cur, embeddings); + + embeddings = cur; // embeddings = residual, cur = hidden_states + + // layernorm2 + { + cur = ggml_norm(ctx0, cur, eps); + cur = ggml_add(ctx0, ggml_mul(ctx0, cur, model.layers[il].ln_2_w), model.layers[il].ln_2_b); + } + + cur = ggml_mul_mat(ctx0, model.layers[il].ff_i_w, cur); + cur = ggml_add(ctx0, cur, model.layers[il].ff_i_b); + + // siglip uses gelu + cur = ggml_gelu(ctx0, cur); + + cur = ggml_mul_mat(ctx0, model.layers[il].ff_o_w, cur); + cur = ggml_add(ctx0, cur, model.layers[il].ff_o_b); + + // residual 2 + cur = ggml_add(ctx0, embeddings, cur); + + embeddings = cur; + } + + // post-layernorm + if (model.post_ln_w) { + embeddings = ggml_norm(ctx0, embeddings, eps); + ggml_set_name(embeddings, "post_ln"); + + embeddings = ggml_add(ctx0, ggml_mul(ctx0, embeddings, model.post_ln_w), model.post_ln_b); + } + + if (ctx->proj_type == PROJECTOR_TYPE_GEMMA3) { + const int batch_size = 1; + const int mm_tokens_per_image = 256; // default value for gemma3 + const int tokens_per_side = sqrt(mm_tokens_per_image); + const int patches_per_image = sqrt(num_patches); + const int kernel_size = patches_per_image / tokens_per_side; + + embeddings = ggml_cont(ctx0, ggml_transpose(ctx0, embeddings)); + embeddings = ggml_reshape_4d(ctx0, embeddings, patches_per_image, patches_per_image, hidden_size, batch_size); + + // doing a pool2d to reduce the number of output tokens to 256 + embeddings = ggml_pool_2d(ctx0, embeddings, GGML_OP_POOL_AVG, kernel_size, kernel_size, kernel_size, kernel_size, 0, 0); + embeddings = ggml_reshape_3d(ctx0, embeddings, embeddings->ne[0] * embeddings->ne[0], hidden_size, batch_size); + embeddings = ggml_cont(ctx0, ggml_transpose(ctx0, embeddings)); + + // apply norm before projection + embeddings = ggml_rms_norm(ctx0, embeddings, eps); + embeddings = ggml_mul(ctx0, embeddings, model.mm_soft_emb_norm_w); + + // apply projection + embeddings = ggml_mul_mat(ctx0, + ggml_cont(ctx0, ggml_transpose(ctx0, model.mm_input_proj_w)), + embeddings); + } + + // build the graph + ggml_build_forward_expand(gf, embeddings); + + return gf; +} + +static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_image_f32_batch & imgs, struct clip_image_size load_image_size, bool is_inf = false) { if (!ctx->has_vision_encoder) { LOG_ERR("This gguf file seems to have no vision encoder\n"); return nullptr; @@ -604,41 +527,37 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 int image_size_width = image_size; int image_size_height = image_size; if (ctx->has_minicpmv_projector) { - if (load_image_size == nullptr) { - load_image_size = clip_image_size_init(); - } - LOG_DBG("%s: %d %d\n", __func__, load_image_size->width, load_image_size->height); - image_size_width = load_image_size->width; - image_size_height = load_image_size->height; + LOG_DBG("%s: %d %d\n", __func__, load_image_size.width, load_image_size.height); + image_size_width = load_image_size.width; + image_size_height = 
load_image_size.height; if (is_inf) { - image_size_width = imgs->data->nx; - image_size_height = imgs->data->ny; + image_size_width = imgs.entries[0]->nx; + image_size_height = imgs.entries[0]->ny; } } else if (ctx->has_qwen2vl_merger) { // use the image's native resolution when image is avaible if (is_inf) { // if (imgs->data->nx && imgs->data->ny) { - image_size_width = imgs->data->nx; - image_size_height = imgs->data->ny; + image_size_width = imgs.entries[0]->nx; + image_size_height = imgs.entries[0]->ny; } } const int patch_size = hparams.patch_size; const int num_patches = ((image_size_width / patch_size) * (image_size_height / patch_size)); const int patches_w = image_size_width / patch_size; const int patches_h = image_size_height / patch_size; - const int num_positions = num_patches + (ctx->has_class_embedding ? 1 : 0); + const int num_positions = num_patches + (model.class_embedding ? 1 : 0); const int num_position_ids = ctx->has_qwen2vl_merger ? num_positions * 4 : num_positions; const int hidden_size = hparams.hidden_size; const int n_head = hparams.n_head; const int d_head = hidden_size / n_head; - int n_layer = hparams.n_layer; const float eps = hparams.eps; int mrope_sections[4] = {d_head/4, d_head/4, d_head/4, d_head/4}; - const int batch_size = imgs->size; + const int batch_size = imgs.entries.size(); - if (ctx->has_llava_projector || ctx->has_minicpmv_projector) { + if (ctx->has_llava_projector || ctx->has_minicpmv_projector || ctx->has_glm_projector) { GGML_ASSERT(batch_size == 1); } @@ -648,7 +567,9 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 /*.no_alloc =*/ true, }; - struct ggml_context * ctx0 = ggml_init(params); + ggml_context_ptr ctx0_ptr(ggml_init(params)); + auto ctx0 = ctx0_ptr.get(); + struct ggml_cgraph * gf = ggml_new_graph(ctx0); struct ggml_tensor * inp_raw = ggml_new_tensor_4d(ctx0, GGML_TYPE_F32, image_size_width, image_size_height, 3, batch_size); @@ -680,7 +601,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 inp = ggml_cont(ctx0, ggml_permute(ctx0, inp, 1, 0, 2, 3)); } - if (ctx->has_patch_bias) { + if (model.patch_bias) { // inp = ggml_add(ctx0, inp, ggml_repeat(ctx0, model.patch_bias, inp)); inp = ggml_add(ctx0, inp, model.patch_bias); } @@ -689,7 +610,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 if (ctx->has_llava_projector) { // concat class_embeddings and patch_embeddings - if (ctx->has_class_embedding) { + if (model.class_embedding) { embeddings = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, hidden_size, num_positions, batch_size); ggml_set_name(embeddings, "embeddings"); ggml_set_input(embeddings); @@ -726,21 +647,26 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 } // pre-layernorm - if (ctx->has_pre_norm) { + if (model.pre_ln_w) { embeddings = ggml_norm(ctx0, embeddings, eps); ggml_set_name(embeddings, "pre_ln"); embeddings = ggml_add(ctx0, ggml_mul(ctx0, embeddings, model.pre_ln_w), model.pre_ln_b); } + std::vector embedding_stack; + const auto & vision_feature_layer = hparams.vision_feature_layer; + // loop over layers - if (ctx->has_minicpmv_projector || ctx->has_qwen2vl_merger) { - // TODO: figure out why we doing thing in this way ??? 
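In the rewritten loop that follows, the old `n_layer += 1` special-casing is replaced by iterating up to `ctx->max_feature_layer`; whenever the current layer index is listed in `hparams.vision_feature_layer`, the activations entering that layer are pushed onto `embedding_stack`, and after the loop the saved activations are concatenated along the embedding dimension (`ggml_concat(..., 0)`). A model configured with, say, four feature layers therefore presents the projector with embeddings of width 4x hidden_size per position.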
- n_layer += 1; - } - for (int il = 0; il < n_layer - 1; il++) { + for (int il = 0; il < ctx->max_feature_layer; il++) { struct ggml_tensor * cur = embeddings; // embeddings = residual, cur = hidden_states + // If this is an embedding feature layer, save the output. + // NOTE: 0 index here refers to the input to the encoder. + if (vision_feature_layer.find(il) != vision_feature_layer.end()) { + embedding_stack.push_back(embeddings); + } + //const size_t nb_q_w = model.layers[il].q_w->nb[0]; // layernorm1 @@ -763,7 +689,6 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 ctx0, Q, positions, nullptr, d_head/2, mrope_sections, GGML_ROPE_TYPE_VISION, 32768, 10000, 1, 0, 1, 32, 1); } - Q = ggml_scale_inplace(ctx0, Q, 1.0f / sqrt((float)d_head)); Q = ggml_cont(ctx0, ggml_permute(ctx0, Q, 0, 2, 1, 3)); Q = ggml_reshape_3d(ctx0, Q, d_head, num_positions, n_head * batch_size); @@ -787,7 +712,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 V = ggml_reshape_3d(ctx0, V, num_positions, d_head, n_head * batch_size); struct ggml_tensor * KQ = ggml_mul_mat(ctx0, K, Q); - KQ = ggml_soft_max_inplace(ctx0, KQ); + KQ = ggml_soft_max_ext(ctx0, KQ, nullptr, 1.0f / sqrtf((float)d_head), 0.0f); struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V, KQ); KQV = ggml_reshape_4d(ctx0, KQV, d_head, num_positions, n_head, batch_size); KQV = ggml_permute(ctx0, KQV, 0, 2, 1, 3); @@ -828,17 +753,29 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 cur = ggml_add(ctx0, embeddings, cur); embeddings = cur; - } // post-layernorm - if (ctx->has_post_norm) { + if (model.post_ln_w) { embeddings = ggml_norm(ctx0, embeddings, eps); ggml_set_name(embeddings, "post_ln"); embeddings = ggml_add(ctx0, ggml_mul(ctx0, embeddings, model.post_ln_w), model.post_ln_b); } + // final layer is a vision feature layer + if (vision_feature_layer.find(ctx->max_feature_layer) != vision_feature_layer.end()) { + embedding_stack.push_back(embeddings); + } + + // If feature layers are explicitly set, stack them (if we have multiple) + if (!embedding_stack.empty()) { + embeddings = embedding_stack[0]; + for (size_t i = 1; i < embedding_stack.size(); i++) { + embeddings = ggml_concat(ctx0, embeddings, embedding_stack[i], 0); + } + } + // llava projector if (ctx->has_llava_projector) { embeddings = ggml_reshape_2d(ctx0, embeddings, embeddings->ne[0], embeddings->ne[1]); @@ -859,8 +796,10 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 embeddings = ggml_add(ctx0, embeddings, model.mm_0_b); embeddings = ggml_gelu(ctx0, embeddings); - embeddings = ggml_mul_mat(ctx0, model.mm_2_w, embeddings); - embeddings = ggml_add(ctx0, embeddings, model.mm_2_b); + if (model.mm_2_w) { + embeddings = ggml_mul_mat(ctx0, model.mm_2_w, embeddings); + embeddings = ggml_add(ctx0, embeddings, model.mm_2_b); + } } else if (ctx->proj_type == PROJECTOR_TYPE_MLP_NORM) { embeddings = ggml_mul_mat(ctx0, model.mm_0_w, embeddings); @@ -1063,7 +1002,6 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 } struct ggml_tensor * Q = ggml_add(ctx0, ggml_mul_mat(ctx0, model.mm_model_attn_q_w, q), model.mm_model_attn_q_b); - Q = ggml_scale_inplace(ctx0, Q, 1.0f / sqrt((float)d_head)); struct ggml_tensor * K = ggml_add(ctx0, ggml_mul_mat(ctx0, model.mm_model_attn_k_w, k), model.mm_model_attn_k_b); struct ggml_tensor * V = ggml_add(ctx0, ggml_mul_mat(ctx0, model.mm_model_attn_v_w, v), model.mm_model_attn_v_b); // permute @@ -1077,7 
+1015,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 V = ggml_cont(ctx0, ggml_permute(ctx0, V, 1, 2, 0, 3)); V = ggml_reshape_3d(ctx0, V, num_positions, d_head, n_head * batch_size); struct ggml_tensor * KQ = ggml_mul_mat(ctx0, K, Q); - KQ = ggml_soft_max_inplace(ctx0, KQ); + KQ = ggml_soft_max_ext(ctx0, KQ, nullptr, 1.0f / sqrtf((float)d_head), 0.0f); struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V, KQ); KQV = ggml_reshape_4d(ctx0, KQV, d_head, num_query, n_head, batch_size); KQV = ggml_permute(ctx0, KQV, 0, 2, 1, 3); @@ -1095,6 +1033,33 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 GGML_ASSERT(false); } } + // glm projector + else if (ctx->has_glm_projector) { + if (ctx->proj_type == PROJECTOR_TYPE_GLM_EDGE) { + size_t gridsz = (size_t)sqrt(embeddings->ne[1]); + embeddings = ggml_cont(ctx0, ggml_permute(ctx0,embeddings,1,0,2,3)); + embeddings = ggml_reshape_3d(ctx0, embeddings, gridsz, gridsz, embeddings->ne[1]); + embeddings = ggml_conv_2d(ctx0, model.mm_model_adapter_conv_w, embeddings, 2, 2, 0, 0, 1, 1); + embeddings = ggml_reshape_3d(ctx0, embeddings,embeddings->ne[0]*embeddings->ne[1] , embeddings->ne[2], batch_size); + embeddings = ggml_cont(ctx0, ggml_permute(ctx0,embeddings, 1, 0, 2, 3)); + embeddings = ggml_add(ctx0, embeddings, model.mm_model_adapter_conv_b); + //GLU + { + embeddings = ggml_mul_mat(ctx0, model.mm_model_mlp_0_w, embeddings); + embeddings = ggml_norm(ctx0, embeddings, eps); + embeddings = ggml_add(ctx0, ggml_mul(ctx0, embeddings, model.mm_model_ln_q_w), model.mm_model_ln_q_b); + embeddings = ggml_gelu_inplace(ctx0, embeddings); + struct ggml_tensor * x = embeddings; + embeddings = ggml_mul_mat(ctx0, model.mm_model_mlp_2_w, embeddings); + x = ggml_mul_mat(ctx0, model.mm_model_mlp_1_w,x); + embeddings = ggml_silu_inplace(ctx0, embeddings); + embeddings = ggml_mul(ctx0, embeddings,x); + embeddings = ggml_mul_mat(ctx0, model.mm_model_mlp_3_w, embeddings); + } + } else { + GGML_ABORT("fatal error"); + } + } else if (ctx->proj_type == PROJECTOR_TYPE_MERGER) { embeddings = ggml_reshape_3d(ctx0, embeddings, hidden_size * 4, num_positions / 4, batch_size); @@ -1112,529 +1077,513 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 // build the graph ggml_build_forward_expand(gf, embeddings); - ggml_free(ctx0); - return gf; } -// read and create ggml_context containing the tensors and their data -struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { - struct ggml_context * meta = NULL; - - struct gguf_init_params params = { - /*.no_alloc = */ true, - /*.ctx = */ &meta, - }; - - struct gguf_context * ctx = gguf_init_from_file(fname, params); - if (!ctx) { - throw std::runtime_error(format("%s: failed to load CLIP model from %s. 
Does this file exist?\n", __func__, fname)); +static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32_batch & imgs, struct clip_image_size load_image_size, bool is_inf = false) { + if (ctx->proj_type == PROJECTOR_TYPE_GEMMA3) { + return clip_image_build_graph_siglip(ctx, imgs); + } else { + // TODO: we should have one build_* function per model + return clip_image_build_graph_legacy(ctx, imgs, load_image_size, is_inf); } +} - if (verbosity >= 1) { - const int n_tensors = gguf_get_n_tensors(ctx); - const int n_kv = gguf_get_n_kv(ctx); - const int ftype = get_u32(ctx, KEY_FTYPE); - const std::string ftype_str = get_ftype(ftype); - const int idx_desc = get_key_idx(ctx, KEY_DESCRIPTION); - const std::string description = gguf_get_val_str(ctx, idx_desc); - const int idx_name = gguf_find_key(ctx, KEY_NAME); - if (idx_name != -1) { // make name optional temporarily as some of the uploaded models missing it due to a bug - const std::string name = gguf_get_val_str(ctx, idx_name); - LOG_INF("%s: model name: %s\n", __func__, name.c_str()); - } - LOG_INF("%s: description: %s\n", __func__, description.c_str()); - LOG_INF("%s: GGUF version: %d\n", __func__, gguf_get_version(ctx)); - LOG_INF("%s: alignment: %zu\n", __func__, gguf_get_alignment(ctx)); - LOG_INF("%s: n_tensors: %d\n", __func__, n_tensors); - LOG_INF("%s: n_kv: %d\n", __func__, n_kv); - LOG_INF("%s: ftype: %s\n", __func__, ftype_str.c_str()); - LOG_INF("\n"); - } - const int n_tensors = gguf_get_n_tensors(ctx); +struct clip_model_loader { + ggml_context_ptr ctx_meta; + gguf_context_ptr ctx_gguf; - // kv - const int n_kv = gguf_get_n_kv(ctx); - LOG_INF("%s: loaded meta data with %d key-value pairs and %d tensors from %s\n", - __func__, n_kv, n_tensors, fname); - { - std::map n_type; + clip_ctx & ctx_clip; + std::string fname; - for (int i = 0; i < n_tensors; i++) { - enum ggml_type type = gguf_get_tensor_type(ctx, i); + size_t model_size; // in bytes - n_type[type]++; + // TODO @ngxson : we should not pass clip_ctx here, it should be clip_vision_model + clip_model_loader(const char * fname, clip_ctx & ctx_clip) : ctx_clip(ctx_clip), fname(fname) { + struct ggml_context * meta = nullptr; + + struct gguf_init_params params = { + /*.no_alloc = */ true, + /*.ctx = */ &meta, + }; + + ctx_gguf = gguf_context_ptr(gguf_init_from_file(fname, params)); + if (!ctx_gguf.get()) { + throw std::runtime_error(string_format("%s: failed to load CLIP model from %s. Does this file exist?\n", __func__, fname)); } - LOG_INF("%s: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n", __func__); - for (int i = 0; i < n_kv; i++) { - const char * name = gguf_get_key(ctx, i); - const enum gguf_type type = gguf_get_kv_type(ctx, i); - const std::string type_name = - type == GGUF_TYPE_ARRAY - ? 
format("%s[%s,%d]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(ctx, i)), gguf_get_arr_n(ctx, i)) - : gguf_type_name(type); + ctx_meta.reset(meta); - std::string value = gguf_kv_to_str(ctx, i); - const size_t MAX_VALUE_LEN = 40; - if (value.size() > MAX_VALUE_LEN) { - value = format("%s...", value.substr(0, MAX_VALUE_LEN - 3).c_str()); - } - replace_all(value, "\n", "\\n"); + const int n_tensors = gguf_get_n_tensors(ctx_gguf.get()); - LOG_INF("%s: - kv %3d: %42s %-16s = %s\n", __func__, i, name, type_name.c_str(), value.c_str()); + // print gguf info + { + std::string name; + get_string(KEY_NAME, name, false); + std::string description; + get_string(KEY_DESCRIPTION, description, false); + LOG_INF("%s: model name: %s\n", __func__, name.c_str()); + LOG_INF("%s: description: %s\n", __func__, description.c_str()); + LOG_INF("%s: GGUF version: %d\n", __func__, gguf_get_version(ctx_gguf.get())); + LOG_INF("%s: alignment: %zu\n", __func__, gguf_get_alignment(ctx_gguf.get())); + LOG_INF("%s: n_tensors: %d\n", __func__, n_tensors); + LOG_INF("%s: n_kv: %d\n", __func__, (int)gguf_get_n_kv(ctx_gguf.get())); + LOG_INF("\n"); } - // print type counts - for (auto & kv : n_type) { - if (kv.second == 0) { - continue; - } - - LOG_INF("%s: - type %4s: %4d tensors\n", __func__, ggml_type_name(kv.first), kv.second); - } - } - - // data - size_t model_size = 0; - { - for (int i = 0; i < n_tensors; ++i) { - const char * name = gguf_get_tensor_name(ctx, i); - const size_t offset = gguf_get_tensor_offset(ctx, i); - enum ggml_type type = gguf_get_tensor_type(ctx, i); - struct ggml_tensor * cur = ggml_get_tensor(meta, name); - size_t tensor_size = ggml_nbytes(cur); - model_size += tensor_size; - if (verbosity >= 3) { - LOG_INF("%s: tensor[%d]: n_dims = %d, name = %s, tensor_size=%zu, offset=%zu, shape:[%" PRIu64 ", %" PRIu64 ", %" PRIu64 ", %" PRIu64 "], type = %s\n", - __func__, i, ggml_n_dims(cur), cur->name, tensor_size, offset, cur->ne[0], cur->ne[1], cur->ne[2], cur->ne[3], ggml_type_name(type)); + // tensors + { + for (int i = 0; i < n_tensors; ++i) { + const char * name = gguf_get_tensor_name(ctx_gguf.get(), i); + const size_t offset = gguf_get_tensor_offset(ctx_gguf.get(), i); + enum ggml_type type = gguf_get_tensor_type(ctx_gguf.get(), i); + struct ggml_tensor * cur = ggml_get_tensor(meta, name); + size_t tensor_size = ggml_nbytes(cur); + model_size += tensor_size; + LOG_DBG("%s: tensor[%d]: n_dims = %d, name = %s, tensor_size=%zu, offset=%zu, shape:[%" PRIu64 ", %" PRIu64 ", %" PRIu64 ", %" PRIu64 "], type = %s\n", + __func__, i, ggml_n_dims(cur), cur->name, tensor_size, offset, cur->ne[0], cur->ne[1], cur->ne[2], cur->ne[3], ggml_type_name(type)); } } } - clip_ctx * new_clip = new clip_ctx{}; - - // update projector type - { - int idx = gguf_find_key(ctx, KEY_PROJ_TYPE); - if (idx != -1) { - const std::string proj_type = gguf_get_val_str(ctx, idx); - new_clip->proj_type = clip_projector_type_from_string(proj_type); - } else { - new_clip->proj_type = PROJECTOR_TYPE_MLP; - } - - if (new_clip->proj_type == PROJECTOR_TYPE_MLP) { - if (gguf_find_tensor(ctx, format(TN_LLAVA_PROJ, 3, "weight").c_str()) != -1) { - new_clip->proj_type = PROJECTOR_TYPE_MLP_NORM; + void load_hparams() { + // projector type + { + std::string proj_type; + get_string(KEY_PROJ_TYPE, proj_type, false); + if (!proj_type.empty()) { + ctx_clip.proj_type = clip_projector_type_from_string(proj_type); + } + if (ctx_clip.proj_type == PROJECTOR_TYPE_UNKNOWN) { + throw std::runtime_error(string_format("%s: unknown projector type: 
%s\n", __func__, proj_type.c_str())); } } - } -//#ifdef GGML_USE_CUDA -// new_clip->backend = ggml_backend_cuda_init(0); -// LOG_INF("%s: CLIP using CUDA backend\n", __func__); -//#endif -// -//#ifdef GGML_USE_METAL -// new_clip->backend = ggml_backend_metal_init(); -// LOG_INF("%s: CLIP using Metal backend\n", __func__); -//#endif -// -//#ifdef GGML_USE_CANN -// new_clip->backend = ggml_backend_cann_init(0); -// LOG_INF("%s: CLIP using CANN backend\n", __func__); -//#endif -// -//#ifdef GGML_USE_VULKAN -// new_clip->backend = ggml_backend_vk_init(0); -// LOG_INF("%s: CLIP using Vulkan backend\n", __func__); -//#endif -// -//#ifdef GGML_USE_SYCL -// new_clip->backend = ggml_backend_sycl_init(0); -// LOG_INF("%s: CLIP using SYCL backend\n", __func__); -//#endif + // other hparams + { + get_bool(KEY_HAS_TEXT_ENC, ctx_clip.has_text_encoder, false); + get_bool(KEY_HAS_VIS_ENC, ctx_clip.has_vision_encoder, false); + GGML_ASSERT(ctx_clip.has_vision_encoder); + GGML_ASSERT(!ctx_clip.has_text_encoder); - if (!new_clip->backend) { - new_clip->backend = ggml_backend_cpu_init(); - LOG_INF("%s: CLIP using CPU backend\n", __func__); - } + // legacy keys, use KEY_PROJ_TYPE instead + get_bool(KEY_HAS_LLAVA_PROJ, ctx_clip.has_llava_projector, false); + get_bool(KEY_HAS_MINICPMV_PROJ, ctx_clip.has_minicpmv_projector, false); + get_i32(KEY_MINICPMV_VERSION, ctx_clip.minicpmv_version, false); + get_bool(KEY_HAS_GLM_PROJ, ctx_clip.has_glm_projector, false); + get_bool(KEY_HAS_QWEN2VL_MERGER, ctx_clip.has_qwen2vl_merger, false); + // !!! do NOT extend the list above, use KEY_PROJ_TYPE instead - // model size and capabilities - { - int idx = get_key_idx(ctx, KEY_HAS_TEXT_ENC); - new_clip->has_text_encoder = gguf_get_val_bool(ctx, idx); + get_bool(KEY_USE_GELU, ctx_clip.use_gelu, false); + get_bool(KEY_USE_SILU, ctx_clip.use_silu, false); - idx = get_key_idx(ctx, KEY_HAS_VIS_ENC); - new_clip->has_vision_encoder = gguf_get_val_bool(ctx, idx); + auto & hparams = ctx_clip.vision_model.hparams; + get_u32(string_format(KEY_N_EMBD, "vision"), hparams.hidden_size); + get_u32(string_format(KEY_N_HEAD, "vision"), hparams.n_head); + get_u32(string_format(KEY_N_FF, "vision"), hparams.n_intermediate); + get_u32(string_format(KEY_N_BLOCK, "vision"), hparams.n_layer); + get_u32(string_format(KEY_PROJ_DIM, "vision"), hparams.projection_dim); + get_f32(string_format(KEY_LAYER_NORM_EPS, "vision"), hparams.eps); + get_u32(KEY_IMAGE_SIZE, hparams.image_size); + get_u32(KEY_PATCH_SIZE, hparams.patch_size); + get_u32(KEY_IMAGE_CROP_RESOLUTION, hparams.image_crop_resolution, false); + get_arr_int(KEY_IMAGE_GRID_PINPOINTS, hparams.image_grid_pinpoints, false); - idx = gguf_find_key(ctx, KEY_HAS_LLAVA_PROJ); - if (idx != -1) { - new_clip->has_llava_projector = gguf_get_val_bool(ctx, idx); - } + { + std::string mm_patch_merge_type; + get_string(KEY_MM_PATCH_MERGE_TYPE, mm_patch_merge_type, false); + if (mm_patch_merge_type == "spatial_unpad") { + hparams.mm_patch_merge_type = PATCH_MERGE_SPATIAL_UNPAD; + } + } - idx = gguf_find_key(ctx, KEY_HAS_MINICPMV_PROJ); - if (idx != -1) { - new_clip->has_minicpmv_projector = gguf_get_val_bool(ctx, idx); - } + { + int idx_mean = gguf_find_key(ctx_gguf.get(), KEY_IMAGE_MEAN); + int idx_std = gguf_find_key(ctx_gguf.get(), KEY_IMAGE_STD); + GGML_ASSERT(idx_mean >= 0 && "image_mean not found"); + GGML_ASSERT(idx_std >= 0 && "image_std not found"); + const float * mean_data = (const float *) gguf_get_arr_data(ctx_gguf.get(), idx_mean); + const float * std_data = (const float *) 
gguf_get_arr_data(ctx_gguf.get(), idx_std); + for (int i = 0; i < 3; ++i) { + ctx_clip.image_mean[i] = mean_data[i]; + ctx_clip.image_std[i] = std_data[i]; + } + } - idx = gguf_find_key(ctx, KEY_MINICPMV_VERSION); - if (idx != -1) { - new_clip->minicpmv_version = gguf_get_val_i32(ctx, idx); - } + // Load the vision feature layer indices if they are explicitly provided; + // if multiple vision feature layers are present, the values will be concatenated + // to form the final visual features. + // NOTE: gguf conversions should standardize the values of the vision feature layer to + // be non-negative, since we use -1 to mark values as unset here. + std::vector vision_feature_layer; + get_arr_int(KEY_FEATURE_LAYER, vision_feature_layer, false); + // convert std::vector to std::unordered_set + for (auto & layer : vision_feature_layer) { + hparams.vision_feature_layer.insert(layer); + } + // Calculate the deepest feature layer based on hparams and projector type + ctx_clip.max_feature_layer = get_deepest_feature_layer(&ctx_clip); - idx = gguf_find_key(ctx, KEY_HAS_QWEN2VL_MERGER); - if (idx != -1) { - new_clip->has_qwen2vl_merger = gguf_get_val_bool(ctx, idx); - } - // GGML_ASSERT(new_clip->has_llava_projector); // see monatis/clip.cpp for image and/or text encoding for semantic search - - GGML_ASSERT(new_clip->has_vision_encoder); - GGML_ASSERT(!new_clip->has_text_encoder); - - idx = get_key_idx(ctx, KEY_USE_GELU); - new_clip->use_gelu = gguf_get_val_bool(ctx, idx); - - try { - idx = get_key_idx(ctx, KEY_USE_SILU); - new_clip->use_silu = gguf_get_val_bool(ctx, idx); - } catch (std::runtime_error & /*e*/) { - new_clip->use_silu = false; - } - - if (verbosity >= 1) { - LOG_INF("%s: text_encoder: %d\n", __func__, new_clip->has_text_encoder); - LOG_INF("%s: vision_encoder: %d\n", __func__, new_clip->has_vision_encoder); - LOG_INF("%s: llava_projector: %d\n", __func__, new_clip->has_llava_projector); - LOG_INF("%s: minicpmv_projector: %d\n", __func__, new_clip->has_minicpmv_projector); - LOG_INF("%s: model size: %.2f MB\n", __func__, model_size / 1024.0 / 1024.0); - LOG_INF("%s: metadata size: %.2f MB\n", __func__, ggml_get_mem_size(meta) / 1024.0 / 1024.0); + LOG_INF("%s: text_encoder: %d\n", __func__, ctx_clip.has_text_encoder); + LOG_INF("%s: vision_encoder: %d\n", __func__, ctx_clip.has_vision_encoder); + LOG_INF("%s: llava_projector: %d\n", __func__, ctx_clip.has_llava_projector); + LOG_INF("%s: minicpmv_projector: %d\n", __func__, ctx_clip.has_minicpmv_projector); + LOG_INF("%s: minicpmv_version: %d\n", __func__, ctx_clip.minicpmv_version); + LOG_INF("%s: glm_projector: %d\n", __func__, ctx_clip.has_glm_projector); + LOG_INF("%s: model size: %.2f MiB\n", __func__, model_size / 1024.0 / 1024.0); + LOG_INF("%s: metadata size: %.2f MiB\n", __func__, ggml_get_mem_size(ctx_meta.get()) / 1024.0 / 1024.0); } } - LOG_INF("%s: params backend buffer size = % 6.2f MB (%i tensors)\n", __func__, model_size / (1024.0 * 1024.0), n_tensors); + void load_tensors() { + std::map tensor_offset; + std::vector tensors_to_load; - // load tensors - { - std::vector read_buf; + // get offsets + for (int64_t i = 0; i < gguf_get_n_tensors(ctx_gguf.get()); ++i) { + const char * name = gguf_get_tensor_name(ctx_gguf.get(), i); + tensor_offset[name] = gguf_get_data_offset(ctx_gguf.get()) + gguf_get_tensor_offset(ctx_gguf.get(), i); + } + + // create data context struct ggml_init_params params = { - /*.mem_size =*/ (n_tensors + 1) * ggml_tensor_overhead(), + /*.mem_size =*/ (gguf_get_n_tensors(ctx_gguf.get()) + 1) * 
ggml_tensor_overhead(), /*.mem_buffer =*/ NULL, /*.no_alloc =*/ true, }; - - new_clip->ctx_data = ggml_init(params); - if (!new_clip->ctx_data) { - LOG_ERR("%s: ggml_init() failed\n", __func__); - clip_free(new_clip); - gguf_free(ctx); - return nullptr; + ctx_clip.ctx_data.reset(ggml_init(params)); + if (!ctx_clip.ctx_data) { + throw std::runtime_error(string_format("%s: failed to init ggml context\n", __func__)); } - auto fin = std::ifstream(fname, std::ios::binary); - if (!fin) { - LOG_ERR("cannot open model file for loading tensors\n"); - clip_free(new_clip); - gguf_free(ctx); - return nullptr; - } - - // add tensors to context - for (int i = 0; i < n_tensors; ++i) { - const char * name = gguf_get_tensor_name(ctx, i); - struct ggml_tensor * t = ggml_get_tensor(meta, name); - struct ggml_tensor * cur = ggml_dup_tensor(new_clip->ctx_data, t); - ggml_set_name(cur, name); - } - - // alloc memory and offload data - new_clip->params_buffer = ggml_backend_alloc_ctx_tensors(new_clip->ctx_data, new_clip->backend); - for (int i = 0; i < n_tensors; ++i) { - const char * name = gguf_get_tensor_name(ctx, i); - struct ggml_tensor * cur = ggml_get_tensor(new_clip->ctx_data, name); - const size_t offset = gguf_get_data_offset(ctx) + gguf_get_tensor_offset(ctx, i); - fin.seekg(offset, std::ios::beg); - if (!fin) { - LOG_ERR("%s: failed to seek for tensor %s\n", __func__, name); - clip_free(new_clip); - gguf_free(ctx); - return nullptr; + // helper function + auto get_tensor = [&](const std::string & name, bool required = true) { + struct ggml_tensor * cur = ggml_get_tensor(ctx_meta.get(), name.c_str()); + if (!cur && required) { + throw std::runtime_error(string_format("%s: unable to find tensor %s\n", __func__, name.c_str())); } - int num_bytes = ggml_nbytes(cur); - if (ggml_backend_buffer_is_host(new_clip->params_buffer)) { - // for the CPU and Metal backend, we can read directly into the tensor - fin.read(reinterpret_cast(cur->data), num_bytes); - } else { - // read into a temporary buffer first, then copy to device memory - read_buf.resize(num_bytes); - fin.read(reinterpret_cast(read_buf.data()), num_bytes); - ggml_backend_tensor_set(cur, read_buf.data(), 0, num_bytes); + if (cur) { + tensors_to_load.push_back(cur); + // add tensors to context + struct ggml_tensor * data_tensor = ggml_dup_tensor(ctx_clip.ctx_data.get(), cur); + ggml_set_name(data_tensor, cur->name); + cur = data_tensor; } - } - fin.close(); - } + return cur; + }; - // vision model - if (new_clip->has_vision_encoder) { - // load vision model - auto & vision_model = new_clip->vision_model; - auto & hparams = vision_model.hparams; - hparams.hidden_size = get_u32(ctx, format(KEY_N_EMBD, "vision")); - hparams.n_head = get_u32(ctx, format(KEY_N_HEAD, "vision")); - hparams.n_intermediate = get_u32(ctx, format(KEY_N_FF, "vision")); - hparams.n_layer = get_u32(ctx, format(KEY_N_BLOCK, "vision")); - hparams.image_size = get_u32(ctx, KEY_IMAGE_SIZE); - hparams.patch_size = get_u32(ctx, KEY_PATCH_SIZE); - hparams.projection_dim = get_u32(ctx, format(KEY_PROJ_DIM, "vision")); - hparams.eps = get_f32(ctx, format(KEY_LAYER_NORM_EPS, "vision")); + auto & vision_model = ctx_clip.vision_model; - try { - int idx = get_key_idx(ctx, KEY_IMAGE_GRID_PINPOINTS); - int n = gguf_get_arr_n(ctx, idx); - const int32_t * pinpoints = (const int32_t *)gguf_get_arr_data(ctx, idx); - for (int i = 0; i < 32 && i < n && pinpoints[i] != 0; ++i) { - hparams.image_grid_pinpoints[i] = pinpoints[i]; - } - if (n < 32) - hparams.image_grid_pinpoints[n] = 0; - } catch 
(std::runtime_error & /*e*/) { - hparams.image_grid_pinpoints[0]=0; + vision_model.class_embedding = get_tensor(TN_CLASS_EMBD, false); + + vision_model.pre_ln_w = get_tensor(string_format(TN_LN_PRE, "v", "weight"), false); + vision_model.pre_ln_b = get_tensor(string_format(TN_LN_PRE, "v", "bias"), false); + + vision_model.post_ln_w = get_tensor(string_format(TN_LN_POST, "v", "weight"), false); + vision_model.post_ln_b = get_tensor(string_format(TN_LN_POST, "v", "bias"), false); + + vision_model.patch_bias = get_tensor(TN_PATCH_BIAS, false); + vision_model.patch_embeddings_0 = get_tensor(TN_PATCH_EMBD, false); + vision_model.patch_embeddings_1 = get_tensor(TN_PATCH_EMBD_1, false); + if (vision_model.patch_embeddings_1 == nullptr) { + ctx_clip.has_qwen2vl_merger = false; } - try { - int idx = get_key_idx(ctx, KEY_MM_PATCH_MERGE_TYPE); - strcpy(hparams.mm_patch_merge_type, gguf_get_val_str(ctx, idx)); - } catch (std::runtime_error & /*e*/) { - strcpy(hparams.mm_patch_merge_type, "flat"); - } + vision_model.position_embeddings = get_tensor(string_format(TN_POS_EMBD, "v"), false); - try { - hparams.image_crop_resolution = get_u32(ctx, KEY_IMAGE_CROP_RESOLUTION); // llava-1.6 - } catch(const std::exception& /*e*/) { - hparams.image_crop_resolution = hparams.image_size; - } - - int idx_mean = get_key_idx(ctx, KEY_IMAGE_MEAN); - int idx_std = get_key_idx(ctx, KEY_IMAGE_STD); - - const float * mean_data = (const float *)gguf_get_arr_data(ctx, idx_mean); - const float * std_data = (const float *)gguf_get_arr_data(ctx, idx_std); - - for (int i = 0; i < 3; ++i) { - new_clip->image_mean[i] = mean_data[i]; - new_clip->image_std[i] = std_data[i]; - } - - if (verbosity >= 2) { - LOG_INF("\n%s: vision model hparams\n", __func__); - LOG_INF("image_size %d\n", hparams.image_size); - LOG_INF("patch_size %d\n", hparams.patch_size); - LOG_INF("v_hidden_size %d\n", hparams.hidden_size); - LOG_INF("v_n_intermediate %d\n", hparams.n_intermediate); - LOG_INF("v_projection_dim %d\n", hparams.projection_dim); - LOG_INF("v_n_head %d\n", hparams.n_head); - LOG_INF("v_n_layer %d\n", hparams.n_layer); - LOG_INF("v_eps %f\n", hparams.eps); - LOG_INF("v_image_mean %f %f %f\n", new_clip->image_mean[0], new_clip->image_mean[1], new_clip->image_mean[2]); - LOG_INF("v_image_std %f %f %f\n", new_clip->image_std[0], new_clip->image_std[1], new_clip->image_std[2]); - LOG_INF("v_image_grid_pinpoints: "); - for (int i = 0; i < 32 && (hparams.image_grid_pinpoints[i] != 0); ++i) { - LOG_INF("%d ", hparams.image_grid_pinpoints[i]); - } - LOG_INF("\n"); - LOG_INF("v_mm_patch_merge_type: %s\n", hparams.mm_patch_merge_type); - - } - - try { - vision_model.class_embedding = get_tensor(new_clip->ctx_data, TN_CLASS_EMBD); - new_clip->has_class_embedding = true; - } catch (const std::exception& /*e*/) { - new_clip->has_class_embedding = false; - } - - try { - vision_model.pre_ln_w = get_tensor(new_clip->ctx_data, format(TN_LN_PRE, "v", "weight")); - vision_model.pre_ln_b = get_tensor(new_clip->ctx_data, format(TN_LN_PRE, "v", "bias")); - new_clip->has_pre_norm = true; - } catch (std::exception & /*e*/) { - new_clip->has_pre_norm = false; - } - - try { - vision_model.post_ln_w = get_tensor(new_clip->ctx_data, format(TN_LN_POST, "v", "weight")); - vision_model.post_ln_b = get_tensor(new_clip->ctx_data, format(TN_LN_POST, "v", "bias")); - new_clip->has_post_norm = true; - } catch (std::exception & /*e*/) { - new_clip->has_post_norm = false; - } - - try { - vision_model.patch_bias = get_tensor(new_clip->ctx_data, TN_PATCH_BIAS); - 
new_clip->has_patch_bias = true; - } catch (std::exception & /*e*/) { - new_clip->has_patch_bias = false; - } - - try { - vision_model.patch_embeddings_0 = get_tensor(new_clip->ctx_data, TN_PATCH_EMBD); - vision_model.position_embeddings = get_tensor(new_clip->ctx_data, format(TN_POS_EMBD, "v")); - } catch(const std::exception& /*e*/) { - LOG_ERR("%s: failed to load vision model tensors\n", __func__); - } - try { - vision_model.patch_embeddings_1 = get_tensor(new_clip->ctx_data, TN_PATCH_EMBD_1); - } catch(const std::exception& /*e*/) { - new_clip->has_qwen2vl_merger = false; - } - - // LLaVA projection - if (new_clip->proj_type == PROJECTOR_TYPE_MLP || new_clip->proj_type == PROJECTOR_TYPE_MLP_NORM) { - vision_model.mm_0_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 0, "weight")); - vision_model.mm_0_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 0, "bias")); - try { - // Yi-type llava - vision_model.mm_1_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 1, "weight")); - vision_model.mm_1_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 1, "bias")); - } catch (std::runtime_error & /*e*/) { } - try { - // missing in Yi-type llava - vision_model.mm_2_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 2, "weight")); - vision_model.mm_2_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 2, "bias")); - } catch (std::runtime_error & /*e*/) { } - try { - // Yi-type llava - vision_model.mm_3_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 3, "weight")); - vision_model.mm_3_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 3, "bias")); - } catch (std::runtime_error & /*e*/) { } - try { - // Yi-type llava - vision_model.mm_4_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 4, "weight")); - vision_model.mm_4_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 4, "bias")); - } catch (std::runtime_error & /*e*/) { } - try { - vision_model.image_newline = get_tensor(new_clip->ctx_data, TN_IMAGE_NEWLINE); - // LOG_INF("%s: image_newline tensor (llava-1.6) found\n", __func__); - } catch (std::runtime_error & /*e*/) { } - } else if (new_clip->proj_type == PROJECTOR_TYPE_LDP) { - // MobileVLM projection - vision_model.mm_model_mlp_1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 1, "weight")); - vision_model.mm_model_mlp_1_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 1, "bias")); - vision_model.mm_model_mlp_3_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 3, "weight")); - vision_model.mm_model_mlp_3_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 3, "bias")); - vision_model.mm_model_block_1_block_0_0_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 0, "0.weight")); - vision_model.mm_model_block_1_block_0_1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 0, "1.weight")); - vision_model.mm_model_block_1_block_0_1_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 0, "1.bias")); - vision_model.mm_model_block_1_block_1_fc1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 1, "fc1.weight")); - vision_model.mm_model_block_1_block_1_fc1_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 1, "fc1.bias")); - vision_model.mm_model_block_1_block_1_fc2_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 1, "fc2.weight")); - vision_model.mm_model_block_1_block_1_fc2_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 1, "fc2.bias")); - vision_model.mm_model_block_1_block_2_0_w = 
get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 2, "0.weight")); - vision_model.mm_model_block_1_block_2_1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 2, "1.weight")); - vision_model.mm_model_block_1_block_2_1_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 2, "1.bias")); - vision_model.mm_model_block_2_block_0_0_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 0, "0.weight")); - vision_model.mm_model_block_2_block_0_1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 0, "1.weight")); - vision_model.mm_model_block_2_block_0_1_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 0, "1.bias")); - vision_model.mm_model_block_2_block_1_fc1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 1, "fc1.weight")); - vision_model.mm_model_block_2_block_1_fc1_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 1, "fc1.bias")); - vision_model.mm_model_block_2_block_1_fc2_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 1, "fc2.weight")); - vision_model.mm_model_block_2_block_1_fc2_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 1, "fc2.bias")); - vision_model.mm_model_block_2_block_2_0_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 2, "0.weight")); - vision_model.mm_model_block_2_block_2_1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 2, "1.weight")); - vision_model.mm_model_block_2_block_2_1_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 2, "1.bias")); - } - else if (new_clip->proj_type == PROJECTOR_TYPE_LDPV2) - { - // MobilVLM_V2 projection - vision_model.mm_model_mlp_0_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 0, "weight")); - vision_model.mm_model_mlp_0_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 0, "bias")); - vision_model.mm_model_mlp_2_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 2, "weight")); - vision_model.mm_model_mlp_2_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 2, "bias")); - vision_model.mm_model_peg_0_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_PEG, 0, "weight")); - vision_model.mm_model_peg_0_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_PEG, 0, "bias")); - } - else if (new_clip->proj_type == PROJECTOR_TYPE_RESAMPLER) { - // vision_model.mm_model_pos_embed = get_tensor(new_clip->ctx_data, TN_MINICPMV_POS_EMBD); - vision_model.mm_model_pos_embed_k = get_tensor(new_clip->ctx_data, TN_MINICPMV_POS_EMBD_K); - vision_model.mm_model_query = get_tensor(new_clip->ctx_data, TN_MINICPMV_QUERY); - vision_model.mm_model_proj = get_tensor(new_clip->ctx_data, TN_MINICPMV_PROJ); - vision_model.mm_model_kv_proj = get_tensor(new_clip->ctx_data, TN_MINICPMV_KV_PROJ); - vision_model.mm_model_attn_q_w = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_ATTN, "q", "weight")); - vision_model.mm_model_attn_k_w = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_ATTN, "k", "weight")); - vision_model.mm_model_attn_v_w = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_ATTN, "v", "weight")); - vision_model.mm_model_attn_q_b = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_ATTN, "q", "bias")); - vision_model.mm_model_attn_k_b = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_ATTN, "k", "bias")); - vision_model.mm_model_attn_v_b = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_ATTN, "v", "bias")); - vision_model.mm_model_attn_o_w = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_ATTN, "out", "weight")); - 
vision_model.mm_model_attn_o_b = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_ATTN, "out", "bias")); - vision_model.mm_model_ln_q_w = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_LN, "q", "weight")); - vision_model.mm_model_ln_q_b = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_LN, "q", "bias")); - vision_model.mm_model_ln_kv_w = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_LN, "kv", "weight")); - vision_model.mm_model_ln_kv_b = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_LN, "kv", "bias")); - vision_model.mm_model_ln_post_w = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_LN, "post", "weight")); - vision_model.mm_model_ln_post_b = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_LN, "post", "bias")); - } - else if (new_clip->proj_type == PROJECTOR_TYPE_MERGER) { - vision_model.mm_0_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 0, "weight")); - vision_model.mm_0_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 0, "bias")); - vision_model.mm_1_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 2, "weight")); - vision_model.mm_1_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 2, "bias")); - } - else { - std::string proj_type = PROJECTOR_TYPE_NAMES[new_clip->proj_type]; - throw std::runtime_error(format("%s: don't support projector with: %s currently\n", __func__, proj_type.c_str())); - } - - vision_model.layers.resize(hparams.n_layer); - - for (int il = 0; il < hparams.n_layer; ++il) { + // layers + vision_model.layers.resize(vision_model.hparams.n_layer); + for (int il = 0; il < vision_model.hparams.n_layer; ++il) { auto & layer = vision_model.layers[il]; - layer.k_w = get_tensor(new_clip->ctx_data, format(TN_ATTN_K, "v", il, "weight")); - layer.q_w = get_tensor(new_clip->ctx_data, format(TN_ATTN_Q, "v", il, "weight")); - layer.v_w = get_tensor(new_clip->ctx_data, format(TN_ATTN_V, "v", il, "weight")); - layer.o_w = get_tensor(new_clip->ctx_data, format(TN_ATTN_OUTPUT, "v", il, "weight")); - layer.ln_1_w = get_tensor(new_clip->ctx_data, format(TN_LN_1, "v", il, "weight")); - layer.ln_2_w = get_tensor(new_clip->ctx_data, format(TN_LN_2, "v", il, "weight")); - layer.ff_i_w = get_tensor(new_clip->ctx_data, format(TN_FFN_DOWN, "v", il, "weight")); - layer.ff_o_w = get_tensor(new_clip->ctx_data, format(TN_FFN_UP, "v", il, "weight")); - layer.k_b = get_tensor(new_clip->ctx_data, format(TN_ATTN_K, "v", il, "bias")); - layer.q_b = get_tensor(new_clip->ctx_data, format(TN_ATTN_Q, "v", il, "bias")); - layer.v_b = get_tensor(new_clip->ctx_data, format(TN_ATTN_V, "v", il, "bias")); - layer.o_b = get_tensor(new_clip->ctx_data, format(TN_ATTN_OUTPUT, "v", il, "bias")); - layer.ln_1_b = get_tensor(new_clip->ctx_data, format(TN_LN_1, "v", il, "bias")); - layer.ln_2_b = get_tensor(new_clip->ctx_data, format(TN_LN_2, "v", il, "bias")); - layer.ff_i_b = get_tensor(new_clip->ctx_data, format(TN_FFN_DOWN, "v", il, "bias")); - layer.ff_o_b = get_tensor(new_clip->ctx_data, format(TN_FFN_UP, "v", il, "bias")); + layer.k_w = get_tensor(string_format(TN_ATTN_K, "v", il, "weight")); + layer.q_w = get_tensor(string_format(TN_ATTN_Q, "v", il, "weight")); + layer.v_w = get_tensor(string_format(TN_ATTN_V, "v", il, "weight")); + layer.o_w = get_tensor(string_format(TN_ATTN_OUTPUT, "v", il, "weight")); + layer.ln_1_w = get_tensor(string_format(TN_LN_1, "v", il, "weight"), false); + layer.ln_2_w = get_tensor(string_format(TN_LN_2, "v", il, "weight"), false); + layer.ff_i_w = get_tensor(string_format(TN_FFN_DOWN, "v", il, "weight")); + layer.ff_o_w = 
get_tensor(string_format(TN_FFN_UP, "v", il, "weight")); + layer.k_b = get_tensor(string_format(TN_ATTN_K, "v", il, "bias"), false); + layer.q_b = get_tensor(string_format(TN_ATTN_Q, "v", il, "bias"), false); + layer.v_b = get_tensor(string_format(TN_ATTN_V, "v", il, "bias"), false); + layer.o_b = get_tensor(string_format(TN_ATTN_OUTPUT, "v", il, "bias"), false); + layer.ln_1_b = get_tensor(string_format(TN_LN_1, "v", il, "bias"), false); + layer.ln_2_b = get_tensor(string_format(TN_LN_2, "v", il, "bias"), false); + layer.ff_i_b = get_tensor(string_format(TN_FFN_DOWN, "v", il, "bias"), false); + layer.ff_o_b = get_tensor(string_format(TN_FFN_UP, "v", il, "bias"), false); + } + + switch (ctx_clip.proj_type) { + case PROJECTOR_TYPE_MLP: + case PROJECTOR_TYPE_MLP_NORM: + { + // LLaVA projection + vision_model.mm_0_w = get_tensor(string_format(TN_LLAVA_PROJ, 0, "weight"), false); + vision_model.mm_0_b = get_tensor(string_format(TN_LLAVA_PROJ, 0, "bias"), false); + // Yi-type llava + vision_model.mm_1_w = get_tensor(string_format(TN_LLAVA_PROJ, 1, "weight"), false); + vision_model.mm_1_b = get_tensor(string_format(TN_LLAVA_PROJ, 1, "bias"), false); + // missing in Yi-type llava + vision_model.mm_2_w = get_tensor(string_format(TN_LLAVA_PROJ, 2, "weight"), false); + vision_model.mm_2_b = get_tensor(string_format(TN_LLAVA_PROJ, 2, "bias"), false); + // Yi-type llava + vision_model.mm_3_w = get_tensor(string_format(TN_LLAVA_PROJ, 3, "weight"), false); + vision_model.mm_3_b = get_tensor(string_format(TN_LLAVA_PROJ, 3, "bias"), false); + vision_model.mm_4_w = get_tensor(string_format(TN_LLAVA_PROJ, 4, "weight"), false); + vision_model.mm_4_b = get_tensor(string_format(TN_LLAVA_PROJ, 4, "bias"), false); + if (vision_model.mm_3_w) { + // TODO: this is a hack to support Yi-type llava + ctx_clip.proj_type = PROJECTOR_TYPE_MLP_NORM; + } + vision_model.image_newline = get_tensor(TN_IMAGE_NEWLINE, false); + } break; + case PROJECTOR_TYPE_LDP: + { + // MobileVLM projection + vision_model.mm_model_mlp_1_w = get_tensor(string_format(TN_MVLM_PROJ_MLP, 1, "weight")); + vision_model.mm_model_mlp_1_b = get_tensor(string_format(TN_MVLM_PROJ_MLP, 1, "bias")); + vision_model.mm_model_mlp_3_w = get_tensor(string_format(TN_MVLM_PROJ_MLP, 3, "weight")); + vision_model.mm_model_mlp_3_b = get_tensor(string_format(TN_MVLM_PROJ_MLP, 3, "bias")); + vision_model.mm_model_block_1_block_0_0_w = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 1, 0, "0.weight")); + vision_model.mm_model_block_1_block_0_1_w = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 1, 0, "1.weight")); + vision_model.mm_model_block_1_block_0_1_b = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 1, 0, "1.bias")); + vision_model.mm_model_block_1_block_1_fc1_w = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 1, 1, "fc1.weight")); + vision_model.mm_model_block_1_block_1_fc1_b = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 1, 1, "fc1.bias")); + vision_model.mm_model_block_1_block_1_fc2_w = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 1, 1, "fc2.weight")); + vision_model.mm_model_block_1_block_1_fc2_b = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 1, 1, "fc2.bias")); + vision_model.mm_model_block_1_block_2_0_w = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 1, 2, "0.weight")); + vision_model.mm_model_block_1_block_2_1_w = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 1, 2, "1.weight")); + vision_model.mm_model_block_1_block_2_1_b = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 1, 2, "1.bias")); + vision_model.mm_model_block_2_block_0_0_w = 
get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 2, 0, "0.weight")); + vision_model.mm_model_block_2_block_0_1_w = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 2, 0, "1.weight")); + vision_model.mm_model_block_2_block_0_1_b = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 2, 0, "1.bias")); + vision_model.mm_model_block_2_block_1_fc1_w = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 2, 1, "fc1.weight")); + vision_model.mm_model_block_2_block_1_fc1_b = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 2, 1, "fc1.bias")); + vision_model.mm_model_block_2_block_1_fc2_w = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 2, 1, "fc2.weight")); + vision_model.mm_model_block_2_block_1_fc2_b = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 2, 1, "fc2.bias")); + vision_model.mm_model_block_2_block_2_0_w = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 2, 2, "0.weight")); + vision_model.mm_model_block_2_block_2_1_w = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 2, 2, "1.weight")); + vision_model.mm_model_block_2_block_2_1_b = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 2, 2, "1.bias")); + } break; + case PROJECTOR_TYPE_LDPV2: + { + // MobilVLM_V2 projection + vision_model.mm_model_mlp_0_w = get_tensor(string_format(TN_MVLM_PROJ_MLP, 0, "weight")); + vision_model.mm_model_mlp_0_b = get_tensor(string_format(TN_MVLM_PROJ_MLP, 0, "bias")); + vision_model.mm_model_mlp_2_w = get_tensor(string_format(TN_MVLM_PROJ_MLP, 2, "weight")); + vision_model.mm_model_mlp_2_b = get_tensor(string_format(TN_MVLM_PROJ_MLP, 2, "bias")); + vision_model.mm_model_peg_0_w = get_tensor(string_format(TN_MVLM_PROJ_PEG, 0, "weight")); + vision_model.mm_model_peg_0_b = get_tensor(string_format(TN_MVLM_PROJ_PEG, 0, "bias")); + } break; + case PROJECTOR_TYPE_RESAMPLER: + { + // vision_model.mm_model_pos_embed = get_tensor(new_clip->ctx_data, TN_MINICPMV_POS_EMBD); + vision_model.mm_model_pos_embed_k = get_tensor(TN_MINICPMV_POS_EMBD_K); + vision_model.mm_model_query = get_tensor(TN_MINICPMV_QUERY); + vision_model.mm_model_proj = get_tensor(TN_MINICPMV_PROJ); + vision_model.mm_model_kv_proj = get_tensor(TN_MINICPMV_KV_PROJ); + vision_model.mm_model_attn_q_w = get_tensor(string_format(TN_MINICPMV_ATTN, "q", "weight")); + vision_model.mm_model_attn_k_w = get_tensor(string_format(TN_MINICPMV_ATTN, "k", "weight")); + vision_model.mm_model_attn_v_w = get_tensor(string_format(TN_MINICPMV_ATTN, "v", "weight")); + vision_model.mm_model_attn_q_b = get_tensor(string_format(TN_MINICPMV_ATTN, "q", "bias")); + vision_model.mm_model_attn_k_b = get_tensor(string_format(TN_MINICPMV_ATTN, "k", "bias")); + vision_model.mm_model_attn_v_b = get_tensor(string_format(TN_MINICPMV_ATTN, "v", "bias")); + vision_model.mm_model_attn_o_w = get_tensor(string_format(TN_MINICPMV_ATTN, "out", "weight")); + vision_model.mm_model_attn_o_b = get_tensor(string_format(TN_MINICPMV_ATTN, "out", "bias")); + vision_model.mm_model_ln_q_w = get_tensor(string_format(TN_MINICPMV_LN, "q", "weight")); + vision_model.mm_model_ln_q_b = get_tensor(string_format(TN_MINICPMV_LN, "q", "bias")); + vision_model.mm_model_ln_kv_w = get_tensor(string_format(TN_MINICPMV_LN, "kv", "weight")); + vision_model.mm_model_ln_kv_b = get_tensor(string_format(TN_MINICPMV_LN, "kv", "bias")); + vision_model.mm_model_ln_post_w = get_tensor(string_format(TN_MINICPMV_LN, "post", "weight")); + vision_model.mm_model_ln_post_b = get_tensor(string_format(TN_MINICPMV_LN, "post", "bias")); + } break; + case PROJECTOR_TYPE_GLM_EDGE: + { + vision_model.mm_model_adapter_conv_w = get_tensor(string_format(TN_GLM_ADAPER_CONV, "weight")); + 
vision_model.mm_model_adapter_conv_b = get_tensor(string_format(TN_GLM_ADAPER_CONV, "bias")); + vision_model.mm_model_mlp_0_w = get_tensor(string_format(TN_GLM_ADAPTER_LINEAR,"weight")); + vision_model.mm_model_ln_q_w = get_tensor(string_format(TN_GLM_ADAPTER_NORM_1,"weight")); + vision_model.mm_model_ln_q_b = get_tensor(string_format(TN_GLM_ADAPTER_NORM_1,"bias")); + vision_model.mm_model_mlp_1_w = get_tensor(string_format(TN_GLM_ADAPTER_D_H_2_4H,"weight")); + vision_model.mm_model_mlp_2_w = get_tensor(string_format(TN_GLM_ADAPTER_GATE,"weight")); + vision_model.mm_model_mlp_3_w = get_tensor(string_format(TN_GLM_ADAPTER_D_4H_2_H,"weight")); + vision_model.boi_w = get_tensor(TN_GLM_BOI_W); + vision_model.eoi_w = get_tensor(TN_GLM_EOI_W); + } break; + case PROJECTOR_TYPE_MERGER: + { + vision_model.mm_0_w = get_tensor(string_format(TN_LLAVA_PROJ, 0, "weight")); + vision_model.mm_0_b = get_tensor(string_format(TN_LLAVA_PROJ, 0, "bias")); + vision_model.mm_1_w = get_tensor(string_format(TN_LLAVA_PROJ, 2, "weight")); + vision_model.mm_1_b = get_tensor(string_format(TN_LLAVA_PROJ, 2, "bias")); + } break; + case PROJECTOR_TYPE_GEMMA3: + { + vision_model.mm_input_proj_w = get_tensor(TN_MM_INP_PROJ); + vision_model.mm_soft_emb_norm_w = get_tensor(TN_MM_SOFT_EMB_N); + } break; + default: + GGML_ASSERT(false && "unknown projector type"); + } + + // load data + { + std::vector read_buf; + + auto fin = std::ifstream(fname, std::ios::binary); + if (!fin) { + throw std::runtime_error(string_format("%s: failed to open %s\n", __func__, fname.c_str())); + } + + // alloc memory and offload data + ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(ctx_clip.backend); + ctx_clip.buf.reset(ggml_backend_alloc_ctx_tensors_from_buft(ctx_clip.ctx_data.get(), buft)); + ggml_backend_buffer_set_usage(ctx_clip.buf.get(), GGML_BACKEND_BUFFER_USAGE_WEIGHTS); + for (auto & t : tensors_to_load) { + struct ggml_tensor * cur = ggml_get_tensor(ctx_clip.ctx_data.get(), t->name); + const size_t offset = tensor_offset[t->name]; + fin.seekg(offset, std::ios::beg); + if (!fin) { + throw std::runtime_error(string_format("%s: failed to seek for tensor %s\n", __func__, t->name)); + } + size_t num_bytes = ggml_nbytes(cur); + if (ggml_backend_buft_is_host(buft)) { + // for the CPU and Metal backend, we can read directly into the tensor + fin.read(reinterpret_cast(cur->data), num_bytes); + } else { + // read into a temporary buffer first, then copy to device memory + read_buf.resize(num_bytes); + fin.read(reinterpret_cast(read_buf.data()), num_bytes); + ggml_backend_tensor_set(cur, read_buf.data(), 0, num_bytes); + } + } + fin.close(); + + LOG_DBG("%s: loaded %zu tensors from %s\n", __func__, tensors_to_load.size(), fname.c_str()); } } - ggml_free(meta); + void alloc_compute_meta() { + ctx_clip.buf_compute_meta.resize(GGML_DEFAULT_GRAPH_SIZE * ggml_tensor_overhead() + ggml_graph_overhead()); - new_clip->ctx_gguf = ctx; - - // measure mem requirement and allocate - { - new_clip->buf_compute_meta.resize(GGML_DEFAULT_GRAPH_SIZE * ggml_tensor_overhead() + ggml_graph_overhead()); - new_clip->compute_alloc = ggml_gallocr_new(ggml_backend_get_default_buffer_type(new_clip->backend)); + // create a fake batch clip_image_f32_batch batch; - batch.size = 1; - batch.data = nullptr; - ggml_cgraph * gf = clip_image_build_graph(new_clip, &batch, nullptr, false); - ggml_gallocr_reserve(new_clip->compute_alloc, gf); - size_t compute_memory_buffer_size = ggml_gallocr_get_buffer_size(new_clip->compute_alloc, 0); - LOG_INF("%s: compute 
allocated memory: %.2f MB\n", __func__, compute_memory_buffer_size /1024.0/1024.0); + clip_image_f32_ptr img(clip_image_f32_init()); + clip_image_size image_size; + image_size.width = clip_get_image_size(&ctx_clip); + image_size.height = clip_get_image_size(&ctx_clip); + int n_patches = clip_get_image_size(&ctx_clip) / image_size.width; + img->nx = n_patches; + img->ny = n_patches; + img->buf.resize(n_patches * image_size.width * image_size.height * 3); + batch.entries.push_back(std::move(img)); + + ggml_cgraph * gf = clip_image_build_graph(&ctx_clip, batch, image_size, false); + ggml_backend_sched_reserve(ctx_clip.sched.get(), gf); + for (size_t i = 0; i < ctx_clip.backend_ptrs.size(); ++i) { + ggml_backend_t backend = ctx_clip.backend_ptrs[i]; + ggml_backend_buffer_type_t buft = ctx_clip.backend_buft[i]; + size_t size = ggml_backend_sched_get_buffer_size(ctx_clip.sched.get(), backend); + if (size > 1) { + LOG_INF("%s: %10s compute buffer size = %8.2f MiB\n", __func__, + ggml_backend_buft_name(buft), + size / 1024.0 / 1024.0); + } + } } - return new_clip; + void get_bool(const std::string & key, bool & output, bool required = true) { + const int i = gguf_find_key(ctx_gguf.get(), key.c_str()); + if (i < 0) { + if (required) throw std::runtime_error("Key not found: " + key); + return; + } + output = gguf_get_val_bool(ctx_gguf.get(), i); + } + + void get_i32(const std::string & key, int & output, bool required = true) { + const int i = gguf_find_key(ctx_gguf.get(), key.c_str()); + if (i < 0) { + if (required) throw std::runtime_error("Key not found: " + key); + return; + } + output = gguf_get_val_i32(ctx_gguf.get(), i); + } + + void get_u32(const std::string & key, int & output, bool required = true) { + const int i = gguf_find_key(ctx_gguf.get(), key.c_str()); + if (i < 0) { + if (required) throw std::runtime_error("Key not found: " + key); + return; + } + output = gguf_get_val_u32(ctx_gguf.get(), i); + } + + void get_f32(const std::string & key, float & output, bool required = true) { + const int i = gguf_find_key(ctx_gguf.get(), key.c_str()); + if (i < 0) { + if (required) throw std::runtime_error("Key not found: " + key); + return; + } + output = gguf_get_val_f32(ctx_gguf.get(), i); + } + + void get_string(const std::string & key, std::string & output, bool required = true) { + const int i = gguf_find_key(ctx_gguf.get(), key.c_str()); + if (i < 0) { + if (required) throw std::runtime_error("Key not found: " + key); + return; + } + output = std::string(gguf_get_val_str(ctx_gguf.get(), i)); + } + + void get_arr_int(const std::string & key, std::vector & output, bool required = true) { + const int i = gguf_find_key(ctx_gguf.get(), key.c_str()); + if (i < 0) { + if (required) throw std::runtime_error("Key not found: " + key); + return; + } + int n = gguf_get_arr_n(ctx_gguf.get(), i); + output.resize(n); + const int32_t * values = (const int32_t *)gguf_get_arr_data(ctx_gguf.get(), i); + for (int i = 0; i < n; ++i) { + output[i] = values[i]; + } + } +}; + +// read and create ggml_context containing the tensors and their data +struct clip_ctx * clip_model_load(const char * fname, const int verbosity) { + return clip_init(fname, clip_context_params{ + /* use_gpu */ true, + /* verbosity */ static_cast(verbosity), + }); +} + +struct clip_ctx * clip_init(const char * fname, struct clip_context_params ctx_params) { + g_logger_state.verbosity_thold = ctx_params.verbosity; + clip_ctx * ctx_clip = new clip_ctx(ctx_params); + + try { + clip_model_loader loader(fname, *ctx_clip); + loader.load_hparams(); 
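The metadata getters above (get_u32, get_arr_int, ...) all wrap the same gguf lookup pattern: find the key index, throw if a required key is missing, otherwise leave the caller's default in place. A minimal standalone sketch of that pattern, applied to the optional vision-feature-layer array mentioned earlier (the helper name and the standalone framing are illustrative; the gguf_* calls are the ones already used in this file):

    #include "gguf.h"            // gguf_* declarations, as used elsewhere in clip.cpp
    #include <unordered_set>
    #include <cstdint>

    // Illustrative sketch, not part of the patch: read an optional int32 array key
    // (e.g. KEY_FEATURE_LAYER) and collect its values into a set, mirroring
    // get_arr_int() plus the insertion loop in load_hparams().
    static void read_optional_int_set(const gguf_context * ctx_gguf, const char * key,
                                      std::unordered_set<int> & out) {
        const int i = gguf_find_key(ctx_gguf, key);
        if (i < 0) {
            return; // optional key absent: leave "out" untouched (treated as unset)
        }
        const int n = (int) gguf_get_arr_n(ctx_gguf, i);
        const int32_t * values = (const int32_t *) gguf_get_arr_data(ctx_gguf, i);
        for (int j = 0; j < n; ++j) {
            out.insert(values[j]);
        }
    }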
+ loader.load_tensors(); + loader.alloc_compute_meta(); + } catch (const std::exception & e) { + LOG_ERR("%s: failed to load model '%s': %s\n", __func__, fname, e.what()); + delete ctx_clip; + return nullptr; + } + + return ctx_clip; } void clip_add_load_image_size(struct clip_ctx * ctx_clip, struct clip_image_size * load_image_size) { - ctx_clip->load_image_size = load_image_size; + ctx_clip->load_image_size = *load_image_size; // copy } struct clip_image_size * clip_get_load_image_size(struct clip_ctx * ctx_clip) { - return ctx_clip->load_image_size; + return &ctx_clip->load_image_size; } struct clip_image_size * clip_image_size_init() { @@ -1652,26 +1601,60 @@ struct clip_image_f32 * clip_image_f32_init() { return new clip_image_f32(); } -void clip_image_u8_free(struct clip_image_u8 * img) { delete img; } -void clip_image_f32_free(struct clip_image_f32 * img) { delete img; } -void clip_image_u8_batch_free(struct clip_image_u8_batch * batch) { - if (batch->size > 0) { - delete[] batch->data; - batch->size = 0; - } -} -void clip_image_f32_batch_free(struct clip_image_f32_batch * batch) { - if (batch->size > 0) { - delete[] batch->data; - batch->size = 0; - } +struct clip_image_f32_batch * clip_image_f32_batch_init() { + return new clip_image_f32_batch(); } -static void build_clip_img_from_data(const stbi_uc * data, int nx, int ny, clip_image_u8 * img) { +unsigned char * clip_image_u8_get_data(struct clip_image_u8 * img, uint32_t * nx, uint32_t * ny) { + if (nx) *nx = img->nx; + if (ny) *ny = img->ny; + return img->buf.data(); +} + +void clip_image_size_free(struct clip_image_size * load_image_size) { + if (load_image_size == nullptr) { + return; + } + delete load_image_size; +} +void clip_image_u8_free(struct clip_image_u8 * img) { if (img) delete img; } +void clip_image_f32_free(struct clip_image_f32 * img) { if (img) delete img; } +void clip_image_u8_batch_free(struct clip_image_u8_batch * batch) { if (batch) delete batch; } +void clip_image_f32_batch_free(struct clip_image_f32_batch * batch) { if (batch) delete batch; } + +size_t clip_image_f32_batch_n_images(const struct clip_image_f32_batch * batch) { + return batch->entries.size(); +} + +size_t clip_image_f32_batch_nx(const struct clip_image_f32_batch * batch, int idx) { + if (idx < 0 || idx >= (int)batch->entries.size()) { + LOG_ERR("%s: invalid index %d\n", __func__, idx); + return 0; + } + return batch->entries[idx]->nx; +} + +size_t clip_image_f32_batch_ny(const struct clip_image_f32_batch * batch, int idx) { + if (idx < 0 || idx >= (int)batch->entries.size()) { + LOG_ERR("%s: invalid index %d\n", __func__, idx); + return 0; + } + return batch->entries[idx]->ny; +} + +clip_image_f32 * clip_image_f32_get_img(const struct clip_image_f32_batch * batch, int idx) { + if (idx < 0 || idx >= (int)batch->entries.size()) { + LOG_ERR("%s: invalid index %d\n", __func__, idx); + return nullptr; + } + return batch->entries[idx].get(); +} + +void clip_build_img_from_pixels(const unsigned char * rgb_pixels, int nx, int ny, clip_image_u8 * img) { img->nx = nx; img->ny = ny; img->buf.resize(3 * nx * ny); - memcpy(img->buf.data(), data, img->buf.size()); + memcpy(img->buf.data(), rgb_pixels, img->buf.size()); } bool clip_image_load_from_file(const char * fname, clip_image_u8 * img) { @@ -1681,7 +1664,7 @@ bool clip_image_load_from_file(const char * fname, clip_image_u8 * img) { LOG_ERR("%s: failed to load image '%s'\n", __func__, fname); return false; } - build_clip_img_from_data(data, nx, ny, img); + clip_build_img_from_pixels(data, nx, ny, img); 
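For orientation, a hypothetical caller of the public API touched by this refactor (file names are placeholders; the functions referenced here are the ones declared by this patch plus the existing clip_free):

    #include "clip.h"
    #include <cstdio>

    // hypothetical caller, error handling kept minimal
    static void demo_load_and_preprocess() {
        struct clip_ctx * ctx = clip_model_load("mmproj-model-f16.gguf", /*verbosity*/ 1);
        if (!ctx) {
            return; // failure is already logged by the loader
        }

        clip_image_u8        * img   = clip_image_u8_init();
        clip_image_f32_batch * batch = clip_image_f32_batch_init();

        if (clip_image_load_from_file("input.jpg", img) &&
            clip_image_preprocess(ctx, img, batch)) {
            // llava-1.5 style models yield a single entry; llava-1.6 / minicpmv may yield several slices
            for (size_t i = 0; i < clip_image_f32_batch_n_images(batch); ++i) {
                printf("entry %zu: %zu x %zu\n", i,
                       clip_image_f32_batch_nx(batch, (int) i),
                       clip_image_f32_batch_ny(batch, (int) i));
            }
        }

        clip_image_f32_batch_free(batch);
        clip_image_u8_free(img);
        clip_free(ctx);
    }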
stbi_image_free(data); return true; } @@ -1693,596 +1676,577 @@ bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length LOG_ERR("%s: failed to decode image bytes\n", __func__); return false; } - build_clip_img_from_data(data, nx, ny, img); + clip_build_img_from_pixels(data, nx, ny, img); stbi_image_free(data); return true; } -// Linear interpolation between two points -inline float clip_lerp(float s, float e, float t) { - return s + (e - s) * t; -} -// Bilinear resize function -static void bilinear_resize(const clip_image_u8& src, clip_image_u8& dst, int target_width, int target_height) { - dst.nx = target_width; - dst.ny = target_height; - dst.buf.resize(3 * target_width * target_height); - - float x_ratio = static_cast(src.nx - 1) / target_width; - float y_ratio = static_cast(src.ny - 1) / target_height; - - for (int y = 0; y < target_height; y++) { - for (int x = 0; x < target_width; x++) { - float px = x_ratio * x; - float py = y_ratio * y; - int x_floor = static_cast(px); - int y_floor = static_cast(py); - float x_lerp = px - x_floor; - float y_lerp = py - y_floor; - - for (int c = 0; c < 3; c++) { - float top = clip_lerp( - static_cast(src.buf[3 * (y_floor * src.nx + x_floor) + c]), - static_cast(src.buf[3 * (y_floor * src.nx + (x_floor + 1)) + c]), - x_lerp - ); - float bottom = clip_lerp( - static_cast(src.buf[3 * ((y_floor + 1) * src.nx + x_floor) + c]), - static_cast(src.buf[3 * ((y_floor + 1) * src.nx + (x_floor + 1)) + c]), - x_lerp - ); - dst.buf[3 * (y * target_width + x) + c] = static_cast(clip_lerp(top, bottom, y_lerp)); - } - } - } -} - // Normalize image to float32 - careful with pytorch .to(model.device, dtype=torch.float16) - this sometimes reduces precision (32>16>32), sometimes not -static void normalize_image_u8_to_f32(const clip_image_u8* src, clip_image_f32* dst, const float mean[3], const float std[3]) { - dst->nx = src->nx; - dst->ny = src->ny; - dst->buf.resize(src->buf.size()); +static void normalize_image_u8_to_f32(const clip_image_u8 & src, clip_image_f32 & dst, const float mean[3], const float std[3]) { + dst.nx = src.nx; + dst.ny = src.ny; + dst.buf.resize(src.buf.size()); - for (size_t i = 0; i < src->buf.size(); ++i) { + // TODO @ngxson : seems like this could be done more efficiently on cgraph + for (size_t i = 0; i < src.buf.size(); ++i) { int c = i % 3; // rgb - dst->buf[i] = (static_cast(src->buf[i]) / 255.0f - mean[c]) / std[c]; + dst.buf[i] = (static_cast(src.buf[i]) / 255.0f - mean[c]) / std[c]; } } -inline int clip(int x, int lower, int upper) { - return std::max(lower, std::min(x, upper)); -} +// set of tools to manupulate images +// in the future, we can have HW acceleration by allowing this struct to access 3rd party lib like imagick or opencv +struct image_manipulation { + // Bilinear resize function + static void bilinear_resize(const clip_image_u8& src, clip_image_u8& dst, int target_width, int target_height) { + dst.nx = target_width; + dst.ny = target_height; + dst.buf.resize(3 * target_width * target_height); -static bool bicubic_resize(const clip_image_u8 &img, clip_image_u8 &dst, int target_width, int target_height) { - const int nx = img.nx; - const int ny = img.ny; + float x_ratio = static_cast(src.nx - 1) / target_width; + float y_ratio = static_cast(src.ny - 1) / target_height; - dst.nx = target_width; - dst.ny = target_height; - dst.buf.resize(3 * target_width * target_height); + for (int y = 0; y < target_height; y++) { + for (int x = 0; x < target_width; x++) { + float px = x_ratio * x; + float py = 
y_ratio * y; + int x_floor = static_cast(px); + int y_floor = static_cast(py); + float x_lerp = px - x_floor; + float y_lerp = py - y_floor; - float Cc; - float C[5]; - float d0, d2, d3, a0, a1, a2, a3; - int i, j, k, jj; - int x, y; - float dx, dy; - float tx, ty; - - tx = (float)nx / (float)target_width; - ty = (float)ny / (float)target_height; - - // Bicubic interpolation; adapted from ViT.cpp, inspired from : - // -> https://github.com/yglukhov/bicubic-interpolation-image-processing/blob/master/libimage.c#L36 - // -> https://en.wikipedia.org/wiki/Bicubic_interpolation - - for (i = 0; i < target_height; i++) { - for (j = 0; j < target_width; j++) { - x = (int)(tx * j); - y = (int)(ty * i); - - dx = tx * j - x; - dy = ty * i - y; - - for (k = 0; k < 3; k++) { - for (jj = 0; jj <= 3; jj++) { - d0 = img.buf[(clip(y - 1 + jj, 0, ny - 1) * nx + clip(x - 1, 0, nx - 1)) * 3 + k] - img.buf[(clip(y - 1 + jj, 0, ny - 1) * nx + clip(x, 0, nx - 1)) * 3 + k]; - d2 = img.buf[(clip(y - 1 + jj, 0, ny - 1) * nx + clip(x + 1, 0, nx - 1)) * 3 + k] - img.buf[(clip(y - 1 + jj, 0, ny - 1) * nx + clip(x, 0, nx - 1)) * 3 + k]; - d3 = img.buf[(clip(y - 1 + jj, 0, ny - 1) * nx + clip(x + 2, 0, nx - 1)) * 3 + k] - img.buf[(clip(y - 1 + jj, 0, ny - 1) * nx + clip(x, 0, nx - 1)) * 3 + k]; - a0 = img.buf[(clip(y - 1 + jj, 0, ny - 1) * nx + clip(x, 0, nx - 1)) * 3 + k]; - - a1 = -1.0 / 3 * d0 + d2 - 1.0 / 6 * d3; - a2 = 1.0 / 2 * d0 + 1.0 / 2 * d2; - a3 = -1.0 / 6 * d0 - 1.0 / 2 * d2 + 1.0 / 6 * d3; - - C[jj] = a0 + a1 * dx + a2 * dx * dx + a3 * dx * dx * dx; - - d0 = C[0] - C[1]; - d2 = C[2] - C[1]; - d3 = C[3] - C[1]; - a0 = C[1]; - a1 = -1.0 / 3 * d0 + d2 - 1.0 / 6 * d3; - a2 = 1.0 / 2 * d0 + 1.0 / 2 * d2; - a3 = -1.0 / 6 * d0 - 1.0 / 2 * d2 + 1.0 / 6 * d3; - Cc = a0 + a1 * dy + a2 * dy * dy + a3 * dy * dy * dy; - - const uint8_t Cc2 = std::min(std::max(std::round(Cc), 0.0f), 255.0f); - dst.buf[(i * target_width + j) * 3 + k] = float(Cc2); + for (int c = 0; c < 3; c++) { + float top = lerp( + static_cast(src.buf[3 * (y_floor * src.nx + x_floor) + c]), + static_cast(src.buf[3 * (y_floor * src.nx + (x_floor + 1)) + c]), + x_lerp + ); + float bottom = lerp( + static_cast(src.buf[3 * ((y_floor + 1) * src.nx + x_floor) + c]), + static_cast(src.buf[3 * ((y_floor + 1) * src.nx + (x_floor + 1)) + c]), + x_lerp + ); + dst.buf[3 * (y * target_width + x) + c] = static_cast(lerp(top, bottom, y_lerp)); } } } } - return true; -} + // Bicubic resize function + // part of image will be cropped if the aspect ratio is different + static bool bicubic_resize(const clip_image_u8 & img, clip_image_u8 & dst, int target_width, int target_height) { + const int nx = img.nx; + const int ny = img.ny; -// llava-1.6 type of resize_and_pad (black) -static void resize_and_pad_image(const clip_image_u8& image, clip_image_u8 &image_output, const std::pair& target_resolution) { - int target_width = target_resolution.first; - int target_height = target_resolution.second; + dst.nx = target_width; + dst.ny = target_height; + dst.buf.resize(3 * target_width * target_height); - float scale_w = static_cast(target_width) / image.nx; - float scale_h = static_cast(target_height) / image.ny; + float Cc; + float C[5]; + float d0, d2, d3, a0, a1, a2, a3; + int i, j, k, jj; + int x, y; + float dx, dy; + float tx, ty; - int new_width, new_height; + tx = (float)nx / (float)target_width; + ty = (float)ny / (float)target_height; - if (scale_w < scale_h) { - new_width = target_width; - new_height = std::min(static_cast(std::ceil(image.ny * scale_w)), target_height); 
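To make the mapping in normalize_image_u8_to_f32 concrete, a worked example with illustrative per-channel statistics (the real image_mean / image_std values are read from the gguf metadata):

    // channel value 200, mean 0.5, std 0.5 (illustrative numbers):
    //   200 / 255       = 0.78431
    //   0.78431 - 0.5   = 0.28431
    //   0.28431 / 0.5   = 0.56863
    // so dst.buf[i] = (200 / 255.0f - 0.5f) / 0.5f ≈ 0.5686f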
- } else { - new_height = target_height; - new_width = std::min(static_cast(std::ceil(image.nx * scale_h)), target_width); + // Bicubic interpolation; adapted from ViT.cpp, inspired from : + // -> https://github.com/yglukhov/bicubic-interpolation-image-processing/blob/master/libimage.c#L36 + // -> https://en.wikipedia.org/wiki/Bicubic_interpolation + + for (i = 0; i < target_height; i++) { + for (j = 0; j < target_width; j++) { + x = (int)(tx * j); + y = (int)(ty * i); + + dx = tx * j - x; + dy = ty * i - y; + + for (k = 0; k < 3; k++) { + for (jj = 0; jj <= 3; jj++) { + d0 = img.buf[(clip(y - 1 + jj, 0, ny - 1) * nx + clip(x - 1, 0, nx - 1)) * 3 + k] - img.buf[(clip(y - 1 + jj, 0, ny - 1) * nx + clip(x, 0, nx - 1)) * 3 + k]; + d2 = img.buf[(clip(y - 1 + jj, 0, ny - 1) * nx + clip(x + 1, 0, nx - 1)) * 3 + k] - img.buf[(clip(y - 1 + jj, 0, ny - 1) * nx + clip(x, 0, nx - 1)) * 3 + k]; + d3 = img.buf[(clip(y - 1 + jj, 0, ny - 1) * nx + clip(x + 2, 0, nx - 1)) * 3 + k] - img.buf[(clip(y - 1 + jj, 0, ny - 1) * nx + clip(x, 0, nx - 1)) * 3 + k]; + a0 = img.buf[(clip(y - 1 + jj, 0, ny - 1) * nx + clip(x, 0, nx - 1)) * 3 + k]; + + a1 = -1.0 / 3 * d0 + d2 - 1.0 / 6 * d3; + a2 = 1.0 / 2 * d0 + 1.0 / 2 * d2; + a3 = -1.0 / 6 * d0 - 1.0 / 2 * d2 + 1.0 / 6 * d3; + + C[jj] = a0 + a1 * dx + a2 * dx * dx + a3 * dx * dx * dx; + + d0 = C[0] - C[1]; + d2 = C[2] - C[1]; + d3 = C[3] - C[1]; + a0 = C[1]; + a1 = -1.0 / 3 * d0 + d2 - 1.0 / 6 * d3; + a2 = 1.0 / 2 * d0 + 1.0 / 2 * d2; + a3 = -1.0 / 6 * d0 - 1.0 / 2 * d2 + 1.0 / 6 * d3; + Cc = a0 + a1 * dy + a2 * dy * dy + a3 * dy * dy * dy; + + const uint8_t Cc2 = std::min(std::max(std::round(Cc), 0.0f), 255.0f); + dst.buf[(i * target_width + j) * 3 + k] = float(Cc2); + } + } + } + } + + return true; } - clip_image_u8 resized_image; - // bilinear_resize(image, resized_image, new_width, new_height); - bicubic_resize(image, resized_image, new_width, new_height); + // llava-1.6 type of resize_and_pad + // if the ratio is not 1:1, padding with pad_color will be applied + // pad_color is single channel, default is 0 (black) + static void resize_and_pad_image(const clip_image_u8 & image, clip_image_u8 & dst, const clip_image_size & target_resolution, std::array pad_color = {0, 0, 0}) { + int target_width = target_resolution.width; + int target_height = target_resolution.height; - clip_image_u8 padded_image; - padded_image.nx = target_width; - padded_image.ny = target_height; - padded_image.buf.resize(3 * target_width * target_height, 0); // Initialize with black + float scale_w = static_cast(target_width) / image.nx; + float scale_h = static_cast(target_height) / image.ny; - // Calculate padding offsets - int pad_x = (target_width - new_width) / 2; - int pad_y = (target_height - new_height) / 2; + int new_width, new_height; - // Copy the resized image into the center of the padded buffer - for (int y = 0; y < new_height; ++y) { - for (int x = 0; x < new_width; ++x) { - for (int c = 0; c < 3; ++c) { - padded_image.buf[3 * ((y + pad_y) * target_width + (x + pad_x)) + c] = resized_image.buf[3 * (y * new_width + x) + c]; + if (scale_w < scale_h) { + new_width = target_width; + new_height = std::min(static_cast(std::ceil(image.ny * scale_w)), target_height); + } else { + new_height = target_height; + new_width = std::min(static_cast(std::ceil(image.nx * scale_h)), target_width); + } + + clip_image_u8 resized_image; + bicubic_resize(image, resized_image, new_width, new_height); + + clip_image_u8 padded_image; + padded_image.nx = target_width; + padded_image.ny = target_height; 
+ padded_image.buf.resize(3 * target_width * target_height); + + // Fill the padded image with the fill color + for (size_t i = 0; i < padded_image.buf.size(); i += 3) { + padded_image.buf[i] = pad_color[0]; + padded_image.buf[i + 1] = pad_color[1]; + padded_image.buf[i + 2] = pad_color[2]; + } + + // Calculate padding offsets + int pad_x = (target_width - new_width) / 2; + int pad_y = (target_height - new_height) / 2; + + // Copy the resized image into the center of the padded buffer + for (int y = 0; y < new_height; ++y) { + for (int x = 0; x < new_width; ++x) { + for (int c = 0; c < 3; ++c) { + padded_image.buf[3 * ((y + pad_y) * target_width + (x + pad_x)) + c] = resized_image.buf[3 * (y * new_width + x) + c]; + } + } + } + dst = std::move(padded_image); + } + + static void crop_image(const clip_image_u8 & image, clip_image_u8 & dst, int x, int y, int w, int h) { + dst.nx = w; + dst.ny = h; + dst.buf.resize(3 * w * h); + + for (int i = 0; i < h; ++i) { + for (int j = 0; j < w; ++j) { + int src_idx = 3 * ((y + i)*image.nx + (x + j)); + int dst_idx = 3 * (i*w + j); + dst.buf[dst_idx] = image.buf[src_idx]; + dst.buf[dst_idx + 1] = image.buf[src_idx + 1]; + dst.buf[dst_idx + 2] = image.buf[src_idx + 2]; } } } - image_output = std::move(padded_image); -} + +private: + static inline int clip(int x, int lower, int upper) { + return std::max(lower, std::min(x, upper)); + } + + // Linear interpolation between two points + static inline float lerp(float s, float e, float t) { + return s + (e - s) * t; + } +}; /** - * Selects the best resolution from a list of possible resolutions based on the original size. + * implementation of LLaVA-UHD: + * - https://arxiv.org/pdf/2403.11703 + * - https://github.com/thunlp/LLaVA-UHD + * - https://github.com/thunlp/LLaVA-UHD/blob/302301bc2175f7e717fb8548516188e89f649753/llava_uhd/train/llava-uhd/slice_logic.py#L118 * - * @param original_size The original size of the image in the format (width, height). - * @param possible_resolutions A list of possible resolutions in the format [(width1, height1), (width2, height2), ...]. - * @return The best fit resolution in the format (width, height). 
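A quick worked example of resize_and_pad_image, using illustrative sizes (the target resolution normally comes from the model's grid pinpoints):

    // source 1000x500, target 336x336, pad_color {0,0,0}:
    //   scale_w = 336 / 1000 = 0.336, scale_h = 336 / 500 = 0.672  -> scale_w is the smaller one
    //   new_width  = 336
    //   new_height = min(ceil(500 * 0.336), 336) = 168
    //   pad_x = (336 - 336) / 2 = 0, pad_y = (336 - 168) / 2 = 84
    // the bicubic-resized 336x168 image ends up vertically centered, with 84 rows of pad_color above and below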
+ * overview: + * - an image always have a single overview (downscaled image) + * - an image can have 0 or multiple slices, depending on the image size + * - each slice can then be considered as a separate image + * + * for example: + * + * [overview] --> [slice 1] --> [slice 2] + * | | + * +--> [slice 3] --> [slice 4] */ -static std::pair select_best_resolution(const std::pair & original_size, const std::vector> & possible_resolutions) { - int original_width = original_size.first; - int original_height = original_size.second; - std::pair best_fit; - int max_effective_resolution = 0; - int min_wasted_resolution = std::numeric_limits::max(); +struct llava_uhd { + struct slice_coordinates { + int x; + int y; + clip_image_size size; + }; - for (const auto& resolution : possible_resolutions) { - int width = resolution.first; - int height = resolution.second; - float scale = std::min(static_cast(width) / original_width, static_cast(height) / original_height); - int downscaled_width = static_cast(original_width * scale); - int downscaled_height = static_cast(original_height * scale); - int effective_resolution = std::min(downscaled_width * downscaled_height, original_width * original_height); - int wasted_resolution = (width * height) - effective_resolution; - // LOG_INF("resolution: %d %d, scale: %f, downscaled: %d %d, effective: %d, wasted: %d\n", width, height, scale, downscaled_width, downscaled_height, effective_resolution, wasted_resolution); - if (effective_resolution > max_effective_resolution || (effective_resolution == max_effective_resolution && wasted_resolution < min_wasted_resolution)) { - max_effective_resolution = effective_resolution; - min_wasted_resolution = wasted_resolution; - best_fit = resolution; + struct slice_instructions { + clip_image_size overview_size; // size of downscaled image + clip_image_size refined_size; // size of image right before slicing (must be multiple of slice size) + clip_image_size grid_size; // grid_size.width * grid_size.height = number of slices + std::vector slices; + bool padding_refined = false; // if true, refine image will be padded to the grid size (e.g. 
llava-1.6) + }; + + static int get_max_slices(struct clip_ctx * ctx) { + if (clip_is_minicpmv(ctx)) { + return 9; } + return 0; } - return best_fit; -} + static slice_instructions get_slice_instructions(struct clip_ctx * ctx, const clip_image_size & original_size) { + slice_instructions res; + const int patch_size = clip_get_patch_size(ctx); + const int slice_size = clip_get_image_size(ctx); + const int max_slice_nums = get_max_slices(ctx); + const int original_width = original_size.width; + const int original_height = original_size.height; + const float log_ratio = log((float)original_width / original_height); + const float ratio = (float)original_width * original_height / (slice_size * slice_size); + const int multiple = fmin(ceil(ratio), max_slice_nums); + const bool has_slices = (multiple > 1); + const bool has_pinpoints = !ctx->vision_model.hparams.image_grid_pinpoints.empty(); -static std::vector divide_to_patches_u8(const clip_image_u8 & image, int patch_size) { - std::vector patches; - int width = image.nx; - int height = image.ny; - for (int i = 0; i < height; i += patch_size) { - for (int j = 0; j < width; j += patch_size) { - clip_image_u8 *patch = clip_image_u8_init(); - patch->nx = std::min(patch_size, width - j); - patch->ny = std::min(patch_size, height - i); - patch->buf.resize(3 * patch->nx * patch->ny); - for (int y = 0; y < patch->ny; ++y) { - for (int x = 0; x < patch->nx; ++x) { - for (int c = 0; c < 3; ++c) { - patch->buf[3 * (y * patch->nx + x) + c] = image.buf[3 * ((i + y) * width + (j + x)) + c]; + if (has_pinpoints) { + // has pinpoints, use them to calculate the grid size (e.g. llava-1.6) + auto refine_size = llava_uhd::select_best_resolution( + ctx->vision_model.hparams.image_grid_pinpoints, + original_size); + res.overview_size = clip_image_size{slice_size, slice_size}; + res.refined_size = refine_size; + res.grid_size = clip_image_size{0, 0}; + res.padding_refined = true; + + for (int y = 0; y < refine_size.height; y += slice_size) { + for (int x = 0; x < refine_size.width; x += slice_size) { + slice_coordinates slice; + slice.x = x; + slice.y = y; + slice.size.width = std::min(slice_size, refine_size.width - x); + slice.size.height = std::min(slice_size, refine_size.height - y); + res.slices.push_back(slice); + if (x == 0) { + res.grid_size.width++; } } + res.grid_size.height++; } - patches.push_back(patch); + + return res; } - } - return patches; -} -static int ensure_divide(int length, int patch_size) { - return std::max(static_cast(std::round(static_cast(length) / patch_size) * patch_size), patch_size); -} + // no pinpoints, dynamically calculate the grid size (e.g. 
minicpmv) -static std::pair uhd_find_best_resize(std::pair original_size, int scale_resolution, int patch_size, bool allow_upscale = false) { - int width = original_size.first; - int height = original_size.second; - if ((width * height > scale_resolution * scale_resolution) || allow_upscale) { - float r = static_cast(width) / height; - height = static_cast(scale_resolution / std::sqrt(r)); - width = static_cast(height * r); - } - int best_width = ensure_divide(width, patch_size); - int best_height = ensure_divide(height, patch_size); - return std::make_pair(best_width, best_height); -} + auto best_size = get_best_resize(original_size, slice_size, patch_size, has_slices); + res.overview_size = best_size; -static std::pair uhd_get_refine_size(std::pair original_size, std::pair grid, int scale_resolution, int patch_size, bool allow_upscale = false) { - int width, height; - std::tie(width, height) = original_size; - int grid_x, grid_y; - std::tie(grid_x, grid_y) = grid; + if (!has_slices) { + // skip slicing logic + res.refined_size = clip_image_size{0, 0}; + res.grid_size = clip_image_size{0, 0}; - int refine_width = ensure_divide(width, grid_x); - int refine_height = ensure_divide(height, grid_y); + } else { + auto best_grid = get_best_grid(max_slice_nums, multiple, log_ratio); + auto refine_size = get_refine_size(original_size, best_grid, slice_size, patch_size, true); + res.grid_size = best_grid; + res.refined_size = refine_size; - int grid_width = refine_width / grid_x; - int grid_height = refine_height / grid_y; - - // auto best_grid_size = find_best_resize(std::make_tuple(grid_width, grid_height), scale_resolution, patch_size, allow_upscale); (old line) - auto best_grid_size = uhd_find_best_resize(std::make_pair(grid_width, grid_height), scale_resolution, patch_size, allow_upscale); // (new line) => fixes conversion for make_tuple to make_pair - int best_grid_width, best_grid_height; - std::tie(best_grid_width, best_grid_height) = best_grid_size; - - // std::pair refine_size = std::make_tuple(best_grid_width * grid_x, best_grid_height * grid_y); (old line) - std::pair refine_size = std::make_pair(best_grid_width * grid_x, best_grid_height * grid_y); // (new line) - return refine_size; -} - -static std::pair uhd_best_grid(const int max_slice_nums, const int multiple, const float log_ratio) { - std::vector candidate_split_grids_nums; - for (int i : {multiple - 1, multiple, multiple + 1}) { - if (i == 1 || i > max_slice_nums) { - continue; - } - candidate_split_grids_nums.push_back(i); - } - - std::vector> candidate_grids; - for (int split_grids_nums : candidate_split_grids_nums) { - int m = 1; - while (m <= split_grids_nums) { - if (split_grids_nums % m == 0) { - candidate_grids.emplace_back(m, split_grids_nums / m); - } - ++m; - } - } - - std::pair best_grid{1, 1}; - float min_error = std::numeric_limits::infinity(); - for (const auto& grid : candidate_grids) { - float error = std::abs(log_ratio - std::log(1.0 * grid.first / grid.second)); - if (error < min_error) { - best_grid = grid; - min_error = error; - } - } - return best_grid; -} - -// inspired from LLaVA-UHD: -// -> https://arxiv.org/pdf/2403.11703 -// -> https://github.com/thunlp/LLaVA-UHD -// -> https://github.com/thunlp/LLaVA-UHD/blob/302301bc2175f7e717fb8548516188e89f649753/llava_uhd/train/llava-uhd/slice_logic.py#L118 -static std::vector> uhd_slice_image(const clip_image_u8 * img, const int max_slice_nums=9, const int scale_resolution=448, const int patch_size=14) { - const std::pair original_size={img->nx,img->ny}; - const 
int original_width = img->nx; - const int original_height = img->ny; - const float log_ratio = log(1.0*original_width/original_height); - const float ratio = 1.0 * original_width * original_height/ (scale_resolution * scale_resolution); - const int multiple = fmin(ceil(ratio), max_slice_nums); - - std::vector> images; - LOG_INF("%s: multiple %d\n", __func__, multiple); - images.push_back(std::vector()); - - if (multiple <= 1) { - auto best_size = uhd_find_best_resize(original_size, scale_resolution, patch_size, true); - clip_image_u8 * source_image = clip_image_u8_init(); - bicubic_resize(*img, *source_image, best_size.first, best_size.second); - // source_image = image.resize(best_size, Image.Resampling.BICUBIC) - images[images.size()-1].push_back(source_image); - } - else if (multiple > 1) { - auto best_size = uhd_find_best_resize(original_size, scale_resolution, patch_size); - clip_image_u8 * source_image = clip_image_u8_init(); - bicubic_resize(*img, *source_image, best_size.first, best_size.second); - // source_image = image.copy().resize(best_resize, Image.Resampling.BICUBIC) - LOG_INF("%s: image_size: %d %d; source_image size: %d %d\n", __func__, img->nx, img->ny, best_size.first, best_size.second); - images[images.size()-1].push_back(source_image); - - std::pair best_grid = uhd_best_grid(max_slice_nums, multiple, log_ratio); - LOG_INF("%s: image_size: %d %d; best_grid: %d %d\n", __func__, img->nx, img->ny, best_grid.first, best_grid.second); - - auto refine_size = uhd_get_refine_size(original_size, best_grid, scale_resolution, patch_size, true); - clip_image_u8 * refine_image = clip_image_u8_init(); - bicubic_resize(*img, *refine_image, refine_size.first, refine_size.second); - - LOG_INF("%s: refine_image_size: %d %d; refine_size: %d %d\n", __func__, refine_image->nx, refine_image->ny, refine_size.first, refine_size.second); - - // split_to_patches - int width = refine_image->nx; - int height = refine_image->ny; - int grid_x = int(width / best_grid.first); - int grid_y = int(height / best_grid.second); - for (int patches_i = 0, ic = 0; patches_i < height && ic < best_grid.second; patches_i += grid_y, ic += 1){ - images.push_back(std::vector()); - for(int patches_j = 0, jc = 0; patches_j < width && jc < best_grid.first; patches_j += grid_x, jc += 1){ - clip_image_u8 * patch = clip_image_u8_init(); - patch->nx = grid_x; - patch->ny = grid_y; - patch->buf.resize(3 * patch->nx * patch->ny); - for (int y = patches_i; y < patches_i + grid_y; ++y) { - for (int x = patches_j; x < patches_j + grid_x; ++x) { - const int i = 3 * (y * refine_image->nx + x); - const int j = 3 * ((y-patches_i) * patch->nx + (x-patches_j)); - patch->buf[j] = refine_image->buf[i]; - patch->buf[j+1] = refine_image->buf[i+1]; - patch->buf[j+2] = refine_image->buf[i+2]; - } + int width = refine_size.width; + int height = refine_size.height; + int grid_x = int(width / best_grid.width); + int grid_y = int(height / best_grid.height); + for (int patches_y = 0, ic = 0; + patches_y < refine_size.height && ic < best_grid.height; + patches_y += grid_y, ic += 1) { + for (int patches_x = 0, jc = 0; + patches_x < refine_size.width && jc < best_grid.width; + patches_x += grid_x, jc += 1) { + slice_coordinates slice; + slice.x = patches_x; + slice.y = patches_y; + slice.size.width = grid_x; + slice.size.height = grid_y; + res.slices.push_back(slice); + // LOG_INF("slice %d: %d %d %d %d\n", ic, patches_i, patches_j, grid_x, grid_y); } - images[images.size()-1].push_back(patch); } } - clip_image_u8_free(refine_image); - } - return 
images; -} + return res; + } + + static std::vector slice_image(const clip_image_u8 * img, const slice_instructions & inst) { + std::vector output; + + // resize to overview size + clip_image_u8_ptr resized_img(clip_image_u8_init()); + image_manipulation::bicubic_resize(*img, *resized_img, inst.overview_size.width, inst.overview_size.height); + output.push_back(std::move(resized_img)); + if (inst.slices.empty()) { + // no slices, just return the resized image + return output; + } + + // resize to refined size + clip_image_u8_ptr refined_img(clip_image_u8_init()); + if (inst.padding_refined) { + image_manipulation::resize_and_pad_image(*img, *refined_img, inst.refined_size); + } else { + image_manipulation::bilinear_resize(*img, *refined_img, inst.refined_size.width, inst.refined_size.height); + } + + // create slices + for (const auto & slice : inst.slices) { + int x = slice.x; + int y = slice.y; + int w = slice.size.width; + int h = slice.size.height; + + clip_image_u8_ptr img_slice(clip_image_u8_init()); + image_manipulation::crop_image(*refined_img, *img_slice, x, y, w, h); + output.push_back(std::move(img_slice)); + } + + return output; + } + +private: + static clip_image_size get_best_resize(const clip_image_size & original_size, int scale_resolution, int patch_size, bool allow_upscale = false) { + int width = original_size.width; + int height = original_size.height; + if ((width * height > scale_resolution * scale_resolution) || allow_upscale) { + float r = static_cast(width) / height; + height = static_cast(scale_resolution / std::sqrt(r)); + width = static_cast(height * r); + } + clip_image_size res; + res.width = ensure_divide(width, patch_size); + res.height = ensure_divide(height, patch_size); + return res; + } + + /** + * Selects the best resolution from a list of possible resolutions based on the original size. 
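The slicer is consumed in two steps: build the instructions from the original image size, then materialize the overview plus slices. A condensed sketch of that flow as used by clip_image_preprocess further down (the wrapper function is illustrative; llava_uhd is only visible inside clip.cpp):

    // illustrative wrapper, not part of the patch
    static void slice_for_encoding(struct clip_ctx * ctx, const clip_image_u8 * img,
                                   std::vector<clip_image_u8_ptr> & out) {
        clip_image_size original_size{img->nx, img->ny};
        llava_uhd::slice_instructions inst = llava_uhd::get_slice_instructions(ctx, original_size);
        out = llava_uhd::slice_image(img, inst);
        // out[0] is always the downscaled overview; out[1..] are the inst.slices.size() grid slices
        // (none for small images, since get_slice_instructions then emits no slices)
    }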
+     *
+     * @param original_size The original size of the image
+     * @param possible_resolutions A list of possible resolutions
+     * @return The best fit resolution
+     */
+    static clip_image_size select_best_resolution(const clip_image_size & original_size, const std::vector<clip_image_size> & possible_resolutions) {
+        int original_width = original_size.width;
+        int original_height = original_size.height;
+        clip_image_size best_fit;
+        int max_effective_resolution = 0;
+        int min_wasted_resolution = std::numeric_limits<int>::max();
+
+        for (const auto & resolution : possible_resolutions) {
+            int width  = resolution.width;
+            int height = resolution.height;
+            float scale = std::min(static_cast<float>(width) / original_width, static_cast<float>(height) / original_height);
+            int downscaled_width  = static_cast<int>(original_width * scale);
+            int downscaled_height = static_cast<int>(original_height * scale);
+            int effective_resolution = std::min(downscaled_width * downscaled_height, original_width * original_height);
+            int wasted_resolution = (width * height) - effective_resolution;
+            // LOG_INF("resolution: %d %d, scale: %f, downscaled: %d %d, effective: %d, wasted: %d\n", width, height, scale, downscaled_width, downscaled_height, effective_resolution, wasted_resolution);
+            if (effective_resolution > max_effective_resolution || (effective_resolution == max_effective_resolution && wasted_resolution < min_wasted_resolution)) {
+                max_effective_resolution = effective_resolution;
+                min_wasted_resolution = wasted_resolution;
+                best_fit = resolution;
+            }
+        }
+
+        return best_fit;
+    }
+
+    // used by llava 1.6 with custom list of pinpoints
+    static clip_image_size select_best_resolution(const std::vector<int> & pinpoints, const clip_image_size & original_size) {
+        std::vector<clip_image_size> possible_resolutions;
+        for (size_t i = 0; i < pinpoints.size(); i += 2) {
+            possible_resolutions.push_back(clip_image_size{pinpoints[i], pinpoints[i+1]});
+        }
+        return select_best_resolution(original_size, possible_resolutions);
+    }
+
+    static int ensure_divide(int length, int patch_size) {
+        return std::max(static_cast<int>(std::round(static_cast<float>(length) / patch_size) * patch_size), patch_size);
+    }
+
+    static clip_image_size get_refine_size(const clip_image_size & original_size, const clip_image_size & grid, int scale_resolution, int patch_size, bool allow_upscale = false) {
+        int width  = original_size.width;
+        int height = original_size.height;
+        int grid_x = grid.width;
+        int grid_y = grid.height;
+
+        int refine_width  = ensure_divide(width,  grid_x);
+        int refine_height = ensure_divide(height, grid_y);
+
+        clip_image_size grid_size;
+        grid_size.width  = refine_width  / grid_x;
+        grid_size.height = refine_height / grid_y;
+
+        auto best_grid_size  = get_best_resize(grid_size, scale_resolution, patch_size, allow_upscale);
+        int best_grid_width  = best_grid_size.width;
+        int best_grid_height = best_grid_size.height;
+
+        clip_image_size refine_size;
+        refine_size.width  = best_grid_width  * grid_x;
+        refine_size.height = best_grid_height * grid_y;
+        return refine_size;
+    }
+
+    static clip_image_size get_best_grid(const int max_slice_nums, const int multiple, const float log_ratio) {
+        std::vector<int> candidate_split_grids_nums;
+        for (int i : {multiple - 1, multiple, multiple + 1}) {
+            if (i == 1 || i > max_slice_nums) {
+                continue;
+            }
+            candidate_split_grids_nums.push_back(i);
+        }
+
+        std::vector<clip_image_size> candidate_grids;
+        for (int split_grids_nums : candidate_split_grids_nums) {
+            int m = 1;
+            while (m <= split_grids_nums) {
+                if (split_grids_nums % m == 0) {
+                    candidate_grids.push_back(clip_image_size{m, 
split_grids_nums / m});
+                }
+                ++m;
+            }
+        }
+
+        clip_image_size best_grid{1, 1};
+        float min_error = std::numeric_limits<float>::infinity();
+        for (const auto& grid : candidate_grids) {
+            float error = std::abs(log_ratio - std::log(1.0 * grid.width / grid.height));
+            if (error < min_error) {
+                best_grid = grid;
+                min_error = error;
+            }
+        }
+        return best_grid;
+    }
+};
+
+// TODO @ngxson : deprecate the load_image_size singleton pattern
 int clip_uhd_num_image_embeds_col(struct clip_ctx * ctx_clip) {
-    const int max_slice_nums=9;
-    const int scale_resolution=448;
-    const int original_width = ctx_clip->load_image_size->width;
-    const int original_height = ctx_clip->load_image_size->height;
-    const float log_ratio = log(1.0*original_width/original_height);
-    const float ratio = 1.0 * original_width * original_height/ (scale_resolution * scale_resolution);
-    const int multiple = fmin(ceil(ratio), max_slice_nums);
-    std::pair<int, int> best_grid = uhd_best_grid(max_slice_nums, multiple, log_ratio);
-    return best_grid.first;
+    const auto inst = llava_uhd::get_slice_instructions(ctx_clip, ctx_clip->load_image_size);
+    return inst.grid_size.width;
 }
 
 // returns the normalized float tensor for llava-1.5, for spatial_unpad with anyres processing for llava-1.6 it returns the normalized image patch tensors as a vector
 // res_imgs memory is being allocated here, previous allocations will be freed if found
-bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch * res_imgs) {
+bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, struct clip_image_f32_batch * res_imgs) {
+    if (!ctx->has_vision_encoder) {
+        LOG_ERR("%s: This gguf file seems to have no vision encoder\n", __func__);
+        return false;
+    }
+
+    clip_image_size original_size{img->nx, img->ny};
+    bool pad_to_square = true;
+    auto & params = ctx->vision_model.hparams;
+    // The model config actually contains all we need to decide on how to preprocess, here we automatically switch to the new llava-1.6 preprocessing
+    if (params.mm_patch_merge_type == PATCH_MERGE_SPATIAL_UNPAD) {
+        pad_to_square = false;
+    }
+
+    if (clip_is_minicpmv(ctx)) {
+        auto const inst = llava_uhd::get_slice_instructions(ctx, original_size);
+        std::vector<clip_image_u8_ptr> imgs = llava_uhd::slice_image(img, inst);
 
-    if(clip_is_minicpmv(ctx)){
-        int max_slice_nums = 9;
-        std::vector<std::vector<clip_image_u8 *>> imgs = uhd_slice_image(img, max_slice_nums);
-        res_imgs->size = 0;
-        for (size_t i = 0; i < imgs.size(); ++i){
-            res_imgs->size += imgs[i].size();
-        }
-        res_imgs->data = new clip_image_f32[res_imgs->size];
-        int idx = 0;
         for (size_t i = 0; i < imgs.size(); ++i) {
-            for (size_t j = 0; j < imgs[i].size(); ++j) {
-                LOG_DBG("%s: %d %d\n", __func__,imgs[i][j]->nx,imgs[i][j]->ny);
-                clip_image_f32 * res = clip_image_f32_init();
-                normalize_image_u8_to_f32(imgs[i][j], res, ctx->image_mean, ctx->image_std);
-                res_imgs->data[idx++] = *res;
-                clip_image_f32_free(res);
-            }
-        }
-        for (size_t i = 0; i < imgs.size(); ++i) {
-            for (size_t j = 0; j < imgs[i].size(); ++j) {
-                if (imgs[i][j] != nullptr) {
-                    clip_image_u8_free(imgs[i][j]);
-                }
-            }
+            // clip_image_save_to_bmp(*imgs[i], "slice_" + std::to_string(i) + ".bmp");
+            clip_image_f32_ptr res(clip_image_f32_init());
+            normalize_image_u8_to_f32(*imgs[i], *res, ctx->image_mean, ctx->image_std);
+            res_imgs->entries.push_back(std::move(res));
         }
         return true;
     }
     else if (ctx->has_qwen2vl_merger) {
-        clip_image_u8 * resized = clip_image_u8_init();
-        auto patch_size = clip_patch_size(ctx) * 2;
+        clip_image_u8 resized;
+        auto patch_size = 
clip_get_patch_size(ctx) * 2;
         int nx = ceil((float)img->nx / patch_size) * patch_size;
         int ny = ceil((float)img->ny / patch_size) * patch_size;
-        bicubic_resize(*img, *resized, nx, ny);
+        image_manipulation::bicubic_resize(*img, resized, nx, ny);
 
-        res_imgs->data = new clip_image_f32[1];
-        // clip_image_f32 * res = clip_image_f32_init();
-        normalize_image_u8_to_f32(resized, res_imgs->data, ctx->image_mean, ctx->image_std);
+        clip_image_f32_ptr img_f32(clip_image_f32_init());
+        // clip_image_f32_ptr res(clip_image_f32_init());
+        normalize_image_u8_to_f32(resized, *img_f32, ctx->image_mean, ctx->image_std);
         // res_imgs->data[0] = *res;
-        res_imgs->size = 1;
-
-        // clip_image_f32_free(res);
-        clip_image_u8_free(resized);
+        res_imgs->entries.push_back(std::move(img_f32));
         return true;
     }
 
-    bool pad_to_square = true;
-    if (!ctx->has_vision_encoder) {
-        LOG_ERR("This gguf file seems to have no vision encoder\n");
-        return false;
+    if (ctx->has_glm_projector || ctx->proj_type == PROJECTOR_TYPE_GEMMA3) {
+        clip_image_u8 resized_image;
+        int sz = params.image_size;
+        image_manipulation::bicubic_resize(*img, resized_image, sz, sz);
+        clip_image_f32_ptr img_f32(clip_image_f32_init());
+        //clip_image_save_to_bmp(resized_image, "resized.bmp");
+        normalize_image_u8_to_f32(resized_image, *img_f32, ctx->image_mean, ctx->image_std);
+        res_imgs->entries.push_back(std::move(img_f32));
+        return true;
     }
 
-    auto & params = ctx->vision_model.hparams;
-    // The model config actually contains all we need to decide on how to preprocess, here we automatically switch to the new llava-1.6 preprocessing
-    if (strcmp(params.mm_patch_merge_type, "spatial_unpad") == 0) {
-        pad_to_square = false;
-    }
-    // free the previous res_imgs if any set
-    if (res_imgs->size > 0) {
-        clip_image_f32_batch_free(res_imgs);
-    }
-    res_imgs->data = nullptr;
-    res_imgs->size = 0;
 
     // the logic below is to pad the shorter side to the longer side with a background color: rgb(122, 116, 104)
     // see https://github.com/haotian-liu/LLaVA/blob/e854a2bf85118c504f6f16bf5c3c7c92f8fa8c6b/llava/conversation.py#L113-L156
 
-    clip_image_u8 * temp = clip_image_u8_init(); // we will keep the input image data here temporarily
-    if (pad_to_square && img->nx != img->ny) {
-        int longer_side = std::max(img->nx, img->ny);
+    clip_image_u8_ptr temp(clip_image_u8_init()); // we will keep the input image data here temporarily
+
+    if (pad_to_square) {
+        // for llava-1.5, we resize image to a square, and pad the shorter side with a background color
+        // see https://github.com/haotian-liu/LLaVA/blob/e854a2bf85118c504f6f16bf5c3c7c92f8fa8c6b/llava/conversation.py#L113-L156
+        const int longer_side = std::max(img->nx, img->ny);
         temp->nx = longer_side;
         temp->ny = longer_side;
         temp->buf.resize(3 * longer_side * longer_side);
-        const uint8_t bc[3] = {122, 116, 104}; // background color in RGB from LLaVA (this is the mean rgb color * 255)
 
-        // fill with background color
-        for (size_t i = 0; i < temp->buf.size(); i++) {
-            temp->buf[i] = bc[i % 3];
+        // background color in RGB from LLaVA (this is the mean rgb color * 255)
+        const std::array<uint8_t, 3> pad_color = {122, 116, 104};
+
+        // resize the image to the target_size
+        image_manipulation::resize_and_pad_image(*img, *temp, clip_image_size{params.image_size, params.image_size}, pad_color);
+
+        clip_image_f32_ptr res(clip_image_f32_init());
+        normalize_image_u8_to_f32(*temp, *res, ctx->image_mean, ctx->image_std);
+        res_imgs->entries.push_back(std::move(res));
+        return true;
+
+    } else if (!params.image_grid_pinpoints.empty()) {
+        // 
"spatial_unpad" with "anyres" processing for llava-1.6 + auto const inst = llava_uhd::get_slice_instructions(ctx, original_size); + std::vector imgs = llava_uhd::slice_image(img, inst); + + for (size_t i = 0; i < imgs.size(); ++i) { + // clip_image_save_to_bmp(*imgs[i], "slice_" + std::to_string(i) + ".bmp"); + clip_image_f32_ptr res(clip_image_f32_init()); + normalize_image_u8_to_f32(*imgs[i], *res, ctx->image_mean, ctx->image_std); + res_imgs->entries.push_back(std::move(res)); } - // copy from the input image - for (int y = 0; y < img->ny; y++) { - for (int x = 0; x < img->nx; x++) { - const int i = 3 * (y * img->nx + x); - const int j = 3 * (y * temp->nx + x); - temp->buf[j] = img->buf[i]; - temp->buf[j+1] = img->buf[i+1]; - temp->buf[j+2] = img->buf[i+2]; - } - } - } else { - if (params.image_grid_pinpoints[0] != 0) { - // "spatial_unpad" with "anyres" processing for llava-1.6 - std::vector> possible_resolutions; - for (int i = 0; i < 32 && params.image_grid_pinpoints[i] != 0; i+=2) { - possible_resolutions.push_back({params.image_grid_pinpoints[i], params.image_grid_pinpoints[i+1]}); - } - std::pair best_resolution = select_best_resolution({img->nx, img->ny}, possible_resolutions); - // clip_image_save_to_bmp(*img, "input.bmp"); - resize_and_pad_image(*img, *temp, best_resolution); // we do not pad with mean-bg color anymore in llava-1.6 - // clip_image_save_to_bmp(*temp, "resized.bmp"); - // visually verify normalized image: - // normalize_image_u8_to_f32(*temp, *res, ctx->image_mean, ctx->image_std); - // { - // clip_image_u8 * temp2 = clip_image_u8_init(); - // clip_image_convert_f32_to_u8(*res, *temp2); - // clip_image_save_to_bmp(*temp2, "resized_normalized_f32.bmp"); - // clip_image_u8_free(temp2); - // } + return true; - std::vector patches = divide_to_patches_u8(*temp, params.image_size); // prepare spatial sorted main patches of image_size each (336 in llava-1.6) - - clip_image_u8 *image_original_resize = clip_image_u8_init(); - // bilinear_resize(*img, *image_original_resize, params.image_size, params.image_size); // in python this is "shortest_edge", but all CLIP are square - bicubic_resize(*img, *image_original_resize, params.image_size, params.image_size); // in python this is "shortest_edge", but all CLIP are square - patches.insert(patches.begin(), image_original_resize); - // clip_image_f32_batch_init(patches.size()); - res_imgs->size = patches.size(); - res_imgs->data = new clip_image_f32[res_imgs->size]; - int num=0; - for (auto& patch : patches) { - normalize_image_u8_to_f32(patch, &res_imgs->data[num], ctx->image_mean, ctx->image_std); - num++; - } - - for (size_t i = 0; i < patches.size(); i++) { - // LOG_DBG("patch %d: %d %d\n", i, patches[i]->nx, patches[i]->ny); - clip_image_u8_free(patches[i]); - } - - clip_image_u8_free(temp); - - return true; - } else { - temp->nx = img->nx; - temp->ny = img->ny; - temp->buf.resize(img->buf.size()); - memcpy(temp->buf.data(), img->buf.data(), temp->buf.size()); - } } - const int nx = temp->nx; - const int ny = temp->ny; - // clip_image_save_to_bmp(*temp, "resized_vanilla.bmp"); - - const int nx2 = ctx->vision_model.hparams.image_size; - const int ny2 = ctx->vision_model.hparams.image_size; - clip_image_f32 * res = clip_image_f32_init(); - res->nx = nx2; - res->ny = ny2; - res->buf.resize(3 * nx2 * ny2); - - const float scale = std::max(nx, ny) / (float)ctx->vision_model.hparams.image_size; - - const int nx3 = int(nx / scale + 0.5f); - const int ny3 = int(ny / scale + 0.5f); - - const auto & m3 = ctx->image_mean; // 
{0.48145466f, 0.4578275f, 0.40821073f}; - const auto & s3 = ctx->image_std; // {0.26862954f, 0.26130258f, 0.27577711f}; - - for (int y = 0; y < ny3; y++) { - for (int x = 0; x < nx3; x++) { - for (int c = 0; c < 3; c++) { - // linear interpolation - const float sx = (x + 0.5f) * scale - 0.5f; - const float sy = (y + 0.5f) * scale - 0.5f; - - const int x0 = std::max(0, (int)std::floor(sx)); - const int y0 = std::max(0, (int)std::floor(sy)); - - const int x1 = std::min(x0 + 1, nx - 1); - const int y1 = std::min(y0 + 1, ny - 1); - - const float dx = sx - x0; - const float dy = sy - y0; - - const int j00 = 3 * (y0 * nx + x0) + c; - const int j01 = 3 * (y0 * nx + x1) + c; - const int j10 = 3 * (y1 * nx + x0) + c; - const int j11 = 3 * (y1 * nx + x1) + c; - - const float v00 = temp->buf[j00]; - const float v01 = temp->buf[j01]; - const float v10 = temp->buf[j10]; - const float v11 = temp->buf[j11]; - - const float v0 = v00 * (1.0f - dx) + v01 * dx; - const float v1 = v10 * (1.0f - dx) + v11 * dx; - - const float v = v0 * (1.0f - dy) + v1 * dy; - - const uint8_t v2 = std::min(std::max(std::round(v), 0.0f), 255.0f); - - const int i = 3 * (y * nx3 + x) + c; - - res->buf[i] = ((float(v2) / 255.0f) - m3[c]) / s3[c]; - } - } - } - clip_image_u8_free(temp); - - // { - // clip_image_u8 * temp2 = clip_image_u8_init(); - // clip_image_convert_f32_to_u8(*res, *temp2); - // clip_image_save_to_bmp(*temp2, "resized_normalized_f32_vanilla.bmp"); - // clip_image_u8_free(temp2); - // } - // res_imgs.push_back(res); - - res_imgs->size = 1; - res_imgs->data = new clip_image_f32[res_imgs->size]; - res_imgs->data[0] = *res; - clip_image_f32_free(res); - - return true; + GGML_ASSERT(false && "Unknown image preprocessing type"); } ggml_tensor * clip_get_newline_tensor(const struct clip_ctx * ctx) { @@ -2290,17 +2254,15 @@ ggml_tensor * clip_get_newline_tensor(const struct clip_ctx * ctx) { } void clip_free(clip_ctx * ctx) { - ggml_free(ctx->ctx_data); - gguf_free(ctx->ctx_gguf); - - ggml_backend_buffer_free(ctx->params_buffer); - ggml_backend_free(ctx->backend); - ggml_gallocr_free(ctx->compute_alloc); + if (ctx == nullptr) { + return; + } delete ctx; } size_t clip_embd_nbytes(const struct clip_ctx * ctx) { - return clip_n_patches(ctx) * clip_n_mmproj_embd(ctx) * sizeof(float); + int extra_tokens = ctx->has_glm_projector ? 2 : 0; + return (clip_n_patches(ctx) + extra_tokens) * clip_n_mmproj_embd(ctx) * sizeof(float); } size_t clip_embd_nbytes_by_img(const struct clip_ctx * ctx, int img_h, int img_w) { @@ -2310,24 +2272,31 @@ size_t clip_embd_nbytes_by_img(const struct clip_ctx * ctx, int img_h, int img_w return clip_n_patches_by_img(ctx, &img) * clip_n_mmproj_embd(ctx) * sizeof(float); } -int32_t clip_image_size(const struct clip_ctx * ctx) { +int32_t clip_get_image_size(const struct clip_ctx * ctx) { return ctx->vision_model.hparams.image_size; } -int32_t clip_patch_size(const struct clip_ctx * ctx) { +int32_t clip_get_patch_size(const struct clip_ctx * ctx) { return ctx->vision_model.hparams.patch_size; } -int32_t clip_hidden_size(const struct clip_ctx * ctx) { +int32_t clip_get_hidden_size(const struct clip_ctx * ctx) { return ctx->vision_model.hparams.hidden_size; } const char * clip_patch_merge_type(const struct clip_ctx * ctx) { - return ctx->vision_model.hparams.mm_patch_merge_type; + return ctx->vision_model.hparams.mm_patch_merge_type == PATCH_MERGE_SPATIAL_UNPAD ? 
"spatial_unpad" : "flat"; } const int32_t * clip_image_grid(const struct clip_ctx * ctx) { - return ctx->vision_model.hparams.image_grid_pinpoints; + if (ctx->vision_model.hparams.image_grid_pinpoints.size()) { + return &ctx->vision_model.hparams.image_grid_pinpoints.front(); + } + return nullptr; +} + +size_t get_clip_image_grid_size(const struct clip_ctx * ctx) { + return ctx->vision_model.hparams.image_grid_pinpoints.size(); } int clip_n_patches(const struct clip_ctx * ctx) { @@ -2342,7 +2311,7 @@ int clip_n_patches_by_img(const struct clip_ctx * ctx, struct clip_image_f32 * i int n_patches = (params.image_size / params.patch_size) * (params.image_size / params.patch_size); - if (ctx->proj_type == PROJECTOR_TYPE_LDP || ctx->proj_type == PROJECTOR_TYPE_LDPV2) { + if (ctx->proj_type == PROJECTOR_TYPE_LDP || ctx->proj_type == PROJECTOR_TYPE_LDPV2 || ctx->proj_type == PROJECTOR_TYPE_GLM_EDGE) { n_patches /= 4; } else if (ctx->proj_type == PROJECTOR_TYPE_RESAMPLER) { if (ctx->minicpmv_version == 2) { @@ -2359,6 +2328,8 @@ int clip_n_patches_by_img(const struct clip_ctx * ctx, struct clip_image_f32 * i int x_patch = img->nx / patch_size + (int)(img->nx % patch_size > 0); int y_patch = img->ny / patch_size + (int)(img->ny % patch_size > 0); n_patches = x_patch * y_patch; + } else if (ctx->proj_type == PROJECTOR_TYPE_GEMMA3) { + n_patches = 256; } return n_patches; @@ -2452,33 +2423,44 @@ static std::vector> get_2d_sincos_pos_embed(int embed_dim, co bool clip_image_encode(struct clip_ctx * ctx, const int n_threads, clip_image_f32 * img, float * vec) { if (!ctx->has_vision_encoder) { - LOG_ERR("This gguf file seems to have no vision encoder\n"); + LOG_ERR("%s: This gguf file seems to have no vision encoder\n", __func__); return false; } - clip_image_f32_batch imgs{}; - imgs.size = 1; - imgs.data = img; + clip_image_f32_batch imgs; + clip_image_f32_ptr img_copy(clip_image_f32_init()); + *img_copy = *img; + imgs.entries.push_back(std::move(img_copy)); + return clip_image_batch_encode(ctx, n_threads, &imgs, vec); } -bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_image_f32_batch * imgs, float * vec) { +bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_image_f32_batch * imgs_c_ptr, float * vec) { + const clip_image_f32_batch & imgs = *imgs_c_ptr; + if (!ctx->has_vision_encoder) { - LOG_ERR("This gguf file seems to have no vision encoder\n"); + LOG_ERR("%s: This gguf file seems to have no vision encoder\n", __func__); return false; } - int batch_size = imgs->size; + int batch_size = imgs.entries.size(); if (ctx->has_llava_projector) { GGML_ASSERT(batch_size == 1); // TODO: support multiple images } if (ctx->has_minicpmv_projector) { GGML_ASSERT(batch_size == 1); } + if (ctx->has_glm_projector) { + GGML_ASSERT(batch_size == 1); + ggml_tensor * boi = ctx->vision_model.boi_w; + ggml_backend_tensor_get(boi,vec,0,ggml_nbytes(boi)); + vec = (float*)(vec+ggml_nelements(boi)); //offset for boi + } // build the inference graph + ggml_backend_sched_reset(ctx->sched.get()); ggml_cgraph * gf = clip_image_build_graph(ctx, imgs, ctx->load_image_size, true); - ggml_gallocr_alloc_graph(ctx->compute_alloc, gf); + ggml_backend_sched_alloc_graph(ctx->sched.get(), gf); // set inputs const auto & model = ctx->vision_model; @@ -2488,25 +2470,22 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima int image_size_width = image_size; int image_size_height = image_size; if (ctx->has_minicpmv_projector | ctx->has_qwen2vl_merger) { - 
image_size_width = imgs->data[0].nx; - image_size_height = imgs->data[0].ny; + image_size_width = imgs.entries[0]->nx; + image_size_height = imgs.entries[0]->ny; } const int patch_size = hparams.patch_size; const int num_patches = ((image_size_width / patch_size) * (image_size_height / patch_size)); - const int num_positions = num_patches + (ctx->has_class_embedding ? 1 : 0); - if(ctx->load_image_size==nullptr){ - ctx->load_image_size= clip_image_size_init(); - } - const int pos_w = ctx->load_image_size->width/patch_size; - const int pos_h = ctx->load_image_size->height/patch_size; + const int num_positions = num_patches + (model.class_embedding ? 1 : 0); + const int pos_w = ctx->load_image_size.width / patch_size; + const int pos_h = ctx->load_image_size.height / patch_size; { struct ggml_tensor * inp_raw = ggml_graph_get_tensor(gf, "inp_raw"); float * data = (float *)malloc(ggml_nbytes(inp_raw)); - for (size_t i = 0; i < imgs->size; i++) { - const int nx = imgs->data[i].nx; - const int ny = imgs->data[i].ny; + for (size_t i = 0; i < imgs.entries.size(); i++) { + const int nx = imgs.entries[i]->nx; + const int ny = imgs.entries[i]->ny; if (!(ctx->has_minicpmv_projector | ctx->has_qwen2vl_merger)) { GGML_ASSERT(nx == image_size && ny == image_size); } @@ -2517,7 +2496,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima for (int k = 0; k < 3; k++) { for (int y = 0; y < ny; y++) { for (int x = 0; x < nx; x++) { - data[(b * 3 * n) + k * n + y * nx + x] = imgs->data[b].buf[3 * (y * nx + x) + k]; + data[(b * 3 * n) + k * n + y * nx + x] = imgs.entries[b]->buf[3 * (y * nx + x) + k]; } } } @@ -2578,16 +2557,14 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima free(pos_embed_data); } } - else{ - { - if (ctx->has_class_embedding) { - struct ggml_tensor * embeddings = ggml_graph_get_tensor(gf, "embeddings"); + else { + if (model.class_embedding) { + struct ggml_tensor * embeddings = ggml_graph_get_tensor(gf, "embeddings"); - void* zero_mem = malloc(ggml_nbytes(embeddings)); - memset(zero_mem, 0, ggml_nbytes(embeddings)); - ggml_backend_tensor_set(embeddings, zero_mem, 0, ggml_nbytes(embeddings)); - free(zero_mem); - } + void* zero_mem = malloc(ggml_nbytes(embeddings)); + memset(zero_mem, 0, ggml_nbytes(embeddings)); + ggml_backend_tensor_set(embeddings, zero_mem, 0, ggml_nbytes(embeddings)); + free(zero_mem); } if (ctx->has_qwen2vl_merger) { @@ -2617,6 +2594,9 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima ggml_backend_tensor_set(positions, positions_data, 0, ggml_nbytes(positions)); free(positions_data); } + else if (ctx->proj_type == PROJECTOR_TYPE_GEMMA3) { + // do nothing + } else { struct ggml_tensor * positions = ggml_graph_get_tensor(gf, "positions"); @@ -2627,11 +2607,15 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima ggml_backend_tensor_set(positions, positions_data, 0, ggml_nbytes(positions)); free(positions_data); - { + if (!ctx->has_glm_projector) { struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches"); + // The patches vector is used to get rows to index into the embeds with; + // we should skip dim 0 only if we have CLS to avoid going out of bounds + // when retrieving the rows. + int patch_offset = model.class_embedding ? 
1 : 0; int* patches_data = (int*)malloc(ggml_nbytes(patches)); for (int i = 0; i < num_patches; i++) { - patches_data[i] = i + 1; + patches_data[i] = i + patch_offset; } ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches)); free(patches_data); @@ -2639,11 +2623,13 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima } } - if (ggml_backend_is_cpu(ctx->backend)) { - ggml_backend_cpu_set_n_threads(ctx->backend, n_threads); - } + ggml_backend_cpu_set_n_threads(ctx->backend_cpu, n_threads); - ggml_backend_graph_compute(ctx->backend, gf); + auto status = ggml_backend_sched_graph_compute(ctx->sched.get(), gf); + if (status != GGML_STATUS_SUCCESS) { + LOG_ERR("%s: ggml_backend_sched_graph_compute failed with error %d\n", __func__, status); + return false; + } // the last node is the embedding tensor struct ggml_tensor * embeddings = ggml_graph_node(gf, -1); @@ -2651,19 +2637,27 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima // copy the embeddings to the location passed by the user ggml_backend_tensor_get(embeddings, vec, 0, ggml_nbytes(embeddings)); + if (ctx->has_glm_projector) { + //eoi + ggml_tensor * eoi = ctx->vision_model.eoi_w; + int offset = ggml_nelements(embeddings); + ggml_backend_tensor_get(eoi, vec+offset, 0, ggml_nbytes(eoi)); + } + return true; } bool clip_model_quantize(const char * fname_inp, const char * fname_out, const int itype) { - ggml_type type = GGML_TYPE_Q4_1; - assert(itype < GGML_TYPE_COUNT); - type = static_cast(itype); + ggml_type type = static_cast(itype); - auto * ctx_clip = clip_model_load(fname_inp, 2); + auto * ctx_clip = clip_init(fname_inp, clip_context_params{ + /* use_gpu */ false, + /* verbosity */ GGML_LOG_LEVEL_ERROR, + }); - const auto & ctx_src = ctx_clip->ctx_gguf; - const auto & ctx_data = ctx_clip->ctx_data; + const auto & ctx_src = ctx_clip->ctx_gguf.get(); + const auto & ctx_data = ctx_clip->ctx_data.get(); auto * ctx_out = gguf_init_empty(); gguf_set_kv(ctx_out, ctx_src); @@ -2711,8 +2705,8 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i } } - // quantize only 2D tensors - quantize &= (ggml_n_dims(cur) == 2); + // quantize only 2D tensors and bigger than block size + quantize &= (ggml_n_dims(cur) == 2) && cur->ne[0] > ggml_blck_size(type); if (quantize) { new_type = type; @@ -2737,7 +2731,7 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i f32_data = (float *)conv_buf.data(); break; default: - LOG_ERR("Please use an input file in f32 or f16\n"); + LOG_ERR("%s: Please use an input file in f32 or f16\n", __func__); gguf_free(ctx_out); return false; } @@ -2812,12 +2806,18 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) { return 3584; } } + if (ctx->proj_type == PROJECTOR_TYPE_GLM_EDGE){ + return ctx->vision_model.mm_model_mlp_3_w->ne[1]; + } if (ctx->proj_type == PROJECTOR_TYPE_MERGER) { return ctx->vision_model.mm_1_b->ne[0]; } + if (ctx->proj_type == PROJECTOR_TYPE_GEMMA3) { + return ctx->vision_model.mm_input_proj_w->ne[0]; + } std::string proj_type = PROJECTOR_TYPE_NAMES[ctx->proj_type]; - throw std::runtime_error(format("%s: don't support projector with: %s currently\n", __func__, proj_type.c_str())); + throw std::runtime_error(string_format("%s: don't support projector with: %s currently\n", __func__, proj_type.c_str())); } int clip_is_minicpmv(const struct clip_ctx * ctx) { @@ -2827,10 +2827,44 @@ int clip_is_minicpmv(const struct clip_ctx * ctx) { return 0; } +bool clip_is_glm(const 
struct clip_ctx * ctx) { + return ctx->has_glm_projector; +} + bool clip_is_qwen2vl(const struct clip_ctx * ctx) { return ctx->has_qwen2vl_merger; } +bool clip_is_llava(const struct clip_ctx * ctx) { + return ctx->has_llava_projector; +} + +bool clip_is_gemma3(const struct clip_ctx * ctx) { + return ctx->proj_type == PROJECTOR_TYPE_GEMMA3; +} + +// Determine the number of encoder layers to iterate over +int get_deepest_feature_layer(const struct clip_ctx * ctx) { + // Get the index of the second to last layer; this is the + // default for models that have a llava projector + const auto & hparams = ctx->vision_model.hparams; + int n_layer = hparams.n_layer - 1; + int deepest_feature_layer = -1; + + // Handle other projectors; incrementing here indicates that we + // should use the last encoder layer for the vision features. + if (ctx->has_minicpmv_projector || ctx->has_glm_projector || ctx->has_qwen2vl_merger) { + n_layer += 1; + } + + // If we set explicit vision feature layers, only go up to the deepest one + for (const auto & feature_layer : hparams.vision_feature_layer) { + if (feature_layer > deepest_feature_layer) { + deepest_feature_layer = feature_layer; + } + } + return deepest_feature_layer < 0 ? n_layer : deepest_feature_layer; +} bool clip_encode_float_image (struct clip_ctx * ctx, int n_threads, float * img, int h, int w, float * vec) { clip_image_f32 clip_img; @@ -2844,3 +2878,11 @@ bool clip_encode_float_image (struct clip_ctx * ctx, int n_threads, float * img, clip_image_encode(ctx, n_threads, &clip_img, vec); return true; } + +// +// API used internally with mtmd +// + +projector_type clip_get_projector_type(const struct clip_ctx * ctx) { + return ctx->proj_type; +} diff --git a/examples/llava/clip.h b/examples/llava/clip.h index 1603edd265..5fc45d3e23 100644 --- a/examples/llava/clip.h +++ b/examples/llava/clip.h @@ -1,6 +1,7 @@ #ifndef CLIP_H #define CLIP_H +#include "ggml.h" #include #include @@ -29,32 +30,34 @@ struct clip_image_size { int height; }; -struct clip_image_u8_batch { - struct clip_image_u8 * data; - size_t size; +struct clip_image_f32; +struct clip_image_u8_batch; +struct clip_image_f32_batch; + +struct clip_context_params { + bool use_gpu; + enum ggml_log_level verbosity; }; -struct clip_image_f32_batch { - struct clip_image_f32 * data; - size_t size; -}; +// deprecated, use clip_init +CLIP_API struct clip_ctx * clip_model_load(const char * fname, int verbosity); -CLIP_API struct clip_ctx * clip_model_load (const char * fname, int verbosity); -CLIP_API struct clip_ctx * clip_model_load_cpu(const char * fname, int verbosity); +CLIP_API struct clip_ctx * clip_init(const char * fname, struct clip_context_params ctx_params); CLIP_API void clip_free(struct clip_ctx * ctx); CLIP_API size_t clip_embd_nbytes(const struct clip_ctx * ctx); CLIP_API size_t clip_embd_nbytes_by_img(const struct clip_ctx * ctx, int img_h, int img_w); -CLIP_API int32_t clip_image_size (const struct clip_ctx * ctx); -CLIP_API int32_t clip_patch_size (const struct clip_ctx * ctx); -CLIP_API int32_t clip_hidden_size(const struct clip_ctx * ctx); +CLIP_API int32_t clip_get_image_size (const struct clip_ctx * ctx); +CLIP_API int32_t clip_get_patch_size (const struct clip_ctx * ctx); +CLIP_API int32_t clip_get_hidden_size(const struct clip_ctx * ctx); // TODO: should be enum, not string CLIP_API const char * clip_patch_merge_type(const struct clip_ctx * ctx); CLIP_API const int32_t * clip_image_grid(const struct clip_ctx * ctx); +CLIP_API size_t get_clip_image_grid_size(const struct clip_ctx * 
ctx); CLIP_API int clip_n_patches (const struct clip_ctx * ctx); CLIP_API int clip_n_patches_by_img (const struct clip_ctx * ctx, struct clip_image_f32 * img); @@ -64,15 +67,32 @@ CLIP_API int clip_uhd_num_image_embeds_col(struct clip_ctx * ctx_clip); CLIP_API void clip_add_load_image_size(struct clip_ctx * ctx_clip, struct clip_image_size * load_image_size); CLIP_API struct clip_image_size * clip_get_load_image_size(struct clip_ctx * ctx_clip); -CLIP_API struct clip_image_size * clip_image_size_init(); -CLIP_API struct clip_image_u8 * clip_image_u8_init (); -CLIP_API struct clip_image_f32 * clip_image_f32_init(); +CLIP_API struct clip_image_size * clip_image_size_init(); +CLIP_API struct clip_image_u8 * clip_image_u8_init (); +CLIP_API struct clip_image_f32 * clip_image_f32_init(); +CLIP_API struct clip_image_f32_batch * clip_image_f32_batch_init(); // only used by libllava +// nx, ny are the output image dimensions +CLIP_API unsigned char * clip_image_u8_get_data(struct clip_image_u8 * img, uint32_t * nx, uint32_t * ny); + +CLIP_API void clip_image_size_free (struct clip_image_size * img_size); CLIP_API void clip_image_u8_free (struct clip_image_u8 * img); CLIP_API void clip_image_f32_free(struct clip_image_f32 * img); CLIP_API void clip_image_u8_batch_free (struct clip_image_u8_batch * batch); CLIP_API void clip_image_f32_batch_free(struct clip_image_f32_batch * batch); +// use for accessing underlay data of clip_image_f32_batch +CLIP_API size_t clip_image_f32_batch_n_images(const struct clip_image_f32_batch * batch); // equivalent to batch->size() +CLIP_API size_t clip_image_f32_batch_nx(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->nx +CLIP_API size_t clip_image_f32_batch_ny(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->ny +CLIP_API struct clip_image_f32 * clip_image_f32_get_img(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->data + +/** + * Build image from pixels decoded by other libraries instead of stb_image.h for better performance. 
+ * The memory layout is RGBRGBRGB..., input buffer length must be 3*nx*ny bytes + */ +CLIP_API void clip_build_img_from_pixels(const unsigned char * rgb_pixels, int nx, int ny, struct clip_image_u8 * img); + CLIP_API bool clip_image_load_from_file(const char * fname, struct clip_image_u8 * img); /** interpret bytes as an image file with length bytes_length, and use the result to populate img */ @@ -89,10 +109,16 @@ CLIP_API bool clip_image_batch_encode(struct clip_ctx * ctx, int n_threads, cons CLIP_API bool clip_model_quantize(const char * fname_inp, const char * fname_out, int itype); CLIP_API int clip_is_minicpmv(const struct clip_ctx * ctx); +CLIP_API bool clip_is_glm(const struct clip_ctx * ctx); CLIP_API bool clip_is_qwen2vl(const struct clip_ctx * ctx); +CLIP_API bool clip_is_llava(const struct clip_ctx * ctx); +CLIP_API bool clip_is_gemma3(const struct clip_ctx * ctx); + +CLIP_API int get_deepest_feature_layer(const struct clip_ctx * ctx); CLIP_API bool clip_encode_float_image (struct clip_ctx * ctx, int n_threads, float * img, int h, int w, float * vec); + #ifdef __cplusplus } #endif diff --git a/examples/llava/convert_image_encoder_to_gguf.py b/examples/llava/convert_image_encoder_to_gguf.py index 4fa1d6ceae..2949faec42 100644 --- a/examples/llava/convert_image_encoder_to_gguf.py +++ b/examples/llava/convert_image_encoder_to_gguf.py @@ -6,7 +6,7 @@ import re import torch import numpy as np from gguf import * -from transformers import CLIPModel, CLIPProcessor, CLIPVisionModel +from transformers import CLIPModel, CLIPProcessor, CLIPVisionModel, SiglipVisionModel TEXT = "clip.text" VISION = "clip.vision" @@ -37,6 +37,18 @@ def should_skip_tensor(name: str, has_text: bool, has_vision: bool, has_llava: b def get_tensor_name(name: str) -> str: + # Standardize the transformers llava next keys for + # image newline / mm projector with the classes in haotian-liu LLaVA + if name == "image_newline": + return "model.image_newline" + if name.startswith("multi_modal_projector"): + name = name.replace("multi_modal_projector", "mm") + if "linear_1" in name: + name = name.replace("linear_1", "0") + if "linear_2" in name: + name = name.replace("linear_2", "2") + return name + if "projection" in name: return name if "mm_projector" in name: @@ -77,14 +89,21 @@ def bytes_to_unicode(): ap = argparse.ArgumentParser() ap.add_argument("-m", "--model-dir", help="Path to model directory cloned from HF Hub", required=True) ap.add_argument("--use-f32", action="store_true", default=False, help="Use f32 instead of f16") +ap.add_argument('--bigendian', action="store_true", default=False, help="Model is executed on big-endian machine") ap.add_argument("--text-only", action="store_true", required=False, help="Save a text-only model. It can't be used to encode images") ap.add_argument("--vision-only", action="store_true", required=False, help="Save a vision-only model. 
It can't be used to encode texts") ap.add_argument("--clip-model-is-vision", action="store_true", required=False, help="The clip model is a pure vision model (ShareGPT4V vision extract for example)") -ap.add_argument("--clip-model-is-openclip", action="store_true", required=False, + +# Selectable visual encoders that are compatible with this script +encoder_group = ap.add_mutually_exclusive_group() +encoder_group.add_argument("--clip-model-is-openclip", action="store_true", required=False, help="The clip model is from openclip (for ViT-SO400M type))") +encoder_group.add_argument("--clip-model-is-siglip", action="store_true", required=False, + help="the visual encoder is Siglip.") + ap.add_argument("--llava-projector", help="Path to llava.projector file. If specified, save an image encoder for LLaVA models.") ap.add_argument("--projector-type", help="Type of projector. Possible values: mlp, ldp, ldpv2", choices=["mlp", "ldp", "ldpv2"], default="mlp") ap.add_argument("-o", "--output-dir", help="Directory to save GGUF files. Default is the original model directory", default=None) @@ -109,7 +128,12 @@ if args.use_f32: # output in the same directory as the model if output_dir is None dir_model = args.model_dir -if args.clip_model_is_vision or not os.path.exists(dir_model + "/vocab.json") or args.clip_model_is_openclip: +if ( + args.clip_model_is_vision or + not os.path.exists(dir_model + "/vocab.json") or + args.clip_model_is_openclip or + args.clip_model_is_siglip +): vocab = None tokens = None else: @@ -137,7 +161,10 @@ ftype = 1 if args.use_f32: ftype = 0 -if args.clip_model_is_vision or args.clip_model_is_openclip: +if args.clip_model_is_siglip: + model = SiglipVisionModel.from_pretrained(dir_model) + processor = None +elif args.clip_model_is_vision or args.clip_model_is_openclip: model = CLIPVisionModel.from_pretrained(dir_model) processor = None else: @@ -165,7 +192,7 @@ output_dir = args.output_dir if args.output_dir is not None else dir_model os.makedirs(output_dir, exist_ok=True) output_prefix = os.path.basename(output_dir).replace("ggml_", "") fname_out = os.path.join(output_dir, f"{fname_middle}model-{ftype_str[ftype]}.gguf") -fout = GGUFWriter(path=fname_out, arch="clip") +fout = GGUFWriter(path=fname_out, arch="clip", endianess=GGUFEndian.LITTLE if not args.bigendian else GGUFEndian.BIG) fout.add_bool("clip.has_text_encoder", has_text_encoder) fout.add_bool("clip.has_vision_encoder", has_vision_encoder) @@ -187,26 +214,71 @@ else: if has_text_encoder: assert t_hparams is not None assert tokens is not None + if args.clip_model_is_siglip: + text_projection_dim = 0 + else: + text_projection_dim = t_hparams.get("projection_dim", config["projection_dim"]) # text_model hparams fout.add_uint32(k(KEY_CONTEXT_LENGTH, TEXT), t_hparams["max_position_embeddings"]) fout.add_uint32(k(KEY_EMBEDDING_LENGTH, TEXT), t_hparams["hidden_size"]) fout.add_uint32(k(KEY_FEED_FORWARD_LENGTH, TEXT), t_hparams["intermediate_size"]) - fout.add_uint32("clip.text.projection_dim", t_hparams.get("projection_dim", config["projection_dim"])) + fout.add_uint32("clip.text.projection_dim", text_projection_dim) fout.add_uint32(k(KEY_ATTENTION_HEAD_COUNT, TEXT), t_hparams["num_attention_heads"]) fout.add_float32(k(KEY_ATTENTION_LAYERNORM_EPS, TEXT), t_hparams["layer_norm_eps"]) fout.add_uint32(k(KEY_BLOCK_COUNT, TEXT), t_hparams["num_hidden_layers"]) fout.add_token_list(tokens) + + +def get_non_negative_vision_feature_layers(v_hparams): + """ + Determine the vision feature layer(s) for the llava model, which are indices 
into the
+    hidden states of the visual encoder. Note that the hidden states array generally takes the
+    form:
+
+        [<emb input>, <out layer 1>, ... <out layer n>]
+
+    so feature indices should be offset as n+1 to get the output of encoder block n.
+    We convert all vision feature layers to non-negative so that -1 can be used in
+    the model as an unset value. If no vision feature layer is found, we leave it unset.
+    """
+    num_hidden_layers = v_hparams["num_hidden_layers"]
+    to_non_negative = lambda layer_idx: layer_idx if layer_idx >= 0 else num_hidden_layers + layer_idx + 1
+    feature_layers_key = None
+    # Key used for llava models in transformers
+    if "vision_feature_layer" in config:
+        feature_layers_key = "vision_feature_layer"
+    # Key used for llava models in the original format
+    elif "mm_vision_select_layer" in config:
+        feature_layers_key = "mm_vision_select_layer"
+    if feature_layers_key is not None:
+        feature_layers = config[feature_layers_key]
+        if isinstance(feature_layers, int):
+            feature_layers = [feature_layers]
+        return [to_non_negative(feature_layer) for feature_layer in feature_layers]
+
+# Determine if we have explicitly specified vision feature layers in our config
+feature_layers = get_non_negative_vision_feature_layers(v_hparams)
+
 if has_vision_encoder:
-    # vision_model hparams
+    # Siglip does not have a visual projector; set projection dim to 0
+    if args.clip_model_is_siglip:
+        visual_projection_dim = 0
+    else:
+        visual_projection_dim = v_hparams.get("projection_dim", config["projection_dim"])
+
+    # set vision_model hparams
     fout.add_uint32("clip.vision.image_size", v_hparams["image_size"])
     fout.add_uint32("clip.vision.patch_size", v_hparams["patch_size"])
     fout.add_uint32(k(KEY_EMBEDDING_LENGTH, VISION), v_hparams["hidden_size"])
     fout.add_uint32(k(KEY_FEED_FORWARD_LENGTH, VISION), v_hparams["intermediate_size"])
-    fout.add_uint32("clip.vision.projection_dim", v_hparams.get("projection_dim", config["projection_dim"]))
+    fout.add_uint32("clip.vision.projection_dim", visual_projection_dim)
     fout.add_uint32(k(KEY_ATTENTION_HEAD_COUNT, VISION), v_hparams["num_attention_heads"])
     fout.add_float32(k(KEY_ATTENTION_LAYERNORM_EPS, VISION), v_hparams["layer_norm_eps"])
-    block_count = v_hparams["num_hidden_layers"] - 1 if has_llava_projector else v_hparams["num_hidden_layers"]
+    if feature_layers:
+        block_count = max(feature_layers)
+    else:
+        block_count = v_hparams["num_hidden_layers"] - 1 if has_llava_projector else v_hparams["num_hidden_layers"]
     fout.add_uint32(k(KEY_BLOCK_COUNT, VISION), block_count)
 #    /**
 #         "image_grid_pinpoints": [
@@ -258,7 +330,8 @@ if has_vision_encoder:
         fout.add_string("clip.vision.mm_patch_merge_type", v_hparams["mm_patch_merge_type"])
     if "mm_projector_type" in v_hparams:
         fout.add_string("clip.vision.mm_projector_type", v_hparams["mm_projector_type"])
-
+    if feature_layers:
+        fout.add_array("clip.vision.feature_layer", feature_layers)
 
     if processor is not None:
         image_mean = processor.image_processor.image_mean if args.image_mean is None or args.image_mean == default_image_mean else args.image_mean  # pyright: ignore[reportAttributeAccessIssue]
@@ -274,7 +347,13 @@ fout.add_bool("clip.use_gelu", use_gelu)
 
 if has_llava_projector:
-    model.vision_model.encoder.layers.pop(-1)
+    # By default, we drop the last layer for llava projector
+    # models unless we have explicitly set vision feature layers
+    if feature_layers is None:
+        model.vision_model.encoder.layers.pop(-1)
+    else:
+        model.vision_model.encoder.layers = model.vision_model.encoder.layers[:max(feature_layers)]
+
     projector = 
torch.load(args.llava_projector)
     for name, data in projector.items():
         name = get_tensor_name(name)
diff --git a/examples/llava/gemma3-cli.cpp b/examples/llava/gemma3-cli.cpp
new file mode 100644
index 0000000000..3d56647506
--- /dev/null
+++ b/examples/llava/gemma3-cli.cpp
@@ -0,0 +1,323 @@
+#include "arg.h"
+#include "log.h"
+#include "common.h"
+#include "sampling.h"
+#include "llama.h"
+#include "ggml.h"
+#include "console.h"
+#include "chat.h"
+#include "mtmd.h"
+
+#include <vector>
+#include <limits.h>
+#include <cinttypes>
+
+#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
+#include <signal.h>
+#include <unistd.h>
+#elif defined (_WIN32)
+#define WIN32_LEAN_AND_MEAN
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+#include <windows.h>
+#include <signal.h>
+#endif
+
+static bool g_is_generating = false;
+
+/**
+ * Please note that this is NOT production-ready code.
+ * It is a playground for trying Gemma 3 vision capabilities.
+ * For contributors: please keep this code simple and easy to understand.
+ */
+
+static void show_additional_info(int /*argc*/, char ** argv) {
+    LOG(
+        "Experimental CLI for using Gemma 3 vision model\n\n"
+        "Usage: %s [options] -m <model> --mmproj <mmproj> --image <image> -p <prompt>\n\n"
+        "  -m and --mmproj are required\n"
+        "  --image and -p are optional, if NOT provided, the CLI will run in chat mode\n",
+        argv[0]
+    );
+}
+
+#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32)
+static void sigint_handler(int signo) {
+    if (signo == SIGINT) {
+        if (g_is_generating) {
+            g_is_generating = false;
+        } else {
+            console::cleanup();
+            LOG("\nInterrupted by user\n");
+            _exit(130);
+        }
+    }
+}
+#endif
+
+struct gemma3_context {
+    mtmd_context_ptr ctx_vision;
+    common_init_result llama_init;
+
+    llama_model       * model;
+    llama_context     * lctx;
+    const llama_vocab * vocab;
+    llama_batch         batch;
+    int                 n_batch;
+
+    // note: we know that the gemma3 template is "linear", meaning each turn is completely separated from the others
+    // so here we don't need to keep track of chat history
+    common_chat_templates_ptr tmpls;
+
+    int n_threads    = 1;
+    llama_pos n_past = 0;
+
+    gemma3_context(common_params & params) : llama_init(common_init_from_params(params)) {
+        model = llama_init.model.get();
+        lctx = llama_init.context.get();
+        vocab = llama_model_get_vocab(model);
+        n_threads = params.cpuparams.n_threads;
+        batch = llama_batch_init(params.n_batch, 0, 1);
+        n_batch = params.n_batch;
+        tmpls = common_chat_templates_init(model, params.chat_template);
+        init_vision_context(params);
+    }
+
+    void init_vision_context(common_params & params) {
+        const char * clip_path = params.mmproj.path.c_str();
+        ctx_vision.reset(mtmd_init_from_file(clip_path, model, mtmd_context_params{
+            /* use_gpu */   true,
+            /* timings */   true,
+            /* n_threads */ params.cpuparams.n_threads,
+            /* verbosity */ GGML_LOG_LEVEL_INFO,
+        }));
+        if (!ctx_vision.get()) {
+            LOG_ERR("Failed to load vision model from %s\n", clip_path);
+            exit(1);
+        }
+    }
+};
+
+struct decode_embd_batch {
+    std::vector<llama_pos>      pos;
+    std::vector<int32_t>        n_seq_id;
+    std::vector<llama_seq_id>   seq_id_0;
+    std::vector<llama_seq_id *> seq_ids;
+    std::vector<int8_t>         logits;
+    llama_batch batch;
+    decode_embd_batch(float * embd, int32_t n_tokens, llama_pos pos_0, llama_seq_id seq_id) {
+        pos     .resize(n_tokens);
+        n_seq_id.resize(n_tokens);
+        seq_ids .resize(n_tokens + 1);
+        logits  .resize(n_tokens);
+        seq_id_0.resize(1);
+        seq_id_0[0] = seq_id;
+        seq_ids [n_tokens] = nullptr;
+        batch = {
+            /*n_tokens       =*/ n_tokens,
+            /*tokens         =*/ nullptr,
+            /*embd           =*/ embd,
+            /*pos            =*/ pos.data(),
+            /*n_seq_id       =*/ n_seq_id.data(),
+            /*seq_id         =*/ seq_ids.data(),
+            /*logits         =*/ 
logits.data(), + }; + for (int i = 0; i < n_tokens; i++) { + batch.pos [i] = pos_0 + i; + batch.n_seq_id[i] = 1; + batch.seq_id [i] = seq_id_0.data(); + batch.logits [i] = false; + } + } +}; + +static int generate_response(gemma3_context & ctx, common_sampler * smpl, int n_predict) { + for (int i = 0; i < n_predict; i++) { + if (i > n_predict || !g_is_generating) { + printf("\n"); + break; + } + + llama_token token_id = common_sampler_sample(smpl, ctx.lctx, -1); + common_sampler_accept(smpl, token_id, true); + + if (llama_vocab_is_eog(ctx.vocab, token_id)) { + printf("\n"); + break; // end of generation + } + + printf("%s", common_token_to_piece(ctx.lctx, token_id).c_str()); + fflush(stdout); + + // eval the token + common_batch_clear(ctx.batch); + common_batch_add(ctx.batch, token_id, ctx.n_past++, {0}, true); + if (llama_decode(ctx.lctx, ctx.batch)) { + LOG_ERR("failed to decode token\n"); + return 1; + } + } + return 0; +} + +static int eval_message(gemma3_context & ctx, common_chat_msg & msg, std::vector & images_fname, bool add_bos = false) { + std::vector bitmaps; + + common_chat_templates_inputs tmpl_inputs; + tmpl_inputs.messages = {msg}; + tmpl_inputs.add_generation_prompt = true; + tmpl_inputs.use_jinja = false; // jinja is buggy here + auto formatted_chat = common_chat_templates_apply(ctx.tmpls.get(), tmpl_inputs); + LOG_DBG("formatted_chat.prompt: %s\n", formatted_chat.prompt.c_str()); + + for (auto & fname : images_fname) { + mtmd_bitmap bitmap; + if (mtmd_helper_bitmap_init_from_file(fname.c_str(), bitmap)) { + LOG_ERR("Unable to load image %s\n", fname.c_str()); + return 2; // image not found + } + bitmaps.push_back(std::move(bitmap)); + } + + mtmd_input_text text; + text.text = formatted_chat.prompt; + text.add_special = add_bos; + text.parse_special = true; + mtmd_input_chunks chunks; + int32_t res = mtmd_tokenize(ctx.ctx_vision.get(), chunks, text, bitmaps); + if (res != 0) { + LOG_ERR("Unable to tokenize prompt, res = %d\n", res); + return 1; + } + + if (mtmd_helper_eval(ctx.ctx_vision.get(), ctx.lctx, chunks, ctx.n_past, 0, ctx.n_batch)) { + LOG_ERR("Unable to eval prompt\n"); + return 1; + } + + ctx.n_past += mtmd_helper_get_n_tokens(chunks); + + return 0; +} + +int main(int argc, char ** argv) { + ggml_time_init(); + + common_params params; + params.sampling.temp = 0.2; // lower temp by default for better quality + + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_LLAVA, show_additional_info)) { + return 1; + } + + common_init(); + + if (params.mmproj.path.empty()) { + show_additional_info(argc, argv); + return 1; + } + + gemma3_context ctx(params); + printf("%s: %s\n", __func__, params.model.path.c_str()); + + bool is_single_turn = !params.prompt.empty() && !params.image.empty(); + + struct common_sampler * smpl = common_sampler_init(ctx.model, params.sampling); + int n_predict = params.n_predict < 0 ? INT_MAX : params.n_predict; + + // ctrl+C handling + { +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) + struct sigaction sigint_action; + sigint_action.sa_handler = sigint_handler; + sigemptyset (&sigint_action.sa_mask); + sigint_action.sa_flags = 0; + sigaction(SIGINT, &sigint_action, NULL); +#elif defined (_WIN32) + auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL { + return (ctrl_type == CTRL_C_EVENT) ? 
(sigint_handler(SIGINT), true) : false; + }; + SetConsoleCtrlHandler(reinterpret_cast(console_ctrl_handler), true); +#endif + } + + if (is_single_turn) { + g_is_generating = true; + if (params.prompt.find("<__image__>") == std::string::npos) { + params.prompt += " <__image__>"; + } + common_chat_msg msg; + msg.role = "user"; + msg.content = params.prompt; + if (eval_message(ctx, msg, params.image, true)) { + return 1; + } + if (generate_response(ctx, smpl, n_predict)) { + return 1; + } + + } else { + LOG("\n Running in chat mode, available commands:"); + LOG("\n /image load an image"); + LOG("\n /clear clear the chat history"); + LOG("\n /quit or /exit exit the program"); + LOG("\n"); + + bool is_first_msg = true; + std::vector images_fname; + std::string content; + + while (true) { + g_is_generating = false; + LOG("\n> "); + console::set_display(console::user_input); + std::string line; + console::readline(line, false); + console::set_display(console::reset); + line = string_strip(line); + if (line.empty()) { + continue; + } + if (line == "/quit" || line == "/exit") { + break; + } + if (line == "/clear") { + ctx.n_past = 0; + llama_kv_self_seq_rm(ctx.lctx, 0, 1, -1); // keep BOS + LOG("Chat history cleared\n\n"); + continue; + } + g_is_generating = true; + if (line.find("/image") == 0) { + std::string image = line.substr(7); + images_fname.push_back(string_strip(image)); + content += "<__image__>"; + continue; + } else { + content += line; + } + common_chat_msg msg; + msg.role = "user"; + msg.content = content; + int ret = eval_message(ctx, msg, images_fname, is_first_msg); + if (ret == 2) { + // non-fatal error + images_fname.clear(); + content.clear(); + continue; + } + if (ret) { + return 1; + } + if (generate_response(ctx, smpl, n_predict)) { + return 1; + } + images_fname.clear(); + content.clear(); + is_first_msg = false; + } + } + llama_perf_context_print(ctx.lctx); + return 0; +} diff --git a/examples/llava/gemma3_convert_encoder_to_gguf.py b/examples/llava/gemma3_convert_encoder_to_gguf.py new file mode 100644 index 0000000000..241b526b9e --- /dev/null +++ b/examples/llava/gemma3_convert_encoder_to_gguf.py @@ -0,0 +1,307 @@ +import gguf +import argparse +import logging +import sys +import torch +import json +import os +import numpy as np +from typing import cast, ContextManager, Any, Iterator +from pathlib import Path +from torch import Tensor + +logger = logging.getLogger("gemma3-mmproj") + + +# (copied from convert_hf_to_gguf.py) +# tree of lazy tensors +class LazyTorchTensor(gguf.LazyBase): + _tensor_type = torch.Tensor + # to keep the type-checker happy + dtype: torch.dtype + shape: torch.Size + + # only used when converting a torch.Tensor to a np.ndarray + _dtype_map: dict[torch.dtype, type] = { + torch.float16: np.float16, + torch.float32: np.float32, + } + + # used for safetensors slices + # ref: https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/src/lib.rs#L1046 + # TODO: uncomment U64, U32, and U16, ref: https://github.com/pytorch/pytorch/issues/58734 + _dtype_str_map: dict[str, torch.dtype] = { + "F64": torch.float64, + "F32": torch.float32, + "BF16": torch.bfloat16, + "F16": torch.float16, + # "U64": torch.uint64, + "I64": torch.int64, + # "U32": torch.uint32, + "I32": torch.int32, + # "U16": torch.uint16, + "I16": torch.int16, + "U8": torch.uint8, + "I8": torch.int8, + "BOOL": torch.bool, + "F8_E4M3": torch.float8_e4m3fn, + "F8_E5M2": torch.float8_e5m2, + } + + def numpy(self) -> gguf.LazyNumpyTensor: + dtype = 
self._dtype_map[self.dtype] + return gguf.LazyNumpyTensor( + meta=gguf.LazyNumpyTensor.meta_with_dtype_and_shape(dtype, self.shape), + args=(self,), + func=(lambda s: s.numpy()) + ) + + @classmethod + def meta_with_dtype_and_shape(cls, dtype: torch.dtype, shape: tuple[int, ...]) -> Tensor: + return torch.empty(size=shape, dtype=dtype, device="meta") + + @classmethod + def from_safetensors_slice(cls, st_slice: Any) -> Tensor: + dtype = cls._dtype_str_map[st_slice.get_dtype()] + shape: tuple[int, ...] = tuple(st_slice.get_shape()) + lazy = cls(meta=cls.meta_with_dtype_and_shape(dtype, shape), args=(st_slice,), func=lambda s: s[:]) + return cast(torch.Tensor, lazy) + + @classmethod + def __torch_function__(cls, func, types, args=(), kwargs=None): + del types # unused + + if kwargs is None: + kwargs = {} + + if func is torch.Tensor.numpy: + return args[0].numpy() + + return cls._wrap_fn(func)(*args, **kwargs) + + +class Gemma3VisionTower: + hparams: dict + gguf_writer: gguf.GGUFWriter + fname_out: Path + ftype: gguf.LlamaFileType + + @staticmethod + def load_hparams(dir_model: Path): + with open(dir_model / "config.json", "r", encoding="utf-8") as f: + return json.load(f) + + @staticmethod + def get_model_part_names(dir_model: Path, prefix: str, suffix: str) -> list[str]: + part_names: list[str] = [] + for filename in os.listdir(dir_model): + if filename.startswith(prefix) and filename.endswith(suffix): + part_names.append(filename) + part_names.sort() + return part_names + + def __init__(self, + dir_model: Path, + fname_out: Path, + ftype: gguf.LlamaFileType, + is_big_endian: bool,): + hparams = Gemma3VisionTower.load_hparams(dir_model) + self.hparams = hparams + self.fname_out = fname_out + self.ftype = ftype + endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE + self.gguf_writer = gguf.GGUFWriter(path=None, arch="clip", endianess=endianess) + + text_config = hparams["text_config"] + vision_config = hparams["vision_config"] + + assert hparams["architectures"][0] == "Gemma3ForConditionalGeneration" + assert text_config is not None + assert vision_config is not None + + self.gguf_writer.add_string ("clip.projector_type", "gemma3") + self.gguf_writer.add_bool ("clip.has_text_encoder", False) + self.gguf_writer.add_bool ("clip.has_vision_encoder", True) + self.gguf_writer.add_bool ("clip.has_llava_projector", False) # legacy + self.gguf_writer.add_uint32 ("clip.vision.image_size", vision_config["image_size"]) + self.gguf_writer.add_uint32 ("clip.vision.patch_size", vision_config["patch_size"]) + self.gguf_writer.add_uint32 ("clip.vision.embedding_length", vision_config["hidden_size"]) + self.gguf_writer.add_uint32 ("clip.vision.feed_forward_length", vision_config["intermediate_size"]) + self.gguf_writer.add_uint32 ("clip.vision.projection_dim", text_config["hidden_size"]) + self.gguf_writer.add_uint32 ("clip.vision.block_count", vision_config["num_hidden_layers"]) + self.gguf_writer.add_uint32 ("clip.vision.attention.head_count", vision_config["num_attention_heads"]) + self.gguf_writer.add_float32("clip.vision.attention.layer_norm_epsilon", vision_config.get("layer_norm_eps", 1e-6)) + # default values taken from HF tranformers code + self.gguf_writer.add_array ("clip.vision.image_mean", [0.5, 0.5, 0.5]) + self.gguf_writer.add_array ("clip.vision.image_std", [0.5, 0.5, 0.5]) + self.gguf_writer.add_bool ("clip.use_gelu", True) + + # load tensors + for name, data_torch in self.get_tensors(dir_model): + # convert any unsupported data types to float32 + if data_torch.dtype 
not in (torch.float16, torch.float32): + data_torch = data_torch.to(torch.float32) + self.add_tensor(name, data_torch) + + def get_tensors(self, dir_model: Path) -> Iterator[tuple[str, Tensor]]: + part_names = Gemma3VisionTower.get_model_part_names(dir_model, "model", ".safetensors") + tensor_names_from_parts: set[str] = set() + for part_name in part_names: + logger.info(f"gguf: loading model part '{part_name}'") + from safetensors import safe_open + ctx = cast(ContextManager[Any], safe_open(dir_model / part_name, framework="pt", device="cpu")) + with ctx as model_part: + tensor_names_from_parts.update(model_part.keys()) + + for name in model_part.keys(): + data = model_part.get_slice(name) + data = LazyTorchTensor.from_safetensors_slice(data) + yield name, data + + def add_tensor(self, name: str, data_torch: Tensor): + is_1d = len(data_torch.shape) == 1 + is_embd = ".embeddings." in name + old_dtype = data_torch.dtype + can_quantize = not is_1d and not is_embd + data_qtype = gguf.GGMLQuantizationType.F32 + + # this is to support old checkpoint + # TODO: remove this when we have the final model + name = name.replace("vision_model.vision_model.", "vision_tower.vision_model.") + name = name.replace("multimodal_projector.", "multi_modal_projector.") + + # filter only vision tensors + if not name.startswith("vision_tower.vision_model.") and not name.startswith("multi_modal_projector."): + return + # prefix + name = name.replace("vision_tower.vision_model.encoder.layers.", "v.blk.") + name = name.replace("vision_tower.vision_model.", "v.") + # projector and input embd + name = name.replace(".embeddings.patch_embedding.", ".patch_embd.") + name = name.replace(".embeddings.position_embedding.", ".position_embd.") + name = name.replace( + "multi_modal_projector.mm_input_projection_weight", + "mm.input_projection.weight" + ) + name = name.replace( + "multi_modal_projector.mm_soft_emb_norm.weight", + "mm.soft_emb_norm.weight" + ) + name = name.replace("post_layernorm.", "post_ln.") + # each block + name = name.replace(".self_attn.k_proj.", ".attn_k.") + name = name.replace(".self_attn.v_proj.", ".attn_v.") + name = name.replace(".self_attn.q_proj.", ".attn_q.") + name = name.replace(".self_attn.out_proj.", ".attn_out.") + name = name.replace(".layer_norm1.", ".ln1.") + name = name.replace(".layer_norm2.", ".ln2.") + name = name.replace(".mlp.fc1.", ".ffn_down.") + name = name.replace(".mlp.fc2.", ".ffn_up.") + + if can_quantize: + if self.ftype == gguf.LlamaFileType.ALL_F32: + data_qtype = gguf.GGMLQuantizationType.F32 + elif self.ftype == gguf.LlamaFileType.MOSTLY_F16: + data_qtype = gguf.GGMLQuantizationType.F16 + elif self.ftype == gguf.LlamaFileType.MOSTLY_BF16: + data_qtype = gguf.GGMLQuantizationType.BF16 + elif self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0: + data_qtype = gguf.GGMLQuantizationType.Q8_0 + else: + raise ValueError(f"Unsupported file type: {self.ftype}") + + # corrent norm value ; only this "soft_emb_norm" need to be corrected as it's part of Gemma projector + # the other norm values are part of SigLIP model, and they are already correct + # ref code: Gemma3RMSNorm + if "soft_emb_norm.weight" in name: + logger.info(f"Correcting norm value for '{name}'") + data_torch = data_torch + 1 + + data = data_torch.numpy() + + try: + data = gguf.quants.quantize(data, data_qtype) + except Exception as e: + logger.error(f"Error quantizing tensor '{name}': {e}, fallback to F16") + data_qtype = gguf.GGMLQuantizationType.F16 + data = gguf.quants.quantize(data, data_qtype) + + # reverse shape to 
make it similar to the internal ggml dimension order + shape_str = f"{{{', '.join(str(n) for n in reversed(data_torch.shape))}}}" + logger.info(f"{f'%-32s' % f'{name},'} {old_dtype} --> {data_qtype.name}, shape = {shape_str}") + + self.gguf_writer.add_tensor(name, data, raw_dtype=data_qtype) + + def write(self): + self.gguf_writer.write_header_to_file(path=self.fname_out) + self.gguf_writer.write_kv_data_to_file() + self.gguf_writer.write_tensors_to_file(progress=True) + self.gguf_writer.close() + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Convert Gemma 3 vision tower safetensors to GGUF format",) + parser.add_argument( + "--outfile", type=Path, default="mmproj.gguf", + help="path to write to", + ) + parser.add_argument( + "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0"], default="f16", + help="output format", + ) + parser.add_argument( + "--bigendian", action="store_true", + help="model is executed on big endian machine", + ) + parser.add_argument( + "model", type=Path, + help="directory containing model file", + nargs="?", + ) + parser.add_argument( + "--verbose", action="store_true", + help="increase output verbosity", + ) + + args = parser.parse_args() + if args.model is None: + parser.error("the following arguments are required: model") + return args + + +def main() -> None: + args = parse_args() + + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + else: + logging.basicConfig(level=logging.INFO) + + dir_model = args.model + + if not dir_model.is_dir(): + logger.error(f'Error: {args.model} is not a directory') + sys.exit(1) + + ftype_map: dict[str, gguf.LlamaFileType] = { + "f32": gguf.LlamaFileType.ALL_F32, + "f16": gguf.LlamaFileType.MOSTLY_F16, + "bf16": gguf.LlamaFileType.MOSTLY_BF16, + "q8_0": gguf.LlamaFileType.MOSTLY_Q8_0, + } + + logger.info(f"Loading model: {dir_model.name}") + + with torch.inference_mode(): + gemma3_vision_tower = Gemma3VisionTower( + dir_model=dir_model, + fname_out=args.outfile, + ftype=ftype_map[args.outtype], + is_big_endian=args.bigendian, + ) + gemma3_vision_tower.write() + + +if __name__ == '__main__': + main() + diff --git a/examples/llava/glmedge-convert-image-encoder-to-gguf.py b/examples/llava/glmedge-convert-image-encoder-to-gguf.py new file mode 100644 index 0000000000..848ef1cf3f --- /dev/null +++ b/examples/llava/glmedge-convert-image-encoder-to-gguf.py @@ -0,0 +1,280 @@ +import argparse +import os +import json +import re + +import torch +import numpy as np +from gguf import * + +TEXT = "clip.text" +VISION = "clip.vision" +from transformers import SiglipVisionModel, SiglipVisionConfig + +def k(raw_key: str, arch: str) -> str: + return raw_key.format(arch=arch) + + +def should_skip_tensor(name: str, has_text: bool, has_vision: bool, has_llava: bool) -> bool: + if name in ( + "logit_scale", + "text_model.embeddings.position_ids", + "vision_model.embeddings.position_ids", + ): + return True + + if name in ( + "vision_model.head.probe", + "vision_model.head.attention.in_proj_weight", + "vision_model.head.attention.in_proj_bias", + "vision_model.head.attention.out_proj.weight", + "vision_model.head.attention.out_proj.bias", + "vision_model.head.layernorm.weight", + "vision_model.head.layernorm.bias", + "vision_model.head.mlp.fc1.weight", + "vision_model.head.mlp.fc1.bias", + "vision_model.head.mlp.fc2.weight", + "vision_model.head.mlp.fc2.bias" + ): + return True + + if name.startswith("v") and not has_vision: + return True + + if name.startswith("t") and not has_text: + 
return True + + return False + + +def get_tensor_name(name: str) -> str: + if "projection" in name: + return name + if "mm_projector" in name: + name = name.replace("model.mm_projector", "mm") + name = re.sub(r'mm\.mlp\.mlp', 'mm.model.mlp', name, count=1) + name = re.sub(r'mm\.peg\.peg', 'mm.model.peg', name, count=1) + return name + + return name.replace("text_model", "t").replace("vision_model", "v").replace("encoder.layers", "blk").replace("embeddings.", "").replace("_proj", "").replace("self_attn.", "attn_").replace("layer_norm", "ln").replace("layernorm", "ln").replace("mlp.fc1", "ffn_down").replace("mlp.fc2", "ffn_up").replace("embedding", "embd").replace("final", "post").replace("layrnorm", "ln") + + +def bytes_to_unicode(): + """ + Returns list of utf-8 byte and a corresponding list of unicode strings. + The reversible bpe codes work on unicode strings. + This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. + When you're at something like a 10B token dataset you end up needing around 5K for decent coverage. + This is a significant percentage of your normal, say, 32K bpe vocab. + To avoid that, we want lookup tables between utf-8 bytes and unicode strings. + And avoids mapping to whitespace/control characters the bpe code barfs on. + """ + bs = ( + list(range(ord("!"), ord("~") + 1)) + + list(range(ord("¡"), ord("¬") + 1)) + + list(range(ord("®"), ord("ÿ") + 1)) + ) + cs = bs[:] + n = 0 + for b in range(2**8): + if b not in bs: + bs.append(b) + cs.append(2**8 + n) + n += 1 + cs = [chr(n) for n in cs] + return dict(zip(bs, cs)) + + +ap = argparse.ArgumentParser() +ap.add_argument("-m", "--model-dir", help="Path to model directory cloned from HF Hub", required=True) +ap.add_argument("--use-f32", action="store_true", default=False, help="Use f32 instead of f16") +ap.add_argument("--text-only", action="store_true", required=False, + help="Save a text-only model. It can't be used to encode images") +ap.add_argument("--vision-only", action="store_true", required=False, + help="Save a vision-only model. It can't be used to encode texts") +ap.add_argument("--clip-model-is-vision", action="store_true", required=False, + help="The clip model is a pure vision model (ShareGPT4V vision extract for example)") +ap.add_argument("--clip-model-is-openclip", action="store_true", required=False, + help="The clip model is from openclip (for ViT-SO400M type))") +ap.add_argument("--llava-projector", help="Path to llava.projector file. If specified, save an image encoder for LLaVA models.") +ap.add_argument("--projector-type", help="Type of projector. Possible values: mlp, ldp, ldpv2", choices=["mlp", "ldp", "ldpv2","adapter"], default="adapter") +ap.add_argument("-o", "--output-dir", help="Directory to save GGUF files. 
Default is the original model directory", default=None) +# Example --image_mean 0.48145466 0.4578275 0.40821073 --image_std 0.26862954 0.26130258 0.27577711 +# Example --image_mean 0.5 0.5 0.5 --image_std 0.5 0.5 0.5 +default_image_mean = [0.5, 0.5, 0.5] +default_image_std = [0.5, 0.5, 0.5] +ap.add_argument('--image-mean', type=float, nargs='+', help='Mean of the images for normalization (overrides processor) ', default=None) +ap.add_argument('--image-std', type=float, nargs='+', help='Standard deviation of the images for normalization (overrides processor)', default=None) + +# with proper +args = ap.parse_args() + + +if args.text_only and args.vision_only: + print("--text-only and --image-only arguments cannot be specified at the same time.") + exit(1) + +if args.use_f32: + print("WARNING: Weights for the convolution op is always saved in f16, as the convolution op in GGML does not support 32-bit kernel weights yet.") + +# output in the same directory as the model if output_dir is None +dir_model = args.model_dir + +if args.clip_model_is_vision or not os.path.exists(dir_model + "/vocab.json") or args.clip_model_is_openclip: + vocab = None + tokens = None +else: + with open(dir_model + "/vocab.json", "r", encoding="utf-8") as f: + vocab = json.load(f) + tokens = [key for key in vocab] + +with open(dir_model + "/config.json", "r", encoding="utf-8") as f: + config = json.load(f) + if args.clip_model_is_vision: + v_hparams = config + t_hparams = None + else: + v_hparams = config["vision_config"] + t_hparams = None + +# possible data types +# ftype == 0 -> float32 +# ftype == 1 -> float16 +# +# map from ftype to string +ftype_str = ["f32", "f16"] + +ftype = 1 +if args.use_f32: + ftype = 0 + +vision_config = SiglipVisionConfig(**v_hparams) +model = SiglipVisionModel(vision_config) +model.load_state_dict(torch.load(os.path.join(dir_model, "glm.clip"))) + +fname_middle = None +has_text_encoder = False +has_vision_encoder = True +has_glm_projector = True +if args.text_only: + fname_middle = "text-" + has_vision_encoder = False +elif args.llava_projector is not None: + fname_middle = "mmproj-" + has_text_encoder = False + has_glm_projector = True +elif args.vision_only: + fname_middle = "vision-" + has_text_encoder = False +else: + fname_middle = "" + +output_dir = args.output_dir if args.output_dir is not None else dir_model +os.makedirs(output_dir, exist_ok=True) +output_prefix = os.path.basename(output_dir).replace("ggml_", "") +fname_out = os.path.join(output_dir, f"{fname_middle}model-{ftype_str[ftype]}.gguf") +fout = GGUFWriter(path=fname_out, arch="clip") + +fout.add_bool("clip.has_text_encoder", has_text_encoder) +fout.add_bool("clip.has_vision_encoder", has_vision_encoder) +fout.add_bool("clip.has_glm_projector", has_glm_projector) +fout.add_file_type(ftype) +model_name = config["_name_or_path"] if "_name_or_path" in config else os.path.basename(dir_model) +fout.add_name(model_name) +if has_glm_projector: + fout.add_description("image encoder for glm4v") + fout.add_string("clip.projector_type", "adapter") +else: + fout.add_description("two-tower CLIP model") + +if has_text_encoder: + assert t_hparams is not None + assert tokens is not None + # text_model hparams + fout.add_uint32(k(KEY_CONTEXT_LENGTH, TEXT), t_hparams["max_position_embeddings"]) + fout.add_uint32(k(KEY_EMBEDDING_LENGTH, TEXT), t_hparams["hidden_size"]) + fout.add_uint32(k(KEY_FEED_FORWARD_LENGTH, TEXT), t_hparams["intermediate_size"]) + fout.add_uint32("clip.text.projection_dim", t_hparams.get("projection_dim", 
config["projection_dim"])) + fout.add_uint32(k(KEY_ATTENTION_HEAD_COUNT, TEXT), t_hparams["num_attention_heads"]) + fout.add_float32(k(KEY_ATTENTION_LAYERNORM_EPS, TEXT), t_hparams["layer_norm_eps"]) + fout.add_uint32(k(KEY_BLOCK_COUNT, TEXT), t_hparams["num_hidden_layers"]) + fout.add_token_list(tokens) + +if has_vision_encoder: + # vision_model hparams + fout.add_uint32("clip.vision.image_size", v_hparams["image_size"]) + fout.add_uint32("clip.vision.patch_size", v_hparams["patch_size"]) + fout.add_uint32(k(KEY_EMBEDDING_LENGTH, VISION), v_hparams["hidden_size"]) + fout.add_uint32(k(KEY_FEED_FORWARD_LENGTH, VISION), v_hparams["intermediate_size"]) + fout.add_uint32("clip.vision.projection_dim", 0) + fout.add_uint32(k(KEY_ATTENTION_HEAD_COUNT, VISION), v_hparams["num_attention_heads"]) + fout.add_float32(k(KEY_ATTENTION_LAYERNORM_EPS, VISION), 1e-6) + fout.add_uint32(k(KEY_BLOCK_COUNT, VISION), v_hparams["num_hidden_layers"]) + + image_mean = args.image_mean if args.image_mean is not None else default_image_mean + image_std = args.image_std if args.image_std is not None else default_image_std + fout.add_array("clip.vision.image_mean", image_mean) + fout.add_array("clip.vision.image_std", image_std) + +fout.add_bool("clip.use_gelu", True) + + +if has_glm_projector: + # model.vision_model.encoder.layers.pop(-1) # pyright: ignore[reportAttributeAccessIssue] + projector = torch.load(args.llava_projector) + for name, data in projector.items(): + name = get_tensor_name(name) + # pw and dw conv ndim==4 + if data.ndim == 2 or data.ndim == 4: + data = data.squeeze().numpy().astype(np.float16) + else: + data = data.squeeze().numpy().astype(np.float32) + if name.startswith("vision."): + name=name.replace("vision.","") + fout.add_tensor(name, data) + print(f"Projector {name} - {data.dtype} - shape = {data.shape}") + # print(f"Projector {name} tensors added\n") + +state_dict = model.state_dict() # pyright: ignore[reportAttributeAccessIssue] +for name, data in state_dict.items(): + if should_skip_tensor(name, has_text_encoder, has_vision_encoder, has_glm_projector): + # we don't need this + print(f"skipping parameter: {name}") + continue + + name = get_tensor_name(name) + data = data.squeeze().numpy() + + n_dims = len(data.shape) + + # ftype == 0 -> float32, ftype == 1 -> float16 + ftype_cur = 0 + if n_dims == 4: + print(f"tensor {name} is always saved in f16") + data = data.astype(np.float16) + ftype_cur = 1 + elif ftype == 1: + if name[-7:] == ".weight" and n_dims == 2: + # print(" Converting to float16") + data = data.astype(np.float16) + ftype_cur = 1 + else: + # print(" Converting to float32") + data = data.astype(np.float32) + ftype_cur = 0 + else: + if data.dtype != np.float32: + # print(" Converting to float32") + data = data.astype(np.float32) + ftype_cur = 0 + print(f"siglip {name} - {data.dtype} - shape = {data.shape}") + # print(f"{name} - {ftype_str[ftype_cur]} - shape = {data.shape}") + fout.add_tensor(name, data) + + +fout.write_header_to_file() +fout.write_kv_data_to_file() +fout.write_tensors_to_file() +fout.close() + +print("Done. 
Output file: " + fname_out)
diff --git a/examples/llava/glmedge-surgery.py b/examples/llava/glmedge-surgery.py
new file mode 100644
index 0000000000..16bb915d04
--- /dev/null
+++ b/examples/llava/glmedge-surgery.py
@@ -0,0 +1,33 @@
+import argparse
+import os
+import torch
+from transformers import AutoModel
+
+ap = argparse.ArgumentParser()
+ap.add_argument("-m", "--model", help="Path to GLM model")
+args = ap.parse_args()
+
+# find the model part that includes the multimodal projector weights
+model = AutoModel.from_pretrained(args.model, trust_remote_code=True, local_files_only=True)
+checkpoint = model.state_dict()
+
+# get a list of mm tensor names
+mm_tensors = [k for k, v in checkpoint.items() if k.startswith("vision.adapter.")]
+
+# store these tensors in a new dictionary and torch.save them
+projector = {name: checkpoint[name].float() for name in mm_tensors}
+torch.save(projector, f"{args.model}/glm.projector")
+
+clip_tensors = [k for k, v in checkpoint.items() if k.startswith("vision.vit.model.vision_model.")]
+if len(clip_tensors) > 0:
+    clip = {name.replace("vision.vit.model.", ""): checkpoint[name].float() for name in clip_tensors}
+    torch.save(clip, f"{args.model}/glm.clip")
+
+    # added tokens should be removed to be able to convert Mistral models
+    if os.path.exists(f"{args.model}/added_tokens.json"):
+        with open(f"{args.model}/added_tokens.json", "w") as f:
+            f.write("{}\n")
+
+print("Done!")
+print(f"Now you can convert {args.model} to a regular LLaMA GGUF file.")
+print(f"Also, use {args.model}/glm.projector to prepare a glm-encoder.gguf file.")
diff --git a/examples/llava/llava-cli.cpp b/examples/llava/llava-cli.cpp
index 40aa0876f2..0fe0e333a5 100644
--- a/examples/llava/llava-cli.cpp
+++ b/examples/llava/llava-cli.cpp
@@ -225,7 +225,7 @@ static struct llama_model * llava_init(common_params * params) {
    llama_model_params model_params = common_model_params_to_llama(*params);

-   llama_model * model = llama_model_load_from_file(params->model.c_str(), model_params);
+   llama_model * model = llama_model_load_from_file(params->model.path.c_str(), model_params);
    if (model == NULL) {
        LOG_ERR("%s: unable to load model\n" , __func__);
        return NULL;
@@ -234,14 +234,14 @@ static struct llama_model * llava_init(common_params * params) {
}

static struct llava_context * llava_init_context(common_params * params, llama_model * model) {
-   const char * clip_path = params->mmproj.c_str();
+   const char * clip_path = params->mmproj.path.c_str();

    auto prompt = params->prompt;
    if (prompt.empty()) {
        prompt = "describe the image in detail.";
    }

-   auto ctx_clip = clip_model_load(clip_path, /*verbosity=*/ 1);
+   auto ctx_clip = clip_model_load(clip_path, GGML_LOG_LEVEL_INFO);

    llama_context_params ctx_params = common_context_params_to_llama(*params);
    ctx_params.n_ctx = params->n_ctx < 2048 ? 2048 : params->n_ctx; // we need a longer context size to process image embeddings
@@ -283,7 +283,7 @@ int main(int argc, char ** argv) {

    common_init();

-   if (params.mmproj.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) {
+   if (params.mmproj.path.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) {
        print_usage(argc, argv);
        return 1;
    }
diff --git a/examples/llava/llava.cpp b/examples/llava/llava.cpp
index 2cac7933d2..03a22cbb4c 100644
--- a/examples/llava/llava.cpp
+++ b/examples/llava/llava.cpp
@@ -10,6 +10,7 @@
#include
#include
#include
+#include

#if defined(LLAVA_LOG_OFF)
#  define LOG_INF(...)
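The hunk below replaces the manual `delete[] img_res_v.data` bookkeeping with `std::unique_ptr` wrappers whose deleters forward to the clip C API. A minimal sketch of that ownership pattern, assuming only the `clip_image_f32_batch_init`/`clip_image_f32_batch_free` pair and `clip_image_preprocess` used elsewhere in this patch (`preprocess_to_batch` is an illustrative name, not part of the change):

```cpp
#include "clip.h"

#include <memory>

// Deleter that forwards to the C API, so the batch is released on every
// exit path without an explicit free call.
struct clip_image_f32_batch_deleter {
    void operator()(clip_image_f32_batch * b) const { clip_image_f32_batch_free(b); }
};
using clip_image_f32_batch_ptr = std::unique_ptr<clip_image_f32_batch, clip_image_f32_batch_deleter>;

static bool preprocess_to_batch(clip_ctx * ctx_clip, const clip_image_u8 * img) {
    clip_image_f32_batch_ptr batch(clip_image_f32_batch_init());
    if (!clip_image_preprocess(ctx_clip, img, batch.get())) {
        return false; // batch is freed automatically here ...
    }
    // ... work with batch.get() ...
    return true;      // ... and here, with no manual cleanup
}
```

A matching wrapper is declared for `clip_image_size`, which lets the early-return error paths in `encode_image_with_clip()` drop their manual cleanup.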
@@ -45,6 +46,17 @@ struct clip_image_grid_shape { int second; }; +// convenience cpp wrapper +struct clip_image_f32_batch_deleter { + void operator()(clip_image_f32_batch * val) { clip_image_f32_batch_free(val); } +}; +typedef std::unique_ptr clip_image_f32_batch_ptr; + +struct clip_image_size_deleter { + void operator()(clip_image_f32_batch * val) { clip_image_f32_batch_free(val); } +}; +typedef std::unique_ptr clip_image_size_ptr; + /** * Selects the best resolution from a list of possible resolutions based on the original size. * @@ -105,8 +117,8 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector struct ggml_context * ctx; } model; - const int32_t image_size = clip_image_size(ctx_clip); - const int32_t patch_size = clip_patch_size(ctx_clip); + const int32_t image_size = clip_get_image_size(ctx_clip); + const int32_t patch_size = clip_get_patch_size(ctx_clip); int32_t num_patches_per_side = image_size / patch_size; // 336 / 14 = 24 - used for embedding-patching boxes (24*24 = 576 patches) @@ -246,12 +258,9 @@ static clip_image_f32 * reshape_by_patch(clip_image_f32 * image, int patch_size) static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float * image_embd, int * n_img_pos) { // std::vector img_res_v; // format VectN x H x W x RGB (N x 336 x 336 x 3), so interleaved RGB - different to the python implementation which is N x 3 x 336 x 336 - clip_image_f32_batch img_res_v; - img_res_v.size = 0; - img_res_v.data = nullptr; - if (!clip_image_preprocess(ctx_clip, img, &img_res_v)) { + clip_image_f32_batch_ptr img_res_v(clip_image_f32_batch_init()); + if (!clip_image_preprocess(ctx_clip, img, img_res_v.get())) { LOG_ERR("%s: unable to preprocess image\n", __func__); - delete[] img_res_v.data; return false; } @@ -259,63 +268,83 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli const char * mm_patch_merge_type = clip_patch_merge_type(ctx_clip); + const size_t n_imgs = clip_image_f32_batch_n_images(img_res_v.get()); + if (clip_is_minicpmv(ctx_clip) || clip_is_qwen2vl(ctx_clip)) { std::vector image_embd_v; - image_embd_v.resize(img_res_v.size); - struct clip_image_size * load_image_size = clip_image_size_init(); + image_embd_v.resize(n_imgs); + clip_image_size load_image_size; - for (size_t i = 0; i < img_res_v.size; i++) { + for (size_t i = 0; i < n_imgs; i++) { const int64_t t_img_enc_step_start_us = ggml_time_us(); - image_embd_v[i] = (float *)malloc(clip_embd_nbytes_by_img(ctx_clip, img_res_v.data[i].nx, img_res_v.data[i].ny)); - int patch_size=14; - load_image_size->width = img_res_v.data[i].nx; - load_image_size->height = img_res_v.data[i].ny; - clip_add_load_image_size(ctx_clip, load_image_size); + int nx = clip_image_f32_batch_nx(img_res_v.get(), i); + int ny = clip_image_f32_batch_ny(img_res_v.get(), i); + image_embd_v[i] = (float *)malloc(clip_embd_nbytes_by_img(ctx_clip, nx, ny)); + int patch_size = 14; + load_image_size.width = nx; + load_image_size.height = ny; + clip_add_load_image_size(ctx_clip, &load_image_size); bool encoded = false; + clip_image_f32 * img_res = clip_image_f32_get_img(img_res_v.get(), i); if (clip_is_qwen2vl(ctx_clip)) { - encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[i], image_embd_v[i]); + encoded = clip_image_encode(ctx_clip, n_threads, img_res, image_embd_v[i]); } else { - encoded = clip_image_encode(ctx_clip, n_threads, reshape_by_patch(&img_res_v.data[i], patch_size), image_embd_v[i]); + encoded = clip_image_encode(ctx_clip, n_threads, 
reshape_by_patch(img_res, patch_size), image_embd_v[i]); } if (!encoded) { - LOG_ERR("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) img_res_v.size); + LOG_ERR("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) n_imgs); return false; } const int64_t t_img_enc_steop_batch_us = ggml_time_us(); - LOG_INF("%s: step %d of %d encoded in %8.2f ms\n", __func__, (int)i+1, (int)img_res_v.size, (t_img_enc_steop_batch_us - t_img_enc_step_start_us) / 1000.0); + LOG_INF("%s: step %d of %d encoded in %8.2f ms\n", __func__, (int)i+1, (int)n_imgs, (t_img_enc_steop_batch_us - t_img_enc_step_start_us) / 1000.0); } const int64_t t_img_enc_batch_us = ggml_time_us(); - LOG_INF("%s: all %d segments encoded in %8.2f ms\n", __func__, (int)img_res_v.size, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0); + LOG_INF("%s: all %d segments encoded in %8.2f ms\n", __func__, (int)n_imgs, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0); int n_img_pos_out = 0; for (size_t i = 0; i < image_embd_v.size(); i++) { + int nx = clip_image_f32_batch_nx(img_res_v.get(), i); + int ny = clip_image_f32_batch_ny(img_res_v.get(), i); + clip_image_f32 * img_res = clip_image_f32_get_img(img_res_v.get(), i); std::memcpy( image_embd + n_img_pos_out * clip_n_mmproj_embd(ctx_clip), image_embd_v[i], - clip_embd_nbytes_by_img(ctx_clip, img_res_v.data[i].nx, img_res_v.data[i].ny)); - n_img_pos_out += clip_n_patches_by_img(ctx_clip, &img_res_v.data[i]); + clip_embd_nbytes_by_img(ctx_clip, nx, ny)); + n_img_pos_out += clip_n_patches_by_img(ctx_clip, img_res); } *n_img_pos = n_img_pos_out; for (size_t i = 0; i < image_embd_v.size(); i++) { free(image_embd_v[i]); } image_embd_v.clear(); - load_image_size->width = img->nx; - load_image_size->height = img->ny; + load_image_size.width = img->nx; + load_image_size.height = img->ny; + clip_add_load_image_size(ctx_clip, &load_image_size); + LOG_INF("%s: load_image_size %d %d\n", __func__, load_image_size.width, load_image_size.height); + } + else if (clip_is_glm(ctx_clip)){ + struct clip_image_size * load_image_size = clip_image_size_init(); + load_image_size->width = clip_image_f32_batch_nx(img_res_v.get(), 0); + load_image_size->height = clip_image_f32_batch_ny(img_res_v.get(), 0); clip_add_load_image_size(ctx_clip, load_image_size); - LOG_INF("%s: load_image_size %d %d\n", __func__, load_image_size->width, load_image_size->height); - delete[] img_res_v.data; - img_res_v.size = 0; - img_res_v.data = nullptr; + + clip_image_f32 * img_res = clip_image_f32_get_img(img_res_v.get(), 0); + bool encoded = clip_image_encode(ctx_clip, n_threads, img_res, image_embd); + int pos = int(load_image_size->width/clip_get_patch_size(ctx_clip)/2); + *n_img_pos = (pos * pos + 2); + if (!encoded){ + LOG_ERR("Unable to encode image \n"); + return false; + } } else if (strcmp(mm_patch_merge_type, "spatial_unpad") != 0) { // flat / default llava-1.5 type embedding *n_img_pos = clip_n_patches(ctx_clip); - bool encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[0], image_embd); // image_embd shape is 576 x 4096 - delete[] img_res_v.data; + clip_image_f32 * img_res = clip_image_f32_get_img(img_res_v.get(), 0); + bool encoded = clip_image_encode(ctx_clip, n_threads, img_res, image_embd); // image_embd shape is 576 x 4096 if (!encoded) { LOG_ERR("Unable to encode image\n"); @@ -326,31 +355,28 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli // spatial_unpad llava-1.6 type embedding // TODO: CLIP needs batching 
support - in HF the llm projection is separate after encoding, which might be a solution to quickly get batching working std::vector image_embd_v; - image_embd_v.resize(img_res_v.size); - for (size_t i = 0; i < img_res_v.size; i++) { + image_embd_v.resize(n_imgs); + for (size_t i = 0; i < n_imgs; i++) { + clip_image_f32 * img_res = clip_image_f32_get_img(img_res_v.get(), i); image_embd_v[i] = (float *)malloc(clip_embd_nbytes(ctx_clip)); // 576 patches * 4096 embeddings * 4 bytes = 9437184 - const bool encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[i], image_embd_v[i]); // image data is in 3x336x336 format and will be converted to 336x336x3 inside + const bool encoded = clip_image_encode(ctx_clip, n_threads, img_res, image_embd_v[i]); // image data is in 3x336x336 format and will be converted to 336x336x3 inside if (!encoded) { - LOG_ERR("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) img_res_v.size); + LOG_ERR("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) n_imgs); return false; } } const int64_t t_img_enc_batch_us = ggml_time_us(); - LOG_INF("%s: %d segments encoded in %8.2f ms\n", __func__, (int)img_res_v.size, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0); + LOG_INF("%s: %d segments encoded in %8.2f ms\n", __func__, (int)n_imgs, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0); const int32_t * image_grid = clip_image_grid(ctx_clip); + const size_t num_gridpoints = get_clip_image_grid_size(ctx_clip); std::vector> grid_pinpoints; - for (int i = 0; i < 32 && image_grid[i] != 0; i += 2) { + for (size_t i = 0; i < num_gridpoints; i += 2) { grid_pinpoints.push_back({image_grid[i], image_grid[i+1]}); } - // free all img_res_v - not needed anymore - delete[] img_res_v.data; - img_res_v.size = 0; - img_res_v.data = nullptr; - - const int32_t image_size = clip_image_size(ctx_clip); + const int32_t image_size = clip_get_image_size(ctx_clip); struct clip_image_grid_shape grid_shape = get_anyres_image_grid_shape({img->nx,img->ny}, grid_pinpoints, image_size); @@ -391,10 +417,14 @@ bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx * } bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out) { - int num_max_patches = 6; + // Granite vision uses up to 10 patches + base patch + int num_max_patches = 11; if (clip_is_minicpmv(ctx_clip)) { num_max_patches = 10; } + if (clip_is_glm(ctx_clip)) { + num_max_patches = 1; + } float * image_embd; if (clip_is_qwen2vl(ctx_clip)) { // qwen2vl don't split image into chunks, so `num_max_patches` is not needed. 
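With the batch treated as opaque, the rewritten `encode_image_with_clip()` above goes through accessor functions instead of reading `img_res_v.size` and `img_res_v.data[i]` directly. A condensed sketch of that per-image loop, assuming `ctx_clip`, `n_threads` and a preprocessed `batch` (a `clip_image_f32_batch_ptr`) are already in scope, and leaving out the copy into the caller's embedding buffer:

```cpp
const size_t n_imgs = clip_image_f32_batch_n_images(batch.get());
for (size_t i = 0; i < n_imgs; i++) {
    const int nx = clip_image_f32_batch_nx(batch.get(), i);        // per-image width
    const int ny = clip_image_f32_batch_ny(batch.get(), i);        // per-image height
    clip_image_f32 * img = clip_image_f32_get_img(batch.get(), i); // borrowed pointer, owned by the batch

    // size the output buffer for this image, then encode into it
    float * embd = (float *) malloc(clip_embd_nbytes_by_img(ctx_clip, nx, ny));
    if (!clip_image_encode(ctx_clip, n_threads, img, embd)) {
        free(embd);
        break; // caller reports the failure
    }

    // ... append embd to the result, advancing by clip_n_patches_by_img(ctx_clip, img) positions ...
    free(embd);
}
```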
diff --git a/examples/llava/llava_surgery_v2.py b/examples/llava/llava_surgery_v2.py index 2d5b32fe6b..b07c3e323c 100644 --- a/examples/llava/llava_surgery_v2.py +++ b/examples/llava/llava_surgery_v2.py @@ -33,6 +33,33 @@ def save_model(model, file_path, file_type): else: torch.save(model, file_path) +# Helpers to match weight names from specific components or +# determine if a saved shard contains that component +def is_vision_tower(weight_name): + return ( + weight_name.startswith("model.vision_tower") or + weight_name.startswith("vit.") or + weight_name.startswith("vision_tower") + ) + +def is_newline(weight_name): + return ( + weight_name.startswith("model.image_newline") or + weight_name.startswith("image_newline") + ) + +def is_mm_projector(weight_name): + return ( + weight_name.startswith("model.mm_projector") or + weight_name.startswith("vision_proj.") or + weight_name.startswith("multi_modal_projector") + ) + +def newline_criteria(checkpoint): + return any(is_newline(k) for k in checkpoint.keys()) + +def proj_criteria(checkpoint): + return any(is_mm_projector(k) for k in checkpoint.keys()) # Adapted function to clean vision tower from checkpoint def clean_vision_tower_from_checkpoint(checkpoint_path): @@ -40,7 +67,7 @@ def clean_vision_tower_from_checkpoint(checkpoint_path): # file_type = 'pytorch' model_path = os.path.dirname(checkpoint_path) print(f"Searching for vision tower tensors in {checkpoint_path}") - clip_tensors = [k for k, v in checkpoint.items() if (k.startswith("model.vision_tower") or k.startswith("vit."))] + clip_tensors = [k for k, v in checkpoint.items() if is_vision_tower(k)] if len(clip_tensors) > 0: print(f"Found {len(clip_tensors)} tensors to extract from {checkpoint_path}") @@ -84,12 +111,6 @@ def find_relevant_checkpoints(checkpoint_paths, newline_criteria, projector): return newline_checkpoint_path, projector_checkpoint_path -def newline_criteria(checkpoint): - return any(k.startswith("model.image_newline") for k in checkpoint.keys()) - -def proj_criteria(checkpoint): - return any(k.startswith("model.mm_projector") or k.startswith("vision_proj.") for k in checkpoint.keys()) - # Command-line interface setup ap = argparse.ArgumentParser() @@ -123,14 +144,14 @@ first_checkpoint = None if newline_checkpoint_path is not None: print(f"Taking newline from {newline_checkpoint_path}") first_checkpoint, file_type = load_model(newline_checkpoint_path) - first_mm_tensors = [k for k, v in first_checkpoint.items() if k.startswith("model.image_newline")] + first_mm_tensors = [k for k, v in first_checkpoint.items() if is_newline(k)] # Load the checkpoint mm_tensors = [] last_checkpoint = None if projector_checkpoint_path is not None: last_checkpoint, file_type = load_model(projector_checkpoint_path) - mm_tensors = [k for k, v in last_checkpoint.items() if k.startswith("model.mm_projector") or k.startswith("vision_proj.")] + mm_tensors = [k for k, v in last_checkpoint.items() if is_mm_projector(k)] if len(mm_tensors) == 0: if last_checkpoint is not None: @@ -155,5 +176,5 @@ if len(projector) > 0: save_model(projector, f"{args.model}/llava.projector", 'pytorch') print("Done!") -print(f"Now you can convert {args.model} to a a regular LLaMA GGUF file.") +print(f"Now you can convert {args.model} to a regular LLaMA GGUF file.") print(f"Also, use {args.model}/llava.projector to prepare a llava-encoder.gguf file.") diff --git a/examples/llava/minicpmv-cli.cpp b/examples/llava/minicpmv-cli.cpp index 53d902d616..5ad970c220 100644 --- a/examples/llava/minicpmv-cli.cpp +++ 
b/examples/llava/minicpmv-cli.cpp @@ -31,7 +31,7 @@ static struct llama_model * llava_init(common_params * params) { llama_model_params model_params = common_model_params_to_llama(*params); - llama_model * model = llama_model_load_from_file(params->model.c_str(), model_params); + llama_model * model = llama_model_load_from_file(params->model.path.c_str(), model_params); if (model == NULL) { LOG_ERR("%s: unable to load model\n" , __func__); return NULL; @@ -80,13 +80,17 @@ static void llava_free(struct llava_context * ctx_llava) { } static struct clip_ctx * clip_init_context(common_params * params) { - const char * clip_path = params->mmproj.c_str(); + const char * clip_path = params->mmproj.path.c_str(); auto prompt = params->prompt; if (prompt.empty()) { prompt = "describe the image in detail."; } - auto * ctx_clip = clip_model_load(clip_path, /*verbosity=*/ 1); + struct clip_context_params clip_params = { + /* use_gpu */ params->n_gpu_layers != 0, + /* verbosity */ GGML_LOG_LEVEL_INFO, // TODO: make this configurable + }; + auto * ctx_clip = clip_init(clip_path, clip_params); return ctx_clip; } @@ -148,19 +152,34 @@ static void process_image(struct llava_context * ctx_llava, struct llava_image_e process_eval_image_embed(ctx_llava, embeds, params->n_batch, &n_past, idx++); eval_string(ctx_llava->ctx_llama, std::string("").c_str(), params->n_batch, &n_past, false); if (num_image_embeds > 1) { - size_t num_image_embeds_col = clip_uhd_num_image_embeds_col(ctx_llava->ctx_clip); - eval_string(ctx_llava->ctx_llama, std::string("").c_str(), params->n_batch, &n_past, false); - for (size_t i = 0; i < (num_image_embeds-1)/num_image_embeds_col; ++i) { - for (size_t j = 0; j < num_image_embeds_col; ++j) { - eval_string(ctx_llava->ctx_llama, std::string("").c_str(), params->n_batch, &n_past, false); - process_eval_image_embed(ctx_llava, embeds, params->n_batch, &n_past, idx++); - eval_string(ctx_llava->ctx_llama, std::string("").c_str(), params->n_batch, &n_past, false); - if (j == num_image_embeds_col - 1) { - eval_string(ctx_llava->ctx_llama, std::string("\n").c_str(), params->n_batch, &n_past, false); + if (has_minicpmv_projector == 2) { + size_t num_image_embeds_col = clip_uhd_num_image_embeds_col(ctx_llava->ctx_clip); + eval_string(ctx_llava->ctx_llama, std::string("").c_str(), params->n_batch, &n_past, false); + for (size_t i = 0; i < (num_image_embeds-1)/num_image_embeds_col; ++i) { + for (size_t j = 0; j < num_image_embeds_col; ++j) { + eval_string(ctx_llava->ctx_llama, std::string("").c_str(), params->n_batch, &n_past, false); + process_eval_image_embed(ctx_llava, embeds, params->n_batch, &n_past, idx++); + eval_string(ctx_llava->ctx_llama, std::string("").c_str(), params->n_batch, &n_past, false); + if (j == num_image_embeds_col - 1) { + eval_string(ctx_llava->ctx_llama, std::string("\n").c_str(), params->n_batch, &n_past, false); + } + } + } + eval_string(ctx_llava->ctx_llama, std::string("").c_str(), params->n_batch, &n_past, false); + } + else if (has_minicpmv_projector == 3 || has_minicpmv_projector == 4) { + size_t num_image_embeds_col = clip_uhd_num_image_embeds_col(ctx_llava->ctx_clip); + for (size_t i = 0; i < (num_image_embeds-1)/num_image_embeds_col; ++i) { + for (size_t j = 0; j < num_image_embeds_col; ++j) { + eval_string(ctx_llava->ctx_llama, std::string("").c_str(), params->n_batch, &n_past, false); + process_eval_image_embed(ctx_llava, embeds, params->n_batch, &n_past, idx++); + eval_string(ctx_llava->ctx_llama, std::string("").c_str(), params->n_batch, &n_past, false); + if 
(j == num_image_embeds_col - 1) { + eval_string(ctx_llava->ctx_llama, std::string("\n").c_str(), params->n_batch, &n_past, false); + } } } } - eval_string(ctx_llava->ctx_llama, std::string("").c_str(), params->n_batch, &n_past, false); } LOG_INF("%s: image token past: %d\n", __func__, n_past); } @@ -271,7 +290,7 @@ int main(int argc, char ** argv) { common_init(); - if (params.mmproj.empty() || (params.image.empty())) { + if (params.mmproj.path.empty() || (params.image.empty())) { show_additional_info(argc, argv); return 1; } diff --git a/examples/llava/minicpmv-convert-image-encoder-to-gguf.py b/examples/llava/minicpmv-convert-image-encoder-to-gguf.py index 9b196757f0..cfe0961f98 100644 --- a/examples/llava/minicpmv-convert-image-encoder-to-gguf.py +++ b/examples/llava/minicpmv-convert-image-encoder-to-gguf.py @@ -597,7 +597,6 @@ elif args.minicpmv_projector is not None: fname_middle = "mmproj-" has_text_encoder = False has_minicpmv_projector = True - minicpmv_version = 4 elif args.vision_only: fname_middle = "vision-" has_text_encoder = False diff --git a/examples/llava/mtmd.cpp b/examples/llava/mtmd.cpp new file mode 100644 index 0000000000..3fd5bebc6a --- /dev/null +++ b/examples/llava/mtmd.cpp @@ -0,0 +1,373 @@ +#include "clip.h" +#include "clip-impl.h" +#include "mtmd.h" + +#include "llama.h" + +#include +#include +#include +#include +#include +#include +#include + +struct mtmd_context { + struct clip_ctx * ctx_clip; + const struct llama_model * text_model; + std::vector image_embd_v; // image embedding vector + + bool print_timings; + int n_threads; + std::string image_marker; + + // TODO @ngxson : add timings + + mtmd_context(const char * mmproj_fname, + const llama_model * text_model, + const mtmd_context_params & ctx_params) : + print_timings(ctx_params.print_timings), + n_threads (ctx_params.n_threads), + image_marker (ctx_params.image_marker) + { + clip_context_params ctx_clip_params; + ctx_clip_params.use_gpu = ctx_params.use_gpu; + ctx_clip_params.verbosity = ctx_params.verbosity; + ctx_clip = clip_init(mmproj_fname, ctx_clip_params); + if (!ctx_clip) { + throw std::runtime_error(string_format("Failed to load CLIP model from %s\n", mmproj_fname)); + } + this->text_model = text_model; + } + + ~mtmd_context() { + clip_free(ctx_clip); + } +}; + +struct mtmd_image_tokens_data { + clip_image_f32_batch batch_f32; // preprocessed image patches +}; + +struct mtmd_image_tokens { + uint32_t nx; // number of tokens in x direction + uint32_t ny; // number of tokens in y direction + uint32_t n_tokens() const { return nx * ny; } + clip_image_f32_batch batch_f32; // preprocessed image patches + std::string id; // optional user-defined ID, useful for KV cache tracking +}; + +mtmd_context * mtmd_init_from_file(const char * mmproj_fname, + const struct llama_model * text_model, + const struct mtmd_context_params ctx_params) { + try { + return new mtmd_context(mmproj_fname, text_model, ctx_params); + } catch (const std::exception & e) { + LOG_ERR("%s: error: %s\n", __func__, e.what()); + return nullptr; + } +} + +void mtmd_free(mtmd_context * ctx) { + if (ctx) { + delete ctx; + } +} + +// copied from common_tokenize +static std::vector mtmd_tokenize_text_internal( + const struct llama_vocab * vocab, + const std::string & text, + bool add_special, + bool parse_special) { + // upper limit for the number of tokens + int n_tokens = text.length() + 2 * add_special; + std::vector result(n_tokens); + n_tokens = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), 
add_special, parse_special); + if (n_tokens < 0) { + result.resize(-n_tokens); + int check = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special); + GGML_ASSERT(check == -n_tokens); + } else { + result.resize(n_tokens); + } + return result; +} + +int32_t mtmd_tokenize(mtmd_context * ctx, + std::vector & output, + const mtmd_input_text & text, + const std::vector & bitmaps) { + auto vocab = llama_model_get_vocab(ctx->text_model); + + std::string prompt_modified(text.text); + std::string marker_modified(ctx->image_marker); + projector_type proj_type = clip_get_projector_type(ctx->ctx_clip); + // a bit hacky here, but works for now + // for some models, we need to add prefix and suffix to the image embeddings + if (proj_type == PROJECTOR_TYPE_GEMMA3) { + // ... (image embeddings) ... + marker_modified = "" + ctx->image_marker + ""; + string_replace_all(prompt_modified, ctx->image_marker, marker_modified); + } + + std::vector parts = string_split_str(prompt_modified, ctx->image_marker); + output.clear(); + output.reserve(parts.size()); + + size_t i_img = 0; + + for (const auto & part : parts) { + //printf("tokenizing part: %s\n", part.c_str()); + bool add_bos = &parts.front() == ∂ + auto tokens = mtmd_tokenize_text_internal(vocab, part, text.add_special && add_bos, text.parse_special); + if (tokens.empty()) { + continue; + } + mtmd_input_chunk chunk{ + MTMD_INPUT_CHUNK_TYPE_TEXT, + std::move(tokens), + {}, + }; + output.emplace_back(std::move(chunk)); + + if (&parts.back() != &part) { + // add image token to middle of 2 parts + + if (i_img >= bitmaps.size()) { + LOG_ERR("%s: error: not enough images for %d parts\n", __func__, (int)parts.size()); + return 1; + } + + // shim layer + clip_image_u8_ptr img_u8(clip_image_u8_init()); + img_u8->nx = bitmaps[i_img].nx; + img_u8->ny = bitmaps[i_img].ny; + img_u8->buf.resize(bitmaps[i_img].data.size()); + std::memcpy(img_u8->buf.data(), bitmaps[i_img].data.data(), img_u8->nx * img_u8->ny * 3); + + // preprocess image + clip_image_f32_batch batch_f32; + bool ok = clip_image_preprocess(ctx->ctx_clip, img_u8.get(), &batch_f32); + if (!ok) { + LOG_ERR("Unable to preprocess image\n"); + return 2; + } + + mtmd_image_tokens_ptr image_tokens(new mtmd_image_tokens); + image_tokens->nx = clip_n_patches(ctx->ctx_clip); // TODO @ngxson : use clip_n_patches_by_image + image_tokens->ny = 1; // TODO + image_tokens->batch_f32 = std::move(batch_f32); + image_tokens->id = bitmaps[i_img].id; // optional + + mtmd_input_chunk chunk{ + MTMD_INPUT_CHUNK_TYPE_IMAGE, + {}, + std::move(image_tokens), + }; + output.emplace_back(std::move(chunk)); + i_img++; + } + } + + return 0; +} + +void mtmd_image_tokens_free(mtmd_image_tokens * image_tokens) { + if (image_tokens) { + delete image_tokens; + } +} + +size_t mtmd_image_tokens_get_n_tokens(const mtmd_image_tokens * image_tokens) { + return image_tokens->n_tokens(); +} + +size_t mtmd_image_tokens_get_nx(const mtmd_image_tokens * image_tokens) { + return image_tokens->nx; +} + +size_t mtmd_image_tokens_get_ny(const mtmd_image_tokens * image_tokens) { + return image_tokens->ny; +} + +std::string mtmd_image_tokens_get_id(const mtmd_image_tokens * image_tokens) { + return image_tokens->id; +} + +int32_t mtmd_encode(mtmd_context * ctx, const mtmd_image_tokens * image_tokens) { + int n_mmproj_embd = clip_n_mmproj_embd(ctx->ctx_clip); + ctx->image_embd_v.resize(image_tokens->n_tokens() * n_mmproj_embd); + bool ok = clip_image_batch_encode( + ctx->ctx_clip, + ctx->n_threads, + 
&image_tokens->batch_f32, + ctx->image_embd_v.data()); + return ok ? 0 : 1; +} + +float * mtmd_get_output_embd(mtmd_context * ctx) { + return ctx->image_embd_v.data(); +} + +size_t mtmd_helper_get_n_tokens(mtmd_input_chunks & chunks) { + size_t n_tokens = 0; + for (auto & chunk : chunks) { + if (chunk.type == MTMD_INPUT_CHUNK_TYPE_TEXT) { + n_tokens += chunk.tokens_text.size(); + } else if (chunk.type == MTMD_INPUT_CHUNK_TYPE_IMAGE) { + n_tokens += chunk.tokens_image->n_tokens(); + } else { + GGML_ASSERT(false && "chunk type not supported"); + } + } + return n_tokens; +} + +// helper struct to make working with embd batch easier +// note: this will be removed after llama_batch_ext refactoring +struct decode_embd_batch { + std::vector pos; + std::vector n_seq_id; + std::vector seq_id_0; + std::vector seq_ids; + std::vector logits; + llama_batch batch; + decode_embd_batch(float * embd, int32_t n_tokens, llama_pos pos_0, llama_seq_id seq_id) { + pos .resize(n_tokens); + n_seq_id.resize(n_tokens); + seq_ids .resize(n_tokens + 1); + logits .resize(n_tokens); + seq_id_0.resize(1); + seq_id_0[0] = seq_id; + seq_ids [n_tokens] = nullptr; + batch = { + /*n_tokens =*/ n_tokens, + /*tokens =*/ nullptr, + /*embd =*/ embd, + /*pos =*/ pos.data(), + /*n_seq_id =*/ n_seq_id.data(), + /*seq_id =*/ seq_ids.data(), + /*logits =*/ logits.data(), + }; + for (int i = 0; i < n_tokens; i++) { + batch.pos [i] = pos_0 + i; + batch.n_seq_id[i] = 1; + batch.seq_id [i] = seq_id_0.data(); + batch.logits [i] = false; + } + } +}; + +int32_t mtmd_helper_eval(mtmd_context * ctx, + llama_context * lctx, + mtmd_input_chunks & chunks, + llama_pos pos0, + llama_seq_id seq_id, + int32_t n_batch) { + int32_t ret; + llama_pos n_past = pos0; + llama_batch text_batch = llama_batch_init(n_batch, 0, 1); + + for (auto & chunk : chunks) { + bool is_last = &chunk == &chunks.back(); + if (chunk.type == MTMD_INPUT_CHUNK_TYPE_TEXT) { + // TODO @ngxson : may need to split into smaller batches + text_batch.n_tokens = chunk.tokens_text.size(); + for (size_t i = 0; i < chunk.tokens_text.size(); i++) { + text_batch.token [i] = chunk.tokens_text[i]; + text_batch.pos [i] = n_past++; + text_batch.n_seq_id[i] = 1; + text_batch.seq_id [i][0] = seq_id; + text_batch.logits [i] = false; + } + if (is_last) { + // always get logits for last input chunk + text_batch.logits[text_batch.n_tokens - 1] = true; + } + ret = llama_decode(lctx, text_batch); + if (ret != 0) { + LOG_ERR("failed to decode text\n"); + llama_batch_free(text_batch); + return ret; + } + + } else if (chunk.type == MTMD_INPUT_CHUNK_TYPE_IMAGE) { + GGML_ASSERT(!is_last && "logits for last image chunk is not yet support"); + GGML_ASSERT(chunk.tokens_image != nullptr); + int64_t t0 = ggml_time_ms(); + if (ctx->print_timings) { + LOG_INF("encoding image...\n"); + } + ret = mtmd_encode(ctx, chunk.tokens_image.get()); + if (ret != 0) { + LOG_ERR("failed to encode image\n"); + llama_batch_free(text_batch); + return ret; + } + if (ctx->print_timings) { + LOG_INF("image encoded in %" PRId64 " ms\n", ggml_time_ms() - t0); + } + + int32_t n_tokens = mtmd_image_tokens_get_n_tokens(chunk.tokens_image.get()); + float * embd = mtmd_get_output_embd(ctx); + decode_embd_batch batch_img(embd, n_tokens, n_past, 0); + int64_t t1 = ggml_time_ms(); + ret = llama_decode(lctx, batch_img.batch); + if (ret != 0) { + LOG_ERR("failed to decode image\n"); + llama_batch_free(text_batch); + return ret; + } + if (ctx->print_timings) { + LOG_INF("image decoded in %" PRId64 " ms\n", ggml_time_ms() - t1); + } + + n_past += 
n_tokens; + + } else { + GGML_ASSERT(false && "chunk type not supported"); + } + } + + llama_batch_free(text_batch); + return 0; +} + +int32_t mtmd_helper_bitmap_init_from_buf(const unsigned char * buf, size_t len, mtmd_bitmap & output) { + clip_image_u8_ptr img_u8(clip_image_u8_init()); + bool ok = clip_image_load_from_bytes(buf, len, img_u8.get()); + if (!ok) { + LOG_ERR("Unable to load image from buffer\n"); + return 1; + } + unsigned char * data = clip_image_u8_get_data(img_u8.get(), &output.nx, &output.ny); + output.data.resize(output.nx * output.ny * 3); + std::memcpy(output.data.data(), data, output.nx * output.ny * 3); + return 0; +} + +int32_t mtmd_helper_bitmap_init_from_file(const char * fname, mtmd_bitmap & output) { + clip_image_u8_ptr img_u8(clip_image_u8_init()); + bool ok = clip_image_load_from_file(fname, img_u8.get()); + if (!ok) { + LOG_ERR("Unable to load image %s\n", fname); + return 1; + } + unsigned char * data = clip_image_u8_get_data(img_u8.get(), &output.nx, &output.ny); + output.data.resize(output.nx * output.ny * 3); + std::memcpy(output.data.data(), data, output.nx * output.ny * 3); + return 0; +} + +bool mtmd_decode_use_non_causal(mtmd_context * ctx) { + projector_type proj_type = clip_get_projector_type(ctx->ctx_clip); + if (proj_type == PROJECTOR_TYPE_GEMMA3) { + return true; + } + return false; +} + +void mtmd_image_tokens_deleter::operator()(mtmd_image_tokens * val) { + mtmd_image_tokens_free(val); +} diff --git a/examples/llava/mtmd.h b/examples/llava/mtmd.h new file mode 100644 index 0000000000..78be192dd6 --- /dev/null +++ b/examples/llava/mtmd.h @@ -0,0 +1,161 @@ +#ifndef MTMD_H +#define MTMD_H + +#include "ggml.h" +#include "llama.h" +#include "clip.h" + +#include +#include +#include + +#ifdef LLAMA_SHARED +# if defined(_WIN32) && !defined(__MINGW32__) +# ifdef LLAMA_BUILD +# define MTMD_API __declspec(dllexport) +# else +# define MTMD_API __declspec(dllimport) +# endif +# else +# define MTMD_API __attribute__ ((visibility ("default"))) +# endif +#else +# define MTMD_API +#endif + +#ifdef __cplusplus + +enum mtmd_input_chunk_type { + MTMD_INPUT_CHUNK_TYPE_TEXT, + MTMD_INPUT_CHUNK_TYPE_IMAGE, +}; + +struct mtmd_context; +struct mtmd_image_tokens; + +// represents raw image data, layout is RGBRGBRGB... 
+// length of data must be nx * ny * 3 +struct mtmd_bitmap { + uint32_t nx; + uint32_t ny; + std::vector data; + std::string id; // optional user-defined id, for ex: can be set to image hash, useful for KV cache tracking +}; + +struct mtmd_image_tokens_deleter { + void operator()(mtmd_image_tokens * val); // forward declaration +}; +using mtmd_image_tokens_ptr = std::unique_ptr; + +struct mtmd_input_chunk { + mtmd_input_chunk_type type; + std::vector tokens_text; + mtmd_image_tokens_ptr tokens_image; +}; + +using mtmd_input_chunks = std::vector; + +struct mtmd_context_params { + bool use_gpu = true; + bool print_timings = true; + int n_threads = 4; + enum ggml_log_level verbosity = GGML_LOG_LEVEL_INFO; + const char * image_marker = "<__image__>"; +}; + +struct mtmd_input_text { + std::string text; + bool add_special; + bool parse_special; +}; + +// initialize the mtmd context +// return nullptr on failure +MTMD_API mtmd_context * mtmd_init_from_file(const char * mmproj_fname, + const llama_model * text_model, + const mtmd_context_params ctx_params); + +MTMD_API void mtmd_free(mtmd_context * ctx); + +// tokenize an input text prompt and an image +// the prompt must have the input image marker (default: "<__image__>") in it +// the marker will be replaced with the image tokens +// for example: +// "here is an image: <__image__>\ndescribe it in detail." +// this will gives 3 chunks: +// 1. "here is an image: " +// 2. (image tokens) +// 3. "\ndescribe it in detail." +// number of bitmaps must be equal to the number of image markers in the prompt +// this function is thread-safe (shared ctx) +// return values: +// 0 on success +// 1 on number of images not matching the number of markers +// 2 on image preprocessing error +MTMD_API int32_t mtmd_tokenize(mtmd_context * ctx, + std::vector & output, + const mtmd_input_text & text, + const std::vector & bitmaps); + +// access mtmd_image_tokens +MTMD_API size_t mtmd_image_tokens_get_n_tokens(const mtmd_image_tokens * image_tokens); +MTMD_API size_t mtmd_image_tokens_get_nx(const mtmd_image_tokens * image_tokens); +MTMD_API size_t mtmd_image_tokens_get_ny(const mtmd_image_tokens * image_tokens); +MTMD_API std::string mtmd_image_tokens_get_id(const mtmd_image_tokens * image_tokens); +MTMD_API void mtmd_image_tokens_free(mtmd_image_tokens * image_tokens); + +// returns 0 on success +MTMD_API int32_t mtmd_encode(mtmd_context * ctx, + const mtmd_image_tokens * image_tokens); + +// get output embeddings from the last encode pass +MTMD_API float * mtmd_get_output_embd(mtmd_context * ctx); + +// whether we need to set non-causal mask before llama_decode +MTMD_API bool mtmd_decode_use_non_causal(mtmd_context * ctx); + + + +// +// helper functions (can be implemented based on other functions) +// + +// helper to count the total number of tokens from a list of chunks, useful to keep track of n_past +MTMD_API size_t mtmd_helper_get_n_tokens(mtmd_input_chunks & chunks); + +// helper function that automatically: +// 1. run llama_decode() on text chunks +// 2. 
run mtmd_encode() on image chunks, then mtmd_get_output_embd() and then llama_decode() +// if any of the mtmd_encode() or llama_decode() calls return non-zero, stop and forward the error +// otherwise, returns 0 on success +MTMD_API int32_t mtmd_helper_eval(mtmd_context * ctx, + llama_context * lctx, + mtmd_input_chunks & chunks, + llama_pos pos0, + llama_seq_id seq_id, + int32_t n_batch); + +// helper function to construct a mtmd_bitmap from a file +// returns 0 on success +// this function is thread-safe +MTMD_API int32_t mtmd_helper_bitmap_init_from_file(const char * fname, mtmd_bitmap & output); + +// helper function to construct a mtmd_bitmap from a buffer +// the buffer must be an image in format supported by stb_image (jpg, png, bmp, gif, etc.) +// returns 0 on success +// this function is thread-safe +MTMD_API int32_t mtmd_helper_bitmap_init_from_buf(const unsigned char * buf, size_t len, mtmd_bitmap & output); + +// convenient unique_ptr wrappers +struct mtmd_context_deleter { + void operator()(mtmd_context * val) { mtmd_free(val); } +}; +using mtmd_context_ptr = std::unique_ptr; + +#else + +static_assert(false && "C header is not yet supported by this library"); + +#endif + +#endif diff --git a/examples/llava/qwen2vl-cli.cpp b/examples/llava/qwen2vl-cli.cpp index 132a7da543..eca7b7f10b 100644 --- a/examples/llava/qwen2vl-cli.cpp +++ b/examples/llava/qwen2vl-cli.cpp @@ -314,7 +314,7 @@ static struct llama_model * llava_init(common_params * params) { llama_model_params model_params = common_model_params_to_llama(*params); - llama_model * model = llama_model_load_from_file(params->model.c_str(), model_params); + llama_model * model = llama_model_load_from_file(params->model.path.c_str(), model_params); if (model == NULL) { LOG_ERR("%s: unable to load model\n" , __func__); return NULL; @@ -323,14 +323,14 @@ static struct llama_model * llava_init(common_params * params) { } static struct llava_context * llava_init_context(common_params * params, llama_model * model) { - const char * clip_path = params->mmproj.c_str(); + const char * clip_path = params->mmproj.path.c_str(); auto prompt = params->prompt; if (prompt.empty()) { prompt = "describe the image in detail."; } - auto ctx_clip = clip_model_load(clip_path, /*verbosity=*/ 1); + auto ctx_clip = clip_model_load(clip_path, GGML_LOG_LEVEL_INFO); llama_context_params ctx_params = common_context_params_to_llama(*params); ctx_params.n_ctx = params->n_ctx < 2048 ? 2048 : params->n_ctx; // we need a longer context size to process image embeddings @@ -524,7 +524,7 @@ int main(int argc, char ** argv) { common_init(); - if (params.mmproj.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) { + if (params.mmproj.path.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) { print_usage(argc, argv); return 1; } diff --git a/examples/llava/test-1.jpeg b/examples/llava/test-1.jpeg new file mode 100644 index 0000000000..7fdcaaf04b Binary files /dev/null and b/examples/llava/test-1.jpeg differ diff --git a/examples/llava/tests.sh b/examples/llava/tests.sh new file mode 100755 index 0000000000..cc9bda8769 --- /dev/null +++ b/examples/llava/tests.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +# make sure we are in the right directory +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +cd $SCRIPT_DIR + +#export LLAMA_CACHE="$SCRIPT_DIR/tmp" + +set -eux + +mkdir -p $SCRIPT_DIR/output + +PROJ_ROOT="$SCRIPT_DIR/../.." 
+cd $PROJ_ROOT + +############### + +arr_bin=() +arr_hf=() + +add_test() { + local bin=$1 + local hf=$2 + arr_bin+=("$bin") + arr_hf+=("$hf") +} + +add_test "llama-gemma3-cli" "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M" +add_test "llama-llava-cli" "cmp-nct/Yi-VL-6B-GGUF:Q5_K" +add_test "llama-llava-cli" "guinmoon/MobileVLM-3B-GGUF:Q4_K_M" +add_test "llama-llava-cli" "THUDM/glm-edge-v-5b-gguf:Q4_K_M" +add_test "llama-llava-cli" "second-state/Llava-v1.5-7B-GGUF:Q2_K" +add_test "llama-llava-cli" "cjpais/llava-1.6-mistral-7b-gguf:Q3_K" +add_test "llama-llava-cli" "ibm-research/granite-vision-3.2-2b-GGUF:Q4_K_M" +add_test "llama-minicpmv-cli" "second-state/MiniCPM-Llama3-V-2_5-GGUF:Q2_K" # model from openbmb is corrupted +add_test "llama-minicpmv-cli" "openbmb/MiniCPM-V-2_6-gguf:Q2_K" +add_test "llama-minicpmv-cli" "openbmb/MiniCPM-o-2_6-gguf:Q4_0" +add_test "llama-qwen2vl-cli" "bartowski/Qwen2-VL-2B-Instruct-GGUF:Q4_K_M" + +############### + +cmake --build build -j --target "${arr_bin[@]}" + +arr_res=() + +for i in "${!arr_bin[@]}"; do + bin="${arr_bin[$i]}" + hf="${arr_hf[$i]}" + + echo "Running test with binary: $bin and HF model: $hf" + echo "" + echo "" + + output=$("$PROJ_ROOT/build/bin/$bin" -hf "$hf" --image $SCRIPT_DIR/test-1.jpeg -p "what is the publisher name of the newspaper?" --temp 0 2>&1 | tee /dev/tty) + + echo "$output" > $SCRIPT_DIR/output/$bin-$(echo "$hf" | tr '/' '-').log + + if echo "$output" | grep -iq "new york"; then + result="\033[32mOK\033[0m: $bin $hf" + else + result="\033[31mFAIL\033[0m: $bin $hf" + fi + echo -e "$result" + arr_res+=("$result") + + echo "" + echo "" + echo "" + echo "#################################################" + echo "#################################################" + echo "" + echo "" +done + +set +x + +for i in "${!arr_res[@]}"; do + echo -e "${arr_res[$i]}" +done +echo "" +echo "Output logs are saved in $SCRIPT_DIR/output" diff --git a/examples/lookahead/README.md b/examples/lookahead/README.md index a69a471b47..aab3cd0ca4 100644 --- a/examples/lookahead/README.md +++ b/examples/lookahead/README.md @@ -4,4 +4,4 @@ Demonstration of lookahead decoding technique: https://lmsys.org/blog/2023-11-21-lookahead-decoding/ -More info: https://github.com/ggerganov/llama.cpp/pull/4207 +More info: https://github.com/ggml-org/llama.cpp/pull/4207 diff --git a/examples/lookahead/lookahead.cpp b/examples/lookahead/lookahead.cpp index 2f0898e628..7df20aee17 100644 --- a/examples/lookahead/lookahead.cpp +++ b/examples/lookahead/lookahead.cpp @@ -7,6 +7,7 @@ #include #include #include +#include struct ngram_data { bool active = false; @@ -95,7 +96,7 @@ int main(int argc, char ** argv) { llama_decode(ctx, llama_batch_get_one(&inp.back(), 1)); for (int s = 1; s < W + G + 1; ++s) { - llama_kv_cache_seq_cp(ctx, 0, s, -1, -1); + llama_kv_self_seq_cp(ctx, 0, s, -1, -1); } const auto t_enc_end = ggml_time_us(); @@ -437,17 +438,17 @@ int main(int argc, char ** argv) { // KV cache management // if no verification token matched, we simply remove all cells from this batch -> no fragmentation - llama_kv_cache_seq_rm(ctx, -1, n_past, -1); + llama_kv_self_seq_rm(ctx, -1, n_past, -1); if (seq_id_best != 0) { // if a verification token matched, we keep the best sequence and remove the rest // this leads to some KV cache fragmentation - llama_kv_cache_seq_keep(ctx, seq_id_best); - llama_kv_cache_seq_cp (ctx, seq_id_best, 0, -1, -1); - llama_kv_cache_seq_rm (ctx, seq_id_best, -1, -1); + llama_kv_self_seq_keep(ctx, seq_id_best); + llama_kv_self_seq_cp (ctx, seq_id_best, 0, -1, -1); 
+ llama_kv_self_seq_rm (ctx, seq_id_best, -1, -1); for (int s = 1; s < W + G + 1; ++s) { - llama_kv_cache_seq_cp(ctx, 0, s, -1, -1); + llama_kv_self_seq_cp(ctx, 0, s, -1, -1); } } } diff --git a/examples/lookup/README.md b/examples/lookup/README.md index 71c345c037..07d73849b0 100644 --- a/examples/lookup/README.md +++ b/examples/lookup/README.md @@ -8,5 +8,5 @@ The key parameters for lookup decoding are `ngram_min`, `ngram_max` and `n_draft More info: -https://github.com/ggerganov/llama.cpp/pull/4484 -https://github.com/ggerganov/llama.cpp/issues/4226 +https://github.com/ggml-org/llama.cpp/pull/4484 +https://github.com/ggml-org/llama.cpp/issues/4226 diff --git a/examples/lookup/lookup.cpp b/examples/lookup/lookup.cpp index dbd0444ec8..4ae93b2a5e 100644 --- a/examples/lookup/lookup.cpp +++ b/examples/lookup/lookup.cpp @@ -192,7 +192,7 @@ int main(int argc, char ** argv){ // KV cache management // clean the cache of draft tokens that weren't accepted - llama_kv_cache_seq_rm(ctx, 0, n_past, -1); + llama_kv_self_seq_rm(ctx, 0, n_past, -1); common_batch_clear(batch_tgt); common_batch_add(batch_tgt, draft[0], n_past, { 0 }, true); diff --git a/examples/main/README.md b/examples/main/README.md index 46f92eb7ae..e4b3590b5d 100644 --- a/examples/main/README.md +++ b/examples/main/README.md @@ -1,6 +1,6 @@ # llama.cpp/examples/main -This example program allows you to use various LLaMA language models easily and efficiently. It is specifically designed to work with the [llama.cpp](https://github.com/ggerganov/llama.cpp) project, which provides a plain C/C++ implementation with optional 4-bit quantization support for faster, lower memory inference, and is optimized for desktop CPUs. This program can be used to perform various inference tasks with LLaMA models, including generating text based on user-provided prompts and chat-like interactions with reverse prompts. +This example program allows you to use various LLaMA language models easily and efficiently. It is specifically designed to work with the [llama.cpp](https://github.com/ggml-org/llama.cpp) project, which provides a plain C/C++ implementation with optional 4-bit quantization support for faster, lower memory inference, and is optimized for desktop CPUs. This program can be used to perform various inference tasks with LLaMA models, including generating text based on user-provided prompts and chat-like interactions with reverse prompts. ## Table of Contents @@ -27,29 +27,53 @@ Once downloaded, place your model in the models folder in llama.cpp. 
##### Input prompt (One-and-done) ```bash -./llama-cli -m models/gemma-1.1-7b-it.Q4_K_M.gguf --prompt "Once upon a time" +./llama-cli -m models/gemma-1.1-7b-it.Q4_K_M.gguf -no-cnv --prompt "Once upon a time" ``` ##### Conversation mode (Allow for continuous interaction with the model) ```bash -./llama-cli -m models/gemma-1.1-7b-it.Q4_K_M.gguf -cnv --chat-template gemma +./llama-cli -m models/gemma-1.1-7b-it.Q4_K_M.gguf --chat-template gemma +``` + +##### Conversation mode using built-in jinja chat template + +```bash +./llama-cli -m models/gemma-1.1-7b-it.Q4_K_M.gguf --jinja +``` + +##### One-and-done query using jinja with custom system prompt and a starting prompt + +```bash +./llama-cli -m models/gemma-1.1-7b-it.Q4_K_M.gguf --jinja --single-turn -sys "You are a helpful assistant" -p "Hello" ``` ##### Infinite text from a starting prompt (you can use `Ctrl-C` to stop it): ```bash -./llama-cli -m models\gemma-1.1-7b-it.Q4_K_M.gguf --ignore-eos -n -1 +./llama-cli -m models/gemma-1.1-7b-it.Q4_K_M.gguf --ignore-eos -n -1 ``` ### Windows: ##### Input prompt (One-and-done) ```powershell -./llama-cli.exe -m models\gemma-1.1-7b-it.Q4_K_M.gguf --prompt "Once upon a time" +./llama-cli.exe -m models\gemma-1.1-7b-it.Q4_K_M.gguf -no-cnv --prompt "Once upon a time" ``` ##### Conversation mode (Allow for continuous interaction with the model) ```powershell -./llama-cli.exe -m models\gemma-1.1-7b-it.Q4_K_M.gguf -cnv --chat-template gemma +./llama-cli.exe -m models\gemma-1.1-7b-it.Q4_K_M.gguf --chat-template gemma +``` + +##### Conversation mode using built-in jinja chat template + +```powershell +./llama-cli.exe -m models\gemma-1.1-7b-it.Q4_K_M.gguf --jinja +``` + +##### One-and-done query using jinja with custom system prompt and a starting prompt + +```powershell +./llama-cli.exe -m models\gemma-1.1-7b-it.Q4_K_M.gguf --jinja --single-turn -sys "You are a helpful assistant" -p "Hello" ``` #### Infinite text from a starting prompt (you can use `Ctrl-C` to stop it): @@ -77,6 +101,8 @@ The `llama-cli` program provides several ways to interact with the LLaMA models - `--prompt PROMPT`: Provide a prompt directly as a command-line option. - `--file FNAME`: Provide a file containing a prompt or multiple prompts. +- `--system-prompt PROMPT`: Provide a system prompt (will otherwise use the default one in the chat template (if provided)). +- `--system-prompt-file FNAME`: Provide a file containing a system prompt. - `--interactive-first`: Run the program in interactive mode and wait for input right away. (More on this below.) ## Interaction @@ -89,7 +115,10 @@ In interactive mode, users can participate in text generation by injecting their - `-i, --interactive`: Run the program in interactive mode, allowing users to engage in real-time conversations or provide specific instructions to the model. - `--interactive-first`: Run the program in interactive mode and immediately wait for user input before starting the text generation. -- `-cnv, --conversation`: Run the program in conversation mode (does not print special tokens and suffix/prefix, use default chat template) (default: false) +- `-cnv, --conversation`: Run the program in conversation mode (does not print special tokens and suffix/prefix, use default or provided chat template) (default: true if chat template found) +- `-no-cnv`: Disable conversation mode (default: false) +- `-st, --single-turn`: Only process a single conversation turn (user input) and then exit. 
+- `--jinja`: Enable the jinja chat template parser; it will use the model's built-in template or a user-provided one (default: false)
 - `--color`: Enable colorized output to differentiate visually distinguishing between prompts, user input, and generated text.

 By understanding and utilizing these interaction options, you can create engaging and dynamic experiences with the LLaMA models, tailoring the text generation process to your specific needs.

@@ -121,10 +150,12 @@ When --in-prefix or --in-suffix options are enabled the chat template ( --chat-t

 ### Chat templates

- `--chat-template JINJA_TEMPLATE`: This option sets a custom jinja chat template. It accepts a string, not a file name. Default: template taken from model's metadata. Llama.cpp only supports [some pre-defined templates](https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template). These include llama2, llama3, gemma, monarch, chatml, orion, vicuna, vicuna-orca, deepseek, command-r, zephyr. When --in-prefix or --in-suffix options are enabled the chat template ( --chat-template ) is disabled.
+ `--chat-template JINJA_TEMPLATE`: This option sets a custom jinja chat template. It accepts a string, not a file name. Default: template taken from model's metadata. Llama.cpp only supports [some pre-defined templates](https://github.com/ggml-org/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template). These include llama2, llama3, gemma, monarch, chatml, orion, vicuna, vicuna-orca, deepseek, command-r, zephyr. When --in-prefix or --in-suffix options are enabled the chat template ( --chat-template ) is disabled.

 Example usage: `--chat-template gemma`

+`--chat-template-file FNAME`: Load a custom jinja chat template from an external file, useful if the model contains an outdated or incompatible template; some examples can be found in models/templates. Up-to-date chat templates can be downloaded from Hugging Face using scripts/get_chat_template.py
+
 ## Context Management

 During text generation, LLaMA models have a limited context size, which means they can only consider a certain number of tokens from the input and generated text. When the context fills up, the model resets internally, potentially losing some information from the beginning of the conversation or instructions. Context management options help maintain continuity and coherence in these situations.

@@ -265,6 +296,14 @@ Being experimental and unique, XTC is disabled by default. The recommended combi

 Example usage: `--xtc-probability 0.5 --xtc-threshold 0.1`

+### Top-nσ Sampling
+
+- `--top-nsigma N`: Limit the next token selection to a subset of tokens with pre-softmax logits that are within n * σ less than the max logit (default: -1, -1 = disabled).
+
+Top-nσ sampling is a text generation method that selects tokens based on a statistical threshold in pre-softmax logits. It works by only sampling from tokens with logits that are within n * σ of the maximum logit. This method helps maintain a stable sampling space regardless of temperature scaling, allowing it to perform well on reasoning tasks even at high temperatures. Without complex probability manipulation, it efficiently filters tokens directly on the pre-softmax logits. A higher value for top-nsigma (e.g., 5) will take more noisy tokens into consideration, while a lower value (e.g., 1) will focus on the more informative region of the sampling space.
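To make the selection rule concrete, here is a minimal, self-contained sketch of the filtering step described above (this is not the llama.cpp sampler implementation; the helper name `top_nsigma_filter` and the toy logits are illustrative only). It keeps the token ids whose pre-softmax logit is at least `max_logit - n * sigma`, where `sigma` is the standard deviation of the logits:

```cpp
// Illustrative sketch of the top-n-sigma rule, not the llama.cpp sampler.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Keep only token ids whose logit >= max_logit - n * sigma.
static std::vector<int> top_nsigma_filter(const std::vector<float> & logits, float n) {
    float max_logit = logits[0];
    float mean      = 0.0f;
    for (float l : logits) {
        max_logit = std::max(max_logit, l);
        mean += l;
    }
    mean /= logits.size();

    float var = 0.0f;
    for (float l : logits) {
        var += (l - mean) * (l - mean);
    }
    const float sigma = std::sqrt(var / logits.size());

    std::vector<int> kept;
    for (size_t i = 0; i < logits.size(); ++i) {
        if (logits[i] >= max_logit - n * sigma) {
            kept.push_back((int) i);
        }
    }
    return kept;
}

int main() {
    // Toy logits: with n = 1 only tokens close to the maximum survive;
    // softmax sampling would then be applied to the kept subset.
    const std::vector<float> logits = { 8.0f, 7.5f, 2.0f, 1.0f, -3.0f };
    for (int id : top_nsigma_filter(logits, 1.0f)) {
        printf("kept token %d (logit %.1f)\n", id, logits[id]);
    }
    return 0;
}
```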
+ +Example usage: `--top-nsigma 1` + ### Logit Bias - `-l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS`: Modify the likelihood of a token appearing in the generated text completion. diff --git a/examples/main/main.cpp b/examples/main/main.cpp index da2a03ab9b..c59b941bf5 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -4,7 +4,7 @@ #include "log.h" #include "sampling.h" #include "llama.h" -#include "chat-template.hpp" +#include "chat.h" #include #include @@ -31,8 +31,6 @@ #pragma warning(disable: 4244 4267) // possible loss of data #endif -static const char * DEFAULT_SYSTEM_MESSAGE = "You are a helpful assistant"; - static llama_context ** g_ctx; static llama_model ** g_model; static common_sampler ** g_smpl; @@ -47,8 +45,8 @@ static void print_usage(int argc, char ** argv) { (void) argc; LOG("\nexample usage:\n"); - LOG("\n text generation: %s -m your_model.gguf -p \"I believe the meaning of life is\" -n 128\n", argv[0]); - LOG("\n chat (conversation): %s -m your_model.gguf -p \"You are a helpful assistant\" -cnv\n", argv[0]); + LOG("\n text generation: %s -m your_model.gguf -p \"I believe the meaning of life is\" -n 128 -no-cnv\n", argv[0]); + LOG("\n chat (conversation): %s -m your_model.gguf -sys \"You are a helpful assistant\"\n", argv[0]); LOG("\n"); } @@ -158,7 +156,7 @@ int main(int argc, char ** argv) { } const llama_vocab * vocab = llama_model_get_vocab(model); - auto chat_templates = common_chat_templates_from_model(model, params.chat_template); + auto chat_templates = common_chat_templates_init(model, params.chat_template); LOG_INF("%s: llama threadpool init, n_threads = %d\n", __func__, (int) params.cpuparams.n_threads); @@ -201,7 +199,7 @@ int main(int argc, char ** argv) { } // auto enable conversation mode if chat template is available - const bool has_chat_template = chat_templates.has_explicit_template && chat_templates.template_default; + const bool has_chat_template = common_chat_templates_was_explicit(chat_templates.get()); if (params.conversation_mode == COMMON_CONVERSATION_MODE_AUTO) { if (has_chat_template) { LOG_INF("%s: chat template is available, enabling conversation mode (disable it with -no-cnv)\n", __func__); @@ -219,7 +217,11 @@ int main(int argc, char ** argv) { // print chat template example in conversation mode if (params.conversation_mode) { if (params.enable_chat_template) { - LOG_INF("%s: chat template example:\n%s\n", __func__, common_chat_format_example(*chat_templates.template_default, params.use_jinja).c_str()); + if (!params.prompt.empty() && params.system_prompt.empty()) { + LOG_WRN("*** User-specified prompt will pre-start conversation, did you mean to set --system-prompt (-sys) instead?\n"); + } + + LOG_INF("%s: chat template example:\n%s\n", __func__, common_chat_format_example(chat_templates.get(), params.use_jinja).c_str()); } else { LOG_INF("%s: in-suffix/prefix is specified, chat template will be disabled\n", __func__); } @@ -254,7 +256,7 @@ int main(int argc, char ** argv) { } } - const bool add_bos = llama_vocab_get_add_bos(vocab); + const bool add_bos = llama_vocab_get_add_bos(vocab) && !params.use_jinja; if (!llama_model_has_encoder(model)) { GGML_ASSERT(!llama_vocab_get_add_eos(vocab)); } @@ -263,21 +265,45 @@ int main(int argc, char ** argv) { std::vector embd_inp; + bool waiting_for_first_input = false; auto chat_add_and_format = [&chat_msgs, &chat_templates](const std::string & role, const std::string & content) { - common_chat_msg new_msg{role, content}; - auto formatted = 
common_chat_format_single(*chat_templates.template_default, chat_msgs, new_msg, role == "user", g_params->use_jinja); - chat_msgs.push_back({role, content}); + common_chat_msg new_msg; + new_msg.role = role; + new_msg.content = content; + auto formatted = common_chat_format_single(chat_templates.get(), chat_msgs, new_msg, role == "user", g_params->use_jinja); + chat_msgs.push_back(new_msg); LOG_DBG("formatted: '%s'\n", formatted.c_str()); return formatted; }; + std::string prompt; { - auto prompt = (params.conversation_mode && params.enable_chat_template) - // format the system prompt in conversation mode (fallback to default if empty) - ? chat_add_and_format("system", params.prompt.empty() ? DEFAULT_SYSTEM_MESSAGE : params.prompt) + if (params.conversation_mode && params.enable_chat_template) { + if (!params.system_prompt.empty()) { + // format the system prompt (will use template default if empty) + chat_add_and_format("system", params.system_prompt); + } + + if (!params.prompt.empty()) { + // format and append the user prompt + chat_add_and_format("user", params.prompt); + } else { + waiting_for_first_input = true; + } + + if (!params.system_prompt.empty() || !params.prompt.empty()) { + common_chat_templates_inputs inputs; + inputs.messages = chat_msgs; + inputs.add_generation_prompt = !params.prompt.empty(); + + prompt = common_chat_templates_apply(chat_templates.get(), inputs).prompt; + } + } else { // otherwise use the prompt as is - : params.prompt; - if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) { + prompt = params.prompt; + } + + if (params.interactive_first || !prompt.empty() || session_tokens.empty()) { LOG_DBG("tokenize the prompt\n"); embd_inp = common_tokenize(ctx, prompt, true, true); } else { @@ -290,7 +316,7 @@ int main(int argc, char ** argv) { } // Should not run without any tokens - if (embd_inp.empty()) { + if (!waiting_for_first_input && embd_inp.empty()) { if (add_bos) { embd_inp.push_back(llama_vocab_bos(vocab)); LOG_WRN("embd_inp was considered empty and bos was added: %s\n", string_from(ctx, embd_inp).c_str()); @@ -328,7 +354,7 @@ int main(int argc, char ** argv) { } // remove any "future" tokens that we might have inherited from the previous session - llama_kv_cache_seq_rm(ctx, -1, n_matching_session_tokens, -1); + llama_kv_self_seq_rm(ctx, -1, n_matching_session_tokens, -1); } LOG_DBG("recalculate the cached logits (check): embd_inp.size() %zu, n_matching_session_tokens %zu, embd_inp.size() %zu, session_tokens.size() %zu\n", @@ -350,7 +376,12 @@ int main(int argc, char ** argv) { } if (params.conversation_mode) { - params.interactive_first = true; + if (params.single_turn && !params.prompt.empty()) { + params.interactive = false; + params.interactive_first = false; + } else { + params.interactive_first = true; + } } // enable interactive mode if interactive start is specified @@ -474,8 +505,8 @@ int main(int argc, char ** argv) { LOG_INF( " - Press Ctrl+C to interject at any time.\n"); #endif LOG_INF( "%s", control_message); - if (params.conversation_mode && params.enable_chat_template && params.prompt.empty()) { - LOG_INF( " - Using default system message. To change it, set a different value via -p PROMPT or -f FILE argument.\n"); + if (params.conversation_mode && params.enable_chat_template && params.system_prompt.empty()) { + LOG_INF( " - Not using system message. 
To change it, set a different value via -sys PROMPT\n"); } LOG_INF("\n"); @@ -503,12 +534,14 @@ int main(int argc, char ** argv) { std::vector embd; - // tokenized antiprompts - std::vector> antiprompt_ids; + // single-token antiprompts + std::vector antiprompt_token; - antiprompt_ids.reserve(params.antiprompt.size()); for (const std::string & antiprompt : params.antiprompt) { - antiprompt_ids.emplace_back(::common_tokenize(ctx, antiprompt, false, true)); + auto ids = ::common_tokenize(ctx, antiprompt, false, true); + if (ids.size() == 1) { + antiprompt_token.push_back(ids[0]); + } } if (llama_model_has_encoder(model)) { @@ -569,8 +602,8 @@ int main(int argc, char ** argv) { LOG_DBG("context full, swapping: n_past = %d, n_left = %d, n_ctx = %d, n_keep = %d, n_discard = %d\n", n_past, n_left, n_ctx, params.n_keep, n_discard); - llama_kv_cache_seq_rm (ctx, 0, params.n_keep , params.n_keep + n_discard); - llama_kv_cache_seq_add(ctx, 0, params.n_keep + n_discard, n_past, -n_discard); + llama_kv_self_seq_rm (ctx, 0, params.n_keep , params.n_keep + n_discard); + llama_kv_self_seq_add(ctx, 0, params.n_keep + n_discard, n_past, -n_discard); n_past -= n_discard; @@ -593,9 +626,9 @@ int main(int argc, char ** argv) { LOG_DBG("div: [%6d, %6d] / %6d -> [%6d, %6d]\n", ga_i + ib*bd, ga_i + ib*bd + ga_w, ga_n, (ga_i + ib*bd)/ga_n, (ga_i + ib*bd + ga_w)/ga_n); LOG_DBG("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", ga_i + ib*bd + ga_w, n_past + ib*bd, dd, ga_i + ib*bd + ga_w + dd, n_past + ib*bd + dd); - llama_kv_cache_seq_add(ctx, 0, ga_i, n_past, ib*bd); - llama_kv_cache_seq_div(ctx, 0, ga_i + ib*bd, ga_i + ib*bd + ga_w, ga_n); - llama_kv_cache_seq_add(ctx, 0, ga_i + ib*bd + ga_w, n_past + ib*bd, dd); + llama_kv_self_seq_add(ctx, 0, ga_i, n_past, ib*bd); + llama_kv_self_seq_div(ctx, 0, ga_i + ib*bd, ga_i + ib*bd + ga_w, ga_n); + llama_kv_self_seq_add(ctx, 0, ga_i + ib*bd + ga_w, n_past + ib*bd, dd); n_past -= bd; @@ -753,8 +786,8 @@ int main(int argc, char ** argv) { // check for reverse prompt using special tokens llama_token last_token = common_sampler_last(smpl); - for (std::vector ids : antiprompt_ids) { - if (ids.size() == 1 && last_token == ids[0]) { + for (auto token : antiprompt_token) { + if (token == last_token) { if (params.interactive) { is_interacting = true; } @@ -769,7 +802,7 @@ int main(int argc, char ** argv) { } // deal with end of generation tokens in interactive mode - if (llama_vocab_is_eog(vocab, common_sampler_last(smpl))) { + if (!waiting_for_first_input && llama_vocab_is_eog(vocab, common_sampler_last(smpl))) { LOG_DBG("found an EOG token\n"); if (params.interactive) { @@ -789,12 +822,17 @@ int main(int argc, char ** argv) { } // if current token is not EOG, we add it to current assistant message - if (params.conversation_mode) { + if (params.conversation_mode && !waiting_for_first_input) { const auto id = common_sampler_last(smpl); assistant_ss << common_token_to_piece(ctx, id, false); + + if (!prompt.empty()) { + prompt.clear(); + is_interacting = false; + } } - if (n_past > 0 && is_interacting) { + if ((n_past > 0 || waiting_for_first_input) && is_interacting) { LOG_DBG("waiting for user input\n"); if (params.conversation_mode) { @@ -827,9 +865,22 @@ int main(int argc, char ** argv) { console::set_display(console::reset); display = true; - // Add tokens to embd only if the input buffer is non-empty - // Entering a empty line lets the user pass control back - if (buffer.length() > 1) { + if (buffer.empty()) { // Ctrl+D on empty line exits + LOG("EOF by user\n"); + break; + } + + if 
(buffer.back() == '\n') { + // Implement #587: + // If the user wants the text to end in a newline, + // this should be accomplished by explicitly adding a newline by using \ followed by return, + // then returning control by pressing return again. + buffer.pop_back(); + } + + if (buffer.empty()) { // Enter key on empty line lets the user pass control back + LOG_DBG("empty line, passing control back\n"); + } else { // Add tokens to embd only if the input buffer is non-empty // append input suffix if any if (!params.input_suffix.empty() && !params.conversation_mode) { LOG_DBG("appending input suffix: '%s'\n", params.input_suffix.c_str()); @@ -877,18 +928,22 @@ int main(int argc, char ** argv) { n_remain -= line_inp.size(); LOG_DBG("n_remain: %d\n", n_remain); - } else { - LOG_DBG("empty line, passing control back\n"); } input_echo = false; // do not echo this again } - if (n_past > 0) { + if (n_past > 0 || waiting_for_first_input) { if (is_interacting) { common_sampler_reset(smpl); } is_interacting = false; + + if (waiting_for_first_input && params.single_turn) { + params.interactive = false; + params.interactive_first = false; + } + waiting_for_first_input = false; } } diff --git a/examples/parallel/parallel.cpp b/examples/parallel/parallel.cpp index 7ef43d5e12..80698518e3 100644 --- a/examples/parallel/parallel.cpp +++ b/examples/parallel/parallel.cpp @@ -12,6 +12,7 @@ #include #include #include +#include // trim whitespace from the beginning and end of a string static std::string trim(const std::string & str) { @@ -105,6 +106,8 @@ int main(int argc, char ** argv) { common_params params; + params.n_predict = 128; + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_PARALLEL)) { return 1; } @@ -201,7 +204,7 @@ int main(int argc, char ** argv) { // assign the system KV cache to all parallel sequences for (int32_t i = 1; i <= n_clients; ++i) { - llama_kv_cache_seq_cp(ctx, 0, i, -1, -1); + llama_kv_self_seq_cp(ctx, 0, i, -1, -1); } LOG_INF("\n"); @@ -233,9 +236,9 @@ int main(int argc, char ** argv) { if (batch.n_tokens == 0) { // all sequences have ended - clear the entire KV cache for (int i = 1; i <= n_clients; ++i) { - llama_kv_cache_seq_rm(ctx, i, -1, -1); + llama_kv_self_seq_rm(ctx, i, -1, -1); // but keep the system prompt - llama_kv_cache_seq_cp(ctx, 0, i, -1, -1); + llama_kv_self_seq_cp(ctx, 0, i, -1, -1); } LOG_INF("%s: clearing the KV cache\n", __func__); @@ -371,8 +374,8 @@ int main(int argc, char ** argv) { } // delete only the generated part of the sequence, i.e. 
keep the system prompt in the cache - llama_kv_cache_seq_rm(ctx, client.id + 1, -1, -1); - llama_kv_cache_seq_cp(ctx, 0, client.id + 1, -1, -1); + llama_kv_self_seq_rm(ctx, client.id + 1, -1, -1); + llama_kv_self_seq_cp(ctx, 0, client.id + 1, -1, -1); const auto t_main_end = ggml_time_us(); @@ -404,7 +407,7 @@ int main(int argc, char ** argv) { params.prompt_file = "used built-in defaults"; } LOG_INF("External prompt file: \033[32m%s\033[0m\n", params.prompt_file.c_str()); - LOG_INF("Model and path used: \033[32m%s\033[0m\n\n", params.model.c_str()); + LOG_INF("Model and path used: \033[32m%s\033[0m\n\n", params.model.path.c_str()); LOG_INF("Total prompt tokens: %6d, speed: %5.2f t/s\n", n_total_prompt, (double) (n_total_prompt ) / (t_main_end - t_main_start) * 1e6); LOG_INF("Total gen tokens: %6d, speed: %5.2f t/s\n", n_total_gen, (double) (n_total_gen ) / (t_main_end - t_main_start) * 1e6); diff --git a/examples/passkey/README.md b/examples/passkey/README.md index 2b8e910f96..2f19597c48 100644 --- a/examples/passkey/README.md +++ b/examples/passkey/README.md @@ -5,8 +5,8 @@ models ability to recall information from long contexts. See the following PRs for more info: -- https://github.com/ggerganov/llama.cpp/pull/3856 -- https://github.com/ggerganov/llama.cpp/pull/4810 +- https://github.com/ggml-org/llama.cpp/pull/3856 +- https://github.com/ggml-org/llama.cpp/pull/4810 ### Usage diff --git a/examples/passkey/passkey.cpp b/examples/passkey/passkey.cpp index 5953928d47..347ea4a698 100644 --- a/examples/passkey/passkey.cpp +++ b/examples/passkey/passkey.cpp @@ -7,6 +7,7 @@ #include #include #include +#include static void print_usage(int, char ** argv) { LOG("\nexample usage:\n"); @@ -63,7 +64,7 @@ int main(int argc, char ** argv) { llama_model_params model_params = common_model_params_to_llama(params); - llama_model * model = llama_model_load_from_file(params.model.c_str(), model_params); + llama_model * model = llama_model_load_from_file(params.model.path.c_str(), model_params); if (model == NULL) { LOG_ERR("%s: unable to load model\n" , __func__); @@ -132,11 +133,11 @@ int main(int argc, char ** argv) { const int ib = i/n_batch - 1; const int bd = n_batch_grp*(n_grp - 1); - llama_kv_cache_seq_add (ctx, 0, n_past - n_batch, n_past, ib*bd); - llama_kv_cache_seq_div (ctx, 0, n_past - n_batch + ib*bd, n_past + ib*bd, n_grp); - llama_kv_cache_update (ctx); + llama_kv_self_seq_add (ctx, 0, n_past - n_batch, n_past, ib*bd); + llama_kv_self_seq_div (ctx, 0, n_past - n_batch + ib*bd, n_past + ib*bd, n_grp); + llama_kv_self_update (ctx); - n_past = llama_kv_cache_seq_pos_max(ctx, 0) + 1; + n_past = llama_kv_self_seq_pos_max(ctx, 0) + 1; } common_batch_clear(batch); @@ -166,12 +167,12 @@ int main(int argc, char ** argv) { LOG_INF("%s: shifting KV cache with %d\n", __func__, n_discard); - llama_kv_cache_seq_rm (ctx, 0, n_keep , n_keep + n_discard); - llama_kv_cache_seq_add(ctx, 0, n_keep + n_discard, n_ctx, -n_discard); - //llama_kv_cache_defrag (ctx); - llama_kv_cache_update (ctx); + llama_kv_self_seq_rm (ctx, 0, n_keep , n_keep + n_discard); + llama_kv_self_seq_add(ctx, 0, n_keep + n_discard, n_ctx, -n_discard); + //llama_kv_self_defrag (ctx); + llama_kv_self_update (ctx); - n_past = llama_kv_cache_seq_pos_max(ctx, 0) + 1; + n_past = llama_kv_self_seq_pos_max(ctx, 0) + 1; common_batch_clear(batch); @@ -197,12 +198,12 @@ int main(int argc, char ** argv) { if (n_discard > 0) { LOG_INF("%s: shifting KV cache with %d to free space for the answer\n", __func__, n_discard); - llama_kv_cache_seq_rm (ctx, 0, 
n_keep , n_keep + n_discard); - llama_kv_cache_seq_add(ctx, 0, n_keep + n_discard, n_ctx, -n_discard); - //llama_kv_cache_defrag (ctx); - llama_kv_cache_update (ctx); + llama_kv_self_seq_rm (ctx, 0, n_keep , n_keep + n_discard); + llama_kv_self_seq_add(ctx, 0, n_keep + n_discard, n_ctx, -n_discard); + //llama_kv_self_defrag (ctx); + llama_kv_self_update (ctx); - n_past = llama_kv_cache_seq_pos_max(ctx, 0) + 1; + n_past = llama_kv_self_seq_pos_max(ctx, 0) + 1; } } diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp index 9bf6c57433..175f2804b5 100644 --- a/examples/perplexity/perplexity.cpp +++ b/examples/perplexity/perplexity.cpp @@ -3,6 +3,7 @@ #include "log.h" #include "llama.h" +#include #include #include #include @@ -360,7 +361,7 @@ static results_perplexity perplexity_v2(llama_context * ctx, const common_params const auto t_start = std::chrono::high_resolution_clock::now(); // clear the KV cache - llama_kv_cache_clear(ctx); + llama_kv_self_clear(ctx); llama_batch batch = llama_batch_init(n_batch, 0, 1); @@ -546,7 +547,7 @@ static results_perplexity perplexity(llama_context * ctx, const common_params & const auto t_start = std::chrono::high_resolution_clock::now(); // clear the KV cache - llama_kv_cache_clear(ctx); + llama_kv_self_clear(ctx); for (int j = 0; j < num_batches; ++j) { const int batch_start = start + j * n_batch; @@ -850,7 +851,7 @@ static void hellaswag_score(llama_context * ctx, const common_params & params) { LOG_INF("%s : calculating hellaswag score over selected tasks.\n", __func__); - LOG("\ntask\tacc_norm\n"); + LOG("\ntask\tacc_norm\t95%% confidence interval\n"); double acc = 0.0f; @@ -923,7 +924,7 @@ static void hellaswag_score(llama_context * ctx, const common_params & params) { return; } - llama_kv_cache_clear(ctx); + llama_kv_self_clear(ctx); // decode all tasks [i0, i1) if (!decode_helper(ctx, batch, batch_logits, n_batch, n_vocab)) { @@ -984,8 +985,22 @@ static void hellaswag_score(llama_context * ctx, const common_params & params) { acc += 1.0; } - // Print the accumulated accuracy mean x 100 - LOG("%zu\t%.8lf\n", i + 1, acc/double(i + 1)*100.0); + double freq = acc / double(i + 1); + + const double za = 1.95996398454; + + // // Wald normal approx + // double conf =za*sqrt(freq*(1-freq)/double(i + 1)); + // LOG("%zu\t%.8lf +/- %.8lf\n", i + 1, freq*100.0, conf*100.0); + + // Wilson score interval, more accurate + double z = za * za / double(i + 1); + double cnf = z * sqrt(double(i + 1) * (4.0 * freq * (1 - freq) + z)) / (za + za); + double a = (freq + z * 0.5 - cnf) / (1.0 + z); + double b = (freq + z * 0.5 + cnf) / (1.0 + z); + + // Print the accumulated accuracy mean x 100 and confidence interval + LOG("%zu\t%3.8lf%%\t[%3.4lf%%, %3.4lf%%]\n", i + 1, freq * 100.0, a * 100.0, b * 100.0); } i0 = i1 - 1; @@ -1202,7 +1217,7 @@ static void winogrande_score(llama_context * ctx, const common_params & params) return; } - llama_kv_cache_clear(ctx); + llama_kv_self_clear(ctx); // decode all tasks [i0, i1) if (!decode_helper(ctx, batch, batch_logits, n_batch, n_vocab)) { @@ -1574,7 +1589,7 @@ static void multiple_choice_score(llama_context * ctx, const common_params & par return; } - llama_kv_cache_clear(ctx); + llama_kv_self_clear(ctx); // decode all tasks [i0, i1) if (!decode_helper(ctx, batch, batch_logits, n_batch, n_vocab)) { @@ -1764,7 +1779,7 @@ static void kl_divergence(llama_context * ctx, const common_params & params) { } // clear the KV cache - llama_kv_cache_clear(ctx); + llama_kv_self_clear(ctx); llama_batch batch = 
llama_batch_init(n_batch, 0, 1); diff --git a/examples/pydantic_models_to_grammar_examples.py b/examples/pydantic_models_to_grammar_examples.py index eb000d5ccb..f94b82ca47 100755 --- a/examples/pydantic_models_to_grammar_examples.py +++ b/examples/pydantic_models_to_grammar_examples.py @@ -23,7 +23,7 @@ def create_completion(host, prompt, gbnf_grammar): """Calls the /completion API on llama-server. See - https://github.com/ggerganov/llama.cpp/tree/HEAD/examples/server#api-endpoints + https://github.com/ggml-org/llama.cpp/tree/HEAD/examples/server#api-endpoints """ print(f" Request:\n Grammar:\n{textwrap.indent(gbnf_grammar, ' ')}\n Prompt:\n{textwrap.indent(prompt.rstrip(), ' ')}") headers = {"Content-Type": "application/json"} diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp index bd2f734670..dd07ab9b37 100644 --- a/examples/quantize-stats/quantize-stats.cpp +++ b/examples/quantize-stats/quantize-stats.cpp @@ -1,6 +1,6 @@ #include "ggml.h" #include "llama.h" -#include "llama-context.h" +#include "llama-model.h" #include "common.h" #include @@ -328,7 +328,7 @@ int main(int argc, char ** argv) { } } - const auto & tensors = llama_internal_get_tensor_map(ctx); + const auto & tensors = llama_internal_get_tensor_map(model); // check layer tensors int included_layers = 0; diff --git a/examples/quantize/README.md b/examples/quantize/README.md index f9cce7b213..992d00e21b 100644 --- a/examples/quantize/README.md +++ b/examples/quantize/README.md @@ -69,22 +69,22 @@ Several quantization methods are supported. They differ in the resulting model d | 13B | ms/tok @ 8th | - | 73 | 82 | 98 | 105 | 128 | | 13B | bits/weight | 16.0 | 4.5 | 5.0 | 5.5 | 6.0 | 8.5 | -- [k-quants](https://github.com/ggerganov/llama.cpp/pull/1684) +- [k-quants](https://github.com/ggml-org/llama.cpp/pull/1684) - recent k-quants improvements and new i-quants - - [#2707](https://github.com/ggerganov/llama.cpp/pull/2707) - - [#2807](https://github.com/ggerganov/llama.cpp/pull/2807) - - [#4773 - 2-bit i-quants (inference)](https://github.com/ggerganov/llama.cpp/pull/4773) - - [#4856 - 2-bit i-quants (inference)](https://github.com/ggerganov/llama.cpp/pull/4856) - - [#4861 - importance matrix](https://github.com/ggerganov/llama.cpp/pull/4861) - - [#4872 - MoE models](https://github.com/ggerganov/llama.cpp/pull/4872) - - [#4897 - 2-bit quantization](https://github.com/ggerganov/llama.cpp/pull/4897) - - [#4930 - imatrix for all k-quants](https://github.com/ggerganov/llama.cpp/pull/4930) - - [#4951 - imatrix on the GPU](https://github.com/ggerganov/llama.cpp/pull/4957) - - [#4969 - imatrix for legacy quants](https://github.com/ggerganov/llama.cpp/pull/4969) - - [#4996 - k-quants tuning](https://github.com/ggerganov/llama.cpp/pull/4996) - - [#5060 - Q3_K_XS](https://github.com/ggerganov/llama.cpp/pull/5060) - - [#5196 - 3-bit i-quants](https://github.com/ggerganov/llama.cpp/pull/5196) - - [quantization tuning](https://github.com/ggerganov/llama.cpp/pull/5320), [another one](https://github.com/ggerganov/llama.cpp/pull/5334), and [another one](https://github.com/ggerganov/llama.cpp/pull/5361) + - [#2707](https://github.com/ggml-org/llama.cpp/pull/2707) + - [#2807](https://github.com/ggml-org/llama.cpp/pull/2807) + - [#4773 - 2-bit i-quants (inference)](https://github.com/ggml-org/llama.cpp/pull/4773) + - [#4856 - 2-bit i-quants (inference)](https://github.com/ggml-org/llama.cpp/pull/4856) + - [#4861 - importance matrix](https://github.com/ggml-org/llama.cpp/pull/4861) + - [#4872 - MoE 
models](https://github.com/ggml-org/llama.cpp/pull/4872) + - [#4897 - 2-bit quantization](https://github.com/ggml-org/llama.cpp/pull/4897) + - [#4930 - imatrix for all k-quants](https://github.com/ggml-org/llama.cpp/pull/4930) + - [#4951 - imatrix on the GPU](https://github.com/ggml-org/llama.cpp/pull/4957) + - [#4969 - imatrix for legacy quants](https://github.com/ggml-org/llama.cpp/pull/4969) + - [#4996 - k-quants tuning](https://github.com/ggml-org/llama.cpp/pull/4996) + - [#5060 - Q3_K_XS](https://github.com/ggml-org/llama.cpp/pull/5060) + - [#5196 - 3-bit i-quants](https://github.com/ggml-org/llama.cpp/pull/5196) + - [quantization tuning](https://github.com/ggml-org/llama.cpp/pull/5320), [another one](https://github.com/ggml-org/llama.cpp/pull/5334), and [another one](https://github.com/ggml-org/llama.cpp/pull/5361) **Llama 2 7B** diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp index 8d47b17b6b..0355311dc5 100644 --- a/examples/quantize/quantize.cpp +++ b/examples/quantize/quantize.cpp @@ -8,6 +8,8 @@ #include #include #include +#include +#include struct quant_option { std::string name; @@ -15,7 +17,7 @@ struct quant_option { std::string desc; }; -static const std::vector QUANT_OPTIONS = { +static const std::vector QUANT_OPTIONS = { { "Q4_0", LLAMA_FTYPE_MOSTLY_Q4_0, " 4.34G, +0.4685 ppl @ Llama-3-8B", }, { "Q4_1", LLAMA_FTYPE_MOSTLY_Q4_1, " 4.78G, +0.4511 ppl @ Llama-3-8B", }, { "Q5_0", LLAMA_FTYPE_MOSTLY_Q5_0, " 5.21G, +0.1316 ppl @ Llama-3-8B", }, @@ -104,7 +106,8 @@ static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftyp // [[noreturn]] static void usage(const char * executable) { - printf("usage: %s [--help] [--allow-requantize] [--leave-output-tensor] [--pure] [--imatrix] [--include-weights] [--exclude-weights] [--output-tensor-type] [--token-embedding-type] [--override-kv] model-f32.gguf [model-quant.gguf] type [nthreads]\n\n", executable); + printf("usage: %s [--help] [--allow-requantize] [--leave-output-tensor] [--pure] [--imatrix] [--include-weights] [--exclude-weights] [--output-tensor-type]\n", executable); + printf(" [--token-embedding-type] [--tensor-type] [--keep-split] [--override-kv] model-f32.gguf [model-quant.gguf] type [nthreads]\n\n"); printf(" --allow-requantize: Allows requantizing tensors that have already been quantized. Warning: This can severely reduce quality compared to quantizing from 16bit or 32bit\n"); printf(" --leave-output-tensor: Will leave output.weight un(re)quantized. Increases model size but may also increase quality, especially when requantizing\n"); printf(" --pure: Disable k-quant mixtures and quantize all tensors to the same type\n"); @@ -113,6 +116,8 @@ static void usage(const char * executable) { printf(" --exclude-weights tensor_name: use importance matrix for this/these tensor(s)\n"); printf(" --output-tensor-type ggml_type: use this ggml_type for the output.weight tensor\n"); printf(" --token-embedding-type ggml_type: use this ggml_type for the token embeddings tensor\n"); + printf(" --tensor-type TENSOR=TYPE: quantize this tensor to this ggml_type. example: --tensor-type attn_q=q8_0\n"); + printf(" Advanced option to selectively quantize tensors. May be specified multiple times.\n"); printf(" --keep-split: will generate quantized model in the same shards as input\n"); printf(" --override-kv KEY=TYPE:VALUE\n"); printf(" Advanced option to override model metadata by key in the quantized model. 
May be specified multiple times.\n"); @@ -243,6 +248,107 @@ static ggml_type parse_ggml_type(const char * arg) { return GGML_TYPE_COUNT; } +// Allowed tensors for arbitrary quantization with --tensor-type option +static const std::vector ALLOWED_TENSOR_TYPE = { + "attn_k", + "attn_kv_a_mqa", + "attn_kv_b", + "attn_o", + "attn_output", + "attn_q", + "attn_q_a", + "attn_q_b", + "attn_qkv", + "attn_v", + "channel_mix_key", + "channel_mix_receptance", + "channel_mix_value", + "cls", + "cls.output", + "cross_attn_k", + "cross_attn_o", + "cross_attn_q", + "cross_attn_v", + "ffn_act", + "ffn_down", + "ffn_down_exps", + "ffn_down_shexp", + "ffn_gate", + "ffn_gate_exps", + "ffn_gate_shexp", + "ffn_up", + "ffn_up_exps", + "ffn_up_shexp", + "ssm_in", + "ssm_out", + "time_mix_gate", + "time_mix_key", + "time_mix_output", + "time_mix_receptance", + "time_mix_value", +}; + +// changes to this struct must be replicated in llama-quant.cpp +struct tensor_quantization { + std::string name; + ggml_type quant = GGML_TYPE_COUNT; +}; + +static bool parse_tensor_type(const char * data, std::vector & tensor_type) { + const char * sep = strchr(data, '='); + if (sep == nullptr) { + printf("\n%s: malformed tensor type '%s'\n\n", __func__, data); + return false; + } + + const size_t tn_len = sep - data; + if (tn_len == 0) { + printf("\n%s: missing tensor name\n\n", __func__); + return false; + } + + if (const size_t qt_len = strlen(sep); qt_len == 1) { + printf("\n%s: missing quantization type\n\n", __func__); + return false; + } + + std::string tn(data, tn_len); + std::transform(tn.begin(), tn.end(), tn.begin(), tolower); + sep++; + const std::string qt(sep); + + bool found = false; + for (const auto & allowed : ALLOWED_TENSOR_TYPE) { + std::string tensor; + tensor = tn.rfind('.') != std::string::npos ? 
tn.substr(tn.rfind('.') + 1) : tn; + // handle special case of cls.output + std::string cls_output = "cls.output"; + if (tn.find(cls_output) != std::string::npos) { + tensor = "cls.output"; + } + // check if an allowed tensor exists and it's at the end of the kv string + if (tensor == allowed) { + found = true; + break; + } + } + if (!found) { + printf("\n%s: invalid tensor name '%s'\n\n", __func__, tn.c_str()); + return false; + } + + if (parse_ggml_type(qt.c_str()) == GGML_TYPE_COUNT) { + printf("\n%s: invalid quantization type '%s'\n\n", __func__, qt.c_str()); + return false; + } + + tensor_quantization tqz; + tqz.name = tn; + tqz.quant = parse_ggml_type(qt.c_str()); + tensor_type.emplace_back(std::move(tqz)); + return true; +} + int main(int argc, char ** argv) { if (argc < 3) { usage(argv[0]); @@ -254,6 +360,7 @@ int main(int argc, char ** argv) { std::string imatrix_file; std::vector included_weights, excluded_weights; std::vector kv_overrides; + std::vector tensor_types; for (; arg_idx < argc && strncmp(argv[arg_idx], "--", 2) == 0; arg_idx++) { if (strcmp(argv[arg_idx], "--leave-output-tensor") == 0) { @@ -276,6 +383,10 @@ int main(int argc, char ** argv) { } else { usage(argv[0]); } + } else if (strcmp(argv[arg_idx], "--tensor-type") == 0) { + if (arg_idx == argc-1 || !parse_tensor_type(argv[++arg_idx], tensor_types)) { + usage(argv[0]); + } } else if (strcmp(argv[arg_idx], "--override-kv") == 0) { if (arg_idx == argc-1 || !string_parse_kv_override(argv[++arg_idx], kv_overrides)) { usage(argv[0]); @@ -360,6 +471,9 @@ int main(int argc, char ** argv) { kv_overrides.back().key[0] = 0; params.kv_overrides = &kv_overrides; } + if (!tensor_types.empty()) { + params.tensor_types = &tensor_types; + } llama_backend_init(); diff --git a/examples/retrieval/README.md b/examples/retrieval/README.md index bc5f22e2ff..6938a1e96e 100644 --- a/examples/retrieval/README.md +++ b/examples/retrieval/README.md @@ -3,7 +3,7 @@ Demonstration of simple retrieval technique based on cosine similarity More info: -https://github.com/ggerganov/llama.cpp/pull/6193 +https://github.com/ggml-org/llama.cpp/pull/6193 ### How to use diff --git a/examples/retrieval/retrieval.cpp b/examples/retrieval/retrieval.cpp index 2439022a22..0efe20d4b3 100644 --- a/examples/retrieval/retrieval.cpp +++ b/examples/retrieval/retrieval.cpp @@ -83,7 +83,7 @@ static void batch_add_seq(llama_batch & batch, const std::vector & toke static void batch_decode(llama_context * ctx, llama_batch & batch, float * output, int n_seq, int n_embd) { // clear previous kv_cache values (irrelevant for embeddings) - llama_kv_cache_clear(ctx); + llama_kv_self_clear(ctx); // run model LOG_INF("%s: n_tokens = %d, n_seq = %d\n", __func__, batch.n_tokens, n_seq); diff --git a/examples/rpc/CMakeLists.txt b/examples/rpc/CMakeLists.txt index ae48fb98d0..c2c7481486 100644 --- a/examples/rpc/CMakeLists.txt +++ b/examples/rpc/CMakeLists.txt @@ -1,2 +1,4 @@ -add_executable(rpc-server rpc-server.cpp) -target_link_libraries(rpc-server PRIVATE ggml llama) +set(TARGET rpc-server) +add_executable(${TARGET} rpc-server.cpp) +target_link_libraries(${TARGET} PRIVATE ggml) +target_compile_features(${TARGET} PRIVATE cxx_std_17) diff --git a/examples/rpc/README.md b/examples/rpc/README.md index 312bb634dc..561f19fda6 100644 --- a/examples/rpc/README.md +++ b/examples/rpc/README.md @@ -72,3 +72,14 @@ $ bin/llama-cli -m ../models/tinyllama-1b/ggml-model-f16.gguf -p "Hello, my name This way you can offload model layers to both local and remote devices. 
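For orientation, the server side of that workflow is thin: it exposes a local ggml backend over a TCP endpoint. Below is a rough, assumption-laden sketch rather than the shipped `rpc-server.cpp`: it presumes the CPU backend via `ggml_backend_cpu_init()`, uses the five-argument `ggml_backend_rpc_start_server()` introduced in this patch, and the endpoint and memory values are placeholders.

```cpp
// Rough sketch of what rpc-server does: expose a local backend over RPC.
// Not the actual rpc-server.cpp; values below are illustrative placeholders.
#include <cstddef>
#include "ggml-cpu.h"
#include "ggml-rpc.h"

int main() {
    ggml_backend_t backend = ggml_backend_cpu_init();

    const size_t free_mem  = 8ull * 1024 * 1024 * 1024; // memory advertised to clients
    const size_t total_mem = free_mem;

    // nullptr disables the local file cache (enabled with `rpc-server -c`, see below)
    ggml_backend_rpc_start_server(backend, "127.0.0.1:50052", /*cache_dir=*/nullptr, free_mem, total_mem);

    ggml_backend_free(backend);
    return 0;
}
```

Passing a directory instead of `nullptr` is what the new `-c` flag wires up, as described in the Local cache section that follows.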
+### Local cache + +The RPC server can use a local cache to store large tensors and avoid transferring them over the network. +This can speed up model loading significantly, especially when using large models. +To enable the cache, use the `-c` option: + +```bash +$ bin/rpc-server -c +``` + +By default, the cache is stored in the `$HOME/.cache/llama.cpp/rpc` directory and can be controlled via the `LLAMA_CACHE` environment variable. diff --git a/examples/rpc/rpc-server.cpp b/examples/rpc/rpc-server.cpp index 8b1b23edad..9db5542570 100644 --- a/examples/rpc/rpc-server.cpp +++ b/examples/rpc/rpc-server.cpp @@ -1,3 +1,7 @@ +#if defined(_MSC_VER) +#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING +#endif + #include "ggml-cpu.h" #ifdef GGML_USE_CUDA @@ -18,26 +22,144 @@ #include "ggml-rpc.h" #ifdef _WIN32 +# define DIRECTORY_SEPARATOR '\\' +# include # include +# include +# include #else +# define DIRECTORY_SEPARATOR '/' # include +# include #endif +#include #include #include +#include +#include + +namespace fs = std::filesystem; + +// NOTE: this is copied from common.cpp to avoid linking with libcommon +// returns true if successful, false otherwise +static bool fs_create_directory_with_parents(const std::string & path) { +#ifdef _WIN32 + std::wstring_convert> converter; + std::wstring wpath = converter.from_bytes(path); + + // if the path already exists, check whether it's a directory + const DWORD attributes = GetFileAttributesW(wpath.c_str()); + if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) { + return true; + } + + size_t pos_slash = 0; + + // process path from front to back, procedurally creating directories + while ((pos_slash = path.find('\\', pos_slash)) != std::string::npos) { + const std::wstring subpath = wpath.substr(0, pos_slash); + const wchar_t * test = subpath.c_str(); + + const bool success = CreateDirectoryW(test, NULL); + if (!success) { + const DWORD error = GetLastError(); + + // if the path already exists, ensure that it's a directory + if (error == ERROR_ALREADY_EXISTS) { + const DWORD attributes = GetFileAttributesW(subpath.c_str()); + if (attributes == INVALID_FILE_ATTRIBUTES || !(attributes & FILE_ATTRIBUTE_DIRECTORY)) { + return false; + } + } else { + return false; + } + } + + pos_slash += 1; + } + + return true; +#else + // if the path already exists, check whether it's a directory + struct stat info; + if (stat(path.c_str(), &info) == 0) { + return S_ISDIR(info.st_mode); + } + + size_t pos_slash = 1; // skip leading slashes for directory creation + + // process path from front to back, procedurally creating directories + while ((pos_slash = path.find('/', pos_slash)) != std::string::npos) { + const std::string subpath = path.substr(0, pos_slash); + struct stat info; + + // if the path already exists, ensure that it's a directory + if (stat(subpath.c_str(), &info) == 0) { + if (!S_ISDIR(info.st_mode)) { + return false; + } + } else { + // create parent directories + const int ret = mkdir(subpath.c_str(), 0755); + if (ret != 0) { + return false; + } + } + + pos_slash += 1; + } + + return true; +#endif // _WIN32 +} + +// NOTE: this is copied from common.cpp to avoid linking with libcommon +static std::string fs_get_cache_directory() { + std::string cache_directory = ""; + auto ensure_trailing_slash = [](std::string p) { + // Make sure to add trailing slash + if (p.back() != DIRECTORY_SEPARATOR) { + p += DIRECTORY_SEPARATOR; + } + return p; + }; + if (getenv("LLAMA_CACHE")) { + cache_directory = 
std::getenv("LLAMA_CACHE"); + } else { +#if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX) + if (std::getenv("XDG_CACHE_HOME")) { + cache_directory = std::getenv("XDG_CACHE_HOME"); + } else { + cache_directory = std::getenv("HOME") + std::string("/.cache/"); + } +#elif defined(__APPLE__) + cache_directory = std::getenv("HOME") + std::string("/Library/Caches/"); +#elif defined(_WIN32) + cache_directory = std::getenv("LOCALAPPDATA"); +#else +# error Unknown architecture +#endif + cache_directory = ensure_trailing_slash(cache_directory); + cache_directory += "llama.cpp"; + } + return ensure_trailing_slash(cache_directory); +} struct rpc_server_params { std::string host = "127.0.0.1"; int port = 50052; size_t backend_mem = 0; + bool use_cache = false; }; static void print_usage(int /*argc*/, char ** argv, rpc_server_params params) { fprintf(stderr, "Usage: %s [options]\n\n", argv[0]); fprintf(stderr, "options:\n"); - fprintf(stderr, " -h, --help show this help message and exit\n"); - fprintf(stderr, " -H HOST, --host HOST host to bind to (default: %s)\n", params.host.c_str()); - fprintf(stderr, " -p PORT, --port PORT port to bind to (default: %d)\n", params.port); - fprintf(stderr, " -m MEM, --mem MEM backend memory size (in MB)\n"); + fprintf(stderr, " -h, --help show this help message and exit\n"); + fprintf(stderr, " -H HOST, --host HOST host to bind to (default: %s)\n", params.host.c_str()); + fprintf(stderr, " -p PORT, --port PORT port to bind to (default: %d)\n", params.port); + fprintf(stderr, " -m MEM, --mem MEM backend memory size (in MB)\n"); + fprintf(stderr, " -c, --cache enable local file cache\n"); fprintf(stderr, "\n"); } @@ -58,6 +180,8 @@ static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params & if (params.port <= 0 || params.port > 65535) { return false; } + } else if (arg == "-c" || arg == "--cache") { + params.use_cache = true; } else if (arg == "-m" || arg == "--mem") { if (++i >= argc) { return false; @@ -164,8 +288,23 @@ int main(int argc, char * argv[]) { } else { get_backend_memory(&free_mem, &total_mem); } - printf("Starting RPC server on %s, backend memory: %zu MB\n", endpoint.c_str(), free_mem / (1024 * 1024)); - ggml_backend_rpc_start_server(backend, endpoint.c_str(), free_mem, total_mem); + const char * cache_dir = nullptr; + std::string cache_dir_str = fs_get_cache_directory() + "rpc/"; + if (params.use_cache) { + if (!fs_create_directory_with_parents(cache_dir_str)) { + fprintf(stderr, "Failed to create cache directory: %s\n", cache_dir_str.c_str()); + return 1; + } + cache_dir = cache_dir_str.c_str(); + } + printf("Starting RPC server v%d.%d.%d\n", + RPC_PROTO_MAJOR_VERSION, + RPC_PROTO_MINOR_VERSION, + RPC_PROTO_PATCH_VERSION); + printf(" endpoint : %s\n", endpoint.c_str()); + printf(" local cache : %s\n", cache_dir ? 
cache_dir : "n/a"); + printf(" backend memory : %zu MB\n", free_mem / (1024 * 1024)); + ggml_backend_rpc_start_server(backend, endpoint.c_str(), cache_dir, free_mem, total_mem); ggml_backend_free(backend); return 0; } diff --git a/examples/run/CMakeLists.txt b/examples/run/CMakeLists.txt index cd6b0520e0..7cff188ca6 100644 --- a/examples/run/CMakeLists.txt +++ b/examples/run/CMakeLists.txt @@ -1,5 +1,16 @@ set(TARGET llama-run) add_executable(${TARGET} run.cpp linenoise.cpp/linenoise.cpp) + +# TODO: avoid copying this code block from common/CMakeLists.txt +set(LLAMA_RUN_EXTRA_LIBS "") +if (LLAMA_CURL) + find_package(CURL REQUIRED) + target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL) + include_directories(${CURL_INCLUDE_DIRS}) + find_library(CURL_LIBRARY curl REQUIRED) + set(LLAMA_RUN_EXTRA_LIBS ${LLAMA_RUN_EXTRA_LIBS} ${CURL_LIBRARY}) +endif () + install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT} ${LLAMA_RUN_EXTRA_LIBS}) target_compile_features(${TARGET} PRIVATE cxx_std_17) diff --git a/examples/run/linenoise.cpp/linenoise.cpp b/examples/run/linenoise.cpp/linenoise.cpp index a68f12a1a0..9cb9399003 100644 --- a/examples/run/linenoise.cpp/linenoise.cpp +++ b/examples/run/linenoise.cpp/linenoise.cpp @@ -75,7 +75,7 @@ * * DSR (Device Status Report) * Sequence: ESC [ 6 n - * Effect: reports the current cusor position as ESC [ n ; m R + * Effect: reports the current cursor position as ESC [ n ; m R * where n is the row and m is the column * * When multi line mode is enabled, we also use an additional escape @@ -107,6 +107,7 @@ # include # include +# include # include # include # include @@ -205,9 +206,9 @@ class File { int fd = -1; }; -__attribute__((format(printf, 1, 2))) -/* Debugging function. */ #if 0 +/* Debugging function. */ +__attribute__((format(printf, 1, 2))) static void lndebug(const char *fmt, ...) { static File file; if (file.file == nullptr) { @@ -222,11 +223,469 @@ static void lndebug(const char *fmt, ...) { fflush(file.file); } } -#else -static void lndebug(const char *, ...) 
{ -} #endif +/* ========================== Encoding functions ============================= */ + +/* Get length of previous UTF8 codepoint */ +static size_t prevUtf8CodePointLen(const char * buf, int pos) { + int end = pos--; + while (pos >= 0 && ((unsigned char) buf[pos] & 0xC0) == 0x80) { + pos--; + } + return end - pos; +} + +/* Convert UTF8 to Unicode code point */ +static size_t utf8BytesToCodePoint(const char * buf, size_t len, int * cp) { + if (len) { + unsigned char byte = buf[0]; + if ((byte & 0x80) == 0) { + *cp = byte; + return 1; + } else if ((byte & 0xE0) == 0xC0) { + if (len >= 2) { + *cp = (((unsigned long) (buf[0] & 0x1F)) << 6) | ((unsigned long) (buf[1] & 0x3F)); + return 2; + } + } else if ((byte & 0xF0) == 0xE0) { + if (len >= 3) { + *cp = (((unsigned long) (buf[0] & 0x0F)) << 12) | (((unsigned long) (buf[1] & 0x3F)) << 6) | + ((unsigned long) (buf[2] & 0x3F)); + return 3; + } + } else if ((byte & 0xF8) == 0xF0) { + if (len >= 4) { + *cp = (((unsigned long) (buf[0] & 0x07)) << 18) | (((unsigned long) (buf[1] & 0x3F)) << 12) | + (((unsigned long) (buf[2] & 0x3F)) << 6) | ((unsigned long) (buf[3] & 0x3F)); + return 4; + } + } + } + return 0; +} + +/* Check if the code is a wide character */ +static const unsigned long wideCharTable[][2] = { + /* BEGIN: WIDE CHAR TABLE */ + { 0x1100, 0x115F }, + { 0x231A, 0x231B }, + { 0x2329, 0x232A }, + { 0x23E9, 0x23EC }, + { 0x23F0, 0x23F0 }, + { 0x23F3, 0x23F3 }, + { 0x25FD, 0x25FE }, + { 0x2614, 0x2615 }, + { 0x2630, 0x2637 }, + { 0x2648, 0x2653 }, + { 0x267F, 0x267F }, + { 0x268A, 0x268F }, + { 0x2693, 0x2693 }, + { 0x26A1, 0x26A1 }, + { 0x26AA, 0x26AB }, + { 0x26BD, 0x26BE }, + { 0x26C4, 0x26C5 }, + { 0x26CE, 0x26CE }, + { 0x26D4, 0x26D4 }, + { 0x26EA, 0x26EA }, + { 0x26F2, 0x26F3 }, + { 0x26F5, 0x26F5 }, + { 0x26FA, 0x26FA }, + { 0x26FD, 0x26FD }, + { 0x2705, 0x2705 }, + { 0x270A, 0x270B }, + { 0x2728, 0x2728 }, + { 0x274C, 0x274C }, + { 0x274E, 0x274E }, + { 0x2753, 0x2755 }, + { 0x2757, 0x2757 }, + { 0x2795, 0x2797 }, + { 0x27B0, 0x27B0 }, + { 0x27BF, 0x27BF }, + { 0x2B1B, 0x2B1C }, + { 0x2B50, 0x2B50 }, + { 0x2B55, 0x2B55 }, + { 0x2E80, 0x2E99 }, + { 0x2E9B, 0x2EF3 }, + { 0x2F00, 0x2FD5 }, + { 0x2FF0, 0x303E }, + { 0x3041, 0x3096 }, + { 0x3099, 0x30FF }, + { 0x3105, 0x312F }, + { 0x3131, 0x318E }, + { 0x3190, 0x31E5 }, + { 0x31EF, 0x321E }, + { 0x3220, 0x3247 }, + { 0x3250, 0xA48C }, + { 0xA490, 0xA4C6 }, + { 0xA960, 0xA97C }, + { 0xAC00, 0xD7A3 }, + { 0xF900, 0xFAFF }, + { 0xFE10, 0xFE19 }, + { 0xFE30, 0xFE52 }, + { 0xFE54, 0xFE66 }, + { 0xFE68, 0xFE6B }, + { 0xFF01, 0xFF60 }, + { 0xFFE0, 0xFFE6 }, + { 0x16FE0, 0x16FE4 }, + { 0x16FF0, 0x16FF1 }, + { 0x17000, 0x187F7 }, + { 0x18800, 0x18CD5 }, + { 0x18CFF, 0x18D08 }, + { 0x1AFF0, 0x1AFF3 }, + { 0x1AFF5, 0x1AFFB }, + { 0x1AFFD, 0x1AFFE }, + { 0x1B000, 0x1B122 }, + { 0x1B132, 0x1B132 }, + { 0x1B150, 0x1B152 }, + { 0x1B155, 0x1B155 }, + { 0x1B164, 0x1B167 }, + { 0x1B170, 0x1B2FB }, + { 0x1D300, 0x1D356 }, + { 0x1D360, 0x1D376 }, + { 0x1F004, 0x1F004 }, + { 0x1F0CF, 0x1F0CF }, + { 0x1F18E, 0x1F18E }, + { 0x1F191, 0x1F19A }, + { 0x1F200, 0x1F202 }, + { 0x1F210, 0x1F23B }, + { 0x1F240, 0x1F248 }, + { 0x1F250, 0x1F251 }, + { 0x1F260, 0x1F265 }, + { 0x1F300, 0x1F320 }, + { 0x1F32D, 0x1F335 }, + { 0x1F337, 0x1F37C }, + { 0x1F37E, 0x1F393 }, + { 0x1F3A0, 0x1F3CA }, + { 0x1F3CF, 0x1F3D3 }, + { 0x1F3E0, 0x1F3F0 }, + { 0x1F3F4, 0x1F3F4 }, + { 0x1F3F8, 0x1F43E }, + { 0x1F440, 0x1F440 }, + { 0x1F442, 0x1F4FC }, + { 0x1F4FF, 0x1F53D }, + { 0x1F54B, 0x1F54E }, + { 0x1F550, 0x1F567 }, + { 
0x1F57A, 0x1F57A }, + { 0x1F595, 0x1F596 }, + { 0x1F5A4, 0x1F5A4 }, + { 0x1F5FB, 0x1F64F }, + { 0x1F680, 0x1F6C5 }, + { 0x1F6CC, 0x1F6CC }, + { 0x1F6D0, 0x1F6D2 }, + { 0x1F6D5, 0x1F6D7 }, + { 0x1F6DC, 0x1F6DF }, + { 0x1F6EB, 0x1F6EC }, + { 0x1F6F4, 0x1F6FC }, + { 0x1F7E0, 0x1F7EB }, + { 0x1F7F0, 0x1F7F0 }, + { 0x1F90C, 0x1F93A }, + { 0x1F93C, 0x1F945 }, + { 0x1F947, 0x1F9FF }, + { 0x1FA70, 0x1FA7C }, + { 0x1FA80, 0x1FA89 }, + { 0x1FA8F, 0x1FAC6 }, + { 0x1FACE, 0x1FADC }, + { 0x1FADF, 0x1FAE9 }, + { 0x1FAF0, 0x1FAF8 }, + { 0x20000, 0x2FFFD }, + { 0x30000, 0x3FFFD } + /* END: WIDE CHAR TABLE */ +}; + +static const size_t wideCharTableSize = sizeof(wideCharTable) / sizeof(wideCharTable[0]); + +static bool isWideChar(unsigned long cp) { + for (size_t i = 0; i < wideCharTableSize; i++) { + auto first_code = wideCharTable[i][0]; + auto last_code = wideCharTable[i][1]; + if (first_code > cp) { + return false; + } + if (first_code <= cp && cp <= last_code) { + return true; + } + } + return false; +} + +/* Check if the code is a combining character */ +static const unsigned long combiningCharTable[] = { + /* BEGIN: COMBINING CHAR TABLE */ + 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307, 0x0308, 0x0309, 0x030A, 0x030B, 0x030C, + 0x030D, 0x030E, 0x030F, 0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317, 0x0318, 0x0319, + 0x031A, 0x031B, 0x031C, 0x031D, 0x031E, 0x031F, 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, + 0x0327, 0x0328, 0x0329, 0x032A, 0x032B, 0x032C, 0x032D, 0x032E, 0x032F, 0x0330, 0x0331, 0x0332, 0x0333, + 0x0334, 0x0335, 0x0336, 0x0337, 0x0338, 0x0339, 0x033A, 0x033B, 0x033C, 0x033D, 0x033E, 0x033F, 0x0340, + 0x0341, 0x0342, 0x0343, 0x0344, 0x0345, 0x0346, 0x0347, 0x0348, 0x0349, 0x034A, 0x034B, 0x034C, 0x034D, + 0x034E, 0x034F, 0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357, 0x0358, 0x0359, 0x035A, + 0x035B, 0x035C, 0x035D, 0x035E, 0x035F, 0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367, + 0x0368, 0x0369, 0x036A, 0x036B, 0x036C, 0x036D, 0x036E, 0x036F, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487, + 0x0591, 0x0592, 0x0593, 0x0594, 0x0595, 0x0596, 0x0597, 0x0598, 0x0599, 0x059A, 0x059B, 0x059C, 0x059D, + 0x059E, 0x059F, 0x05A0, 0x05A1, 0x05A2, 0x05A3, 0x05A4, 0x05A5, 0x05A6, 0x05A7, 0x05A8, 0x05A9, 0x05AA, + 0x05AB, 0x05AC, 0x05AD, 0x05AE, 0x05AF, 0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, + 0x05B8, 0x05B9, 0x05BA, 0x05BB, 0x05BC, 0x05BD, 0x05BF, 0x05C1, 0x05C2, 0x05C4, 0x05C5, 0x05C7, 0x0610, + 0x0611, 0x0612, 0x0613, 0x0614, 0x0615, 0x0616, 0x0617, 0x0618, 0x0619, 0x061A, 0x064B, 0x064C, 0x064D, + 0x064E, 0x064F, 0x0650, 0x0651, 0x0652, 0x0653, 0x0654, 0x0655, 0x0656, 0x0657, 0x0658, 0x0659, 0x065A, + 0x065B, 0x065C, 0x065D, 0x065E, 0x065F, 0x0670, 0x06D6, 0x06D7, 0x06D8, 0x06D9, 0x06DA, 0x06DB, 0x06DC, + 0x06DF, 0x06E0, 0x06E1, 0x06E2, 0x06E3, 0x06E4, 0x06E7, 0x06E8, 0x06EA, 0x06EB, 0x06EC, 0x06ED, 0x0711, + 0x0730, 0x0731, 0x0732, 0x0733, 0x0734, 0x0735, 0x0736, 0x0737, 0x0738, 0x0739, 0x073A, 0x073B, 0x073C, + 0x073D, 0x073E, 0x073F, 0x0740, 0x0741, 0x0742, 0x0743, 0x0744, 0x0745, 0x0746, 0x0747, 0x0748, 0x0749, + 0x074A, 0x07A6, 0x07A7, 0x07A8, 0x07A9, 0x07AA, 0x07AB, 0x07AC, 0x07AD, 0x07AE, 0x07AF, 0x07B0, 0x07EB, + 0x07EC, 0x07ED, 0x07EE, 0x07EF, 0x07F0, 0x07F1, 0x07F2, 0x07F3, 0x07FD, 0x0816, 0x0817, 0x0818, 0x0819, + 0x081B, 0x081C, 0x081D, 0x081E, 0x081F, 0x0820, 0x0821, 0x0822, 0x0823, 0x0825, 0x0826, 0x0827, 0x0829, + 0x082A, 0x082B, 0x082C, 0x082D, 0x0859, 0x085A, 0x085B, 0x0897, 0x0898, 
0x0899, 0x089A, 0x089B, 0x089C, + 0x089D, 0x089E, 0x089F, 0x08CA, 0x08CB, 0x08CC, 0x08CD, 0x08CE, 0x08CF, 0x08D0, 0x08D1, 0x08D2, 0x08D3, + 0x08D4, 0x08D5, 0x08D6, 0x08D7, 0x08D8, 0x08D9, 0x08DA, 0x08DB, 0x08DC, 0x08DD, 0x08DE, 0x08DF, 0x08E0, + 0x08E1, 0x08E3, 0x08E4, 0x08E5, 0x08E6, 0x08E7, 0x08E8, 0x08E9, 0x08EA, 0x08EB, 0x08EC, 0x08ED, 0x08EE, + 0x08EF, 0x08F0, 0x08F1, 0x08F2, 0x08F3, 0x08F4, 0x08F5, 0x08F6, 0x08F7, 0x08F8, 0x08F9, 0x08FA, 0x08FB, + 0x08FC, 0x08FD, 0x08FE, 0x08FF, 0x0900, 0x0901, 0x0902, 0x093A, 0x093C, 0x0941, 0x0942, 0x0943, 0x0944, + 0x0945, 0x0946, 0x0947, 0x0948, 0x094D, 0x0951, 0x0952, 0x0953, 0x0954, 0x0955, 0x0956, 0x0957, 0x0962, + 0x0963, 0x0981, 0x09BC, 0x09C1, 0x09C2, 0x09C3, 0x09C4, 0x09CD, 0x09E2, 0x09E3, 0x09FE, 0x0A01, 0x0A02, + 0x0A3C, 0x0A41, 0x0A42, 0x0A47, 0x0A48, 0x0A4B, 0x0A4C, 0x0A4D, 0x0A51, 0x0A70, 0x0A71, 0x0A75, 0x0A81, + 0x0A82, 0x0ABC, 0x0AC1, 0x0AC2, 0x0AC3, 0x0AC4, 0x0AC5, 0x0AC7, 0x0AC8, 0x0ACD, 0x0AE2, 0x0AE3, 0x0AFA, + 0x0AFB, 0x0AFC, 0x0AFD, 0x0AFE, 0x0AFF, 0x0B01, 0x0B3C, 0x0B3F, 0x0B41, 0x0B42, 0x0B43, 0x0B44, 0x0B4D, + 0x0B55, 0x0B56, 0x0B62, 0x0B63, 0x0B82, 0x0BC0, 0x0BCD, 0x0C00, 0x0C04, 0x0C3C, 0x0C3E, 0x0C3F, 0x0C40, + 0x0C46, 0x0C47, 0x0C48, 0x0C4A, 0x0C4B, 0x0C4C, 0x0C4D, 0x0C55, 0x0C56, 0x0C62, 0x0C63, 0x0C81, 0x0CBC, + 0x0CBF, 0x0CC6, 0x0CCC, 0x0CCD, 0x0CE2, 0x0CE3, 0x0D00, 0x0D01, 0x0D3B, 0x0D3C, 0x0D41, 0x0D42, 0x0D43, + 0x0D44, 0x0D4D, 0x0D62, 0x0D63, 0x0D81, 0x0DCA, 0x0DD2, 0x0DD3, 0x0DD4, 0x0DD6, 0x0E31, 0x0E34, 0x0E35, + 0x0E36, 0x0E37, 0x0E38, 0x0E39, 0x0E3A, 0x0E47, 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, + 0x0EB1, 0x0EB4, 0x0EB5, 0x0EB6, 0x0EB7, 0x0EB8, 0x0EB9, 0x0EBA, 0x0EBB, 0x0EBC, 0x0EC8, 0x0EC9, 0x0ECA, + 0x0ECB, 0x0ECC, 0x0ECD, 0x0ECE, 0x0F18, 0x0F19, 0x0F35, 0x0F37, 0x0F39, 0x0F71, 0x0F72, 0x0F73, 0x0F74, + 0x0F75, 0x0F76, 0x0F77, 0x0F78, 0x0F79, 0x0F7A, 0x0F7B, 0x0F7C, 0x0F7D, 0x0F7E, 0x0F80, 0x0F81, 0x0F82, + 0x0F83, 0x0F84, 0x0F86, 0x0F87, 0x0F8D, 0x0F8E, 0x0F8F, 0x0F90, 0x0F91, 0x0F92, 0x0F93, 0x0F94, 0x0F95, + 0x0F96, 0x0F97, 0x0F99, 0x0F9A, 0x0F9B, 0x0F9C, 0x0F9D, 0x0F9E, 0x0F9F, 0x0FA0, 0x0FA1, 0x0FA2, 0x0FA3, + 0x0FA4, 0x0FA5, 0x0FA6, 0x0FA7, 0x0FA8, 0x0FA9, 0x0FAA, 0x0FAB, 0x0FAC, 0x0FAD, 0x0FAE, 0x0FAF, 0x0FB0, + 0x0FB1, 0x0FB2, 0x0FB3, 0x0FB4, 0x0FB5, 0x0FB6, 0x0FB7, 0x0FB8, 0x0FB9, 0x0FBA, 0x0FBB, 0x0FBC, 0x0FC6, + 0x102D, 0x102E, 0x102F, 0x1030, 0x1032, 0x1033, 0x1034, 0x1035, 0x1036, 0x1037, 0x1039, 0x103A, 0x103D, + 0x103E, 0x1058, 0x1059, 0x105E, 0x105F, 0x1060, 0x1071, 0x1072, 0x1073, 0x1074, 0x1082, 0x1085, 0x1086, + 0x108D, 0x109D, 0x135D, 0x135E, 0x135F, 0x1712, 0x1713, 0x1714, 0x1732, 0x1733, 0x1752, 0x1753, 0x1772, + 0x1773, 0x17B4, 0x17B5, 0x17B7, 0x17B8, 0x17B9, 0x17BA, 0x17BB, 0x17BC, 0x17BD, 0x17C6, 0x17C9, 0x17CA, + 0x17CB, 0x17CC, 0x17CD, 0x17CE, 0x17CF, 0x17D0, 0x17D1, 0x17D2, 0x17D3, 0x17DD, 0x180B, 0x180C, 0x180D, + 0x180F, 0x1885, 0x1886, 0x18A9, 0x1920, 0x1921, 0x1922, 0x1927, 0x1928, 0x1932, 0x1939, 0x193A, 0x193B, + 0x1A17, 0x1A18, 0x1A1B, 0x1A56, 0x1A58, 0x1A59, 0x1A5A, 0x1A5B, 0x1A5C, 0x1A5D, 0x1A5E, 0x1A60, 0x1A62, + 0x1A65, 0x1A66, 0x1A67, 0x1A68, 0x1A69, 0x1A6A, 0x1A6B, 0x1A6C, 0x1A73, 0x1A74, 0x1A75, 0x1A76, 0x1A77, + 0x1A78, 0x1A79, 0x1A7A, 0x1A7B, 0x1A7C, 0x1A7F, 0x1AB0, 0x1AB1, 0x1AB2, 0x1AB3, 0x1AB4, 0x1AB5, 0x1AB6, + 0x1AB7, 0x1AB8, 0x1AB9, 0x1ABA, 0x1ABB, 0x1ABC, 0x1ABD, 0x1ABF, 0x1AC0, 0x1AC1, 0x1AC2, 0x1AC3, 0x1AC4, + 0x1AC5, 0x1AC6, 0x1AC7, 0x1AC8, 0x1AC9, 0x1ACA, 0x1ACB, 0x1ACC, 0x1ACD, 0x1ACE, 0x1B00, 0x1B01, 0x1B02, + 0x1B03, 0x1B34, 
0x1B36, 0x1B37, 0x1B38, 0x1B39, 0x1B3A, 0x1B3C, 0x1B42, 0x1B6B, 0x1B6C, 0x1B6D, 0x1B6E, + 0x1B6F, 0x1B70, 0x1B71, 0x1B72, 0x1B73, 0x1B80, 0x1B81, 0x1BA2, 0x1BA3, 0x1BA4, 0x1BA5, 0x1BA8, 0x1BA9, + 0x1BAB, 0x1BAC, 0x1BAD, 0x1BE6, 0x1BE8, 0x1BE9, 0x1BED, 0x1BEF, 0x1BF0, 0x1BF1, 0x1C2C, 0x1C2D, 0x1C2E, + 0x1C2F, 0x1C30, 0x1C31, 0x1C32, 0x1C33, 0x1C36, 0x1C37, 0x1CD0, 0x1CD1, 0x1CD2, 0x1CD4, 0x1CD5, 0x1CD6, + 0x1CD7, 0x1CD8, 0x1CD9, 0x1CDA, 0x1CDB, 0x1CDC, 0x1CDD, 0x1CDE, 0x1CDF, 0x1CE0, 0x1CE2, 0x1CE3, 0x1CE4, + 0x1CE5, 0x1CE6, 0x1CE7, 0x1CE8, 0x1CED, 0x1CF4, 0x1CF8, 0x1CF9, 0x1DC0, 0x1DC1, 0x1DC2, 0x1DC3, 0x1DC4, + 0x1DC5, 0x1DC6, 0x1DC7, 0x1DC8, 0x1DC9, 0x1DCA, 0x1DCB, 0x1DCC, 0x1DCD, 0x1DCE, 0x1DCF, 0x1DD0, 0x1DD1, + 0x1DD2, 0x1DD3, 0x1DD4, 0x1DD5, 0x1DD6, 0x1DD7, 0x1DD8, 0x1DD9, 0x1DDA, 0x1DDB, 0x1DDC, 0x1DDD, 0x1DDE, + 0x1DDF, 0x1DE0, 0x1DE1, 0x1DE2, 0x1DE3, 0x1DE4, 0x1DE5, 0x1DE6, 0x1DE7, 0x1DE8, 0x1DE9, 0x1DEA, 0x1DEB, + 0x1DEC, 0x1DED, 0x1DEE, 0x1DEF, 0x1DF0, 0x1DF1, 0x1DF2, 0x1DF3, 0x1DF4, 0x1DF5, 0x1DF6, 0x1DF7, 0x1DF8, + 0x1DF9, 0x1DFA, 0x1DFB, 0x1DFC, 0x1DFD, 0x1DFE, 0x1DFF, 0x20D0, 0x20D1, 0x20D2, 0x20D3, 0x20D4, 0x20D5, + 0x20D6, 0x20D7, 0x20D8, 0x20D9, 0x20DA, 0x20DB, 0x20DC, 0x20E1, 0x20E5, 0x20E6, 0x20E7, 0x20E8, 0x20E9, + 0x20EA, 0x20EB, 0x20EC, 0x20ED, 0x20EE, 0x20EF, 0x20F0, 0x2CEF, 0x2CF0, 0x2CF1, 0x2D7F, 0x2DE0, 0x2DE1, + 0x2DE2, 0x2DE3, 0x2DE4, 0x2DE5, 0x2DE6, 0x2DE7, 0x2DE8, 0x2DE9, 0x2DEA, 0x2DEB, 0x2DEC, 0x2DED, 0x2DEE, + 0x2DEF, 0x2DF0, 0x2DF1, 0x2DF2, 0x2DF3, 0x2DF4, 0x2DF5, 0x2DF6, 0x2DF7, 0x2DF8, 0x2DF9, 0x2DFA, 0x2DFB, + 0x2DFC, 0x2DFD, 0x2DFE, 0x2DFF, 0x302A, 0x302B, 0x302C, 0x302D, 0x3099, 0x309A, 0xA66F, 0xA674, 0xA675, + 0xA676, 0xA677, 0xA678, 0xA679, 0xA67A, 0xA67B, 0xA67C, 0xA67D, 0xA69E, 0xA69F, 0xA6F0, 0xA6F1, 0xA802, + 0xA806, 0xA80B, 0xA825, 0xA826, 0xA82C, 0xA8C4, 0xA8C5, 0xA8E0, 0xA8E1, 0xA8E2, 0xA8E3, 0xA8E4, 0xA8E5, + 0xA8E6, 0xA8E7, 0xA8E8, 0xA8E9, 0xA8EA, 0xA8EB, 0xA8EC, 0xA8ED, 0xA8EE, 0xA8EF, 0xA8F0, 0xA8F1, 0xA8FF, + 0xA926, 0xA927, 0xA928, 0xA929, 0xA92A, 0xA92B, 0xA92C, 0xA92D, 0xA947, 0xA948, 0xA949, 0xA94A, 0xA94B, + 0xA94C, 0xA94D, 0xA94E, 0xA94F, 0xA950, 0xA951, 0xA980, 0xA981, 0xA982, 0xA9B3, 0xA9B6, 0xA9B7, 0xA9B8, + 0xA9B9, 0xA9BC, 0xA9BD, 0xA9E5, 0xAA29, 0xAA2A, 0xAA2B, 0xAA2C, 0xAA2D, 0xAA2E, 0xAA31, 0xAA32, 0xAA35, + 0xAA36, 0xAA43, 0xAA4C, 0xAA7C, 0xAAB0, 0xAAB2, 0xAAB3, 0xAAB4, 0xAAB7, 0xAAB8, 0xAABE, 0xAABF, 0xAAC1, + 0xAAEC, 0xAAED, 0xAAF6, 0xABE5, 0xABE8, 0xABED, 0xFB1E, 0xFE00, 0xFE01, 0xFE02, 0xFE03, 0xFE04, 0xFE05, + 0xFE06, 0xFE07, 0xFE08, 0xFE09, 0xFE0A, 0xFE0B, 0xFE0C, 0xFE0D, 0xFE0E, 0xFE0F, 0xFE20, 0xFE21, 0xFE22, + 0xFE23, 0xFE24, 0xFE25, 0xFE26, 0xFE27, 0xFE28, 0xFE29, 0xFE2A, 0xFE2B, 0xFE2C, 0xFE2D, 0xFE2E, 0xFE2F, + 0x101FD, 0x102E0, 0x10376, 0x10377, 0x10378, 0x10379, 0x1037A, 0x10A01, 0x10A02, 0x10A03, 0x10A05, 0x10A06, 0x10A0C, + 0x10A0D, 0x10A0E, 0x10A0F, 0x10A38, 0x10A39, 0x10A3A, 0x10A3F, 0x10AE5, 0x10AE6, 0x10D24, 0x10D25, 0x10D26, 0x10D27, + 0x10D69, 0x10D6A, 0x10D6B, 0x10D6C, 0x10D6D, 0x10EAB, 0x10EAC, 0x10EFC, 0x10EFD, 0x10EFE, 0x10EFF, 0x10F46, 0x10F47, + 0x10F48, 0x10F49, 0x10F4A, 0x10F4B, 0x10F4C, 0x10F4D, 0x10F4E, 0x10F4F, 0x10F50, 0x10F82, 0x10F83, 0x10F84, 0x10F85, + 0x11001, 0x11038, 0x11039, 0x1103A, 0x1103B, 0x1103C, 0x1103D, 0x1103E, 0x1103F, 0x11040, 0x11041, 0x11042, 0x11043, + 0x11044, 0x11045, 0x11046, 0x11070, 0x11073, 0x11074, 0x1107F, 0x11080, 0x11081, 0x110B3, 0x110B4, 0x110B5, 0x110B6, + 0x110B9, 0x110BA, 0x110C2, 0x11100, 0x11101, 0x11102, 0x11127, 0x11128, 0x11129, 0x1112A, 0x1112B, 
0x1112D, 0x1112E, + 0x1112F, 0x11130, 0x11131, 0x11132, 0x11133, 0x11134, 0x11173, 0x11180, 0x11181, 0x111B6, 0x111B7, 0x111B8, 0x111B9, + 0x111BA, 0x111BB, 0x111BC, 0x111BD, 0x111BE, 0x111C9, 0x111CA, 0x111CB, 0x111CC, 0x111CF, 0x1122F, 0x11230, 0x11231, + 0x11234, 0x11236, 0x11237, 0x1123E, 0x11241, 0x112DF, 0x112E3, 0x112E4, 0x112E5, 0x112E6, 0x112E7, 0x112E8, 0x112E9, + 0x112EA, 0x11300, 0x11301, 0x1133B, 0x1133C, 0x11340, 0x11366, 0x11367, 0x11368, 0x11369, 0x1136A, 0x1136B, 0x1136C, + 0x11370, 0x11371, 0x11372, 0x11373, 0x11374, 0x113BB, 0x113BC, 0x113BD, 0x113BE, 0x113BF, 0x113C0, 0x113CE, 0x113D0, + 0x113D2, 0x113E1, 0x113E2, 0x11438, 0x11439, 0x1143A, 0x1143B, 0x1143C, 0x1143D, 0x1143E, 0x1143F, 0x11442, 0x11443, + 0x11444, 0x11446, 0x1145E, 0x114B3, 0x114B4, 0x114B5, 0x114B6, 0x114B7, 0x114B8, 0x114BA, 0x114BF, 0x114C0, 0x114C2, + 0x114C3, 0x115B2, 0x115B3, 0x115B4, 0x115B5, 0x115BC, 0x115BD, 0x115BF, 0x115C0, 0x115DC, 0x115DD, 0x11633, 0x11634, + 0x11635, 0x11636, 0x11637, 0x11638, 0x11639, 0x1163A, 0x1163D, 0x1163F, 0x11640, 0x116AB, 0x116AD, 0x116B0, 0x116B1, + 0x116B2, 0x116B3, 0x116B4, 0x116B5, 0x116B7, 0x1171D, 0x1171F, 0x11722, 0x11723, 0x11724, 0x11725, 0x11727, 0x11728, + 0x11729, 0x1172A, 0x1172B, 0x1182F, 0x11830, 0x11831, 0x11832, 0x11833, 0x11834, 0x11835, 0x11836, 0x11837, 0x11839, + 0x1183A, 0x1193B, 0x1193C, 0x1193E, 0x11943, 0x119D4, 0x119D5, 0x119D6, 0x119D7, 0x119DA, 0x119DB, 0x119E0, 0x11A01, + 0x11A02, 0x11A03, 0x11A04, 0x11A05, 0x11A06, 0x11A07, 0x11A08, 0x11A09, 0x11A0A, 0x11A33, 0x11A34, 0x11A35, 0x11A36, + 0x11A37, 0x11A38, 0x11A3B, 0x11A3C, 0x11A3D, 0x11A3E, 0x11A47, 0x11A51, 0x11A52, 0x11A53, 0x11A54, 0x11A55, 0x11A56, + 0x11A59, 0x11A5A, 0x11A5B, 0x11A8A, 0x11A8B, 0x11A8C, 0x11A8D, 0x11A8E, 0x11A8F, 0x11A90, 0x11A91, 0x11A92, 0x11A93, + 0x11A94, 0x11A95, 0x11A96, 0x11A98, 0x11A99, 0x11C30, 0x11C31, 0x11C32, 0x11C33, 0x11C34, 0x11C35, 0x11C36, 0x11C38, + 0x11C39, 0x11C3A, 0x11C3B, 0x11C3C, 0x11C3D, 0x11C3F, 0x11C92, 0x11C93, 0x11C94, 0x11C95, 0x11C96, 0x11C97, 0x11C98, + 0x11C99, 0x11C9A, 0x11C9B, 0x11C9C, 0x11C9D, 0x11C9E, 0x11C9F, 0x11CA0, 0x11CA1, 0x11CA2, 0x11CA3, 0x11CA4, 0x11CA5, + 0x11CA6, 0x11CA7, 0x11CAA, 0x11CAB, 0x11CAC, 0x11CAD, 0x11CAE, 0x11CAF, 0x11CB0, 0x11CB2, 0x11CB3, 0x11CB5, 0x11CB6, + 0x11D31, 0x11D32, 0x11D33, 0x11D34, 0x11D35, 0x11D36, 0x11D3A, 0x11D3C, 0x11D3D, 0x11D3F, 0x11D40, 0x11D41, 0x11D42, + 0x11D43, 0x11D44, 0x11D45, 0x11D47, 0x11D90, 0x11D91, 0x11D95, 0x11D97, 0x11EF3, 0x11EF4, 0x11F00, 0x11F01, 0x11F36, + 0x11F37, 0x11F38, 0x11F39, 0x11F3A, 0x11F40, 0x11F42, 0x11F5A, 0x13440, 0x13447, 0x13448, 0x13449, 0x1344A, 0x1344B, + 0x1344C, 0x1344D, 0x1344E, 0x1344F, 0x13450, 0x13451, 0x13452, 0x13453, 0x13454, 0x13455, 0x1611E, 0x1611F, 0x16120, + 0x16121, 0x16122, 0x16123, 0x16124, 0x16125, 0x16126, 0x16127, 0x16128, 0x16129, 0x1612D, 0x1612E, 0x1612F, 0x16AF0, + 0x16AF1, 0x16AF2, 0x16AF3, 0x16AF4, 0x16B30, 0x16B31, 0x16B32, 0x16B33, 0x16B34, 0x16B35, 0x16B36, 0x16F4F, 0x16F8F, + 0x16F90, 0x16F91, 0x16F92, 0x16FE4, 0x1BC9D, 0x1BC9E, 0x1CF00, 0x1CF01, 0x1CF02, 0x1CF03, 0x1CF04, 0x1CF05, 0x1CF06, + 0x1CF07, 0x1CF08, 0x1CF09, 0x1CF0A, 0x1CF0B, 0x1CF0C, 0x1CF0D, 0x1CF0E, 0x1CF0F, 0x1CF10, 0x1CF11, 0x1CF12, 0x1CF13, + 0x1CF14, 0x1CF15, 0x1CF16, 0x1CF17, 0x1CF18, 0x1CF19, 0x1CF1A, 0x1CF1B, 0x1CF1C, 0x1CF1D, 0x1CF1E, 0x1CF1F, 0x1CF20, + 0x1CF21, 0x1CF22, 0x1CF23, 0x1CF24, 0x1CF25, 0x1CF26, 0x1CF27, 0x1CF28, 0x1CF29, 0x1CF2A, 0x1CF2B, 0x1CF2C, 0x1CF2D, + 0x1CF30, 0x1CF31, 0x1CF32, 0x1CF33, 0x1CF34, 0x1CF35, 0x1CF36, 0x1CF37, 0x1CF38, 
0x1CF39, 0x1CF3A, 0x1CF3B, 0x1CF3C, + 0x1CF3D, 0x1CF3E, 0x1CF3F, 0x1CF40, 0x1CF41, 0x1CF42, 0x1CF43, 0x1CF44, 0x1CF45, 0x1CF46, 0x1D167, 0x1D168, 0x1D169, + 0x1D17B, 0x1D17C, 0x1D17D, 0x1D17E, 0x1D17F, 0x1D180, 0x1D181, 0x1D182, 0x1D185, 0x1D186, 0x1D187, 0x1D188, 0x1D189, + 0x1D18A, 0x1D18B, 0x1D1AA, 0x1D1AB, 0x1D1AC, 0x1D1AD, 0x1D242, 0x1D243, 0x1D244, 0x1DA00, 0x1DA01, 0x1DA02, 0x1DA03, + 0x1DA04, 0x1DA05, 0x1DA06, 0x1DA07, 0x1DA08, 0x1DA09, 0x1DA0A, 0x1DA0B, 0x1DA0C, 0x1DA0D, 0x1DA0E, 0x1DA0F, 0x1DA10, + 0x1DA11, 0x1DA12, 0x1DA13, 0x1DA14, 0x1DA15, 0x1DA16, 0x1DA17, 0x1DA18, 0x1DA19, 0x1DA1A, 0x1DA1B, 0x1DA1C, 0x1DA1D, + 0x1DA1E, 0x1DA1F, 0x1DA20, 0x1DA21, 0x1DA22, 0x1DA23, 0x1DA24, 0x1DA25, 0x1DA26, 0x1DA27, 0x1DA28, 0x1DA29, 0x1DA2A, + 0x1DA2B, 0x1DA2C, 0x1DA2D, 0x1DA2E, 0x1DA2F, 0x1DA30, 0x1DA31, 0x1DA32, 0x1DA33, 0x1DA34, 0x1DA35, 0x1DA36, 0x1DA3B, + 0x1DA3C, 0x1DA3D, 0x1DA3E, 0x1DA3F, 0x1DA40, 0x1DA41, 0x1DA42, 0x1DA43, 0x1DA44, 0x1DA45, 0x1DA46, 0x1DA47, 0x1DA48, + 0x1DA49, 0x1DA4A, 0x1DA4B, 0x1DA4C, 0x1DA4D, 0x1DA4E, 0x1DA4F, 0x1DA50, 0x1DA51, 0x1DA52, 0x1DA53, 0x1DA54, 0x1DA55, + 0x1DA56, 0x1DA57, 0x1DA58, 0x1DA59, 0x1DA5A, 0x1DA5B, 0x1DA5C, 0x1DA5D, 0x1DA5E, 0x1DA5F, 0x1DA60, 0x1DA61, 0x1DA62, + 0x1DA63, 0x1DA64, 0x1DA65, 0x1DA66, 0x1DA67, 0x1DA68, 0x1DA69, 0x1DA6A, 0x1DA6B, 0x1DA6C, 0x1DA75, 0x1DA84, 0x1DA9B, + 0x1DA9C, 0x1DA9D, 0x1DA9E, 0x1DA9F, 0x1DAA1, 0x1DAA2, 0x1DAA3, 0x1DAA4, 0x1DAA5, 0x1DAA6, 0x1DAA7, 0x1DAA8, 0x1DAA9, + 0x1DAAA, 0x1DAAB, 0x1DAAC, 0x1DAAD, 0x1DAAE, 0x1DAAF, 0x1E000, 0x1E001, 0x1E002, 0x1E003, 0x1E004, 0x1E005, 0x1E006, + 0x1E008, 0x1E009, 0x1E00A, 0x1E00B, 0x1E00C, 0x1E00D, 0x1E00E, 0x1E00F, 0x1E010, 0x1E011, 0x1E012, 0x1E013, 0x1E014, + 0x1E015, 0x1E016, 0x1E017, 0x1E018, 0x1E01B, 0x1E01C, 0x1E01D, 0x1E01E, 0x1E01F, 0x1E020, 0x1E021, 0x1E023, 0x1E024, + 0x1E026, 0x1E027, 0x1E028, 0x1E029, 0x1E02A, 0x1E08F, 0x1E130, 0x1E131, 0x1E132, 0x1E133, 0x1E134, 0x1E135, 0x1E136, + 0x1E2AE, 0x1E2EC, 0x1E2ED, 0x1E2EE, 0x1E2EF, 0x1E4EC, 0x1E4ED, 0x1E4EE, 0x1E4EF, 0x1E5EE, 0x1E5EF, 0x1E8D0, 0x1E8D1, + 0x1E8D2, 0x1E8D3, 0x1E8D4, 0x1E8D5, 0x1E8D6, 0x1E944, 0x1E945, 0x1E946, 0x1E947, 0x1E948, 0x1E949, 0x1E94A, 0xE0100, + 0xE0101, 0xE0102, 0xE0103, 0xE0104, 0xE0105, 0xE0106, 0xE0107, 0xE0108, 0xE0109, 0xE010A, 0xE010B, 0xE010C, 0xE010D, + 0xE010E, 0xE010F, 0xE0110, 0xE0111, 0xE0112, 0xE0113, 0xE0114, 0xE0115, 0xE0116, 0xE0117, 0xE0118, 0xE0119, 0xE011A, + 0xE011B, 0xE011C, 0xE011D, 0xE011E, 0xE011F, 0xE0120, 0xE0121, 0xE0122, 0xE0123, 0xE0124, 0xE0125, 0xE0126, 0xE0127, + 0xE0128, 0xE0129, 0xE012A, 0xE012B, 0xE012C, 0xE012D, 0xE012E, 0xE012F, 0xE0130, 0xE0131, 0xE0132, 0xE0133, 0xE0134, + 0xE0135, 0xE0136, 0xE0137, 0xE0138, 0xE0139, 0xE013A, 0xE013B, 0xE013C, 0xE013D, 0xE013E, 0xE013F, 0xE0140, 0xE0141, + 0xE0142, 0xE0143, 0xE0144, 0xE0145, 0xE0146, 0xE0147, 0xE0148, 0xE0149, 0xE014A, 0xE014B, 0xE014C, 0xE014D, 0xE014E, + 0xE014F, 0xE0150, 0xE0151, 0xE0152, 0xE0153, 0xE0154, 0xE0155, 0xE0156, 0xE0157, 0xE0158, 0xE0159, 0xE015A, 0xE015B, + 0xE015C, 0xE015D, 0xE015E, 0xE015F, 0xE0160, 0xE0161, 0xE0162, 0xE0163, 0xE0164, 0xE0165, 0xE0166, 0xE0167, 0xE0168, + 0xE0169, 0xE016A, 0xE016B, 0xE016C, 0xE016D, 0xE016E, 0xE016F, 0xE0170, 0xE0171, 0xE0172, 0xE0173, 0xE0174, 0xE0175, + 0xE0176, 0xE0177, 0xE0178, 0xE0179, 0xE017A, 0xE017B, 0xE017C, 0xE017D, 0xE017E, 0xE017F, 0xE0180, 0xE0181, 0xE0182, + 0xE0183, 0xE0184, 0xE0185, 0xE0186, 0xE0187, 0xE0188, 0xE0189, 0xE018A, 0xE018B, 0xE018C, 0xE018D, 0xE018E, 0xE018F, + 0xE0190, 0xE0191, 0xE0192, 0xE0193, 0xE0194, 0xE0195, 0xE0196, 
0xE0197, 0xE0198, 0xE0199, 0xE019A, 0xE019B, 0xE019C, + 0xE019D, 0xE019E, 0xE019F, 0xE01A0, 0xE01A1, 0xE01A2, 0xE01A3, 0xE01A4, 0xE01A5, 0xE01A6, 0xE01A7, 0xE01A8, 0xE01A9, + 0xE01AA, 0xE01AB, 0xE01AC, 0xE01AD, 0xE01AE, 0xE01AF, 0xE01B0, 0xE01B1, 0xE01B2, 0xE01B3, 0xE01B4, 0xE01B5, 0xE01B6, + 0xE01B7, 0xE01B8, 0xE01B9, 0xE01BA, 0xE01BB, 0xE01BC, 0xE01BD, 0xE01BE, 0xE01BF, 0xE01C0, 0xE01C1, 0xE01C2, 0xE01C3, + 0xE01C4, 0xE01C5, 0xE01C6, 0xE01C7, 0xE01C8, 0xE01C9, 0xE01CA, 0xE01CB, 0xE01CC, 0xE01CD, 0xE01CE, 0xE01CF, 0xE01D0, + 0xE01D1, 0xE01D2, 0xE01D3, 0xE01D4, 0xE01D5, 0xE01D6, 0xE01D7, 0xE01D8, 0xE01D9, 0xE01DA, 0xE01DB, 0xE01DC, 0xE01DD, + 0xE01DE, 0xE01DF, 0xE01E0, 0xE01E1, 0xE01E2, 0xE01E3, 0xE01E4, 0xE01E5, 0xE01E6, 0xE01E7, 0xE01E8, 0xE01E9, 0xE01EA, + 0xE01EB, 0xE01EC, 0xE01ED, 0xE01EE, 0xE01EF + /* END: COMBINING CHAR TABLE */ +}; + +static const unsigned long combiningCharTableSize = sizeof(combiningCharTable) / sizeof(combiningCharTable[0]); + +static bool isCombiningChar(unsigned long cp) { + for (size_t i = 0; i < combiningCharTableSize; i++) { + auto code = combiningCharTable[i]; + if (code > cp) { + return false; + } + if (code == cp) { + return true; + } + } + return false; +} + +/* Get length of previous grapheme */ +static size_t defaultPrevCharLen(const char * buf, size_t /*buf_len*/, size_t pos, size_t * col_len) { + size_t end = pos; + while (pos > 0) { + size_t len = prevUtf8CodePointLen(buf, pos); + pos -= len; + int cp; + utf8BytesToCodePoint(buf + pos, len, &cp); + if (!isCombiningChar(cp)) { + if (col_len != NULL) { + *col_len = isWideChar(cp) ? 2 : 1; + } + return end - pos; + } + } + /* NOTREACHED */ + return 0; +} + +/* Get length of next grapheme */ +static size_t defaultNextCharLen(const char * buf, size_t buf_len, size_t pos, size_t * col_len) { + size_t beg = pos; + int cp; + size_t len = utf8BytesToCodePoint(buf + pos, buf_len - pos, &cp); + if (isCombiningChar(cp)) { + /* NOTREACHED */ + return 0; + } + if (col_len != NULL) { + *col_len = isWideChar(cp) ? 2 : 1; + } + pos += len; + while (pos < buf_len) { + int cp; + len = utf8BytesToCodePoint(buf + pos, buf_len - pos, &cp); + if (!isCombiningChar(cp)) { + return pos - beg; + } + pos += len; + } + return pos - beg; +} + +/* Read a Unicode from file. 
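Reviewer note: both lookup helpers above (isWideChar() and isCombiningChar()) do a linear scan and rely on the tables being sorted in ascending order so they can bail out as soon as the current entry exceeds the code point. A binary search over the same sorted data would be an equally valid alternative with O(log n) lookups; the following is only a sketch of that idea, not something this patch uses.

#include <algorithm>
#include <cstddef>

struct CpRange { unsigned long first, last; };

// Sketch only: membership test over a sorted, non-overlapping range table,
// behaviourally equivalent to the linear scan in isWideChar() above.
static bool inSortedRanges(const CpRange * table, size_t n, unsigned long cp) {
    const CpRange * it = std::upper_bound(table, table + n, cp,
        [](unsigned long v, const CpRange & r) { return v < r.first; });
    if (it == table) {
        return false;        // cp is below the first range
    }
    --it;                    // last range whose start is <= cp
    return cp <= it->last;
}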
*/ +static size_t defaultReadCode(int fd, char * buf, size_t buf_len, int * cp) { + if (buf_len < 1) { + return -1; + } + size_t nread = read(fd, &buf[0], 1); + if (nread <= 0) { + return nread; + } + + unsigned char byte = buf[0]; + if ((byte & 0x80) == 0) { + ; + } else if ((byte & 0xE0) == 0xC0) { + if (buf_len < 2) { + return -1; + } + nread = read(fd, &buf[1], 1); + if (nread <= 0) { + return nread; + } + } else if ((byte & 0xF0) == 0xE0) { + if (buf_len < 3) { + return -1; + } + nread = read(fd, &buf[1], 2); + if (nread <= 0) { + return nread; + } + } else if ((byte & 0xF8) == 0xF0) { + if (buf_len < 3) { + return -1; + } + nread = read(fd, &buf[1], 3); + if (nread <= 0) { + return nread; + } + } else { + return -1; + } + + return utf8BytesToCodePoint(buf, buf_len, cp); +} + +/* Set default encoding functions */ +static linenoisePrevCharLen * prevCharLen = defaultPrevCharLen; +static linenoiseNextCharLen * nextCharLen = defaultNextCharLen; +static linenoiseReadCode * readCode = defaultReadCode; + +/* Set used defined encoding functions */ +void linenoiseSetEncodingFunctions(linenoisePrevCharLen * prevCharLenFunc, linenoiseNextCharLen * nextCharLenFunc, + linenoiseReadCode * readCodeFunc) { + prevCharLen = prevCharLenFunc; + nextCharLen = nextCharLenFunc; + readCode = readCodeFunc; +} + /* ======================= Low level terminal handling ====================== */ /* Enable "mask mode". When it is enabled, instead of the input that @@ -408,6 +867,73 @@ static void refreshLineWithCompletion(struct linenoiseState *ls, linenoiseComple } } +enum ESC_TYPE { ESC_NULL = 0, ESC_DELETE, ESC_UP, ESC_DOWN, ESC_RIGHT, ESC_LEFT, ESC_HOME, ESC_END }; + +static ESC_TYPE readEscapeSequence(struct linenoiseState * l) { + /* Check if the file input has additional data. */ + struct pollfd pfd; + pfd.fd = l->ifd; + pfd.events = POLLIN; + + auto ret = poll(&pfd, 1, 1); // 1 millisecond timeout + if (ret <= 0) { // -1: error, 0: timeout + return ESC_NULL; + } + + /* Read the next two bytes representing the escape sequence. + * Use two calls to handle slow terminals returning the two + * chars at different times. */ + char seq[3]; + if (read(l->ifd, seq, 1) == -1) { + return ESC_NULL; + } + if (read(l->ifd, seq + 1, 1) == -1) { + return ESC_NULL; + } + + /* ESC [ sequences. */ + if (seq[0] == '[') { + if (seq[1] >= '0' && seq[1] <= '9') { + /* Extended escape, read additional byte. */ + if (read(l->ifd, seq + 2, 1) == -1) { + return ESC_NULL; + } + if (seq[2] == '~') { + switch (seq[1]) { + case '3': + return ESC_DELETE; + } + } + } else { + switch (seq[1]) { + case 'A': + return ESC_UP; + case 'B': + return ESC_DOWN; + case 'C': + return ESC_RIGHT; + case 'D': + return ESC_LEFT; + case 'H': + return ESC_HOME; + case 'F': + return ESC_END; + } + } + } + + /* ESC O sequences. */ + else if (seq[0] == 'O') { + switch (seq[1]) { + case 'H': + return ESC_HOME; + case 'F': + return ESC_END; + } + } + return ESC_NULL; +} + /* This is an helper function for linenoiseEdit*() and is called when the * user types the key in order to complete the string currently in the * input. @@ -418,11 +944,11 @@ static void refreshLineWithCompletion(struct linenoiseState *ls, linenoiseComple * If the function returns non-zero, the caller should handle the * returned value as a byte read from the standard input, and process * it as usually: this basically means that the function may return a byte - * read from the termianl but not processed. Otherwise, if zero is returned, + * read from the terminal but not processed. 
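Reviewer note: defaultReadCode() above infers the sequence length from the lead byte's high bits and then reads the remaining continuation bytes. One detail worth flagging: the four-byte branch guards buf_len < 3 but then writes three continuation bytes into buf[1..3], so a stricter guard would arguably be buf_len < 4; in practice this is harmless because linenoiseEditFeed() passes a 32-byte buffer. A minimal standalone version of the lead-byte classification, for reference only:

// Sketch (not part of the patch): total UTF-8 sequence length implied by a
// lead byte, mirroring the bit tests in defaultReadCode(). Returns 0 for a
// continuation byte or an invalid lead byte.
static int utf8SeqLenFromLeadByte(unsigned char byte) {
    if ((byte & 0x80) == 0x00) return 1;   // 0xxxxxxx: ASCII
    if ((byte & 0xE0) == 0xC0) return 2;   // 110xxxxx: 1 continuation byte
    if ((byte & 0xF0) == 0xE0) return 3;   // 1110xxxx: 2 continuation bytes
    if ((byte & 0xF8) == 0xF0) return 4;   // 11110xxx: 3 continuation bytes
    return 0;                              // 10xxxxxx or 0xF8..0xFF
}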
Otherwise, if zero is returned, * the input was consumed by the completeLine() function to navigate the * possible completions, and the caller should read for the next characters * from stdin. */ -static int completeLine(struct linenoiseState *ls, int keypressed) { +static int completeLine(struct linenoiseState * ls, int keypressed, ESC_TYPE esc_type) { linenoiseCompletions lc; int nwritten; char c = keypressed; @@ -432,31 +958,31 @@ static int completeLine(struct linenoiseState *ls, int keypressed) { linenoiseBeep(); ls->in_completion = 0; } else { - switch(c) { - case 9: /* tab */ - if (ls->in_completion == 0) { - ls->in_completion = 1; - ls->completion_idx = 0; - } else { - ls->completion_idx = (ls->completion_idx + 1) % (lc.len + 1); - if (ls->completion_idx == lc.len) linenoiseBeep(); + if (c == TAB) { + if (ls->in_completion == 0) { + ls->in_completion = 1; + ls->completion_idx = 0; + } else { + ls->completion_idx = (ls->completion_idx + 1) % (lc.len + 1); + if (ls->completion_idx == lc.len) { + linenoiseBeep(); } - c = 0; - break; - case 27: /* escape */ - /* Re-show original buffer */ - if (ls->completion_idx < lc.len) refreshLine(ls); - ls->in_completion = 0; - c = 0; - break; - default: - /* Update buffer and return */ - if (ls->completion_idx < lc.len) { - nwritten = snprintf(ls->buf, ls->buflen, "%s", lc.cvec[ls->completion_idx]); - ls->len = ls->pos = nwritten; - } - ls->in_completion = 0; - break; + } + c = 0; + } else if (c == ESC && esc_type == ESC_NULL) { + /* Re-show original buffer */ + if (ls->completion_idx < lc.len) { + refreshLine(ls); + } + ls->in_completion = 0; + c = 0; + } else { + /* Update buffer and return */ + if (ls->completion_idx < lc.len) { + nwritten = snprintf(ls->buf, ls->buflen, "%s", lc.cvec[ls->completion_idx]); + ls->len = ls->pos = nwritten; + } + ls->in_completion = 0; } /* Show completion or original buffer */ @@ -508,16 +1034,30 @@ void linenoiseAddCompletion(linenoiseCompletions *lc, const char *str) { lc->cvec[lc->len++] = copy.release(); } +/* Get column length from begining of buffer to current byte position */ +static size_t columnPos(const char * buf, size_t buf_len, size_t pos) { + size_t ret = 0; + size_t off = 0; + while (off < pos) { + size_t col_len; + size_t len = nextCharLen(buf, buf_len, off, &col_len); + off += len; + ret += col_len; + } + return ret; +} + /* Helper of refreshSingleLine() and refreshMultiLine() to show hints * to the right of the prompt. 
*/ -static void refreshShowHints(std::string & ab, struct linenoiseState * l, int plen) { +static void refreshShowHints(std::string & ab, struct linenoiseState * l, int pcollen) { char seq[64]; - if (hintsCallback && plen+l->len < l->cols) { + size_t collen = pcollen + columnPos(l->buf, l->len, l->len); + if (hintsCallback && collen < l->cols) { int color = -1, bold = 0; const char *hint = hintsCallback(l->buf,&color,&bold); if (hint) { int hintlen = strlen(hint); - int hintmaxlen = l->cols-(plen+l->len); + int hintmaxlen = l->cols - collen; if (hintlen > hintmaxlen) hintlen = hintmaxlen; if (bold == 1 && color == -1) color = 37; if (color != -1 || bold != 0) @@ -535,6 +1075,50 @@ static void refreshShowHints(std::string & ab, struct linenoiseState * l, int pl } } +/* Check if text is an ANSI escape sequence */ +static int isAnsiEscape(const char * buf, size_t buf_len, size_t * len) { + if (buf_len > 2 && !memcmp("\033[", buf, 2)) { + size_t off = 2; + while (off < buf_len) { + switch (buf[off++]) { + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'J': + case 'K': + case 'S': + case 'T': + case 'f': + case 'm': + *len = off; + return 1; + } + } + } + return 0; +} + +/* Get column length of prompt text */ +static size_t promptTextColumnLen(const char * prompt, size_t plen) { + char buf[LINENOISE_MAX_LINE]; + size_t buf_len = 0; + size_t off = 0; + while (off < plen) { + size_t len; + if (isAnsiEscape(prompt + off, plen - off, &len)) { + off += len; + continue; + } + buf[buf_len++] = prompt[off++]; + } + return columnPos(buf, buf_len, buf_len); +} + /* Single line low level line refresh. * * Rewrite the currently edited line accordingly to the buffer content, @@ -544,19 +1128,21 @@ static void refreshShowHints(std::string & ab, struct linenoiseState * l, int pl * prompt, just write it, or both. */ static void refreshSingleLine(struct linenoiseState *l, int flags) { char seq[64]; - size_t plen = strlen(l->prompt); + size_t pcollen = promptTextColumnLen(l->prompt, strlen(l->prompt)); int fd = l->ofd; char *buf = l->buf; size_t len = l->len; size_t pos = l->pos; std::string ab; - while((plen+pos) >= l->cols) { - buf++; - len--; - pos--; + + while ((pcollen + columnPos(buf, len, pos)) >= l->cols) { + int chlen = nextCharLen(buf, len, 0, NULL); + buf += chlen; + len -= chlen; + pos -= chlen; } - while (plen+len > l->cols) { - len--; + while (pcollen + columnPos(buf, len, len) > l->cols) { + len -= prevCharLen(buf, len, len, NULL); } /* Cursor to left edge */ @@ -574,7 +1160,7 @@ static void refreshSingleLine(struct linenoiseState *l, int flags) { ab.append(buf, len); } /* Show hits if any. */ - refreshShowHints(ab, l, plen); + refreshShowHints(ab, l, pcollen); } /* Erase to right */ @@ -582,13 +1168,43 @@ static void refreshSingleLine(struct linenoiseState *l, int flags) { ab.append(seq); if (flags & REFRESH_WRITE) { /* Move cursor to original position. */ - snprintf(seq,sizeof(seq),"\r\x1b[%dC", (int)(pos+plen)); + snprintf(seq, sizeof(seq), "\r\x1b[%dC", (int) (columnPos(buf, len, pos) + pcollen)); ab.append(seq); } (void) !write(fd, ab.c_str(), ab.size()); /* Can't recover from write error. 
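Reviewer note: columnPos() and promptTextColumnLen() above are what make the single-line refresh width-aware. The prompt is first stripped of ANSI escape sequences (via isAnsiEscape()) and the remainder is measured in terminal columns rather than bytes, so a CJK character such as U+3042 counts as two columns while occupying three bytes. A simplified, self-contained sketch of the visible-width idea, assuming an all-ASCII payload (the real promptTextColumnLen() instead collects the visible bytes and measures them with columnPos(), so wide and multi-byte characters are handled too):

#include <cctype>
#include <cstring>

// Simplified sketch: columns occupied by a prompt once ANSI "\x1b[...X"
// sequences are skipped. Assumes the visible part is plain ASCII.
static size_t visiblePromptWidth(const char * prompt) {
    size_t width = 0;
    for (size_t i = 0, n = strlen(prompt); i < n; ) {
        if (prompt[i] == '\x1b' && i + 1 < n && prompt[i + 1] == '[') {
            i += 2;
            while (i < n && !isalpha((unsigned char) prompt[i])) {
                i++;             // parameter bytes, e.g. "1;32"
            }
            if (i < n) {
                i++;             // final byte, e.g. 'm'
            }
        } else {
            width++;             // one visible ASCII column
            i++;
        }
    }
    return width;
}

// e.g. visiblePromptWidth("\x1b[1;32mllama>\x1b[0m ") == 7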
*/ } +/* Get column length from begining of buffer to current byte position for multiline mode*/ +static size_t columnPosForMultiLine(const char * buf, size_t buf_len, size_t pos, size_t cols, size_t ini_pos) { + size_t ret = 0; + size_t colwid = ini_pos; + + size_t off = 0; + while (off < buf_len) { + size_t col_len; + size_t len = nextCharLen(buf, buf_len, off, &col_len); + + int dif = (int) (colwid + col_len) - (int) cols; + if (dif > 0) { + ret += dif; + colwid = col_len; + } else if (dif == 0) { + colwid = 0; + } else { + colwid += col_len; + } + + if (off >= pos) { + break; + } + off += len; + ret += col_len; + } + + return ret; +} + /* Multi line low level line refresh. * * Rewrite the currently edited line accordingly to the buffer content, @@ -598,11 +1214,13 @@ static void refreshSingleLine(struct linenoiseState *l, int flags) { * prompt, just write it, or both. */ static void refreshMultiLine(struct linenoiseState *l, int flags) { char seq[64]; - int plen = strlen(l->prompt); - int rows = (plen+l->len+l->cols-1)/l->cols; /* rows used by current buf. */ - int rpos = (plen+l->oldpos+l->cols)/l->cols; /* cursor relative row. */ + size_t pcollen = promptTextColumnLen(l->prompt, strlen(l->prompt)); + int colpos = columnPosForMultiLine(l->buf, l->len, l->len, l->cols, pcollen); + int colpos2; /* cursor column position. */ + int rows = (pcollen + colpos + l->cols - 1) / l->cols; /* rows used by current buf. */ + int rpos = (pcollen + l->oldcolpos + l->cols) / l->cols; /* cursor relative row. */ int rpos2; /* rpos after refresh. */ - int col; /* colum position, zero-based. */ + int col; /* column position, zero-based. */ int old_rows = l->oldrows; int fd = l->ofd, j; std::string ab; @@ -611,15 +1229,13 @@ static void refreshMultiLine(struct linenoiseState *l, int flags) { /* First step: clear all the lines used before. To do so start by * going to the last row. */ if (flags & REFRESH_CLEAN) { - if (old_rows-rpos > 0) { - lndebug("go down %d", old_rows-rpos); + if (old_rows - rpos > 0) { snprintf(seq,64,"\x1b[%dB", old_rows-rpos); ab.append(seq); } /* Now for every row clear it, go up. */ - for (j = 0; j < old_rows-1; j++) { - lndebug("clear+up"); + for (j = 0; j < old_rows - 1; j++) { snprintf(seq,64,"\r\x1b[0K\x1b[1A"); ab.append(seq); } @@ -627,11 +1243,13 @@ static void refreshMultiLine(struct linenoiseState *l, int flags) { if (flags & REFRESH_ALL) { /* Clean the top line. */ - lndebug("clear"); snprintf(seq,64,"\r\x1b[0K"); ab.append(seq); } + /* Get column length to cursor position */ + colpos2 = columnPosForMultiLine(l->buf, l->len, l->pos, l->cols, pcollen); + if (flags & REFRESH_WRITE) { /* Write the prompt and the current buffer content */ ab.append(l->prompt); @@ -644,15 +1262,11 @@ static void refreshMultiLine(struct linenoiseState *l, int flags) { } /* Show hits if any. */ - refreshShowHints(ab, l, plen); + refreshShowHints(ab, l, pcollen); /* If we are at the very end of the screen with our prompt, we need to * emit a newline and move the prompt to the first column. */ - if (l->pos && - l->pos == l->len && - (l->pos+plen) % l->cols == 0) - { - lndebug(""); + if (l->pos && l->pos == l->len && (colpos2 + pcollen) % l->cols == 0) { ab.append("\n"); snprintf(seq,64,"\r"); ab.append(seq); @@ -661,19 +1275,16 @@ static void refreshMultiLine(struct linenoiseState *l, int flags) { } /* Move cursor to right position. 
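Reviewer note: for the multi-line path, columnPosForMultiLine() and the arithmetic above turn column counts into terminal rows. A small worked example with illustrative numbers (not taken from the patch):

// Illustrative numbers only: a terminal of cols = 80, a prompt occupying
// pcollen = 5 columns, and a buffer occupying colpos = 200 columns.
//
//   rows = (pcollen + colpos + cols - 1) / cols
//        = (5 + 200 + 79) / 80
//        = 3                      // the edited line spans three terminal rows
//
// The cursor row (rpos2, just below) is derived from the same column
// arithmetic, using colpos2, the column position of l->pos rather than l->len.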
*/ - rpos2 = (plen+l->pos+l->cols)/l->cols; /* Current cursor relative row */ - lndebug("rpos2 %d", rpos2); + rpos2 = (pcollen + colpos2 + l->cols) / l->cols; /* Current cursor relative row */ - /* Go up till we reach the expected positon. */ - if (rows-rpos2 > 0) { - lndebug("go-up %d", rows-rpos2); + /* Go up till we reach the expected position. */ + if (rows - rpos2 > 0) { snprintf(seq,64,"\x1b[%dA", rows-rpos2); ab.append(seq); } /* Set column. */ - col = (plen+(int)l->pos) % (int)l->cols; - lndebug("set col %d", 1+col); + col = (pcollen + colpos2) % l->cols; if (col) snprintf(seq,64,"\r\x1b[%dC", col); else @@ -681,8 +1292,8 @@ static void refreshMultiLine(struct linenoiseState *l, int flags) { ab.append(seq); } - lndebug("\n"); - l->oldpos = l->pos; + l->oldcolpos = colpos2; + (void) !write(fd, ab.c_str(), ab.size()); /* Can't recover from write error. */ } @@ -720,26 +1331,36 @@ void linenoiseShow(struct linenoiseState *l) { /* Insert the character 'c' at cursor current position. * * On error writing to the terminal -1 is returned, otherwise 0. */ -static int linenoiseEditInsert(struct linenoiseState * l, char c) { - if (l->len < l->buflen) { +static int linenoiseEditInsert(struct linenoiseState * l, const char * cbuf, int clen) { + if (l->len + clen <= l->buflen) { if (l->len == l->pos) { - l->buf[l->pos] = c; - l->pos++; - l->len++; + memcpy(&l->buf[l->pos], cbuf, clen); + l->pos += clen; + l->len += clen; + ; l->buf[l->len] = '\0'; - if ((!mlmode && l->plen+l->len < l->cols && !hintsCallback)) { + if ((!mlmode && promptTextColumnLen(l->prompt, l->plen) + columnPos(l->buf, l->len, l->len) < l->cols && + !hintsCallback)) { /* Avoid a full update of the line in the * trivial case. */ - char d = (maskmode==1) ? '*' : c; - if (write(l->ofd,&d,1) == -1) return -1; + if (maskmode == 1) { + static const char d = '*'; + if (write(l->ofd, &d, 1) == -1) { + return -1; + } + } else { + if (write(l->ofd, cbuf, clen) == -1) { + return -1; + } + } } else { refreshLine(l); } } else { - memmove(l->buf+l->pos+1,l->buf+l->pos,l->len-l->pos); - l->buf[l->pos] = c; - l->len++; - l->pos++; + memmove(l->buf + l->pos + clen, l->buf + l->pos, l->len - l->pos); + memcpy(&l->buf[l->pos], cbuf, clen); + l->pos += clen; + l->len += clen; l->buf[l->len] = '\0'; refreshLine(l); } @@ -750,7 +1371,7 @@ static int linenoiseEditInsert(struct linenoiseState * l, char c) { /* Move cursor on the left. */ static void linenoiseEditMoveLeft(struct linenoiseState * l) { if (l->pos > 0) { - l->pos--; + l->pos -= prevCharLen(l->buf, l->len, l->pos, NULL); refreshLine(l); } } @@ -758,7 +1379,7 @@ static void linenoiseEditMoveLeft(struct linenoiseState * l) { /* Move cursor on the right. */ static void linenoiseEditMoveRight(struct linenoiseState * l) { if (l->pos != l->len) { - l->pos++; + l->pos += nextCharLen(l->buf, l->len, l->pos, NULL); refreshLine(l); } } @@ -810,8 +1431,9 @@ static void linenoiseEditHistoryNext(struct linenoiseState * l, int dir) { * position. Basically this is what happens with the "Delete" keyboard key. */ static void linenoiseEditDelete(struct linenoiseState * l) { if (l->len > 0 && l->pos < l->len) { - memmove(l->buf+l->pos,l->buf+l->pos+1,l->len-l->pos-1); - l->len--; + int chlen = nextCharLen(l->buf, l->len, l->pos, NULL); + memmove(l->buf + l->pos, l->buf + l->pos + chlen, l->len - l->pos - chlen); + l->len -= chlen; l->buf[l->len] = '\0'; refreshLine(l); } @@ -820,15 +1442,16 @@ static void linenoiseEditDelete(struct linenoiseState * l) { /* Backspace implementation. 
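Reviewer note: the edit operations above now move and delete by whole code points; prevCharLen()/nextCharLen() resolve to the UTF-8 grapheme helpers by default, so a single backspace over a two-byte character such as "é" removes both bytes. prevUtf8CodePointLen() itself is defined earlier in the patch (outside this hunk); a minimal equivalent of the backward step, for illustration only:

// Minimal sketch of stepping backwards over UTF-8 (the patch has its own
// prevUtf8CodePointLen() earlier in the file). Continuation bytes look like
// 10xxxxxx, so walk back over them and include the lead byte before them.
static size_t prevCodePointLen(const char * buf, size_t pos) {
    size_t len = 0;
    while (pos > 0) {
        unsigned char b = (unsigned char) buf[--pos];
        len++;
        if ((b & 0xC0) != 0x80) {
            break;               // reached the lead byte (or a 1-byte char)
        }
    }
    return len;
}

// e.g. for buf = "n\xC3\xA9" ("né", with é encoded as 0xC3 0xA9),
// prevCodePointLen(buf, 3) == 2, which is why linenoiseEditBackspace()
// (just below) shrinks pos and len by chlen rather than by 1.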
*/ static void linenoiseEditBackspace(struct linenoiseState * l) { if (l->pos > 0 && l->len > 0) { - memmove(l->buf+l->pos-1,l->buf+l->pos,l->len-l->pos); - l->pos--; - l->len--; + int chlen = prevCharLen(l->buf, l->len, l->pos, NULL); + memmove(l->buf + l->pos - chlen, l->buf + l->pos, l->len - l->pos); + l->pos -= chlen; + l->len -= chlen; l->buf[l->len] = '\0'; refreshLine(l); } } -/* Delete the previosu word, maintaining the cursor at the start of the +/* Delete the previous word, maintaining the cursor at the start of the * current word. */ static void linenoiseEditDeletePrevWord(struct linenoiseState * l) { size_t old_pos = l->pos; @@ -855,7 +1478,7 @@ static void linenoiseEditDeletePrevWord(struct linenoiseState * l) { * each time there is some data arriving in the standard input. * * The user can also call linenoiseEditHide() and linenoiseEditShow() if it - * is required to show some input arriving asyncronously, without mixing + * is required to show some input arriving asynchronously, without mixing * it with the currently edited line. * * When linenoiseEditFeed() returns non-NULL, the user finished with the @@ -878,7 +1501,7 @@ int linenoiseEditStart(struct linenoiseState *l, int stdin_fd, int stdout_fd, ch l->buflen = buflen; l->prompt = prompt; l->plen = strlen(prompt); - l->oldpos = l->pos = 0; + l->oldcolpos = l->pos = 0; l->len = 0; /* Enter raw mode. */ @@ -890,7 +1513,7 @@ int linenoiseEditStart(struct linenoiseState *l, int stdin_fd, int stdout_fd, ch /* Buffer starts empty. */ l->buf[0] = '\0'; - l->buflen--; /* Make sure there is always space for the nulterm */ + l->buflen--; /* Make sure there is always space for the nullterm */ /* If stdin is not a tty, stop here with the initialization. We * will actually just read a line from standard input in blocking @@ -907,6 +1530,159 @@ int linenoiseEditStart(struct linenoiseState *l, int stdin_fd, int stdout_fd, ch const char* linenoiseEditMore = "If you see this, you are misusing the API: when linenoiseEditFeed() is called, if it returns linenoiseEditMore the user is yet editing the line. See the README file for more information."; +static const char * handleEnterKey(struct linenoiseState * l) { + --history_len; + free(history[history_len]); + if (mlmode) { + linenoiseEditMoveEnd(l); + } + if (hintsCallback) { + /* Force a refresh without hints to leave the previous + * line as the user typed it after a newline. 
*/ + linenoiseHintsCallback * hc = hintsCallback; + hintsCallback = NULL; + refreshLine(l); + hintsCallback = hc; + } + + return strdup(l->buf); +} + +static const char * handleCtrlCKey() { + errno = EAGAIN; + return NULL; +} + +static const char * handleCtrlDKey(struct linenoiseState * l) { + if (l->len > 0) { + linenoiseEditDelete(l); + return linenoiseEditMore; + } + + --history_len; + free(history[history_len]); + errno = ENOENT; + return NULL; +} + +static void handleCtrlTKey(struct linenoiseState * l) { + if (l->pos > 0 && l->pos < l->len) { + auto prev_chlen = prevCharLen(l->buf, l->len, l->pos, NULL); + auto curr_chlen = nextCharLen(l->buf, l->len, l->pos, NULL); + + std::string prev_char(prev_chlen, 0); + memcpy(prev_char.data(), l->buf + l->pos - prev_chlen, prev_chlen); + memmove(l->buf + l->pos - prev_chlen, l->buf + l->pos, curr_chlen); + memmove(l->buf + l->pos - prev_chlen + curr_chlen, prev_char.data(), prev_chlen); + + l->pos = l->pos - prev_chlen + curr_chlen; + if (l->pos + prev_chlen != l->len) { + l->pos += prev_chlen; + } + + refreshLine(l); + } +} + +static void handleEscapeSequence(struct linenoiseState * l, int esc_type) { + switch (esc_type) { + case ESC_NULL: + break; + case ESC_DELETE: + linenoiseEditDelete(l); + break; + case ESC_UP: + linenoiseEditHistoryNext(l, LINENOISE_HISTORY_PREV); + break; + case ESC_DOWN: + linenoiseEditHistoryNext(l, LINENOISE_HISTORY_NEXT); + break; + case ESC_RIGHT: + linenoiseEditMoveRight(l); + break; + case ESC_LEFT: + linenoiseEditMoveLeft(l); + break; + case ESC_HOME: + linenoiseEditMoveHome(l); + break; + case ESC_END: + linenoiseEditMoveEnd(l); + break; + } +} + +static void handleCtrlUKey(struct linenoiseState * l) { + l->buf[0] = '\0'; + l->pos = l->len = 0; + refreshLine(l); +} + +static void handleCtrlKKey(struct linenoiseState * l) { + l->buf[l->pos] = '\0'; + l->len = l->pos; + refreshLine(l); +} + +static const char * processInputCharacter(struct linenoiseState * l, int c, char * cbuf, int nread, int esc_type) { + switch (c) { + case ENTER: + return handleEnterKey(l); + case CTRL_C: + return handleCtrlCKey(); + case BACKSPACE: + case CTRL_H: + linenoiseEditBackspace(l); + break; + case CTRL_D: /* ctrl-d, remove char at right of cursor, or if the + line is empty, act as end-of-file. */ + return handleCtrlDKey(l); + case CTRL_T: + handleCtrlTKey(l); + break; + case CTRL_B: + linenoiseEditMoveLeft(l); + break; + case CTRL_F: + linenoiseEditMoveRight(l); + break; + case CTRL_P: + linenoiseEditHistoryNext(l, LINENOISE_HISTORY_PREV); + break; + case CTRL_N: + linenoiseEditHistoryNext(l, LINENOISE_HISTORY_NEXT); + break; + case ESC: + handleEscapeSequence(l, esc_type); + break; + default: + if (linenoiseEditInsert(l, cbuf, nread)) { + return NULL; + } + break; + case CTRL_U: /* Ctrl+u, delete the whole line. */ + handleCtrlUKey(l); + break; + case CTRL_K: /* Ctrl+k, delete from current to end of line. */ + handleCtrlKKey(l); + break; + case CTRL_A: /* Ctrl+a, go to the start of the line */ + linenoiseEditMoveHome(l); + break; + case CTRL_E: /* ctrl+e, go to the end of the line */ + linenoiseEditMoveEnd(l); + break; + case CTRL_L: /* ctrl+l, clear screen */ + linenoiseClearScreen(); + refreshLine(l); + break; + case CTRL_W: /* ctrl+w, delete previous word */ + linenoiseEditDeletePrevWord(l); + break; + } + return linenoiseEditMore; +} + /* This function is part of the multiplexed API of linenoise, see the top * comment on linenoiseEditStart() for more information. 
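Reviewer note: processInputCharacter() above is now the single dispatch point for the editing loop; it is fed decoded key codes by linenoiseEditFeed(), which belongs to the non-blocking ("multiplexed") API described in the comment this hunk touches. A hedged sketch of how a caller might drive that API from a poll() loop; the entry points and the linenoiseEditMore sentinel are the ones declared in linenoise.h further down, while the buffer size and prompt are arbitrary:

#include <poll.h>
#include <cstdio>
// assumes the linenoise.h from this patch is included

static void editOnce(void) {
    struct linenoiseState ls;
    char buf[1024];
    // -1/-1 selects stdin/stdout as the input and output descriptors
    if (linenoiseEditStart(&ls, -1, -1, buf, sizeof(buf), "> ") == -1) {
        return;
    }
    const char * line = linenoiseEditMore;
    while (line == linenoiseEditMore) {
        struct pollfd pfd = { ls.ifd, POLLIN, 0 };
        if (poll(&pfd, 1, -1) > 0) {         // other fds could be watched here
            line = linenoiseEditFeed(&ls);   // linenoiseEditMore until finished
        }
    }
    linenoiseEditStop(&ls);
    if (line != NULL) {
        printf("you typed: %s\n", line);
        linenoiseFree((void *) line);
    }
}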
Call this function * each time there is some data to read from the standard input file @@ -925,163 +1701,33 @@ const char* linenoiseEditMore = "If you see this, you are misusing the API: when * * Some other errno: I/O error. */ -const char *linenoiseEditFeed(struct linenoiseState *l) { - /* Not a TTY, pass control to line reading without character - * count limits. */ +const char * linenoiseEditFeed(struct linenoiseState * l) { + /* Not a TTY, pass control to line reading without character count + * limits. */ if (!isatty(l->ifd)) return linenoiseNoTTY(); - char c; + int c; int nread; - char seq[3]; + char cbuf[32]; - nread = read(l->ifd,&c,1); + nread = readCode(l->ifd, cbuf, sizeof(cbuf), &c); if (nread <= 0) return NULL; + auto esc_type = ESC_NULL; + if (c == ESC) { + esc_type = readEscapeSequence(l); + } + /* Only autocomplete when the callback is set. It returns < 0 when * there was an error reading from fd. Otherwise it will return the * character that should be handled next. */ if ((l->in_completion || c == 9) && completionCallback != NULL) { - c = completeLine(l,c); + c = completeLine(l, c, esc_type); /* Read next character when 0 */ if (c == 0) return linenoiseEditMore; } - switch(c) { - case ENTER: /* enter */ - history_len--; - free(history[history_len]); - if (mlmode) linenoiseEditMoveEnd(l); - if (hintsCallback) { - /* Force a refresh without hints to leave the previous - * line as the user typed it after a newline. */ - linenoiseHintsCallback *hc = hintsCallback; - hintsCallback = NULL; - refreshLine(l); - hintsCallback = hc; - } - return strdup(l->buf); - case CTRL_C: /* ctrl-c */ - errno = EAGAIN; - return NULL; - case BACKSPACE: /* backspace */ - case 8: /* ctrl-h */ - linenoiseEditBackspace(l); - break; - case CTRL_D: /* ctrl-d, remove char at right of cursor, or if the - line is empty, act as end-of-file. */ - if (l->len > 0) { - linenoiseEditDelete(l); - } else { - history_len--; - free(history[history_len]); - errno = ENOENT; - return NULL; - } - break; - case CTRL_T: /* ctrl-t, swaps current character with previous. */ - if (l->pos > 0 && l->pos < l->len) { - int aux = l->buf[l->pos-1]; - l->buf[l->pos-1] = l->buf[l->pos]; - l->buf[l->pos] = aux; - if (l->pos != l->len-1) l->pos++; - refreshLine(l); - } - break; - case CTRL_B: /* ctrl-b */ - linenoiseEditMoveLeft(l); - break; - case CTRL_F: /* ctrl-f */ - linenoiseEditMoveRight(l); - break; - case CTRL_P: /* ctrl-p */ - linenoiseEditHistoryNext(l, LINENOISE_HISTORY_PREV); - break; - case CTRL_N: /* ctrl-n */ - linenoiseEditHistoryNext(l, LINENOISE_HISTORY_NEXT); - break; - case ESC: /* escape sequence */ - /* Read the next two bytes representing the escape sequence. - * Use two calls to handle slow terminals returning the two - * chars at different times. */ - if (read(l->ifd,seq,1) == -1) break; - if (read(l->ifd,seq+1,1) == -1) break; - - /* ESC [ sequences. */ - if (seq[0] == '[') { - if (seq[1] >= '0' && seq[1] <= '9') { - /* Extended escape, read additional byte. */ - if (read(l->ifd,seq+2,1) == -1) break; - if (seq[2] == '~') { - switch(seq[1]) { - case '3': /* Delete key. 
*/ - linenoiseEditDelete(l); - break; - } - } - } else { - switch(seq[1]) { - case 'A': /* Up */ - linenoiseEditHistoryNext(l, LINENOISE_HISTORY_PREV); - break; - case 'B': /* Down */ - linenoiseEditHistoryNext(l, LINENOISE_HISTORY_NEXT); - break; - case 'C': /* Right */ - linenoiseEditMoveRight(l); - break; - case 'D': /* Left */ - linenoiseEditMoveLeft(l); - break; - case 'H': /* Home */ - linenoiseEditMoveHome(l); - break; - case 'F': /* End*/ - linenoiseEditMoveEnd(l); - break; - } - } - } - - /* ESC O sequences. */ - else if (seq[0] == 'O') { - switch(seq[1]) { - case 'H': /* Home */ - linenoiseEditMoveHome(l); - break; - case 'F': /* End*/ - linenoiseEditMoveEnd(l); - break; - } - } - break; - default: - if (linenoiseEditInsert(l,c)) return NULL; - break; - case CTRL_U: /* Ctrl+u, delete the whole line. */ - l->buf[0] = '\0'; - l->pos = l->len = 0; - refreshLine(l); - break; - case CTRL_K: /* Ctrl+k, delete from current to end of line. */ - l->buf[l->pos] = '\0'; - l->len = l->pos; - refreshLine(l); - break; - case CTRL_A: /* Ctrl+a, go to the start of the line */ - linenoiseEditMoveHome(l); - break; - case CTRL_E: /* ctrl+e, go to the end of the line */ - linenoiseEditMoveEnd(l); - break; - case CTRL_L: /* ctrl+l, clear screen */ - linenoiseClearScreen(); - refreshLine(l); - break; - case CTRL_W: /* ctrl+w, delete previous word */ - linenoiseEditDeletePrevWord(l); - break; - } - return linenoiseEditMore; + return processInputCharacter(l, c, cbuf, nread, esc_type); } /* This is part of the multiplexed linenoise API. See linenoiseEditStart() @@ -1095,7 +1741,7 @@ void linenoiseEditStop(struct linenoiseState *l) { } /* This just implements a blocking loop for the multiplexed API. - * In many applications that are not event-drivern, we can just call + * In many applications that are not event-driven, we can just call * the blocking linenoise API, wait for the user to complete the editing * and return the buffer. */ static const char *linenoiseBlockingEdit(int stdin_fd, int stdout_fd, char *buf, size_t buflen, const char *prompt) @@ -1135,8 +1781,7 @@ void linenoisePrintKeyCodes(void) { quit[sizeof(quit)-1] = c; /* Insert current char on the right. */ if (memcmp(quit,"quit",sizeof(quit)) == 0) break; - printf("'%c' %02x (%d) (type quit to exit)\n", - isprint(c) ? c : '?', (int)c, (int)c); + printf("'%c' %02x (%d) (type quit to exit)\n", isprint((int) c) ? c : '?', (int) c, (int) c); printf("\r"); /* Go left edge manually, we are in raw mode. */ fflush(stdout); } diff --git a/examples/run/linenoise.cpp/linenoise.h b/examples/run/linenoise.cpp/linenoise.h index a14ec6c740..9823ca36d0 100644 --- a/examples/run/linenoise.cpp/linenoise.h +++ b/examples/run/linenoise.cpp/linenoise.h @@ -47,27 +47,27 @@ extern "C" { #include /* For size_t. */ #include -extern const char *linenoiseEditMore; +extern const char * linenoiseEditMore; /* The linenoiseState structure represents the state during line editing. * We pass this state to functions implementing specific editing * functionalities. */ struct linenoiseState { - int in_completion; /* The user pressed TAB and we are now in completion + int in_completion; /* The user pressed TAB and we are now in completion * mode, so input is handled by completeLine(). */ - size_t completion_idx; /* Index of next completion to propose. */ - int ifd; /* Terminal stdin file descriptor. */ - int ofd; /* Terminal stdout file descriptor. */ - char *buf; /* Edited line buffer. */ - size_t buflen; /* Edited line buffer size. 
*/ - const char *prompt; /* Prompt to display. */ - size_t plen; /* Prompt length. */ - size_t pos; /* Current cursor position. */ - size_t oldpos; /* Previous refresh cursor position. */ - size_t len; /* Current edited line length. */ - size_t cols; /* Number of columns in terminal. */ - size_t oldrows; /* Rows used by last refrehsed line (multiline mode) */ - int history_index; /* The history index we are currently editing. */ + size_t completion_idx; /* Index of next completion to propose. */ + int ifd; /* Terminal stdin file descriptor. */ + int ofd; /* Terminal stdout file descriptor. */ + char * buf; /* Edited line buffer. */ + size_t buflen; /* Edited line buffer size. */ + const char * prompt; /* Prompt to display. */ + size_t plen; /* Prompt length. */ + size_t pos; /* Current cursor position. */ + size_t oldcolpos; /* Previous refresh cursor column position. */ + size_t len; /* Current edited line length. */ + size_t cols; /* Number of columns in terminal. */ + size_t oldrows; /* Rows used by last refreshed line (multiline mode) */ + int history_index; /* The history index we are currently editing. */ }; struct linenoiseCompletions { @@ -89,19 +89,20 @@ struct linenoiseCompletions { }; /* Non blocking API. */ -int linenoiseEditStart(struct linenoiseState *l, int stdin_fd, int stdout_fd, char *buf, size_t buflen, const char *prompt); -const char *linenoiseEditFeed(struct linenoiseState *l); -void linenoiseEditStop(struct linenoiseState *l); -void linenoiseHide(struct linenoiseState *l); -void linenoiseShow(struct linenoiseState *l); +int linenoiseEditStart(struct linenoiseState * l, int stdin_fd, int stdout_fd, char * buf, size_t buflen, + const char * prompt); +const char * linenoiseEditFeed(struct linenoiseState * l); +void linenoiseEditStop(struct linenoiseState * l); +void linenoiseHide(struct linenoiseState * l); +void linenoiseShow(struct linenoiseState * l); /* Blocking API. */ -const char *linenoise(const char *prompt); -void linenoiseFree(void *ptr); +const char * linenoise(const char * prompt); +void linenoiseFree(void * ptr); /* Completion API. */ typedef void(linenoiseCompletionCallback)(const char *, linenoiseCompletions *); -typedef const char*(linenoiseHintsCallback)(const char *, int *color, int *bold); +typedef const char *(linenoiseHintsCallback) (const char *, int * color, int * bold); typedef void(linenoiseFreeHintsCallback)(const char *); void linenoiseSetCompletionCallback(linenoiseCompletionCallback *); void linenoiseSetHintsCallback(linenoiseHintsCallback *); @@ -109,10 +110,10 @@ void linenoiseSetFreeHintsCallback(linenoiseFreeHintsCallback *); void linenoiseAddCompletion(linenoiseCompletions *, const char *); /* History API. */ -int linenoiseHistoryAdd(const char *line); +int linenoiseHistoryAdd(const char * line); int linenoiseHistorySetMaxLen(int len); -int linenoiseHistorySave(const char *filename); -int linenoiseHistoryLoad(const char *filename); +int linenoiseHistorySave(const char * filename); +int linenoiseHistoryLoad(const char * filename); /* Other utilities. */ void linenoiseClearScreen(void); @@ -121,6 +122,14 @@ void linenoisePrintKeyCodes(void); void linenoiseMaskModeEnable(void); void linenoiseMaskModeDisable(void); +/* Encoding functions. 
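Reviewer note: the typedefs and linenoiseSetEncodingFunctions() declared just below let an embedder swap out the UTF-8 handling entirely. As a hedged illustration (not something the patch ships), a single-byte fallback that treats every byte as one character and one column could look like this:

#include <unistd.h>

// Illustrative single-byte callbacks matching the linenoisePrevCharLen /
// linenoiseNextCharLen / linenoiseReadCode typedefs declared below.
static size_t asciiPrevCharLen(const char * /*buf*/, size_t /*buf_len*/, size_t pos, size_t * col_len) {
    if (col_len) { *col_len = 1; }
    return pos > 0 ? 1 : 0;
}

static size_t asciiNextCharLen(const char * /*buf*/, size_t buf_len, size_t pos, size_t * col_len) {
    if (col_len) { *col_len = 1; }
    return pos < buf_len ? 1 : 0;
}

static size_t asciiReadCode(int fd, char * buf, size_t buf_len, int * cp) {
    if (buf_len < 1) { return (size_t) -1; }
    ssize_t nread = read(fd, buf, 1);
    if (nread <= 0) { return (size_t) nread; }
    *cp = (unsigned char) buf[0];
    return 1;
}

// Registration (illustrative):
//   linenoiseSetEncodingFunctions(asciiPrevCharLen, asciiNextCharLen, asciiReadCode);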
*/ +typedef size_t(linenoisePrevCharLen)(const char * buf, size_t buf_len, size_t pos, size_t * col_len); +typedef size_t(linenoiseNextCharLen)(const char * buf, size_t buf_len, size_t pos, size_t * col_len); +typedef size_t(linenoiseReadCode)(int fd, char * buf, size_t buf_len, int * c); + +void linenoiseSetEncodingFunctions(linenoisePrevCharLen * prevCharLenFunc, linenoiseNextCharLen * nextCharLenFunc, + linenoiseReadCode * readCodeFunc); + #ifdef __cplusplus } #endif diff --git a/examples/run/run.cpp b/examples/run/run.cpp index 40f2bcb008..e63c2aac33 100644 --- a/examples/run/run.cpp +++ b/examples/run/run.cpp @@ -24,37 +24,20 @@ #include #include +#include "chat.h" #include "common.h" #include "json.hpp" #include "linenoise.cpp/linenoise.h" #include "llama-cpp.h" -#include "chat-template.hpp" +#include "log.h" #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(_WIN32) [[noreturn]] static void sigint_handler(int) { - printf("\n\033[0m"); + printf("\n" LOG_COL_DEFAULT); exit(0); // not ideal, but it's the only way to guarantee exit in all cases } #endif -GGML_ATTRIBUTE_FORMAT(1, 2) -static std::string fmt(const char * fmt, ...) { - va_list ap; - va_list ap2; - va_start(ap, fmt); - va_copy(ap2, ap); - const int size = vsnprintf(NULL, 0, fmt, ap); - GGML_ASSERT(size >= 0 && size < INT_MAX); // NOLINT - std::string buf; - buf.resize(size); - const int size2 = vsnprintf(const_cast(buf.data()), buf.size() + 1, fmt, ap2); - GGML_ASSERT(size2 == size); - va_end(ap2); - va_end(ap); - - return buf; -} - GGML_ATTRIBUTE_FORMAT(1, 2) static int printe(const char * fmt, ...) { va_list args; @@ -65,12 +48,20 @@ static int printe(const char * fmt, ...) { return ret; } +static std::string strftime_fmt(const char * fmt, const std::tm & tm) { + std::ostringstream oss; + oss << std::put_time(&tm, fmt); + + return oss.str(); +} + class Opt { public: int init(int argc, const char ** argv) { ctx_params = llama_context_default_params(); model_params = llama_model_default_params(); context_size_default = ctx_params.n_batch; + n_threads_default = ctx_params.n_threads; ngl_default = model_params.n_gpu_layers; common_params_sampling sampling; temperature_default = sampling.temp; @@ -96,6 +87,7 @@ class Opt { ctx_params.n_batch = context_size >= 0 ? context_size : context_size_default; ctx_params.n_ctx = ctx_params.n_batch; + ctx_params.n_threads = ctx_params.n_threads_batch = n_threads >= 0 ? n_threads : n_threads_default; model_params.n_gpu_layers = ngl >= 0 ? ngl : ngl_default; temperature = temperature >= 0 ? 
temperature : temperature_default; @@ -105,14 +97,15 @@ class Opt { llama_context_params ctx_params; llama_model_params model_params; std::string model_; + std::string chat_template_file; std::string user; bool use_jinja = false; - int context_size = -1, ngl = -1; + int context_size = -1, ngl = -1, n_threads = -1; float temperature = -1; bool verbose = false; private: - int context_size_default = -1, ngl_default = -1; + int context_size_default = -1, ngl_default = -1, n_threads_default = -1; float temperature_default = -1; bool help = false; @@ -140,48 +133,104 @@ class Opt { return 0; } + int handle_option_with_value(int argc, const char ** argv, int & i, std::string & option_value) { + if (i + 1 >= argc) { + return 1; + } + + option_value = argv[++i]; + + return 0; + } + + int parse_options_with_value(int argc, const char ** argv, int & i, bool & options_parsing) { + if (options_parsing && (strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "--context-size") == 0)) { + if (handle_option_with_value(argc, argv, i, context_size) == 1) { + return 1; + } + } else if (options_parsing && + (strcmp(argv[i], "-n") == 0 || strcmp(argv[i], "-ngl") == 0 || strcmp(argv[i], "--ngl") == 0)) { + if (handle_option_with_value(argc, argv, i, ngl) == 1) { + return 1; + } + } else if (options_parsing && (strcmp(argv[i], "-t") == 0 || strcmp(argv[i], "--threads") == 0)) { + if (handle_option_with_value(argc, argv, i, n_threads) == 1) { + return 1; + } + } else if (options_parsing && strcmp(argv[i], "--temp") == 0) { + if (handle_option_with_value(argc, argv, i, temperature) == 1) { + return 1; + } + } else if (options_parsing && strcmp(argv[i], "--chat-template-file") == 0) { + if (handle_option_with_value(argc, argv, i, chat_template_file) == 1) { + return 1; + } + use_jinja = true; + } else { + return 2; + } + + return 0; + } + + int parse_options(const char ** argv, int & i, bool & options_parsing) { + if (options_parsing && (parse_flag(argv, i, "-v", "--verbose") || parse_flag(argv, i, "-v", "--log-verbose"))) { + verbose = true; + } else if (options_parsing && strcmp(argv[i], "--jinja") == 0) { + use_jinja = true; + } else if (options_parsing && parse_flag(argv, i, "-h", "--help")) { + help = true; + return 0; + } else if (options_parsing && strcmp(argv[i], "--") == 0) { + options_parsing = false; + } else { + return 2; + } + + return 0; + } + + int parse_positional_args(const char ** argv, int & i, int & positional_args_i) { + if (positional_args_i == 0) { + if (!argv[i][0] || argv[i][0] == '-') { + return 1; + } + + ++positional_args_i; + model_ = argv[i]; + } else if (positional_args_i == 1) { + ++positional_args_i; + user = argv[i]; + } else { + user += " " + std::string(argv[i]); + } + + return 0; + } + int parse(int argc, const char ** argv) { bool options_parsing = true; for (int i = 1, positional_args_i = 0; i < argc; ++i) { - if (options_parsing && (strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "--context-size") == 0)) { - if (handle_option_with_value(argc, argv, i, context_size) == 1) { - return 1; - } - } else if (options_parsing && - (strcmp(argv[i], "-n") == 0 || strcmp(argv[i], "-ngl") == 0 || strcmp(argv[i], "--ngl") == 0)) { - if (handle_option_with_value(argc, argv, i, ngl) == 1) { - return 1; - } - } else if (options_parsing && strcmp(argv[i], "--temp") == 0) { - if (handle_option_with_value(argc, argv, i, temperature) == 1) { - return 1; - } - } else if (options_parsing && - (parse_flag(argv, i, "-v", "--verbose") || parse_flag(argv, i, "-v", "--log-verbose"))) { - verbose = true; - } else 
if (options_parsing && strcmp(argv[i], "--jinja") == 0) { - use_jinja = true; - } else if (options_parsing && parse_flag(argv, i, "-h", "--help")) { - help = true; - return 0; - } else if (options_parsing && strcmp(argv[i], "--") == 0) { - options_parsing = false; - } else if (positional_args_i == 0) { - if (!argv[i][0] || argv[i][0] == '-') { - return 1; - } + int ret = parse_options_with_value(argc, argv, i, options_parsing); + if (ret == 0) { + continue; + } else if (ret == 1) { + return ret; + } - ++positional_args_i; - model_ = argv[i]; - } else if (positional_args_i == 1) { - ++positional_args_i; - user = argv[i]; - } else { - user += " " + std::string(argv[i]); + ret = parse_options(argv, i, options_parsing); + if (ret == 0) { + continue; + } else if (ret == 1) { + return ret; + } + + if (parse_positional_args(argv, i, positional_args_i)) { + return 1; } } - if (model_.empty()){ + if (model_.empty()) { return 1; } @@ -199,10 +248,17 @@ class Opt { "Options:\n" " -c, --context-size \n" " Context size (default: %d)\n" + " --chat-template-file \n" + " Path to the file containing the chat template to use with the model.\n" + " Only supports jinja templates and implicitly sets the --jinja flag.\n" + " --jinja\n" + " Use jinja templating for the chat template of the model\n" " -n, -ngl, --ngl \n" " Number of GPU layers (default: %d)\n" " --temp \n" " Temperature (default: %.1f)\n" + " -t, --threads \n" + " Number of threads to use during generation (default: %d)\n" " -v, --verbose, --log-verbose\n" " Set verbosity level to infinity (i.e. log all messages, useful for debugging)\n" " -h, --help\n" @@ -231,7 +287,7 @@ class Opt { " llama-run file://some-file3.gguf\n" " llama-run --ngl 999 some-file4.gguf\n" " llama-run --ngl 999 some-file5.gguf Hello World\n", - context_size_default, ngl_default, temperature_default); + context_size_default, ngl_default, temperature_default, n_threads_default); } }; @@ -253,13 +309,12 @@ static int get_terminal_width() { #endif } -#ifdef LLAMA_USE_CURL class File { public: FILE * file = nullptr; FILE * open(const std::string & filename, const char * mode) { - file = fopen(filename.c_str(), mode); + file = ggml_fopen(filename.c_str(), mode); return file; } @@ -295,6 +350,20 @@ class File { return 0; } + std::string to_string() { + fseek(file, 0, SEEK_END); + const size_t size = ftell(file); + fseek(file, 0, SEEK_SET); + std::string out; + out.resize(size); + const size_t read_size = fread(&out[0], 1, size, file); + if (read_size != size) { + printe("Error reading file: %s", strerror(errno)); + } + + return out; + } + ~File() { if (fd >= 0) { # ifdef _WIN32 @@ -319,6 +388,7 @@ class File { # endif }; +#ifdef LLAMA_USE_CURL class HttpClient { public: int init(const std::string & url, const std::vector & headers, const std::string & output_file, @@ -338,7 +408,7 @@ class HttpClient { if (!output_file.empty()) { output_file_partial = output_file + ".partial"; if (!out.open(output_file_partial, "ab")) { - printe("Failed to open file\n"); + printe("Failed to open file for writing\n"); return 1; } @@ -437,11 +507,11 @@ class HttpClient { int secs = static_cast(seconds) % 60; if (hrs > 0) { - return fmt("%dh %02dm %02ds", hrs, mins, secs); + return string_format("%dh %02dm %02ds", hrs, mins, secs); } else if (mins > 0) { - return fmt("%dm %02ds", mins, secs); + return string_format("%dm %02ds", mins, secs); } else { - return fmt("%ds", secs); + return string_format("%ds", secs); } } @@ -456,7 +526,7 @@ class HttpClient { } } - return fmt("%.2f %s", dbl_size, 
suffix[i]); + return string_format("%.2f %s", dbl_size, suffix[i]); } static int update_progress(void * ptr, curl_off_t total_to_download, curl_off_t now_downloaded, curl_off_t, @@ -490,7 +560,9 @@ class HttpClient { return (now_downloaded_plus_file_size * 100) / total_to_download; } - static std::string generate_progress_prefix(curl_off_t percentage) { return fmt("%3ld%% |", static_cast(percentage)); } + static std::string generate_progress_prefix(curl_off_t percentage) { + return string_format("%3ld%% |", static_cast(percentage)); + } static double calculate_speed(curl_off_t now_downloaded, const std::chrono::steady_clock::time_point & start_time) { const auto now = std::chrono::steady_clock::now(); @@ -501,9 +573,9 @@ class HttpClient { static std::string generate_progress_suffix(curl_off_t now_downloaded_plus_file_size, curl_off_t total_to_download, double speed, double estimated_time) { const int width = 10; - return fmt("%*s/%*s%*s/s%*s", width, human_readable_size(now_downloaded_plus_file_size).c_str(), width, - human_readable_size(total_to_download).c_str(), width, human_readable_size(speed).c_str(), width, - human_readable_time(estimated_time).c_str()); + return string_format("%*s/%*s%*s/s%*s", width, human_readable_size(now_downloaded_plus_file_size).c_str(), + width, human_readable_size(total_to_download).c_str(), width, + human_readable_size(speed).c_str(), width, human_readable_time(estimated_time).c_str()); } static int calculate_progress_bar_width(const std::string & progress_prefix, const std::string & progress_suffix) { @@ -527,8 +599,7 @@ class HttpClient { static void print_progress(const std::string & progress_prefix, const std::string & progress_bar, const std::string & progress_suffix) { - printe("\r%*s\r%s%s| %s", get_terminal_width(), " ", progress_prefix.c_str(), progress_bar.c_str(), - progress_suffix.c_str()); + printe("\r" LOG_CLR_TO_EOL "%s%s| %s", progress_prefix.c_str(), progress_bar.c_str(), progress_suffix.c_str()); } // Function to write data to a file static size_t write_data(void * ptr, size_t size, size_t nmemb, void * stream) { @@ -550,7 +621,7 @@ class LlamaData { llama_model_ptr model; llama_sampler_ptr sampler; llama_context_ptr context; - std::vector messages; + std::vector messages; // TODO: switch to common_chat_msg std::list msg_strs; std::vector fmtted; @@ -626,8 +697,10 @@ class LlamaData { std::vector headers = { "User-Agent: llama-cpp", "Accept: application/json" }; std::string url; + std::string model_endpoint = get_model_endpoint(); + if (pos == std::string::npos) { - auto [model_name, manifest_url] = extract_model_and_tag(model, "https://huggingface.co/v2/"); + auto [model_name, manifest_url] = extract_model_and_tag(model, model_endpoint + "v2/"); hfr = model_name; nlohmann::json manifest; @@ -642,7 +715,7 @@ class LlamaData { hff = model.substr(pos + 1); } - url = "https://huggingface.co/" + hfr + "/resolve/main/" + hff; + url = model_endpoint + hfr + "/resolve/main/" + hff; return download(url, bn, true, headers); } @@ -674,39 +747,63 @@ class LlamaData { } int github_dl(const std::string & model, const std::string & bn) { - std::string repository = model; - std::string branch = "main"; - size_t at_pos = model.find('@'); + std::string repository = model; + std::string branch = "main"; + const size_t at_pos = model.find('@'); if (at_pos != std::string::npos) { repository = model.substr(0, at_pos); branch = model.substr(at_pos + 1); } - std::vector repo_parts; - size_t start = 0; - for (size_t end = 0; (end = repository.find('/', start)) 
!= std::string::npos; start = end + 1) { - repo_parts.push_back(repository.substr(start, end - start)); - } - - repo_parts.push_back(repository.substr(start)); + const std::vector repo_parts = string_split(repository, "/"); if (repo_parts.size() < 3) { printe("Invalid GitHub repository format\n"); return 1; } - const std::string org = repo_parts[0]; - const std::string project = repo_parts[1]; - std::string project_path = repo_parts[2]; - for (size_t i = 3; i < repo_parts.size(); ++i) { - project_path += "/" + repo_parts[i]; + const std::string & org = repo_parts[0]; + const std::string & project = repo_parts[1]; + std::string url = "https://raw.githubusercontent.com/" + org + "/" + project + "/" + branch; + for (size_t i = 2; i < repo_parts.size(); ++i) { + url += "/" + repo_parts[i]; } - const std::string url = - "https://raw.githubusercontent.com/" + org + "/" + project + "/" + branch + "/" + project_path; - return download(url, bn, true); } + int s3_dl(const std::string & model, const std::string & bn) { + const size_t slash_pos = model.find('/'); + if (slash_pos == std::string::npos) { + return 1; + } + + const std::string bucket = model.substr(0, slash_pos); + const std::string key = model.substr(slash_pos + 1); + const char * access_key = std::getenv("AWS_ACCESS_KEY_ID"); + const char * secret_key = std::getenv("AWS_SECRET_ACCESS_KEY"); + if (!access_key || !secret_key) { + printe("AWS credentials not found in environment\n"); + return 1; + } + + // Generate AWS Signature Version 4 headers + // (Implementation requires HMAC-SHA256 and date handling) + // Get current timestamp + const time_t now = time(nullptr); + const tm tm = *gmtime(&now); + const std::string date = strftime_fmt("%Y%m%d", tm); + const std::string datetime = strftime_fmt("%Y%m%dT%H%M%SZ", tm); + const std::vector headers = { + "Authorization: AWS4-HMAC-SHA256 Credential=" + std::string(access_key) + "/" + date + + "/us-east-1/s3/aws4_request", + "x-amz-content-sha256: UNSIGNED-PAYLOAD", "x-amz-date: " + datetime + }; + + const std::string url = "https://" + bucket + ".s3.amazonaws.com/" + key; + + return download(url, bn, true, headers); + } + std::string basename(const std::string & path) { const size_t pos = path.find_last_of("/\\"); if (pos == std::string::npos) { @@ -735,19 +832,23 @@ class LlamaData { } const std::string bn = basename(model_); - if (string_starts_with(model_, "hf://") || string_starts_with(model_, "huggingface://")) { + if (string_starts_with(model_, "hf://") || string_starts_with(model_, "huggingface://") || + string_starts_with(model_, "hf.co/")) { + rm_until_substring(model_, "hf.co/"); rm_until_substring(model_, "://"); ret = huggingface_dl(model_, bn); - } else if (string_starts_with(model_, "hf.co/")) { - rm_until_substring(model_, "hf.co/"); - ret = huggingface_dl(model_, bn); - } else if (string_starts_with(model_, "https://") || string_starts_with(model_, "http://")) { + } else if ((string_starts_with(model_, "https://") || string_starts_with(model_, "http://")) && + !string_starts_with(model_, "https://ollama.com/library/")) { ret = download(model_, bn, true); } else if (string_starts_with(model_, "github:") || string_starts_with(model_, "github://")) { - rm_until_substring(model_, "github://"); rm_until_substring(model_, "github:"); + rm_until_substring(model_, "://"); ret = github_dl(model_, bn); + } else if (string_starts_with(model_, "s3://")) { + rm_until_substring(model_, "://"); + ret = s3_dl(model_, bn); } else { // ollama:// or nothing + rm_until_substring(model_, 
"ollama.com/library/"); rm_until_substring(model_, "://"); ret = ollama_dl(model_, bn); } @@ -761,16 +862,13 @@ class LlamaData { llama_model_ptr initialize_model(Opt & opt) { ggml_backend_load_all(); resolve_model(opt.model_); - printe( - "\r%*s" - "\rLoading model", - get_terminal_width(), " "); + printe("\r" LOG_CLR_TO_EOL "Loading model"); llama_model_ptr model(llama_model_load_from_file(opt.model_.c_str(), opt.model_params)); if (!model) { printe("%s: error: unable to load model from file: %s\n", __func__, opt.model_.c_str()); } - printe("\r%*s\r", static_cast(sizeof("Loading model")), " "); + printe("\r" LOG_CLR_TO_EOL); return model; } @@ -802,42 +900,29 @@ static void add_message(const char * role, const std::string & text, LlamaData & } // Function to apply the chat template and resize `formatted` if needed -static int apply_chat_template(const common_chat_template & tmpl, LlamaData & llama_data, const bool append, bool use_jinja) { - if (use_jinja) { - json messages = json::array(); - for (const auto & msg : llama_data.messages) { - messages.push_back({ - {"role", msg.role}, - {"content", msg.content}, - }); - } - try { - auto result = tmpl.apply(messages, /* tools= */ json(), append); - llama_data.fmtted.resize(result.size() + 1); - memcpy(llama_data.fmtted.data(), result.c_str(), result.size() + 1); - return result.size(); - } catch (const std::exception & e) { - printe("failed to render the chat template: %s\n", e.what()); - return -1; - } - } - int result = llama_chat_apply_template( - tmpl.source().c_str(), llama_data.messages.data(), llama_data.messages.size(), append, - append ? llama_data.fmtted.data() : nullptr, append ? llama_data.fmtted.size() : 0); - if (append && result > static_cast(llama_data.fmtted.size())) { - llama_data.fmtted.resize(result); - result = llama_chat_apply_template(tmpl.source().c_str(), llama_data.messages.data(), - llama_data.messages.size(), append, llama_data.fmtted.data(), - llama_data.fmtted.size()); +static int apply_chat_template(const struct common_chat_templates * tmpls, LlamaData & llama_data, const bool append, bool use_jinja) { + common_chat_templates_inputs inputs; + for (const auto & msg : llama_data.messages) { + common_chat_msg cmsg; + cmsg.role = msg.role; + cmsg.content = msg.content; + inputs.messages.push_back(cmsg); } + inputs.add_generation_prompt = append; + inputs.use_jinja = use_jinja; - return result; + auto chat_params = common_chat_templates_apply(tmpls, inputs); + // TODO: use other params for tool calls. 
+ auto result = chat_params.prompt; + llama_data.fmtted.resize(result.size() + 1); + memcpy(llama_data.fmtted.data(), result.c_str(), result.size() + 1); + return result.size(); } // Function to tokenize the prompt static int tokenize_prompt(const llama_vocab * vocab, const std::string & prompt, std::vector & prompt_tokens, const LlamaData & llama_data) { - const bool is_first = llama_get_kv_cache_used_cells(llama_data.context.get()) == 0; + const bool is_first = llama_kv_self_used_cells(llama_data.context.get()) == 0; const int n_prompt_tokens = -llama_tokenize(vocab, prompt.c_str(), prompt.size(), NULL, 0, is_first, true); prompt_tokens.resize(n_prompt_tokens); @@ -853,9 +938,9 @@ static int tokenize_prompt(const llama_vocab * vocab, const std::string & prompt // Check if we have enough space in the context to evaluate this batch static int check_context_size(const llama_context_ptr & ctx, const llama_batch & batch) { const int n_ctx = llama_n_ctx(ctx.get()); - const int n_ctx_used = llama_get_kv_cache_used_cells(ctx.get()); + const int n_ctx_used = llama_kv_self_used_cells(ctx.get()); if (n_ctx_used + batch.n_tokens > n_ctx) { - printf("\033[0m\n"); + printf(LOG_COL_DEFAULT "\n"); printe("context size exceeded\n"); return 1; } @@ -918,17 +1003,15 @@ static int generate(LlamaData & llama_data, const std::string & prompt, std::str batch = llama_batch_get_one(&new_token_id, 1); } - printf("\033[0m"); + printf(LOG_COL_DEFAULT); return 0; } static int read_user_input(std::string & user_input) { - static const char * prompt_prefix = "> "; + static const char * prompt_prefix_env = std::getenv("LLAMA_PROMPT_PREFIX"); + static const char * prompt_prefix = prompt_prefix_env ? prompt_prefix_env : "> "; #ifdef WIN32 - printf( - "\r%*s" - "\r\033[0m%s", - get_terminal_width(), " ", prompt_prefix); + printf("\r" LOG_CLR_TO_EOL LOG_COL_DEFAULT "%s", prompt_prefix); std::getline(std::cin, user_input); if (std::cin.eof()) { @@ -964,7 +1047,7 @@ static int generate_response(LlamaData & llama_data, const std::string & prompt, const bool stdout_a_terminal) { // Set response color if (stdout_a_terminal) { - printf("\033[33m"); + printf(LOG_COL_YELLOW); } if (generate(llama_data, prompt, response)) { @@ -973,13 +1056,13 @@ static int generate_response(LlamaData & llama_data, const std::string & prompt, } // End response with color reset and newline - printf("\n%s", stdout_a_terminal ? "\033[0m" : ""); + printf("\n%s", stdout_a_terminal ? 
LOG_COL_DEFAULT : ""); return 0; } // Helper function to apply the chat template and handle errors -static int apply_chat_template_with_error_handling(const common_chat_template & tmpl, LlamaData & llama_data, const bool append, int & output_length, bool use_jinja) { - const int new_len = apply_chat_template(tmpl, llama_data, append, use_jinja); +static int apply_chat_template_with_error_handling(const common_chat_templates * tmpls, LlamaData & llama_data, const bool append, int & output_length, bool use_jinja) { + const int new_len = apply_chat_template(tmpls, llama_data, append, use_jinja); if (new_len < 0) { printe("failed to apply the chat template\n"); return -1; @@ -1037,40 +1120,68 @@ static int get_user_input(std::string & user_input, const std::string & user) { return 0; } +// Reads a chat template file to be used +static std::string read_chat_template_file(const std::string & chat_template_file) { + File file; + if (!file.open(chat_template_file, "r")) { + printe("Error opening chat template file '%s': %s", chat_template_file.c_str(), strerror(errno)); + return ""; + } + + return file.to_string(); +} + +static int process_user_message(const Opt & opt, const std::string & user_input, LlamaData & llama_data, + const common_chat_templates_ptr & chat_templates, int & prev_len, + const bool stdout_a_terminal) { + add_message("user", opt.user.empty() ? user_input : opt.user, llama_data); + int new_len; + if (apply_chat_template_with_error_handling(chat_templates.get(), llama_data, true, new_len, opt.use_jinja) < 0) { + return 1; + } + + std::string prompt(llama_data.fmtted.begin() + prev_len, llama_data.fmtted.begin() + new_len); + std::string response; + if (generate_response(llama_data, prompt, response, stdout_a_terminal)) { + return 1; + } + + if (!opt.user.empty()) { + return 2; + } + + add_message("assistant", response, llama_data); + if (apply_chat_template_with_error_handling(chat_templates.get(), llama_data, false, prev_len, opt.use_jinja) < 0) { + return 1; + } + + return 0; +} + // Main chat loop function -static int chat_loop(LlamaData & llama_data, const std::string & user, bool use_jinja) { +static int chat_loop(LlamaData & llama_data, const Opt & opt) { int prev_len = 0; llama_data.fmtted.resize(llama_n_ctx(llama_data.context.get())); - auto chat_templates = common_chat_templates_from_model(llama_data.model.get(), ""); - GGML_ASSERT(chat_templates.template_default); + std::string chat_template; + if (!opt.chat_template_file.empty()) { + chat_template = read_chat_template_file(opt.chat_template_file); + } + + common_chat_templates_ptr chat_templates = common_chat_templates_init(llama_data.model.get(), chat_template); static const bool stdout_a_terminal = is_stdout_a_terminal(); while (true) { // Get user input std::string user_input; - if (get_user_input(user_input, user) == 1) { + if (get_user_input(user_input, opt.user) == 1) { return 0; } - add_message("user", user.empty() ? 
user_input : user, llama_data); - int new_len; - if (apply_chat_template_with_error_handling(*chat_templates.template_default, llama_data, true, new_len, use_jinja) < 0) { + const int ret = process_user_message(opt, user_input, llama_data, chat_templates, prev_len, stdout_a_terminal); + if (ret == 1) { return 1; - } - - std::string prompt(llama_data.fmtted.begin() + prev_len, llama_data.fmtted.begin() + new_len); - std::string response; - if (generate_response(llama_data, prompt, response, stdout_a_terminal)) { - return 1; - } - - if (!user.empty()) { + } else if (ret == 2) { break; } - - add_message("assistant", response, llama_data); - if (apply_chat_template_with_error_handling(*chat_templates.template_default, llama_data, false, prev_len, use_jinja) < 0) { - return 1; - } } return 0; @@ -1128,7 +1239,7 @@ int main(int argc, const char ** argv) { return 1; } - if (chat_loop(llama_data, opt.user, opt.use_jinja)) { + if (chat_loop(llama_data, opt)) { return 1; } diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp index cf7cbd8159..760ebbbf08 100644 --- a/examples/save-load-state/save-load-state.cpp +++ b/examples/save-load-state/save-load-state.cpp @@ -15,7 +15,7 @@ int main(int argc, char ** argv) { return 1; } - print_build_info(); + common_init(); if (params.n_predict < 0) { params.n_predict = 16; @@ -196,7 +196,7 @@ int main(int argc, char ** argv) { fprintf(stderr, "%s : seq 0 copied, %zd bytes\n", __func__, ncopy); // erase whole kv - llama_kv_cache_clear(ctx3); + llama_kv_self_clear(ctx3); fprintf(stderr, "%s : kv cache cleared\n", __func__); // restore kv into seq 1 diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt index 1b7cc8c132..aee90388e4 100644 --- a/examples/server/CMakeLists.txt +++ b/examples/server/CMakeLists.txt @@ -5,7 +5,7 @@ option(LLAMA_SERVER_SSL "Build SSL support for the server" OFF) include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) if (MINGW) - # fix: https://github.com/ggerganov/llama.cpp/actions/runs/9651004652/job/26617901362?pr=8006 + # fix: https://github.com/ggml-org/llama.cpp/actions/runs/9651004652/job/26617901362?pr=8006 add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER}) endif() diff --git a/examples/server/README.md b/examples/server/README.md index 5022de672d..a2a0903261 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -7,14 +7,15 @@ Set of LLM REST APIs and a simple web front end to interact with llama.cpp. **Features:** * LLM inference of F16 and quantized models on GPU and CPU * [OpenAI API](https://github.com/openai/openai-openapi) compatible chat completions and embeddings routes - * Reranking endoint (WIP: https://github.com/ggerganov/llama.cpp/pull/9510) + * Reranking endoint (WIP: https://github.com/ggml-org/llama.cpp/pull/9510) * Parallel decoding with multi-user support * Continuous batching * Multimodal (wip) * Monitoring endpoints * Schema-constrained JSON response format + * [Function calling](../../docs/function-calling.md) / tool use for ~any model -The project is under active development, and we are [looking for feedback and contributors](https://github.com/ggerganov/llama.cpp/issues/4216). +The project is under active development, and we are [looking for feedback and contributors](https://github.com/ggml-org/llama.cpp/issues/4216). 
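As a quick illustration of the OpenAI-compatible chat completions route listed in the features above, here is a minimal sketch of a request against a locally running `llama-server`. It assumes the server has already been started with a chat-capable model loaded and is listening on the default `localhost:8080`; the host, port, and message contents are placeholders, not part of this change:

```bash
# Minimal sketch: query a llama-server instance assumed to be already
# running on localhost:8080 with a chat-capable model loaded.
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "messages": [
      {"role": "system", "content": "You are a helpful assistant."},
      {"role": "user",   "content": "Hello!"}
    ]
  }'
```

Tool use over the same route additionally requires starting the server with `--jinja`, as described in the function-calling notes further below.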
## Usage

@@ -65,7 +66,7 @@ The project is under active development, and we are [looking for feedback and co
| `-np, --parallel N` | number of parallel sequences to decode (default: 1)<br/>(env: LLAMA_ARG_N_PARALLEL) |
| `--mlock` | force system to keep model in RAM rather than swapping or compressing<br/>(env: LLAMA_ARG_MLOCK) |
| `--no-mmap` | do not memory-map model (slower load but may reduce pageouts if not using mlock)<br/>(env: LLAMA_ARG_NO_MMAP) |
-| `--numa TYPE` | attempt optimizations that help on some NUMA systems<br/>- distribute: spread execution evenly over all nodes<br/>- isolate: only spawn threads on CPUs on the node that execution started on<br/>- numactl: use the CPU map provided by numactl<br/>if run without this previously, it is recommended to drop the system page cache before using this<br/>see https://github.com/ggerganov/llama.cpp/issues/1437<br/>(env: LLAMA_ARG_NUMA) |
+| `--numa TYPE` | attempt optimizations that help on some NUMA systems<br/>- distribute: spread execution evenly over all nodes<br/>- isolate: only spawn threads on CPUs on the node that execution started on<br/>- numactl: use the CPU map provided by numactl<br/>if run without this previously, it is recommended to drop the system page cache before using this<br/>see https://github.com/ggml-org/llama.cpp/issues/1437<br/>(env: LLAMA_ARG_NUMA) |
| `-dev, --device ` | comma-separated list of devices to use for offloading (none = don't offload)<br/>use --list-devices to see a list of available devices<br/>(env: LLAMA_ARG_DEVICE) |
| `--list-devices` | print list of available devices and exit |
| `-ngl, --gpu-layers, --n-gpu-layers N` | number of layers to store in VRAM<br/>(env: LLAMA_ARG_N_GPU_LAYERS) |
@@ -126,7 +127,8 @@ The project is under active development, and we are [looking for feedback and co
| `--grammar GRAMMAR` | BNF-like grammar to constrain generations (see samples in grammars/ dir) (default: '') |
| `--grammar-file FNAME` | file to read grammar from |
| `-j, --json-schema SCHEMA` | JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object<br/>
For schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead | -| `--jinja` | Enable experimental Jinja templating engine (needed for tool use) | +| `--jinja` | Enable experimental Jinja templating engine (required for tool use) | +| `--reasoning-format FORMAT` | Controls extraction of model thinking traces and the format / field in which they are returned (default: `deepseek`; allowed values: `deepseek`, `none`; requires `--jinja`). `none` will leave thinking traces inline in `message.content` in a model-specific format, while `deepseek` will return them separately under `message.reasoning_content` | **Example-specific params** @@ -177,7 +179,7 @@ Example usage of docker compose with environment variables: ```yml services: llamacpp-server: - image: ghcr.io/ggerganov/llama.cpp:server + image: ghcr.io/ggml-org/llama.cpp:server ports: - 8080:8080 volumes: @@ -220,7 +222,7 @@ services: The project includes a web-based user interface that enables interaction with the model through the `/chat/completions` endpoint. The web UI is developed using: -- `vue` framework for frontend development +- `react` framework for frontend development - `tailwindcss` and `daisyui` for styling - `vite` for build tooling @@ -236,9 +238,13 @@ npm i # to run the dev server npm run dev -# to build the public/index.html +# to build the public/index.html.gz npm run build ``` +After `public/index.html.gz` has been generated we need to generate the c++ +headers (like build/examples/server/index.html.gz.hpp) that will be included +by server.cpp. This is done by building `llama-server` as described in the +[build](#build) section above. NOTE: if you are using the vite dev server, you can change the API base URL to llama.cpp. To do that, run this code snippet in browser's console: @@ -268,10 +274,10 @@ You can consume the endpoints with Postman or NodeJS with axios library. You can ### Docker ```bash -docker run -p 8080:8080 -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:server -m models/7B/ggml-model.gguf -c 512 --host 0.0.0.0 --port 8080 +docker run -p 8080:8080 -v /path/to/models:/models ghcr.io/ggml-org/llama.cpp:server -m models/7B/ggml-model.gguf -c 512 --host 0.0.0.0 --port 8080 # or, with CUDA: -docker run -p 8080:8080 -v /path/to/models:/models --gpus all ghcr.io/ggerganov/llama.cpp:server-cuda -m models/7B/ggml-model.gguf -c 512 --host 0.0.0.0 --port 8080 --n-gpu-layers 99 +docker run -p 8080:8080 -v /path/to/models:/models --gpus all ghcr.io/ggml-org/llama.cpp:server-cuda -m models/7B/ggml-model.gguf -c 512 --host 0.0.0.0 --port 8080 --n-gpu-layers 99 ``` ## Testing with CURL @@ -456,7 +462,7 @@ These words will not be included in the completion, so make sure to add them to - Note: In streaming mode (`stream`), only `content`, `tokens` and `stop` will be returned until end of completion. Responses are sent using the [Server-sent events](https://html.spec.whatwg.org/multipage/server-sent-events.html) standard. Note: the browser's `EventSource` interface cannot be used due to its lack of `POST` request support. - `completion_probabilities`: An array of token probabilities for each completion. The array's length is `n_predict`. Each item in the array has a nested array `top_logprobs`. 
It contains at **maximum** `n_probs` elements: - ```json + ``` { "content": "", "tokens": [ generated token ids if requested ], @@ -557,7 +563,7 @@ If `with_pieces` is `true`: ``` With input 'á' (utf8 hex: C3 A1) on tinyllama/stories260k -```json +``` { "tokens": [ {"id": 198, "piece": [195]}, // hex C3 @@ -572,6 +578,18 @@ With input 'á' (utf8 hex: C3 A1) on tinyllama/stories260k `tokens`: Set the tokens to detokenize. +### POST `/apply-template`: Apply chat template to a conversation + +Uses the server's prompt template formatting functionality to convert chat messages to a single string expected by a chat model as input, but does not perform inference. Instead, the prompt string is returned in the `prompt` field of the JSON response. The prompt can then be modified as desired (for example, to insert "Sure!" at the beginning of the model's response) before sending to `/completion` to generate the chat response. + +*Options:* + +`messages`: (Required) Chat turns in the same format as `/v1/chat/completions`. + +**Response format** + +Returns a JSON object with a field `prompt` containing a string of the input messages formatted according to the model's chat template format. + ### POST `/embedding`: Generate embedding of a given text > [!IMPORTANT] @@ -764,7 +782,7 @@ Same as the `/v1/embeddings` endpoint. **Response format** -```json +``` [ { "index": 0, @@ -1049,11 +1067,11 @@ print(completion.choices[0].text) ### POST `/v1/chat/completions`: OpenAI-compatible Chat Completions API -Given a ChatML-formatted json description in `messages`, it returns the predicted completion. Both synchronous and streaming mode are supported, so scripted and interactive applications work fine. While no strong claims of compatibility with OpenAI API spec is being made, in our experience it suffices to support many apps. Only models with a [supported chat template](https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template) can be used optimally with this endpoint. By default, the ChatML template will be used. +Given a ChatML-formatted json description in `messages`, it returns the predicted completion. Both synchronous and streaming mode are supported, so scripted and interactive applications work fine. While no strong claims of compatibility with OpenAI API spec is being made, in our experience it suffices to support many apps. Only models with a [supported chat template](https://github.com/ggml-org/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template) can be used optimally with this endpoint. By default, the ChatML template will be used. *Options:* -See [OpenAI Chat Completions API documentation](https://platform.openai.com/docs/api-reference/chat). While some OpenAI-specific features such as function calling aren't supported, llama.cpp `/completion`-specific features such as `mirostat` are supported. +See [OpenAI Chat Completions API documentation](https://platform.openai.com/docs/api-reference/chat). llama.cpp `/completion`-specific features such as `mirostat` are also supported. The `response_format` parameter supports both plain JSON output (e.g. `{"type": "json_object"}`) and schema-constrained JSON (e.g. 
`{"type": "json_object", "schema": {"type": "string", "minLength": 10, "maxLength": 100}}` or `{"type": "json_schema", "schema": {"properties": { "name": { "title": "Name", "type": "string" }, "date": { "title": "Date", "type": "string" }, "participants": { "items": {"type: "string" }, "title": "Participants", "type": "string" } } } }`), similar to other OpenAI-inspired API providers. @@ -1101,6 +1119,12 @@ curl http://localhost:8080/v1/chat/completions \ }' ``` +*Tool call support* + +[OpenAI-style function calling](https://platform.openai.com/docs/guides/function-calling) is supported with the `--jinja` flag (and may require a `--chat-template-file` override to get the right tool-use compatible Jinja template; worst case, `--chat-template chatml` may also work). + +**See our [Function calling](../../docs/function-calling.md) docs** for more details, supported native tool call styles (generic tool call style is used as fallback) / examples of use. + ### POST `/v1/embeddings`: OpenAI-compatible embeddings API This endpoint requires that the model uses a pooling different than type `none`. The embeddings are normalized using the Eucledian norm. @@ -1204,7 +1228,7 @@ Apart from error types supported by OAI, we also have custom types that are spec ### Legacy completion web UI -A new chat-based UI has replaced the old completion-based since [this PR](https://github.com/ggerganov/llama.cpp/pull/10175). If you want to use the old completion, start the server with `--path ./examples/server/public_legacy` +A new chat-based UI has replaced the old completion-based since [this PR](https://github.com/ggml-org/llama.cpp/pull/10175). If you want to use the old completion, start the server with `--path ./examples/server/public_legacy` For example: diff --git a/examples/server/httplib.h b/examples/server/httplib.h index c2f12dd2ac..0f981dc895 100644 --- a/examples/server/httplib.h +++ b/examples/server/httplib.h @@ -1,14 +1,14 @@ // // httplib.h // -// Copyright (c) 2024 Yuji Hirose. All rights reserved. +// Copyright (c) 2025 Yuji Hirose. All rights reserved. // MIT License // #ifndef CPPHTTPLIB_HTTPLIB_H #define CPPHTTPLIB_HTTPLIB_H -#define CPPHTTPLIB_VERSION "0.18.5" +#define CPPHTTPLIB_VERSION "0.20.0" /* * Configuration @@ -66,6 +66,10 @@ #define CPPHTTPLIB_CLIENT_WRITE_TIMEOUT_USECOND 0 #endif +#ifndef CPPHTTPLIB_CLIENT_MAX_TIMEOUT_MSECOND +#define CPPHTTPLIB_CLIENT_MAX_TIMEOUT_MSECOND 0 +#endif + #ifndef CPPHTTPLIB_IDLE_INTERVAL_SECOND #define CPPHTTPLIB_IDLE_INTERVAL_SECOND 0 #endif @@ -184,14 +188,16 @@ using ssize_t = long; #include #include +// afunix.h uses types declared in winsock2.h, so has to be included after it. 
+#include + #ifndef WSA_FLAG_NO_HANDLE_INHERIT #define WSA_FLAG_NO_HANDLE_INHERIT 0x80 #endif +using nfds_t = unsigned long; using socket_t = SOCKET; -#ifdef CPPHTTPLIB_USE_POLL -#define poll(fds, nfds, timeout) WSAPoll(fds, nfds, timeout) -#endif +using socklen_t = int; #else // not _WIN32 @@ -211,14 +217,11 @@ using socket_t = SOCKET; #ifdef __linux__ #include #endif -#include -#ifdef CPPHTTPLIB_USE_POLL -#include -#endif #include +#include +#include #include #include -#include #include #include #include @@ -240,7 +243,6 @@ using socket_t = int; #include #include #include -#include #include #include #include @@ -313,6 +315,10 @@ using socket_t = int; #include #endif +#ifdef CPPHTTPLIB_ZSTD_SUPPORT +#include +#endif + /* * Declaration */ @@ -428,6 +434,15 @@ private: } // namespace detail +enum SSLVerifierResponse { + // no decision has been made, use the built-in certificate verifier + NoDecisionMade, + // connection certificate is verified and accepted + CertificateAccepted, + // connection certificate was processed but is rejected + CertificateRejected +}; + enum StatusCode { // Information responses Continue_100 = 100, @@ -662,6 +677,8 @@ struct Request { ContentProvider content_provider_; bool is_chunked_content_provider_ = false; size_t authorization_count_ = 0; + std::chrono::time_point start_time_ = + (std::chrono::steady_clock::time_point::min)(); }; struct Response { @@ -727,7 +744,8 @@ public: virtual ~Stream() = default; virtual bool is_readable() const = 0; - virtual bool is_writable() const = 0; + virtual bool wait_readable() const = 0; + virtual bool wait_writable() const = 0; virtual ssize_t read(char *ptr, size_t size) = 0; virtual ssize_t write(const char *ptr, size_t size) = 0; @@ -735,6 +753,8 @@ public: virtual void get_local_ip_and_port(std::string &ip, int &port) const = 0; virtual socket_t socket() const = 0; + virtual time_t duration() const = 0; + ssize_t write(const char *ptr); ssize_t write(const std::string &s); }; @@ -838,6 +858,16 @@ using Logger = std::function; using SocketOptions = std::function; +namespace detail { + +bool set_socket_opt_impl(socket_t sock, int level, int optname, + const void *optval, socklen_t optlen); +bool set_socket_opt(socket_t sock, int level, int optname, int opt); +bool set_socket_opt_time(socket_t sock, int level, int optname, time_t sec, + time_t usec); + +} // namespace detail + void default_socket_options(socket_t sock); const char *status_message(int status); @@ -858,7 +888,7 @@ public: * Captures parameters in request path and stores them in Request::path_params * * Capture name is a substring of a pattern from : to /. - * The rest of the pattern is matched agains the request path directly + * The rest of the pattern is matched against the request path directly * Parameters are captured starting from the next character after * the end of the last matched static pattern fragment until the next /. 
* @@ -1088,7 +1118,7 @@ private: virtual bool process_and_close_socket(socket_t sock); std::atomic is_running_{false}; - std::atomic is_decommisioned{false}; + std::atomic is_decommissioned{false}; struct MountPointEntry { std::string mount_point; @@ -1421,6 +1451,10 @@ public: template void set_write_timeout(const std::chrono::duration &duration); + void set_max_timeout(time_t msec); + template + void set_max_timeout(const std::chrono::duration &duration); + void set_basic_auth(const std::string &username, const std::string &password); void set_bearer_token_auth(const std::string &token); #ifdef CPPHTTPLIB_OPENSSL_SUPPORT @@ -1458,7 +1492,8 @@ public: #ifdef CPPHTTPLIB_OPENSSL_SUPPORT void enable_server_certificate_verification(bool enabled); void enable_server_hostname_verification(bool enabled); - void set_server_certificate_verifier(std::function verifier); + void set_server_certificate_verifier( + std::function verifier); #endif void set_logger(Logger logger); @@ -1529,6 +1564,7 @@ protected: time_t read_timeout_usec_ = CPPHTTPLIB_CLIENT_READ_TIMEOUT_USECOND; time_t write_timeout_sec_ = CPPHTTPLIB_CLIENT_WRITE_TIMEOUT_SECOND; time_t write_timeout_usec_ = CPPHTTPLIB_CLIENT_WRITE_TIMEOUT_USECOND; + time_t max_timeout_msec_ = CPPHTTPLIB_CLIENT_MAX_TIMEOUT_MSECOND; std::string basic_auth_username_; std::string basic_auth_password_; @@ -1574,7 +1610,7 @@ protected: #ifdef CPPHTTPLIB_OPENSSL_SUPPORT bool server_certificate_verification_ = true; bool server_hostname_verification_ = true; - std::function server_certificate_verifier_; + std::function server_certificate_verifier_; #endif Logger logger_; @@ -1583,9 +1619,6 @@ private: bool send_(Request &req, Response &res, Error &error); Result send_(Request &&req); -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - bool is_ssl_peer_could_be_closed(SSL *ssl) const; -#endif socket_t create_client_socket(Error &error) const; bool read_response_line(Stream &strm, const Request &req, Response &res) const; @@ -1611,8 +1644,10 @@ private: std::string adjust_host_string(const std::string &host) const; - virtual bool process_socket(const Socket &socket, - std::function callback); + virtual bool + process_socket(const Socket &socket, + std::chrono::time_point start_time, + std::function callback); virtual bool is_ssl() const; }; @@ -1854,6 +1889,10 @@ public: template void set_write_timeout(const std::chrono::duration &duration); + void set_max_timeout(time_t msec); + template + void set_max_timeout(const std::chrono::duration &duration); + void set_basic_auth(const std::string &username, const std::string &password); void set_bearer_token_auth(const std::string &token); #ifdef CPPHTTPLIB_OPENSSL_SUPPORT @@ -1884,7 +1923,8 @@ public: #ifdef CPPHTTPLIB_OPENSSL_SUPPORT void enable_server_certificate_verification(bool enabled); void enable_server_hostname_verification(bool enabled); - void set_server_certificate_verifier(std::function verifier); + void set_server_certificate_verifier( + std::function verifier); #endif void set_logger(Logger logger); @@ -1971,12 +2011,16 @@ private: void shutdown_ssl(Socket &socket, bool shutdown_gracefully) override; void shutdown_ssl_impl(Socket &socket, bool shutdown_gracefully); - bool process_socket(const Socket &socket, - std::function callback) override; + bool + process_socket(const Socket &socket, + std::chrono::time_point start_time, + std::function callback) override; bool is_ssl() const override; - bool connect_with_proxy(Socket &sock, Response &res, bool &success, - Error &error); + bool connect_with_proxy( + Socket &sock, + 
std::chrono::time_point start_time, + Response &res, bool &success, Error &error); bool initialize_ssl(Socket &socket, Error &error); bool load_certs(); @@ -2013,6 +2057,10 @@ inline void duration_to_sec_and_usec(const T &duration, U callback) { callback(static_cast(sec), static_cast(usec)); } +template inline constexpr size_t str_len(const char (&)[N]) { + return N - 1; +} + inline bool is_numeric(const std::string &str) { return !str.empty() && std::all_of(str.begin(), str.end(), ::isdigit); } @@ -2053,20 +2101,45 @@ inline uint64_t Response::get_header_value_u64(const std::string &key, return detail::get_header_value_u64(headers, key, def, id); } -inline void default_socket_options(socket_t sock) { - int opt = 1; +namespace detail { + +inline bool set_socket_opt_impl(socket_t sock, int level, int optname, + const void *optval, socklen_t optlen) { + return setsockopt(sock, level, optname, #ifdef _WIN32 - setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, - reinterpret_cast(&opt), sizeof(opt)); + reinterpret_cast(optval), #else + optval, +#endif + optlen) == 0; +} + +inline bool set_socket_opt(socket_t sock, int level, int optname, int optval) { + return set_socket_opt_impl(sock, level, optname, &optval, sizeof(optval)); +} + +inline bool set_socket_opt_time(socket_t sock, int level, int optname, + time_t sec, time_t usec) { +#ifdef _WIN32 + auto timeout = static_cast(sec * 1000 + usec / 1000); +#else + timeval timeout; + timeout.tv_sec = static_cast(sec); + timeout.tv_usec = static_cast(usec); +#endif + return set_socket_opt_impl(sock, level, optname, &timeout, sizeof(timeout)); +} + +} // namespace detail + +inline void default_socket_options(socket_t sock) { + detail::set_socket_opt(sock, SOL_SOCKET, #ifdef SO_REUSEPORT - setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, - reinterpret_cast(&opt), sizeof(opt)); + SO_REUSEPORT, #else - setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, - reinterpret_cast(&opt), sizeof(opt)); -#endif + SO_REUSEADDR, #endif + 1); } inline const char *status_message(int status) { @@ -2147,9 +2220,9 @@ inline const char *status_message(int status) { inline std::string get_bearer_token_auth(const Request &req) { if (req.has_header("Authorization")) { - static std::string BearerHeaderPrefix = "Bearer "; + constexpr auto bearer_header_prefix_len = detail::str_len("Bearer "); return req.get_header_value("Authorization") - .substr(BearerHeaderPrefix.length()); + .substr(bearer_header_prefix_len); } return ""; } @@ -2238,6 +2311,14 @@ inline void ClientImpl::set_write_timeout( duration, [&](time_t sec, time_t usec) { set_write_timeout(sec, usec); }); } +template +inline void ClientImpl::set_max_timeout( + const std::chrono::duration &duration) { + auto msec = + std::chrono::duration_cast(duration).count(); + set_max_timeout(msec); +} + template inline void Client::set_connection_timeout( const std::chrono::duration &duration) { @@ -2256,6 +2337,12 @@ Client::set_write_timeout(const std::chrono::duration &duration) { cli_->set_write_timeout(duration); } +template +inline void +Client::set_max_timeout(const std::chrono::duration &duration) { + cli_->set_max_timeout(duration); +} + /* * Forward declarations and types that will be part of the .h file if split into * .h + .cc. 
@@ -2310,8 +2397,6 @@ std::string encode_query_param(const std::string &value); std::string decode_url(const std::string &s, bool convert_plus_to_space); -void read_file(const std::string &path, std::string &out); - std::string trim_copy(const std::string &s); void divide( @@ -2330,10 +2415,12 @@ void split(const char *b, const char *e, char d, void split(const char *b, const char *e, char d, size_t m, std::function fn); -bool process_client_socket(socket_t sock, time_t read_timeout_sec, - time_t read_timeout_usec, time_t write_timeout_sec, - time_t write_timeout_usec, - std::function callback); +bool process_client_socket( + socket_t sock, time_t read_timeout_sec, time_t read_timeout_usec, + time_t write_timeout_sec, time_t write_timeout_usec, + time_t max_timeout_msec, + std::chrono::time_point start_time, + std::function callback); socket_t create_client_socket(const std::string &host, const std::string &ip, int port, int address_family, bool tcp_nodelay, @@ -2365,7 +2452,7 @@ ssize_t send_socket(socket_t sock, const void *ptr, size_t size, int flags); ssize_t read_socket(socket_t sock, void *ptr, size_t size, int flags); -enum class EncodingType { None = 0, Gzip, Brotli }; +enum class EncodingType { None = 0, Gzip, Brotli, Zstd }; EncodingType encoding_type(const Request &req, const Response &res); @@ -2375,12 +2462,14 @@ public: ~BufferStream() override = default; bool is_readable() const override; - bool is_writable() const override; + bool wait_readable() const override; + bool wait_writable() const override; ssize_t read(char *ptr, size_t size) override; ssize_t write(const char *ptr, size_t size) override; void get_remote_ip_and_port(std::string &ip, int &port) const override; void get_local_ip_and_port(std::string &ip, int &port) const override; socket_t socket() const override; + time_t duration() const override; const std::string &get_buffer() const; @@ -2476,6 +2565,34 @@ private: }; #endif +#ifdef CPPHTTPLIB_ZSTD_SUPPORT +class zstd_compressor : public compressor { +public: + zstd_compressor(); + ~zstd_compressor(); + + bool compress(const char *data, size_t data_length, bool last, + Callback callback) override; + +private: + ZSTD_CCtx *ctx_ = nullptr; +}; + +class zstd_decompressor : public decompressor { +public: + zstd_decompressor(); + ~zstd_decompressor(); + + bool is_valid() const override; + + bool decompress(const char *data, size_t data_length, + Callback callback) override; + +private: + ZSTD_DCtx *ctx_ = nullptr; +}; +#endif + // NOTE: until the read size reaches `fixed_buffer_size`, use `fixed_buffer` // to store data. The call can set memory on stack for performance. 
class stream_line_reader { @@ -2494,7 +2611,7 @@ private: char *fixed_buffer_; const size_t fixed_buffer_size_; size_t fixed_buffer_used_size_ = 0; - std::string glowable_buffer_; + std::string growable_buffer_; }; class mmap { @@ -2547,7 +2664,7 @@ inline bool is_obs_text(char c) { return 128 <= static_cast(c); } inline bool is_field_vchar(char c) { return is_vchar(c) || is_obs_text(c); } inline bool is_field_content(const std::string &s) { - if (s.empty()) { return false; } + if (s.empty()) { return true; } if (s.size() == 1) { return is_field_vchar(s[0]); @@ -2835,18 +2952,9 @@ inline std::string decode_url(const std::string &s, return result; } -inline void read_file(const std::string &path, std::string &out) { - std::ifstream fs(path, std::ios_base::binary); - fs.seekg(0, std::ios_base::end); - auto size = fs.tellg(); - fs.seekg(0); - out.resize(static_cast(size)); - fs.read(&out[0], static_cast(size)); -} - inline std::string file_extension(const std::string &path) { std::smatch m; - static auto re = std::regex("\\.([a-zA-Z0-9]+)$"); + thread_local auto re = std::regex("\\.([a-zA-Z0-9]+)$"); if (std::regex_search(path, m, re)) { return m[1].str(); } return std::string(); } @@ -2930,18 +3038,18 @@ inline stream_line_reader::stream_line_reader(Stream &strm, char *fixed_buffer, fixed_buffer_size_(fixed_buffer_size) {} inline const char *stream_line_reader::ptr() const { - if (glowable_buffer_.empty()) { + if (growable_buffer_.empty()) { return fixed_buffer_; } else { - return glowable_buffer_.data(); + return growable_buffer_.data(); } } inline size_t stream_line_reader::size() const { - if (glowable_buffer_.empty()) { + if (growable_buffer_.empty()) { return fixed_buffer_used_size_; } else { - return glowable_buffer_.size(); + return growable_buffer_.size(); } } @@ -2952,7 +3060,7 @@ inline bool stream_line_reader::end_with_crlf() const { inline bool stream_line_reader::getline() { fixed_buffer_used_size_ = 0; - glowable_buffer_.clear(); + growable_buffer_.clear(); #ifndef CPPHTTPLIB_ALLOW_LF_AS_LINE_TERMINATOR char prev_byte = 0; @@ -2990,11 +3098,11 @@ inline void stream_line_reader::append(char c) { fixed_buffer_[fixed_buffer_used_size_++] = c; fixed_buffer_[fixed_buffer_used_size_] = '\0'; } else { - if (glowable_buffer_.empty()) { + if (growable_buffer_.empty()) { assert(fixed_buffer_[fixed_buffer_used_size_] == '\0'); - glowable_buffer_.assign(fixed_buffer_, fixed_buffer_used_size_); + growable_buffer_.assign(fixed_buffer_, fixed_buffer_used_size_); } - glowable_buffer_ += c; + growable_buffer_ += c; } } @@ -3171,72 +3279,43 @@ inline ssize_t send_socket(socket_t sock, const void *ptr, size_t size, }); } -inline ssize_t select_read(socket_t sock, time_t sec, time_t usec) { -#ifdef CPPHTTPLIB_USE_POLL - struct pollfd pfd_read; - pfd_read.fd = sock; - pfd_read.events = POLLIN; - - auto timeout = static_cast(sec * 1000 + usec / 1000); - - return handle_EINTR([&]() { return poll(&pfd_read, 1, timeout); }); +inline int poll_wrapper(struct pollfd *fds, nfds_t nfds, int timeout) { +#ifdef _WIN32 + return ::WSAPoll(fds, nfds, timeout); #else -#ifndef _WIN32 - if (sock >= FD_SETSIZE) { return -1; } -#endif - - fd_set fds; - FD_ZERO(&fds); - FD_SET(sock, &fds); - - timeval tv; - tv.tv_sec = static_cast(sec); - tv.tv_usec = static_cast(usec); - - return handle_EINTR([&]() { - return select(static_cast(sock + 1), &fds, nullptr, nullptr, &tv); - }); + return ::poll(fds, nfds, timeout); #endif } -inline ssize_t select_write(socket_t sock, time_t sec, time_t usec) { -#ifdef CPPHTTPLIB_USE_POLL 
- struct pollfd pfd_read; - pfd_read.fd = sock; - pfd_read.events = POLLOUT; +template +inline ssize_t select_impl(socket_t sock, time_t sec, time_t usec) { + struct pollfd pfd; + pfd.fd = sock; + pfd.events = (Read ? POLLIN : POLLOUT); auto timeout = static_cast(sec * 1000 + usec / 1000); - return handle_EINTR([&]() { return poll(&pfd_read, 1, timeout); }); -#else -#ifndef _WIN32 - if (sock >= FD_SETSIZE) { return -1; } -#endif + return handle_EINTR([&]() { return poll_wrapper(&pfd, 1, timeout); }); +} - fd_set fds; - FD_ZERO(&fds); - FD_SET(sock, &fds); +inline ssize_t select_read(socket_t sock, time_t sec, time_t usec) { + return select_impl(sock, sec, usec); +} - timeval tv; - tv.tv_sec = static_cast(sec); - tv.tv_usec = static_cast(usec); - - return handle_EINTR([&]() { - return select(static_cast(sock + 1), nullptr, &fds, nullptr, &tv); - }); -#endif +inline ssize_t select_write(socket_t sock, time_t sec, time_t usec) { + return select_impl(sock, sec, usec); } inline Error wait_until_socket_is_ready(socket_t sock, time_t sec, time_t usec) { -#ifdef CPPHTTPLIB_USE_POLL struct pollfd pfd_read; pfd_read.fd = sock; pfd_read.events = POLLIN | POLLOUT; auto timeout = static_cast(sec * 1000 + usec / 1000); - auto poll_res = handle_EINTR([&]() { return poll(&pfd_read, 1, timeout); }); + auto poll_res = + handle_EINTR([&]() { return poll_wrapper(&pfd_read, 1, timeout); }); if (poll_res == 0) { return Error::ConnectionTimeout; } @@ -3250,38 +3329,6 @@ inline Error wait_until_socket_is_ready(socket_t sock, time_t sec, } return Error::Connection; -#else -#ifndef _WIN32 - if (sock >= FD_SETSIZE) { return Error::Connection; } -#endif - - fd_set fdsr; - FD_ZERO(&fdsr); - FD_SET(sock, &fdsr); - - auto fdsw = fdsr; - auto fdse = fdsr; - - timeval tv; - tv.tv_sec = static_cast(sec); - tv.tv_usec = static_cast(usec); - - auto ret = handle_EINTR([&]() { - return select(static_cast(sock + 1), &fdsr, &fdsw, &fdse, &tv); - }); - - if (ret == 0) { return Error::ConnectionTimeout; } - - if (ret > 0 && (FD_ISSET(sock, &fdsr) || FD_ISSET(sock, &fdsw))) { - auto error = 0; - socklen_t len = sizeof(error); - auto res = getsockopt(sock, SOL_SOCKET, SO_ERROR, - reinterpret_cast(&error), &len); - auto successful = res >= 0 && !error; - return successful ? 
Error::Success : Error::Connection; - } - return Error::Connection; -#endif } inline bool is_socket_alive(socket_t sock) { @@ -3298,16 +3345,21 @@ inline bool is_socket_alive(socket_t sock) { class SocketStream final : public Stream { public: SocketStream(socket_t sock, time_t read_timeout_sec, time_t read_timeout_usec, - time_t write_timeout_sec, time_t write_timeout_usec); + time_t write_timeout_sec, time_t write_timeout_usec, + time_t max_timeout_msec = 0, + std::chrono::time_point start_time = + (std::chrono::steady_clock::time_point::min)()); ~SocketStream() override; bool is_readable() const override; - bool is_writable() const override; + bool wait_readable() const override; + bool wait_writable() const override; ssize_t read(char *ptr, size_t size) override; ssize_t write(const char *ptr, size_t size) override; void get_remote_ip_and_port(std::string &ip, int &port) const override; void get_local_ip_and_port(std::string &ip, int &port) const override; socket_t socket() const override; + time_t duration() const override; private: socket_t sock_; @@ -3315,6 +3367,8 @@ private: time_t read_timeout_usec_; time_t write_timeout_sec_; time_t write_timeout_usec_; + time_t max_timeout_msec_; + const std::chrono::time_point start_time_; std::vector read_buff_; size_t read_buff_off_ = 0; @@ -3326,18 +3380,23 @@ private: #ifdef CPPHTTPLIB_OPENSSL_SUPPORT class SSLSocketStream final : public Stream { public: - SSLSocketStream(socket_t sock, SSL *ssl, time_t read_timeout_sec, - time_t read_timeout_usec, time_t write_timeout_sec, - time_t write_timeout_usec); + SSLSocketStream( + socket_t sock, SSL *ssl, time_t read_timeout_sec, + time_t read_timeout_usec, time_t write_timeout_sec, + time_t write_timeout_usec, time_t max_timeout_msec = 0, + std::chrono::time_point start_time = + (std::chrono::steady_clock::time_point::min)()); ~SSLSocketStream() override; bool is_readable() const override; - bool is_writable() const override; + bool wait_readable() const override; + bool wait_writable() const override; ssize_t read(char *ptr, size_t size) override; ssize_t write(const char *ptr, size_t size) override; void get_remote_ip_and_port(std::string &ip, int &port) const override; void get_local_ip_and_port(std::string &ip, int &port) const override; socket_t socket() const override; + time_t duration() const override; private: socket_t sock_; @@ -3346,6 +3405,8 @@ private: time_t read_timeout_usec_; time_t write_timeout_sec_; time_t write_timeout_usec_; + time_t max_timeout_msec_; + const std::chrono::time_point start_time_; }; #endif @@ -3416,13 +3477,15 @@ process_server_socket(const std::atomic &svr_sock, socket_t sock, }); } -inline bool process_client_socket(socket_t sock, time_t read_timeout_sec, - time_t read_timeout_usec, - time_t write_timeout_sec, - time_t write_timeout_usec, - std::function callback) { +inline bool process_client_socket( + socket_t sock, time_t read_timeout_sec, time_t read_timeout_usec, + time_t write_timeout_sec, time_t write_timeout_usec, + time_t max_timeout_msec, + std::chrono::time_point start_time, + std::function callback) { SocketStream strm(sock, read_timeout_sec, read_timeout_usec, - write_timeout_sec, write_timeout_usec); + write_timeout_sec, write_timeout_usec, max_timeout_msec, + start_time); return callback(strm); } @@ -3478,7 +3541,6 @@ socket_t create_socket(const std::string &host, const std::string &ip, int port, hints.ai_flags = socket_flags; } -#ifndef _WIN32 if (hints.ai_family == AF_UNIX) { const auto addrlen = host.length(); if (addrlen > 
sizeof(sockaddr_un::sun_path)) { return INVALID_SOCKET; } @@ -3502,11 +3564,19 @@ socket_t create_socket(const std::string &host, const std::string &ip, int port, sizeof(addr) - sizeof(addr.sun_path) + addrlen); #ifndef SOCK_CLOEXEC +#ifndef _WIN32 fcntl(sock, F_SETFD, FD_CLOEXEC); +#endif #endif if (socket_options) { socket_options(sock); } +#ifdef _WIN32 + // Setting SO_REUSEADDR seems not to work well with AF_UNIX on windows, so + // remove the option. + detail::set_socket_opt(sock, SOL_SOCKET, SO_REUSEADDR, 0); +#endif + bool dummy; if (!bind_or_connect(sock, hints, dummy)) { close_socket(sock); @@ -3515,7 +3585,6 @@ socket_t create_socket(const std::string &host, const std::string &ip, int port, } return sock; } -#endif auto service = std::to_string(port); @@ -3569,26 +3638,10 @@ socket_t create_socket(const std::string &host, const std::string &ip, int port, } #endif - if (tcp_nodelay) { - auto opt = 1; -#ifdef _WIN32 - setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, - reinterpret_cast(&opt), sizeof(opt)); -#else - setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, - reinterpret_cast(&opt), sizeof(opt)); -#endif - } + if (tcp_nodelay) { set_socket_opt(sock, IPPROTO_TCP, TCP_NODELAY, 1); } if (rp->ai_family == AF_INET6) { - auto opt = ipv6_v6only ? 1 : 0; -#ifdef _WIN32 - setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY, - reinterpret_cast(&opt), sizeof(opt)); -#else - setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY, - reinterpret_cast(&opt), sizeof(opt)); -#endif + set_socket_opt(sock, IPPROTO_IPV6, IPV6_V6ONLY, ipv6_v6only ? 1 : 0); } if (socket_options) { socket_options(sock); } @@ -3731,36 +3784,10 @@ inline socket_t create_client_socket( } set_nonblocking(sock2, false); - - { -#ifdef _WIN32 - auto timeout = static_cast(read_timeout_sec * 1000 + - read_timeout_usec / 1000); - setsockopt(sock2, SOL_SOCKET, SO_RCVTIMEO, - reinterpret_cast(&timeout), sizeof(timeout)); -#else - timeval tv; - tv.tv_sec = static_cast(read_timeout_sec); - tv.tv_usec = static_cast(read_timeout_usec); - setsockopt(sock2, SOL_SOCKET, SO_RCVTIMEO, - reinterpret_cast(&tv), sizeof(tv)); -#endif - } - { - -#ifdef _WIN32 - auto timeout = static_cast(write_timeout_sec * 1000 + - write_timeout_usec / 1000); - setsockopt(sock2, SOL_SOCKET, SO_SNDTIMEO, - reinterpret_cast(&timeout), sizeof(timeout)); -#else - timeval tv; - tv.tv_sec = static_cast(write_timeout_sec); - tv.tv_usec = static_cast(write_timeout_usec); - setsockopt(sock2, SOL_SOCKET, SO_SNDTIMEO, - reinterpret_cast(&tv), sizeof(tv)); -#endif - } + set_socket_opt_time(sock2, SOL_SOCKET, SO_RCVTIMEO, read_timeout_sec, + read_timeout_usec); + set_socket_opt_time(sock2, SOL_SOCKET, SO_SNDTIMEO, write_timeout_sec, + write_timeout_usec); error = Error::Success; return true; @@ -3963,6 +3990,12 @@ inline EncodingType encoding_type(const Request &req, const Response &res) { if (ret) { return EncodingType::Gzip; } #endif +#ifdef CPPHTTPLIB_ZSTD_SUPPORT + // TODO: 'Accept-Encoding' has zstd, not zstd;q=0 + ret = s.find("zstd") != std::string::npos; + if (ret) { return EncodingType::Zstd; } +#endif + return EncodingType::None; } @@ -4171,6 +4204,61 @@ inline bool brotli_decompressor::decompress(const char *data, } #endif +#ifdef CPPHTTPLIB_ZSTD_SUPPORT +inline zstd_compressor::zstd_compressor() { + ctx_ = ZSTD_createCCtx(); + ZSTD_CCtx_setParameter(ctx_, ZSTD_c_compressionLevel, ZSTD_fast); +} + +inline zstd_compressor::~zstd_compressor() { ZSTD_freeCCtx(ctx_); } + +inline bool zstd_compressor::compress(const char *data, size_t data_length, + bool last, Callback callback) { + std::array 
buff{}; + + ZSTD_EndDirective mode = last ? ZSTD_e_end : ZSTD_e_continue; + ZSTD_inBuffer input = {data, data_length, 0}; + + bool finished; + do { + ZSTD_outBuffer output = {buff.data(), CPPHTTPLIB_COMPRESSION_BUFSIZ, 0}; + size_t const remaining = ZSTD_compressStream2(ctx_, &output, &input, mode); + + if (ZSTD_isError(remaining)) { return false; } + + if (!callback(buff.data(), output.pos)) { return false; } + + finished = last ? (remaining == 0) : (input.pos == input.size); + + } while (!finished); + + return true; +} + +inline zstd_decompressor::zstd_decompressor() { ctx_ = ZSTD_createDCtx(); } + +inline zstd_decompressor::~zstd_decompressor() { ZSTD_freeDCtx(ctx_); } + +inline bool zstd_decompressor::is_valid() const { return ctx_ != nullptr; } + +inline bool zstd_decompressor::decompress(const char *data, size_t data_length, + Callback callback) { + std::array buff{}; + ZSTD_inBuffer input = {data, data_length, 0}; + + while (input.pos < input.size) { + ZSTD_outBuffer output = {buff.data(), CPPHTTPLIB_COMPRESSION_BUFSIZ, 0}; + size_t const remaining = ZSTD_decompressStream(ctx_, &output, &input); + + if (ZSTD_isError(remaining)) { return false; } + + if (!callback(buff.data(), output.pos)) { return false; } + } + + return true; +} +#endif + inline bool has_header(const Headers &headers, const std::string &key) { return headers.find(key) != headers.end(); } @@ -4197,6 +4285,9 @@ inline bool parse_header(const char *beg, const char *end, T fn) { p++; } + auto name = std::string(beg, p); + if (!detail::fields::is_field_name(name)) { return false; } + if (p == end) { return false; } auto key_end = p; @@ -4212,22 +4303,17 @@ inline bool parse_header(const char *beg, const char *end, T fn) { if (!key_len) { return false; } auto key = std::string(beg, key_end); - auto val = case_ignore::equal(key, "Location") - ? std::string(p, end) - : decode_url(std::string(p, end), false); + auto val = std::string(p, end); - // NOTE: From RFC 9110: - // Field values containing CR, LF, or NUL characters are - // invalid and dangerous, due to the varying ways that - // implementations might parse and interpret those - // characters; a recipient of CR, LF, or NUL within a field - // value MUST either reject the message or replace each of - // those characters with SP before further processing or - // forwarding of that message. 
- static const std::string CR_LF_NUL("\r\n\0", 3); - if (val.find_first_of(CR_LF_NUL) != std::string::npos) { return false; } + if (!detail::fields::is_field_value(val)) { return false; } + + if (case_ignore::equal(key, "Location") || + case_ignore::equal(key, "Referer")) { + fn(key, val); + } else { + fn(key, decode_url(val, false)); + } - fn(key, val); return true; } @@ -4263,7 +4349,7 @@ inline bool read_headers(Stream &strm, Headers &headers) { auto end = line_reader.ptr() + line_reader.size() - line_terminator_len; if (!parse_header(line_reader.ptr(), end, - [&](const std::string &key, std::string &val) { + [&](const std::string &key, const std::string &val) { headers.emplace(key, val); })) { return false; @@ -4312,7 +4398,8 @@ inline bool read_content_without_length(Stream &strm, uint64_t r = 0; for (;;) { auto n = strm.read(buf, CPPHTTPLIB_RECV_BUFSIZ); - if (n <= 0) { return true; } + if (n == 0) { return true; } + if (n < 0) { return false; } if (!out(buf, static_cast(n), r, 0)) { return false; } r += static_cast(n); @@ -4355,7 +4442,7 @@ inline bool read_content_chunked(Stream &strm, T &x, assert(chunk_len == 0); - // NOTE: In RFC 9112, '7.1 Chunked Transfer Coding' mentiones "The chunked + // NOTE: In RFC 9112, '7.1 Chunked Transfer Coding' mentions "The chunked // transfer coding is complete when a chunk with a chunk-size of zero is // received, possibly followed by a trailer section, and finally terminated by // an empty line". https://www.rfc-editor.org/rfc/rfc9112.html#section-7.1 @@ -4365,8 +4452,8 @@ inline bool read_content_chunked(Stream &strm, T &x, // to be ok whether the final CRLF exists or not in the chunked data. // https://www.rfc-editor.org/rfc/rfc9112.html#section-7.1.3 // - // According to the reference code in RFC 9112, cpp-htpplib now allows - // chuncked transfer coding data without the final CRLF. + // According to the reference code in RFC 9112, cpp-httplib now allows + // chunked transfer coding data without the final CRLF. 
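To make the chunked-coding note above concrete, these are the two terminations involved; the byte strings are purely illustrative wire data, not library API.

// Chunked body carrying "Wiki" (4 bytes), fully terminated per RFC 9112:
// chunk-size CRLF chunk-data CRLF, then last-chunk "0" CRLF, then the final CRLF.
static const char strict_form[]  = "4\r\nWiki\r\n0\r\n\r\n";
// The same body with the final CRLF omitted; per the comment above,
// cpp-httplib now also accepts this lenient form.
static const char lenient_form[] = "4\r\nWiki\r\n0\r\n";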
if (!line_reader.getline()) { return true; } while (strcmp(line_reader.ptr(), "\r\n") != 0) { @@ -4413,6 +4500,13 @@ bool prepare_content_receiver(T &x, int &status, #else status = StatusCode::UnsupportedMediaType_415; return false; +#endif + } else if (encoding == "zstd") { +#ifdef CPPHTTPLIB_ZSTD_SUPPORT + decompressor = detail::make_unique(); +#else + status = StatusCode::UnsupportedMediaType_415; + return false; #endif } @@ -4456,9 +4550,9 @@ bool read_content(Stream &strm, T &x, size_t payload_max_length, int &status, ret = read_content_without_length(strm, out); } else { auto is_invalid_value = false; - auto len = get_header_value_u64(x.headers, "Content-Length", - std::numeric_limits::max(), - 0, is_invalid_value); + auto len = get_header_value_u64( + x.headers, "Content-Length", + (std::numeric_limits::max)(), 0, is_invalid_value); if (is_invalid_value) { ret = false; @@ -4536,7 +4630,7 @@ inline bool write_content(Stream &strm, const ContentProvider &content_provider, data_sink.write = [&](const char *d, size_t l) -> bool { if (ok) { - if (strm.is_writable() && write_data(strm, d, l)) { + if (write_data(strm, d, l)) { offset += l; } else { ok = false; @@ -4545,10 +4639,10 @@ inline bool write_content(Stream &strm, const ContentProvider &content_provider, return ok; }; - data_sink.is_writable = [&]() -> bool { return strm.is_writable(); }; + data_sink.is_writable = [&]() -> bool { return strm.wait_writable(); }; while (offset < end_offset && !is_shutting_down()) { - if (!strm.is_writable()) { + if (!strm.wait_writable()) { error = Error::Write; return false; } else if (!content_provider(offset, end_offset - offset, data_sink)) { @@ -4586,17 +4680,17 @@ write_content_without_length(Stream &strm, data_sink.write = [&](const char *d, size_t l) -> bool { if (ok) { offset += l; - if (!strm.is_writable() || !write_data(strm, d, l)) { ok = false; } + if (!write_data(strm, d, l)) { ok = false; } } return ok; }; - data_sink.is_writable = [&]() -> bool { return strm.is_writable(); }; + data_sink.is_writable = [&]() -> bool { return strm.wait_writable(); }; data_sink.done = [&](void) { data_available = false; }; while (data_available && !is_shutting_down()) { - if (!strm.is_writable()) { + if (!strm.wait_writable()) { return false; } else if (!content_provider(offset, 0, data_sink)) { return false; @@ -4631,10 +4725,7 @@ write_content_chunked(Stream &strm, const ContentProvider &content_provider, // Emit chunked response header and footer for each chunk auto chunk = from_i_to_hex(payload.size()) + "\r\n" + payload + "\r\n"; - if (!strm.is_writable() || - !write_data(strm, chunk.data(), chunk.size())) { - ok = false; - } + if (!write_data(strm, chunk.data(), chunk.size())) { ok = false; } } } else { ok = false; @@ -4643,7 +4734,7 @@ write_content_chunked(Stream &strm, const ContentProvider &content_provider, return ok; }; - data_sink.is_writable = [&]() -> bool { return strm.is_writable(); }; + data_sink.is_writable = [&]() -> bool { return strm.wait_writable(); }; auto done_with_trailer = [&](const Headers *trailer) { if (!ok) { return; } @@ -4663,17 +4754,14 @@ write_content_chunked(Stream &strm, const ContentProvider &content_provider, if (!payload.empty()) { // Emit chunked response header and footer for each chunk auto chunk = from_i_to_hex(payload.size()) + "\r\n" + payload + "\r\n"; - if (!strm.is_writable() || - !write_data(strm, chunk.data(), chunk.size())) { + if (!write_data(strm, chunk.data(), chunk.size())) { ok = false; return; } } - static const std::string done_marker("0\r\n"); 
- if (!write_data(strm, done_marker.data(), done_marker.size())) { - ok = false; - } + constexpr const char done_marker[] = "0\r\n"; + if (!write_data(strm, done_marker, str_len(done_marker))) { ok = false; } // Trailer if (trailer) { @@ -4685,8 +4773,8 @@ write_content_chunked(Stream &strm, const ContentProvider &content_provider, } } - static const std::string crlf("\r\n"); - if (!write_data(strm, crlf.data(), crlf.size())) { ok = false; } + constexpr const char crlf[] = "\r\n"; + if (!write_data(strm, crlf, str_len(crlf))) { ok = false; } }; data_sink.done = [&](void) { done_with_trailer(nullptr); }; @@ -4696,7 +4784,7 @@ write_content_chunked(Stream &strm, const ContentProvider &content_provider, }; while (data_available && !is_shutting_down()) { - if (!strm.is_writable()) { + if (!strm.wait_writable()) { error = Error::Write; return false; } else if (!content_provider(offset, 0, data_sink)) { @@ -4928,13 +5016,13 @@ public: return false; } - static const std::string header_content_type = "Content-Type:"; + constexpr const char header_content_type[] = "Content-Type:"; if (start_with_case_ignore(header, header_content_type)) { file_.content_type = - trim_copy(header.substr(header_content_type.size())); + trim_copy(header.substr(str_len(header_content_type))); } else { - static const std::regex re_content_disposition( + thread_local const std::regex re_content_disposition( R"~(^Content-Disposition:\s*form-data;\s*(.*)$)~", std::regex_constants::icase); @@ -4956,8 +5044,8 @@ public: it = params.find("filename*"); if (it != params.end()) { - // Only allow UTF-8 enconnding... - static const std::regex re_rfc5987_encoding( + // Only allow UTF-8 encoding... + thread_local const std::regex re_rfc5987_encoding( R"~(^UTF-8''(.+?)$)~", std::regex_constants::icase); std::smatch m2; @@ -5029,10 +5117,10 @@ private: file_.content_type.clear(); } - bool start_with_case_ignore(const std::string &a, - const std::string &b) const { - if (a.size() < b.size()) { return false; } - for (size_t i = 0; i < b.size(); i++) { + bool start_with_case_ignore(const std::string &a, const char *b) const { + const auto b_len = strlen(b); + if (a.size() < b_len) { return false; } + for (size_t i = 0; i < b_len; i++) { if (case_ignore::to_lower(a[i]) != case_ignore::to_lower(b[i])) { return false; } @@ -5119,19 +5207,18 @@ private: }; inline std::string random_string(size_t length) { - static const char data[] = + constexpr const char data[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; - // std::random_device might actually be deterministic on some - // platforms, but due to lack of support in the c++ standard library, - // doing better requires either some ugly hacks or breaking portability. - static std::random_device seed_gen; - - // Request 128 bits of entropy for initialization - static std::seed_seq seed_sequence{seed_gen(), seed_gen(), seed_gen(), - seed_gen()}; - - static std::mt19937 engine(seed_sequence); + thread_local auto engine([]() { + // std::random_device might actually be deterministic on some + // platforms, but due to lack of support in the c++ standard library, + // doing better requires either some ugly hacks or breaking portability. 
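The seeding pattern used by random_string() above, pulled out for clarity: a per-thread Mersenne Twister initialised once from std::random_device through a std::seed_seq, so concurrent callers never share (or race on) engine state. Only the seeding scheme mirrors the library code; the function name, alphabet constant and the use of std::uniform_int_distribution are illustrative.

#include <random>
#include <string>

// Minimal sketch of a thread-local, seed_seq-initialised RNG (names invented).
inline std::string random_token(std::size_t length) {
    static constexpr char alphabet[] =
        "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
    thread_local std::mt19937 engine = [] {
        std::random_device seed_gen; // may be weak on some platforms, as noted above
        std::seed_seq seq{seed_gen(), seed_gen(), seed_gen(), seed_gen()};
        return std::mt19937(seq);
    }();
    std::uniform_int_distribution<std::size_t> pick(0, sizeof(alphabet) - 2);
    std::string out;
    out.reserve(length);
    for (std::size_t i = 0; i < length; i++) { out += alphabet[pick(engine)]; }
    return out;
}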
+ std::random_device seed_gen; + // Request 128 bits of entropy for initialization + std::seed_seq seed_sequence{seed_gen(), seed_gen(), seed_gen(), seed_gen()}; + return std::mt19937(seed_sequence); + }()); std::string result; for (size_t i = 0; i < length; i++) { @@ -5203,7 +5290,7 @@ serialize_multipart_formdata(const MultipartFormDataItems &items, inline bool range_error(Request &req, Response &res) { if (!req.ranges.empty() && 200 <= res.status && res.status < 300) { - ssize_t contant_len = static_cast( + ssize_t content_len = static_cast( res.content_length_ ? res.content_length_ : res.body.size()); ssize_t prev_first_pos = -1; @@ -5223,12 +5310,12 @@ inline bool range_error(Request &req, Response &res) { if (first_pos == -1 && last_pos == -1) { first_pos = 0; - last_pos = contant_len; + last_pos = content_len; } if (first_pos == -1) { - first_pos = contant_len - last_pos; - last_pos = contant_len - 1; + first_pos = content_len - last_pos; + last_pos = content_len - 1; } // NOTE: RFC-9110 '14.1.2. Byte Ranges': @@ -5240,13 +5327,13 @@ inline bool range_error(Request &req, Response &res) { // with a value that is one less than the current length of the selected // representation). // https://www.rfc-editor.org/rfc/rfc9110.html#section-14.1.2-6 - if (last_pos == -1 || last_pos >= contant_len) { - last_pos = contant_len - 1; + if (last_pos == -1 || last_pos >= content_len) { + last_pos = content_len - 1; } // Range must be within content length if (!(0 <= first_pos && first_pos <= last_pos && - last_pos <= contant_len - 1)) { + last_pos <= content_len - 1)) { return true; } @@ -5380,10 +5467,14 @@ write_multipart_ranges_data(Stream &strm, const Request &req, Response &res, inline bool expect_content(const Request &req) { if (req.method == "POST" || req.method == "PUT" || req.method == "PATCH" || - req.method == "PRI" || req.method == "DELETE") { + req.method == "DELETE") { return true; } - // TODO: check if Content-Length is set + if (req.has_header("Content-Length") && + req.get_header_value_u64("Content-Length") > 0) { + return true; + } + if (is_chunked_transfer_encoding(req.headers)) { return true; } return false; } @@ -5428,9 +5519,76 @@ inline std::string SHA_256(const std::string &s) { inline std::string SHA_512(const std::string &s) { return message_digest(s, EVP_sha512()); } -#endif -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +inline std::pair make_digest_authentication_header( + const Request &req, const std::map &auth, + size_t cnonce_count, const std::string &cnonce, const std::string &username, + const std::string &password, bool is_proxy = false) { + std::string nc; + { + std::stringstream ss; + ss << std::setfill('0') << std::setw(8) << std::hex << cnonce_count; + nc = ss.str(); + } + + std::string qop; + if (auth.find("qop") != auth.end()) { + qop = auth.at("qop"); + if (qop.find("auth-int") != std::string::npos) { + qop = "auth-int"; + } else if (qop.find("auth") != std::string::npos) { + qop = "auth"; + } else { + qop.clear(); + } + } + + std::string algo = "MD5"; + if (auth.find("algorithm") != auth.end()) { algo = auth.at("algorithm"); } + + std::string response; + { + auto H = algo == "SHA-256" ? detail::SHA_256 + : algo == "SHA-512" ? 
detail::SHA_512 + : detail::MD5; + + auto A1 = username + ":" + auth.at("realm") + ":" + password; + + auto A2 = req.method + ":" + req.path; + if (qop == "auth-int") { A2 += ":" + H(req.body); } + + if (qop.empty()) { + response = H(H(A1) + ":" + auth.at("nonce") + ":" + H(A2)); + } else { + response = H(H(A1) + ":" + auth.at("nonce") + ":" + nc + ":" + cnonce + + ":" + qop + ":" + H(A2)); + } + } + + auto opaque = (auth.find("opaque") != auth.end()) ? auth.at("opaque") : ""; + + auto field = "Digest username=\"" + username + "\", realm=\"" + + auth.at("realm") + "\", nonce=\"" + auth.at("nonce") + + "\", uri=\"" + req.path + "\", algorithm=" + algo + + (qop.empty() ? ", response=\"" + : ", qop=" + qop + ", nc=" + nc + ", cnonce=\"" + + cnonce + "\", response=\"") + + response + "\"" + + (opaque.empty() ? "" : ", opaque=\"" + opaque + "\""); + + auto key = is_proxy ? "Proxy-Authorization" : "Authorization"; + return std::make_pair(key, field); +} + +inline bool is_ssl_peer_could_be_closed(SSL *ssl, socket_t sock) { + detail::set_nonblocking(sock, true); + auto se = detail::scope_exit([&]() { detail::set_nonblocking(sock, false); }); + + char buf[1]; + return !SSL_peek(ssl, buf, 1) && + SSL_get_error(ssl, 0) == SSL_ERROR_ZERO_RETURN; +} + #ifdef _WIN32 // NOTE: This code came up with the following stackoverflow post: // https://stackoverflow.com/questions/9507184/can-openssl-on-windows-use-the-system-certificate-store @@ -5569,74 +5727,13 @@ public: static WSInit wsinit_; #endif -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT -inline std::pair make_digest_authentication_header( - const Request &req, const std::map &auth, - size_t cnonce_count, const std::string &cnonce, const std::string &username, - const std::string &password, bool is_proxy = false) { - std::string nc; - { - std::stringstream ss; - ss << std::setfill('0') << std::setw(8) << std::hex << cnonce_count; - nc = ss.str(); - } - - std::string qop; - if (auth.find("qop") != auth.end()) { - qop = auth.at("qop"); - if (qop.find("auth-int") != std::string::npos) { - qop = "auth-int"; - } else if (qop.find("auth") != std::string::npos) { - qop = "auth"; - } else { - qop.clear(); - } - } - - std::string algo = "MD5"; - if (auth.find("algorithm") != auth.end()) { algo = auth.at("algorithm"); } - - std::string response; - { - auto H = algo == "SHA-256" ? detail::SHA_256 - : algo == "SHA-512" ? detail::SHA_512 - : detail::MD5; - - auto A1 = username + ":" + auth.at("realm") + ":" + password; - - auto A2 = req.method + ":" + req.path; - if (qop == "auth-int") { A2 += ":" + H(req.body); } - - if (qop.empty()) { - response = H(H(A1) + ":" + auth.at("nonce") + ":" + H(A2)); - } else { - response = H(H(A1) + ":" + auth.at("nonce") + ":" + nc + ":" + cnonce + - ":" + qop + ":" + H(A2)); - } - } - - auto opaque = (auth.find("opaque") != auth.end()) ? auth.at("opaque") : ""; - - auto field = "Digest username=\"" + username + "\", realm=\"" + - auth.at("realm") + "\", nonce=\"" + auth.at("nonce") + - "\", uri=\"" + req.path + "\", algorithm=" + algo + - (qop.empty() ? ", response=\"" - : ", qop=" + qop + ", nc=" + nc + ", cnonce=\"" + - cnonce + "\", response=\"") + - response + "\"" + - (opaque.empty() ? "" : ", opaque=\"" + opaque + "\""); - - auto key = is_proxy ? "Proxy-Authorization" : "Authorization"; - return std::make_pair(key, field); -} -#endif - inline bool parse_www_authenticate(const Response &res, std::map &auth, bool is_proxy) { auto auth_key = is_proxy ? 
"Proxy-Authenticate" : "WWW-Authenticate"; if (res.has_header(auth_key)) { - static auto re = std::regex(R"~((?:(?:,\s*)?(.+?)=(?:"(.*?)"|([^,]*))))~"); + thread_local auto re = + std::regex(R"~((?:(?:,\s*)?(.+?)=(?:"(.*?)"|([^,]*))))~"); auto s = res.get_header_value(auth_key); auto pos = s.find(' '); if (pos != std::string::npos) { @@ -5720,7 +5817,7 @@ inline void hosted_at(const std::string &hostname, inline std::string append_query_params(const std::string &path, const Params ¶ms) { std::string path_with_query = path; - const static std::regex re("[^?]+\\?.*"); + thread_local const std::regex re("[^?]+\\?.*"); auto delm = std::regex_match(path, re) ? '&' : '?'; path_with_query += delm + detail::params_to_query_str(params); return path_with_query; @@ -5949,23 +6046,52 @@ inline ssize_t Stream::write(const std::string &s) { namespace detail { +inline void calc_actual_timeout(time_t max_timeout_msec, time_t duration_msec, + time_t timeout_sec, time_t timeout_usec, + time_t &actual_timeout_sec, + time_t &actual_timeout_usec) { + auto timeout_msec = (timeout_sec * 1000) + (timeout_usec / 1000); + + auto actual_timeout_msec = + (std::min)(max_timeout_msec - duration_msec, timeout_msec); + + actual_timeout_sec = actual_timeout_msec / 1000; + actual_timeout_usec = (actual_timeout_msec % 1000) * 1000; +} + // Socket stream implementation -inline SocketStream::SocketStream(socket_t sock, time_t read_timeout_sec, - time_t read_timeout_usec, - time_t write_timeout_sec, - time_t write_timeout_usec) +inline SocketStream::SocketStream( + socket_t sock, time_t read_timeout_sec, time_t read_timeout_usec, + time_t write_timeout_sec, time_t write_timeout_usec, + time_t max_timeout_msec, + std::chrono::time_point start_time) : sock_(sock), read_timeout_sec_(read_timeout_sec), read_timeout_usec_(read_timeout_usec), write_timeout_sec_(write_timeout_sec), - write_timeout_usec_(write_timeout_usec), read_buff_(read_buff_size_, 0) {} + write_timeout_usec_(write_timeout_usec), + max_timeout_msec_(max_timeout_msec), start_time_(start_time), + read_buff_(read_buff_size_, 0) {} inline SocketStream::~SocketStream() = default; inline bool SocketStream::is_readable() const { - return select_read(sock_, read_timeout_sec_, read_timeout_usec_) > 0; + return read_buff_off_ < read_buff_content_size_; } -inline bool SocketStream::is_writable() const { +inline bool SocketStream::wait_readable() const { + if (max_timeout_msec_ <= 0) { + return select_read(sock_, read_timeout_sec_, read_timeout_usec_) > 0; + } + + time_t read_timeout_sec; + time_t read_timeout_usec; + calc_actual_timeout(max_timeout_msec_, duration(), read_timeout_sec_, + read_timeout_usec_, read_timeout_sec, read_timeout_usec); + + return select_read(sock_, read_timeout_sec, read_timeout_usec) > 0; +} + +inline bool SocketStream::wait_writable() const { return select_write(sock_, write_timeout_sec_, write_timeout_usec_) > 0 && is_socket_alive(sock_); } @@ -5992,7 +6118,7 @@ inline ssize_t SocketStream::read(char *ptr, size_t size) { } } - if (!is_readable()) { return -1; } + if (!wait_readable()) { return -1; } read_buff_off_ = 0; read_buff_content_size_ = 0; @@ -6017,7 +6143,7 @@ inline ssize_t SocketStream::read(char *ptr, size_t size) { } inline ssize_t SocketStream::write(const char *ptr, size_t size) { - if (!is_writable()) { return -1; } + if (!wait_writable()) { return -1; } #if defined(_WIN32) && !defined(_WIN64) size = @@ -6039,10 +6165,18 @@ inline void SocketStream::get_local_ip_and_port(std::string &ip, inline socket_t SocketStream::socket() 
const { return sock_; } +inline time_t SocketStream::duration() const { + return std::chrono::duration_cast( + std::chrono::steady_clock::now() - start_time_) + .count(); +} + // Buffer stream implementation inline bool BufferStream::is_readable() const { return true; } -inline bool BufferStream::is_writable() const { return true; } +inline bool BufferStream::wait_readable() const { return true; } + +inline bool BufferStream::wait_writable() const { return true; } inline ssize_t BufferStream::read(char *ptr, size_t size) { #if defined(_MSC_VER) && _MSC_VER < 1910 @@ -6067,10 +6201,12 @@ inline void BufferStream::get_local_ip_and_port(std::string & /*ip*/, inline socket_t BufferStream::socket() const { return 0; } +inline time_t BufferStream::duration() const { return 0; } + inline const std::string &BufferStream::get_buffer() const { return buffer; } inline PathParamsMatcher::PathParamsMatcher(const std::string &pattern) { - static constexpr char marker[] = "/:"; + constexpr const char marker[] = "/:"; // One past the last ending position of a path param substring std::size_t last_param_end = 0; @@ -6091,7 +6227,7 @@ inline PathParamsMatcher::PathParamsMatcher(const std::string &pattern) { static_fragments_.push_back( pattern.substr(last_param_end, marker_pos - last_param_end + 1)); - const auto param_name_start = marker_pos + 2; + const auto param_name_start = marker_pos + str_len(marker); auto sep_pos = pattern.find(separator, param_name_start); if (sep_pos == std::string::npos) { sep_pos = pattern.length(); } @@ -6398,12 +6534,12 @@ inline Server &Server::set_payload_max_length(size_t length) { inline bool Server::bind_to_port(const std::string &host, int port, int socket_flags) { auto ret = bind_internal(host, port, socket_flags); - if (ret == -1) { is_decommisioned = true; } + if (ret == -1) { is_decommissioned = true; } return ret >= 0; } inline int Server::bind_to_any_port(const std::string &host, int socket_flags) { auto ret = bind_internal(host, 0, socket_flags); - if (ret == -1) { is_decommisioned = true; } + if (ret == -1) { is_decommissioned = true; } return ret; } @@ -6417,7 +6553,7 @@ inline bool Server::listen(const std::string &host, int port, inline bool Server::is_running() const { return is_running_; } inline void Server::wait_until_ready() const { - while (!is_running_ && !is_decommisioned) { + while (!is_running_ && !is_decommissioned) { std::this_thread::sleep_for(std::chrono::milliseconds{1}); } } @@ -6429,10 +6565,10 @@ inline void Server::stop() { detail::shutdown_socket(sock); detail::close_socket(sock); } - is_decommisioned = false; + is_decommissioned = false; } -inline void Server::decommission() { is_decommisioned = true; } +inline void Server::decommission() { is_decommissioned = true; } inline bool Server::parse_request_line(const char *s, Request &req) const { auto len = strlen(s); @@ -6455,7 +6591,7 @@ inline bool Server::parse_request_line(const char *s, Request &req) const { if (count != 3) { return false; } } - static const std::set methods{ + thread_local const std::set methods{ "GET", "HEAD", "POST", "PUT", "DELETE", "CONNECT", "OPTIONS", "TRACE", "PATCH", "PRI"}; @@ -6609,6 +6745,10 @@ Server::write_content_with_provider(Stream &strm, const Request &req, } else if (type == detail::EncodingType::Brotli) { #ifdef CPPHTTPLIB_BROTLI_SUPPORT compressor = detail::make_unique(); +#endif + } else if (type == detail::EncodingType::Zstd) { +#ifdef CPPHTTPLIB_ZSTD_SUPPORT + compressor = detail::make_unique(); #endif } else { compressor = detail::make_unique(); 
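Putting the zstd pieces together (encoding_type() spotting "zstd" in Accept-Encoding, the compressor branch above, and the client adding "zstd" to its Accept-Encoding header), an end-to-end exchange might look like the sketch below. It assumes both translation units are compiled with CPPHTTPLIB_ZSTD_SUPPORT and linked against libzstd; the endpoint name, port and payload are invented, and the httplib calls used (Server::Get, Response::set_content, Client::Get) are standard cpp-httplib API that does not all appear in this diff.

#define CPPHTTPLIB_ZSTD_SUPPORT
#include "httplib.h"
#include <string>
#include <thread>

int main() {
    httplib::Server svr;
    svr.Get("/big", [](const httplib::Request &, httplib::Response &res) {
        res.set_content(std::string(1 << 20, 'x'), "text/plain");
    });

    std::thread t([&] { svr.listen("localhost", 8080); });
    svr.wait_until_ready();

    // With zstd support compiled in, the client advertises zstd in
    // Accept-Encoding automatically, the server answers with
    // "Content-Encoding: zstd", and the body is decompressed transparently.
    httplib::Client cli("localhost", 8080);
    auto res = cli.Get("/big");
    bool ok = res && res->status == 200 && res->body.size() == (1 << 20);

    svr.stop();
    t.join();
    return ok ? 0 : 1;
}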
@@ -6791,7 +6931,7 @@ Server::create_server_socket(const std::string &host, int port, inline int Server::bind_internal(const std::string &host, int port, int socket_flags) { - if (is_decommisioned) { return -1; } + if (is_decommissioned) { return -1; } if (!is_valid()) { return -1; } @@ -6818,7 +6958,7 @@ inline int Server::bind_internal(const std::string &host, int port, } inline bool Server::listen_internal() { - if (is_decommisioned) { return false; } + if (is_decommissioned) { return false; } auto ret = true; is_running_ = true; @@ -6842,7 +6982,7 @@ inline bool Server::listen_internal() { #endif #if defined _WIN32 - // sockets conneced via WASAccept inherit flags NO_HANDLE_INHERIT, + // sockets connected via WASAccept inherit flags NO_HANDLE_INHERIT, // OVERLAPPED socket_t sock = WSAAccept(svr_sock_, nullptr, nullptr, nullptr, 0); #elif defined SOCK_CLOEXEC @@ -6869,35 +7009,10 @@ inline bool Server::listen_internal() { break; } - { -#ifdef _WIN32 - auto timeout = static_cast(read_timeout_sec_ * 1000 + - read_timeout_usec_ / 1000); - setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, - reinterpret_cast(&timeout), sizeof(timeout)); -#else - timeval tv; - tv.tv_sec = static_cast(read_timeout_sec_); - tv.tv_usec = static_cast(read_timeout_usec_); - setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, - reinterpret_cast(&tv), sizeof(tv)); -#endif - } - { - -#ifdef _WIN32 - auto timeout = static_cast(write_timeout_sec_ * 1000 + - write_timeout_usec_ / 1000); - setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, - reinterpret_cast(&timeout), sizeof(timeout)); -#else - timeval tv; - tv.tv_sec = static_cast(write_timeout_sec_); - tv.tv_usec = static_cast(write_timeout_usec_); - setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, - reinterpret_cast(&tv), sizeof(tv)); -#endif - } + detail::set_socket_opt_time(sock, SOL_SOCKET, SO_RCVTIMEO, + read_timeout_sec_, read_timeout_usec_); + detail::set_socket_opt_time(sock, SOL_SOCKET, SO_SNDTIMEO, + write_timeout_sec_, write_timeout_usec_); if (!task_queue->enqueue( [this, sock]() { process_and_close_socket(sock); })) { @@ -6909,7 +7024,7 @@ inline bool Server::listen_internal() { task_queue->shutdown(); } - is_decommisioned = !ret; + is_decommissioned = !ret; return ret; } @@ -7049,6 +7164,8 @@ inline void Server::apply_ranges(const Request &req, Response &res, res.set_header("Content-Encoding", "gzip"); } else if (type == detail::EncodingType::Brotli) { res.set_header("Content-Encoding", "br"); + } else if (type == detail::EncodingType::Zstd) { + res.set_header("Content-Encoding", "zstd"); } } } @@ -7088,6 +7205,11 @@ inline void Server::apply_ranges(const Request &req, Response &res, #ifdef CPPHTTPLIB_BROTLI_SUPPORT compressor = detail::make_unique(); content_encoding = "br"; +#endif + } else if (type == detail::EncodingType::Zstd) { +#ifdef CPPHTTPLIB_ZSTD_SUPPORT + compressor = detail::make_unique(); + content_encoding = "zstd"; #endif } @@ -7143,28 +7265,6 @@ Server::process_request(Stream &strm, const std::string &remote_addr, res.version = "HTTP/1.1"; res.headers = default_headers_; -#ifdef _WIN32 - // TODO: Increase FD_SETSIZE statically (libzmq), dynamically (MySQL). -#else -#ifndef CPPHTTPLIB_USE_POLL - // Socket file descriptor exceeded FD_SETSIZE... 
- if (strm.socket() >= FD_SETSIZE) { - Headers dummy; - detail::read_headers(strm, dummy); - res.status = StatusCode::InternalServerError_500; - return write_response(strm, close_connection, req, res); - } -#endif -#endif - - // Check if the request URI doesn't exceed the limit - if (line_reader.size() > CPPHTTPLIB_REQUEST_URI_MAX_LENGTH) { - Headers dummy; - detail::read_headers(strm, dummy); - res.status = StatusCode::UriTooLong_414; - return write_response(strm, close_connection, req, res); - } - // Request line and headers if (!parse_request_line(line_reader.ptr(), req) || !detail::read_headers(strm, req.headers)) { @@ -7172,6 +7272,14 @@ Server::process_request(Stream &strm, const std::string &remote_addr, return write_response(strm, close_connection, req, res); } + // Check if the request URI doesn't exceed the limit + if (req.target.size() > CPPHTTPLIB_REQUEST_URI_MAX_LENGTH) { + Headers dummy; + detail::read_headers(strm, dummy); + res.status = StatusCode::UriTooLong_414; + return write_response(strm, close_connection, req, res); + } + if (req.get_header_value("Connection") == "close") { connection_closed = true; } @@ -7348,6 +7456,16 @@ inline ClientImpl::ClientImpl(const std::string &host, int port, client_cert_path_(client_cert_path), client_key_path_(client_key_path) {} inline ClientImpl::~ClientImpl() { + // Wait until all the requests in flight are handled. + size_t retry_count = 10; + while (retry_count-- > 0) { + { + std::lock_guard guard(socket_mutex_); + if (socket_requests_in_flight_ == 0) { break; } + } + std::this_thread::sleep_for(std::chrono::milliseconds{1}); + } + std::lock_guard guard(socket_mutex_); shutdown_socket(socket_); close_socket(socket_); @@ -7363,6 +7481,7 @@ inline void ClientImpl::copy_settings(const ClientImpl &rhs) { read_timeout_usec_ = rhs.read_timeout_usec_; write_timeout_sec_ = rhs.write_timeout_sec_; write_timeout_usec_ = rhs.write_timeout_usec_; + max_timeout_msec_ = rhs.max_timeout_msec_; basic_auth_username_ = rhs.basic_auth_username_; basic_auth_password_ = rhs.basic_auth_password_; bearer_token_auth_token_ = rhs.bearer_token_auth_token_; @@ -7472,9 +7591,9 @@ inline bool ClientImpl::read_response_line(Stream &strm, const Request &req, if (!line_reader.getline()) { return false; } #ifdef CPPHTTPLIB_ALLOW_LF_AS_LINE_TERMINATOR - const static std::regex re("(HTTP/1\\.[01]) (\\d{3})(?: (.*?))?\r?\n"); + thread_local const std::regex re("(HTTP/1\\.[01]) (\\d{3})(?: (.*?))?\r?\n"); #else - const static std::regex re("(HTTP/1\\.[01]) (\\d{3})(?: (.*?))?\r\n"); + thread_local const std::regex re("(HTTP/1\\.[01]) (\\d{3})(?: (.*?))?\r\n"); #endif std::cmatch m; @@ -7509,18 +7628,6 @@ inline bool ClientImpl::send(Request &req, Response &res, Error &error) { return ret; } -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT -inline bool ClientImpl::is_ssl_peer_could_be_closed(SSL *ssl) const { - detail::set_nonblocking(socket_.sock, true); - auto se = detail::scope_exit( - [&]() { detail::set_nonblocking(socket_.sock, false); }); - - char buf[1]; - return !SSL_peek(ssl, buf, 1) && - SSL_get_error(ssl, 0) == SSL_ERROR_ZERO_RETURN; -} -#endif - inline bool ClientImpl::send_(Request &req, Response &res, Error &error) { { std::lock_guard guard(socket_mutex_); @@ -7535,12 +7642,14 @@ inline bool ClientImpl::send_(Request &req, Response &res, Error &error) { #ifdef CPPHTTPLIB_OPENSSL_SUPPORT if (is_alive && is_ssl()) { - if (is_ssl_peer_could_be_closed(socket_.ssl)) { is_alive = false; } + if (detail::is_ssl_peer_could_be_closed(socket_.ssl, socket_.sock)) { + is_alive = 
false; + } } #endif if (!is_alive) { - // Attempt to avoid sigpipe by shutting down nongracefully if it seems + // Attempt to avoid sigpipe by shutting down non-gracefully if it seems // like the other side has already closed the connection Also, there // cannot be any requests in flight from other threads since we locked // request_mutex_, so safe to close everything immediately @@ -7560,7 +7669,8 @@ inline bool ClientImpl::send_(Request &req, Response &res, Error &error) { auto &scli = static_cast(*this); if (!proxy_host_.empty() && proxy_port_ != -1) { auto success = false; - if (!scli.connect_with_proxy(socket_, res, success, error)) { + if (!scli.connect_with_proxy(socket_, req.start_time_, res, success, + error)) { return success; } } @@ -7606,7 +7716,7 @@ inline bool ClientImpl::send_(Request &req, Response &res, Error &error) { } }); - ret = process_socket(socket_, [&](Stream &strm) { + ret = process_socket(socket_, req.start_time_, [&](Stream &strm) { return handle_request(strm, req, res, close_connection, error); }); @@ -7715,7 +7825,7 @@ inline bool ClientImpl::redirect(Request &req, Response &res, Error &error) { auto location = res.get_header_value("location"); if (location.empty()) { return false; } - const static std::regex re( + thread_local const std::regex re( R"((?:(https?):)?(?://(?:\[([a-fA-F\d:]+)\]|([^:/?#]+))(?::(\d+))?)?([^?#]*)(\?[^#]*)?(?:#.*)?)"); std::smatch m; @@ -7824,6 +7934,10 @@ inline bool ClientImpl::write_request(Stream &strm, Request &req, #ifdef CPPHTTPLIB_ZLIB_SUPPORT if (!accept_encoding.empty()) { accept_encoding += ", "; } accept_encoding += "gzip, deflate"; +#endif +#ifdef CPPHTTPLIB_ZSTD_SUPPORT + if (!accept_encoding.empty()) { accept_encoding += ", "; } + accept_encoding += "zstd"; #endif req.set_header("Accept-Encoding", accept_encoding); } @@ -8015,6 +8129,9 @@ inline Result ClientImpl::send_with_content_provider( req.headers = headers; req.path = path; req.progress = progress; + if (max_timeout_msec_ > 0) { + req.start_time_ = std::chrono::steady_clock::now(); + } auto error = Error::Success; @@ -8041,7 +8158,7 @@ inline bool ClientImpl::process_request(Stream &strm, Request &req, if (is_ssl()) { auto is_proxy_enabled = !proxy_host_.empty() && proxy_port_ != -1; if (!is_proxy_enabled) { - if (is_ssl_peer_could_be_closed(socket_.ssl)) { + if (detail::is_ssl_peer_could_be_closed(socket_.ssl, socket_.sock)) { error = Error::SSLPeerCouldBeClosed_; return false; } @@ -8166,12 +8283,13 @@ inline ContentProviderWithoutLength ClientImpl::get_multipart_content_provider( }; } -inline bool -ClientImpl::process_socket(const Socket &socket, - std::function callback) { +inline bool ClientImpl::process_socket( + const Socket &socket, + std::chrono::time_point start_time, + std::function callback) { return detail::process_client_socket( socket.sock, read_timeout_sec_, read_timeout_usec_, write_timeout_sec_, - write_timeout_usec_, std::move(callback)); + write_timeout_usec_, max_timeout_msec_, start_time, std::move(callback)); } inline bool ClientImpl::is_ssl() const { return false; } @@ -8195,6 +8313,9 @@ inline Result ClientImpl::Get(const std::string &path, const Headers &headers, req.path = path; req.headers = headers; req.progress = std::move(progress); + if (max_timeout_msec_ > 0) { + req.start_time_ = std::chrono::steady_clock::now(); + } return send_(std::move(req)); } @@ -8260,6 +8381,9 @@ inline Result ClientImpl::Get(const std::string &path, const Headers &headers, return content_receiver(data, data_length); }; req.progress = std::move(progress); 
+ if (max_timeout_msec_ > 0) { + req.start_time_ = std::chrono::steady_clock::now(); + } return send_(std::move(req)); } @@ -8305,6 +8429,9 @@ inline Result ClientImpl::Head(const std::string &path, req.method = "HEAD"; req.headers = headers; req.path = path; + if (max_timeout_msec_ > 0) { + req.start_time_ = std::chrono::steady_clock::now(); + } return send_(std::move(req)); } @@ -8722,6 +8849,9 @@ inline Result ClientImpl::Delete(const std::string &path, req.headers = headers; req.path = path; req.progress = progress; + if (max_timeout_msec_ > 0) { + req.start_time_ = std::chrono::steady_clock::now(); + } if (!content_type.empty()) { req.set_header("Content-Type", content_type); } req.body.assign(body, content_length); @@ -8769,6 +8899,9 @@ inline Result ClientImpl::Options(const std::string &path, req.method = "OPTIONS"; req.headers = headers; req.path = path; + if (max_timeout_msec_ > 0) { + req.start_time_ = std::chrono::steady_clock::now(); + } return send_(std::move(req)); } @@ -8822,6 +8955,10 @@ inline void ClientImpl::set_write_timeout(time_t sec, time_t usec) { write_timeout_usec_ = usec; } +inline void ClientImpl::set_max_timeout(time_t msec) { + max_timeout_msec_ = msec; +} + inline void ClientImpl::set_basic_auth(const std::string &username, const std::string &password) { basic_auth_username_ = username; @@ -8947,7 +9084,7 @@ inline void ClientImpl::enable_server_hostname_verification(bool enabled) { } inline void ClientImpl::set_server_certificate_verifier( - std::function verifier) { + std::function verifier) { server_certificate_verifier_ = verifier; } #endif @@ -9000,22 +9137,13 @@ inline void ssl_delete(std::mutex &ctx_mutex, SSL *ssl, socket_t sock, // Note that it is not always possible to avoid SIGPIPE, this is merely a // best-efforts. 
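The new max_timeout plumbing above gives a request an overall time budget on top of the per-wait read/write timeouts: each wait_readable()/wait_writable() clamps its select() timeout to whatever is left of the budget via calc_actual_timeout(). A minimal usage sketch, assuming the public httplib::Client forwards set_max_timeout() the same way the other timeout setters are forwarded (only ClientImpl::set_max_timeout appears in this diff); the host, path and 3-second budget are placeholders.

#include "httplib.h"
#include <cstdio>

int main() {
    httplib::Client cli("example.com", 80);
    cli.set_read_timeout(5, 0);   // timeout for each individual wait
    cli.set_max_timeout(3000);    // hard 3 s budget for the whole request
    if (auto res = cli.Get("/slow")) {
        std::printf("status: %d\n", res->status);
    } else {
        // Once the overall budget is exhausted the remaining per-wait timeout
        // shrinks to zero, so the call fails instead of hanging on a
        // slow-but-alive peer.
        std::printf("error: %d\n", static_cast<int>(res.error()));
    }
    return 0;
}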
if (shutdown_gracefully) { -#ifdef _WIN32 (void)(sock); - SSL_shutdown(ssl); -#else - timeval tv; - tv.tv_sec = 1; - tv.tv_usec = 0; - setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, - reinterpret_cast(&tv), sizeof(tv)); - - auto ret = SSL_shutdown(ssl); - while (ret == 0) { - std::this_thread::sleep_for(std::chrono::milliseconds{100}); - ret = SSL_shutdown(ssl); + // SSL_shutdown() returns 0 on first call (indicating close_notify alert + // sent) and 1 on subsequent call (indicating close_notify alert received) + if (SSL_shutdown(ssl) == 0) { + // Expected to return 1, but even if it doesn't, we free ssl + SSL_shutdown(ssl); } -#endif } std::lock_guard guard(ctx_mutex); @@ -9060,51 +9188,59 @@ inline bool process_server_socket_ssl( } template -inline bool -process_client_socket_ssl(SSL *ssl, socket_t sock, time_t read_timeout_sec, - time_t read_timeout_usec, time_t write_timeout_sec, - time_t write_timeout_usec, T callback) { +inline bool process_client_socket_ssl( + SSL *ssl, socket_t sock, time_t read_timeout_sec, time_t read_timeout_usec, + time_t write_timeout_sec, time_t write_timeout_usec, + time_t max_timeout_msec, + std::chrono::time_point start_time, T callback) { SSLSocketStream strm(sock, ssl, read_timeout_sec, read_timeout_usec, - write_timeout_sec, write_timeout_usec); + write_timeout_sec, write_timeout_usec, max_timeout_msec, + start_time); return callback(strm); } -class SSLInit { -public: - SSLInit() { - OPENSSL_init_ssl( - OPENSSL_INIT_LOAD_SSL_STRINGS | OPENSSL_INIT_LOAD_CRYPTO_STRINGS, NULL); - } -}; - // SSL socket stream implementation -inline SSLSocketStream::SSLSocketStream(socket_t sock, SSL *ssl, - time_t read_timeout_sec, - time_t read_timeout_usec, - time_t write_timeout_sec, - time_t write_timeout_usec) +inline SSLSocketStream::SSLSocketStream( + socket_t sock, SSL *ssl, time_t read_timeout_sec, time_t read_timeout_usec, + time_t write_timeout_sec, time_t write_timeout_usec, + time_t max_timeout_msec, + std::chrono::time_point start_time) : sock_(sock), ssl_(ssl), read_timeout_sec_(read_timeout_sec), read_timeout_usec_(read_timeout_usec), write_timeout_sec_(write_timeout_sec), - write_timeout_usec_(write_timeout_usec) { + write_timeout_usec_(write_timeout_usec), + max_timeout_msec_(max_timeout_msec), start_time_(start_time) { SSL_clear_mode(ssl, SSL_MODE_AUTO_RETRY); } inline SSLSocketStream::~SSLSocketStream() = default; inline bool SSLSocketStream::is_readable() const { - return detail::select_read(sock_, read_timeout_sec_, read_timeout_usec_) > 0; + return SSL_pending(ssl_) > 0; } -inline bool SSLSocketStream::is_writable() const { +inline bool SSLSocketStream::wait_readable() const { + if (max_timeout_msec_ <= 0) { + return select_read(sock_, read_timeout_sec_, read_timeout_usec_) > 0; + } + + time_t read_timeout_sec; + time_t read_timeout_usec; + calc_actual_timeout(max_timeout_msec_, duration(), read_timeout_sec_, + read_timeout_usec_, read_timeout_sec, read_timeout_usec); + + return select_read(sock_, read_timeout_sec, read_timeout_usec) > 0; +} + +inline bool SSLSocketStream::wait_writable() const { return select_write(sock_, write_timeout_sec_, write_timeout_usec_) > 0 && - is_socket_alive(sock_); + is_socket_alive(sock_) && !is_ssl_peer_could_be_closed(ssl_, sock_); } inline ssize_t SSLSocketStream::read(char *ptr, size_t size) { if (SSL_pending(ssl_) > 0) { return SSL_read(ssl_, ptr, static_cast(size)); - } else if (is_readable()) { + } else if (wait_readable()) { auto ret = SSL_read(ssl_, ptr, static_cast(size)); if (ret < 0) { auto err = 
SSL_get_error(ssl_, ret); @@ -9118,7 +9254,7 @@ inline ssize_t SSLSocketStream::read(char *ptr, size_t size) { #endif if (SSL_pending(ssl_) > 0) { return SSL_read(ssl_, ptr, static_cast(size)); - } else if (is_readable()) { + } else if (wait_readable()) { std::this_thread::sleep_for(std::chrono::microseconds{10}); ret = SSL_read(ssl_, ptr, static_cast(size)); if (ret >= 0) { return ret; } @@ -9129,12 +9265,13 @@ inline ssize_t SSLSocketStream::read(char *ptr, size_t size) { } } return ret; + } else { + return -1; } - return -1; } inline ssize_t SSLSocketStream::write(const char *ptr, size_t size) { - if (is_writable()) { + if (wait_writable()) { auto handle_size = static_cast( std::min(size, (std::numeric_limits::max)())); @@ -9149,7 +9286,7 @@ inline ssize_t SSLSocketStream::write(const char *ptr, size_t size) { #else while (--n >= 0 && err == SSL_ERROR_WANT_WRITE) { #endif - if (is_writable()) { + if (wait_writable()) { std::this_thread::sleep_for(std::chrono::microseconds{10}); ret = SSL_write(ssl_, ptr, static_cast(handle_size)); if (ret >= 0) { return ret; } @@ -9176,7 +9313,11 @@ inline void SSLSocketStream::get_local_ip_and_port(std::string &ip, inline socket_t SSLSocketStream::socket() const { return sock_; } -static SSLInit sslinit_; +inline time_t SSLSocketStream::duration() const { + return std::chrono::duration_cast( + std::chrono::steady_clock::now() - start_time_) + .count(); +} } // namespace detail @@ -9416,16 +9557,22 @@ inline bool SSLClient::create_and_connect_socket(Socket &socket, Error &error) { } // Assumes that socket_mutex_ is locked and that there are no requests in flight -inline bool SSLClient::connect_with_proxy(Socket &socket, Response &res, - bool &success, Error &error) { +inline bool SSLClient::connect_with_proxy( + Socket &socket, + std::chrono::time_point start_time, + Response &res, bool &success, Error &error) { success = true; Response proxy_res; if (!detail::process_client_socket( socket.sock, read_timeout_sec_, read_timeout_usec_, - write_timeout_sec_, write_timeout_usec_, [&](Stream &strm) { + write_timeout_sec_, write_timeout_usec_, max_timeout_msec_, + start_time, [&](Stream &strm) { Request req2; req2.method = "CONNECT"; req2.path = host_and_port_; + if (max_timeout_msec_ > 0) { + req2.start_time_ = std::chrono::steady_clock::now(); + } return process_request(strm, req2, proxy_res, false, error); })) { // Thread-safe to close everything because we are assuming there are no @@ -9445,7 +9592,8 @@ inline bool SSLClient::connect_with_proxy(Socket &socket, Response &res, proxy_res = Response(); if (!detail::process_client_socket( socket.sock, read_timeout_sec_, read_timeout_usec_, - write_timeout_sec_, write_timeout_usec_, [&](Stream &strm) { + write_timeout_sec_, write_timeout_usec_, max_timeout_msec_, + start_time, [&](Stream &strm) { Request req3; req3.method = "CONNECT"; req3.path = host_and_port_; @@ -9453,6 +9601,9 @@ inline bool SSLClient::connect_with_proxy(Socket &socket, Response &res, req3, auth, 1, detail::random_string(10), proxy_digest_auth_username_, proxy_digest_auth_password_, true)); + if (max_timeout_msec_ > 0) { + req3.start_time_ = std::chrono::steady_clock::now(); + } return process_request(strm, req3, proxy_res, false, error); })) { // Thread-safe to close everything because we are assuming there are @@ -9536,12 +9687,18 @@ inline bool SSLClient::initialize_ssl(Socket &socket, Error &error) { } if (server_certificate_verification_) { + auto verification_status = SSLVerifierResponse::NoDecisionMade; + if 
(server_certificate_verifier_) { - if (!server_certificate_verifier_(ssl2)) { - error = Error::SSLServerVerification; - return false; - } - } else { + verification_status = server_certificate_verifier_(ssl2); + } + + if (verification_status == SSLVerifierResponse::CertificateRejected) { + error = Error::SSLServerVerification; + return false; + } + + if (verification_status == SSLVerifierResponse::NoDecisionMade) { verify_result_ = SSL_get_verify_result(ssl2); if (verify_result_ != X509_V_OK) { @@ -9608,13 +9765,15 @@ inline void SSLClient::shutdown_ssl_impl(Socket &socket, assert(socket.ssl == nullptr); } -inline bool -SSLClient::process_socket(const Socket &socket, - std::function callback) { +inline bool SSLClient::process_socket( + const Socket &socket, + std::chrono::time_point start_time, + std::function callback) { assert(socket.ssl); return detail::process_client_socket_ssl( socket.ssl, socket.sock, read_timeout_sec_, read_timeout_usec_, - write_timeout_sec_, write_timeout_usec_, std::move(callback)); + write_timeout_sec_, write_timeout_usec_, max_timeout_msec_, start_time, + std::move(callback)); } inline bool SSLClient::is_ssl() const { return true; } @@ -9651,8 +9810,8 @@ SSLClient::verify_host_with_subject_alt_name(X509 *server_cert) const { auto type = GEN_DNS; - struct in6_addr addr6{}; - struct in_addr addr{}; + struct in6_addr addr6 = {}; + struct in_addr addr = {}; size_t addr_len = 0; #ifndef __MINGW32__ @@ -10300,7 +10459,7 @@ inline void Client::enable_server_hostname_verification(bool enabled) { } inline void Client::set_server_certificate_verifier( - std::function verifier) { + std::function verifier) { cli_->set_server_certificate_verifier(verifier); } #endif @@ -10344,8 +10503,4 @@ inline SSL_CTX *Client::ssl_context() const { } // namespace httplib -#if defined(_WIN32) && defined(CPPHTTPLIB_USE_POLL) -#undef poll -#endif - #endif // CPPHTTPLIB_HTTPLIB_H diff --git a/examples/server/public/index.html.gz b/examples/server/public/index.html.gz index 582ccc0d3f..674e227571 100644 Binary files a/examples/server/public/index.html.gz and b/examples/server/public/index.html.gz differ diff --git a/examples/server/public_legacy/json-schema-to-grammar.mjs b/examples/server/public_legacy/json-schema-to-grammar.mjs index e67bb15c17..f767ce7b72 100644 --- a/examples/server/public_legacy/json-schema-to-grammar.mjs +++ b/examples/server/public_legacy/json-schema-to-grammar.mjs @@ -1,5 +1,5 @@ // WARNING: This file was ported from json_schema_to_grammar.py, please fix bugs / add features there first. 
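Returning to the client-side verification change above: the verifier hook now yields a tri-state SSLVerifierResponse instead of a bool, so a callback can reject a certificate, accept it outright, or leave the decision to the default chain check. A minimal sketch follows, assuming the callback signature is std::function<SSLVerifierResponse(SSL *)> (the template arguments are not visible in this diff) and using only the enumerators that appear in the code above; the host and path are invented.

#define CPPHTTPLIB_OPENSSL_SUPPORT
#include "httplib.h"

int main() {
    httplib::Client cli("https://internal.example");
    cli.set_server_certificate_verifier([](SSL *ssl) {
        // Inspect the peer certificate here (e.g. via SSL_get_peer_certificate)
        // if a custom policy is needed. Returning NoDecisionMade falls through
        // to SSL_get_verify_result(), i.e. the library's default verification;
        // CertificateRejected maps to Error::SSLServerVerification.
        (void)ssl;
        return httplib::SSLVerifierResponse::NoDecisionMade;
    });
    auto res = cli.Get("/healthz");
    return res && res->status == 200 ? 0 : 1;
}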
-const SPACE_RULE = '| " " | "\\n" [ \\t]{0,20}'; +const SPACE_RULE = '| " " | "\\n"{1,2} [ \\t]{0,20}'; function _buildRepetition(itemRule, minItems, maxItems, opts={}) { if (minItems === 0 && maxItems === 1) { diff --git a/examples/server/server.cpp b/examples/server/server.cpp index b1cde2d7f4..c580ec1232 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -14,7 +14,7 @@ // mime type for sending response #define MIMETYPE_JSON "application/json; charset=utf-8" -// auto generated files (update with ./deps.sh) +// auto generated files (see README.md for details) #include "index.html.gz.hpp" #include "loading.html.hpp" @@ -42,7 +42,7 @@ enum stop_type { STOP_TYPE_LIMIT, }; -// state diagram: https://github.com/ggerganov/llama.cpp/pull/9283 +// state diagram: https://github.com/ggml-org/llama.cpp/pull/9283 enum slot_state { SLOT_STATE_IDLE, SLOT_STATE_STARTED, // TODO: this state is only used for setting up the initial prompt processing; maybe merge it with launch_slot_with_task in the future @@ -113,10 +113,11 @@ struct slot_params { struct common_params_speculative speculative; // OAI-compat fields - bool verbose = false; - oaicompat_type oaicompat = OAICOMPAT_TYPE_NONE; - std::string oaicompat_model; - std::string oaicompat_cmpl_id; + bool verbose = false; + oaicompat_type oaicompat = OAICOMPAT_TYPE_NONE; + std::string oaicompat_model; + std::string oaicompat_cmpl_id; + common_chat_format oaicompat_chat_format = COMMON_CHAT_FORMAT_CONTENT_ONLY; json to_json() const { std::vector samplers; @@ -130,6 +131,12 @@ struct slot_params { lora.push_back({{"id", i}, {"scale", this->lora[i].scale}}); } + auto grammar_triggers = json::array(); + for (const auto & trigger : sampling.grammar_triggers) { + server_grammar_trigger ct(std::move(trigger)); + grammar_triggers.push_back(ct.to_json()); + } + return json { {"n_predict", n_predict}, // Server configured n_predict {"seed", sampling.seed}, @@ -164,6 +171,10 @@ struct slot_params { {"n_probs", sampling.n_probs}, {"min_keep", sampling.min_keep}, {"grammar", sampling.grammar}, + {"grammar_lazy", sampling.grammar_lazy}, + {"grammar_triggers", grammar_triggers}, + {"preserved_tokens", sampling.preserved_tokens}, + {"chat_format", common_chat_format_name(oaicompat_chat_format)}, {"samplers", samplers}, {"speculative.n_max", speculative.n_max}, {"speculative.n_min", speculative.n_min}, @@ -264,7 +275,7 @@ struct server_task { params.speculative.p_min = json_value(data, "speculative.p_min", defaults.speculative.p_min); params.speculative.n_min = std::min(params.speculative.n_max, params.speculative.n_min); - params.speculative.n_min = std::max(params.speculative.n_min, 2); + params.speculative.n_min = std::max(params.speculative.n_min, 0); params.speculative.n_max = std::max(params.speculative.n_max, 0); // Use OpenAI API logprobs only if n_probs wasn't provided @@ -319,18 +330,76 @@ struct server_task { } // process "json_schema" and "grammar" - if (data.contains("json_schema") && !data.at("json_schema").is_null() && data.contains("grammar") && !data.at("grammar").is_null()) { - throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both"); - } if (data.contains("json_schema") && !data.contains("grammar")) { try { auto schema = json_value(data, "json_schema", json::object()); - params.sampling.grammar = json_schema_to_grammar(schema); + SRV_DBG("JSON schema: %s\n", schema.dump(2).c_str()); + params.sampling.grammar = json_schema_to_grammar(schema); + SRV_DBG("Converted grammar: %s\n", 
params.sampling.grammar.c_str()); } catch (const std::exception & e) { throw std::runtime_error(std::string("\"json_schema\": ") + e.what()); } } else { - params.sampling.grammar = json_value(data, "grammar", defaults.sampling.grammar); + params.sampling.grammar = json_value(data, "grammar", defaults.sampling.grammar); + SRV_DBG("Grammar: %s\n", params.sampling.grammar.c_str()); + params.sampling.grammar_lazy = json_value(data, "grammar_lazy", defaults.sampling.grammar_lazy); + SRV_DBG("Grammar lazy: %s\n", params.sampling.grammar_lazy ? "true" : "false"); + } + + { + auto it = data.find("chat_format"); + if (it != data.end()) { + params.oaicompat_chat_format = static_cast(it->get()); + SRV_INF("Chat format: %s\n", common_chat_format_name(params.oaicompat_chat_format).c_str()); + } else { + params.oaicompat_chat_format = defaults.oaicompat_chat_format; + } + } + + { + const auto preserved_tokens = data.find("preserved_tokens"); + if (preserved_tokens != data.end()) { + for (const auto & t : *preserved_tokens) { + auto ids = common_tokenize(vocab, t.get(), /* add_special= */ false, /* parse_special= */ true); + if (ids.size() == 1) { + SRV_DBG("Preserved token: %d\n", ids[0]); + params.sampling.preserved_tokens.insert(ids[0]); + } else { + // This may happen when using a tool call style meant for a model with special tokens to preserve on a model without said tokens. + SRV_DBG("Not preserved because more than 1 token: %s\n", t.get().c_str()); + } + } + } + const auto grammar_triggers = data.find("grammar_triggers"); + if (grammar_triggers != data.end()) { + for (const auto & t : *grammar_triggers) { + server_grammar_trigger ct(t); + if (ct.value.type == COMMON_GRAMMAR_TRIGGER_TYPE_WORD) { + const auto & word = ct.value.value; + auto ids = common_tokenize(vocab, word, /* add_special= */ false, /* parse_special= */ true); + if (ids.size() == 1) { + auto token = ids[0]; + if (std::find(params.sampling.preserved_tokens.begin(), params.sampling.preserved_tokens.end(), (llama_token) token) == params.sampling.preserved_tokens.end()) { + throw std::runtime_error("Grammar trigger word should be marked as preserved token: " + word); + } + SRV_DBG("Grammar trigger token: %d (`%s`)\n", token, word.c_str()); + common_grammar_trigger trigger; + trigger.type = COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN; + trigger.value = word; + trigger.token = token; + params.sampling.grammar_triggers.push_back(std::move(trigger)); + } else { + SRV_DBG("Grammar trigger word: `%s`\n", word.c_str()); + params.sampling.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, word}); + } + } else { + params.sampling.grammar_triggers.push_back(std::move(ct.value)); + } + } + } + if (params.sampling.grammar_lazy && params.sampling.grammar_triggers.empty()) { + throw std::runtime_error("Error: no triggers set for lazy grammar!"); + } } { @@ -382,22 +451,12 @@ struct server_task { } { - const auto & samplers = data.find("samplers"); + const auto samplers = data.find("samplers"); if (samplers != data.end()) { if (samplers->is_array()) { - std::vector sampler_names; - for (const auto & name : *samplers) { - if (name.is_string()) { - sampler_names.emplace_back(name); - } - } - params.sampling.samplers = common_sampler_types_from_names(sampler_names, false); + params.sampling.samplers = common_sampler_types_from_names(*samplers, false); } else if (samplers->is_string()){ - std::string sampler_string; - for (const auto & name : *samplers) { - sampler_string += name; - } - params.sampling.samplers = 
common_sampler_types_from_chars(sampler_string); + params.sampling.samplers = common_sampler_types_from_chars(samplers->get()); } } else { params.sampling.samplers = defaults.sampling.samplers; @@ -431,8 +490,12 @@ struct result_timings { double predicted_per_token_ms; double predicted_per_second; + // Optional speculative metrics - only included when > 0 + int32_t draft_n = 0; + int32_t draft_n_accepted = 0; + json to_json() const { - return { + json base = { {"prompt_n", prompt_n}, {"prompt_ms", prompt_ms}, {"prompt_per_token_ms", prompt_per_token_ms}, @@ -443,6 +506,13 @@ struct result_timings { {"predicted_per_token_ms", predicted_per_token_ms}, {"predicted_per_second", predicted_per_second}, }; + + if (draft_n > 0) { + base["draft_n"] = draft_n; + base["draft_n_accepted"] = draft_n_accepted; + } + + return base; } }; @@ -544,7 +614,7 @@ struct completion_token_output { struct server_task_result_cmpl_final : server_task_result { int index = 0; - std::string content; + std::string content; llama_tokens tokens; bool stream; @@ -566,10 +636,11 @@ struct server_task_result_cmpl_final : server_task_result { slot_params generation_params; // OAI-compat fields - bool verbose = false; - oaicompat_type oaicompat = OAICOMPAT_TYPE_NONE; - std::string oaicompat_model; - std::string oaicompat_cmpl_id; + bool verbose = false; + oaicompat_type oaicompat = OAICOMPAT_TYPE_NONE; + std::string oaicompat_model; + std::string oaicompat_cmpl_id; + common_chat_format oaicompat_chat_format = COMMON_CHAT_FORMAT_CONTENT_ONLY; virtual int get_index() override { return index; @@ -663,18 +734,49 @@ struct server_task_result_cmpl_final : server_task_result { json to_json_oaicompat_chat() { std::string finish_reason = "length"; + common_chat_msg msg; if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) { - finish_reason = "stop"; + SRV_DBG("Parsing chat message: %s\n", content.c_str()); + msg = common_chat_parse(content, oaicompat_chat_format); + finish_reason = msg.tool_calls.empty() ? "stop" : "tool_calls"; + } else { + msg.content = content; } - json choice = json{ + json message { + {"role", "assistant"}, + }; + if (!msg.reasoning_content.empty()) { + message["reasoning_content"] = msg.reasoning_content; + } + if (msg.content.empty() && !msg.tool_calls.empty()) { + message["content"] = json(); + } else { + message["content"] = msg.content; + } + if (!msg.tool_calls.empty()) { + auto tool_calls = json::array(); + for (const auto & tc : msg.tool_calls) { + tool_calls.push_back({ + {"type", "function"}, + {"function", { + {"name", tc.name}, + {"arguments", tc.arguments}, + }}, + // Some templates generate and require an id (sometimes in a very specific format, e.g. Mistral Nemo). + // We only generate a random id for the ones that don't generate one by themselves + // (they also won't get to see it as their template likely doesn't use it, so it's all for the client) + {"id", tc.id.empty() ? 
gen_tool_call_id() : tc.id}, + }); + } + message["tool_calls"] = tool_calls; + } + + json choice { {"finish_reason", finish_reason}, {"index", 0}, - {"message", json { - {"content", content}, - {"role", "assistant"} - } - }}; + {"message", message}, + }; if (!stream && probs_output.size() > 0) { choice["logprobs"] = json{ @@ -716,7 +818,7 @@ struct server_task_result_cmpl_final : server_task_result { finish_reason = "stop"; } - json choice = json{ + json choice = json { {"finish_reason", finish_reason}, {"index", 0}, {"delta", json::object()} @@ -740,6 +842,11 @@ struct server_task_result_cmpl_final : server_task_result { ret.push_back({"timings", timings.to_json()}); } + // extra fields for debugging purposes + if (verbose) { + ret["__verbose"] = to_json_non_oaicompat(); + } + return ret; } }; @@ -1191,6 +1298,8 @@ struct server_slot { llama_token sampled; + common_chat_format chat_format = COMMON_CHAT_FORMAT_CONTENT_ONLY; + // stats size_t n_sent_text = 0; // number of sent text character @@ -1202,6 +1311,10 @@ struct server_slot { std::function callback_on_release; + // Speculative decoding stats + int32_t n_draft_total = 0; // Total draft tokens generated + int32_t n_draft_accepted = 0; // Draft tokens actually accepted + void reset() { SLT_DBG(*this, "%s", "\n"); @@ -1218,13 +1331,17 @@ struct server_slot { generated_tokens.clear(); generated_token_probs.clear(); + + // clear speculative decoding stats + n_draft_total = 0; + n_draft_accepted = 0; } bool is_non_causal() const { return task_type == SERVER_TASK_TYPE_EMBEDDING || task_type == SERVER_TASK_TYPE_RERANK; } - bool can_batch_with(server_slot & other_slot) { + bool can_batch_with(server_slot & other_slot) const { return is_non_causal() == other_slot.is_non_causal() && are_lora_equal(lora, other_slot.lora); } @@ -1284,6 +1401,12 @@ struct server_slot { timings.predicted_per_token_ms = t_token_generation / n_decoded; timings.predicted_per_second = 1e3 / t_token_generation * n_decoded; + // Add speculative metrics + if (n_draft_total > 0) { + timings.draft_n = n_draft_total; + timings.draft_n_accepted = n_draft_accepted; + } + return timings; } @@ -1331,6 +1454,15 @@ struct server_slot { t_prompt_processing, n_prompt_tokens_processed, t_prompt, n_prompt_second, t_token_generation, n_decoded, t_gen, n_gen_second, t_prompt_processing + t_token_generation, n_prompt_tokens_processed + n_decoded); + + if (n_draft_total > 0) { + const float draft_ratio = (float) n_draft_accepted / n_draft_total; + SLT_INF(*this, + "\n" + "draft acceptance rate = %0.5f (%5d accepted / %5d generated)\n", + draft_ratio, n_draft_accepted, n_draft_total + ); + } } json to_json() const { @@ -1420,29 +1552,30 @@ struct server_queue { std::condition_variable condition_tasks; // callback functions - std::function callback_new_task; - std::function callback_update_slots; + std::function callback_new_task; + std::function callback_update_slots; // Add a new task to the end of the queue - int post(server_task task, bool front = false) { + int post(server_task && task, bool front = false) { std::unique_lock lock(mutex_tasks); GGML_ASSERT(task.id != -1); // if this is cancel task make sure to clean up pending tasks if (task.type == SERVER_TASK_TYPE_CANCEL) { cleanup_pending_task(task.id_target); } - QUE_DBG("new task, id = %d, front = %d\n", task.id, front); + const int task_id = task.id; + QUE_DBG("new task, id = %d, front = %d\n", task_id, front); if (front) { queue_tasks.push_front(std::move(task)); } else { queue_tasks.push_back(std::move(task)); } 
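One detail worth noting in the post() change above: the task id is copied into task_id before the std::move, because after the move the queue owns the task and the local object is left in a moved-from state. For an int member the old `return task.id;` happened to be well defined, but reading members of moved-from objects breaks down as soon as the member has a real move constructor, so caching first is the safer habit. A tiny illustration with invented names:

#include <deque>
#include <string>
#include <utility>

struct Job {
    int id = -1;
    std::string payload; // a member whose move constructor actually pilfers
};

int enqueue(std::deque<Job> &queue, Job &&job) {
    const int job_id = job.id;       // capture what we need first
    queue.push_back(std::move(job)); // 'job' is now moved-from
    return job_id;                   // returning job.id here would read a
                                     // moved-from object: still fine for an
                                     // int, unspecified for job.payload
}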
        condition_tasks.notify_one();
-        return task.id;
+        return task_id;
    }

    // multi-task version of post()
-    int post(std::vector<server_task> & tasks, bool front = false) {
+    int post(std::vector<server_task> && tasks, bool front = false) {
        std::unique_lock<std::mutex> lock(mutex_tasks);
        for (auto & task : tasks) {
            if (task.id == -1) {
@@ -1464,7 +1597,7 @@
    }

    // Add a new task, but defer until one slot is available
-    void defer(server_task task) {
+    void defer(server_task && task) {
        std::unique_lock<std::mutex> lock(mutex_tasks);
        QUE_DBG("defer task, id = %d\n", task.id);
        queue_tasks_deferred.push_back(std::move(task));
@@ -1479,7 +1612,7 @@
    }

    // Register function to process a new task
-    void on_new_task(std::function<void(server_task)> callback) {
+    void on_new_task(std::function<void(server_task &&)> callback) {
        callback_new_task = std::move(callback);
    }
@@ -1520,11 +1653,15 @@
        while (true) {
            std::unique_lock<std::mutex> lock(mutex_tasks);
+            if (!running) {
+                QUE_DBG("%s", "terminate\n");
+                return;
+            }
            if (queue_tasks.empty()) {
                lock.unlock();
                break;
            }
-            server_task task = queue_tasks.front();
+            server_task task = std::move(queue_tasks.front());
            queue_tasks.pop_front();
            lock.unlock();
@@ -1540,11 +1677,11 @@
            QUE_DBG("%s", "waiting for new tasks\n");
            {
                std::unique_lock<std::mutex> lock(mutex_tasks);
+                if (!running) {
+                    QUE_DBG("%s", "terminate\n");
+                    return;
+                }
                if (queue_tasks.empty()) {
-                    if (!running) {
-                        QUE_DBG("%s", "terminate\n");
-                        return;
-                    }
                    condition_tasks.wait(lock, [&]{
                        return (!queue_tasks.empty() || !running);
                    });
@@ -1569,6 +1706,8 @@ private:
};

struct server_response {
+    bool running = true;
+
    // for keeping track of all tasks waiting for the result
    std::unordered_set<int> waiting_task_ids;
@@ -1623,6 +1762,10 @@ struct server_response {
        while (true) {
            std::unique_lock<std::mutex> lock(mutex_results);
            condition_results.wait(lock, [&]{
+                if (!running) {
+                    SRV_DBG("%s : queue result stop\n", __func__);
+                    std::terminate(); // we cannot return here since the caller is HTTP code
+                }
                return !queue_results.empty();
            });
@@ -1653,6 +1796,10 @@ struct server_response {
            }
            std::cv_status cr_res = condition_results.wait_for(lock, std::chrono::seconds(timeout));
+            if (!running) {
+                SRV_DBG("%s : queue result stop\n", __func__);
+                std::terminate(); // we cannot return here since the caller is HTTP code
+            }
            if (cr_res == std::cv_status::timeout) {
                return nullptr;
            }
@@ -1682,6 +1829,12 @@ struct server_response {
            }
        }
    }
+
+    // terminate the waiting loop
+    void terminate() {
+        running = false;
+        condition_results.notify_all();
+    }
};

struct server_context {
@@ -1720,7 +1873,7 @@ struct server_context {
    // Necessary similarity of prompt for slot selection
    float slot_prompt_similarity = 0.0f;

-    common_chat_templates chat_templates;
+    common_chat_templates_ptr chat_templates;

    ~server_context() {
        // Clear any sampling context
@@ -1741,7 +1894,7 @@ struct server_context {
    }

    bool load_model(const common_params & params) {
-        SRV_INF("loading model '%s'\n", params.model.c_str());
+        SRV_INF("loading model '%s'\n", params.model.path.c_str());

        params_base = params;
@@ -1751,7 +1904,7 @@ struct server_context {
        ctx = llama_init.context.get();

        if (model == nullptr) {
-            SRV_ERR("failed to load model, '%s'\n", params_base.model.c_str());
+            SRV_ERR("failed to load model, '%s'\n", params_base.model.path.c_str());
            return false;
        }
@@ -1762,31 +1915,32 @@ struct server_context {
        add_bos_token = llama_vocab_get_add_bos(vocab);
        has_eos_token = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL;

-        if (!params_base.speculative.model.empty() ||
!params_base.speculative.hf_repo.empty()) { - SRV_INF("loading draft model '%s'\n", params_base.speculative.model.c_str()); + if (!params_base.speculative.model.path.empty() || !params_base.speculative.model.hf_repo.empty()) { + SRV_INF("loading draft model '%s'\n", params_base.speculative.model.path.c_str()); auto params_dft = params_base; params_dft.devices = params_base.speculative.devices; - params_dft.hf_file = params_base.speculative.hf_file; - params_dft.hf_repo = params_base.speculative.hf_repo; params_dft.model = params_base.speculative.model; - params_dft.model_url = params_base.speculative.model_url; params_dft.n_ctx = params_base.speculative.n_ctx == 0 ? params_base.n_ctx / params_base.n_parallel : params_base.speculative.n_ctx; params_dft.n_gpu_layers = params_base.speculative.n_gpu_layers; params_dft.n_parallel = 1; + // force F16 KV cache for the draft model for extra performance + params_dft.cache_type_k = GGML_TYPE_F16; + params_dft.cache_type_v = GGML_TYPE_F16; + llama_init_dft = common_init_from_params(params_dft); model_dft = llama_init_dft.model.get(); if (model_dft == nullptr) { - SRV_ERR("failed to load draft model, '%s'\n", params_base.speculative.model.c_str()); + SRV_ERR("failed to load draft model, '%s'\n", params_base.speculative.model.path.c_str()); return false; } if (!common_speculative_are_compatible(ctx, llama_init_dft.context.get())) { - SRV_ERR("the draft model '%s' is not compatible with the target model '%s'\n", params_base.speculative.model.c_str(), params_base.model.c_str()); + SRV_ERR("the draft model '%s' is not compatible with the target model '%s'\n", params_base.speculative.model.path.c_str(), params_base.model.path.c_str()); return false; } @@ -1796,49 +1950,22 @@ struct server_context { cparams_dft = common_context_params_to_llama(params_dft); cparams_dft.n_batch = n_ctx_dft; - // force F16 KV cache for the draft model for extra performance - cparams_dft.type_k = GGML_TYPE_F16; - cparams_dft.type_v = GGML_TYPE_F16; - // the context is not needed - we will create one for each slot llama_init_dft.context.reset(); } - chat_templates = common_chat_templates_from_model(model, params_base.chat_template); - GGML_ASSERT(chat_templates.template_default.get() != nullptr); + chat_templates = common_chat_templates_init(model, params_base.chat_template); + try { + common_chat_format_example(chat_templates.get(), params.use_jinja); + } catch (const std::exception & e) { + SRV_WRN("%s: Chat template parsing error: %s\n", __func__, e.what()); + SRV_WRN("%s: The chat template that comes with this model is not yet supported, falling back to chatml. 
This may cause the model to output suboptimal responses\n", __func__); + chat_templates = common_chat_templates_init(model, "chatml"); + } return true; } - bool validate_builtin_chat_template(bool use_jinja) const { - llama_chat_message chat[] = {{"user", "test"}}; - - if (use_jinja) { - auto templates = common_chat_templates_from_model(model, ""); - GGML_ASSERT(templates.template_default); - try { - templates.template_default->apply({{ - {"role", "user"}, - {"content", "test"}, - }}, json(), true); - if (templates.template_tool_use) { - templates.template_tool_use->apply({{ - {"role", "user"}, - {"content", "test"}, - }}, json(), true); - } - return true; - } catch (const std::exception & e) { - SRV_ERR("failed to apply template: %s\n", e.what()); - return false; - } - } else { - const char * tmpl = llama_model_chat_template(model, /* name */ nullptr); - const int32_t chat_res = llama_chat_apply_template(tmpl, chat, 1, true, nullptr, 0); - return chat_res > 0; - } - } - void init() { const int32_t n_ctx_slot = n_ctx / params_base.n_parallel; @@ -1878,7 +2005,7 @@ struct server_context { slot.reset(); - slots.push_back(slot); + slots.push_back(std::move(slot)); } default_generation_settings_for_props = slots[0].to_json(); @@ -1967,7 +2094,19 @@ struct server_context { return ret; } - bool launch_slot_with_task(server_slot & slot, const server_task & task) { + bool can_be_detokenized(const struct llama_context * ctx, const std::vector & tokens) { + const llama_model * model = llama_get_model(ctx); + const llama_vocab * vocab = llama_model_get_vocab(model); + const int32_t n_vocab = llama_vocab_n_tokens(vocab); + for (const auto & token : tokens) { + if (token < 0 || token >= n_vocab) { + return false; + } + } + return true; + } + + bool launch_slot_with_task(server_slot & slot, server_task && task) { slot.reset(); slot.id_task = task.id; slot.index = task.index; @@ -1975,18 +2114,23 @@ struct server_context { slot.params = std::move(task.params); slot.prompt_tokens = std::move(task.prompt_tokens); - if (!are_lora_equal(task.params.lora, slot.lora)) { + if (!are_lora_equal(slot.params.lora, slot.lora)) { // if lora is changed, we cannot reuse cached tokens slot.cache_tokens.clear(); - slot.lora = task.params.lora; + slot.lora = slot.params.lora; } + bool can_detokenize = can_be_detokenized(ctx, slot.prompt_tokens); + if (!can_detokenize) { + send_error(task, "Prompt contains invalid tokens", ERROR_TYPE_INVALID_REQUEST); + return false; + } SLT_DBG(slot, "launching slot : %s\n", safe_json_to_str(slot.to_json()).c_str()); if (slot.n_predict > 0 && slot.params.n_predict > slot.n_predict) { // Might be better to reject the request with a 400 ? 
+ SLT_WRN(slot, "n_predict = %d exceeds server configuration, setting to %d\n", slot.params.n_predict, slot.n_predict); slot.params.n_predict = slot.n_predict; - SLT_WRN(slot, "n_predict = %d exceeds server configuration, setting to %d", slot.n_predict, slot.n_predict); } if (slot.params.ignore_eos && has_eos_token) { @@ -2023,7 +2167,7 @@ struct server_context { SRV_DBG("%s", "clearing KV cache\n"); // clear the entire KV cache - llama_kv_cache_clear(ctx); + llama_kv_self_clear(ctx); clean_kv_cache = false; } @@ -2088,14 +2232,6 @@ struct server_context { } if (slot.has_new_line) { - // if we have already seen a new line, we stop after a certain time limit - if (slot.params.t_max_predict_ms > 0 && (ggml_time_us() - slot.t_start_generation > 1000.0f*slot.params.t_max_predict_ms)) { - slot.stop = STOP_TYPE_LIMIT; - slot.has_next_token = false; - - SLT_DBG(slot, "stopped by time limit, n_decoded = %d, t_max_predict_ms = %d ms\n", slot.n_decoded, (int) slot.params.t_max_predict_ms); - } - // require that each new line has a whitespace prefix (i.e. indentation) of at least slot.params.n_indent if (slot.params.n_indent > 0) { // check the current indentation @@ -2134,6 +2270,14 @@ struct server_context { // check if there is a new line in the generated text if (result.text_to_send.find('\n') != std::string::npos) { slot.has_new_line = true; + + // if we have seen a new line, we stop after a certain time limit, but only upon another new line + if (slot.params.t_max_predict_ms > 0 && (ggml_time_us() - slot.t_start_generation > 1000.0f*slot.params.t_max_predict_ms)) { + slot.stop = STOP_TYPE_LIMIT; + slot.has_next_token = false; + + SLT_DBG(slot, "stopped by time limit, n_decoded = %d, t_max_predict_ms = %d ms\n", slot.n_decoded, (int) slot.params.t_max_predict_ms); + } } // if context shift is disabled, we stop when it reaches the context limit @@ -2191,7 +2335,7 @@ struct server_context { for (size_t i = 0; i < std::min(max_probs, n_probs); i++) { result.probs.push_back({ cur_p->data[i].id, - common_detokenize(ctx, {cur_p->data[i].id}, special), + common_token_to_piece(ctx, cur_p->data[i].id, special), cur_p->data[i].p }); } @@ -2213,7 +2357,7 @@ struct server_context { for (size_t i = 0; i < std::min(n_vocab, n_probs); i++) { result.probs.push_back({ cur[i].id, - common_detokenize(ctx, {cur[i].id}, special), + common_token_to_piece(ctx, cur[i].id, special), cur[i].p }); } @@ -2275,11 +2419,11 @@ struct server_context { res->id_slot = slot.id; res->index = slot.index; - res->content = slot.generated_text; - res->tokens = slot.generated_tokens; + res->content = std::move(slot.generated_text); + res->tokens = std::move(slot.generated_tokens); res->timings = slot.get_timings(); res->prompt = common_detokenize(ctx, slot.prompt_tokens, true); - res->response_fields = slot.params.response_fields; + res->response_fields = std::move(slot.params.response_fields); res->truncated = slot.truncated; res->n_decoded = slot.n_decoded; @@ -2290,12 +2434,12 @@ struct server_context { res->stop = slot.stop; res->post_sampling_probs = slot.params.post_sampling_probs; - res->verbose = slot.params.verbose; - res->stream = slot.params.stream; - res->oaicompat = slot.params.oaicompat; - res->oaicompat_model = slot.params.oaicompat_model; - res->oaicompat_cmpl_id = slot.params.oaicompat_cmpl_id; - + res->verbose = slot.params.verbose; + res->stream = slot.params.stream; + res->oaicompat = slot.params.oaicompat; + res->oaicompat_model = slot.params.oaicompat_model; + res->oaicompat_cmpl_id = 
slot.params.oaicompat_cmpl_id; + res->oaicompat_chat_format = slot.params.oaicompat_chat_format; // populate res.probs_output if (slot.params.sampling.n_probs > 0) { if (!slot.params.stream && slot.stop == STOP_TYPE_WORD) { @@ -2404,10 +2548,10 @@ struct server_context { server_task task(SERVER_TASK_TYPE_CANCEL); task.id_target = id_task; queue_results.remove_waiting_task_id(id_task); - cancel_tasks.push_back(task); + cancel_tasks.push_back(std::move(task)); } // push to beginning of the queue, so it has highest priority - queue_tasks.post(cancel_tasks, true); + queue_tasks.post(std::move(cancel_tasks), true); } // receive the results from task(s) @@ -2494,7 +2638,7 @@ struct server_context { // Functions to process the task // - void process_single_task(server_task task) { + void process_single_task(server_task && task) { switch (task.type) { case SERVER_TASK_TYPE_COMPLETION: case SERVER_TASK_TYPE_INFILL: @@ -2508,17 +2652,17 @@ struct server_context { if (slot == nullptr) { // if no slot is available, we defer this task for processing later SRV_DBG("no slot is available, defer task, id_task = %d\n", task.id); - queue_tasks.defer(task); + queue_tasks.defer(std::move(task)); break; } if (slot->is_processing()) { // if requested slot is unavailable, we defer this task for processing later SRV_DBG("requested slot is unavailable, defer task, id_task = %d\n", task.id); - queue_tasks.defer(task); + queue_tasks.defer(std::move(task)); break; } - if (!launch_slot_with_task(*slot, task)) { + if (!launch_slot_with_task(*slot, std::move(task))) { SRV_ERR("failed to launch slot with task, id_task = %d\n", task.id); break; } @@ -2565,8 +2709,8 @@ struct server_context { res->n_tasks_deferred = queue_tasks.queue_tasks_deferred.size(); res->t_start = metrics.t_start; - res->kv_cache_tokens_count = llama_get_kv_cache_token_count(ctx); - res->kv_cache_used_cells = llama_get_kv_cache_used_cells(ctx); + res->kv_cache_tokens_count = llama_kv_self_n_tokens(ctx); + res->kv_cache_used_cells = llama_kv_self_used_cells(ctx); res->n_prompt_tokens_processed_total = metrics.n_prompt_tokens_processed_total; res->t_prompt_processing_total = metrics.t_prompt_processing_total; @@ -2597,7 +2741,7 @@ struct server_context { if (slot->is_processing()) { // if requested slot is unavailable, we defer this task for processing later SRV_DBG("requested slot is unavailable, defer task, id_task = %d\n", task.id); - queue_tasks.defer(task); + queue_tasks.defer(std::move(task)); break; } @@ -2633,7 +2777,7 @@ struct server_context { if (slot->is_processing()) { // if requested slot is unavailable, we defer this task for processing later SRV_DBG("requested slot is unavailable, defer task, id_task = %d\n", task.id); - queue_tasks.defer(task); + queue_tasks.defer(std::move(task)); break; } @@ -2676,13 +2820,13 @@ struct server_context { if (slot->is_processing()) { // if requested slot is unavailable, we defer this task for processing later SRV_DBG("requested slot is unavailable, defer task, id_task = %d\n", task.id); - queue_tasks.defer(task); + queue_tasks.defer(std::move(task)); break; } // Erase token cache const size_t n_erased = slot->cache_tokens.size(); - llama_kv_cache_seq_rm(ctx, slot->id, -1, -1); + llama_kv_self_seq_rm(ctx, slot->id, -1, -1); slot->cache_tokens.clear(); auto res = std::make_unique(); @@ -2728,7 +2872,7 @@ struct server_context { server_task task(SERVER_TASK_TYPE_NEXT_RESPONSE); task.id = queue_tasks.get_new_id(); - queue_tasks.post(task); + queue_tasks.post(std::move(task)); } // apply context-shift if 
needed @@ -2750,8 +2894,8 @@ struct server_context { SLT_WRN(slot, "slot context shift, n_keep = %d, n_left = %d, n_discard = %d\n", n_keep, n_left, n_discard); - llama_kv_cache_seq_rm (ctx, slot.id, n_keep , n_keep + n_discard); - llama_kv_cache_seq_add(ctx, slot.id, n_keep + n_discard, slot.n_past, -n_discard); + llama_kv_self_seq_rm (ctx, slot.id, n_keep , n_keep + n_discard); + llama_kv_self_seq_add(ctx, slot.id, n_keep + n_discard, slot.n_past, -n_discard); if (slot.params.cache_prompt) { for (size_t i = n_keep + n_discard; i < slot.cache_tokens.size(); i++) { @@ -2773,6 +2917,10 @@ struct server_context { // track if given slot can be batched with slots already in the batch server_slot * slot_batched = nullptr; + auto accept_special_token = [&](server_slot & slot, llama_token token) { + return params_base.special || slot.params.sampling.preserved_tokens.find(token) != slot.params.sampling.preserved_tokens.end(); + }; + // frist, add sampled tokens from any ongoing sequences for (auto & slot : slots) { if (slot.state != SLOT_STATE_GENERATING) { @@ -2938,8 +3086,8 @@ struct server_context { const int64_t kv_shift = (int64_t) head_p - (int64_t) head_c; - llama_kv_cache_seq_rm (ctx, slot.id, head_p, head_c); - llama_kv_cache_seq_add(ctx, slot.id, head_c, -1, kv_shift); + llama_kv_self_seq_rm (ctx, slot.id, head_p, head_c); + llama_kv_self_seq_add(ctx, slot.id, head_c, head_c + n_match, kv_shift); for (size_t i = 0; i < n_match; i++) { slot.cache_tokens[head_p + i] = slot.cache_tokens[head_c + i]; @@ -2977,9 +3125,9 @@ struct server_context { } // keep only the common part - if (!llama_kv_cache_seq_rm(ctx, slot.id, slot.n_past, -1)) { + if (!llama_kv_self_seq_rm(ctx, slot.id, slot.n_past, -1)) { // could not partially delete (likely using a non-Transformer model) - llama_kv_cache_seq_rm(ctx, slot.id, -1, -1); + llama_kv_self_seq_rm(ctx, slot.id, -1, -1); // there is no common part left slot.n_past = 0; @@ -3136,7 +3284,7 @@ struct server_context { completion_token_output result; result.tok = id; - result.text_to_send = common_token_to_piece(ctx, result.tok, params_base.special); + result.text_to_send = common_token_to_piece(ctx, result.tok, accept_special_token(slot, result.tok)); result.prob = 1.0f; // TODO: set it here instead of doing inside populate_token_probs if (slot.params.sampling.n_probs > 0) { @@ -3191,6 +3339,9 @@ struct server_context { llama_tokens draft = common_speculative_gen_draft(slot.spec, params_spec, slot.cache_tokens, id); + // keep track of total number of tokens generated in the draft + slot.n_draft_total += draft.size(); + // ignore small drafts if (slot.params.speculative.n_min > (int) draft.size()) { SLT_DBG(slot, "ignoring small draft: %d < %d\n", (int) draft.size(), slot.params.speculative.n_min); @@ -3216,16 +3367,19 @@ struct server_context { slot.n_past += ids.size(); slot.n_decoded += ids.size(); + // update how many tokens out of draft was accepted + slot.n_draft_accepted += ids.size() - 1; + slot.cache_tokens.push_back(id); slot.cache_tokens.insert(slot.cache_tokens.end(), ids.begin(), ids.end() - 1); - llama_kv_cache_seq_rm(ctx, slot.id, slot.n_past, -1); + llama_kv_self_seq_rm(ctx, slot.id, slot.n_past, -1); for (size_t i = 0; i < ids.size(); ++i) { completion_token_output result; result.tok = ids[i]; - result.text_to_send = common_token_to_piece(ctx, result.tok, params_base.special); + result.text_to_send = common_token_to_piece(ctx, result.tok, accept_special_token(slot, result.tok)); result.prob = 1.0f; // set later // TODO: set result.probs @@ 
-3265,10 +3419,12 @@ static void log_server_request(const httplib::Request & req, const httplib::Resp return; } - LOG_INF("request: %s %s %s %d\n", req.method.c_str(), req.path.c_str(), req.remote_addr.c_str(), res.status); + // reminder: this function is not covered by httplib's exception handler; if someone does more complicated stuff, think about wrapping it in try-catch - LOG_DBG("request: %s\n", req.body.c_str()); - LOG_DBG("response: %s\n", res.body.c_str()); + SRV_INF("request: %s %s %s %d\n", req.method.c_str(), req.path.c_str(), req.remote_addr.c_str(), res.status); + + SRV_DBG("request: %s\n", req.body.c_str()); + SRV_DBG("response: %s\n", res.body.c_str()); } std::function shutdown_handler; @@ -3351,9 +3507,13 @@ int main(int argc, char ** argv) { message = "Unknown Exception"; } - json formatted_error = format_error_response(message, ERROR_TYPE_SERVER); - LOG_WRN("got exception: %s\n", formatted_error.dump().c_str()); - res_error(res, formatted_error); + try { + json formatted_error = format_error_response(message, ERROR_TYPE_SERVER); + LOG_WRN("got exception: %s\n", formatted_error.dump().c_str()); + res_error(res, formatted_error); + } catch (const std::exception & e) { + LOG_ERR("got another exception: %s | while hanlding exception: %s\n", e.what(), message.c_str()); + } }); svr->set_error_handler([&res_error](const httplib::Request &, httplib::Response & res) { @@ -3474,14 +3634,17 @@ int main(int argc, char ** argv) { } // request slots data using task queue - server_task task(SERVER_TASK_TYPE_METRICS); - task.id = ctx_server.queue_tasks.get_new_id(); - ctx_server.queue_results.add_waiting_task_id(task.id); - ctx_server.queue_tasks.post(task, true); // high-priority task + int task_id = ctx_server.queue_tasks.get_new_id(); + { + server_task task(SERVER_TASK_TYPE_METRICS); + task.id = task_id; + ctx_server.queue_results.add_waiting_task_id(task_id); + ctx_server.queue_tasks.post(std::move(task), true); // high-priority task + } // get the result - server_task_result_ptr result = ctx_server.queue_results.recv(task.id); - ctx_server.queue_results.remove_waiting_task_id(task.id); + server_task_result_ptr result = ctx_server.queue_results.recv(task_id); + ctx_server.queue_results.remove_waiting_task_id(task_id); if (result->is_error()) { res_error(res, result->to_json()); @@ -3510,16 +3673,17 @@ int main(int argc, char ** argv) { } // request slots data using task queue - server_task task(SERVER_TASK_TYPE_METRICS); - task.id = ctx_server.queue_tasks.get_new_id(); - task.metrics_reset_bucket = true; - - ctx_server.queue_results.add_waiting_task_id(task.id); - ctx_server.queue_tasks.post(task, true); // high-priority task + int task_id = ctx_server.queue_tasks.get_new_id(); + { + server_task task(SERVER_TASK_TYPE_METRICS); + task.id = task_id; + ctx_server.queue_results.add_waiting_task_id(task_id); + ctx_server.queue_tasks.post(std::move(task), true); // high-priority task + } // get the result - server_task_result_ptr result = ctx_server.queue_results.recv(task.id); - ctx_server.queue_results.remove_waiting_task_id(task.id); + server_task_result_ptr result = ctx_server.queue_results.recv(task_id); + ctx_server.queue_results.remove_waiting_task_id(task_id); if (result->is_error()) { res_error(res, result->to_json()); @@ -3555,7 +3719,7 @@ int main(int argc, char ** argv) { }, { {"name", "n_busy_slots_per_decode"}, {"help", "Average number of busy slots per llama_decode() call"}, - {"value", (float) res_metrics->n_busy_slots_total / (float) res_metrics->n_decode_total} + 
{"value", (float) res_metrics->n_busy_slots_total / std::max((float) res_metrics->n_decode_total, 1.f)} }}}, {"gauge", {{ {"name", "prompt_tokens_seconds"}, @@ -3575,11 +3739,11 @@ int main(int argc, char ** argv) { {"value", (uint64_t) res_metrics->kv_cache_tokens_count} },{ {"name", "requests_processing"}, - {"help", "Number of request processing."}, + {"help", "Number of requests processing."}, {"value", (uint64_t) res_metrics->n_processing_slots} },{ {"name", "requests_deferred"}, - {"help", "Number of request deferred."}, + {"help", "Number of requests deferred."}, {"value", (uint64_t) res_metrics->n_tasks_deferred} }}} }; @@ -3616,17 +3780,20 @@ int main(int argc, char ** argv) { } std::string filepath = params.slot_save_path + filename; - server_task task(SERVER_TASK_TYPE_SLOT_SAVE); - task.id = ctx_server.queue_tasks.get_new_id(); - task.slot_action.slot_id = id_slot; - task.slot_action.filename = filename; - task.slot_action.filepath = filepath; + int task_id = ctx_server.queue_tasks.get_new_id(); + { + server_task task(SERVER_TASK_TYPE_SLOT_SAVE); + task.id = task_id; + task.slot_action.slot_id = id_slot; + task.slot_action.filename = filename; + task.slot_action.filepath = filepath; - ctx_server.queue_results.add_waiting_task_id(task.id); - ctx_server.queue_tasks.post(task); + ctx_server.queue_results.add_waiting_task_id(task_id); + ctx_server.queue_tasks.post(std::move(task)); + } - server_task_result_ptr result = ctx_server.queue_results.recv(task.id); - ctx_server.queue_results.remove_waiting_task_id(task.id); + server_task_result_ptr result = ctx_server.queue_results.recv(task_id); + ctx_server.queue_results.remove_waiting_task_id(task_id); if (result->is_error()) { res_error(res, result->to_json()); @@ -3645,17 +3812,20 @@ int main(int argc, char ** argv) { } std::string filepath = params.slot_save_path + filename; - server_task task(SERVER_TASK_TYPE_SLOT_RESTORE); - task.id = ctx_server.queue_tasks.get_new_id(); - task.slot_action.slot_id = id_slot; - task.slot_action.filename = filename; - task.slot_action.filepath = filepath; + int task_id = ctx_server.queue_tasks.get_new_id(); + { + server_task task(SERVER_TASK_TYPE_SLOT_RESTORE); + task.id = task_id; + task.slot_action.slot_id = id_slot; + task.slot_action.filename = filename; + task.slot_action.filepath = filepath; - ctx_server.queue_results.add_waiting_task_id(task.id); - ctx_server.queue_tasks.post(task); + ctx_server.queue_results.add_waiting_task_id(task_id); + ctx_server.queue_tasks.post(std::move(task)); + } - server_task_result_ptr result = ctx_server.queue_results.recv(task.id); - ctx_server.queue_results.remove_waiting_task_id(task.id); + server_task_result_ptr result = ctx_server.queue_results.recv(task_id); + ctx_server.queue_results.remove_waiting_task_id(task_id); if (result->is_error()) { res_error(res, result->to_json()); @@ -3667,15 +3837,18 @@ int main(int argc, char ** argv) { }; const auto handle_slots_erase = [&ctx_server, &res_error, &res_ok](const httplib::Request & /* req */, httplib::Response & res, int id_slot) { - server_task task(SERVER_TASK_TYPE_SLOT_ERASE); - task.id = ctx_server.queue_tasks.get_new_id(); - task.slot_action.slot_id = id_slot; + int task_id = ctx_server.queue_tasks.get_new_id(); + { + server_task task(SERVER_TASK_TYPE_SLOT_ERASE); + task.id = task_id; + task.slot_action.slot_id = id_slot; - ctx_server.queue_results.add_waiting_task_id(task.id); - ctx_server.queue_tasks.post(task); + ctx_server.queue_results.add_waiting_task_id(task_id); + 
ctx_server.queue_tasks.post(std::move(task));
+        }

-        server_task_result_ptr result = ctx_server.queue_results.recv(task.id);
-        ctx_server.queue_results.remove_waiting_task_id(task.id);
+        server_task_result_ptr result = ctx_server.queue_results.recv(task_id);
+        ctx_server.queue_results.remove_waiting_task_id(task_id);

        if (result->is_error()) {
            res_error(res, result->to_json());
@@ -3720,12 +3893,16 @@ int main(int argc, char ** argv) {
        json data = {
            { "default_generation_settings", ctx_server.default_generation_settings_for_props },
            { "total_slots", ctx_server.params_base.n_parallel },
-            { "model_path", ctx_server.params_base.model },
-            { "chat_template", ctx_server.chat_templates.template_default->source() },
+            { "model_path", ctx_server.params_base.model.path },
+            { "chat_template", common_chat_templates_source(ctx_server.chat_templates.get()) },
+            { "bos_token", common_token_to_piece(ctx_server.ctx, llama_vocab_bos(ctx_server.vocab), /* special= */ true)},
+            { "eos_token", common_token_to_piece(ctx_server.ctx, llama_vocab_eos(ctx_server.vocab), /* special= */ true)},
            { "build_info", build_info },
        };
-        if (ctx_server.params_base.use_jinja && ctx_server.chat_templates.template_tool_use) {
-            data["chat_template_tool_use"] = ctx_server.chat_templates.template_tool_use->source();
+        if (ctx_server.params_base.use_jinja) {
+            if (auto tool_use_src = common_chat_templates_source(ctx_server.chat_templates.get(), "tool_use")) {
+                data["chat_template_tool_use"] = tool_use_src;
+            }
        }

        res_ok(res, data);
@@ -3744,6 +3921,21 @@ int main(int argc, char ** argv) {
        res_ok(res, {{ "success", true }});
    };

+    const auto handle_api_show = [&ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) {
+        json data = {
+            {
+                "template", common_chat_templates_source(ctx_server.chat_templates.get()),
+            },
+            {
+                "model_info", {
+                    { "llama.context_length", ctx_server.slots.back().n_ctx, },
+                }
+            },
+        };
+
+        res_ok(res, data);
+    };
+
    // handle completion-like requests (completion, chat, infill)
    // we can optionally provide a custom format for partial results and final results
    const auto handle_completions_impl = [&ctx_server, &res_error, &res_ok](
@@ -3760,10 +3952,15 @@ int main(int argc, char ** argv) {
        }

        auto completion_id = gen_chatcmplid();
-        std::vector<server_task> tasks;
-
+        std::unordered_set<int> task_ids;
        try {
-            std::vector<llama_tokens> tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, data.at("prompt"), true, true);
+            std::vector<server_task> tasks;
+
+            const auto & prompt = data.at("prompt");
+            // TODO: this log can become very long, put it behind a flag or think about a more compact format
+            //SRV_DBG("Prompt: %s\n", prompt.is_string() ?
prompt.get().c_str() : prompt.dump(2).c_str()); + + std::vector tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, prompt, true, true); tasks.reserve(tokenized_prompts.size()); for (size_t i = 0; i < tokenized_prompts.size(); i++) { server_task task = server_task(type); @@ -3773,28 +3970,28 @@ int main(int argc, char ** argv) { task.prompt_tokens = std::move(tokenized_prompts[i]); task.params = server_task::params_from_json_cmpl( - ctx_server.ctx, - ctx_server.params_base, - data); + ctx_server.ctx, + ctx_server.params_base, + data); task.id_selected_slot = json_value(data, "id_slot", -1); // OAI-compat - task.params.oaicompat = oaicompat; - task.params.oaicompat_cmpl_id = completion_id; + task.params.oaicompat = oaicompat; + task.params.oaicompat_cmpl_id = completion_id; // oaicompat_model is already populated by params_from_json_cmpl - tasks.push_back(task); + tasks.push_back(std::move(task)); } + + task_ids = server_task::get_list_id(tasks); + ctx_server.queue_results.add_waiting_tasks(tasks); + ctx_server.queue_tasks.post(std::move(tasks)); } catch (const std::exception & e) { res_error(res, format_error_response(e.what(), ERROR_TYPE_INVALID_REQUEST)); return; } - ctx_server.queue_results.add_waiting_tasks(tasks); - ctx_server.queue_tasks.post(tasks); - bool stream = json_value(data, "stream", false); - const auto task_ids = server_task::get_list_id(tasks); if (!stream) { ctx_server.receive_multi_results(task_ids, [&](std::vector & results) { @@ -3949,14 +4146,14 @@ int main(int argc, char ** argv) { }; const auto handle_chat_completions = [&ctx_server, ¶ms, &res_error, &handle_completions_impl](const httplib::Request & req, httplib::Response & res) { + LOG_DBG("request: %s\n", req.body.c_str()); if (ctx_server.params_base.embedding) { res_error(res, format_error_response("This server does not support completions. Start it without `--embeddings`", ERROR_TYPE_NOT_SUPPORTED)); return; } auto body = json::parse(req.body); - const auto & chat_template = body.contains("tools") && ctx_server.chat_templates.template_tool_use ? *ctx_server.chat_templates.template_tool_use : *ctx_server.chat_templates.template_default; - json data = oaicompat_completion_params_parse(body, chat_template, params.use_jinja); + json data = oaicompat_completion_params_parse(body, params.use_jinja, params.reasoning_format, ctx_server.chat_templates.get()); return handle_completions_impl( SERVER_TASK_TYPE_COMPLETION, @@ -3966,12 +4163,19 @@ int main(int argc, char ** argv) { OAICOMPAT_TYPE_CHAT); }; + // same with handle_chat_completions, but without inference part + const auto handle_apply_template = [&ctx_server, ¶ms, &res_ok](const httplib::Request & req, httplib::Response & res) { + auto body = json::parse(req.body); + json data = oaicompat_completion_params_parse(body, params.use_jinja, params.reasoning_format, ctx_server.chat_templates.get()); + res_ok(res, {{ "prompt", std::move(data.at("prompt")) }}); + }; + const auto handle_models = [¶ms, &ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) { json models = { {"object", "list"}, {"data", { { - {"id", params.model_alias.empty() ? params.model : params.model_alias}, + {"id", params.model_alias.empty() ? 
params.model.path : params.model_alias}, {"object", "model"}, {"created", std::time(0)}, {"owned_by", "llamacpp"}, @@ -4079,6 +4283,7 @@ int main(int argc, char ** argv) { // create and queue the task json responses = json::array(); bool error = false; + std::unordered_set task_ids; { std::vector tasks; for (size_t i = 0; i < tokenized_prompts.size(); i++) { @@ -4091,28 +4296,27 @@ int main(int argc, char ** argv) { // OAI-compat task.params.oaicompat = oaicompat; - tasks.push_back(task); + tasks.push_back(std::move(task)); } + task_ids = server_task::get_list_id(tasks); ctx_server.queue_results.add_waiting_tasks(tasks); - ctx_server.queue_tasks.post(tasks); - - // get the result - std::unordered_set task_ids = server_task::get_list_id(tasks); - - ctx_server.receive_multi_results(task_ids, [&](std::vector & results) { - for (auto & res : results) { - GGML_ASSERT(dynamic_cast(res.get()) != nullptr); - responses.push_back(res->to_json()); - } - }, [&](const json & error_data) { - res_error(res, error_data); - error = true; - }, req.is_connection_closed); - - ctx_server.queue_results.remove_waiting_task_ids(task_ids); + ctx_server.queue_tasks.post(std::move(tasks)); } + // get the result + ctx_server.receive_multi_results(task_ids, [&](std::vector & results) { + for (auto & res : results) { + GGML_ASSERT(dynamic_cast(res.get()) != nullptr); + responses.push_back(res->to_json()); + } + }, [&](const json & error_data) { + res_error(res, error_data); + error = true; + }, req.is_connection_closed); + + ctx_server.queue_results.remove_waiting_task_ids(task_ids); + if (error) { return; } @@ -4149,6 +4353,11 @@ int main(int argc, char ** argv) { // return; //} + // if true, use TEI API format, otherwise use Jina API format + // Jina: https://jina.ai/reranker/ + // TEI: https://huggingface.github.io/text-embeddings-inference/#/Text%20Embeddings%20Inference/rerank + bool is_tei_format = body.contains("texts"); + json query; if (body.count("query") == 1) { query = body.at("query"); @@ -4161,7 +4370,8 @@ int main(int argc, char ** argv) { return; } - std::vector documents = json_value(body, "documents", std::vector()); + std::vector documents = json_value(body, "documents", + json_value(body, "texts", std::vector())); if (documents.empty()) { res_error(res, format_error_response("\"documents\" must be a non-empty string array", ERROR_TYPE_INVALID_REQUEST)); return; @@ -4172,6 +4382,7 @@ int main(int argc, char ** argv) { // create and queue the task json responses = json::array(); bool error = false; + std::unordered_set task_ids; { std::vector tasks; std::vector tokenized_docs = tokenize_input_prompts(ctx_server.vocab, documents, /* add_special */ false, true); @@ -4181,32 +4392,35 @@ int main(int argc, char ** argv) { task.id = ctx_server.queue_tasks.get_new_id(); task.index = i; task.prompt_tokens = format_rerank(ctx_server.vocab, tokenized_query, tokenized_docs[i]); - tasks.push_back(task); + tasks.push_back(std::move(task)); } + task_ids = server_task::get_list_id(tasks); ctx_server.queue_results.add_waiting_tasks(tasks); - ctx_server.queue_tasks.post(tasks); - - // get the result - std::unordered_set task_ids = server_task::get_list_id(tasks); - - ctx_server.receive_multi_results(task_ids, [&](std::vector & results) { - for (auto & res : results) { - GGML_ASSERT(dynamic_cast(res.get()) != nullptr); - responses.push_back(res->to_json()); - } - }, [&](const json & error_data) { - res_error(res, error_data); - error = true; - }, req.is_connection_closed); + 
ctx_server.queue_tasks.post(std::move(tasks)); } + ctx_server.receive_multi_results(task_ids, [&](std::vector & results) { + for (auto & res : results) { + GGML_ASSERT(dynamic_cast(res.get()) != nullptr); + responses.push_back(res->to_json()); + } + }, [&](const json & error_data) { + res_error(res, error_data); + error = true; + }, req.is_connection_closed); + if (error) { return; } // write JSON response - json root = format_response_rerank(body, responses); + json root = format_response_rerank( + body, + responses, + is_tei_format, + documents); + res_ok(res, root); }; @@ -4231,14 +4445,19 @@ int main(int argc, char ** argv) { res_error(res, format_error_response("Request body must be an array", ERROR_TYPE_INVALID_REQUEST)); return; } - server_task task(SERVER_TASK_TYPE_SET_LORA); - task.id = ctx_server.queue_tasks.get_new_id(); - task.set_lora = parse_lora_request(ctx_server.params_base.lora_adapters, body); - ctx_server.queue_results.add_waiting_task_id(task.id); - ctx_server.queue_tasks.post(task); - server_task_result_ptr result = ctx_server.queue_results.recv(task.id); - ctx_server.queue_results.remove_waiting_task_id(task.id); + int task_id = ctx_server.queue_tasks.get_new_id(); + { + server_task task(SERVER_TASK_TYPE_SET_LORA); + task.id = task_id; + task.set_lora = parse_lora_request(ctx_server.params_base.lora_adapters, body); + ctx_server.queue_results.add_waiting_task_id(task_id); + ctx_server.queue_tasks.post(std::move(task)); + } + + // get the result + server_task_result_ptr result = ctx_server.queue_results.recv(task_id); + ctx_server.queue_results.remove_waiting_task_id(task_id); if (result->is_error()) { res_error(res, result->to_json()); @@ -4271,6 +4490,9 @@ int main(int argc, char ** argv) { res.set_content("Error: gzip is not supported by this browser", "text/plain"); } else { res.set_header("Content-Encoding", "gzip"); + // COEP and COOP headers, required by pyodide (python interpreter) + res.set_header("Cross-Origin-Embedder-Policy", "require-corp"); + res.set_header("Cross-Origin-Opener-Policy", "same-origin"); res.set_content(reinterpret_cast(index_html_gz), index_html_gz_len, "text/html; charset=utf-8"); } return false; @@ -4283,6 +4505,7 @@ int main(int argc, char ** argv) { svr->Get ("/metrics", handle_metrics); svr->Get ("/props", handle_props); svr->Post("/props", handle_props_change); + svr->Post("/api/show", handle_api_show); svr->Get ("/models", handle_models); // public endpoint (no API key check) svr->Get ("/v1/models", handle_models); // public endpoint (no API key check) svr->Post("/completion", handle_completions); // legacy @@ -4300,6 +4523,7 @@ int main(int argc, char ** argv) { svr->Post("/v1/reranking", handle_rerank); svr->Post("/tokenize", handle_tokenize); svr->Post("/detokenize", handle_detokenize); + svr->Post("/apply-template", handle_apply_template); // LoRA adapters hotswap svr->Get ("/lora-adapters", handle_lora_adapters_list); svr->Post("/lora-adapters", handle_lora_adapters_apply); @@ -4318,27 +4542,34 @@ int main(int argc, char ** argv) { svr->new_task_queue = [¶ms] { return new httplib::ThreadPool(params.n_threads_http); }; // clean up function, to be called before exit - auto clean_up = [&svr]() { + auto clean_up = [&svr, &ctx_server]() { + SRV_INF("%s: cleaning up before exit...\n", __func__); svr->stop(); + ctx_server.queue_results.terminate(); llama_backend_free(); }; - // bind HTTP listen port bool was_bound = false; - if (params.port == 0) { - int bound_port = svr->bind_to_any_port(params.hostname); - if ((was_bound = 
(bound_port >= 0))) { - params.port = bound_port; - } + if (string_ends_with(std::string(params.hostname), ".sock")) { + LOG_INF("%s: setting address family to AF_UNIX\n", __func__); + svr->set_address_family(AF_UNIX); + // bind_to_port requires a second arg, any value other than 0 should + // simply get ignored + was_bound = svr->bind_to_port(params.hostname, 8080); } else { - was_bound = svr->bind_to_port(params.hostname, params.port); + LOG_INF("%s: binding port with default address family\n", __func__); + // bind HTTP listen port + if (params.port == 0) { + int bound_port = svr->bind_to_any_port(params.hostname); + if ((was_bound = (bound_port >= 0))) { + params.port = bound_port; + } + } else { + was_bound = svr->bind_to_port(params.hostname, params.port); + } } if (!was_bound) { - //LOG_ERROR("couldn't bind HTTP server socket", { - // {"hostname", params.hostname}, - // {"port", params.port}, - //}); LOG_ERR("%s: couldn't bind HTTP server socket, hostname: %s, port: %d\n", __func__, params.hostname.c_str(), params.port); clean_up(); return 1; @@ -4365,33 +4596,24 @@ int main(int argc, char ** argv) { LOG_INF("%s: model loaded\n", __func__); - // if a custom chat template is not supplied, we will use the one that comes with the model (if any) - if (params.chat_template.empty()) { - if (!ctx_server.validate_builtin_chat_template(params.use_jinja)) { - LOG_WRN("%s: The chat template that comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses\n", __func__); - params.chat_template = "chatml"; - } - } - // print sample chat example to make it clear which template is used LOG_INF("%s: chat template, chat_template: %s, example_format: '%s'\n", __func__, - ctx_server.chat_templates.template_default->source().c_str(), - common_chat_format_example(*ctx_server.chat_templates.template_default, ctx_server.params_base.use_jinja).c_str()); + common_chat_templates_source(ctx_server.chat_templates.get()), + common_chat_format_example(ctx_server.chat_templates.get(), ctx_server.params_base.use_jinja).c_str()); - ctx_server.queue_tasks.on_new_task(std::bind( - &server_context::process_single_task, &ctx_server, std::placeholders::_1)); + ctx_server.queue_tasks.on_new_task([&ctx_server](server_task && task) { + ctx_server.process_single_task(std::move(task)); + }); - ctx_server.queue_tasks.on_update_slots(std::bind( - &server_context::update_slots, &ctx_server)); + ctx_server.queue_tasks.on_update_slots([&ctx_server]() { + ctx_server.update_slots(); + }); shutdown_handler = [&](int) { + // this will unblock start_loop() ctx_server.queue_tasks.terminate(); }; - LOG_INF("%s: server is listening on http://%s:%d - starting the main loop\n", __func__, params.hostname.c_str(), params.port); - - ctx_server.queue_tasks.start_loop(); - #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) struct sigaction sigint_action; sigint_action.sa_handler = signal_handler; @@ -4406,6 +4628,11 @@ int main(int argc, char ** argv) { SetConsoleCtrlHandler(reinterpret_cast(console_ctrl_handler), true); #endif + LOG_INF("%s: server is listening on http://%s:%d - starting the main loop\n", __func__, params.hostname.c_str(), params.port); + + // this call blocks the main thread until queue_tasks.terminate() is called + ctx_server.queue_tasks.start_loop(); + clean_up(); t.join(); diff --git a/examples/server/tests/README.md b/examples/server/tests/README.md index 5787276aba..652dea0382 100644 --- a/examples/server/tests/README.md +++ 
b/examples/server/tests/README.md @@ -17,7 +17,7 @@ To mitigate it, you can increase values in `n_predict`, `kv_size`. ```shell cd ../../.. -cmake -B build -DLLAMA_CURL=ON +cmake -B build cmake --build build --target llama-server ``` @@ -31,8 +31,9 @@ It's possible to override some scenario steps values with environment variables: | `LLAMA_SERVER_BIN_PATH` | to change the server binary path, default: `../../../build/bin/llama-server` | | `DEBUG` | to enable steps and server verbose mode `--verbose` | | `N_GPU_LAYERS` | number of model layers to offload to VRAM `-ngl --n-gpu-layers` | +| `LLAMA_CACHE` | by default server tests re-download models to the `tmp` subfolder. Set this to your cache (e.g. `$HOME/Library/Caches/llama.cpp` on Mac or `$HOME/.cache/llama.cpp` on Unix) to avoid this | -To run slow tests: +To run slow tests (will download many models, make sure to set `LLAMA_CACHE` if needed): ```shell SLOW_TESTS=1 ./tests.sh @@ -44,10 +45,16 @@ To run with stdout/stderr display in real time (verbose output, but useful for d DEBUG=1 ./tests.sh -s -v -x ``` -To run single test unit: +To run all the tests in a file: ```shell -./tests.sh unit/test_{name of test case here}.py -v -x +./tests.sh unit/test_chat_completion.py -v -x +``` + +To run a single test: + +```shell +./tests.sh unit/test_chat_completion.py::test_invalid_chat_completion_req ``` Hint: You can compile and run test in single command, useful for local developement: diff --git a/examples/server/tests/pytest.ini b/examples/server/tests/pytest.ini new file mode 100644 index 0000000000..6df308df74 --- /dev/null +++ b/examples/server/tests/pytest.ini @@ -0,0 +1,4 @@ +[pytest] +markers = + slow: marks tests as slow (deselect with '-m "not slow"') + serial diff --git a/examples/server/tests/tests.sh b/examples/server/tests/tests.sh index 1e0777de36..33fa8cc646 100755 --- a/examples/server/tests/tests.sh +++ b/examples/server/tests/tests.sh @@ -6,9 +6,18 @@ cd $SCRIPT_DIR set -eu +if [[ "${SLOW_TESTS:-0}" == 1 ]]; then + # Slow tests for tool calls need quite a few models ahead of time to avoid timing out. 
+ python $SCRIPT_DIR/../../../scripts/fetch_server_test_models.py +fi + if [ $# -lt 1 ] then - pytest -v -x + if [[ "${SLOW_TESTS:-0}" == 1 ]]; then + pytest -v -x + else + pytest -v -x -m "not slow" + fi else pytest "$@" fi diff --git a/examples/server/tests/unit/test_chat_completion.py b/examples/server/tests/unit/test_chat_completion.py index 2e15348dce..491cb3a5df 100644 --- a/examples/server/tests/unit/test_chat_completion.py +++ b/examples/server/tests/unit/test_chat_completion.py @@ -2,7 +2,7 @@ import pytest from openai import OpenAI from utils import * -server = ServerPreset.tinyllama2() +server: ServerProcess @pytest.fixture(autouse=True) def create_server(): @@ -13,11 +13,16 @@ def create_server(): @pytest.mark.parametrize( "model,system_prompt,user_prompt,max_tokens,re_content,n_prompt,n_predicted,finish_reason,jinja,chat_template", [ - (None, "Book", "What is the best book", 8, "(Suddenly)+", 77, 8, "length", False, None), - (None, "Book", "What is the best book", 8, "(Suddenly)+", 77, 8, "length", True, None), - (None, "Book", "What is the best book", 8, "^ blue", 23, 8, "length", True, "This is not a chat template, it is"), + (None, "Book", "Hey", 8, "But she couldn't", 69, 8, "length", False, None), + (None, "Book", "Hey", 8, "But she couldn't", 69, 8, "length", True, None), + (None, "Book", "What is the best book", 8, "(Suddenly)+|\\{ \" Sarax.", 77, 8, "length", False, None), + (None, "Book", "What is the best book", 8, "(Suddenly)+|\\{ \" Sarax.", 77, 8, "length", True, None), + (None, "Book", "What is the best book", 8, "(Suddenly)+|\\{ \" Sarax.", 77, 8, "length", True, 'chatml'), + (None, "Book", "What is the best book", 8, "^ blue", 23, 8, "length", True, "This is not a chat template, it is"), ("codellama70b", "You are a coding assistant.", "Write the fibonacci function in c++.", 128, "(Aside|she|felter|alonger)+", 104, 64, "length", False, None), ("codellama70b", "You are a coding assistant.", "Write the fibonacci function in c++.", 128, "(Aside|she|felter|alonger)+", 104, 64, "length", True, None), + (None, "Book", [{"type": "text", "text": "What is"}, {"type": "text", "text": "the best book"}], 8, "Whillicter", 79, 8, "length", False, None), + (None, "Book", [{"type": "text", "text": "What is"}, {"type": "text", "text": "the best book"}], 8, "Whillicter", 79, 8, "length", True, None), ] ) def test_chat_completion(model, system_prompt, user_prompt, max_tokens, re_content, n_prompt, n_predicted, finish_reason, jinja, chat_template): @@ -41,7 +46,7 @@ def test_chat_completion(model, system_prompt, user_prompt, max_tokens, re_conte assert res.body["usage"]["completion_tokens"] == n_predicted choice = res.body["choices"][0] assert "assistant" == choice["message"]["role"] - assert match_regex(re_content, choice["message"]["content"]) + assert match_regex(re_content, choice["message"]["content"]), f'Expected {re_content}, got {choice["message"]["content"]}' assert choice["finish_reason"] == finish_reason @@ -121,9 +126,25 @@ def test_chat_template(): assert res.body["__verbose"]["prompt"] == " <|start_header_id|>system<|end_header_id|>\n\nBook<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the best book<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n" +def test_apply_chat_template(): + global server + server.chat_template = "command-r" + server.start() + res = server.make_request("POST", "/apply-template", data={ + "messages": [ + {"role": "system", "content": "You are a test."}, + {"role": "user", "content":"Hi there"}, + ] + }) + assert 
res.status_code == 200 + assert "prompt" in res.body + assert res.body["prompt"] == "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>You are a test.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hi there<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" + + @pytest.mark.parametrize("response_format,n_predicted,re_content", [ ({"type": "json_object", "schema": {"const": "42"}}, 6, "\"42\""), ({"type": "json_object", "schema": {"items": [{"type": "integer"}]}}, 10, "[ -3000 ]"), + ({"type": "json_schema", "json_schema": {"schema": {"const": "foooooo"}}}, 10, "\"foooooo\""), ({"type": "json_object"}, 10, "(\\{|John)+"), ({"type": "sound"}, 0, None), # invalid response format (expected to fail) @@ -151,6 +172,47 @@ def test_completion_with_response_format(response_format: dict, n_predicted: int assert "error" in res.body +@pytest.mark.parametrize("jinja,json_schema,n_predicted,re_content", [ + (False, {"const": "42"}, 6, "\"42\""), + (True, {"const": "42"}, 6, "\"42\""), +]) +def test_completion_with_json_schema(jinja: bool, json_schema: dict, n_predicted: int, re_content: str): + global server + server.jinja = jinja + server.start() + res = server.make_request("POST", "/chat/completions", data={ + "max_tokens": n_predicted, + "messages": [ + {"role": "system", "content": "You are a coding assistant."}, + {"role": "user", "content": "Write an example"}, + ], + "json_schema": json_schema, + }) + assert res.status_code == 200, f'Expected 200, got {res.status_code}' + choice = res.body["choices"][0] + assert match_regex(re_content, choice["message"]["content"]), f'Expected {re_content}, got {choice["message"]["content"]}' + + +@pytest.mark.parametrize("jinja,grammar,n_predicted,re_content", [ + (False, 'root ::= "a"{5,5}', 6, "a{5,5}"), + (True, 'root ::= "a"{5,5}', 6, "a{5,5}"), +]) +def test_completion_with_grammar(jinja: bool, grammar: str, n_predicted: int, re_content: str): + global server + server.jinja = jinja + server.start() + res = server.make_request("POST", "/chat/completions", data={ + "max_tokens": n_predicted, + "messages": [ + {"role": "user", "content": "Does not matter what I say, does it?"}, + ], + "grammar": grammar, + }) + assert res.status_code == 200, res.body + choice = res.body["choices"][0] + assert match_regex(re_content, choice["message"]["content"]), choice["message"]["content"] + + @pytest.mark.parametrize("messages", [ None, "string", diff --git a/examples/server/tests/unit/test_embedding.py b/examples/server/tests/unit/test_embedding.py index 8b0eb42b09..0feb452ccf 100644 --- a/examples/server/tests/unit/test_embedding.py +++ b/examples/server/tests/unit/test_embedding.py @@ -49,6 +49,26 @@ def test_embedding_multiple(): assert len(d['embedding']) > 1 +def test_embedding_multiple_with_fa(): + server = ServerPreset.bert_bge_small_with_fa() + server.pooling = 'last' + server.start() + # one of these should trigger the FA branch (i.e. 
context size % 256 == 0) + res = server.make_request("POST", "/v1/embeddings", data={ + "input": [ + "a "*253, + "b "*254, + "c "*255, + "d "*256, + ], + }) + assert res.status_code == 200 + assert len(res.body['data']) == 4 + for d in res.body['data']: + assert 'embedding' in d + assert len(d['embedding']) > 1 + + @pytest.mark.parametrize( "input,is_multi_prompt", [ diff --git a/examples/server/tests/unit/test_rerank.py b/examples/server/tests/unit/test_rerank.py index 7203d79435..f4f570ad5e 100644 --- a/examples/server/tests/unit/test_rerank.py +++ b/examples/server/tests/unit/test_rerank.py @@ -10,17 +10,20 @@ def create_server(): server = ServerPreset.jina_reranker_tiny() +TEST_DOCUMENTS = [ + "A machine is a physical system that uses power to apply forces and control movement to perform an action. The term is commonly applied to artificial devices, such as those employing engines or motors, but also to natural biological macromolecules, such as molecular machines.", + "Learning is the process of acquiring new understanding, knowledge, behaviors, skills, values, attitudes, and preferences. The ability to learn is possessed by humans, non-human animals, and some machines; there is also evidence for some kind of learning in certain plants.", + "Machine learning is a field of study in artificial intelligence concerned with the development and study of statistical algorithms that can learn from data and generalize to unseen data, and thus perform tasks without explicit instructions.", + "Paris, capitale de la France, est une grande ville européenne et un centre mondial de l'art, de la mode, de la gastronomie et de la culture. Son paysage urbain du XIXe siècle est traversé par de larges boulevards et la Seine." +] + + def test_rerank(): global server server.start() res = server.make_request("POST", "/rerank", data={ "query": "Machine learning is", - "documents": [ - "A machine is a physical system that uses power to apply forces and control movement to perform an action. The term is commonly applied to artificial devices, such as those employing engines or motors, but also to natural biological macromolecules, such as molecular machines.", - "Learning is the process of acquiring new understanding, knowledge, behaviors, skills, values, attitudes, and preferences. The ability to learn is possessed by humans, non-human animals, and some machines; there is also evidence for some kind of learning in certain plants.", - "Machine learning is a field of study in artificial intelligence concerned with the development and study of statistical algorithms that can learn from data and generalize to unseen data, and thus perform tasks without explicit instructions.", - "Paris, capitale de la France, est une grande ville européenne et un centre mondial de l'art, de la mode, de la gastronomie et de la culture. Son paysage urbain du XIXe siècle est traversé par de larges boulevards et la Seine." 
- ] + "documents": TEST_DOCUMENTS, }) assert res.status_code == 200 assert len(res.body["results"]) == 4 @@ -38,6 +41,29 @@ def test_rerank(): assert least_relevant["index"] == 3 +def test_rerank_tei_format(): + global server + server.start() + res = server.make_request("POST", "/rerank", data={ + "query": "Machine learning is", + "texts": TEST_DOCUMENTS, + }) + assert res.status_code == 200 + assert len(res.body) == 4 + + most_relevant = res.body[0] + least_relevant = res.body[0] + for doc in res.body: + if doc["score"] > most_relevant["score"]: + most_relevant = doc + if doc["score"] < least_relevant["score"]: + least_relevant = doc + + assert most_relevant["score"] > least_relevant["score"] + assert most_relevant["index"] == 2 + assert least_relevant["index"] == 3 + + @pytest.mark.parametrize("documents", [ [], None, diff --git a/examples/server/tests/unit/test_tool_call.py b/examples/server/tests/unit/test_tool_call.py new file mode 100755 index 0000000000..569c2a1f8e --- /dev/null +++ b/examples/server/tests/unit/test_tool_call.py @@ -0,0 +1,606 @@ +#!/usr/bin/env python +import pytest + +# ensure grandparent path is in sys.path +from pathlib import Path +import sys +path = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(path)) + +from utils import * + +server: ServerProcess + +TIMEOUT_SERVER_START = 15*60 +TIMEOUT_HTTP_REQUEST = 60 + +@pytest.fixture(autouse=True) +def create_server(): + global server + server = ServerPreset.tinyllama2() + server.model_alias = "tinyllama-2-tool-call" + server.server_port = 8081 + + +TEST_TOOL = { + "type":"function", + "function": { + "name": "test", + "description": "", + "parameters": { + "type": "object", + "properties": { + "success": {"type": "boolean", "const": True}, + }, + "required": ["success"] + } + } +} + +PYTHON_TOOL = { + "type": "function", + "function": { + "name": "python", + "description": "Runs code in an ipython interpreter and returns the result of the execution after 60 seconds.", + "parameters": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "The code to run in the ipython interpreter." + } + }, + "required": ["code"] + } + } +} + +WEATHER_TOOL = { + "type":"function", + "function":{ + "name":"get_current_weather", + "description":"Get the current weather in a given location", + "parameters":{ + "type":"object", + "properties":{ + "location":{ + "type":"string", + "description":"The city and country/state, e.g. 
'San Francisco, CA', or 'Paris, France'" + } + }, + "required":["location"] + } + } +} + + +def do_test_completion_with_required_tool_tiny(server: ServerProcess, tool: dict, argument_key: str | None, n_predict, **kwargs): + res = server.make_request("POST", "/v1/chat/completions", data={ + "max_tokens": n_predict, + "messages": [ + {"role": "system", "content": "You are a coding assistant."}, + {"role": "user", "content": "Write an example"}, + ], + "tool_choice": "required", + "tools": [tool], + "parallel_tool_calls": False, + **kwargs, + }) + assert res.status_code == 200, f"Expected status code 200, got {res.status_code}" + choice = res.body["choices"][0] + tool_calls = choice["message"].get("tool_calls") + assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}' + tool_call = tool_calls[0] + assert choice["message"].get("content") in (None, ""), f'Expected no content in {choice["message"]}' + assert len(tool_call.get("id", "")) > 0, f'Expected non empty tool call id in {tool_call}' + expected_function_name = "python" if tool["type"] == "code_interpreter" else tool["function"]["name"] + assert expected_function_name == tool_call["function"]["name"] + actual_arguments = tool_call["function"]["arguments"] + assert isinstance(actual_arguments, str) + if argument_key is not None: + actual_arguments = json.loads(actual_arguments) + assert argument_key in actual_arguments, f"tool arguments: {json.dumps(actual_arguments)}, expected: {argument_key}" + + +@pytest.mark.parametrize("template_name,tool,argument_key", [ + ("google-gemma-2-2b-it", TEST_TOOL, "success"), + ("meta-llama-Llama-3.3-70B-Instruct", TEST_TOOL, "success"), + ("meta-llama-Llama-3.3-70B-Instruct", PYTHON_TOOL, "code"), +]) +def test_completion_with_required_tool_tiny_fast(template_name: str, tool: dict, argument_key: str | None): + global server + n_predict = 512 + # server = ServerPreset.stories15m_moe() + server.jinja = True + server.n_predict = n_predict + server.chat_template_file = f'../../../models/templates/{template_name}.jinja' + server.start(timeout_seconds=TIMEOUT_SERVER_START) + do_test_completion_with_required_tool_tiny(server, tool, argument_key, n_predict, temperature=0.0, top_k=1, top_p=1.0) + + +@pytest.mark.slow +@pytest.mark.parametrize("template_name,tool,argument_key", [ + ("meta-llama-Llama-3.1-8B-Instruct", TEST_TOOL, "success"), + ("meta-llama-Llama-3.1-8B-Instruct", PYTHON_TOOL, "code"), + ("meetkai-functionary-medium-v3.1", TEST_TOOL, "success"), + ("meetkai-functionary-medium-v3.1", PYTHON_TOOL, "code"), + ("meetkai-functionary-medium-v3.2", TEST_TOOL, "success"), + ("meetkai-functionary-medium-v3.2", PYTHON_TOOL, "code"), + ("NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use", TEST_TOOL, "success"), + ("NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use", PYTHON_TOOL, "code"), + ("meta-llama-Llama-3.2-3B-Instruct", TEST_TOOL, "success"), + ("meta-llama-Llama-3.2-3B-Instruct", PYTHON_TOOL, "code"), + ("mistralai-Mistral-Nemo-Instruct-2407", TEST_TOOL, "success"), + ("mistralai-Mistral-Nemo-Instruct-2407", PYTHON_TOOL, "code"), + ("NousResearch-Hermes-3-Llama-3.1-8B-tool_use", TEST_TOOL, "success"), + ("NousResearch-Hermes-3-Llama-3.1-8B-tool_use", PYTHON_TOOL, "code"), + ("deepseek-ai-DeepSeek-R1-Distill-Llama-8B", TEST_TOOL, "success"), + ("deepseek-ai-DeepSeek-R1-Distill-Llama-8B", PYTHON_TOOL, "code"), + ("fireworks-ai-llama-3-firefunction-v2", TEST_TOOL, "success"), + # ("fireworks-ai-llama-3-firefunction-v2", PYTHON_TOOL, "code"), +]) +def 
test_completion_with_required_tool_tiny_slow(template_name: str, tool: dict, argument_key: str | None): + global server + n_predict = 512 + # server = ServerPreset.stories15m_moe() + server.jinja = True + server.n_predict = n_predict + server.chat_template_file = f'../../../models/templates/{template_name}.jinja' + server.start(timeout_seconds=TIMEOUT_SERVER_START) + do_test_completion_with_required_tool_tiny(server, tool, argument_key, n_predict) + + +@pytest.mark.slow +@pytest.mark.parametrize("tool,argument_key,hf_repo,template_override", [ + (TEST_TOOL, "success", "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None), + (PYTHON_TOOL, "code", "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None), + (PYTHON_TOOL, "code", "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", "chatml"), + + (TEST_TOOL, "success", "bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None), + (PYTHON_TOOL, "code", "bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None), + (PYTHON_TOOL, "code", "bartowski/gemma-2-2b-it-GGUF:Q4_K_M", "chatml"), + + (TEST_TOOL, "success", "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None), + (PYTHON_TOOL, "code", "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None), + (PYTHON_TOOL, "code", "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"), + + (TEST_TOOL, "success", "bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M", None), + (PYTHON_TOOL, "code", "bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M", None), + (PYTHON_TOOL, "code", "bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M", "chatml"), + + (TEST_TOOL, "success", "bartowski/Qwen2.5-Coder-3B-Instruct-GGUF:Q4_K_M", None), + (PYTHON_TOOL, "code", "bartowski/Qwen2.5-Coder-3B-Instruct-GGUF:Q4_K_M", None), + (PYTHON_TOOL, "code", "bartowski/Qwen2.5-Coder-3B-Instruct-GGUF:Q4_K_M", "chatml"), + + (TEST_TOOL, "success", "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None), + (PYTHON_TOOL, "code", "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None), + (PYTHON_TOOL, "code", "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", "chatml"), + + (TEST_TOOL, "success", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")), + (PYTHON_TOOL, "code", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")), + (PYTHON_TOOL, "code", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", "chatml"), + + (TEST_TOOL, "success", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")), + (PYTHON_TOOL, "code", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")), + (PYTHON_TOOL, "code", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", "chatml"), + + (TEST_TOOL, "success", "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None), + (PYTHON_TOOL, "code", "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None), + (PYTHON_TOOL, "code", "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", "chatml"), + + (TEST_TOOL, "success", "bartowski/functionary-small-v3.2-GGUF:Q4_K_M", ("meetkai/functionary-medium-v3.2", None)), + (PYTHON_TOOL, "code", "bartowski/functionary-small-v3.2-GGUF:Q4_K_M", ("meetkai/functionary-medium-v3.2", None)), + (PYTHON_TOOL, "code", "bartowski/functionary-small-v3.2-GGUF:Q4_K_M", "chatml"), + + (TEST_TOOL, "success", "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)), + (PYTHON_TOOL, "code", "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)), + (PYTHON_TOOL, "code", "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", 
"chatml"), + + (TEST_TOOL, "success", "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)), + (PYTHON_TOOL, "code", "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)), + (PYTHON_TOOL, "code", "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", "chatml"), + + (TEST_TOOL, "success", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None), + (PYTHON_TOOL, "code", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None), +]) +def test_completion_with_required_tool_real_model(tool: dict, argument_key: str | None, hf_repo: str, template_override: str | Tuple[str, str | None] | None): + global server + n_predict = 512 + server.n_slots = 1 + server.jinja = True + server.n_ctx = 8192 + server.n_predict = n_predict + server.model_hf_repo = hf_repo + server.model_hf_file = None + if isinstance(template_override, tuple): + (template_hf_repo, template_variant) = template_override + server.chat_template_file = f"../../../models/templates/{template_hf_repo.replace('/', '-') + ('-' + template_variant if template_variant else '')}.jinja" + assert os.path.exists(server.chat_template_file), f"Template file {server.chat_template_file} does not exist. Run `python scripts/get_chat_template.py {template_hf_repo} {template_variant} > {server.chat_template_file}` to download the template." + elif isinstance(template_override, str): + server.chat_template = template_override + server.start(timeout_seconds=TIMEOUT_SERVER_START) + res = server.make_request("POST", "/v1/chat/completions", data={ + "max_tokens": n_predict, + "messages": [ + {"role": "system", "content": "You are a coding assistant."}, + {"role": "user", "content": "Write an example"}, + ], + "tool_choice": "required", + "tools": [tool], + "parallel_tool_calls": False, + "temperature": 0.0, + "top_k": 1, + "top_p": 1.0, + }, timeout=TIMEOUT_HTTP_REQUEST) + assert res.status_code == 200, f"Expected status code 200, got {res.status_code}" + choice = res.body["choices"][0] + tool_calls = choice["message"].get("tool_calls") + assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}' + tool_call = tool_calls[0] + # assert choice["message"].get("content") in (None, ""), f'Expected no content in {choice["message"]}' + expected_function_name = "python" if tool["type"] == "code_interpreter" else tool["function"]["name"] + assert expected_function_name == tool_call["function"]["name"] + actual_arguments = tool_call["function"]["arguments"] + assert isinstance(actual_arguments, str) + if argument_key is not None: + actual_arguments = json.loads(actual_arguments) + assert argument_key in actual_arguments, f"tool arguments: {json.dumps(actual_arguments)}, expected: {argument_key}" + + +def do_test_completion_without_tool_call(server: ServerProcess, n_predict: int, tools: list[dict], tool_choice: str | None, **kwargs): + res = server.make_request("POST", "/v1/chat/completions", data={ + "max_tokens": n_predict, + "messages": [ + {"role": "system", "content": "You are a coding assistant."}, + {"role": "user", "content": "say hello world with python"}, + ], + "tools": tools if tools else None, + "tool_choice": tool_choice, + **kwargs, + }, timeout=TIMEOUT_HTTP_REQUEST) + assert res.status_code == 200, f"Expected status code 200, got {res.status_code}" + choice = res.body["choices"][0] + assert choice["message"].get("tool_calls") is None, f'Expected no tool call in {choice["message"]}' + + +@pytest.mark.parametrize("template_name,n_predict,tools,tool_choice", [ 
+ ("meta-llama-Llama-3.3-70B-Instruct", 128, [], None), + ("meta-llama-Llama-3.3-70B-Instruct", 128, [TEST_TOOL], None), + ("meta-llama-Llama-3.3-70B-Instruct", 128, [PYTHON_TOOL], 'none'), +]) +def test_completion_without_tool_call_fast(template_name: str, n_predict: int, tools: list[dict], tool_choice: str | None): + global server + server.jinja = True + server.n_predict = n_predict + server.chat_template_file = f'../../../models/templates/{template_name}.jinja' + server.start(timeout_seconds=TIMEOUT_SERVER_START) + do_test_completion_without_tool_call(server, n_predict, tools, tool_choice) + + +@pytest.mark.slow +@pytest.mark.parametrize("template_name,n_predict,tools,tool_choice", [ + ("meetkai-functionary-medium-v3.2", 256, [], None), + ("meetkai-functionary-medium-v3.2", 256, [TEST_TOOL], None), + ("meetkai-functionary-medium-v3.2", 256, [PYTHON_TOOL], 'none'), + ("meetkai-functionary-medium-v3.1", 256, [], None), + ("meetkai-functionary-medium-v3.1", 256, [TEST_TOOL], None), + ("meetkai-functionary-medium-v3.1", 256, [PYTHON_TOOL], 'none'), + ("meta-llama-Llama-3.2-3B-Instruct", 256, [], None), + ("meta-llama-Llama-3.2-3B-Instruct", 256, [TEST_TOOL], None), + ("meta-llama-Llama-3.2-3B-Instruct", 256, [PYTHON_TOOL], 'none'), +]) +def test_completion_without_tool_call_slow(template_name: str, n_predict: int, tools: list[dict], tool_choice: str | None): + global server + server.jinja = True + server.n_predict = n_predict + server.chat_template_file = f'../../../models/templates/{template_name}.jinja' + server.start(timeout_seconds=TIMEOUT_SERVER_START) + do_test_completion_without_tool_call(server, n_predict, tools, tool_choice) + + +@pytest.mark.slow +@pytest.mark.parametrize("hf_repo,template_override", [ + ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None), + ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", "chatml"), + + ("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None), + ("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"), + + ("bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M", None), + ("bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M", "chatml"), + + ("bartowski/Qwen2.5-Coder-3B-Instruct-GGUF:Q4_K_M", None), + ("bartowski/Qwen2.5-Coder-3B-Instruct-GGUF:Q4_K_M", "chatml"), + + ("bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None), + ("bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", "chatml"), + + ("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")), + ("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", "chatml"), + + ("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")), + ("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", "chatml"), + + ("bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None), + ("bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", "chatml"), + + ("bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai/functionary-medium-v3.2", None)), + ("bartowski/functionary-small-v3.2-GGUF:Q8_0", "chatml"), + + ("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)), + ("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", "chatml"), + + ("bartowski/c4ai-command-r7b-12-2024-GGUF:Q6_K_L", ("CohereForAI/c4ai-command-r7b-12-2024", "tool_use")), + + ("bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None), + + # Note: gemma-2-2b-it knows itself as "model", not "assistant", so we don't test the ill-suited chatml on it. 
+ ("bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None), + + # ("bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)), +]) +def test_weather(hf_repo: str, template_override: str | Tuple[str, str | None] | None): + global server + n_predict = 512 + server.n_slots = 1 + server.jinja = True + server.n_ctx = 8192 + server.n_predict = n_predict + server.model_hf_repo = hf_repo + server.model_hf_file = None + if isinstance(template_override, tuple): + (template_hf_repo, template_variant) = template_override + server.chat_template_file = f"../../../models/templates/{template_hf_repo.replace('/', '-') + ('-' + template_variant if template_variant else '')}.jinja" + assert os.path.exists(server.chat_template_file), f"Template file {server.chat_template_file} does not exist. Run `python scripts/get_chat_template.py {template_hf_repo} {template_variant} > {server.chat_template_file}` to download the template." + elif isinstance(template_override, str): + server.chat_template = template_override + server.start(timeout_seconds=TIMEOUT_SERVER_START) + do_test_weather(server, max_tokens=n_predict) + + +def do_test_weather(server: ServerProcess, **kwargs): + res = server.make_request("POST", "/v1/chat/completions", data={ + "messages": [ + {"role": "system", "content": "You are a chatbot that uses tools/functions. Dont overthink things."}, + {"role": "user", "content": "What is the weather in Istanbul?"}, + ], + "tools": [WEATHER_TOOL], + **kwargs, + }, timeout=TIMEOUT_HTTP_REQUEST) + assert res.status_code == 200, f"Expected status code 200, got {res.status_code}" + choice = res.body["choices"][0] + tool_calls = choice["message"].get("tool_calls") + assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}' + tool_call = tool_calls[0] + # assert choice["message"].get("content") in (None, ""), f'Expected no content in {choice["message"]}' + assert tool_call["function"]["name"] == WEATHER_TOOL["function"]["name"], f'Expected weather tool call, got {tool_call["function"]["name"]}' + assert len(tool_call.get("id", "")) > 0, f'Expected non empty tool call id in {tool_call}' + actual_arguments = json.loads(tool_call["function"]["arguments"]) + assert 'location' in actual_arguments, f"location not found in {json.dumps(actual_arguments)}" + location = actual_arguments["location"] + assert isinstance(location, str), f"Expected location to be a string, got {type(location)}: {json.dumps(location)}" + assert re.match('^Istanbul(( |, ?)(TR|Turkey|Türkiye))?$', location), f'Expected Istanbul for location, got {location}' + + +@pytest.mark.slow +@pytest.mark.parametrize("result_override,n_predict,hf_repo,template_override", [ + (None, 128, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"), + (None, 128, "bartowski/Qwen2.5-Coder-3B-Instruct-GGUF:Q4_K_M", None), + (None, 128, "bartowski/Qwen2.5-Coder-3B-Instruct-GGUF:Q4_K_M", "chatml"), + (None, 128, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", "chatml"), + (None, 128, "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")), + (None, 128, "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")), + (None, 128, "bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai/functionary-medium-v3.2", None)), + (None, 128, "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None), + (None, 128, "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", "chatml"), + (None, 128, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None), + 
("[\\s\\S]*?\\*\\*\\s*0.5($|\\*\\*)", 8192, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", ("llama-cpp-deepseek-r1", None)), + + # TODO: fix these (wrong results, either didn't respect decimal instruction or got wrong value) + # (None, 128, "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None), + # ("[\\s\\S]*?\\*\\*\\s*0.5($|\\*\\*)", 8192, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None), +]) +def test_calc_result(result_override: str | None, n_predict: int, hf_repo: str, template_override: str | Tuple[str, str | None] | None): + global server + server.n_slots = 1 + server.jinja = True + server.n_ctx = 8192 * 2 + server.n_predict = n_predict + server.model_hf_repo = hf_repo + server.model_hf_file = None + if isinstance(template_override, tuple): + (template_hf_repo, template_variant) = template_override + server.chat_template_file = f"../../../models/templates/{template_hf_repo.replace('/', '-') + ('-' + template_variant if template_variant else '')}.jinja" + assert os.path.exists(server.chat_template_file), f"Template file {server.chat_template_file} does not exist. Run `python scripts/get_chat_template.py {template_hf_repo} {template_variant} > {server.chat_template_file}` to download the template." + elif isinstance(template_override, str): + server.chat_template = template_override + server.start(timeout_seconds=TIMEOUT_SERVER_START) + do_test_calc_result(server, result_override, n_predict) + + +def do_test_calc_result(server: ServerProcess, result_override: str | None, n_predict: int, **kwargs): + res = server.make_request("POST", "/v1/chat/completions", data={ + "max_tokens": n_predict, + "messages": [ + {"role": "system", "content": "You are a tools-calling assistant. You express numerical values with at most two decimals."}, + {"role": "user", "content": "What's the y coordinate of a point on the unit sphere at angle 30 degrees?"}, + { + "role": "assistant", + "content": None, + "tool_calls": [ + { + "id": "call_6789", + "type": "function", + "function": { + "name": "calculate", + "arguments": "{\"expression\":\"sin(30 * pi / 180)\"}" + } + } + ] + }, + { + "role": "tool", + "name": "calculate", + "content": "0.55644242476", + "tool_call_id": "call_6789" + } + ], + "tools": [ + { + "type":"function", + "function":{ + "name":"calculate", + "description":"A calculator function that computes values of arithmetic expressions in the Python syntax", + "parameters":{ + "type":"object", + "properties":{ + "expression":{ + "type":"string", + "description":"An arithmetic expression to compute the value of (Python syntad, assuming all floats)" + } + }, + "required":["expression"] + } + } + } + ], + **kwargs, + }, timeout=TIMEOUT_HTTP_REQUEST) + assert res.status_code == 200, f"Expected status code 200, got {res.status_code}" + choice = res.body["choices"][0] + tool_calls = choice["message"].get("tool_calls") + assert tool_calls is None, f'Expected no tool call in {choice["message"]}' + content = choice["message"].get("content") + assert content is not None, f'Expected content in {choice["message"]}' + if result_override is not None: + assert re.match(result_override, content), f'Expected {result_override}, got {content}' + else: + assert re.match('^[\\s\\S]*?((That\'s|\\bis) (approximately )?)?\\b0\\.(5\\b|56\\b|556)', content), \ + f'Expected something like "The y coordinate is 0.56.", got {content}' + + +@pytest.mark.slow +@pytest.mark.parametrize("n_predict,reasoning_format,expect_content,expect_reasoning_content,hf_repo,template_override", [ + (128, 'deepseek', "^The 
sum of 102 and 7 is 109[\\s\\S]*", None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None), + (128, None, "^The sum of 102 and 7 is 109[\\s\\S]*", None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None), + + (1024, 'deepseek', "To find the sum of[\\s\\S]*", "I need to calculate the sum of 102 and 7[\\s\\S]*", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None), + (1024, 'none', "^(\\s*)?I need[\\s\\S]*?\\s*To find[\\s\\S]*", None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None), + + (1024, 'deepseek', "To find the sum of[\\s\\S]*", "First, I [\\s\\S]*", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", ("llama-cpp-deepseek-r1", None)), +]) +def test_thoughts(n_predict: int, reasoning_format: Literal['deepseek', 'none'] | None, expect_content: str | None, expect_reasoning_content: str | None, hf_repo: str, template_override: str | Tuple[str, str | None] | None): + global server + server.n_slots = 1 + server.reasoning_format = reasoning_format + server.jinja = True + server.n_ctx = 8192 * 2 + server.n_predict = n_predict + server.model_hf_repo = hf_repo + server.model_hf_file = None + if isinstance(template_override, tuple): + (template_hf_repo, template_variant) = template_override + server.chat_template_file = f"../../../models/templates/{template_hf_repo.replace('/', '-') + ('-' + template_variant if template_variant else '')}.jinja" + assert os.path.exists(server.chat_template_file), f"Template file {server.chat_template_file} does not exist. Run `python scripts/get_chat_template.py {template_hf_repo} {template_variant} > {server.chat_template_file}` to download the template." + elif isinstance(template_override, str): + server.chat_template = template_override + server.start(timeout_seconds=TIMEOUT_SERVER_START) + res = server.make_request("POST", "/v1/chat/completions", data={ + "max_tokens": n_predict, + "messages": [ + {"role": "user", "content": "What's the sum of 102 and 7?"}, + ] + }, timeout=TIMEOUT_HTTP_REQUEST) + assert res.status_code == 200, f"Expected status code 200, got {res.status_code}" + choice = res.body["choices"][0] + assert choice["message"].get("tool_calls") is None, f'Expected no tool call in {choice["message"]}' + + content = choice["message"].get("content") + if expect_content is None: + assert choice["message"].get("content") in (None, ""), f'Expected no content in {choice["message"]}' + else: + assert re.match(expect_content, content), f'Expected {expect_content}, got {content}' + + reasoning_content = choice["message"].get("reasoning_content") + if expect_reasoning_content is None: + assert reasoning_content is None, f'Expected no reasoning content in {choice["message"]}' + else: + assert re.match(expect_reasoning_content, reasoning_content), f'Expected {expect_reasoning_content}, got {reasoning_content}' + + +@pytest.mark.slow +@pytest.mark.parametrize("hf_repo,template_override", [ + ("bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None), + + ("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None), + ("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"), + + ("bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai-functionary-medium-v3.2", None)), + ("bartowski/functionary-small-v3.2-GGUF:Q8_0", "chatml"), + + # ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None), + ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", "chatml"), + + ("bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)), + ("bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", None), + + 
("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)), + ("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", None), + + ("bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None), + ("bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", "chatml"), + + ("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")), + ("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", "chatml"), + + ("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch-Hermes-3-Llama-3.1-8B", "tool_use")), + ("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", "chatml"), + + ("bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None), + ("bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", "chatml"), + + ("bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None), + ("bartowski/gemma-2-2b-it-GGUF:Q4_K_M", "chatml"), +]) +def test_hello_world(hf_repo: str, template_override: str | Tuple[str, str | None] | None): + global server + n_predict = 512 # High because of DeepSeek R1 + server.n_slots = 1 + server.jinja = True + server.n_ctx = 8192 + server.n_predict = n_predict + server.model_hf_repo = hf_repo + server.model_hf_file = None + if isinstance(template_override, tuple): + (template_hf_repo, template_variant) = template_override + server.chat_template_file = f"../../../models/templates/{template_hf_repo.replace('/', '-') + ('-' + template_variant if template_variant else '')}.jinja" + assert os.path.exists(server.chat_template_file), f"Template file {server.chat_template_file} does not exist. Run `python scripts/get_chat_template.py {template_hf_repo} {template_variant} > {server.chat_template_file}` to download the template." + elif isinstance(template_override, str): + server.chat_template = template_override + server.start(timeout_seconds=TIMEOUT_SERVER_START) + + do_test_hello_world(server, max_tokens=n_predict) + + +def do_test_hello_world(server: ServerProcess, **kwargs): + res = server.make_request("POST", "/v1/chat/completions", data={ + "messages": [ + {"role": "system", "content": "You are a tool-calling agent."}, + {"role": "user", "content": "say hello world with python"}, + ], + "tools": [PYTHON_TOOL], + **kwargs, + }, timeout=TIMEOUT_HTTP_REQUEST) + assert res.status_code == 200, f"Expected status code 200, got {res.status_code}" + choice = res.body["choices"][0] + tool_calls = choice["message"].get("tool_calls") + assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}' + tool_call = tool_calls[0] + # assert choice["message"].get("content") in (None, ""), f'Expected no content in {choice["message"]}' + assert tool_call["function"]["name"] == PYTHON_TOOL["function"]["name"] + assert len(tool_call.get("id", "")) > 0, f'Expected non empty tool call id in {tool_call}' + actual_arguments = json.loads(tool_call["function"]["arguments"]) + assert 'code' in actual_arguments, f"code not found in {json.dumps(actual_arguments)}" + code = actual_arguments["code"] + assert isinstance(code, str), f"Expected code to be a string, got {type(code)}: {json.dumps(code)}" + assert re.match(r'''print\(("[Hh]ello,? [Ww]orld!?"|'[Hh]ello,? 
[Ww]orld!?')\)''', code), f'Expected hello world, got {code}' diff --git a/examples/server/tests/utils.py b/examples/server/tests/utils.py index 9964db2f99..4dc2062a8e 100644 --- a/examples/server/tests/utils.py +++ b/examples/server/tests/utils.py @@ -26,7 +26,10 @@ from re import RegexFlag import wget -DEFAULT_HTTP_TIMEOUT = 10 if "LLAMA_SANITIZE" not in os.environ else 30 +DEFAULT_HTTP_TIMEOUT = 12 + +if "LLAMA_SANITIZE" in os.environ or "GITHUB_ACTION" in os.environ: + DEFAULT_HTTP_TIMEOUT = 30 class ServerResponse: @@ -41,7 +44,7 @@ class ServerProcess: server_port: int = 8080 server_host: str = "127.0.0.1" model_hf_repo: str = "ggml-org/models" - model_hf_file: str = "tinyllamas/stories260K.gguf" + model_hf_file: str | None = "tinyllamas/stories260K.gguf" model_alias: str = "tinyllama-2" temperature: float = 0.8 seed: int = 42 @@ -64,6 +67,9 @@ class ServerProcess: id_slot: int | None = None cache_prompt: bool | None = None n_slots: int | None = None + ctk: str | None = None + ctv: str | None = None + fa: bool | None = None server_continuous_batching: bool | None = False server_embeddings: bool | None = False server_reranking: bool | None = False @@ -78,8 +84,10 @@ class ServerProcess: draft_max: int | None = None no_webui: bool | None = None jinja: bool | None = None + reasoning_format: Literal['deepseek', 'none'] | None = None chat_template: str | None = None chat_template_file: str | None = None + server_path: str | None = None # session variables process: subprocess.Popen | None = None @@ -93,7 +101,9 @@ class ServerProcess: self.server_port = int(os.environ["PORT"]) def start(self, timeout_seconds: int | None = DEFAULT_HTTP_TIMEOUT) -> None: - if "LLAMA_SERVER_BIN_PATH" in os.environ: + if self.server_path is not None: + server_path = self.server_path + elif "LLAMA_SERVER_BIN_PATH" in os.environ: server_path = os.environ["LLAMA_SERVER_BIN_PATH"] elif os.name == "nt": server_path = "../../../build/bin/Release/llama-server.exe" @@ -147,6 +157,12 @@ class ServerProcess: server_args.extend(["--ctx-size", self.n_ctx]) if self.n_slots: server_args.extend(["--parallel", self.n_slots]) + if self.ctk: + server_args.extend(["-ctk", self.ctk]) + if self.ctv: + server_args.extend(["-ctv", self.ctv]) + if self.fa is not None: + server_args.append("-fa") if self.n_predict: server_args.extend(["--n-predict", self.n_predict]) if self.slot_save_path: @@ -172,13 +188,15 @@ class ServerProcess: server_args.append("--no-webui") if self.jinja: server_args.append("--jinja") + if self.reasoning_format is not None: + server_args.extend(("--reasoning-format", self.reasoning_format)) if self.chat_template: server_args.extend(["--chat-template", self.chat_template]) if self.chat_template_file: server_args.extend(["--chat-template-file", self.chat_template_file]) args = [str(arg) for arg in [server_path, *server_args]] - print(f"bench: starting server with: {' '.join(args)}") + print(f"tests: starting server with: {' '.join(args)}") flags = 0 if "nt" == os.name: @@ -191,7 +209,7 @@ class ServerProcess: creationflags=flags, stdout=sys.stdout, stderr=sys.stdout, - env={**os.environ, "LLAMA_CACHE": "tmp"}, + env={**os.environ, "LLAMA_CACHE": "tmp"} if "LLAMA_CACHE" not in os.environ else None, ) server_instances.add(self) @@ -209,6 +227,10 @@ class ServerProcess: return # server is ready except Exception as e: pass + # Check if process died + if self.process.poll() is not None: + raise RuntimeError(f"Server process died with return code {self.process.returncode}") + print(f"Waiting for server to start...") 
time.sleep(0.5) raise TimeoutError(f"Server did not start within {timeout_seconds} seconds") @@ -280,7 +302,7 @@ class ServerPreset: server.model_hf_repo = "ggml-org/models" server.model_hf_file = "tinyllamas/stories260K.gguf" server.model_alias = "tinyllama-2" - server.n_ctx = 256 + server.n_ctx = 512 server.n_batch = 32 server.n_slots = 2 server.n_predict = 64 @@ -301,6 +323,21 @@ class ServerPreset: server.server_embeddings = True return server + @staticmethod + def bert_bge_small_with_fa() -> ServerProcess: + server = ServerProcess() + server.model_hf_repo = "ggml-org/models" + server.model_hf_file = "bert-bge-small/ggml-model-f16.gguf" + server.model_alias = "bert-bge-small" + server.n_ctx = 1024 + server.n_batch = 300 + server.n_ubatch = 300 + server.n_slots = 2 + server.fa = True + server.seed = 42 + server.server_embeddings = True + return server + @staticmethod def tinyllama_infill() -> ServerProcess: server = ServerProcess() diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index c5987250cc..aba2f27f9b 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -3,21 +3,18 @@ #include "common.h" #include "log.h" #include "llama.h" -#include "common/base64.hpp" +#include "base64.hpp" -#ifndef NDEBUG -// crash the server in debug mode, otherwise send an http 500 error -#define CPPHTTPLIB_NO_EXCEPTIONS 1 -#endif // increase max payload length to allow use of larger context size #define CPPHTTPLIB_FORM_URL_ENCODED_PAYLOAD_MAX_LENGTH 1048576 +// disable Nagle's algorithm +#define CPPHTTPLIB_TCP_NODELAY true #include "httplib.h" // Change JSON_ASSERT from assert() to GGML_ASSERT: #define JSON_ASSERT GGML_ASSERT #include "json.hpp" -#include "minja.hpp" -#include "chat-template.hpp" +#include "chat.h" #include #include @@ -61,6 +58,32 @@ static T json_value(const json & body, const std::string & key, const T & defaul const static std::string build_info("b" + std::to_string(LLAMA_BUILD_NUMBER) + "-" + LLAMA_COMMIT); +// thin wrapper around common_grammar_trigger with (de)serialization functions +struct server_grammar_trigger { + common_grammar_trigger value; + + server_grammar_trigger() = default; + server_grammar_trigger(const common_grammar_trigger & value) : value(value) {} + server_grammar_trigger(const json & in) { + value.type = (common_grammar_trigger_type) in.at("type").get(); + value.value = in.at("value").get(); + if (value.type == COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN) { + value.token = (llama_token) in.at("token").get(); + } + } + + json to_json() const { + json out { + {"type", (int) value.type}, + {"value", value.value}, + }; + if (value.type == COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN) { + out["token"] = (int) value.token; + } + return out; + } +}; + // // tokenizer and input processing utils // @@ -350,41 +373,6 @@ static llama_tokens format_infill( return embd_inp; } -// Format given chat. 
If tmpl is empty, we take the template from model metadata -inline std::string format_chat(const common_chat_template & tmpl, const std::vector & messages) { - std::vector chat; - - for (size_t i = 0; i < messages.size(); ++i) { - const auto & curr_msg = messages[i]; - - std::string role = json_value(curr_msg, "role", std::string("")); - - std::string content; - if (curr_msg.contains("content")) { - if (curr_msg["content"].is_string()) { - content = curr_msg["content"].get(); - } else if (curr_msg["content"].is_array()) { - for (const auto & part : curr_msg["content"]) { - if (part.contains("text")) { - content += "\n" + part["text"].get(); - } - } - } else { - throw std::runtime_error("Invalid 'content' type (ref: https://github.com/ggerganov/llama.cpp/issues/8367)"); - } - } else { - throw std::runtime_error("Missing 'content' (ref: https://github.com/ggerganov/llama.cpp/issues/8367)"); - } - - chat.push_back({role, content}); - } - - const auto formatted_chat = common_chat_apply_template(tmpl, chat, true, /* use_jinja= */ false); - LOG_DBG("formatted_chat: '%s'\n", formatted_chat.c_str()); - - return formatted_chat; -} - // // base64 utils (TODO: move to common in the future) // @@ -473,6 +461,10 @@ static std::string gen_chatcmplid() { return "chatcmpl-" + random_string(); } +static std::string gen_tool_call_id() { + return random_string(); +} + // // other common utils // @@ -559,8 +551,13 @@ static json oaicompat_completion_params_parse(const json & body) { throw std::runtime_error("Only one completion choice is allowed"); } + // Handle "echo" field + if (json_value(body, "echo", false)) { + throw std::runtime_error("Only no echo is supported"); + } + // Params supported by OAI but unsupported by llama.cpp - static const std::vector unsupported_params { "best_of", "echo", "suffix" }; + static const std::vector unsupported_params { "best_of", "suffix" }; for (const auto & param : unsupported_params) { if (body.contains(param)) { throw std::runtime_error("Unsupported param: " + param); @@ -580,21 +577,28 @@ static json oaicompat_completion_params_parse(const json & body) { static json oaicompat_completion_params_parse( const json & body, /* openai api json semantics */ - const common_chat_template & tmpl, - bool use_jinja) + bool use_jinja, + common_reasoning_format reasoning_format, + const struct common_chat_templates * tmpls) { json llama_params; auto tools = json_value(body, "tools", json()); - auto has_tools = tools.is_array() && !tools.empty(); + auto stream = json_value(body, "stream", false); - if (has_tools) { - if (use_jinja) { - LOG_WRN("tools param is not fully supported yet\n"); - } else { + if (tools.is_array() && !tools.empty()) { + if (stream) { + throw std::runtime_error("Cannot use tools with stream"); + } + if (!use_jinja) { throw std::runtime_error("tools param requires --jinja flag"); } } + if (!use_jinja) { + if (body.contains("tool_choice") && !body.at("tool_choice").is_null()) { + throw std::runtime_error("Unsupported param: tool_choice"); + } + } // Handle "stop" field if (body.contains("stop") && body.at("stop").is_string()) { @@ -603,25 +607,59 @@ static json oaicompat_completion_params_parse( llama_params["stop"] = json_value(body, "stop", json::array()); } + auto json_schema = json_value(body, "json_schema", json()); + auto grammar = json_value(body, "grammar", std::string()); + if (!json_schema.is_null() && !grammar.empty()) { + throw std::runtime_error("Cannot use both json_schema and grammar"); + } + // Handle "response_format" field if 
(body.contains("response_format")) { json response_format = json_value(body, "response_format", json::object()); std::string response_type = json_value(response_format, "type", std::string()); if (response_type == "json_object") { - llama_params["json_schema"] = json_value(response_format, "schema", json::object()); + json_schema = json_value(response_format, "schema", json::object()); } else if (response_type == "json_schema") { - json json_schema = json_value(response_format, "json_schema", json::object()); - llama_params["json_schema"] = json_value(json_schema, "schema", json::object()); + auto schema_wrapper = json_value(response_format, "json_schema", json::object()); + json_schema = json_value(schema_wrapper, "schema", json::object()); } else if (!response_type.empty() && response_type != "text") { throw std::runtime_error("response_format type must be one of \"text\" or \"json_object\", but got: " + response_type); } } + common_chat_templates_inputs inputs; + inputs.messages = common_chat_msgs_parse_oaicompat(body.at("messages")); + inputs.tools = common_chat_tools_parse_oaicompat(tools); + inputs.tool_choice = common_chat_tool_choice_parse_oaicompat(json_value(body, "tool_choice", std::string("auto"))); + inputs.json_schema = json_schema.is_null() ? "" : json_schema.dump(); + inputs.grammar = grammar; + inputs.add_generation_prompt = json_value(body, "add_generation_prompt", true); + inputs.use_jinja = use_jinja; + inputs.parallel_tool_calls = json_value(body, "parallel_tool_calls", false); + inputs.extract_reasoning = reasoning_format != COMMON_REASONING_FORMAT_NONE; + inputs.add_generation_prompt = json_value(body, "add_generation_prompt", true); + if (!inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && body.contains("grammar")) { + throw std::runtime_error("Cannot use custom grammar constraints with tools."); + } + // Apply chat template to the list of messages - if (use_jinja) { - llama_params["prompt"] = tmpl.apply(body.at("messages"), tools, /* add_generation_prompt= */ true); - } else { - llama_params["prompt"] = format_chat(tmpl, body.at("messages")); + auto chat_params = common_chat_templates_apply(tmpls, inputs); + + llama_params["chat_format"] = static_cast(chat_params.format); + llama_params["prompt"] = chat_params.prompt; + if (!chat_params.grammar.empty()) { + llama_params["grammar"] = chat_params.grammar; + } + llama_params["grammar_lazy"] = chat_params.grammar_lazy; + auto grammar_triggers = json::array(); + for (const auto & trigger : chat_params.grammar_triggers) { + server_grammar_trigger ct(trigger); + grammar_triggers.push_back(ct.to_json()); + } + llama_params["grammar_triggers"] = grammar_triggers; + llama_params["preserved_tokens"] = chat_params.preserved_tokens; + for (const auto & stop : chat_params.additional_stops) { + llama_params["stop"].push_back(stop); } // Handle "n" field @@ -638,14 +676,6 @@ static json oaicompat_completion_params_parse( throw std::runtime_error("top_logprobs requires logprobs to be set to true"); } - // Params supported by OAI but unsupported by llama.cpp - static const std::vector unsupported_params { "tool_choice" }; - for (const auto & param : unsupported_params) { - if (body.contains(param)) { - throw std::runtime_error("Unsupported param: " + param); - } - } - // Copy remaining properties to llama_params // This allows user to use llama.cpp-specific params like "mirostat", ... via OAI endpoint. 
// See "launch_slot_with_task()" for a complete list of params supported by llama.cpp @@ -701,29 +731,51 @@ static json format_embeddings_response_oaicompat(const json & request, const jso return res; } -static json format_response_rerank(const json & request, const json & ranks) { - json data = json::array(); - int32_t n_tokens = 0; - int i = 0; - for (const auto & rank : ranks) { - data.push_back(json{ - {"index", i++}, - {"relevance_score", json_value(rank, "score", 0.0)}, - }); +static json format_response_rerank( + const json & request, + const json & ranks, + bool is_tei_format, + std::vector & texts) { + json res; + if (is_tei_format) { + // TEI response format + res = json::array(); + bool return_text = json_value(request, "return_text", false); + for (const auto & rank : ranks) { + int index = json_value(rank, "index", 0); + json elem = json{ + {"index", index}, + {"score", json_value(rank, "score", 0.0)}, + }; + if (return_text) { + elem["text"] = std::move(texts[index]); + } + res.push_back(elem); + } + } else { + // Jina response format + json results = json::array(); + int32_t n_tokens = 0; + for (const auto & rank : ranks) { + results.push_back(json{ + {"index", json_value(rank, "index", 0)}, + {"relevance_score", json_value(rank, "score", 0.0)}, + }); - n_tokens += json_value(rank, "tokens_evaluated", 0); + n_tokens += json_value(rank, "tokens_evaluated", 0); + } + + res = json{ + {"model", json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))}, + {"object", "list"}, + {"usage", json{ + {"prompt_tokens", n_tokens}, + {"total_tokens", n_tokens} + }}, + {"results", results} + }; } - json res = json { - {"model", json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))}, - {"object", "list"}, - {"usage", json { - {"prompt_tokens", n_tokens}, - {"total_tokens", n_tokens} - }}, - {"results", data} - }; - return res; } diff --git a/examples/server/webui/.gitignore b/examples/server/webui/.gitignore new file mode 100644 index 0000000000..a547bf36d8 --- /dev/null +++ b/examples/server/webui/.gitignore @@ -0,0 +1,24 @@ +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +pnpm-debug.log* +lerna-debug.log* + +node_modules +dist +dist-ssr +*.local + +# Editor directories and files +.vscode/* +!.vscode/extensions.json +.idea +.DS_Store +*.suo +*.ntvs* +*.njsproj +*.sln +*.sw? 
diff --git a/examples/server/webui/.prettierignore b/examples/server/webui/.prettierignore new file mode 100644 index 0000000000..c0cb165b37 --- /dev/null +++ b/examples/server/webui/.prettierignore @@ -0,0 +1,10 @@ +**/.vscode +**/.github +**/.git +**/.svn +**/.hg +**/node_modules +**/dist +**/build + +*.config.js diff --git a/examples/server/webui/eslint.config.js b/examples/server/webui/eslint.config.js new file mode 100644 index 0000000000..7c0d39b89b --- /dev/null +++ b/examples/server/webui/eslint.config.js @@ -0,0 +1,26 @@ +import js from '@eslint/js' +import globals from 'globals' +import reactHooks from 'eslint-plugin-react-hooks' +import reactRefresh from 'eslint-plugin-react-refresh' +import tseslint from 'typescript-eslint' + +export default tseslint.config( + { ignores: ['dist'] }, + { + extends: [js.configs.recommended, ...tseslint.configs.recommended], + files: ['**/*.{ts,tsx}'], + languageOptions: { + ecmaVersion: 2020, + globals: globals.browser, + }, + plugins: { + 'react-hooks': reactHooks, + 'react-refresh': reactRefresh, + }, + rules: { + ...reactHooks.configs.recommended.rules, + 'react-refresh/only-export-components': 'off', + '@typescript-eslint/no-unused-vars': 'off', + }, + }, +) diff --git a/examples/server/webui/index.html b/examples/server/webui/index.html index d3893ea4e1..471f46b3ad 100644 --- a/examples/server/webui/index.html +++ b/examples/server/webui/index.html @@ -1,343 +1,16 @@ - + - - - - - 🦙 llama.cpp - chat - - - -
- [deleted: the old 343-line single-file webui index.html, including the conversations sidebar ("Conversations", "+ New conversation", "Conversations are saved to browser's localStorage"), the chat and message templates such as {{ conv.messages[0].content }} and {{ messages.length === 0 ? 'Send a message to start' : '' }}, and its inline styles and scripts]
+ [added: a minimal 16-line index.html titled "🦙 llama.cpp - chat" that serves as the shell for the new React-based webui]
+ + diff --git a/examples/server/webui/package-lock.json b/examples/server/webui/package-lock.json index bbebccbf20..b2e3cf94ac 100644 --- a/examples/server/webui/package-lock.json +++ b/examples/server/webui/package-lock.json @@ -8,22 +8,46 @@ "name": "webui", "version": "0.0.0", "dependencies": { + "@heroicons/react": "^2.2.0", "@sec-ant/readable-stream": "^0.6.0", + "@tailwindcss/postcss": "^4.1.1", + "@tailwindcss/vite": "^4.1.1", "@vscode/markdown-it-katex": "^1.1.1", "autoprefixer": "^10.4.20", - "daisyui": "^4.12.14", + "daisyui": "^5.0.12", + "dexie": "^4.0.11", "highlight.js": "^11.10.0", "katex": "^0.16.15", - "markdown-it": "^14.1.0", "postcss": "^8.4.49", - "tailwindcss": "^3.4.15", + "react": "^18.3.1", + "react-dom": "^18.3.1", + "react-markdown": "^9.0.3", + "react-router": "^7.1.5", + "rehype-highlight": "^7.0.2", + "rehype-katex": "^7.0.1", + "remark-breaks": "^4.0.0", + "remark-gfm": "^4.0.0", + "remark-math": "^6.0.0", + "tailwindcss": "^4.1.1", "textlinestream": "^1.1.1", - "vite-plugin-singlefile": "^2.0.3", - "vue": "^3.5.13" + "vite-plugin-singlefile": "^2.0.3" }, "devDependencies": { - "sass-embedded": "^1.83.0", - "vite": "^5.4.10" + "@eslint/js": "^9.17.0", + "@types/markdown-it": "^14.1.2", + "@types/node": "^22.13.1", + "@types/react": "^18.3.18", + "@types/react-dom": "^18.3.5", + "@vitejs/plugin-react": "^4.3.4", + "eslint": "^9.17.0", + "eslint-plugin-react-hooks": "^5.0.0", + "eslint-plugin-react-refresh": "^0.4.16", + "globals": "^15.14.0", + "prettier": "^3.4.2", + "sass-embedded": "^1.83.4", + "typescript": "~5.6.2", + "typescript-eslint": "^8.18.2", + "vite": "^6.0.5" } }, "node_modules/@alloc/quick-lru": { @@ -38,6 +62,302 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/@ampproject/remapping": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/@ampproject/remapping/-/remapping-2.3.0.tgz", + "integrity": "sha512-30iZtAPgz+LTIYoeivqYo853f02jBYSd5uGnGpkFV0M3xOt9aN73erkgYAmZU43x4VfqcnLxW9Kpg3R5LC4YYw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@jridgewell/gen-mapping": "^0.3.5", + "@jridgewell/trace-mapping": "^0.3.24" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@babel/code-frame": { + "version": "7.26.2", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.26.2.tgz", + "integrity": "sha512-RJlIHRueQgwWitWgF8OdFYGZX328Ax5BCemNGlqHfplnRT9ESi8JkFlvaVYbS+UubVY6dpv87Fs2u5M29iNFVQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-validator-identifier": "^7.25.9", + "js-tokens": "^4.0.0", + "picocolors": "^1.0.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/compat-data": { + "version": "7.26.5", + "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.26.5.tgz", + "integrity": "sha512-XvcZi1KWf88RVbF9wn8MN6tYFloU5qX8KjuF3E1PVBmJ9eypXfs4GRiJwLuTZL0iSnJUKn1BFPa5BPZZJyFzPg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/core": { + "version": "7.26.7", + "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.26.7.tgz", + "integrity": "sha512-SRijHmF0PSPgLIBYlWnG0hyeJLwXE2CgpsXaMOrtt2yp9/86ALw6oUlj9KYuZ0JN07T4eBMVIW4li/9S1j2BGA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@ampproject/remapping": "^2.2.0", + "@babel/code-frame": "^7.26.2", + "@babel/generator": "^7.26.5", + "@babel/helper-compilation-targets": "^7.26.5", + "@babel/helper-module-transforms": "^7.26.0", + "@babel/helpers": "^7.26.7", + 
"@babel/parser": "^7.26.7", + "@babel/template": "^7.25.9", + "@babel/traverse": "^7.26.7", + "@babel/types": "^7.26.7", + "convert-source-map": "^2.0.0", + "debug": "^4.1.0", + "gensync": "^1.0.0-beta.2", + "json5": "^2.2.3", + "semver": "^6.3.1" + }, + "engines": { + "node": ">=6.9.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/babel" + } + }, + "node_modules/@babel/generator": { + "version": "7.26.5", + "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.26.5.tgz", + "integrity": "sha512-2caSP6fN9I7HOe6nqhtft7V4g7/V/gfDsC3Ag4W7kEzzvRGKqiv0pu0HogPiZ3KaVSoNDhUws6IJjDjpfmYIXw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/parser": "^7.26.5", + "@babel/types": "^7.26.5", + "@jridgewell/gen-mapping": "^0.3.5", + "@jridgewell/trace-mapping": "^0.3.25", + "jsesc": "^3.0.2" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-compilation-targets": { + "version": "7.26.5", + "resolved": "https://registry.npmjs.org/@babel/helper-compilation-targets/-/helper-compilation-targets-7.26.5.tgz", + "integrity": "sha512-IXuyn5EkouFJscIDuFF5EsiSolseme1s0CZB+QxVugqJLYmKdxI1VfIBOst0SUu4rnk2Z7kqTwmoO1lp3HIfnA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/compat-data": "^7.26.5", + "@babel/helper-validator-option": "^7.25.9", + "browserslist": "^4.24.0", + "lru-cache": "^5.1.1", + "semver": "^6.3.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-module-imports": { + "version": "7.25.9", + "resolved": "https://registry.npmjs.org/@babel/helper-module-imports/-/helper-module-imports-7.25.9.tgz", + "integrity": "sha512-tnUA4RsrmflIM6W6RFTLFSXITtl0wKjgpnLgXyowocVPrbYrLUXSBXDgTs8BlbmIzIdlBySRQjINYs2BAkiLtw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/traverse": "^7.25.9", + "@babel/types": "^7.25.9" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-module-transforms": { + "version": "7.26.0", + "resolved": "https://registry.npmjs.org/@babel/helper-module-transforms/-/helper-module-transforms-7.26.0.tgz", + "integrity": "sha512-xO+xu6B5K2czEnQye6BHA7DolFFmS3LB7stHZFaOLb1pAwO1HWLS8fXA+eh0A2yIvltPVmx3eNNDBJA2SLHXFw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-module-imports": "^7.25.9", + "@babel/helper-validator-identifier": "^7.25.9", + "@babel/traverse": "^7.25.9" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0" + } + }, + "node_modules/@babel/helper-plugin-utils": { + "version": "7.26.5", + "resolved": "https://registry.npmjs.org/@babel/helper-plugin-utils/-/helper-plugin-utils-7.26.5.tgz", + "integrity": "sha512-RS+jZcRdZdRFzMyr+wcsaqOmld1/EqTghfaBGQQd/WnRdzdlvSZ//kF7U8VQTxf1ynZ4cjUcYgjVGx13ewNPMg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-string-parser": { + "version": "7.25.9", + "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.25.9.tgz", + "integrity": "sha512-4A/SCr/2KLd5jrtOMFzaKjVtAei3+2r/NChoBNoZ3EyP/+GlhoaEGoWOZUmFmoITP7zOJyHIMm+DYRd8o3PvHA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-identifier": { + "version": "7.25.9", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.25.9.tgz", + "integrity": 
"sha512-Ed61U6XJc3CVRfkERJWDz4dJwKe7iLmmJsbOGu9wSloNSFttHV0I8g6UAgb7qnK5ly5bGLPd4oXZlxCdANBOWQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-option": { + "version": "7.25.9", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-option/-/helper-validator-option-7.25.9.tgz", + "integrity": "sha512-e/zv1co8pp55dNdEcCynfj9X7nyUKUXoUEwfXqaZt0omVOmDe9oOTdKStH4GmAw6zxMFs50ZayuMfHDKlO7Tfw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helpers": { + "version": "7.26.7", + "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.26.7.tgz", + "integrity": "sha512-8NHiL98vsi0mbPQmYAGWwfcFaOy4j2HY49fXJCfuDcdE7fMIsH9a7GdaeXpIBsbT7307WU8KCMp5pUVDNL4f9A==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/template": "^7.25.9", + "@babel/types": "^7.26.7" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/parser": { + "version": "7.26.7", + "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.26.7.tgz", + "integrity": "sha512-kEvgGGgEjRUutvdVvZhbn/BxVt+5VSpwXz1j3WYXQbXDo8KzFOPNG2GQbdAiNq8g6wn1yKk7C/qrke03a84V+w==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/types": "^7.26.7" + }, + "bin": { + "parser": "bin/babel-parser.js" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@babel/plugin-transform-react-jsx-self": { + "version": "7.25.9", + "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx-self/-/plugin-transform-react-jsx-self-7.25.9.tgz", + "integrity": "sha512-y8quW6p0WHkEhmErnfe58r7x0A70uKphQm8Sp8cV7tjNQwK56sNVK0M73LK3WuYmsuyrftut4xAkjjgU0twaMg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-plugin-utils": "^7.25.9" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-transform-react-jsx-source": { + "version": "7.25.9", + "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx-source/-/plugin-transform-react-jsx-source-7.25.9.tgz", + "integrity": "sha512-+iqjT8xmXhhYv4/uiYd8FNQsraMFZIfxVSqxxVSZP0WbbSAWvBXAul0m/zu+7Vv4O/3WtApy9pmaTMiumEZgfg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-plugin-utils": "^7.25.9" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/template": { + "version": "7.25.9", + "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.25.9.tgz", + "integrity": "sha512-9DGttpmPvIxBb/2uwpVo3dqJ+O6RooAFOS+lB+xDqoE2PVCE8nfoHMdZLpfCQRLwvohzXISPZcgxt80xLfsuwg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.25.9", + "@babel/parser": "^7.25.9", + "@babel/types": "^7.25.9" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/traverse": { + "version": "7.26.7", + "resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.26.7.tgz", + "integrity": "sha512-1x1sgeyRLC3r5fQOM0/xtQKsYjyxmFjaOrLJNtZ81inNjyJHGIolTULPiSc/2qe1/qfpFLisLQYFnnZl7QoedA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.26.2", + "@babel/generator": "^7.26.5", + "@babel/parser": "^7.26.7", + "@babel/template": "^7.25.9", + "@babel/types": "^7.26.7", + "debug": "^4.3.1", + "globals": "^11.1.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/traverse/node_modules/globals": { + "version": 
"11.12.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-11.12.0.tgz", + "integrity": "sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=4" + } + }, + "node_modules/@babel/types": { + "version": "7.26.7", + "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.26.7.tgz", + "integrity": "sha512-t8kDRGrKXyp6+tjUh7hw2RLyclsW4TRoRvRHtSyAX9Bb5ldlFh+90YAYY6awRXrlB4G5G2izNeGySpATlFzmOg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-string-parser": "^7.25.9", + "@babel/helper-validator-identifier": "^7.25.9" + }, + "engines": { + "node": ">=6.9.0" + } + }, "node_modules/@bufbuild/protobuf": { "version": "2.2.3", "resolved": "https://registry.npmjs.org/@bufbuild/protobuf/-/protobuf-2.2.3.tgz", @@ -46,9 +366,9 @@ "license": "(Apache-2.0 AND BSD-3-Clause)" }, "node_modules/@esbuild/aix-ppc64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.21.5.tgz", - "integrity": "sha512-1SDgH6ZSPTlggy1yI6+Dbkiz8xzpHJEVAlF/AM1tHPLsf5STom9rwtjE4hKAF20FfXXNTFqEYXyJNWh1GiZedQ==", + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.24.2.tgz", + "integrity": "sha512-thpVCb/rhxE/BnMLQ7GReQLLN8q9qbHmI55F4489/ByVg2aQaQ6kbcLb6FHkocZzQhxc4gx0sCk0tJkKBFzDhA==", "cpu": [ "ppc64" ], @@ -58,13 +378,13 @@ "aix" ], "engines": { - "node": ">=12" + "node": ">=18" } }, "node_modules/@esbuild/android-arm": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.21.5.tgz", - "integrity": "sha512-vCPvzSjpPHEi1siZdlvAlsPxXl7WbOVUBBAowWug4rJHb68Ox8KualB+1ocNvT5fjv6wpkX6o/iEpbDrf68zcg==", + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.24.2.tgz", + "integrity": "sha512-tmwl4hJkCfNHwFB3nBa8z1Uy3ypZpxqxfTQOcHX+xRByyYgunVbZ9MzUUfb0RxaHIMnbHagwAxuTL+tnNM+1/Q==", "cpu": [ "arm" ], @@ -74,13 +394,13 @@ "android" ], "engines": { - "node": ">=12" + "node": ">=18" } }, "node_modules/@esbuild/android-arm64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.21.5.tgz", - "integrity": "sha512-c0uX9VAUBQ7dTDCjq+wdyGLowMdtR/GoC2U5IYk/7D1H1JYC0qseD7+11iMP2mRLN9RcCMRcjC4YMclCzGwS/A==", + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.24.2.tgz", + "integrity": "sha512-cNLgeqCqV8WxfcTIOeL4OAtSmL8JjcN6m09XIgro1Wi7cF4t/THaWEa7eL5CMoMBdjoHOTh/vwTO/o2TRXIyzg==", "cpu": [ "arm64" ], @@ -90,13 +410,13 @@ "android" ], "engines": { - "node": ">=12" + "node": ">=18" } }, "node_modules/@esbuild/android-x64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.21.5.tgz", - "integrity": "sha512-D7aPRUUNHRBwHxzxRvp856rjUHRFW1SdQATKXH2hqA0kAZb1hKmi02OpYRacl0TxIGz/ZmXWlbZgjwWYaCakTA==", + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.24.2.tgz", + "integrity": "sha512-B6Q0YQDqMx9D7rvIcsXfmJfvUYLoP722bgfBlO5cGvNVb5V/+Y7nhBE3mHV9OpxBf4eAS2S68KZztiPaWq4XYw==", "cpu": [ "x64" ], @@ -106,13 +426,29 @@ "android" ], "engines": { - "node": ">=12" + "node": ">=18" + } + }, + "node_modules/@esbuild/darwin-arm64": { + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.24.2.tgz", + "integrity": 
"sha512-kj3AnYWc+CekmZnS5IPu9D+HWtUI49hbnyqk0FLEJDbzCIQt7hg7ucF1SQAilhtYpIujfaHr6O0UHlzzSPdOeA==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" } }, "node_modules/@esbuild/darwin-x64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.21.5.tgz", - "integrity": "sha512-se/JjF8NlmKVG4kNIuyWMV/22ZaerB+qaSi5MdrXtd6R08kvs2qCN4C09miupktDitvh8jRFflwGFBQcxZRjbw==", + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.24.2.tgz", + "integrity": "sha512-WeSrmwwHaPkNR5H3yYfowhZcbriGqooyu3zI/3GGpF8AyUdsrrP0X6KumITGA9WOyiJavnGZUwPGvxvwfWPHIA==", "cpu": [ "x64" ], @@ -122,13 +458,13 @@ "darwin" ], "engines": { - "node": ">=12" + "node": ">=18" } }, "node_modules/@esbuild/freebsd-arm64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.21.5.tgz", - "integrity": "sha512-5JcRxxRDUJLX8JXp/wcBCy3pENnCgBR9bN6JsY4OmhfUtIHe3ZW0mawA7+RDAcMLrMIZaf03NlQiX9DGyB8h4g==", + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.24.2.tgz", + "integrity": "sha512-UN8HXjtJ0k/Mj6a9+5u6+2eZ2ERD7Edt1Q9IZiB5UZAIdPnVKDoG7mdTVGhHJIeEml60JteamR3qhsr1r8gXvg==", "cpu": [ "arm64" ], @@ -138,13 +474,13 @@ "freebsd" ], "engines": { - "node": ">=12" + "node": ">=18" } }, "node_modules/@esbuild/freebsd-x64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.21.5.tgz", - "integrity": "sha512-J95kNBj1zkbMXtHVH29bBriQygMXqoVQOQYA+ISs0/2l3T9/kj42ow2mpqerRBxDJnmkUDCaQT/dfNXWX/ZZCQ==", + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.24.2.tgz", + "integrity": "sha512-TvW7wE/89PYW+IevEJXZ5sF6gJRDY/14hyIGFXdIucxCsbRmLUcjseQu1SyTko+2idmCw94TgyaEZi9HUSOe3Q==", "cpu": [ "x64" ], @@ -154,13 +490,13 @@ "freebsd" ], "engines": { - "node": ">=12" + "node": ">=18" } }, "node_modules/@esbuild/linux-arm": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.21.5.tgz", - "integrity": "sha512-bPb5AHZtbeNGjCKVZ9UGqGwo8EUu4cLq68E95A53KlxAPRmUyYv2D6F0uUI65XisGOL1hBP5mTronbgo+0bFcA==", + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.24.2.tgz", + "integrity": "sha512-n0WRM/gWIdU29J57hJyUdIsk0WarGd6To0s+Y+LwvlC55wt+GT/OgkwoXCXvIue1i1sSNWblHEig00GBWiJgfA==", "cpu": [ "arm" ], @@ -170,13 +506,13 @@ "linux" ], "engines": { - "node": ">=12" + "node": ">=18" } }, "node_modules/@esbuild/linux-arm64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.21.5.tgz", - "integrity": "sha512-ibKvmyYzKsBeX8d8I7MH/TMfWDXBF3db4qM6sy+7re0YXya+K1cem3on9XgdT2EQGMu4hQyZhan7TeQ8XkGp4Q==", + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.24.2.tgz", + "integrity": "sha512-7HnAD6074BW43YvvUmE/35Id9/NB7BeX5EoNkK9obndmZBUk8xmJJeU7DwmUeN7tkysslb2eSl6CTrYz6oEMQg==", "cpu": [ "arm64" ], @@ -186,13 +522,13 @@ "linux" ], "engines": { - "node": ">=12" + "node": ">=18" } }, "node_modules/@esbuild/linux-ia32": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.21.5.tgz", - "integrity": "sha512-YvjXDqLRqPDl2dvRODYmmhz4rPeVKYvppfGYKSNGdyZkA01046pLWyRKKI3ax8fbJoK5QbxblURkwK/MWY18Tg==", + "version": "0.24.2", + "resolved": 
"https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.24.2.tgz", + "integrity": "sha512-sfv0tGPQhcZOgTKO3oBE9xpHuUqguHvSo4jl+wjnKwFpapx+vUDcawbwPNuBIAYdRAvIDBfZVvXprIj3HA+Ugw==", "cpu": [ "ia32" ], @@ -202,13 +538,13 @@ "linux" ], "engines": { - "node": ">=12" + "node": ">=18" } }, "node_modules/@esbuild/linux-loong64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.21.5.tgz", - "integrity": "sha512-uHf1BmMG8qEvzdrzAqg2SIG/02+4/DHB6a9Kbya0XDvwDEKCoC8ZRWI5JJvNdUjtciBGFQ5PuBlpEOXQj+JQSg==", + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.24.2.tgz", + "integrity": "sha512-CN9AZr8kEndGooS35ntToZLTQLHEjtVB5n7dl8ZcTZMonJ7CCfStrYhrzF97eAecqVbVJ7APOEe18RPI4KLhwQ==", "cpu": [ "loong64" ], @@ -218,13 +554,13 @@ "linux" ], "engines": { - "node": ">=12" + "node": ">=18" } }, "node_modules/@esbuild/linux-mips64el": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.21.5.tgz", - "integrity": "sha512-IajOmO+KJK23bj52dFSNCMsz1QP1DqM6cwLUv3W1QwyxkyIWecfafnI555fvSGqEKwjMXVLokcV5ygHW5b3Jbg==", + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.24.2.tgz", + "integrity": "sha512-iMkk7qr/wl3exJATwkISxI7kTcmHKE+BlymIAbHO8xanq/TjHaaVThFF6ipWzPHryoFsesNQJPE/3wFJw4+huw==", "cpu": [ "mips64el" ], @@ -234,13 +570,13 @@ "linux" ], "engines": { - "node": ">=12" + "node": ">=18" } }, "node_modules/@esbuild/linux-ppc64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.21.5.tgz", - "integrity": "sha512-1hHV/Z4OEfMwpLO8rp7CvlhBDnjsC3CttJXIhBi+5Aj5r+MBvy4egg7wCbe//hSsT+RvDAG7s81tAvpL2XAE4w==", + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.24.2.tgz", + "integrity": "sha512-shsVrgCZ57Vr2L8mm39kO5PPIb+843FStGt7sGGoqiiWYconSxwTiuswC1VJZLCjNiMLAMh34jg4VSEQb+iEbw==", "cpu": [ "ppc64" ], @@ -250,13 +586,13 @@ "linux" ], "engines": { - "node": ">=12" + "node": ">=18" } }, "node_modules/@esbuild/linux-riscv64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.21.5.tgz", - "integrity": "sha512-2HdXDMd9GMgTGrPWnJzP2ALSokE/0O5HhTUvWIbD3YdjME8JwvSCnNGBnTThKGEB91OZhzrJ4qIIxk/SBmyDDA==", + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.24.2.tgz", + "integrity": "sha512-4eSFWnU9Hhd68fW16GD0TINewo1L6dRrB+oLNNbYyMUAeOD2yCK5KXGK1GH4qD/kT+bTEXjsyTCiJGHPZ3eM9Q==", "cpu": [ "riscv64" ], @@ -266,13 +602,13 @@ "linux" ], "engines": { - "node": ">=12" + "node": ">=18" } }, "node_modules/@esbuild/linux-s390x": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.21.5.tgz", - "integrity": "sha512-zus5sxzqBJD3eXxwvjN1yQkRepANgxE9lgOW2qLnmr8ikMTphkjgXu1HR01K4FJg8h1kEEDAqDcZQtbrRnB41A==", + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.24.2.tgz", + "integrity": "sha512-S0Bh0A53b0YHL2XEXC20bHLuGMOhFDO6GN4b3YjRLK//Ep3ql3erpNcPlEFed93hsQAjAQDNsvcK+hV90FubSw==", "cpu": [ "s390x" ], @@ -282,13 +618,13 @@ "linux" ], "engines": { - "node": ">=12" + "node": ">=18" } }, "node_modules/@esbuild/linux-x64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.21.5.tgz", - "integrity": 
"sha512-1rYdTpyv03iycF1+BhzrzQJCdOuAOtaqHTWJZCWvijKD2N5Xu0TtVC8/+1faWqcP9iBCWOmjmhoH94dH82BxPQ==", + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.24.2.tgz", + "integrity": "sha512-8Qi4nQcCTbLnK9WoMjdC9NiTG6/E38RNICU6sUNqK0QFxCYgoARqVqxdFmWkdonVsvGqWhmm7MO0jyTqLqwj0Q==", "cpu": [ "x64" ], @@ -298,13 +634,29 @@ "linux" ], "engines": { - "node": ">=12" + "node": ">=18" + } + }, + "node_modules/@esbuild/netbsd-arm64": { + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.24.2.tgz", + "integrity": "sha512-wuLK/VztRRpMt9zyHSazyCVdCXlpHkKm34WUyinD2lzK07FAHTq0KQvZZlXikNWkDGoT6x3TD51jKQ7gMVpopw==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" } }, "node_modules/@esbuild/netbsd-x64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.21.5.tgz", - "integrity": "sha512-Woi2MXzXjMULccIwMnLciyZH4nCIMpWQAs049KEeMvOcNADVxo0UBIQPfSmxB3CWKedngg7sWZdLvLczpe0tLg==", + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.24.2.tgz", + "integrity": "sha512-VefFaQUc4FMmJuAxmIHgUmfNiLXY438XrL4GDNV1Y1H/RW3qow68xTwjZKfj/+Plp9NANmzbH5R40Meudu8mmw==", "cpu": [ "x64" ], @@ -314,13 +666,29 @@ "netbsd" ], "engines": { - "node": ">=12" + "node": ">=18" + } + }, + "node_modules/@esbuild/openbsd-arm64": { + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.24.2.tgz", + "integrity": "sha512-YQbi46SBct6iKnszhSvdluqDmxCJA+Pu280Av9WICNwQmMxV7nLRHZfjQzwbPs3jeWnuAhE9Jy0NrnJ12Oz+0A==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" } }, "node_modules/@esbuild/openbsd-x64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.21.5.tgz", - "integrity": "sha512-HLNNw99xsvx12lFBUwoT8EVCsSvRNDVxNpjZ7bPn947b8gJPzeHWyNVhFsaerc0n3TsbOINvRP2byTZ5LKezow==", + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.24.2.tgz", + "integrity": "sha512-+iDS6zpNM6EnJyWv0bMGLWSWeXGN/HTaF/LXHXHwejGsVi+ooqDfMCCTerNFxEkM3wYVcExkeGXNqshc9iMaOA==", "cpu": [ "x64" ], @@ -330,13 +698,13 @@ "openbsd" ], "engines": { - "node": ">=12" + "node": ">=18" } }, "node_modules/@esbuild/sunos-x64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.21.5.tgz", - "integrity": "sha512-6+gjmFpfy0BHU5Tpptkuh8+uw3mnrvgs+dSPQXQOv3ekbordwnzTVEb4qnIvQcYXq6gzkyTnoZ9dZG+D4garKg==", + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.24.2.tgz", + "integrity": "sha512-hTdsW27jcktEvpwNHJU4ZwWFGkz2zRJUz8pvddmXPtXDzVKTTINmlmga3ZzwcuMpUvLw7JkLy9QLKyGpD2Yxig==", "cpu": [ "x64" ], @@ -346,13 +714,13 @@ "sunos" ], "engines": { - "node": ">=12" + "node": ">=18" } }, "node_modules/@esbuild/win32-arm64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.21.5.tgz", - "integrity": "sha512-Z0gOTd75VvXqyq7nsl93zwahcTROgqvuAcYDUr+vOv8uHhNSKROyU961kgtCD1e95IqPKSQKH7tBTslnS3tA8A==", + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.24.2.tgz", + "integrity": "sha512-LihEQ2BBKVFLOC9ZItT9iFprsE9tqjDjnbulhHoFxYQtQfai7qfluVODIYxt1PgdoyQkz23+01rzwNwYfutxUQ==", "cpu": [ "arm64" ], @@ -362,13 +730,13 @@ 
"win32" ], "engines": { - "node": ">=12" + "node": ">=18" } }, "node_modules/@esbuild/win32-ia32": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.21.5.tgz", - "integrity": "sha512-SWXFF1CL2RVNMaVs+BBClwtfZSvDgtL//G/smwAc5oVK/UPu2Gu9tIaRgFmYFFKrmg3SyAjSrElf0TiJ1v8fYA==", + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.24.2.tgz", + "integrity": "sha512-q+iGUwfs8tncmFC9pcnD5IvRHAzmbwQ3GPS5/ceCyHdjXubwQWI12MKWSNSMYLJMq23/IUCvJMS76PDqXe1fxA==", "cpu": [ "ia32" ], @@ -378,13 +746,13 @@ "win32" ], "engines": { - "node": ">=12" + "node": ">=18" } }, "node_modules/@esbuild/win32-x64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.21.5.tgz", - "integrity": "sha512-tQd/1efJuzPC6rCFwEvLtci/xNFcTZknmXs98FYDfGE4wP9ClFV98nyKrzJKVPMhdDnjzLhdUyMX4PsQAPjwIw==", + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.24.2.tgz", + "integrity": "sha512-7VTgWzgMGvup6aSqDPLiW5zHaxYJGTO4OokMjIlrCtf+VpEL+cXKtCvg723iguPYI5oaUNdS+/V7OU2gvXVWEg==", "cpu": [ "x64" ], @@ -394,13 +762,320 @@ "win32" ], "engines": { - "node": ">=12" + "node": ">=18" + } + }, + "node_modules/@eslint-community/eslint-utils": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.4.1.tgz", + "integrity": "sha512-s3O3waFUrMV8P/XaF/+ZTp1X9XBZW1a4B97ZnjQF2KYWaFD2A8KyFBsrsfSjEmjn3RGWAIuvlneuZm3CUK3jbA==", + "dev": true, + "license": "MIT", + "dependencies": { + "eslint-visitor-keys": "^3.4.3" + }, + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + }, + "peerDependencies": { + "eslint": "^6.0.0 || ^7.0.0 || >=8.0.0" + } + }, + "node_modules/@eslint-community/eslint-utils/node_modules/eslint-visitor-keys": { + "version": "3.4.3", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-3.4.3.tgz", + "integrity": "sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/@eslint-community/regexpp": { + "version": "4.12.1", + "resolved": "https://registry.npmjs.org/@eslint-community/regexpp/-/regexpp-4.12.1.tgz", + "integrity": "sha512-CCZCDJuduB9OUkFkY2IgppNZMi2lBQgD2qzwXkEia16cge2pijY/aXi96CJMquDMn3nJdlPV1A5KrJEXwfLNzQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^12.0.0 || ^14.0.0 || >=16.0.0" + } + }, + "node_modules/@eslint/config-array": { + "version": "0.19.2", + "resolved": "https://registry.npmjs.org/@eslint/config-array/-/config-array-0.19.2.tgz", + "integrity": "sha512-GNKqxfHG2ySmJOBSHg7LxeUx4xpuCoFjacmlCoYWEbaPXLwvfIjixRI12xCQZeULksQb23uiA8F40w5TojpV7w==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@eslint/object-schema": "^2.1.6", + "debug": "^4.3.1", + "minimatch": "^3.1.2" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + } + }, + "node_modules/@eslint/core": { + "version": "0.10.0", + "resolved": "https://registry.npmjs.org/@eslint/core/-/core-0.10.0.tgz", + "integrity": "sha512-gFHJ+xBOo4G3WRlR1e/3G8A6/KZAH6zcE/hkLRCZTi/B9avAG365QhFA8uOGzTMqgTghpn7/fSnscW++dpMSAw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@types/json-schema": "^7.0.15" + }, + "engines": { + 
"node": "^18.18.0 || ^20.9.0 || >=21.1.0" + } + }, + "node_modules/@eslint/eslintrc": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-3.2.0.tgz", + "integrity": "sha512-grOjVNN8P3hjJn/eIETF1wwd12DdnwFDoyceUJLYYdkpbwq3nLi+4fqrTAONx7XDALqlL220wC/RHSC/QTI/0w==", + "dev": true, + "license": "MIT", + "dependencies": { + "ajv": "^6.12.4", + "debug": "^4.3.2", + "espree": "^10.0.1", + "globals": "^14.0.0", + "ignore": "^5.2.0", + "import-fresh": "^3.2.1", + "js-yaml": "^4.1.0", + "minimatch": "^3.1.2", + "strip-json-comments": "^3.1.1" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/@eslint/eslintrc/node_modules/globals": { + "version": "14.0.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-14.0.0.tgz", + "integrity": "sha512-oahGvuMGQlPw/ivIYBjVSrWAfWLBeku5tpPE2fOPLi+WHffIWbuh2tCjhyQhTBPMf5E9jDEH4FOmTYgYwbKwtQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/@eslint/js": { + "version": "9.19.0", + "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.19.0.tgz", + "integrity": "sha512-rbq9/g38qjfqFLOVPvwjIvFFdNziEC5S65jmjPw5r6A//QH+W91akh9irMwjDN8zKUTak6W9EsAv4m/7Wnw0UQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + } + }, + "node_modules/@eslint/object-schema": { + "version": "2.1.6", + "resolved": "https://registry.npmjs.org/@eslint/object-schema/-/object-schema-2.1.6.tgz", + "integrity": "sha512-RBMg5FRL0I0gs51M/guSAj5/e14VQ4tpZnQNWwuDT66P14I43ItmPfIZRhO9fUVIPOAQXU47atlywZ/czoqFPA==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + } + }, + "node_modules/@eslint/plugin-kit": { + "version": "0.2.5", + "resolved": "https://registry.npmjs.org/@eslint/plugin-kit/-/plugin-kit-0.2.5.tgz", + "integrity": "sha512-lB05FkqEdUg2AA0xEbUz0SnkXT1LcCTa438W4IWTUh4hdOnVbQyOJ81OrDXsJk/LSiJHubgGEFoR5EHq1NsH1A==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@eslint/core": "^0.10.0", + "levn": "^0.4.1" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + } + }, + "node_modules/@heroicons/react": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@heroicons/react/-/react-2.2.0.tgz", + "integrity": "sha512-LMcepvRaS9LYHJGsF0zzmgKCUim/X3N/DQKc4jepAXJ7l8QxJ1PmxJzqplF2Z3FE4PqBAIGyJAQ/w4B5dsqbtQ==", + "license": "MIT", + "peerDependencies": { + "react": ">= 16 || ^19.0.0-rc" + } + }, + "node_modules/@humanfs/core": { + "version": "0.19.1", + "resolved": "https://registry.npmjs.org/@humanfs/core/-/core-0.19.1.tgz", + "integrity": "sha512-5DyQ4+1JEUzejeK1JGICcideyfUbGixgS9jNgex5nqkW+cY7WZhxBigmieN5Qnw9ZosSNVC9KQKyb+GUaGyKUA==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=18.18.0" + } + }, + "node_modules/@humanfs/node": { + "version": "0.16.6", + "resolved": "https://registry.npmjs.org/@humanfs/node/-/node-0.16.6.tgz", + "integrity": "sha512-YuI2ZHQL78Q5HbhDiBA1X4LmYdXCKCMQIfw0pw7piHJwyREFebJUvrQN4cMssyES6x+vfUbx1CIpaQUKYdQZOw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@humanfs/core": "^0.19.1", + "@humanwhocodes/retry": "^0.3.0" + }, + "engines": { + "node": ">=18.18.0" + } + }, + "node_modules/@humanfs/node/node_modules/@humanwhocodes/retry": { + "version": "0.3.1", + "resolved": 
"https://registry.npmjs.org/@humanwhocodes/retry/-/retry-0.3.1.tgz", + "integrity": "sha512-JBxkERygn7Bv/GbN5Rv8Ul6LVknS+5Bp6RgDC/O8gEBU/yeH5Ui5C/OlWrTb6qct7LjjfT6Re2NxB0ln0yYybA==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=18.18" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/nzakas" + } + }, + "node_modules/@humanwhocodes/module-importer": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@humanwhocodes/module-importer/-/module-importer-1.0.1.tgz", + "integrity": "sha512-bxveV4V8v5Yb4ncFTT3rPSgZBOpCkjfK0y4oVVVJwIuDVBRMDXrPyXRL988i5ap9m9bnyEEjWfm5WkBmtffLfA==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=12.22" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/nzakas" + } + }, + "node_modules/@humanwhocodes/retry": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/@humanwhocodes/retry/-/retry-0.4.1.tgz", + "integrity": "sha512-c7hNEllBlenFTHBky65mhq8WD2kbN9Q6gk0bTk8lSBvc554jpXSkST1iePudpt7+A/AQvuHs9EMqjHDXMY1lrA==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=18.18" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/nzakas" + } + }, + "node_modules/@jridgewell/gen-mapping": { + "version": "0.3.8", + "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.8.tgz", + "integrity": "sha512-imAbBGkb+ebQyxKgzv5Hu2nmROxoDOXHh80evxdoXNOrvAnVx7zimzc1Oo5h9RlfV4vPXaE2iM5pOFbvOCClWA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/set-array": "^1.2.1", + "@jridgewell/sourcemap-codec": "^1.4.10", + "@jridgewell/trace-mapping": "^0.3.24" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/resolve-uri": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", + "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/set-array": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/@jridgewell/set-array/-/set-array-1.2.1.tgz", + "integrity": "sha512-R8gLRTZeyp03ymzP/6Lil/28tGeGEzhx1q2k703KGWRAI1VdvPIXdG70VJc2pAMw3NA6JKL5hhFu1sJX0Mnn/A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.0.tgz", + "integrity": "sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.25", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.25.tgz", + "integrity": "sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/resolve-uri": "^3.1.0", + "@jridgewell/sourcemap-codec": "^1.4.14" + } + }, + "node_modules/@nodelib/fs.scandir": { + "version": "2.1.5", + "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", + "integrity": "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==", + "dev": true, + "license": "MIT", + "dependencies": { + "@nodelib/fs.stat": "2.0.5", + 
"run-parallel": "^1.1.9" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/@nodelib/fs.stat": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz", + "integrity": "sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 8" + } + }, + "node_modules/@nodelib/fs.walk": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz", + "integrity": "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@nodelib/fs.scandir": "2.1.5", + "fastq": "^1.6.0" + }, + "engines": { + "node": ">= 8" } }, "node_modules/@rollup/rollup-android-arm-eabi": { - "version": "4.28.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.28.0.tgz", - "integrity": "sha512-wLJuPLT6grGZsy34g4N1yRfYeouklTgPhH1gWXCYspenKYD0s3cR99ZevOGw5BexMNywkbV3UkjADisozBmpPQ==", + "version": "4.34.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.34.2.tgz", + "integrity": "sha512-6Fyg9yQbwJR+ykVdT9sid1oc2ewejS6h4wzQltmJfSW53N60G/ah9pngXGANdy9/aaE/TcUFpWosdm7JXS1WTQ==", "cpu": [ "arm" ], @@ -411,9 +1086,9 @@ ] }, "node_modules/@rollup/rollup-android-arm64": { - "version": "4.28.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.28.0.tgz", - "integrity": "sha512-eiNkznlo0dLmVG/6wf+Ifi/v78G4d4QxRhuUl+s8EWZpDewgk7PX3ZyECUXU0Zq/Ca+8nU8cQpNC4Xgn2gFNDA==", + "version": "4.34.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.34.2.tgz", + "integrity": "sha512-K5GfWe+vtQ3kyEbihrimM38UgX57UqHp+oME7X/EX9Im6suwZfa7Hsr8AtzbJvukTpwMGs+4s29YMSO3rwWtsw==", "cpu": [ "arm64" ], @@ -423,10 +1098,23 @@ "android" ] }, + "node_modules/@rollup/rollup-darwin-arm64": { + "version": "4.34.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.34.2.tgz", + "integrity": "sha512-PSN58XG/V/tzqDb9kDGutUruycgylMlUE59f40ny6QIRNsTEIZsrNQTJKUN2keMMSmlzgunMFqyaGLmly39sug==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ] + }, "node_modules/@rollup/rollup-darwin-x64": { - "version": "4.28.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.28.0.tgz", - "integrity": "sha512-8hxgfReVs7k9Js1uAIhS6zq3I+wKQETInnWQtgzt8JfGx51R1N6DRVy3F4o0lQwumbErRz52YqwjfvuwRxGv1w==", + "version": "4.34.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.34.2.tgz", + "integrity": "sha512-gQhK788rQJm9pzmXyfBB84VHViDERhAhzGafw+E5mUpnGKuxZGkMVDa3wgDFKT6ukLC5V7QTifzsUKdNVxp5qQ==", "cpu": [ "x64" ], @@ -437,9 +1125,9 @@ ] }, "node_modules/@rollup/rollup-freebsd-arm64": { - "version": "4.28.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.28.0.tgz", - "integrity": "sha512-lA1zZB3bFx5oxu9fYud4+g1mt+lYXCoch0M0V/xhqLoGatbzVse0wlSQ1UYOWKpuSu3gyN4qEc0Dxf/DII1bhQ==", + "version": "4.34.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.34.2.tgz", + "integrity": "sha512-eiaHgQwGPpxLC3+zTAcdKl4VsBl3r0AiJOd1Um/ArEzAjN/dbPK1nROHrVkdnoE6p7Svvn04w3f/jEZSTVHunA==", "cpu": [ "arm64" ], @@ -450,9 +1138,9 @@ ] }, 
"node_modules/@rollup/rollup-freebsd-x64": { - "version": "4.28.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.28.0.tgz", - "integrity": "sha512-aI2plavbUDjCQB/sRbeUZWX9qp12GfYkYSJOrdYTL/C5D53bsE2/nBPuoiJKoWp5SN78v2Vr8ZPnB+/VbQ2pFA==", + "version": "4.34.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.34.2.tgz", + "integrity": "sha512-lhdiwQ+jf8pewYOTG4bag0Qd68Jn1v2gO1i0mTuiD+Qkt5vNfHVK/jrT7uVvycV8ZchlzXp5HDVmhpzjC6mh0g==", "cpu": [ "x64" ], @@ -463,9 +1151,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm-gnueabihf": { - "version": "4.28.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.28.0.tgz", - "integrity": "sha512-WXveUPKtfqtaNvpf0iOb0M6xC64GzUX/OowbqfiCSXTdi/jLlOmH0Ba94/OkiY2yTGTwteo4/dsHRfh5bDCZ+w==", + "version": "4.34.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.34.2.tgz", + "integrity": "sha512-lfqTpWjSvbgQP1vqGTXdv+/kxIznKXZlI109WkIFPbud41bjigjNmOAAKoazmRGx+k9e3rtIdbq2pQZPV1pMig==", "cpu": [ "arm" ], @@ -476,9 +1164,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm-musleabihf": { - "version": "4.28.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.28.0.tgz", - "integrity": "sha512-yLc3O2NtOQR67lI79zsSc7lk31xjwcaocvdD1twL64PK1yNaIqCeWI9L5B4MFPAVGEVjH5k1oWSGuYX1Wutxpg==", + "version": "4.34.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.34.2.tgz", + "integrity": "sha512-RGjqULqIurqqv+NJTyuPgdZhka8ImMLB32YwUle2BPTDqDoXNgwFjdjQC59FbSk08z0IqlRJjrJ0AvDQ5W5lpw==", "cpu": [ "arm" ], @@ -489,9 +1177,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm64-gnu": { - "version": "4.28.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.28.0.tgz", - "integrity": "sha512-+P9G9hjEpHucHRXqesY+3X9hD2wh0iNnJXX/QhS/J5vTdG6VhNYMxJ2rJkQOxRUd17u5mbMLHM7yWGZdAASfcg==", + "version": "4.34.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.34.2.tgz", + "integrity": "sha512-ZvkPiheyXtXlFqHpsdgscx+tZ7hoR59vOettvArinEspq5fxSDSgfF+L5wqqJ9R4t+n53nyn0sKxeXlik7AY9Q==", "cpu": [ "arm64" ], @@ -502,9 +1190,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm64-musl": { - "version": "4.28.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.28.0.tgz", - "integrity": "sha512-1xsm2rCKSTpKzi5/ypT5wfc+4bOGa/9yI/eaOLW0oMs7qpC542APWhl4A37AENGZ6St6GBMWhCCMM6tXgTIplw==", + "version": "4.34.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.34.2.tgz", + "integrity": "sha512-UlFk+E46TZEoxD9ufLKDBzfSG7Ki03fo6hsNRRRHF+KuvNZ5vd1RRVQm8YZlGsjcJG8R252XFK0xNPay+4WV7w==", "cpu": [ "arm64" ], @@ -514,10 +1202,23 @@ "linux" ] }, + "node_modules/@rollup/rollup-linux-loongarch64-gnu": { + "version": "4.34.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loongarch64-gnu/-/rollup-linux-loongarch64-gnu-4.34.2.tgz", + "integrity": "sha512-hJhfsD9ykx59jZuuoQgYT1GEcNNi3RCoEmbo5OGfG8RlHOiVS7iVNev9rhLKh7UBYq409f4uEw0cclTXx8nh8Q==", + "cpu": [ + "loong64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, "node_modules/@rollup/rollup-linux-powerpc64le-gnu": { - "version": "4.28.0", - "resolved": 
"https://registry.npmjs.org/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.28.0.tgz", - "integrity": "sha512-zgWxMq8neVQeXL+ouSf6S7DoNeo6EPgi1eeqHXVKQxqPy1B2NvTbaOUWPn/7CfMKL7xvhV0/+fq/Z/J69g1WAQ==", + "version": "4.34.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.34.2.tgz", + "integrity": "sha512-g/O5IpgtrQqPegvqopvmdCF9vneLE7eqYfdPWW8yjPS8f63DNam3U4ARL1PNNB64XHZDHKpvO2Giftf43puB8Q==", "cpu": [ "ppc64" ], @@ -528,9 +1229,9 @@ ] }, "node_modules/@rollup/rollup-linux-riscv64-gnu": { - "version": "4.28.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.28.0.tgz", - "integrity": "sha512-VEdVYacLniRxbRJLNtzwGt5vwS0ycYshofI7cWAfj7Vg5asqj+pt+Q6x4n+AONSZW/kVm+5nklde0qs2EUwU2g==", + "version": "4.34.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.34.2.tgz", + "integrity": "sha512-bSQijDC96M6PuooOuXHpvXUYiIwsnDmqGU8+br2U7iPoykNi9JtMUpN7K6xml29e0evK0/g0D1qbAUzWZFHY5Q==", "cpu": [ "riscv64" ], @@ -541,9 +1242,9 @@ ] }, "node_modules/@rollup/rollup-linux-s390x-gnu": { - "version": "4.28.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.28.0.tgz", - "integrity": "sha512-LQlP5t2hcDJh8HV8RELD9/xlYtEzJkm/aWGsauvdO2ulfl3QYRjqrKW+mGAIWP5kdNCBheqqqYIGElSRCaXfpw==", + "version": "4.34.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.34.2.tgz", + "integrity": "sha512-49TtdeVAsdRuiUHXPrFVucaP4SivazetGUVH8CIxVsNsaPHV4PFkpLmH9LeqU/R4Nbgky9lzX5Xe1NrzLyraVA==", "cpu": [ "s390x" ], @@ -554,9 +1255,9 @@ ] }, "node_modules/@rollup/rollup-linux-x64-gnu": { - "version": "4.28.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.28.0.tgz", - "integrity": "sha512-Nl4KIzteVEKE9BdAvYoTkW19pa7LR/RBrT6F1dJCV/3pbjwDcaOq+edkP0LXuJ9kflW/xOK414X78r+K84+msw==", + "version": "4.34.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.34.2.tgz", + "integrity": "sha512-j+jFdfOycLIQ7FWKka9Zd3qvsIyugg5LeZuHF6kFlXo6MSOc6R1w37YUVy8VpAKd81LMWGi5g9J25P09M0SSIw==", "cpu": [ "x64" ], @@ -567,9 +1268,9 @@ ] }, "node_modules/@rollup/rollup-linux-x64-musl": { - "version": "4.28.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.28.0.tgz", - "integrity": "sha512-eKpJr4vBDOi4goT75MvW+0dXcNUqisK4jvibY9vDdlgLx+yekxSm55StsHbxUsRxSTt3JEQvlr3cGDkzcSP8bw==", + "version": "4.34.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.34.2.tgz", + "integrity": "sha512-aDPHyM/D2SpXfSNCVWCxyHmOqN9qb7SWkY1+vaXqMNMXslZYnwh9V/UCudl6psyG0v6Ukj7pXanIpfZwCOEMUg==", "cpu": [ "x64" ], @@ -580,9 +1281,9 @@ ] }, "node_modules/@rollup/rollup-win32-arm64-msvc": { - "version": "4.28.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.28.0.tgz", - "integrity": "sha512-Vi+WR62xWGsE/Oj+mD0FNAPY2MEox3cfyG0zLpotZdehPFXwz6lypkGs5y38Jd/NVSbOD02aVad6q6QYF7i8Bg==", + "version": "4.34.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.34.2.tgz", + "integrity": "sha512-LQRkCyUBnAo7r8dbEdtNU08EKLCJMgAk2oP5H3R7BnUlKLqgR3dUjrLBVirmc1RK6U6qhtDw29Dimeer8d5hzQ==", "cpu": [ "arm64" ], @@ -593,9 +1294,9 @@ ] }, "node_modules/@rollup/rollup-win32-ia32-msvc": { - "version": "4.28.0", 
- "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.28.0.tgz", - "integrity": "sha512-kN/Vpip8emMLn/eOza+4JwqDZBL6MPNpkdaEsgUtW1NYN3DZvZqSQrbKzJcTL6hd8YNmFTn7XGWMwccOcJBL0A==", + "version": "4.34.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.34.2.tgz", + "integrity": "sha512-wt8OhpQUi6JuPFkm1wbVi1BByeag87LDFzeKSXzIdGcX4bMLqORTtKxLoCbV57BHYNSUSOKlSL4BYYUghainYA==", "cpu": [ "ia32" ], @@ -606,9 +1307,9 @@ ] }, "node_modules/@rollup/rollup-win32-x64-msvc": { - "version": "4.28.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.28.0.tgz", - "integrity": "sha512-Bvno2/aZT6usSa7lRDL2+hMjVAGjuqaymF1ApZm31JXzniR/hvr14jpU+/z4X6Gt5BPlzosscyJZGUvguXIqeQ==", + "version": "4.34.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.34.2.tgz", + "integrity": "sha512-rUrqINax0TvrPBXrFKg0YbQx18NpPN3NNrgmaao9xRNbTwek7lOXObhx8tQy8gelmQ/gLaGy1WptpU2eKJZImg==", "cpu": [ "x64" ], @@ -624,6 +1325,667 @@ "integrity": "sha512-uiBh8DrB5FN35gP6/o8JEhEQ7/ci1jUsOZO/VMUjyvTpjtV54VstOXVj1TvTj/wsT23pfX6butxxh3qufsW3+g==", "license": "MIT" }, + "node_modules/@tailwindcss/node": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/node/-/node-4.1.1.tgz", + "integrity": "sha512-xvlh4pvfG/bkv0fEtJDABAm1tjtSmSyi2QmS4zyj1EKNI1UiOYiUq1IphSwDsNJ5vJ9cWEGs4rJXpUdCN2kujQ==", + "license": "MIT", + "dependencies": { + "enhanced-resolve": "^5.18.1", + "jiti": "^2.4.2", + "lightningcss": "1.29.2", + "tailwindcss": "4.1.1" + } + }, + "node_modules/@tailwindcss/oxide": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide/-/oxide-4.1.1.tgz", + "integrity": "sha512-7+YBgnPQ4+jv6B6WVOerJ6WOzDzNJXrRKDts674v6TKAqFqYRr9+EBtSziO7nNcwQ8JtoZNMeqA+WJDjtCM/7w==", + "license": "MIT", + "engines": { + "node": ">= 10" + }, + "optionalDependencies": { + "@tailwindcss/oxide-android-arm64": "4.1.1", + "@tailwindcss/oxide-darwin-arm64": "4.1.1", + "@tailwindcss/oxide-darwin-x64": "4.1.1", + "@tailwindcss/oxide-freebsd-x64": "4.1.1", + "@tailwindcss/oxide-linux-arm-gnueabihf": "4.1.1", + "@tailwindcss/oxide-linux-arm64-gnu": "4.1.1", + "@tailwindcss/oxide-linux-arm64-musl": "4.1.1", + "@tailwindcss/oxide-linux-x64-gnu": "4.1.1", + "@tailwindcss/oxide-linux-x64-musl": "4.1.1", + "@tailwindcss/oxide-win32-arm64-msvc": "4.1.1", + "@tailwindcss/oxide-win32-x64-msvc": "4.1.1" + } + }, + "node_modules/@tailwindcss/oxide-android-arm64": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-android-arm64/-/oxide-android-arm64-4.1.1.tgz", + "integrity": "sha512-gTyRzfdParpoCU1yyUC/iN6XK6T0Ra4bDlF8Aeul5NP9cLzKEZDogdNVNGv5WZmCDkVol7qlex7TMmcfytMmmw==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@tailwindcss/oxide-darwin-arm64": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-arm64/-/oxide-darwin-arm64-4.1.1.tgz", + "integrity": "sha512-dI0QbdMWBvLB3MtaTKetzUKG9CUUQow8JSP4Nm+OxVokeZ+N+f1OmZW/hW1LzMxpx9RQCBgSRL+IIvKRat5Wdg==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@tailwindcss/oxide-darwin-x64": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-x64/-/oxide-darwin-x64-4.1.1.tgz", + 
"integrity": "sha512-2Y+NPQOTRBCItshPgY/CWg4bKi7E9evMg4bgdb6h9iZObCZLOe3doPcuSxGS3DB0dKyMFKE8pTdWtFUbxZBMSA==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@tailwindcss/oxide-freebsd-x64": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-freebsd-x64/-/oxide-freebsd-x64-4.1.1.tgz", + "integrity": "sha512-N97NGMsB/7CHShbc5ube4dcsW/bYENkBrg8yWi8ieN9boYVRdw3cZviVryV/Nfu9bKbBV9kUvduFF2qBI7rEqg==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@tailwindcss/oxide-linux-arm-gnueabihf": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm-gnueabihf/-/oxide-linux-arm-gnueabihf-4.1.1.tgz", + "integrity": "sha512-33Lk6KbHnUZbXqza6RWNFo9wqPQ4+H5BAn1CkUUfC1RZ1vYbyDN6+iJPj53wmnWJ3mhRI8jWt3Jt1fO02IVdUQ==", + "cpu": [ + "arm" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@tailwindcss/oxide-linux-arm64-gnu": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-gnu/-/oxide-linux-arm64-gnu-4.1.1.tgz", + "integrity": "sha512-LyW35RzSUy+80WYScv03HKasAUmMFDaSbNpWfk1gG5gEE9kuRGnDzSrqMoLAmY/kzMCYP/1kqmUiAx8EFLkI2A==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@tailwindcss/oxide-linux-arm64-musl": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-musl/-/oxide-linux-arm64-musl-4.1.1.tgz", + "integrity": "sha512-1KPnDMlHdqjPTUSFjx55pafvs8RZXRgxfeRgUrukwDKkuj7gFk28vW3Mx65YdiugAc9NWs3VgueZWaM1Po6uGw==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@tailwindcss/oxide-linux-x64-gnu": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-gnu/-/oxide-linux-x64-gnu-4.1.1.tgz", + "integrity": "sha512-4WdzA+MRlsinEEE6yxNMLJxpw0kE9XVipbAKdTL8BeUpyC2TdA3TL46lBulXzKp3BIxh3nqyR/UCqzl5o+3waQ==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@tailwindcss/oxide-linux-x64-musl": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-musl/-/oxide-linux-x64-musl-4.1.1.tgz", + "integrity": "sha512-q7Ugbw3ARcjCW2VMUYrcMbJ6aMQuWPArBBE2EqC/swPZTdGADvMQSlvR0VKusUM4HoSsO7ZbvcZ53YwR57+AKw==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@tailwindcss/oxide-win32-arm64-msvc": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.1.1.tgz", + "integrity": "sha512-0KpqsovgHcIzm7eAGzzEZsEs0/nPYXnRBv+aPq/GehpNQuE/NAQu+YgZXIIof+VflDFuyXOEnaFr7T5MZ1INhA==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@tailwindcss/oxide-win32-x64-msvc": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-x64-msvc/-/oxide-win32-x64-msvc-4.1.1.tgz", + "integrity": 
"sha512-B1mjeXNS26kBOHv5sXARf6Wd0PWHV9x1TDlW0ummrBUOUAxAy5wcy4Nii1wzNvCdvC448hgiL06ylhwAbNthmg==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@tailwindcss/postcss": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/postcss/-/postcss-4.1.1.tgz", + "integrity": "sha512-GX9AEM+msH0i2Yh1b6CuDRaZRo3kmbvIrLbSfvJ53C3uaAgsQ//fTQAh9HMQ6t1a9zvoUptlYqG//plWsBQTCw==", + "license": "MIT", + "dependencies": { + "@alloc/quick-lru": "^5.2.0", + "@tailwindcss/node": "4.1.1", + "@tailwindcss/oxide": "4.1.1", + "postcss": "^8.4.41", + "tailwindcss": "4.1.1" + } + }, + "node_modules/@tailwindcss/vite": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/vite/-/vite-4.1.1.tgz", + "integrity": "sha512-tFTkRZwXq4XKr3S2dUZBxy80wbWYHdDSsu4QOB1yE1HJFKjfxKVpXtup4dyTVdQcLInoHC9lZXFPHnjoBP774g==", + "license": "MIT", + "dependencies": { + "@tailwindcss/node": "4.1.1", + "@tailwindcss/oxide": "4.1.1", + "tailwindcss": "4.1.1" + }, + "peerDependencies": { + "vite": "^5.2.0 || ^6" + } + }, + "node_modules/@types/babel__core": { + "version": "7.20.5", + "resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.5.tgz", + "integrity": "sha512-qoQprZvz5wQFJwMDqeseRXWv3rqMvhgpbXFfVyWhbx9X47POIA6i/+dXefEmZKoAgOaTdaIgNSMqMIU61yRyzA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/parser": "^7.20.7", + "@babel/types": "^7.20.7", + "@types/babel__generator": "*", + "@types/babel__template": "*", + "@types/babel__traverse": "*" + } + }, + "node_modules/@types/babel__generator": { + "version": "7.6.8", + "resolved": "https://registry.npmjs.org/@types/babel__generator/-/babel__generator-7.6.8.tgz", + "integrity": "sha512-ASsj+tpEDsEiFr1arWrlN6V3mdfjRMZt6LtK/Vp/kreFLnr5QH5+DhvD5nINYZXzwJvXeGq+05iUXcAzVrqWtw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/types": "^7.0.0" + } + }, + "node_modules/@types/babel__template": { + "version": "7.4.4", + "resolved": "https://registry.npmjs.org/@types/babel__template/-/babel__template-7.4.4.tgz", + "integrity": "sha512-h/NUaSyG5EyxBIp8YRxo4RMe2/qQgvyowRwVMzhYhBCONbW8PUsg4lkFMrhgZhUe5z3L3MiLDuvyJ/CaPa2A8A==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/parser": "^7.1.0", + "@babel/types": "^7.0.0" + } + }, + "node_modules/@types/babel__traverse": { + "version": "7.20.6", + "resolved": "https://registry.npmjs.org/@types/babel__traverse/-/babel__traverse-7.20.6.tgz", + "integrity": "sha512-r1bzfrm0tomOI8g1SzvCaQHo6Lcv6zu0EA+W2kHrt8dyrHQxGzBBL4kdkzIS+jBMV+EYcMAEAqXqYaLJq5rOZg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/types": "^7.20.7" + } + }, + "node_modules/@types/cookie": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/@types/cookie/-/cookie-0.6.0.tgz", + "integrity": "sha512-4Kh9a6B2bQciAhf7FSuMRRkUWecJgJu9nPnx3yzpsfXX/c50REIqpHY4C82bXP90qrLtXtkDxTZosYO3UpOwlA==", + "license": "MIT" + }, + "node_modules/@types/debug": { + "version": "4.1.12", + "resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.12.tgz", + "integrity": "sha512-vIChWdVG3LG1SMxEvI/AK+FWJthlrqlTu7fbrlywTkkaONwk/UAGaULXRlf8vkzFBLVm0zkMdCquhL5aOjhXPQ==", + "license": "MIT", + "dependencies": { + "@types/ms": "*" + } + }, + "node_modules/@types/estree": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.6.tgz", + "integrity": 
"sha512-AYnb1nQyY49te+VRAVgmzfcgjYS91mY5P0TKUDCLEM+gNnA+3T6rWITXRLYCpahpqSQbN5cE+gHpnPyXjHWxcw==", + "license": "MIT" + }, + "node_modules/@types/estree-jsx": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/@types/estree-jsx/-/estree-jsx-1.0.5.tgz", + "integrity": "sha512-52CcUVNFyfb1A2ALocQw/Dd1BQFNmSdkuC3BkZ6iqhdMfQz7JWOFRuJFloOzjk+6WijU56m9oKXFAXc7o3Towg==", + "license": "MIT", + "dependencies": { + "@types/estree": "*" + } + }, + "node_modules/@types/hast": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz", + "integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "*" + } + }, + "node_modules/@types/json-schema": { + "version": "7.0.15", + "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz", + "integrity": "sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/katex": { + "version": "0.16.7", + "resolved": "https://registry.npmjs.org/@types/katex/-/katex-0.16.7.tgz", + "integrity": "sha512-HMwFiRujE5PjrgwHQ25+bsLJgowjGjm5Z8FVSf0N6PwgJrwxH0QxzHYDcKsTfV3wva0vzrpqMTJS2jXPr5BMEQ==", + "license": "MIT" + }, + "node_modules/@types/linkify-it": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/@types/linkify-it/-/linkify-it-5.0.0.tgz", + "integrity": "sha512-sVDA58zAw4eWAffKOaQH5/5j3XeayukzDk+ewSsnv3p4yJEZHCCzMDiZM8e0OUrRvmpGZ85jf4yDHkHsgBNr9Q==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/markdown-it": { + "version": "14.1.2", + "resolved": "https://registry.npmjs.org/@types/markdown-it/-/markdown-it-14.1.2.tgz", + "integrity": "sha512-promo4eFwuiW+TfGxhi+0x3czqTYJkG8qB17ZUJiVF10Xm7NLVRSLUsfRTU/6h1e24VvRnXCx+hG7li58lkzog==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/linkify-it": "^5", + "@types/mdurl": "^2" + } + }, + "node_modules/@types/mdast": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/@types/mdast/-/mdast-4.0.4.tgz", + "integrity": "sha512-kGaNbPh1k7AFzgpud/gMdvIm5xuECykRR+JnWKQno9TAXVa6WIVCGTPvYGekIDL4uwCZQSYbUxNBSb1aUo79oA==", + "license": "MIT", + "dependencies": { + "@types/unist": "*" + } + }, + "node_modules/@types/mdurl": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/@types/mdurl/-/mdurl-2.0.0.tgz", + "integrity": "sha512-RGdgjQUZba5p6QEFAVx2OGb8rQDL/cPRG7GiedRzMcJ1tYnUANBncjbSB1NRGwbvjcPeikRABz2nshyPk1bhWg==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/ms": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/@types/ms/-/ms-2.1.0.tgz", + "integrity": "sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA==", + "license": "MIT" + }, + "node_modules/@types/node": { + "version": "22.13.1", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.13.1.tgz", + "integrity": "sha512-jK8uzQlrvXqEU91UxiK5J7pKHyzgnI1Qnl0QDHIgVGuolJhRb9EEl28Cj9b3rGR8B2lhFCtvIm5os8lFnO/1Ew==", + "devOptional": true, + "license": "MIT", + "dependencies": { + "undici-types": "~6.20.0" + } + }, + "node_modules/@types/prop-types": { + "version": "15.7.14", + "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.14.tgz", + "integrity": "sha512-gNMvNH49DJ7OJYv+KAKn0Xp45p8PLl6zo2YnvDIbTd4J6MER2BmWN49TG7n9LvkyihINxeKW8+3bfS2yDC9dzQ==", + "license": "MIT" + }, + "node_modules/@types/react": { + "version": 
"18.3.18", + "resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.18.tgz", + "integrity": "sha512-t4yC+vtgnkYjNSKlFx1jkAhH8LgTo2N/7Qvi83kdEaUtMDiwpbLAktKDaAMlRcJ5eSxZkH74eEGt1ky31d7kfQ==", + "license": "MIT", + "dependencies": { + "@types/prop-types": "*", + "csstype": "^3.0.2" + } + }, + "node_modules/@types/react-dom": { + "version": "18.3.5", + "resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-18.3.5.tgz", + "integrity": "sha512-P4t6saawp+b/dFrUr2cvkVsfvPguwsxtH6dNIYRllMsefqFzkZk5UIjzyDOv5g1dXIPdG4Sp1yCR4Z6RCUsG/Q==", + "dev": true, + "license": "MIT", + "peerDependencies": { + "@types/react": "^18.0.0" + } + }, + "node_modules/@types/unist": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.3.tgz", + "integrity": "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q==", + "license": "MIT" + }, + "node_modules/@typescript-eslint/eslint-plugin": { + "version": "8.23.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.23.0.tgz", + "integrity": "sha512-vBz65tJgRrA1Q5gWlRfvoH+w943dq9K1p1yDBY2pc+a1nbBLZp7fB9+Hk8DaALUbzjqlMfgaqlVPT1REJdkt/w==", + "dev": true, + "license": "MIT", + "dependencies": { + "@eslint-community/regexpp": "^4.10.0", + "@typescript-eslint/scope-manager": "8.23.0", + "@typescript-eslint/type-utils": "8.23.0", + "@typescript-eslint/utils": "8.23.0", + "@typescript-eslint/visitor-keys": "8.23.0", + "graphemer": "^1.4.0", + "ignore": "^5.3.1", + "natural-compare": "^1.4.0", + "ts-api-utils": "^2.0.1" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "@typescript-eslint/parser": "^8.0.0 || ^8.0.0-alpha.0", + "eslint": "^8.57.0 || ^9.0.0", + "typescript": ">=4.8.4 <5.8.0" + } + }, + "node_modules/@typescript-eslint/parser": { + "version": "8.23.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.23.0.tgz", + "integrity": "sha512-h2lUByouOXFAlMec2mILeELUbME5SZRN/7R9Cw2RD2lRQQY08MWMM+PmVVKKJNK1aIwqTo9t/0CvOxwPbRIE2Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "@typescript-eslint/scope-manager": "8.23.0", + "@typescript-eslint/types": "8.23.0", + "@typescript-eslint/typescript-estree": "8.23.0", + "@typescript-eslint/visitor-keys": "8.23.0", + "debug": "^4.3.4" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "eslint": "^8.57.0 || ^9.0.0", + "typescript": ">=4.8.4 <5.8.0" + } + }, + "node_modules/@typescript-eslint/scope-manager": { + "version": "8.23.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.23.0.tgz", + "integrity": "sha512-OGqo7+dXHqI7Hfm+WqkZjKjsiRtFUQHPdGMXzk5mYXhJUedO7e/Y7i8AK3MyLMgZR93TX4bIzYrfyVjLC+0VSw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@typescript-eslint/types": "8.23.0", + "@typescript-eslint/visitor-keys": "8.23.0" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + } + }, + "node_modules/@typescript-eslint/type-utils": { + "version": "8.23.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.23.0.tgz", + "integrity": 
"sha512-iIuLdYpQWZKbiH+RkCGc6iu+VwscP5rCtQ1lyQ7TYuKLrcZoeJVpcLiG8DliXVkUxirW/PWlmS+d6yD51L9jvA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@typescript-eslint/typescript-estree": "8.23.0", + "@typescript-eslint/utils": "8.23.0", + "debug": "^4.3.4", + "ts-api-utils": "^2.0.1" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "eslint": "^8.57.0 || ^9.0.0", + "typescript": ">=4.8.4 <5.8.0" + } + }, + "node_modules/@typescript-eslint/types": { + "version": "8.23.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.23.0.tgz", + "integrity": "sha512-1sK4ILJbCmZOTt9k4vkoulT6/y5CHJ1qUYxqpF1K/DBAd8+ZUL4LlSCxOssuH5m4rUaaN0uS0HlVPvd45zjduQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + } + }, + "node_modules/@typescript-eslint/typescript-estree": { + "version": "8.23.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.23.0.tgz", + "integrity": "sha512-LcqzfipsB8RTvH8FX24W4UUFk1bl+0yTOf9ZA08XngFwMg4Kj8A+9hwz8Cr/ZS4KwHrmo9PJiLZkOt49vPnuvQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@typescript-eslint/types": "8.23.0", + "@typescript-eslint/visitor-keys": "8.23.0", + "debug": "^4.3.4", + "fast-glob": "^3.3.2", + "is-glob": "^4.0.3", + "minimatch": "^9.0.4", + "semver": "^7.6.0", + "ts-api-utils": "^2.0.1" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "typescript": ">=4.8.4 <5.8.0" + } + }, + "node_modules/@typescript-eslint/typescript-estree/node_modules/brace-expansion": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", + "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0" + } + }, + "node_modules/@typescript-eslint/typescript-estree/node_modules/minimatch": { + "version": "9.0.5", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", + "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", + "dev": true, + "license": "ISC", + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=16 || 14 >=14.17" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/@typescript-eslint/typescript-estree/node_modules/semver": { + "version": "7.7.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.1.tgz", + "integrity": "sha512-hlq8tAfn0m/61p4BVRcPzIGr6LKiMwo4VM6dGi6pt4qcRkmNzTcWq6eCEjEh+qXjkMDvPlOFFSGwQjoEa6gyMA==", + "dev": true, + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/@typescript-eslint/utils": { + "version": "8.23.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.23.0.tgz", + "integrity": "sha512-uB/+PSo6Exu02b5ZEiVtmY6RVYO7YU5xqgzTIVZwTHvvK3HsL8tZZHFaTLFtRG3CsV4A5mhOv+NZx5BlhXPyIA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@eslint-community/eslint-utils": "^4.4.0", + 
"@typescript-eslint/scope-manager": "8.23.0", + "@typescript-eslint/types": "8.23.0", + "@typescript-eslint/typescript-estree": "8.23.0" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "eslint": "^8.57.0 || ^9.0.0", + "typescript": ">=4.8.4 <5.8.0" + } + }, + "node_modules/@typescript-eslint/visitor-keys": { + "version": "8.23.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.23.0.tgz", + "integrity": "sha512-oWWhcWDLwDfu++BGTZcmXWqpwtkwb5o7fxUIGksMQQDSdPW9prsSnfIOZMlsj4vBOSrcnjIUZMiIjODgGosFhQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@typescript-eslint/types": "8.23.0", + "eslint-visitor-keys": "^4.2.0" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + } + }, + "node_modules/@ungap/structured-clone": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/@ungap/structured-clone/-/structured-clone-1.3.0.tgz", + "integrity": "sha512-WmoN8qaIAo7WTYWbAZuG8PYEhn5fkz7dZrqTBZ7dtt//lL2Gwms1IcnQ5yHqjDfX8Ft5j4YzDM23f87zBfDe9g==", + "license": "ISC" + }, + "node_modules/@vitejs/plugin-react": { + "version": "4.3.4", + "resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-4.3.4.tgz", + "integrity": "sha512-SCCPBJtYLdE8PX/7ZQAs1QAZ8Jqwih+0VBLum1EGqmCCQal+MIUqLCzj3ZUy8ufbC0cAM4LRlSTm7IQJwWT4ug==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/core": "^7.26.0", + "@babel/plugin-transform-react-jsx-self": "^7.25.9", + "@babel/plugin-transform-react-jsx-source": "^7.25.9", + "@types/babel__core": "^7.20.5", + "react-refresh": "^0.14.2" + }, + "engines": { + "node": "^14.18.0 || >=16.0.0" + }, + "peerDependencies": { + "vite": "^4.2.0 || ^5.0.0 || ^6.0.0" + } + }, "node_modules/@vscode/markdown-it-katex": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/@vscode/markdown-it-katex/-/markdown-it-katex-1.1.1.tgz", @@ -633,282 +1995,67 @@ "katex": "^0.16.4" } }, - "node_modules/@vue/compiler-dom": { - "version": "3.5.13", - "resolved": "https://registry.npmjs.org/@vue/compiler-dom/-/compiler-dom-3.5.13.tgz", - "integrity": "sha512-ZOJ46sMOKUjO3e94wPdCzQ6P1Lx/vhp2RSvfaab88Ajexs0AHeV0uasYhi99WPaogmBlRHNRuly8xV75cNTMDA==", + "node_modules/acorn": { + "version": "8.14.0", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.14.0.tgz", + "integrity": "sha512-cl669nCJTZBsL97OF4kUQm5g5hC2uihk0NxY3WENAC0TYdILVkAyHymAntgxGkl7K+t0cXIrH5siy5S4XkFycA==", + "dev": true, "license": "MIT", - "dependencies": { - "@vue/compiler-core": "3.5.13", - "@vue/shared": "3.5.13" - } - }, - "node_modules/@vue/compiler-dom/node_modules/@babel/helper-string-parser": { - "version": "7.25.9", - "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.25.9.tgz", - "integrity": "sha512-4A/SCr/2KLd5jrtOMFzaKjVtAei3+2r/NChoBNoZ3EyP/+GlhoaEGoWOZUmFmoITP7zOJyHIMm+DYRd8o3PvHA==", - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@vue/compiler-dom/node_modules/@babel/helper-validator-identifier": { - "version": "7.25.9", - "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.25.9.tgz", - "integrity": "sha512-Ed61U6XJc3CVRfkERJWDz4dJwKe7iLmmJsbOGu9wSloNSFttHV0I8g6UAgb7qnK5ly5bGLPd4oXZlxCdANBOWQ==", - "license": "MIT", - "engines": { 
- "node": ">=6.9.0" - } - }, - "node_modules/@vue/compiler-dom/node_modules/@babel/parser": { - "version": "7.26.2", - "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.26.2.tgz", - "integrity": "sha512-DWMCZH9WA4Maitz2q21SRKHo9QXZxkDsbNZoVD62gusNtNBBqDg9i7uOhASfTfIGNzW+O+r7+jAlM8dwphcJKQ==", - "license": "MIT", - "dependencies": { - "@babel/types": "^7.26.0" - }, "bin": { - "parser": "bin/babel-parser.js" + "acorn": "bin/acorn" }, "engines": { - "node": ">=6.0.0" + "node": ">=0.4.0" } }, - "node_modules/@vue/compiler-dom/node_modules/@babel/types": { - "version": "7.26.0", - "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.26.0.tgz", - "integrity": "sha512-Z/yiTPj+lDVnF7lWeKCIJzaIkI0vYO87dMpZ4bg4TDrFe4XXLFWL1TbXU27gBP3QccxV9mZICCrnjnYlJjXHOA==", + "node_modules/acorn-jsx": { + "version": "5.3.2", + "resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.3.2.tgz", + "integrity": "sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==", + "dev": true, "license": "MIT", - "dependencies": { - "@babel/helper-string-parser": "^7.25.9", - "@babel/helper-validator-identifier": "^7.25.9" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@vue/compiler-dom/node_modules/@vue/compiler-core": { - "version": "3.5.13", - "resolved": "https://registry.npmjs.org/@vue/compiler-core/-/compiler-core-3.5.13.tgz", - "integrity": "sha512-oOdAkwqUfW1WqpwSYJce06wvt6HljgY3fGeM9NcVA1HaYOij3mZG9Rkysn0OHuyUAGMbEbARIpsG+LPVlBJ5/Q==", - "license": "MIT", - "dependencies": { - "@babel/parser": "^7.25.3", - "@vue/shared": "3.5.13", - "entities": "^4.5.0", - "estree-walker": "^2.0.2", - "source-map-js": "^1.2.0" - } - }, - "node_modules/@vue/compiler-dom/node_modules/estree-walker": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-2.0.2.tgz", - "integrity": "sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w==", - "license": "MIT" - }, - "node_modules/@vue/compiler-dom/node_modules/source-map-js": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", - "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", - "license": "BSD-3-Clause", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/@vue/compiler-sfc": { - "version": "3.5.13", - "resolved": "https://registry.npmjs.org/@vue/compiler-sfc/-/compiler-sfc-3.5.13.tgz", - "integrity": "sha512-6VdaljMpD82w6c2749Zhf5T9u5uLBWKnVue6XWxprDobftnletJ8+oel7sexFfM3qIxNmVE7LSFGTpv6obNyaQ==", - "license": "MIT", - "dependencies": { - "@babel/parser": "^7.25.3", - "@vue/compiler-core": "3.5.13", - "@vue/compiler-dom": "3.5.13", - "@vue/compiler-ssr": "3.5.13", - "@vue/shared": "3.5.13", - "estree-walker": "^2.0.2", - "magic-string": "^0.30.11", - "postcss": "^8.4.48", - "source-map-js": "^1.2.0" - } - }, - "node_modules/@vue/compiler-sfc/node_modules/@babel/helper-string-parser": { - "version": "7.25.9", - "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.25.9.tgz", - "integrity": "sha512-4A/SCr/2KLd5jrtOMFzaKjVtAei3+2r/NChoBNoZ3EyP/+GlhoaEGoWOZUmFmoITP7zOJyHIMm+DYRd8o3PvHA==", - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@vue/compiler-sfc/node_modules/@babel/helper-validator-identifier": { - "version": "7.25.9", - "resolved": 
"https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.25.9.tgz", - "integrity": "sha512-Ed61U6XJc3CVRfkERJWDz4dJwKe7iLmmJsbOGu9wSloNSFttHV0I8g6UAgb7qnK5ly5bGLPd4oXZlxCdANBOWQ==", - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@vue/compiler-sfc/node_modules/@babel/parser": { - "version": "7.26.2", - "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.26.2.tgz", - "integrity": "sha512-DWMCZH9WA4Maitz2q21SRKHo9QXZxkDsbNZoVD62gusNtNBBqDg9i7uOhASfTfIGNzW+O+r7+jAlM8dwphcJKQ==", - "license": "MIT", - "dependencies": { - "@babel/types": "^7.26.0" - }, - "bin": { - "parser": "bin/babel-parser.js" - }, - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/@vue/compiler-sfc/node_modules/@babel/types": { - "version": "7.26.0", - "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.26.0.tgz", - "integrity": "sha512-Z/yiTPj+lDVnF7lWeKCIJzaIkI0vYO87dMpZ4bg4TDrFe4XXLFWL1TbXU27gBP3QccxV9mZICCrnjnYlJjXHOA==", - "license": "MIT", - "dependencies": { - "@babel/helper-string-parser": "^7.25.9", - "@babel/helper-validator-identifier": "^7.25.9" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@vue/compiler-sfc/node_modules/@jridgewell/sourcemap-codec": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.0.tgz", - "integrity": "sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==", - "license": "MIT" - }, - "node_modules/@vue/compiler-sfc/node_modules/@vue/compiler-core": { - "version": "3.5.13", - "resolved": "https://registry.npmjs.org/@vue/compiler-core/-/compiler-core-3.5.13.tgz", - "integrity": "sha512-oOdAkwqUfW1WqpwSYJce06wvt6HljgY3fGeM9NcVA1HaYOij3mZG9Rkysn0OHuyUAGMbEbARIpsG+LPVlBJ5/Q==", - "license": "MIT", - "dependencies": { - "@babel/parser": "^7.25.3", - "@vue/shared": "3.5.13", - "entities": "^4.5.0", - "estree-walker": "^2.0.2", - "source-map-js": "^1.2.0" - } - }, - "node_modules/@vue/compiler-sfc/node_modules/@vue/compiler-ssr": { - "version": "3.5.13", - "resolved": "https://registry.npmjs.org/@vue/compiler-ssr/-/compiler-ssr-3.5.13.tgz", - "integrity": "sha512-wMH6vrYHxQl/IybKJagqbquvxpWCuVYpoUJfCqFZwa/JY1GdATAQ+TgVtgrwwMZ0D07QhA99rs/EAAWfvG6KpA==", - "license": "MIT", - "dependencies": { - "@vue/compiler-dom": "3.5.13", - "@vue/shared": "3.5.13" - } - }, - "node_modules/@vue/compiler-sfc/node_modules/estree-walker": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-2.0.2.tgz", - "integrity": "sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w==", - "license": "MIT" - }, - "node_modules/@vue/compiler-sfc/node_modules/magic-string": { - "version": "0.30.14", - "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.14.tgz", - "integrity": "sha512-5c99P1WKTed11ZC0HMJOj6CDIue6F8ySu+bJL+85q1zBEIY8IklrJ1eiKC2NDRh3Ct3FcvmJPyQHb9erXMTJNw==", - "license": "MIT", - "dependencies": { - "@jridgewell/sourcemap-codec": "^1.5.0" - } - }, - "node_modules/@vue/compiler-sfc/node_modules/source-map-js": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", - "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", - "license": "BSD-3-Clause", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/@vue/runtime-dom": { - "version": "3.5.13", - "resolved": 
"https://registry.npmjs.org/@vue/runtime-dom/-/runtime-dom-3.5.13.tgz", - "integrity": "sha512-dLaj94s93NYLqjLiyFzVs9X6dWhTdAlEAciC3Moq7gzAc13VJUdCnjjRurNM6uTLFATRHexHCTu/Xp3eW6yoog==", - "license": "MIT", - "dependencies": { - "@vue/reactivity": "3.5.13", - "@vue/runtime-core": "3.5.13", - "@vue/shared": "3.5.13", - "csstype": "^3.1.3" - } - }, - "node_modules/@vue/runtime-dom/node_modules/@vue/reactivity": { - "version": "3.5.13", - "resolved": "https://registry.npmjs.org/@vue/reactivity/-/reactivity-3.5.13.tgz", - "integrity": "sha512-NaCwtw8o48B9I6L1zl2p41OHo/2Z4wqYGGIK1Khu5T7yxrn+ATOixn/Udn2m+6kZKB/J7cuT9DbWWhRxqixACg==", - "license": "MIT", - "dependencies": { - "@vue/shared": "3.5.13" - } - }, - "node_modules/@vue/runtime-dom/node_modules/@vue/runtime-core": { - "version": "3.5.13", - "resolved": "https://registry.npmjs.org/@vue/runtime-core/-/runtime-core-3.5.13.tgz", - "integrity": "sha512-Fj4YRQ3Az0WTZw1sFe+QDb0aXCerigEpw418pw1HBUKFtnQHWzwojaukAs2X/c9DQz4MQ4bsXTGlcpGxU/RCIw==", - "license": "MIT", - "dependencies": { - "@vue/reactivity": "3.5.13", - "@vue/shared": "3.5.13" - } - }, - "node_modules/@vue/runtime-dom/node_modules/csstype": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz", - "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==", - "license": "MIT" - }, - "node_modules/@vue/server-renderer": { - "version": "3.5.13", - "resolved": "https://registry.npmjs.org/@vue/server-renderer/-/server-renderer-3.5.13.tgz", - "integrity": "sha512-wAi4IRJV/2SAW3htkTlB+dHeRmpTiVIK1OGLWV1yeStVSebSQQOwGwIq0D3ZIoBj2C2qpgz5+vX9iEBkTdk5YA==", - "license": "MIT", - "dependencies": { - "@vue/compiler-ssr": "3.5.13", - "@vue/shared": "3.5.13" - }, "peerDependencies": { - "vue": "3.5.13" + "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0" } }, - "node_modules/@vue/server-renderer/node_modules/@vue/compiler-ssr": { - "version": "3.5.13", - "resolved": "https://registry.npmjs.org/@vue/compiler-ssr/-/compiler-ssr-3.5.13.tgz", - "integrity": "sha512-wMH6vrYHxQl/IybKJagqbquvxpWCuVYpoUJfCqFZwa/JY1GdATAQ+TgVtgrwwMZ0D07QhA99rs/EAAWfvG6KpA==", + "node_modules/ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "dev": true, "license": "MIT", "dependencies": { - "@vue/compiler-dom": "3.5.13", - "@vue/shared": "3.5.13" + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" } }, - "node_modules/@vue/shared": { - "version": "3.5.13", - "resolved": "https://registry.npmjs.org/@vue/shared/-/shared-3.5.13.tgz", - "integrity": "sha512-/hnE/qP5ZoGpol0a5mDi45bOd7t3tjYJBjsgCsivow7D48cJeV5l05RD82lPqi7gRiphZM37rnhW1l6ZoCNNnQ==", - "license": "MIT" - }, - "node_modules/arg": { - "version": "5.0.2", - "resolved": "https://registry.npmjs.org/arg/-/arg-5.0.2.tgz", - "integrity": "sha512-PYjyFOLKQ9y57JvQ6QLo8dAgNqswh8M1RMJYdQduT6xbWSgK36P/Z/v+p888pM69jMMfS8Xd8F6I1kQ/I9HUGg==", - "license": "MIT" + "node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-convert": 
"^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } }, "node_modules/argparse": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", + "dev": true, "license": "Python-2.0" }, "node_modules/autoprefixer": { @@ -948,10 +2095,50 @@ "postcss": "^8.1.0" } }, + "node_modules/bail": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/bail/-/bail-2.0.2.tgz", + "integrity": "sha512-0xO6mYd7JB2YesxDKplafRpsiOzPt9V02ddPCLbY1xYGPOX24NTyN50qnUxgCPcSoYMhKpAuBTjQoRZCAkUDRw==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "dev": true, + "license": "MIT" + }, + "node_modules/brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/braces": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", + "license": "MIT", + "dependencies": { + "fill-range": "^7.1.1" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/browserslist": { - "version": "4.24.2", - "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.24.2.tgz", - "integrity": "sha512-ZIc+Q62revdMcqC6aChtW4jz3My3klmCO1fEmINZY/8J3EpBg5/A/D0AKmBveUh6pgoeycoMkVMko84tuYS+Gg==", + "version": "4.24.4", + "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.24.4.tgz", + "integrity": "sha512-KDi1Ny1gSePi1vm0q4oxSF8b4DR44GF4BbmS2YdhPLOEqd8pDviZOGH/GsmRwoWJ2+5Lr085X7naowMwKHDG1A==", "funding": [ { "type": "opencollective", @@ -968,9 +2155,9 @@ ], "license": "MIT", "dependencies": { - "caniuse-lite": "^1.0.30001669", - "electron-to-chromium": "^1.5.41", - "node-releases": "^2.0.18", + "caniuse-lite": "^1.0.30001688", + "electron-to-chromium": "^1.5.73", + "node-releases": "^2.0.19", "update-browserslist-db": "^1.1.1" }, "bin": { @@ -980,57 +2167,6 @@ "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" } }, - "node_modules/browserslist/node_modules/electron-to-chromium": { - "version": "1.5.67", - "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.67.tgz", - "integrity": "sha512-nz88NNBsD7kQSAGGJyp8hS6xSPtWwqNogA0mjtc2nUYeEf3nURK9qpV18TuBdDmEDgVWotS8Wkzf+V52dSQ/LQ==", - "license": "ISC" - }, - "node_modules/browserslist/node_modules/escalade": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", - "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/browserslist/node_modules/node-releases": { - "version": "2.0.18", - "resolved": 
"https://registry.npmjs.org/node-releases/-/node-releases-2.0.18.tgz", - "integrity": "sha512-d9VeXT4SJ7ZeOqGX6R5EM022wpL+eWPooLI+5UpWn2jCT1aosUQEhQP214x33Wkwx3JQMvIm+tIoVOdodFS40g==", - "license": "MIT" - }, - "node_modules/browserslist/node_modules/update-browserslist-db": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.1.1.tgz", - "integrity": "sha512-R8UzCaa9Az+38REPiJ1tXlImTJXlVfgHZsglwBD/k6nj76ctsH1E3q4doGrukiLQd3sGQYu56r5+lo5r94l29A==", - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/browserslist" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/browserslist" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "escalade": "^3.2.0", - "picocolors": "^1.1.0" - }, - "bin": { - "update-browserslist-db": "cli.js" - }, - "peerDependencies": { - "browserslist": ">= 4.21.0" - } - }, "node_modules/buffer-builder": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/buffer-builder/-/buffer-builder-0.2.0.tgz", @@ -1038,10 +2174,20 @@ "devOptional": true, "license": "MIT/X11" }, + "node_modules/callsites": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", + "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, "node_modules/caniuse-lite": { - "version": "1.0.30001684", - "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001684.tgz", - "integrity": "sha512-G1LRwLIQjBQoyq0ZJGqGIJUXzJ8irpbjHLpVRXDvBEScFJ9b17sgK6vlx0GAJFE21okD7zXl08rRRUfq6HdoEQ==", + "version": "1.0.30001697", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001697.tgz", + "integrity": "sha512-GwNPlWJin8E+d7Gxq96jxM6w0w+VFeyyXRsjU58emtkYqnbwHqXm5uT2uCmO0RQE9htWknOP4xtBlLmM/gWxvQ==", "funding": [ { "type": "opencollective", @@ -1058,148 +2204,93 @@ ], "license": "CC-BY-4.0" }, - "node_modules/chokidar": { - "version": "3.6.0", - "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz", - "integrity": "sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==", + "node_modules/ccount": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/ccount/-/ccount-2.0.1.tgz", + "integrity": "sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/chalk": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", + "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", + "dev": true, "license": "MIT", "dependencies": { - "anymatch": "~3.1.2", - "braces": "~3.0.2", - "glob-parent": "~5.1.2", - "is-binary-path": "~2.1.0", - "is-glob": "~4.0.1", - "normalize-path": "~3.0.0", - "readdirp": "~3.6.0" + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" }, "engines": { - "node": ">= 8.10.0" + "node": ">=10" }, "funding": { - "url": "https://paulmillr.com/funding/" - }, - "optionalDependencies": { - "fsevents": "~2.3.2" + "url": "https://github.com/chalk/chalk?sponsor=1" } }, - "node_modules/chokidar/node_modules/anymatch": { - "version": "3.1.3", - "resolved": 
"https://registry.npmjs.org/anymatch/-/anymatch-3.1.3.tgz", - "integrity": "sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==", - "license": "ISC", - "dependencies": { - "normalize-path": "^3.0.0", - "picomatch": "^2.0.4" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/chokidar/node_modules/binary-extensions": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.3.0.tgz", - "integrity": "sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==", + "node_modules/character-entities": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/character-entities/-/character-entities-2.0.2.tgz", + "integrity": "sha512-shx7oQ0Awen/BRIdkjkvz54PnEEI/EjwXDSIZp86/KKdbafHh1Df/RYGBhn4hbe2+uKC9FnT5UCEdyPz3ai9hQ==", "license": "MIT", - "engines": { - "node": ">=8" - }, "funding": { - "url": "https://github.com/sponsors/sindresorhus" + "type": "github", + "url": "https://github.com/sponsors/wooorm" } }, - "node_modules/chokidar/node_modules/braces": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", - "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", - "license": "MIT", - "dependencies": { - "fill-range": "^7.1.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/chokidar/node_modules/fill-range": { - "version": "7.1.1", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", - "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", - "license": "MIT", - "dependencies": { - "to-regex-range": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/chokidar/node_modules/glob-parent": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", - "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", - "license": "ISC", - "dependencies": { - "is-glob": "^4.0.1" - }, - "engines": { - "node": ">= 6" - } - }, - "node_modules/chokidar/node_modules/is-binary-path": { + "node_modules/character-entities-html4": { "version": "2.1.0", - "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", - "integrity": "sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==", + "resolved": "https://registry.npmjs.org/character-entities-html4/-/character-entities-html4-2.1.0.tgz", + "integrity": "sha512-1v7fgQRj6hnSwFpq1Eu0ynr/CDEw0rXo2B61qXrLNdHZmPKgb7fqS1a2JwF0rISo9q77jDI8VMEHoApn8qDoZA==", "license": "MIT", - "dependencies": { - "binary-extensions": "^2.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/chokidar/node_modules/is-number": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", - "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", - "license": "MIT", - "engines": { - "node": ">=0.12.0" - } - }, - "node_modules/chokidar/node_modules/picomatch": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", - "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", - "license": "MIT", - "engines": { - "node": ">=8.6" - }, "funding": { - "url": 
"https://github.com/sponsors/jonschlinkert" + "type": "github", + "url": "https://github.com/sponsors/wooorm" } }, - "node_modules/chokidar/node_modules/readdirp": { - "version": "3.6.0", - "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", - "integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==", + "node_modules/character-entities-legacy": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/character-entities-legacy/-/character-entities-legacy-3.0.0.tgz", + "integrity": "sha512-RpPp0asT/6ufRm//AJVwpViZbGM/MkjQFxJccQRHmISF/22NBtsHqAWmL+/pmkPWoIUJdWyeVleTl1wydHATVQ==", "license": "MIT", - "dependencies": { - "picomatch": "^2.2.1" - }, - "engines": { - "node": ">=8.10.0" + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" } }, - "node_modules/chokidar/node_modules/to-regex-range": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", - "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", + "node_modules/character-reference-invalid": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/character-reference-invalid/-/character-reference-invalid-2.0.1.tgz", + "integrity": "sha512-iBZ4F4wRbyORVsu0jPV7gXkOsGYjGHPmAyv+HiHG8gi5PtC9KI2j1+v8/tlibRvjoWX027ypmG/n0HtO5t7unw==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, "license": "MIT", "dependencies": { - "is-number": "^7.0.0" + "color-name": "~1.1.4" }, "engines": { - "node": ">=8.0" + "node": ">=7.0.0" } }, + "node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true, + "license": "MIT" + }, "node_modules/colorjs.io": { "version": "0.5.2", "resolved": "https://registry.npmjs.org/colorjs.io/-/colorjs.io-0.5.2.tgz", @@ -1207,6 +2298,16 @@ "devOptional": true, "license": "MIT" }, + "node_modules/comma-separated-tokens": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/comma-separated-tokens/-/comma-separated-tokens-2.0.3.tgz", + "integrity": "sha512-Fu4hJdvzeylCfQPp9SGWidpzrMs7tTrlu6Vb8XGaRGck8QSNZJJp538Wrb60Lax4fPwR64ViY468OIUTbRlGZg==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/commander": { "version": "8.3.0", "resolved": "https://registry.npmjs.org/commander/-/commander-8.3.0.tgz", @@ -1216,73 +2317,151 @@ "node": ">= 12" } }, - "node_modules/css-selector-tokenizer": { - "version": "0.8.0", - "resolved": "https://registry.npmjs.org/css-selector-tokenizer/-/css-selector-tokenizer-0.8.0.tgz", - "integrity": "sha512-Jd6Ig3/pe62/qe5SBPTN8h8LeUg/pT4lLgtavPf7updwwHpvFzxvOQBHYj2LZDMjUnBzgvIUSjRcf6oT5HzHFg==", - "license": "MIT", - "dependencies": { - "cssesc": "^3.0.0", - "fastparse": "^1.1.2" - } - }, - "node_modules/css-selector-tokenizer/node_modules/cssesc": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/cssesc/-/cssesc-3.0.0.tgz", - "integrity": 
"sha512-/Tb/JcjK111nNScGob5MNtsntNM1aCNUDipB/TkwZFhyDrrE47SOx/18wF2bbjgc3ZzCSKW1T5nt5EbFoAz/Vg==", - "license": "MIT", - "bin": { - "cssesc": "bin/cssesc" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/css-selector-tokenizer/node_modules/fastparse": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/fastparse/-/fastparse-1.1.2.tgz", - "integrity": "sha512-483XLLxTVIwWK3QTrMGRqUfUpoOs/0hbQrl2oz4J0pAcm3A3bu84wxTFqGqkJzewCLdME38xJLJAxBABfQT8sQ==", + "node_modules/concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", + "dev": true, "license": "MIT" }, - "node_modules/culori": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/culori/-/culori-3.3.0.tgz", - "integrity": "sha512-pHJg+jbuFsCjz9iclQBqyL3B2HLCBF71BwVNujUYEvCeQMvV97R59MNK3R2+jgJ3a1fcZgI9B3vYgz8lzr/BFQ==", + "node_modules/convert-source-map": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz", + "integrity": "sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==", + "dev": true, + "license": "MIT" + }, + "node_modules/cookie": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-1.0.2.tgz", + "integrity": "sha512-9Kr/j4O16ISv8zBBhJoi4bXOYNTkFLOqSL3UDB0njXxCXNezjeyVrJyGOWtgfs/q2km1gwBcfH8q1yEGoMYunA==", "license": "MIT", "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + "node": ">=18" } }, + "node_modules/cross-spawn": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", + "dev": true, + "license": "MIT", + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/csstype": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz", + "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==", + "license": "MIT" + }, "node_modules/daisyui": { - "version": "4.12.14", - "resolved": "https://registry.npmjs.org/daisyui/-/daisyui-4.12.14.tgz", - "integrity": "sha512-hA27cdBasdwd4/iEjn+aidoCrRroDuo3G5W9NDKaVCJI437Mm/3eSL/2u7MkZ0pt8a+TrYF3aT2pFVemTS3how==", + "version": "5.0.12", + "resolved": "https://registry.npmjs.org/daisyui/-/daisyui-5.0.12.tgz", + "integrity": "sha512-01DU0eYBcHgPtuf5fxcrkGkIN6/Uyaqmkle5Yo3ZyW9YVAu036ALZbjv2KH5euvUbeQ4r9q3gAarGcf7Tywhng==", "license": "MIT", - "dependencies": { - "css-selector-tokenizer": "^0.8", - "culori": "^3", - "picocolors": "^1", - "postcss-js": "^4" - }, - "engines": { - "node": ">=16.9.0" - }, "funding": { - "type": "opencollective", - "url": "https://opencollective.com/daisyui" + "url": "https://github.com/saadeghi/daisyui?sponsor=1" } }, - "node_modules/didyoumean": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/didyoumean/-/didyoumean-1.2.2.tgz", - "integrity": "sha512-gxtyfqMg7GKyhQmb056K7M3xszy/myH8w+B4RT+QXBQsvAOdc3XymqDDPHx1BgPgsdAA5SIifona89YtRATDzw==", + "node_modules/debug": { + "version": "4.4.0", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.0.tgz", + "integrity": 
"sha512-6WTZ/IxCY/T6BALoZHaE4ctp9xm+Z5kY/pzYaCHRFeyVhojxlrm+46y68HA6hr0TcwEssoxNiDEUJQjfPZ/RYA==", + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/decode-named-character-reference": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/decode-named-character-reference/-/decode-named-character-reference-1.0.2.tgz", + "integrity": "sha512-O8x12RzrUF8xyVcY0KJowWsmaJxQbmy0/EtnNtHRpsOcT7dFk5W598coHqBVpmWo1oQQfsCqfCmkZN5DJrZVdg==", + "license": "MIT", + "dependencies": { + "character-entities": "^2.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/deep-is": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", + "integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/dequal": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz", + "integrity": "sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/detect-libc": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.0.3.tgz", + "integrity": "sha512-bwy0MGW55bG41VqxxypOsdSdGqLwXPI/focwgTYCFMbdUiBAxLg9CFzG08sz2aqzknwiX7Hkl0bQENjg8iLByw==", + "license": "Apache-2.0", + "engines": { + "node": ">=8" + } + }, + "node_modules/devlop": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/devlop/-/devlop-1.1.0.tgz", + "integrity": "sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA==", + "license": "MIT", + "dependencies": { + "dequal": "^2.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/dexie": { + "version": "4.0.11", + "resolved": "https://registry.npmjs.org/dexie/-/dexie-4.0.11.tgz", + "integrity": "sha512-SOKO002EqlvBYYKQSew3iymBoN2EQ4BDw/3yprjh7kAfFzjBYkaMNa/pZvcA7HSWlcKSQb9XhPe3wKyQ0x4A8A==", "license": "Apache-2.0" }, - "node_modules/dlv": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/dlv/-/dlv-1.1.3.tgz", - "integrity": "sha512-+HlytyjlPKnIG8XuRG8WvmBP8xs8P71y+SKKS6ZXWoEgLuePxtDoUEiH7WkdePWrQ5JBpE6aoVqfZfJUQkjXwA==", - "license": "MIT" + "node_modules/electron-to-chromium": { + "version": "1.5.91", + "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.91.tgz", + "integrity": "sha512-sNSHHyq048PFmZY4S90ax61q+gLCs0X0YmcOII9wG9S2XwbVr+h4VW2wWhnbp/Eys3cCwTxVF292W3qPaxIapQ==", + "license": "ISC" + }, + "node_modules/enhanced-resolve": { + "version": "5.18.1", + "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.18.1.tgz", + "integrity": "sha512-ZSW3ma5GkcQBIpwZTSRAI8N71Uuwgs93IezB7mf7R60tC8ZbJideoDNKjHn2O9KIlx6rkGTTEk1xUCK2E1Y2Yg==", + "license": "MIT", + "dependencies": { + "graceful-fs": "^4.2.4", + "tapable": "^2.2.0" + }, + "engines": { + "node": ">=10.13.0" + } }, "node_modules/entities": { "version": "4.5.0", @@ -1297,123 +2476,289 @@ } }, "node_modules/esbuild": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.21.5.tgz", - "integrity": "sha512-mg3OPMV4hXywwpoDxu3Qda5xCKQi+vCTZq8S9J/EpkhB2HzKXq4SNFZE3+NK93JYxc8VMSep+lOUSC/RVKaBqw==", + 
"version": "0.24.2", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.24.2.tgz", + "integrity": "sha512-+9egpBW8I3CD5XPe0n6BfT5fxLzxrlDzqydF3aviG+9ni1lDC/OvMHcxqEFV0+LANZG5R1bFMWfUrjVsdwxJvA==", "hasInstallScript": true, "license": "MIT", "bin": { "esbuild": "bin/esbuild" }, "engines": { - "node": ">=12" + "node": ">=18" }, "optionalDependencies": { - "@esbuild/aix-ppc64": "0.21.5", - "@esbuild/android-arm": "0.21.5", - "@esbuild/android-arm64": "0.21.5", - "@esbuild/android-x64": "0.21.5", - "@esbuild/darwin-arm64": "0.21.5", - "@esbuild/darwin-x64": "0.21.5", - "@esbuild/freebsd-arm64": "0.21.5", - "@esbuild/freebsd-x64": "0.21.5", - "@esbuild/linux-arm": "0.21.5", - "@esbuild/linux-arm64": "0.21.5", - "@esbuild/linux-ia32": "0.21.5", - "@esbuild/linux-loong64": "0.21.5", - "@esbuild/linux-mips64el": "0.21.5", - "@esbuild/linux-ppc64": "0.21.5", - "@esbuild/linux-riscv64": "0.21.5", - "@esbuild/linux-s390x": "0.21.5", - "@esbuild/linux-x64": "0.21.5", - "@esbuild/netbsd-x64": "0.21.5", - "@esbuild/openbsd-x64": "0.21.5", - "@esbuild/sunos-x64": "0.21.5", - "@esbuild/win32-arm64": "0.21.5", - "@esbuild/win32-ia32": "0.21.5", - "@esbuild/win32-x64": "0.21.5" + "@esbuild/aix-ppc64": "0.24.2", + "@esbuild/android-arm": "0.24.2", + "@esbuild/android-arm64": "0.24.2", + "@esbuild/android-x64": "0.24.2", + "@esbuild/darwin-arm64": "0.24.2", + "@esbuild/darwin-x64": "0.24.2", + "@esbuild/freebsd-arm64": "0.24.2", + "@esbuild/freebsd-x64": "0.24.2", + "@esbuild/linux-arm": "0.24.2", + "@esbuild/linux-arm64": "0.24.2", + "@esbuild/linux-ia32": "0.24.2", + "@esbuild/linux-loong64": "0.24.2", + "@esbuild/linux-mips64el": "0.24.2", + "@esbuild/linux-ppc64": "0.24.2", + "@esbuild/linux-riscv64": "0.24.2", + "@esbuild/linux-s390x": "0.24.2", + "@esbuild/linux-x64": "0.24.2", + "@esbuild/netbsd-arm64": "0.24.2", + "@esbuild/netbsd-x64": "0.24.2", + "@esbuild/openbsd-arm64": "0.24.2", + "@esbuild/openbsd-x64": "0.24.2", + "@esbuild/sunos-x64": "0.24.2", + "@esbuild/win32-arm64": "0.24.2", + "@esbuild/win32-ia32": "0.24.2", + "@esbuild/win32-x64": "0.24.2" } }, - "node_modules/esbuild/node_modules/@esbuild/darwin-arm64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.21.5.tgz", - "integrity": "sha512-DwqXqZyuk5AiWWf3UfLiRDJ5EDd49zg6O9wclZ7kUMv2WRFr4HKjXp/5t8JZ11QbQfUS6/cRCKGwYhtNAY88kQ==", - "cpu": [ - "arm64" - ], + "node_modules/escalade": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", + "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], "engines": { - "node": ">=12" + "node": ">=6" } }, + "node_modules/escape-string-regexp": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", + "integrity": "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/eslint": { + "version": "9.19.0", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.19.0.tgz", + "integrity": "sha512-ug92j0LepKlbbEv6hD911THhoRHmbdXt2gX+VDABAW/Ir7D3nqKdv5Pf5vtlyY6HQMTEP2skXY43ueqTCWssEA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@eslint-community/eslint-utils": "^4.2.0", + 
"@eslint-community/regexpp": "^4.12.1", + "@eslint/config-array": "^0.19.0", + "@eslint/core": "^0.10.0", + "@eslint/eslintrc": "^3.2.0", + "@eslint/js": "9.19.0", + "@eslint/plugin-kit": "^0.2.5", + "@humanfs/node": "^0.16.6", + "@humanwhocodes/module-importer": "^1.0.1", + "@humanwhocodes/retry": "^0.4.1", + "@types/estree": "^1.0.6", + "@types/json-schema": "^7.0.15", + "ajv": "^6.12.4", + "chalk": "^4.0.0", + "cross-spawn": "^7.0.6", + "debug": "^4.3.2", + "escape-string-regexp": "^4.0.0", + "eslint-scope": "^8.2.0", + "eslint-visitor-keys": "^4.2.0", + "espree": "^10.3.0", + "esquery": "^1.5.0", + "esutils": "^2.0.2", + "fast-deep-equal": "^3.1.3", + "file-entry-cache": "^8.0.0", + "find-up": "^5.0.0", + "glob-parent": "^6.0.2", + "ignore": "^5.2.0", + "imurmurhash": "^0.1.4", + "is-glob": "^4.0.0", + "json-stable-stringify-without-jsonify": "^1.0.1", + "lodash.merge": "^4.6.2", + "minimatch": "^3.1.2", + "natural-compare": "^1.4.0", + "optionator": "^0.9.3" + }, + "bin": { + "eslint": "bin/eslint.js" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "url": "https://eslint.org/donate" + }, + "peerDependencies": { + "jiti": "*" + }, + "peerDependenciesMeta": { + "jiti": { + "optional": true + } + } + }, + "node_modules/eslint-plugin-react-hooks": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/eslint-plugin-react-hooks/-/eslint-plugin-react-hooks-5.1.0.tgz", + "integrity": "sha512-mpJRtPgHN2tNAvZ35AMfqeB3Xqeo273QxrHJsbBEPWODRM4r0yB6jfoROqKEYrOn27UtRPpcpHc2UqyBSuUNTw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "peerDependencies": { + "eslint": "^3.0.0 || ^4.0.0 || ^5.0.0 || ^6.0.0 || ^7.0.0 || ^8.0.0-0 || ^9.0.0" + } + }, + "node_modules/eslint-plugin-react-refresh": { + "version": "0.4.18", + "resolved": "https://registry.npmjs.org/eslint-plugin-react-refresh/-/eslint-plugin-react-refresh-0.4.18.tgz", + "integrity": "sha512-IRGEoFn3OKalm3hjfolEWGqoF/jPqeEYFp+C8B0WMzwGwBMvlRDQd06kghDhF0C61uJ6WfSDhEZE/sAQjduKgw==", + "dev": true, + "license": "MIT", + "peerDependencies": { + "eslint": ">=8.40" + } + }, + "node_modules/eslint-scope": { + "version": "8.2.0", + "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-8.2.0.tgz", + "integrity": "sha512-PHlWUfG6lvPc3yvP5A4PNyBL1W8fkDUccmI21JUu/+GKZBoH/W5u6usENXUrWFRsyoW5ACUjFGgAFQp5gUlb/A==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "esrecurse": "^4.3.0", + "estraverse": "^5.2.0" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/eslint-visitor-keys": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-4.2.0.tgz", + "integrity": "sha512-UyLnSehNt62FFhSwjZlHmeokpRK59rcz29j+F1/aDgbkbRTk7wIc9XzdoasMUbRNKDM0qQt/+BJ4BrpFeABemw==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/espree": { + "version": "10.3.0", + "resolved": "https://registry.npmjs.org/espree/-/espree-10.3.0.tgz", + "integrity": "sha512-0QYC8b24HWY8zjRnDTL6RiHfDbAWn63qb4LMj1Z4b076A4une81+z03Kg7l7mn/48PUTqoLptSXez8oknU8Clg==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "acorn": "^8.14.0", + "acorn-jsx": "^5.3.2", + "eslint-visitor-keys": "^4.2.0" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "url": 
"https://opencollective.com/eslint" + } + }, + "node_modules/esquery": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/esquery/-/esquery-1.6.0.tgz", + "integrity": "sha512-ca9pw9fomFcKPvFLXhBKUK90ZvGibiGOvRJNbjljY7s7uq/5YO4BOzcYtJqExdx99rF6aAcnRxHmcUHcz6sQsg==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + "estraverse": "^5.1.0" + }, + "engines": { + "node": ">=0.10" + } + }, + "node_modules/esrecurse": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/esrecurse/-/esrecurse-4.3.0.tgz", + "integrity": "sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "estraverse": "^5.2.0" + }, + "engines": { + "node": ">=4.0" + } + }, + "node_modules/estraverse": { + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz", + "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=4.0" + } + }, + "node_modules/estree-util-is-identifier-name": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/estree-util-is-identifier-name/-/estree-util-is-identifier-name-3.0.0.tgz", + "integrity": "sha512-hFtqIDZTIUZ9BXLb8y4pYGyk6+wekIivNVTcmvk8NoOh+VeRn5y6cEHzbURrWbfp1fIqdVipilzj+lfaadNZmg==", + "license": "MIT", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/esutils": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", + "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/extend": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", + "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==", + "license": "MIT" + }, + "node_modules/fast-deep-equal": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", + "dev": true, + "license": "MIT" + }, "node_modules/fast-glob": { - "version": "3.3.2", - "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.2.tgz", - "integrity": "sha512-oX2ruAFQwf/Orj8m737Y5adxDQO0LAB7/S5MnxCdTNDd4p6BsyIVsv9JQsATbTSq8KHRpLwIHbVlUNatxd+1Ow==", + "version": "3.3.3", + "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.3.tgz", + "integrity": "sha512-7MptL8U0cqcFdzIzwOTHoilX9x5BrNqye7Z/LuC7kCMRio1EMSyqRK3BEAUD7sXRq4iT4AzTVuZdhgQ2TCvYLg==", + "dev": true, "license": "MIT", "dependencies": { "@nodelib/fs.stat": "^2.0.2", "@nodelib/fs.walk": "^1.2.3", "glob-parent": "^5.1.2", "merge2": "^1.3.0", - "micromatch": "^4.0.4" + "micromatch": "^4.0.8" }, "engines": { "node": ">=8.6.0" } }, - "node_modules/fast-glob/node_modules/@nodelib/fs.scandir": { - "version": "2.1.5", - "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", - "integrity": "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==", - "license": "MIT", - "dependencies": { - "@nodelib/fs.stat": "2.0.5", - "run-parallel": "^1.1.9" - }, - "engines": { - "node": ">= 8" - } - 
}, - "node_modules/fast-glob/node_modules/@nodelib/fs.stat": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz", - "integrity": "sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==", - "license": "MIT", - "engines": { - "node": ">= 8" - } - }, - "node_modules/fast-glob/node_modules/@nodelib/fs.walk": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz", - "integrity": "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==", - "license": "MIT", - "dependencies": { - "@nodelib/fs.scandir": "2.1.5", - "fastq": "^1.6.0" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/fast-glob/node_modules/fastq": { - "version": "1.17.1", - "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.17.1.tgz", - "integrity": "sha512-sRVD3lWVIXWg6By68ZN7vho9a1pQcN/WBFaAAsDDFzlJjvoGx0P8z7V1t72grFJfJhu3YPZBuu25f7Kaw2jN1w==", - "license": "ISC", - "dependencies": { - "reusify": "^1.0.4" - } - }, "node_modules/fast-glob/node_modules/glob-parent": { "version": "5.1.2", "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", + "dev": true, "license": "ISC", "dependencies": { "is-glob": "^4.0.1" @@ -1422,68 +2767,93 @@ "node": ">= 6" } }, - "node_modules/fast-glob/node_modules/merge2": { - "version": "1.4.1", - "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", - "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==", - "license": "MIT", - "engines": { - "node": ">= 8" - } - }, - "node_modules/fast-glob/node_modules/queue-microtask": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", - "integrity": "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], + "node_modules/fast-json-stable-stringify": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", + "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==", + "dev": true, "license": "MIT" }, - "node_modules/fast-glob/node_modules/reusify": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.0.4.tgz", - "integrity": "sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw==", - "license": "MIT", - "engines": { - "iojs": ">=1.0.0", - "node": ">=0.10.0" + "node_modules/fast-levenshtein": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", + "integrity": "sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==", + "dev": true, + "license": "MIT" + }, + "node_modules/fastq": { + "version": "1.19.0", + "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.19.0.tgz", + "integrity": "sha512-7SFSRCNjBQIZH/xZR3iy5iQYR8aGBE0h3VG6/cwlbrpdciNYBMotQav8c1XI3HjHH+NikUpP53nPdlZSdWmFzA==", + "dev": true, + "license": "ISC", + "dependencies": { + 
"reusify": "^1.0.4" } }, - "node_modules/fast-glob/node_modules/run-parallel": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", - "integrity": "sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], + "node_modules/file-entry-cache": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-8.0.0.tgz", + "integrity": "sha512-XXTUwCvisa5oacNGRP9SfNtYBNAMi+RPwBFmblZEF7N7swHYQS6/Zfk7SRwx4D5j3CH211YNRco1DEMNVfZCnQ==", + "dev": true, "license": "MIT", "dependencies": { - "queue-microtask": "^1.2.2" + "flat-cache": "^4.0.0" + }, + "engines": { + "node": ">=16.0.0" } }, + "node_modules/fill-range": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", + "license": "MIT", + "dependencies": { + "to-regex-range": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/find-up": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-5.0.0.tgz", + "integrity": "sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng==", + "dev": true, + "license": "MIT", + "dependencies": { + "locate-path": "^6.0.0", + "path-exists": "^4.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/flat-cache": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/flat-cache/-/flat-cache-4.0.1.tgz", + "integrity": "sha512-f7ccFPK3SXFHpx15UIGyRJ/FJQctuKZ0zVuN3frBo4HnK3cay9VEW0R6yPYFHC0AgqhukPzKjq22t5DmAyqGyw==", + "dev": true, + "license": "MIT", + "dependencies": { + "flatted": "^3.2.9", + "keyv": "^4.5.4" + }, + "engines": { + "node": ">=16" + } + }, + "node_modules/flatted": { + "version": "3.3.2", + "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.3.2.tgz", + "integrity": "sha512-AiwGJM8YcNOaobumgtng+6NHuOqC3A7MixFeDafM3X9cIUM+xUXoS5Vfgf+OihAYe20fxqNM9yPBXJzRtZ/4eA==", + "dev": true, + "license": "ISC" + }, "node_modules/fraction.js": { "version": "4.3.7", "resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-4.3.7.tgz", @@ -1511,10 +2881,21 @@ "node": "^8.16.0 || ^10.6.0 || >=11.0.0" } }, + "node_modules/gensync": { + "version": "1.0.0-beta.2", + "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz", + "integrity": "sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, "node_modules/glob-parent": { "version": "6.0.2", "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz", "integrity": "sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==", + "dev": true, "license": "ISC", "dependencies": { "is-glob": "^4.0.3" @@ -1523,6 +2904,32 @@ "node": ">=10.13.0" } }, + "node_modules/globals": { + "version": "15.14.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-15.14.0.tgz", + "integrity": 
"sha512-OkToC372DtlQeje9/zHIo5CT8lRP/FUgEOKBEhU4e0abL7J7CD24fD9ohiLN5hagG/kWCYj4K5oaxxtj2Z0Dig==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/graceful-fs": { + "version": "4.2.11", + "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", + "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", + "license": "ISC" + }, + "node_modules/graphemer": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/graphemer/-/graphemer-1.4.0.tgz", + "integrity": "sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag==", + "dev": true, + "license": "MIT" + }, "node_modules/has-flag": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", @@ -1533,14 +2940,203 @@ "node": ">=8" } }, + "node_modules/hast-util-from-dom": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/hast-util-from-dom/-/hast-util-from-dom-5.0.1.tgz", + "integrity": "sha512-N+LqofjR2zuzTjCPzyDUdSshy4Ma6li7p/c3pA78uTwzFgENbgbUrm2ugwsOdcjI1muO+o6Dgzp9p8WHtn/39Q==", + "license": "ISC", + "dependencies": { + "@types/hast": "^3.0.0", + "hastscript": "^9.0.0", + "web-namespaces": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-from-html": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/hast-util-from-html/-/hast-util-from-html-2.0.3.tgz", + "integrity": "sha512-CUSRHXyKjzHov8yKsQjGOElXy/3EKpyX56ELnkHH34vDVw1N1XSQ1ZcAvTyAPtGqLTuKP/uxM+aLkSPqF/EtMw==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "devlop": "^1.1.0", + "hast-util-from-parse5": "^8.0.0", + "parse5": "^7.0.0", + "vfile": "^6.0.0", + "vfile-message": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-from-html-isomorphic": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/hast-util-from-html-isomorphic/-/hast-util-from-html-isomorphic-2.0.0.tgz", + "integrity": "sha512-zJfpXq44yff2hmE0XmwEOzdWin5xwH+QIhMLOScpX91e/NSGPsAzNCvLQDIEPyO2TXi+lBmU6hjLIhV8MwP2kw==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "hast-util-from-dom": "^5.0.0", + "hast-util-from-html": "^2.0.0", + "unist-util-remove-position": "^5.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-from-parse5": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/hast-util-from-parse5/-/hast-util-from-parse5-8.0.2.tgz", + "integrity": "sha512-SfMzfdAi/zAoZ1KkFEyyeXBn7u/ShQrfd675ZEE9M3qj+PMFX05xubzRyF76CCSJu8au9jgVxDV1+okFvgZU4A==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "@types/unist": "^3.0.0", + "devlop": "^1.0.0", + "hastscript": "^9.0.0", + "property-information": "^6.0.0", + "vfile": "^6.0.0", + "vfile-location": "^5.0.0", + "web-namespaces": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-is-element": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/hast-util-is-element/-/hast-util-is-element-3.0.0.tgz", + "integrity": "sha512-Val9mnv2IWpLbNPqc/pUem+a7Ipj2aHacCwgNfTiK0vJKl0LF+4Ba4+v1oPHFpf3bLYmreq0/l3Gud9S5OH42g==", + "license": "MIT", + 
"dependencies": { + "@types/hast": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-parse-selector": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/hast-util-parse-selector/-/hast-util-parse-selector-4.0.0.tgz", + "integrity": "sha512-wkQCkSYoOGCRKERFWcxMVMOcYE2K1AaNLU8DXS9arxnLOUEWbOXKXiJUNzEpqZ3JOKpnha3jkFrumEjVliDe7A==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-to-jsx-runtime": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/hast-util-to-jsx-runtime/-/hast-util-to-jsx-runtime-2.3.2.tgz", + "integrity": "sha512-1ngXYb+V9UT5h+PxNRa1O1FYguZK/XL+gkeqvp7EdHlB9oHUG0eYRo/vY5inBdcqo3RkPMC58/H94HvkbfGdyg==", + "license": "MIT", + "dependencies": { + "@types/estree": "^1.0.0", + "@types/hast": "^3.0.0", + "@types/unist": "^3.0.0", + "comma-separated-tokens": "^2.0.0", + "devlop": "^1.0.0", + "estree-util-is-identifier-name": "^3.0.0", + "hast-util-whitespace": "^3.0.0", + "mdast-util-mdx-expression": "^2.0.0", + "mdast-util-mdx-jsx": "^3.0.0", + "mdast-util-mdxjs-esm": "^2.0.0", + "property-information": "^6.0.0", + "space-separated-tokens": "^2.0.0", + "style-to-object": "^1.0.0", + "unist-util-position": "^5.0.0", + "vfile-message": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-to-text": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/hast-util-to-text/-/hast-util-to-text-4.0.2.tgz", + "integrity": "sha512-KK6y/BN8lbaq654j7JgBydev7wuNMcID54lkRav1P0CaE1e47P72AWWPiGKXTJU271ooYzcvTAn/Zt0REnvc7A==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "@types/unist": "^3.0.0", + "hast-util-is-element": "^3.0.0", + "unist-util-find-after": "^5.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-whitespace": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/hast-util-whitespace/-/hast-util-whitespace-3.0.0.tgz", + "integrity": "sha512-88JUN06ipLwsnv+dVn+OIYOvAuvBMy/Qoi6O7mQHxdPXpjy+Cd6xRkWwux7DKO+4sYILtLBRIKgsdpS2gQc7qw==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hastscript": { + "version": "9.0.0", + "resolved": "https://registry.npmjs.org/hastscript/-/hastscript-9.0.0.tgz", + "integrity": "sha512-jzaLBGavEDKHrc5EfFImKN7nZKKBdSLIdGvCwDZ9TfzbF2ffXiov8CKE445L2Z1Ek2t/m4SKQ2j6Ipv7NyUolw==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "comma-separated-tokens": "^2.0.0", + "hast-util-parse-selector": "^4.0.0", + "property-information": "^6.0.0", + "space-separated-tokens": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/highlight.js": { - "version": "11.10.0", - "resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-11.10.0.tgz", - "integrity": "sha512-SYVnVFswQER+zu1laSya563s+F8VDGt7o35d4utbamowvUNLLMovFqwCLSocpZTz3MgaSRA1IbqRWZv97dtErQ==", + "version": "11.11.1", + "resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-11.11.1.tgz", + "integrity": 
"sha512-Xwwo44whKBVCYoliBQwaPvtd/2tYFkRQtXDWj1nackaV2JPXx3L0+Jvd8/qCJ2p+ML0/XVkJ2q+Mr+UVdpJK5w==", + "license": "BSD-3-Clause", "engines": { "node": ">=12.0.0" } }, + "node_modules/html-url-attributes": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/html-url-attributes/-/html-url-attributes-3.0.1.tgz", + "integrity": "sha512-ol6UPyBWqsrO6EJySPz2O7ZSr856WDrEzM5zMqp+FJJLGMW35cLYmmZnl0vztAZxRUoNZJFTCohfjuIJ8I4QBQ==", + "license": "MIT", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/ignore": { + "version": "5.3.2", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz", + "integrity": "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 4" + } + }, "node_modules/immutable": { "version": "5.0.3", "resolved": "https://registry.npmjs.org/immutable/-/immutable-5.0.3.tgz", @@ -1548,10 +3144,88 @@ "devOptional": true, "license": "MIT" }, + "node_modules/import-fresh": { + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.1.tgz", + "integrity": "sha512-TR3KfrTZTYLPB6jUjfx6MF9WcWrHL9su5TObK4ZkYgBdWKPOFoSoQIdEuTuR82pmtxH2spWG9h6etwfr1pLBqQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "parent-module": "^1.0.0", + "resolve-from": "^4.0.0" + }, + "engines": { + "node": ">=6" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/imurmurhash": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.4.tgz", + "integrity": "sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.8.19" + } + }, + "node_modules/inline-style-parser": { + "version": "0.2.4", + "resolved": "https://registry.npmjs.org/inline-style-parser/-/inline-style-parser-0.2.4.tgz", + "integrity": "sha512-0aO8FkhNZlj/ZIbNi7Lxxr12obT7cL1moPfE4tg1LkX7LlLfC6DeX4l2ZEud1ukP9jNQyNnfzQVqwbwmAATY4Q==", + "license": "MIT" + }, + "node_modules/is-alphabetical": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-alphabetical/-/is-alphabetical-2.0.1.tgz", + "integrity": "sha512-FWyyY60MeTNyeSRpkM2Iry0G9hpr7/9kD40mD/cGQEuilcZYS4okz8SN2Q6rLCJ8gbCt6fN+rC+6tMGS99LaxQ==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/is-alphanumerical": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-alphanumerical/-/is-alphanumerical-2.0.1.tgz", + "integrity": "sha512-hmbYhX/9MUMF5uh7tOXyK/n0ZvWpad5caBA17GsC6vyuCqaWliRG5K1qS9inmUhEMaOBIW7/whAnSwveW/LtZw==", + "license": "MIT", + "dependencies": { + "is-alphabetical": "^2.0.0", + "is-decimal": "^2.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/is-decimal": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-decimal/-/is-decimal-2.0.1.tgz", + "integrity": "sha512-AAB9hiomQs5DXWcRB1rqsxGUstbRroFOPPVAomNk/3XHR5JyEZChOyTWe2oayKnsSsr/kcGqF+z6yuH6HHpN0A==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/is-extglob": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": 
"sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/is-glob": { "version": "4.0.3", "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz", "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==", + "dev": true, "license": "MIT", "dependencies": { "is-extglob": "^2.1.1" @@ -1560,28 +3234,123 @@ "node": ">=0.10.0" } }, - "node_modules/is-glob/node_modules/is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==", + "node_modules/is-hexadecimal": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-hexadecimal/-/is-hexadecimal-2.0.1.tgz", + "integrity": "sha512-DgZQp241c8oO6cA1SbTEWiXeoxV42vlcJxgH+B3hi1AiqqKruZR3ZGF8In3fj4+/y/7rHvlOZLZtgJ/4ttYGZg==", "license": "MIT", - "engines": { - "node": ">=0.10.0" + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/is-number": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", + "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", + "license": "MIT", + "engines": { + "node": ">=0.12.0" + } + }, + "node_modules/is-plain-obj": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/is-plain-obj/-/is-plain-obj-4.1.0.tgz", + "integrity": "sha512-+Pgi+vMuUNkJyExiMBt5IlFoMyKnr5zhJ4Uspz58WOhBF5QoIZkFyNHIbBAtHwzVAgk5RtndVNsDRN61/mmDqg==", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "dev": true, + "license": "ISC" + }, "node_modules/jiti": { - "version": "1.21.6", - "resolved": "https://registry.npmjs.org/jiti/-/jiti-1.21.6.tgz", - "integrity": "sha512-2yTgeWTWzMWkHu6Jp9NKgePDaYHbntiwvYuuJLbbN9vl7DC9DvXKOB2BC3ZZ92D3cvV/aflH0osDfwpHepQ53w==", + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/jiti/-/jiti-2.4.2.tgz", + "integrity": "sha512-rg9zJN+G4n2nfJl5MW3BMygZX56zKPNVEYYqq7adpmMh4Jn2QNEwhvQlFy6jPVdcod7txZtKHWnyZiA3a0zP7A==", "license": "MIT", "bin": { - "jiti": "bin/jiti.js" + "jiti": "lib/jiti-cli.mjs" + } + }, + "node_modules/js-tokens": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", + "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", + "license": "MIT" + }, + "node_modules/js-yaml": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", + "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", + "dev": true, + "license": "MIT", + "dependencies": { + "argparse": "^2.0.1" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, + "node_modules/jsesc": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/jsesc/-/jsesc-3.1.0.tgz", + "integrity": "sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA==", + "dev": true, + 
"license": "MIT", + "bin": { + "jsesc": "bin/jsesc" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/json-buffer": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.1.tgz", + "integrity": "sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", + "dev": true, + "license": "MIT" + }, + "node_modules/json-stable-stringify-without-jsonify": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz", + "integrity": "sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw==", + "dev": true, + "license": "MIT" + }, + "node_modules/json5": { + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", + "dev": true, + "license": "MIT", + "bin": { + "json5": "lib/cli.js" + }, + "engines": { + "node": ">=6" } }, "node_modules/katex": { - "version": "0.16.15", - "resolved": "https://registry.npmjs.org/katex/-/katex-0.16.15.tgz", - "integrity": "sha512-yE9YJIEAk2aZ+FL/G8r+UGw0CTUzEA8ZFy6E+8tc3spHUKq3qBnzCkI1CQwGoI9atJhVyFPEypQsTY7mJ1Pi9w==", + "version": "0.16.21", + "resolved": "https://registry.npmjs.org/katex/-/katex-0.16.21.tgz", + "integrity": "sha512-XvqR7FgOHtWupfMiigNzmh+MgUVmDGU2kXZm899ZkPfcuoPuFxyHmXsgATDpFZDAXCI8tvinaVcDo8PIIJSo4A==", "funding": [ "https://opencollective.com/katex", "https://github.com/sponsors/katex" @@ -1594,45 +3363,1243 @@ "katex": "cli.js" } }, - "node_modules/lilconfig": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-2.1.0.tgz", - "integrity": "sha512-utWOt/GHzuUxnLKxB6dk81RoOeoNeHgbrXiuGk4yyF5qlRz+iIVWu56E2fqGHFrXz0QNUhLB/8nKqvRH66JKGQ==", + "node_modules/keyv": { + "version": "4.5.4", + "resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz", + "integrity": "sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw==", + "dev": true, "license": "MIT", + "dependencies": { + "json-buffer": "3.0.1" + } + }, + "node_modules/levn": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz", + "integrity": "sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "prelude-ls": "^1.2.1", + "type-check": "~0.4.0" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/lightningcss": { + "version": "1.29.2", + "resolved": "https://registry.npmjs.org/lightningcss/-/lightningcss-1.29.2.tgz", + "integrity": "sha512-6b6gd/RUXKaw5keVdSEtqFVdzWnU5jMxTUjA2bVcMNPLwSQ08Sv/UodBVtETLCn7k4S1Ibxwh7k68IwLZPgKaA==", + "license": "MPL-2.0", + "dependencies": { + "detect-libc": "^2.0.3" + }, + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + }, + "optionalDependencies": { + "lightningcss-darwin-arm64": "1.29.2", + "lightningcss-darwin-x64": "1.29.2", + "lightningcss-freebsd-x64": "1.29.2", + 
"lightningcss-linux-arm-gnueabihf": "1.29.2", + "lightningcss-linux-arm64-gnu": "1.29.2", + "lightningcss-linux-arm64-musl": "1.29.2", + "lightningcss-linux-x64-gnu": "1.29.2", + "lightningcss-linux-x64-musl": "1.29.2", + "lightningcss-win32-arm64-msvc": "1.29.2", + "lightningcss-win32-x64-msvc": "1.29.2" + } + }, + "node_modules/lightningcss-darwin-arm64": { + "version": "1.29.2", + "resolved": "https://registry.npmjs.org/lightningcss-darwin-arm64/-/lightningcss-darwin-arm64-1.29.2.tgz", + "integrity": "sha512-cK/eMabSViKn/PG8U/a7aCorpeKLMlK0bQeNHmdb7qUnBkNPnL+oV5DjJUo0kqWsJUapZsM4jCfYItbqBDvlcA==", + "cpu": [ + "arm64" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-darwin-x64": { + "version": "1.29.2", + "resolved": "https://registry.npmjs.org/lightningcss-darwin-x64/-/lightningcss-darwin-x64-1.29.2.tgz", + "integrity": "sha512-j5qYxamyQw4kDXX5hnnCKMf3mLlHvG44f24Qyi2965/Ycz829MYqjrVg2H8BidybHBp9kom4D7DR5VqCKDXS0w==", + "cpu": [ + "x64" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-freebsd-x64": { + "version": "1.29.2", + "resolved": "https://registry.npmjs.org/lightningcss-freebsd-x64/-/lightningcss-freebsd-x64-1.29.2.tgz", + "integrity": "sha512-wDk7M2tM78Ii8ek9YjnY8MjV5f5JN2qNVO+/0BAGZRvXKtQrBC4/cn4ssQIpKIPP44YXw6gFdpUF+Ps+RGsCwg==", + "cpu": [ + "x64" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-arm-gnueabihf": { + "version": "1.29.2", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm-gnueabihf/-/lightningcss-linux-arm-gnueabihf-1.29.2.tgz", + "integrity": "sha512-IRUrOrAF2Z+KExdExe3Rz7NSTuuJ2HvCGlMKoquK5pjvo2JY4Rybr+NrKnq0U0hZnx5AnGsuFHjGnNT14w26sg==", + "cpu": [ + "arm" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-arm64-gnu": { + "version": "1.29.2", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-gnu/-/lightningcss-linux-arm64-gnu-1.29.2.tgz", + "integrity": "sha512-KKCpOlmhdjvUTX/mBuaKemp0oeDIBBLFiU5Fnqxh1/DZ4JPZi4evEH7TKoSBFOSOV3J7iEmmBaw/8dpiUvRKlQ==", + "cpu": [ + "arm64" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-arm64-musl": { + "version": "1.29.2", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-musl/-/lightningcss-linux-arm64-musl-1.29.2.tgz", + "integrity": "sha512-Q64eM1bPlOOUgxFmoPUefqzY1yV3ctFPE6d/Vt7WzLW4rKTv7MyYNky+FWxRpLkNASTnKQUaiMJ87zNODIrrKQ==", + "cpu": [ + "arm64" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-x64-gnu": { + 
"version": "1.29.2", + "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-gnu/-/lightningcss-linux-x64-gnu-1.29.2.tgz", + "integrity": "sha512-0v6idDCPG6epLXtBH/RPkHvYx74CVziHo6TMYga8O2EiQApnUPZsbR9nFNrg2cgBzk1AYqEd95TlrsL7nYABQg==", + "cpu": [ + "x64" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-x64-musl": { + "version": "1.29.2", + "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-musl/-/lightningcss-linux-x64-musl-1.29.2.tgz", + "integrity": "sha512-rMpz2yawkgGT8RULc5S4WiZopVMOFWjiItBT7aSfDX4NQav6M44rhn5hjtkKzB+wMTRlLLqxkeYEtQ3dd9696w==", + "cpu": [ + "x64" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-win32-arm64-msvc": { + "version": "1.29.2", + "resolved": "https://registry.npmjs.org/lightningcss-win32-arm64-msvc/-/lightningcss-win32-arm64-msvc-1.29.2.tgz", + "integrity": "sha512-nL7zRW6evGQqYVu/bKGK+zShyz8OVzsCotFgc7judbt6wnB2KbiKKJwBE4SGoDBQ1O94RjW4asrCjQL4i8Fhbw==", + "cpu": [ + "arm64" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-win32-x64-msvc": { + "version": "1.29.2", + "resolved": "https://registry.npmjs.org/lightningcss-win32-x64-msvc/-/lightningcss-win32-x64-msvc-1.29.2.tgz", + "integrity": "sha512-EdIUW3B2vLuHmv7urfzMI/h2fmlnOQBk1xlsDxkN1tCWKjNFjfLhGxYk8C8mzpSfr+A6jFFIi8fU6LbQGsRWjA==", + "cpu": [ + "x64" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/locate-path": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz", + "integrity": "sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw==", + "dev": true, + "license": "MIT", + "dependencies": { + "p-locate": "^5.0.0" + }, "engines": { "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/linkify-it": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/linkify-it/-/linkify-it-5.0.0.tgz", - "integrity": "sha512-5aHCbzQRADcdP+ATqnDuhhJ/MRIqDkZX5pyjFHRRysS8vZ5AbqGEoFIb6pYHPZ+L/OC2Lc+xT8uHVVR5CAK/wQ==", + "node_modules/lodash.merge": { + "version": "4.6.2", + "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz", + "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/longest-streak": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/longest-streak/-/longest-streak-3.1.0.tgz", + "integrity": "sha512-9Ri+o0JYgehTaVBBDoMqIl8GXtbWg711O3srftcHhZ0dqnETqLaoIK0x17fUw9rFSlK/0NlsKe0Ahhyl5pXE2g==", "license": "MIT", - "dependencies": { - "uc.micro": "^2.0.0" + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" } }, - "node_modules/markdown-it": { - "version": "14.1.0", - "resolved": 
"https://registry.npmjs.org/markdown-it/-/markdown-it-14.1.0.tgz", - "integrity": "sha512-a54IwgWPaeBCAAsv13YgmALOF1elABB08FxO9i+r4VFk5Vl4pKokRPeX8u5TCgSsPi6ec1otfLjdOpVcgbpshg==", + "node_modules/loose-envify": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", + "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", "license": "MIT", "dependencies": { - "argparse": "^2.0.1", - "entities": "^4.4.0", - "linkify-it": "^5.0.0", - "mdurl": "^2.0.0", - "punycode.js": "^2.3.1", - "uc.micro": "^2.1.0" + "js-tokens": "^3.0.0 || ^4.0.0" }, "bin": { - "markdown-it": "bin/markdown-it.mjs" + "loose-envify": "cli.js" } }, - "node_modules/mdurl": { + "node_modules/lowlight": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/lowlight/-/lowlight-3.3.0.tgz", + "integrity": "sha512-0JNhgFoPvP6U6lE/UdVsSq99tn6DhjjpAj5MxG49ewd2mOBVtwWYIT8ClyABhq198aXXODMU6Ox8DrGy/CpTZQ==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "devlop": "^1.0.0", + "highlight.js": "~11.11.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/lru-cache": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", + "integrity": "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==", + "dev": true, + "license": "ISC", + "dependencies": { + "yallist": "^3.0.2" + } + }, + "node_modules/markdown-table": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/markdown-table/-/markdown-table-3.0.4.tgz", + "integrity": "sha512-wiYz4+JrLyb/DqW2hkFJxP7Vd7JuTDm77fvbM8VfEQdmSMqcImWeeRbHwZjBjIFki/VaMK2BhFi7oUUZeM5bqw==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/mdast-util-find-and-replace": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/mdast-util-find-and-replace/-/mdast-util-find-and-replace-3.0.2.tgz", + "integrity": "sha512-Tmd1Vg/m3Xz43afeNxDIhWRtFZgM2VLyaf4vSTYwudTyeuTneoL3qtWMA5jeLyz/O1vDJmmV4QuScFCA2tBPwg==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "escape-string-regexp": "^5.0.0", + "unist-util-is": "^6.0.0", + "unist-util-visit-parents": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-find-and-replace/node_modules/escape-string-regexp": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-5.0.0.tgz", + "integrity": "sha512-/veY75JbMK4j1yjvuUxuVsiS/hr/4iHs9FTT6cgTexxdE0Ly/glccBAkloH/DofkjRbZU3bnoj38mOmhkZ0lHw==", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/mdast-util-from-markdown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/mdast-util-from-markdown/-/mdast-util-from-markdown-2.0.2.tgz", + "integrity": "sha512-uZhTV/8NBuw0WHkPTrCqDOl0zVe1BIng5ZtHoDk49ME1qqcjYmmLmOf0gELgcRMxN4w2iuIeVso5/6QymSrgmA==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "@types/unist": "^3.0.0", + "decode-named-character-reference": "^1.0.0", + "devlop": "^1.0.0", + "mdast-util-to-string": "^4.0.0", + "micromark": "^4.0.0", + "micromark-util-decode-numeric-character-reference": "^2.0.0", + "micromark-util-decode-string": "^2.0.0", + 
"micromark-util-normalize-identifier": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0", + "unist-util-stringify-position": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-gfm/-/mdast-util-gfm-3.0.0.tgz", + "integrity": "sha512-dgQEX5Amaq+DuUqf26jJqSK9qgixgd6rYDHAv4aTBuA92cTknZlKpPfa86Z/s8Dj8xsAQpFfBmPUHWJBWqS4Bw==", + "license": "MIT", + "dependencies": { + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-gfm-autolink-literal": "^2.0.0", + "mdast-util-gfm-footnote": "^2.0.0", + "mdast-util-gfm-strikethrough": "^2.0.0", + "mdast-util-gfm-table": "^2.0.0", + "mdast-util-gfm-task-list-item": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm-autolink-literal": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-autolink-literal/-/mdast-util-gfm-autolink-literal-2.0.1.tgz", + "integrity": "sha512-5HVP2MKaP6L+G6YaxPNjuL0BPrq9orG3TsrZ9YXbA3vDw/ACI4MEsnoDpn6ZNm7GnZgtAcONJyPhOP8tNJQavQ==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "ccount": "^2.0.0", + "devlop": "^1.0.0", + "mdast-util-find-and-replace": "^3.0.0", + "micromark-util-character": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm-footnote": { "version": "2.0.0", - "resolved": "https://registry.npmjs.org/mdurl/-/mdurl-2.0.0.tgz", - "integrity": "sha512-Lf+9+2r+Tdp5wXDXC4PcIBjTDtq4UKjCPMQhKIuzpJNW0b96kVqSwW0bT7FhRSfmAiFYgP+SCRvdrDozfh0U5w==", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-footnote/-/mdast-util-gfm-footnote-2.0.0.tgz", + "integrity": "sha512-5jOT2boTSVkMnQ7LTrd6n/18kqwjmuYqo7JUPe+tRCY6O7dAuTFMtTPauYYrMPpox9hlN0uOx/FL8XvEfG9/mQ==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "devlop": "^1.1.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0", + "micromark-util-normalize-identifier": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm-strikethrough": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-strikethrough/-/mdast-util-gfm-strikethrough-2.0.0.tgz", + "integrity": "sha512-mKKb915TF+OC5ptj5bJ7WFRPdYtuHv0yTRxK2tJvi+BDqbkiG7h7u/9SI89nRAYcmap2xHQL9D+QG/6wSrTtXg==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm-table": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-table/-/mdast-util-gfm-table-2.0.0.tgz", + "integrity": "sha512-78UEvebzz/rJIxLvE7ZtDd/vIQ0RHv+3Mh5DR96p7cS7HsBhYIICDBCu8csTNWNO6tBWfqXPWekRuj2FNOGOZg==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "devlop": "^1.0.0", + "markdown-table": "^3.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm-task-list-item": { + "version": "2.0.0", + "resolved": 
"https://registry.npmjs.org/mdast-util-gfm-task-list-item/-/mdast-util-gfm-task-list-item-2.0.0.tgz", + "integrity": "sha512-IrtvNvjxC1o06taBAVJznEnkiHxLFTzgonUdy8hzFVeDun0uTjxxrRGVaNFqkU1wJR3RBPEfsxmU6jDWPofrTQ==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "devlop": "^1.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-math": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-math/-/mdast-util-math-3.0.0.tgz", + "integrity": "sha512-Tl9GBNeG/AhJnQM221bJR2HPvLOSnLE/T9cJI9tlc6zwQk2nPk/4f0cHkOdEixQPC/j8UtKDdITswvLAy1OZ1w==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "devlop": "^1.0.0", + "longest-streak": "^3.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.1.0", + "unist-util-remove-position": "^5.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-mdx-expression": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/mdast-util-mdx-expression/-/mdast-util-mdx-expression-2.0.1.tgz", + "integrity": "sha512-J6f+9hUp+ldTZqKRSg7Vw5V6MqjATc+3E4gf3CFNcuZNWD8XdyI6zQ8GqH7f8169MM6P7hMBRDVGnn7oHB9kXQ==", + "license": "MIT", + "dependencies": { + "@types/estree-jsx": "^1.0.0", + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "devlop": "^1.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-mdx-jsx": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/mdast-util-mdx-jsx/-/mdast-util-mdx-jsx-3.2.0.tgz", + "integrity": "sha512-lj/z8v0r6ZtsN/cGNNtemmmfoLAFZnjMbNyLzBafjzikOM+glrjNHPlf6lQDOTccj9n5b0PPihEBbhneMyGs1Q==", + "license": "MIT", + "dependencies": { + "@types/estree-jsx": "^1.0.0", + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "@types/unist": "^3.0.0", + "ccount": "^2.0.0", + "devlop": "^1.1.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0", + "parse-entities": "^4.0.0", + "stringify-entities": "^4.0.0", + "unist-util-stringify-position": "^4.0.0", + "vfile-message": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-mdxjs-esm": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/mdast-util-mdxjs-esm/-/mdast-util-mdxjs-esm-2.0.1.tgz", + "integrity": "sha512-EcmOpxsZ96CvlP03NghtH1EsLtr0n9Tm4lPUJUBccV9RwUOneqSycg19n5HGzCf+10LozMRSObtVr3ee1WoHtg==", + "license": "MIT", + "dependencies": { + "@types/estree-jsx": "^1.0.0", + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "devlop": "^1.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-newline-to-break": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-newline-to-break/-/mdast-util-newline-to-break-2.0.0.tgz", + "integrity": "sha512-MbgeFca0hLYIEx/2zGsszCSEJJ1JSCdiY5xQxRcLDDGa8EPvlLPupJ4DSajbMPAnC0je8jfb9TiUATnxxrHUog==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "mdast-util-find-and-replace": "^3.0.0" + }, + "funding": { + 
"type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-phrasing": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/mdast-util-phrasing/-/mdast-util-phrasing-4.1.0.tgz", + "integrity": "sha512-TqICwyvJJpBwvGAMZjj4J2n0X8QWp21b9l0o7eXyVJ25YNWYbJDVIyD1bZXE6WtV6RmKJVYmQAKWa0zWOABz2w==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "unist-util-is": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-to-hast": { + "version": "13.2.0", + "resolved": "https://registry.npmjs.org/mdast-util-to-hast/-/mdast-util-to-hast-13.2.0.tgz", + "integrity": "sha512-QGYKEuUsYT9ykKBCMOEDLsU5JRObWQusAolFMeko/tYPufNkRffBAQjIE+99jbA87xv6FgmjLtwjh9wBWajwAA==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "@ungap/structured-clone": "^1.0.0", + "devlop": "^1.0.0", + "micromark-util-sanitize-uri": "^2.0.0", + "trim-lines": "^3.0.0", + "unist-util-position": "^5.0.0", + "unist-util-visit": "^5.0.0", + "vfile": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-to-markdown": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/mdast-util-to-markdown/-/mdast-util-to-markdown-2.1.2.tgz", + "integrity": "sha512-xj68wMTvGXVOKonmog6LwyJKrYXZPvlwabaryTjLh9LuvovB/KAH+kvi8Gjj+7rJjsFi23nkUxRQv1KqSroMqA==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "@types/unist": "^3.0.0", + "longest-streak": "^3.0.0", + "mdast-util-phrasing": "^4.0.0", + "mdast-util-to-string": "^4.0.0", + "micromark-util-classify-character": "^2.0.0", + "micromark-util-decode-string": "^2.0.0", + "unist-util-visit": "^5.0.0", + "zwitch": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-to-string": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-to-string/-/mdast-util-to-string-4.0.0.tgz", + "integrity": "sha512-0H44vDimn51F0YwvxSJSm0eCDOJTRlmN0R1yBh4HLj9wiV1Dn0QoXGbvFAWj2hSItVTlCmBF1hqKlIyUBVFLPg==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/merge2": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", + "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 8" + } + }, + "node_modules/micromark": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/micromark/-/micromark-4.0.1.tgz", + "integrity": "sha512-eBPdkcoCNvYcxQOAKAlceo5SNdzZWfF+FcSupREAzdAh9rRmE239CEQAiTwIgblwnoM8zzj35sZ5ZwvSEOF6Kw==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "@types/debug": "^4.0.0", + "debug": "^4.0.0", + "decode-named-character-reference": "^1.0.0", + "devlop": "^1.0.0", + "micromark-core-commonmark": "^2.0.0", + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-chunked": "^2.0.0", + "micromark-util-combine-extensions": "^2.0.0", + 
"micromark-util-decode-numeric-character-reference": "^2.0.0", + "micromark-util-encode": "^2.0.0", + "micromark-util-normalize-identifier": "^2.0.0", + "micromark-util-resolve-all": "^2.0.0", + "micromark-util-sanitize-uri": "^2.0.0", + "micromark-util-subtokenize": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-core-commonmark": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/micromark-core-commonmark/-/micromark-core-commonmark-2.0.2.tgz", + "integrity": "sha512-FKjQKbxd1cibWMM1P9N+H8TwlgGgSkWZMmfuVucLCHaYqeSvJ0hFeHsIa65pA2nYbes0f8LDHPMrd9X7Ujxg9w==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "decode-named-character-reference": "^1.0.0", + "devlop": "^1.0.0", + "micromark-factory-destination": "^2.0.0", + "micromark-factory-label": "^2.0.0", + "micromark-factory-space": "^2.0.0", + "micromark-factory-title": "^2.0.0", + "micromark-factory-whitespace": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-chunked": "^2.0.0", + "micromark-util-classify-character": "^2.0.0", + "micromark-util-html-tag-name": "^2.0.0", + "micromark-util-normalize-identifier": "^2.0.0", + "micromark-util-resolve-all": "^2.0.0", + "micromark-util-subtokenize": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-extension-gfm": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm/-/micromark-extension-gfm-3.0.0.tgz", + "integrity": "sha512-vsKArQsicm7t0z2GugkCKtZehqUm31oeGBV/KVSorWSy8ZlNAv7ytjFhvaryUiCUJYqs+NoE6AFhpQvBTM6Q4w==", + "license": "MIT", + "dependencies": { + "micromark-extension-gfm-autolink-literal": "^2.0.0", + "micromark-extension-gfm-footnote": "^2.0.0", + "micromark-extension-gfm-strikethrough": "^2.0.0", + "micromark-extension-gfm-table": "^2.0.0", + "micromark-extension-gfm-tagfilter": "^2.0.0", + "micromark-extension-gfm-task-list-item": "^2.0.0", + "micromark-util-combine-extensions": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-autolink-literal": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-autolink-literal/-/micromark-extension-gfm-autolink-literal-2.1.0.tgz", + "integrity": "sha512-oOg7knzhicgQ3t4QCjCWgTmfNhvQbDDnJeVu9v81r7NltNCVmhPy1fJRX27pISafdjL+SVc4d3l48Gb6pbRypw==", + "license": "MIT", + "dependencies": { + "micromark-util-character": "^2.0.0", + "micromark-util-sanitize-uri": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-footnote": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-footnote/-/micromark-extension-gfm-footnote-2.1.0.tgz", + "integrity": "sha512-/yPhxI1ntnDNsiHtzLKYnE3vf9JZ6cAisqVDauhp4CEHxlb4uoOTxOCJ+9s51bIB8U1N1FJ1RXOKTIlD5B/gqw==", + "license": "MIT", + "dependencies": { + "devlop": "^1.0.0", + "micromark-core-commonmark": "^2.0.0", + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-normalize-identifier": "^2.0.0", + 
"micromark-util-sanitize-uri": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-strikethrough": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-strikethrough/-/micromark-extension-gfm-strikethrough-2.1.0.tgz", + "integrity": "sha512-ADVjpOOkjz1hhkZLlBiYA9cR2Anf8F4HqZUO6e5eDcPQd0Txw5fxLzzxnEkSkfnD0wziSGiv7sYhk/ktvbf1uw==", + "license": "MIT", + "dependencies": { + "devlop": "^1.0.0", + "micromark-util-chunked": "^2.0.0", + "micromark-util-classify-character": "^2.0.0", + "micromark-util-resolve-all": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-table": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-table/-/micromark-extension-gfm-table-2.1.1.tgz", + "integrity": "sha512-t2OU/dXXioARrC6yWfJ4hqB7rct14e8f7m0cbI5hUmDyyIlwv5vEtooptH8INkbLzOatzKuVbQmAYcbWoyz6Dg==", + "license": "MIT", + "dependencies": { + "devlop": "^1.0.0", + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-tagfilter": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-tagfilter/-/micromark-extension-gfm-tagfilter-2.0.0.tgz", + "integrity": "sha512-xHlTOmuCSotIA8TW1mDIM6X2O1SiX5P9IuDtqGonFhEK0qgRI4yeC6vMxEV2dgyr2TiD+2PQ10o+cOhdVAcwfg==", + "license": "MIT", + "dependencies": { + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-task-list-item": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-task-list-item/-/micromark-extension-gfm-task-list-item-2.1.0.tgz", + "integrity": "sha512-qIBZhqxqI6fjLDYFTBIa4eivDMnP+OZqsNwmQ3xNLE4Cxwc+zfQEfbs6tzAo2Hjq+bh6q5F+Z8/cksrLFYWQQw==", + "license": "MIT", + "dependencies": { + "devlop": "^1.0.0", + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-math": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/micromark-extension-math/-/micromark-extension-math-3.1.0.tgz", + "integrity": "sha512-lvEqd+fHjATVs+2v/8kg9i5Q0AP2k85H0WUOwpIVvUML8BapsMvh1XAogmQjOCsLpoKRCVQqEkQBB3NhVBcsOg==", + "license": "MIT", + "dependencies": { + "@types/katex": "^0.16.0", + "devlop": "^1.0.0", + "katex": "^0.16.0", + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-factory-destination": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-factory-destination/-/micromark-factory-destination-2.0.1.tgz", + "integrity": 
"sha512-Xe6rDdJlkmbFRExpTOmRj9N3MaWmbAgdpSrBQvCFqhezUn4AHqJHbaEnfbVYYiexVSs//tqOdY/DxhjdCiJnIA==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-factory-label": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-factory-label/-/micromark-factory-label-2.0.1.tgz", + "integrity": "sha512-VFMekyQExqIW7xIChcXn4ok29YE3rnuyveW3wZQWWqF4Nv9Wk5rgJ99KzPvHjkmPXF93FXIbBp6YdW3t71/7Vg==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "devlop": "^1.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-factory-space": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-factory-space/-/micromark-factory-space-2.0.1.tgz", + "integrity": "sha512-zRkxjtBxxLd2Sc0d+fbnEunsTj46SWXgXciZmHq0kDYGnck/ZSGj9/wULTV95uoeYiK5hRXP2mJ98Uo4cq/LQg==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-character": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-factory-title": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-factory-title/-/micromark-factory-title-2.0.1.tgz", + "integrity": "sha512-5bZ+3CjhAd9eChYTHsjy6TGxpOFSKgKKJPJxr293jTbfry2KDoWkhBb6TcPVB4NmzaPhMs1Frm9AZH7OD4Cjzw==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-factory-whitespace": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-factory-whitespace/-/micromark-factory-whitespace-2.0.1.tgz", + "integrity": "sha512-Ob0nuZ3PKt/n0hORHyvoD9uZhr+Za8sFoP+OnMcnWK5lngSzALgQYKMr9RJVOWLqQYuyn6ulqGWSXdwf6F80lQ==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-util-character": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/micromark-util-character/-/micromark-util-character-2.1.1.tgz", + "integrity": "sha512-wv8tdUTJ3thSFFFJKtpYKOYiGP2+v96Hvk4Tu8KpCAsTMs6yi+nVmGh1syvSCsaxz45J6Jbw+9DD6g97+NV67Q==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": 
"MIT", + "dependencies": { + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-util-chunked": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-chunked/-/micromark-util-chunked-2.0.1.tgz", + "integrity": "sha512-QUNFEOPELfmvv+4xiNg2sRYeS/P84pTW0TCgP5zc9FpXetHY0ab7SxKyAQCNCc1eK0459uoLI1y5oO5Vc1dbhA==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-symbol": "^2.0.0" + } + }, + "node_modules/micromark-util-classify-character": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-classify-character/-/micromark-util-classify-character-2.0.1.tgz", + "integrity": "sha512-K0kHzM6afW/MbeWYWLjoHQv1sgg2Q9EccHEDzSkxiP/EaagNzCm7T/WMKZ3rjMbvIpvBiZgwR3dKMygtA4mG1Q==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-util-combine-extensions": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-combine-extensions/-/micromark-util-combine-extensions-2.0.1.tgz", + "integrity": "sha512-OnAnH8Ujmy59JcyZw8JSbK9cGpdVY44NKgSM7E9Eh7DiLS2E9RNQf0dONaGDzEG9yjEl5hcqeIsj4hfRkLH/Bg==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-chunked": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-util-decode-numeric-character-reference": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/micromark-util-decode-numeric-character-reference/-/micromark-util-decode-numeric-character-reference-2.0.2.tgz", + "integrity": "sha512-ccUbYk6CwVdkmCQMyr64dXz42EfHGkPQlBj5p7YVGzq8I7CtjXZJrubAYezf7Rp+bjPseiROqe7G6foFd+lEuw==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-symbol": "^2.0.0" + } + }, + "node_modules/micromark-util-decode-string": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-decode-string/-/micromark-util-decode-string-2.0.1.tgz", + "integrity": "sha512-nDV/77Fj6eH1ynwscYTOsbK7rR//Uj0bZXBwJZRfaLEJ1iGBR6kIfNmlNqaqJf649EP0F3NWNdeJi03elllNUQ==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "decode-named-character-reference": "^1.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-decode-numeric-character-reference": "^2.0.0", + "micromark-util-symbol": "^2.0.0" + } + }, + "node_modules/micromark-util-encode": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-encode/-/micromark-util-encode-2.0.1.tgz", + "integrity": 
"sha512-c3cVx2y4KqUnwopcO9b/SCdo2O67LwJJ/UyqGfbigahfegL9myoEFoDYZgkT7f36T0bLrM9hZTAaAyH+PCAXjw==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT" + }, + "node_modules/micromark-util-html-tag-name": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-html-tag-name/-/micromark-util-html-tag-name-2.0.1.tgz", + "integrity": "sha512-2cNEiYDhCWKI+Gs9T0Tiysk136SnR13hhO8yW6BGNyhOC4qYFnwF1nKfD3HFAIXA5c45RrIG1ub11GiXeYd1xA==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT" + }, + "node_modules/micromark-util-normalize-identifier": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-normalize-identifier/-/micromark-util-normalize-identifier-2.0.1.tgz", + "integrity": "sha512-sxPqmo70LyARJs0w2UclACPUUEqltCkJ6PhKdMIDuJ3gSf/Q+/GIe3WKl0Ijb/GyH9lOpUkRAO2wp0GVkLvS9Q==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-symbol": "^2.0.0" + } + }, + "node_modules/micromark-util-resolve-all": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-resolve-all/-/micromark-util-resolve-all-2.0.1.tgz", + "integrity": "sha512-VdQyxFWFT2/FGJgwQnJYbe1jjQoNTS4RjglmSjTUlpUMa95Htx9NHeYW4rGDJzbjvCsl9eLjMQwGeElsqmzcHg==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-util-sanitize-uri": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-sanitize-uri/-/micromark-util-sanitize-uri-2.0.1.tgz", + "integrity": "sha512-9N9IomZ/YuGGZZmQec1MbgxtlgougxTodVwDzzEouPKo3qFWvymFHWcnDi2vzV1ff6kas9ucW+o3yzJK9YB1AQ==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-character": "^2.0.0", + "micromark-util-encode": "^2.0.0", + "micromark-util-symbol": "^2.0.0" + } + }, + "node_modules/micromark-util-subtokenize": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/micromark-util-subtokenize/-/micromark-util-subtokenize-2.0.4.tgz", + "integrity": "sha512-N6hXjrin2GTJDe3MVjf5FuXpm12PGm80BrUAeub9XFXca8JZbP+oIwY4LJSVwFUCL1IPm/WwSVUN7goFHmSGGQ==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "devlop": "^1.0.0", + "micromark-util-chunked": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-util-symbol": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-symbol/-/micromark-util-symbol-2.0.1.tgz", + "integrity": 
"sha512-vs5t8Apaud9N28kgCrRUdEed4UJ+wWNvicHLPxCa9ENlYuAY31M0ETy5y1vA33YoNPDFTghEbnh6efaE8h4x0Q==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT" + }, + "node_modules/micromark-util-types": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-types/-/micromark-util-types-2.0.1.tgz", + "integrity": "sha512-534m2WhVTddrcKVepwmVEVnUAmtrx9bfIjNoQHRqfnvdaHQiFytEhJoTgpWJvDEXCO5gLTQh3wYC1PgOJA4NSQ==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], "license": "MIT" }, "node_modules/micromatch": { @@ -1648,309 +4615,26 @@ "node": ">=8.6" } }, - "node_modules/micromatch/node_modules/braces": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", - "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", - "license": "MIT", - "dependencies": { - "fill-range": "^7.1.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/micromatch/node_modules/fill-range": { - "version": "7.1.1", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", - "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", - "license": "MIT", - "dependencies": { - "to-regex-range": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/micromatch/node_modules/is-number": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", - "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", - "license": "MIT", - "engines": { - "node": ">=0.12.0" - } - }, - "node_modules/micromatch/node_modules/picomatch": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", - "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", - "license": "MIT", - "engines": { - "node": ">=8.6" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, - "node_modules/micromatch/node_modules/to-regex-range": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", - "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", - "license": "MIT", - "dependencies": { - "is-number": "^7.0.0" - }, - "engines": { - "node": ">=8.0" - } - }, - "node_modules/normalize-path": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", - "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/normalize-range": { - "version": "0.1.2", - "resolved": "https://registry.npmjs.org/normalize-range/-/normalize-range-0.1.2.tgz", - "integrity": "sha512-bdok/XvKII3nUpklnV6P2hxtMNrCboOjAcyBuQnWEhO665FwrSNRxU+AqpsyvO6LgGYPspN+lu5CLtw4jPRKNA==", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/object-hash": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/object-hash/-/object-hash-3.0.0.tgz", - "integrity": 
"sha512-RSn9F68PjH9HqtltsSnqYC1XXoWe9Bju5+213R98cNGttag9q9yAOTzdbsqvIa7aNm5WffBZFpWYr2aWrklWAw==", - "license": "MIT", - "engines": { - "node": ">= 6" - } - }, - "node_modules/picocolors": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", - "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", - "license": "ISC" - }, - "node_modules/postcss": { - "version": "8.4.49", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.49.tgz", - "integrity": "sha512-OCVPnIObs4N29kxTjzLfUryOkvZEq+pf8jTF0lg8E7uETuWHA+v7j3c/xJmiqpX450191LlmZfUKkXxkTry7nA==", - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/postcss" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "nanoid": "^3.3.7", - "picocolors": "^1.1.1", - "source-map-js": "^1.2.1" - }, - "engines": { - "node": "^10 || ^12 || >=14" - } - }, - "node_modules/postcss-import": { - "version": "15.1.0", - "resolved": "https://registry.npmjs.org/postcss-import/-/postcss-import-15.1.0.tgz", - "integrity": "sha512-hpr+J05B2FVYUAXHeK1YyI267J/dDDhMU6B6civm8hSY1jYJnBXxzKDKDswzJmtLHryrjhnDjqqp/49t8FALew==", - "license": "MIT", - "dependencies": { - "postcss-value-parser": "^4.0.0", - "read-cache": "^1.0.0", - "resolve": "^1.1.7" - }, - "engines": { - "node": ">=14.0.0" - }, - "peerDependencies": { - "postcss": "^8.0.0" - } - }, - "node_modules/postcss-import/node_modules/pify": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/pify/-/pify-2.3.0.tgz", - "integrity": "sha512-udgsAY+fTnvv7kI7aaxbqwWNb0AHiB0qBO89PZKPkoTmGOgdbrHDKD+0B2X4uTfJ/FT1R09r9gTsjUjNJotuog==", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/postcss-import/node_modules/read-cache": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/read-cache/-/read-cache-1.0.0.tgz", - "integrity": "sha512-Owdv/Ft7IjOgm/i0xvNDZ1LrRANRfew4b2prF3OWMQLxLfu3bS8FVhCsrSCMK4lR56Y9ya+AThoTpDCTxCmpRA==", - "license": "MIT", - "dependencies": { - "pify": "^2.3.0" - } - }, - "node_modules/postcss-js": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/postcss-js/-/postcss-js-4.0.1.tgz", - "integrity": "sha512-dDLF8pEO191hJMtlHFPRa8xsizHaM82MLfNkUHdUtVEV3tgTp5oj+8qbEqYM57SLfc74KSbw//4SeJma2LRVIw==", - "license": "MIT", - "dependencies": { - "camelcase-css": "^2.0.1" - }, - "engines": { - "node": "^12 || ^14 || >= 16" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - }, - "peerDependencies": { - "postcss": "^8.4.21" - } - }, - "node_modules/postcss-js/node_modules/camelcase-css": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/camelcase-css/-/camelcase-css-2.0.1.tgz", - "integrity": "sha512-QOSvevhslijgYwRx6Rv7zKdMF8lbRmx+uQGx2+vDc+KI/eBnsy9kit5aj23AgGu3pa4t9AgwbnXWqS+iOY+2aA==", - "license": "MIT", - "engines": { - "node": ">= 6" - } - }, - "node_modules/postcss-load-config": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/postcss-load-config/-/postcss-load-config-4.0.2.tgz", - "integrity": "sha512-bSVhyJGL00wMVoPUzAVAnbEoWyqRxkjv64tUl427SKnPrENtq6hJwUojroMz2VB+Q1edmi4IfrAPpami5VVgMQ==", - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - 
], - "license": "MIT", - "dependencies": { - "lilconfig": "^3.0.0", - "yaml": "^2.3.4" - }, - "engines": { - "node": ">= 14" - }, - "peerDependencies": { - "postcss": ">=8.0.9", - "ts-node": ">=9.0.0" - }, - "peerDependenciesMeta": { - "postcss": { - "optional": true - }, - "ts-node": { - "optional": true - } - } - }, - "node_modules/postcss-load-config/node_modules/lilconfig": { + "node_modules/minimatch": { "version": "3.1.2", - "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.2.tgz", - "integrity": "sha512-eop+wDAvpItUys0FWkHIKeC9ybYrTGbU41U5K7+bttZZeohvnY7M9dZ5kB21GNWiFT2q1OoPTvncPCgSOVO5ow==", - "license": "MIT", - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/antonk52" - } - }, - "node_modules/postcss-load-config/node_modules/yaml": { - "version": "2.6.1", - "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.6.1.tgz", - "integrity": "sha512-7r0XPzioN/Q9kXBro/XPnA6kznR73DHq+GXh5ON7ZozRO6aMjbmiBuKste2wslTFkC5d1dw0GooOCepZXJ2SAg==", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "dev": true, "license": "ISC", - "bin": { - "yaml": "bin.mjs" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/postcss-nested": { - "version": "6.2.0", - "resolved": "https://registry.npmjs.org/postcss-nested/-/postcss-nested-6.2.0.tgz", - "integrity": "sha512-HQbt28KulC5AJzG+cZtj9kvKB93CFCdLvog1WFLf1D+xmMvPGlBstkpTEZfK5+AN9hfJocyBFCNiqyS48bpgzQ==", - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", "dependencies": { - "postcss-selector-parser": "^6.1.1" + "brace-expansion": "^1.1.7" }, "engines": { - "node": ">=12.0" - }, - "peerDependencies": { - "postcss": "^8.2.14" + "node": "*" } }, - "node_modules/postcss-selector-parser": { - "version": "6.1.2", - "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-6.1.2.tgz", - "integrity": "sha512-Q8qQfPiZ+THO/3ZrOrO0cJJKfpYCagtMUkXbnEfmgUjwXg6z/WBeOyS9APBBPCTSiDV+s4SwQGu8yFsiMRIudg==", - "license": "MIT", - "dependencies": { - "cssesc": "^3.0.0", - "util-deprecate": "^1.0.2" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/postcss-selector-parser/node_modules/cssesc": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/cssesc/-/cssesc-3.0.0.tgz", - "integrity": "sha512-/Tb/JcjK111nNScGob5MNtsntNM1aCNUDipB/TkwZFhyDrrE47SOx/18wF2bbjgc3ZzCSKW1T5nt5EbFoAz/Vg==", - "license": "MIT", - "bin": { - "cssesc": "bin/cssesc" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/postcss-selector-parser/node_modules/util-deprecate": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", - "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", "license": "MIT" }, - "node_modules/postcss-value-parser": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/postcss-value-parser/-/postcss-value-parser-4.2.0.tgz", - "integrity": 
"sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ==", - "license": "MIT" - }, - "node_modules/postcss/node_modules/nanoid": { + "node_modules/nanoid": { "version": "3.3.8", "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.8.tgz", "integrity": "sha512-WNLf5Sd8oZxOm+TzppcYk8gVOgP+l58xNy58D0nbUnOxOWRWvlcCV4kUF7ltmI6PsrLl/BgKEyS4mqsGChFN0w==", @@ -1968,99 +4652,510 @@ "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" } }, - "node_modules/postcss/node_modules/source-map-js": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", - "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", - "license": "BSD-3-Clause", + "node_modules/natural-compare": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", + "integrity": "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==", + "dev": true, + "license": "MIT" + }, + "node_modules/node-releases": { + "version": "2.0.19", + "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.19.tgz", + "integrity": "sha512-xxOWJsBKtzAq7DY0J+DTzuz58K8e7sJbdgwkbMWQe8UYB6ekmsQ45q0M/tJDsGaZmbC+l7n57UV8Hl5tHxO9uw==", + "license": "MIT" + }, + "node_modules/normalize-range": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/normalize-range/-/normalize-range-0.1.2.tgz", + "integrity": "sha512-bdok/XvKII3nUpklnV6P2hxtMNrCboOjAcyBuQnWEhO665FwrSNRxU+AqpsyvO6LgGYPspN+lu5CLtw4jPRKNA==", + "license": "MIT", "engines": { "node": ">=0.10.0" } }, - "node_modules/punycode.js": { + "node_modules/optionator": { + "version": "0.9.4", + "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz", + "integrity": "sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g==", + "dev": true, + "license": "MIT", + "dependencies": { + "deep-is": "^0.1.3", + "fast-levenshtein": "^2.0.6", + "levn": "^0.4.1", + "prelude-ls": "^1.2.1", + "type-check": "^0.4.0", + "word-wrap": "^1.2.5" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/p-limit": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", + "integrity": "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "yocto-queue": "^0.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/p-locate": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-5.0.0.tgz", + "integrity": "sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==", + "dev": true, + "license": "MIT", + "dependencies": { + "p-limit": "^3.0.2" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/parent-module": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", + "integrity": "sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==", + "dev": true, + "license": "MIT", + "dependencies": { + "callsites": "^3.0.0" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/parse-entities": { + "version": "4.0.2", + "resolved": 
"https://registry.npmjs.org/parse-entities/-/parse-entities-4.0.2.tgz", + "integrity": "sha512-GG2AQYWoLgL877gQIKeRPGO1xF9+eG1ujIb5soS5gPvLQ1y2o8FL90w2QWNdf9I361Mpp7726c+lj3U0qK1uGw==", + "license": "MIT", + "dependencies": { + "@types/unist": "^2.0.0", + "character-entities-legacy": "^3.0.0", + "character-reference-invalid": "^2.0.0", + "decode-named-character-reference": "^1.0.0", + "is-alphanumerical": "^2.0.0", + "is-decimal": "^2.0.0", + "is-hexadecimal": "^2.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/parse-entities/node_modules/@types/unist": { + "version": "2.0.11", + "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.11.tgz", + "integrity": "sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA==", + "license": "MIT" + }, + "node_modules/parse5": { + "version": "7.2.1", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.2.1.tgz", + "integrity": "sha512-BuBYQYlv1ckiPdQi/ohiivi9Sagc9JG+Ozs0r7b/0iK3sKmrb0b9FdWdBbOdx6hBCM/F9Ir82ofnBhtZOjCRPQ==", + "license": "MIT", + "dependencies": { + "entities": "^4.5.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/path-exists": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", + "integrity": "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/picocolors": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", + "license": "ISC" + }, + "node_modules/picomatch": { "version": "2.3.1", - "resolved": "https://registry.npmjs.org/punycode.js/-/punycode.js-2.3.1.tgz", - "integrity": "sha512-uxFIHU0YlHYhDQtV4R9J6a52SLx28BCjT+4ieh7IGbgwVJWO+km431c4yRlREUAsAmt/uMjQUyQHNEPf0M39CA==", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", + "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", + "license": "MIT", + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/postcss": { + "version": "8.5.1", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.1.tgz", + "integrity": "sha512-6oz2beyjc5VMn/KV1pPw8fliQkhBXrVn1Z3TVyqZxU8kZpzEKhBdmCFqI6ZbmGtamQvQGuU1sgPTk8ZrXDD7jQ==", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/postcss" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "nanoid": "^3.3.8", + "picocolors": "^1.1.1", + "source-map-js": "^1.2.1" + }, + "engines": { + "node": "^10 || ^12 || >=14" + } + }, + "node_modules/postcss-value-parser": { + "version": "4.2.0", + "resolved": 
"https://registry.npmjs.org/postcss-value-parser/-/postcss-value-parser-4.2.0.tgz", + "integrity": "sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ==", + "license": "MIT" + }, + "node_modules/prelude-ls": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", + "integrity": "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/prettier": { + "version": "3.4.2", + "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.4.2.tgz", + "integrity": "sha512-e9MewbtFo+Fevyuxn/4rrcDAaq0IYxPGLvObpQjiZBMAzB9IGmzlnG9RZy3FFas+eBMu2vA0CszMeduow5dIuQ==", + "dev": true, + "license": "MIT", + "bin": { + "prettier": "bin/prettier.cjs" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/prettier/prettier?sponsor=1" + } + }, + "node_modules/property-information": { + "version": "6.5.0", + "resolved": "https://registry.npmjs.org/property-information/-/property-information-6.5.0.tgz", + "integrity": "sha512-PgTgs/BlvHxOu8QuEN7wi5A0OmXaBcHpmCSTehcs6Uuu9IkDIEo13Hy7n898RHfrQ49vKCoGeWZSaAK01nwVig==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/punycode": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", + "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==", + "dev": true, "license": "MIT", "engines": { "node": ">=6" } }, - "node_modules/resolve": { - "version": "1.22.8", - "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.8.tgz", - "integrity": "sha512-oKWePCxqpd6FlLvGV1VU0x7bkPmmCNolxzjMf4NczoDnQcIWrAF+cPtZn5i6n+RfD2d9i0tzpKnG6Yk168yIyw==", - "license": "MIT", - "dependencies": { - "is-core-module": "^2.13.0", - "path-parse": "^1.0.7", - "supports-preserve-symlinks-flag": "^1.0.0" - }, - "bin": { - "resolve": "bin/resolve" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/resolve/node_modules/function-bind": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", - "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/resolve/node_modules/hasown": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", - "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", - "license": "MIT", - "dependencies": { - "function-bind": "^1.1.2" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/resolve/node_modules/is-core-module": { - "version": "2.15.1", - "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.15.1.tgz", - "integrity": "sha512-z0vtXSwucUJtANQWldhbtbt7BnL0vxiFjIdDLAatwhDYty2bad6s+rijD6Ri4YuYJubLzIJLUidCh09e1djEVQ==", - "license": "MIT", - "dependencies": { - "hasown": "^2.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/resolve/node_modules/path-parse": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz", - "integrity": 
"sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==", + "node_modules/queue-microtask": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", + "integrity": "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], "license": "MIT" }, - "node_modules/resolve/node_modules/supports-preserve-symlinks-flag": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz", - "integrity": "sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==", + "node_modules/react": { + "version": "18.3.1", + "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz", + "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==", "license": "MIT", + "dependencies": { + "loose-envify": "^1.1.0" + }, "engines": { - "node": ">= 0.4" + "node": ">=0.10.0" + } + }, + "node_modules/react-dom": { + "version": "18.3.1", + "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz", + "integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==", + "license": "MIT", + "dependencies": { + "loose-envify": "^1.1.0", + "scheduler": "^0.23.2" + }, + "peerDependencies": { + "react": "^18.3.1" + } + }, + "node_modules/react-markdown": { + "version": "9.0.3", + "resolved": "https://registry.npmjs.org/react-markdown/-/react-markdown-9.0.3.tgz", + "integrity": "sha512-Yk7Z94dbgYTOrdk41Z74GoKA7rThnsbbqBTRYuxoe08qvfQ9tJVhmAKw6BJS/ZORG7kTy/s1QvYzSuaoBA1qfw==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "devlop": "^1.0.0", + "hast-util-to-jsx-runtime": "^2.0.0", + "html-url-attributes": "^3.0.0", + "mdast-util-to-hast": "^13.0.0", + "remark-parse": "^11.0.0", + "remark-rehype": "^11.0.0", + "unified": "^11.0.0", + "unist-util-visit": "^5.0.0", + "vfile": "^6.0.0" }, "funding": { - "url": "https://github.com/sponsors/ljharb" + "type": "opencollective", + "url": "https://opencollective.com/unified" + }, + "peerDependencies": { + "@types/react": ">=18", + "react": ">=18" + } + }, + "node_modules/react-refresh": { + "version": "0.14.2", + "resolved": "https://registry.npmjs.org/react-refresh/-/react-refresh-0.14.2.tgz", + "integrity": "sha512-jCvmsr+1IUSMUyzOkRcvnVbX3ZYC6g9TDrDbFuFmRDq7PD4yaGbLKNQL6k2jnArV8hjYxh7hVhAZB6s9HDGpZA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/react-router": { + "version": "7.1.5", + "resolved": "https://registry.npmjs.org/react-router/-/react-router-7.1.5.tgz", + "integrity": "sha512-8BUF+hZEU4/z/JD201yK6S+UYhsf58bzYIDq2NS1iGpwxSXDu7F+DeGSkIXMFBuHZB21FSiCzEcUb18cQNdRkA==", + "license": "MIT", + "dependencies": { + "@types/cookie": "^0.6.0", + "cookie": "^1.0.1", + "set-cookie-parser": "^2.6.0", + "turbo-stream": "2.4.0" + }, + "engines": { + "node": ">=20.0.0" + }, + "peerDependencies": { + "react": ">=18", + "react-dom": ">=18" + }, + "peerDependenciesMeta": { + "react-dom": { + "optional": true + } + } + }, + "node_modules/rehype-highlight": { + "version": "7.0.2", + "resolved": 
"https://registry.npmjs.org/rehype-highlight/-/rehype-highlight-7.0.2.tgz", + "integrity": "sha512-k158pK7wdC2qL3M5NcZROZ2tR/l7zOzjxXd5VGdcfIyoijjQqpHd3JKtYSBDpDZ38UI2WJWuFAtkMDxmx5kstA==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "hast-util-to-text": "^4.0.0", + "lowlight": "^3.0.0", + "unist-util-visit": "^5.0.0", + "vfile": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/rehype-katex": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/rehype-katex/-/rehype-katex-7.0.1.tgz", + "integrity": "sha512-OiM2wrZ/wuhKkigASodFoo8wimG3H12LWQaH8qSPVJn9apWKFSH3YOCtbKpBorTVw/eI7cuT21XBbvwEswbIOA==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "@types/katex": "^0.16.0", + "hast-util-from-html-isomorphic": "^2.0.0", + "hast-util-to-text": "^4.0.0", + "katex": "^0.16.0", + "unist-util-visit-parents": "^6.0.0", + "vfile": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/remark-breaks": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/remark-breaks/-/remark-breaks-4.0.0.tgz", + "integrity": "sha512-IjEjJOkH4FuJvHZVIW0QCDWxcG96kCq7An/KVH2NfJe6rKZU2AsHeB3OEjPNRxi4QC34Xdx7I2KGYn6IpT7gxQ==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "mdast-util-newline-to-break": "^2.0.0", + "unified": "^11.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/remark-gfm": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/remark-gfm/-/remark-gfm-4.0.0.tgz", + "integrity": "sha512-U92vJgBPkbw4Zfu/IiW2oTZLSL3Zpv+uI7My2eq8JxKgqraFdU8YUGicEJCEgSbeaG+QDFqIcwwfMTOEelPxuA==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "mdast-util-gfm": "^3.0.0", + "micromark-extension-gfm": "^3.0.0", + "remark-parse": "^11.0.0", + "remark-stringify": "^11.0.0", + "unified": "^11.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/remark-math": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/remark-math/-/remark-math-6.0.0.tgz", + "integrity": "sha512-MMqgnP74Igy+S3WwnhQ7kqGlEerTETXMvJhrUzDikVZ2/uogJCb+WHUg97hK9/jcfc0dkD73s3LN8zU49cTEtA==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "mdast-util-math": "^3.0.0", + "micromark-extension-math": "^3.0.0", + "unified": "^11.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/remark-parse": { + "version": "11.0.0", + "resolved": "https://registry.npmjs.org/remark-parse/-/remark-parse-11.0.0.tgz", + "integrity": "sha512-FCxlKLNGknS5ba/1lmpYijMUzX2esxW5xQqjWxw2eHFfS2MSdaHVINFmhjo+qN1WhZhNimq0dZATN9pH0IDrpA==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "mdast-util-from-markdown": "^2.0.0", + "micromark-util-types": "^2.0.0", + "unified": "^11.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/remark-rehype": { + "version": "11.1.1", + "resolved": "https://registry.npmjs.org/remark-rehype/-/remark-rehype-11.1.1.tgz", + "integrity": "sha512-g/osARvjkBXb6Wo0XvAeXQohVta8i84ACbenPpoSsxTOQH/Ae0/RGP4WZgnMH5pMLpsj4FG7OHmcIcXxpza8eQ==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "mdast-util-to-hast": 
"^13.0.0", + "unified": "^11.0.0", + "vfile": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/remark-stringify": { + "version": "11.0.0", + "resolved": "https://registry.npmjs.org/remark-stringify/-/remark-stringify-11.0.0.tgz", + "integrity": "sha512-1OSmLd3awB/t8qdoEOMazZkNsfVTeY4fTsgzcQFdXNq8ToTN4ZGwrMnlda4K6smTFKD+GRV6O48i6Z4iKgPPpw==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "mdast-util-to-markdown": "^2.0.0", + "unified": "^11.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/resolve-from": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", + "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=4" + } + }, + "node_modules/reusify": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.0.4.tgz", + "integrity": "sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw==", + "dev": true, + "license": "MIT", + "engines": { + "iojs": ">=1.0.0", + "node": ">=0.10.0" } }, "node_modules/rollup": { - "version": "4.28.0", - "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.28.0.tgz", - "integrity": "sha512-G9GOrmgWHBma4YfCcX8PjH0qhXSdH8B4HDE2o4/jaxj93S4DPCIDoLcXz99eWMji4hB29UFCEd7B2gwGJDR9cQ==", + "version": "4.34.2", + "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.34.2.tgz", + "integrity": "sha512-sBDUoxZEaqLu9QeNalL8v3jw6WjPku4wfZGyTU7l7m1oC+rpRihXc/n/H+4148ZkGz5Xli8CHMns//fFGKvpIQ==", "license": "MIT", "dependencies": { "@types/estree": "1.0.6" @@ -2073,45 +5168,51 @@ "npm": ">=8.0.0" }, "optionalDependencies": { - "@rollup/rollup-android-arm-eabi": "4.28.0", - "@rollup/rollup-android-arm64": "4.28.0", - "@rollup/rollup-darwin-arm64": "4.28.0", - "@rollup/rollup-darwin-x64": "4.28.0", - "@rollup/rollup-freebsd-arm64": "4.28.0", - "@rollup/rollup-freebsd-x64": "4.28.0", - "@rollup/rollup-linux-arm-gnueabihf": "4.28.0", - "@rollup/rollup-linux-arm-musleabihf": "4.28.0", - "@rollup/rollup-linux-arm64-gnu": "4.28.0", - "@rollup/rollup-linux-arm64-musl": "4.28.0", - "@rollup/rollup-linux-powerpc64le-gnu": "4.28.0", - "@rollup/rollup-linux-riscv64-gnu": "4.28.0", - "@rollup/rollup-linux-s390x-gnu": "4.28.0", - "@rollup/rollup-linux-x64-gnu": "4.28.0", - "@rollup/rollup-linux-x64-musl": "4.28.0", - "@rollup/rollup-win32-arm64-msvc": "4.28.0", - "@rollup/rollup-win32-ia32-msvc": "4.28.0", - "@rollup/rollup-win32-x64-msvc": "4.28.0", + "@rollup/rollup-android-arm-eabi": "4.34.2", + "@rollup/rollup-android-arm64": "4.34.2", + "@rollup/rollup-darwin-arm64": "4.34.2", + "@rollup/rollup-darwin-x64": "4.34.2", + "@rollup/rollup-freebsd-arm64": "4.34.2", + "@rollup/rollup-freebsd-x64": "4.34.2", + "@rollup/rollup-linux-arm-gnueabihf": "4.34.2", + "@rollup/rollup-linux-arm-musleabihf": "4.34.2", + "@rollup/rollup-linux-arm64-gnu": "4.34.2", + "@rollup/rollup-linux-arm64-musl": "4.34.2", + "@rollup/rollup-linux-loongarch64-gnu": "4.34.2", + "@rollup/rollup-linux-powerpc64le-gnu": "4.34.2", + "@rollup/rollup-linux-riscv64-gnu": "4.34.2", + "@rollup/rollup-linux-s390x-gnu": "4.34.2", + "@rollup/rollup-linux-x64-gnu": "4.34.2", + "@rollup/rollup-linux-x64-musl": "4.34.2", + "@rollup/rollup-win32-arm64-msvc": "4.34.2", + "@rollup/rollup-win32-ia32-msvc": "4.34.2", + 
"@rollup/rollup-win32-x64-msvc": "4.34.2", "fsevents": "~2.3.2" } }, - "node_modules/rollup/node_modules/@rollup/rollup-darwin-arm64": { - "version": "4.28.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.28.0.tgz", - "integrity": "sha512-lmKx9yHsppblnLQZOGxdO66gT77bvdBtr/0P+TPOseowE7D9AJoBw8ZDULRasXRWf1Z86/gcOdpBrV6VDUY36Q==", - "cpu": [ - "arm64" + "node_modules/run-parallel": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", + "integrity": "sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } ], "license": "MIT", - "optional": true, - "os": [ - "darwin" - ] - }, - "node_modules/rollup/node_modules/@types/estree": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.6.tgz", - "integrity": "sha512-AYnb1nQyY49te+VRAVgmzfcgjYS91mY5P0TKUDCLEM+gNnA+3T6rWITXRLYCpahpqSQbN5cE+gHpnPyXjHWxcw==", - "license": "MIT" + "dependencies": { + "queue-microtask": "^1.2.2" + } }, "node_modules/rxjs": { "version": "7.8.1", @@ -2124,9 +5225,9 @@ } }, "node_modules/sass-embedded": { - "version": "1.83.0", - "resolved": "https://registry.npmjs.org/sass-embedded/-/sass-embedded-1.83.0.tgz", - "integrity": "sha512-/8cYZeL39evUqe0o//193na51Q1VWZ61qhxioQvLJwOtWIrX+PgNhCyD8RSuTtmzc4+6+waFZf899bfp/MCUwA==", + "version": "1.83.4", + "resolved": "https://registry.npmjs.org/sass-embedded/-/sass-embedded-1.83.4.tgz", + "integrity": "sha512-Hf2burRA/y5PGxsg6jB9UpoK/xZ6g/pgrkOcdl6j+rRg1Zj8XhGKZ1MTysZGtTPUUmiiErqzkP5+Kzp95yv9GQ==", "devOptional": true, "license": "MIT", "dependencies": { @@ -2146,32 +5247,32 @@ "node": ">=16.0.0" }, "optionalDependencies": { - "sass-embedded-android-arm": "1.83.0", - "sass-embedded-android-arm64": "1.83.0", - "sass-embedded-android-ia32": "1.83.0", - "sass-embedded-android-riscv64": "1.83.0", - "sass-embedded-android-x64": "1.83.0", - "sass-embedded-darwin-arm64": "1.83.0", - "sass-embedded-darwin-x64": "1.83.0", - "sass-embedded-linux-arm": "1.83.0", - "sass-embedded-linux-arm64": "1.83.0", - "sass-embedded-linux-ia32": "1.83.0", - "sass-embedded-linux-musl-arm": "1.83.0", - "sass-embedded-linux-musl-arm64": "1.83.0", - "sass-embedded-linux-musl-ia32": "1.83.0", - "sass-embedded-linux-musl-riscv64": "1.83.0", - "sass-embedded-linux-musl-x64": "1.83.0", - "sass-embedded-linux-riscv64": "1.83.0", - "sass-embedded-linux-x64": "1.83.0", - "sass-embedded-win32-arm64": "1.83.0", - "sass-embedded-win32-ia32": "1.83.0", - "sass-embedded-win32-x64": "1.83.0" + "sass-embedded-android-arm": "1.83.4", + "sass-embedded-android-arm64": "1.83.4", + "sass-embedded-android-ia32": "1.83.4", + "sass-embedded-android-riscv64": "1.83.4", + "sass-embedded-android-x64": "1.83.4", + "sass-embedded-darwin-arm64": "1.83.4", + "sass-embedded-darwin-x64": "1.83.4", + "sass-embedded-linux-arm": "1.83.4", + "sass-embedded-linux-arm64": "1.83.4", + "sass-embedded-linux-ia32": "1.83.4", + "sass-embedded-linux-musl-arm": "1.83.4", + "sass-embedded-linux-musl-arm64": "1.83.4", + "sass-embedded-linux-musl-ia32": "1.83.4", + "sass-embedded-linux-musl-riscv64": "1.83.4", + "sass-embedded-linux-musl-x64": "1.83.4", + "sass-embedded-linux-riscv64": "1.83.4", + "sass-embedded-linux-x64": "1.83.4", + 
"sass-embedded-win32-arm64": "1.83.4", + "sass-embedded-win32-ia32": "1.83.4", + "sass-embedded-win32-x64": "1.83.4" } }, "node_modules/sass-embedded-android-arm": { - "version": "1.83.0", - "resolved": "https://registry.npmjs.org/sass-embedded-android-arm/-/sass-embedded-android-arm-1.83.0.tgz", - "integrity": "sha512-uwFSXzJlfbd4Px189xE5l+cxN8+TQpXdQgJec7TIrb4HEY7imabtpYufpVdqUVwT1/uiis5V4+qIEC4Vl5XObQ==", + "version": "1.83.4", + "resolved": "https://registry.npmjs.org/sass-embedded-android-arm/-/sass-embedded-android-arm-1.83.4.tgz", + "integrity": "sha512-9Z4pJAOgEkXa3VDY/o+U6l5XvV0mZTJcSl0l/mSPHihjAHSpLYnOW6+KOWeM8dxqrsqTYcd6COzhanI/a++5Gw==", "cpu": [ "arm" ], @@ -2185,9 +5286,9 @@ } }, "node_modules/sass-embedded-android-arm64": { - "version": "1.83.0", - "resolved": "https://registry.npmjs.org/sass-embedded-android-arm64/-/sass-embedded-android-arm64-1.83.0.tgz", - "integrity": "sha512-GBiCvM4a2rkWBLdYDxI6XYnprfk5U5c81g69RC2X6kqPuzxzx8qTArQ9M6keFK4+iDQ5N9QTwFCr0KbZTn+ZNQ==", + "version": "1.83.4", + "resolved": "https://registry.npmjs.org/sass-embedded-android-arm64/-/sass-embedded-android-arm64-1.83.4.tgz", + "integrity": "sha512-tgX4FzmbVqnQmD67ZxQDvI+qFNABrboOQgwsG05E5bA/US42zGajW9AxpECJYiMXVOHmg+d81ICbjb0fsVHskw==", "cpu": [ "arm64" ], @@ -2201,9 +5302,9 @@ } }, "node_modules/sass-embedded-android-ia32": { - "version": "1.83.0", - "resolved": "https://registry.npmjs.org/sass-embedded-android-ia32/-/sass-embedded-android-ia32-1.83.0.tgz", - "integrity": "sha512-5ATPdGo2SICqAhiJl/Z8KQ23zH4sGgobGgux0TnrNtt83uHZ+r+To/ubVJ7xTkZxed+KJZnIpolGD8dQyQqoTg==", + "version": "1.83.4", + "resolved": "https://registry.npmjs.org/sass-embedded-android-ia32/-/sass-embedded-android-ia32-1.83.4.tgz", + "integrity": "sha512-RsFOziFqPcfZXdFRULC4Ayzy9aK6R6FwQ411broCjlOBX+b0gurjRadkue3cfUEUR5mmy0KeCbp7zVKPLTK+5Q==", "cpu": [ "ia32" ], @@ -2217,9 +5318,9 @@ } }, "node_modules/sass-embedded-android-riscv64": { - "version": "1.83.0", - "resolved": "https://registry.npmjs.org/sass-embedded-android-riscv64/-/sass-embedded-android-riscv64-1.83.0.tgz", - "integrity": "sha512-aveknUOB8GZewOzVn2Uwk+DKcncTR50Q6vtzslNMGbYnxtgQNHzy8A1qVEviNUruex+pHofppeMK4iMPFAbiEQ==", + "version": "1.83.4", + "resolved": "https://registry.npmjs.org/sass-embedded-android-riscv64/-/sass-embedded-android-riscv64-1.83.4.tgz", + "integrity": "sha512-EHwh0nmQarBBrMRU928eTZkFGx19k/XW2YwbPR4gBVdWLkbTgCA5aGe8hTE6/1zStyx++3nDGvTZ78+b/VvvLg==", "cpu": [ "riscv64" ], @@ -2233,9 +5334,9 @@ } }, "node_modules/sass-embedded-android-x64": { - "version": "1.83.0", - "resolved": "https://registry.npmjs.org/sass-embedded-android-x64/-/sass-embedded-android-x64-1.83.0.tgz", - "integrity": "sha512-WqIay/72ncyf9Ph4vS742J3a73wZihWmzFUwpn1OD6lme1Aj4eWzWIve5IVnlTEJgcZcDHu6ECID9IZgehJKoA==", + "version": "1.83.4", + "resolved": "https://registry.npmjs.org/sass-embedded-android-x64/-/sass-embedded-android-x64-1.83.4.tgz", + "integrity": "sha512-0PgQNuPWYy1jEOEPDVsV89KfqOsMLIp9CSbjBY7jRcwRhyVAcigqrUG6bDeNtojHUYKA1kU+Eh/85WxOHUOgBw==", "cpu": [ "x64" ], @@ -2249,9 +5350,9 @@ } }, "node_modules/sass-embedded-darwin-arm64": { - "version": "1.83.0", - "resolved": "https://registry.npmjs.org/sass-embedded-darwin-arm64/-/sass-embedded-darwin-arm64-1.83.0.tgz", - "integrity": "sha512-XQl9QqgxFFIPm/CzHhmppse5o9ocxrbaAdC2/DAnlAqvYWBBtgFqPjGoYlej13h9SzfvNoogx+y9r+Ap+e+hYg==", + "version": "1.83.4", + "resolved": "https://registry.npmjs.org/sass-embedded-darwin-arm64/-/sass-embedded-darwin-arm64-1.83.4.tgz", + "integrity": 
"sha512-rp2ywymWc3nymnSnAFG5R/8hvxWCsuhK3wOnD10IDlmNB7o4rzKby1c+2ZfpQGowlYGWsWWTgz8FW2qzmZsQRw==", "cpu": [ "arm64" ], @@ -2265,9 +5366,9 @@ } }, "node_modules/sass-embedded-darwin-x64": { - "version": "1.83.0", - "resolved": "https://registry.npmjs.org/sass-embedded-darwin-x64/-/sass-embedded-darwin-x64-1.83.0.tgz", - "integrity": "sha512-ERQ7Tvp1kFOW3ux4VDFIxb7tkYXHYc+zJpcrbs0hzcIO5ilIRU2tIOK1OrNwrFO6Qxyf7AUuBwYKLAtIU/Nz7g==", + "version": "1.83.4", + "resolved": "https://registry.npmjs.org/sass-embedded-darwin-x64/-/sass-embedded-darwin-x64-1.83.4.tgz", + "integrity": "sha512-kLkN2lXz9PCgGfDS8Ev5YVcl/V2173L6379en/CaFuJJi7WiyPgBymW7hOmfCt4uO4R1y7CP2Uc08DRtZsBlAA==", "cpu": [ "x64" ], @@ -2281,9 +5382,9 @@ } }, "node_modules/sass-embedded-linux-arm": { - "version": "1.83.0", - "resolved": "https://registry.npmjs.org/sass-embedded-linux-arm/-/sass-embedded-linux-arm-1.83.0.tgz", - "integrity": "sha512-baG9RYBJxUFmqwDNC9h9ZFElgJoyO3jgHGjzEZ1wHhIS9anpG+zZQvO8bHx3dBpKEImX+DBeLX+CxsFR9n81gQ==", + "version": "1.83.4", + "resolved": "https://registry.npmjs.org/sass-embedded-linux-arm/-/sass-embedded-linux-arm-1.83.4.tgz", + "integrity": "sha512-nL90ryxX2lNmFucr9jYUyHHx21AoAgdCL1O5Ltx2rKg2xTdytAGHYo2MT5S0LIeKLa/yKP/hjuSvrbICYNDvtA==", "cpu": [ "arm" ], @@ -2297,9 +5398,9 @@ } }, "node_modules/sass-embedded-linux-arm64": { - "version": "1.83.0", - "resolved": "https://registry.npmjs.org/sass-embedded-linux-arm64/-/sass-embedded-linux-arm64-1.83.0.tgz", - "integrity": "sha512-syEAVTJt4qhaMLxrSwOWa46zdqHJdnqJkLUK+t9aCr8xqBZLPxSUeIGji76uOehQZ1C+KGFj6n9xstHN6wzOJw==", + "version": "1.83.4", + "resolved": "https://registry.npmjs.org/sass-embedded-linux-arm64/-/sass-embedded-linux-arm64-1.83.4.tgz", + "integrity": "sha512-E0zjsZX2HgESwyqw31EHtI39DKa7RgK7nvIhIRco1d0QEw227WnoR9pjH3M/ZQy4gQj3GKilOFHM5Krs/omeIA==", "cpu": [ "arm64" ], @@ -2313,9 +5414,9 @@ } }, "node_modules/sass-embedded-linux-ia32": { - "version": "1.83.0", - "resolved": "https://registry.npmjs.org/sass-embedded-linux-ia32/-/sass-embedded-linux-ia32-1.83.0.tgz", - "integrity": "sha512-RRBxQxMpoxu5+XcSSc6QR/o9asEwUzR8AbCS83RaXcdTIHTa/CccQsiAoDDoPlRsMTLqnzs0LKL4CfOsf7zBbA==", + "version": "1.83.4", + "resolved": "https://registry.npmjs.org/sass-embedded-linux-ia32/-/sass-embedded-linux-ia32-1.83.4.tgz", + "integrity": "sha512-ew5HpchSzgAYbQoriRh8QhlWn5Kw2nQ2jHoV9YLwGKe3fwwOWA0KDedssvDv7FWnY/FCqXyymhLd6Bxae4Xquw==", "cpu": [ "ia32" ], @@ -2329,9 +5430,9 @@ } }, "node_modules/sass-embedded-linux-musl-arm": { - "version": "1.83.0", - "resolved": "https://registry.npmjs.org/sass-embedded-linux-musl-arm/-/sass-embedded-linux-musl-arm-1.83.0.tgz", - "integrity": "sha512-Yc7u2TelCfBab+PRob9/MNJFh3EooMiz4urvhejXkihTiKSHGCv5YqDdtWzvyb9tY2Jb7YtYREVuHwfdVn3dTQ==", + "version": "1.83.4", + "resolved": "https://registry.npmjs.org/sass-embedded-linux-musl-arm/-/sass-embedded-linux-musl-arm-1.83.4.tgz", + "integrity": "sha512-0RrJRwMrmm+gG0VOB5b5Cjs7Sd+lhqpQJa6EJNEaZHljJokEfpE5GejZsGMRMIQLxEvVphZnnxl6sonCGFE/QQ==", "cpu": [ "arm" ], @@ -2345,9 +5446,9 @@ } }, "node_modules/sass-embedded-linux-musl-arm64": { - "version": "1.83.0", - "resolved": "https://registry.npmjs.org/sass-embedded-linux-musl-arm64/-/sass-embedded-linux-musl-arm64-1.83.0.tgz", - "integrity": "sha512-Y7juhPHClUO2H5O+u+StRy6SEAcwZ+hTEk5WJdEmo1Bb1gDtfHvJaWB/iFZJ2tW0W1e865AZeUrC4OcOFjyAQA==", + "version": "1.83.4", + "resolved": "https://registry.npmjs.org/sass-embedded-linux-musl-arm64/-/sass-embedded-linux-musl-arm64-1.83.4.tgz", + "integrity": 
"sha512-IzMgalf6MZOxgp4AVCgsaWAFDP/IVWOrgVXxkyhw29fyAEoSWBJH4k87wyPhEtxSuzVHLxKNbc8k3UzdWmlBFg==", "cpu": [ "arm64" ], @@ -2361,9 +5462,9 @@ } }, "node_modules/sass-embedded-linux-musl-ia32": { - "version": "1.83.0", - "resolved": "https://registry.npmjs.org/sass-embedded-linux-musl-ia32/-/sass-embedded-linux-musl-ia32-1.83.0.tgz", - "integrity": "sha512-arQeYwGmwXV8byx5G1PtSzZWW1jbkfR5qrIHMEbTFSAvAxpqjgSvCvrHMOFd73FcMxVaYh4BX9LQNbKinkbEdg==", + "version": "1.83.4", + "resolved": "https://registry.npmjs.org/sass-embedded-linux-musl-ia32/-/sass-embedded-linux-musl-ia32-1.83.4.tgz", + "integrity": "sha512-LLb4lYbcxPzX4UaJymYXC+WwokxUlfTJEFUv5VF0OTuSsHAGNRs/rslPtzVBTvMeG9TtlOQDhku1F7G6iaDotA==", "cpu": [ "ia32" ], @@ -2377,9 +5478,9 @@ } }, "node_modules/sass-embedded-linux-musl-riscv64": { - "version": "1.83.0", - "resolved": "https://registry.npmjs.org/sass-embedded-linux-musl-riscv64/-/sass-embedded-linux-musl-riscv64-1.83.0.tgz", - "integrity": "sha512-E6uzlIWz59rut+Z3XR6mLG915zNzv07ISvj3GUNZENdHM7dF8GQ//ANoIpl5PljMQKp89GnYdvo6kj2gnaBf/g==", + "version": "1.83.4", + "resolved": "https://registry.npmjs.org/sass-embedded-linux-musl-riscv64/-/sass-embedded-linux-musl-riscv64-1.83.4.tgz", + "integrity": "sha512-zoKlPzD5Z13HKin1UGR74QkEy+kZEk2AkGX5RelRG494mi+IWwRuWCppXIovor9+BQb9eDWPYPoMVahwN5F7VA==", "cpu": [ "riscv64" ], @@ -2393,9 +5494,9 @@ } }, "node_modules/sass-embedded-linux-musl-x64": { - "version": "1.83.0", - "resolved": "https://registry.npmjs.org/sass-embedded-linux-musl-x64/-/sass-embedded-linux-musl-x64-1.83.0.tgz", - "integrity": "sha512-eAMK6tyGqvqr21r9g8BnR3fQc1rYFj85RGduSQ3xkITZ6jOAnOhuU94N5fwRS852Hpws0lXhET+7JHXgg3U18w==", + "version": "1.83.4", + "resolved": "https://registry.npmjs.org/sass-embedded-linux-musl-x64/-/sass-embedded-linux-musl-x64-1.83.4.tgz", + "integrity": "sha512-hB8+/PYhfEf2zTIcidO5Bpof9trK6WJjZ4T8g2MrxQh8REVtdPcgIkoxczRynqybf9+fbqbUwzXtiUao2GV+vQ==", "cpu": [ "x64" ], @@ -2409,9 +5510,9 @@ } }, "node_modules/sass-embedded-linux-riscv64": { - "version": "1.83.0", - "resolved": "https://registry.npmjs.org/sass-embedded-linux-riscv64/-/sass-embedded-linux-riscv64-1.83.0.tgz", - "integrity": "sha512-Ojpi78pTv02sy2fUYirRGXHLY3fPnV/bvwuC2i5LwPQw2LpCcFyFTtN0c5h4LJDk9P6wr+/ZB/JXU8tHIOlK+Q==", + "version": "1.83.4", + "resolved": "https://registry.npmjs.org/sass-embedded-linux-riscv64/-/sass-embedded-linux-riscv64-1.83.4.tgz", + "integrity": "sha512-83fL4n+oeDJ0Y4KjASmZ9jHS1Vl9ESVQYHMhJE0i4xDi/P3BNarm2rsKljq/QtrwGpbqwn8ujzOu7DsNCMDSHA==", "cpu": [ "riscv64" ], @@ -2425,9 +5526,9 @@ } }, "node_modules/sass-embedded-linux-x64": { - "version": "1.83.0", - "resolved": "https://registry.npmjs.org/sass-embedded-linux-x64/-/sass-embedded-linux-x64-1.83.0.tgz", - "integrity": "sha512-3iLjlXdoPfgZRtX4odhRvka1BQs5mAXqfCtDIQBgh/o0JnGPzJIWWl9bYLpHxK8qb+uyVBxXYgXpI0sCzArBOw==", + "version": "1.83.4", + "resolved": "https://registry.npmjs.org/sass-embedded-linux-x64/-/sass-embedded-linux-x64-1.83.4.tgz", + "integrity": "sha512-NlnGdvCmTD5PK+LKXlK3sAuxOgbRIEoZfnHvxd157imCm/s2SYF/R28D0DAAjEViyI8DovIWghgbcqwuertXsA==", "cpu": [ "x64" ], @@ -2441,9 +5542,9 @@ } }, "node_modules/sass-embedded-win32-arm64": { - "version": "1.83.0", - "resolved": "https://registry.npmjs.org/sass-embedded-win32-arm64/-/sass-embedded-win32-arm64-1.83.0.tgz", - "integrity": "sha512-iOHw/8/t2dlTW3lOFwG5eUbiwhEyGWawivlKWJ8lkXH7fjMpVx2VO9zCFAm8RvY9xOHJ9sf1L7g5bx3EnNP9BQ==", + "version": "1.83.4", + "resolved": "https://registry.npmjs.org/sass-embedded-win32-arm64/-/sass-embedded-win32-arm64-1.83.4.tgz", + 
"integrity": "sha512-J2BFKrEaeSrVazU2qTjyQdAk+MvbzJeTuCET0uAJEXSKtvQ3AzxvzndS7LqkDPbF32eXAHLw8GVpwcBwKbB3Uw==", "cpu": [ "arm64" ], @@ -2457,9 +5558,9 @@ } }, "node_modules/sass-embedded-win32-ia32": { - "version": "1.83.0", - "resolved": "https://registry.npmjs.org/sass-embedded-win32-ia32/-/sass-embedded-win32-ia32-1.83.0.tgz", - "integrity": "sha512-2PxNXJ8Pad4geVcTXY4rkyTr5AwbF8nfrCTDv0ulbTvPhzX2mMKEGcBZUXWn5BeHZTBc6whNMfS7d5fQXR9dDQ==", + "version": "1.83.4", + "resolved": "https://registry.npmjs.org/sass-embedded-win32-ia32/-/sass-embedded-win32-ia32-1.83.4.tgz", + "integrity": "sha512-uPAe9T/5sANFhJS5dcfAOhOJy8/l2TRYG4r+UO3Wp4yhqbN7bggPvY9c7zMYS0OC8tU/bCvfYUDFHYMCl91FgA==", "cpu": [ "ia32" ], @@ -2473,9 +5574,9 @@ } }, "node_modules/sass-embedded-win32-x64": { - "version": "1.83.0", - "resolved": "https://registry.npmjs.org/sass-embedded-win32-x64/-/sass-embedded-win32-x64-1.83.0.tgz", - "integrity": "sha512-muBXkFngM6eLTNqOV0FQi7Dv9s+YRQ42Yem26mosdan/GmJQc81deto6uDTgrYn+bzFNmiXcOdfm+0MkTWK3OQ==", + "version": "1.83.4", + "resolved": "https://registry.npmjs.org/sass-embedded-win32-x64/-/sass-embedded-win32-x64-1.83.4.tgz", + "integrity": "sha512-C9fkDY0jKITdJFij4UbfPFswxoXN9O/Dr79v17fJnstVwtUojzVJWKHUXvF0Zg2LIR7TCc4ju3adejKFxj7ueA==", "cpu": [ "x64" ], @@ -2488,626 +5589,7 @@ "node": ">=14.0.0" } }, - "node_modules/sucrase": { - "version": "3.35.0", - "resolved": "https://registry.npmjs.org/sucrase/-/sucrase-3.35.0.tgz", - "integrity": "sha512-8EbVDiu9iN/nESwxeSxDKe0dunta1GOlHufmSSXxMD2z2/tMZpDMpvXQGsc+ajGo8y2uYUmixaSRUc/QPoQ0GA==", - "license": "MIT", - "dependencies": { - "@jridgewell/gen-mapping": "^0.3.2", - "commander": "^4.0.0", - "glob": "^10.3.10", - "lines-and-columns": "^1.1.6", - "mz": "^2.7.0", - "pirates": "^4.0.1", - "ts-interface-checker": "^0.1.9" - }, - "bin": { - "sucrase": "bin/sucrase", - "sucrase-node": "bin/sucrase-node" - }, - "engines": { - "node": ">=16 || 14 >=14.17" - } - }, - "node_modules/sucrase/node_modules/@isaacs/cliui": { - "version": "8.0.2", - "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", - "integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==", - "license": "ISC", - "dependencies": { - "string-width": "^5.1.2", - "string-width-cjs": "npm:string-width@^4.2.0", - "strip-ansi": "^7.0.1", - "strip-ansi-cjs": "npm:strip-ansi@^6.0.1", - "wrap-ansi": "^8.1.0", - "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/sucrase/node_modules/@jridgewell/gen-mapping": { - "version": "0.3.5", - "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.5.tgz", - "integrity": "sha512-IzL8ZoEDIBRWEzlCcRhOaCupYyN5gdIK+Q6fbFdPDg6HqX6jpkItn7DFIpW9LQzXG6Df9sA7+OKnq0qlz/GaQg==", - "license": "MIT", - "dependencies": { - "@jridgewell/set-array": "^1.2.1", - "@jridgewell/sourcemap-codec": "^1.4.10", - "@jridgewell/trace-mapping": "^0.3.24" - }, - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/sucrase/node_modules/@jridgewell/resolve-uri": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", - "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", - "license": "MIT", - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/sucrase/node_modules/@jridgewell/set-array": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/@jridgewell/set-array/-/set-array-1.2.1.tgz", - 
"integrity": "sha512-R8gLRTZeyp03ymzP/6Lil/28tGeGEzhx1q2k703KGWRAI1VdvPIXdG70VJc2pAMw3NA6JKL5hhFu1sJX0Mnn/A==", - "license": "MIT", - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/sucrase/node_modules/@jridgewell/sourcemap-codec": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.0.tgz", - "integrity": "sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==", - "license": "MIT" - }, - "node_modules/sucrase/node_modules/@jridgewell/trace-mapping": { - "version": "0.3.25", - "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.25.tgz", - "integrity": "sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ==", - "license": "MIT", - "dependencies": { - "@jridgewell/resolve-uri": "^3.1.0", - "@jridgewell/sourcemap-codec": "^1.4.14" - } - }, - "node_modules/sucrase/node_modules/@pkgjs/parseargs": { - "version": "0.11.0", - "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", - "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==", - "license": "MIT", - "optional": true, - "engines": { - "node": ">=14" - } - }, - "node_modules/sucrase/node_modules/ansi-regex": { - "version": "6.1.0", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.1.0.tgz", - "integrity": "sha512-7HSX4QQb4CspciLpVFwyRe79O3xsIZDDLER21kERQ71oaPodF8jL725AgJMFAYbooIqolJoRLuM81SpeUkpkvA==", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/ansi-regex?sponsor=1" - } - }, - "node_modules/sucrase/node_modules/ansi-styles": { - "version": "6.2.1", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz", - "integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/sucrase/node_modules/any-promise": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/any-promise/-/any-promise-1.3.0.tgz", - "integrity": "sha512-7UvmKalWRt1wgjL1RrGxoSJW/0QZFIegpeGvZG9kjp8vrRu55XTHbwnqq2GpXm9uLbcuhxm3IqX9OB4MZR1b2A==", - "license": "MIT" - }, - "node_modules/sucrase/node_modules/balanced-match": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", - "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", - "license": "MIT" - }, - "node_modules/sucrase/node_modules/brace-expansion": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", - "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", - "license": "MIT", - "dependencies": { - "balanced-match": "^1.0.0" - } - }, - "node_modules/sucrase/node_modules/color-convert": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", - "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "license": "MIT", - "dependencies": { - "color-name": "~1.1.4" - }, - "engines": { - "node": ">=7.0.0" - } - }, - "node_modules/sucrase/node_modules/color-name": { - "version": "1.1.4", - "resolved": 
"https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "license": "MIT" - }, - "node_modules/sucrase/node_modules/commander": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/commander/-/commander-4.1.1.tgz", - "integrity": "sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA==", - "license": "MIT", - "engines": { - "node": ">= 6" - } - }, - "node_modules/sucrase/node_modules/cross-spawn": { - "version": "7.0.6", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", - "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", - "license": "MIT", - "dependencies": { - "path-key": "^3.1.0", - "shebang-command": "^2.0.0", - "which": "^2.0.1" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/sucrase/node_modules/eastasianwidth": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz", - "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==", - "license": "MIT" - }, - "node_modules/sucrase/node_modules/emoji-regex": { - "version": "9.2.2", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", - "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", - "license": "MIT" - }, - "node_modules/sucrase/node_modules/foreground-child": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.0.tgz", - "integrity": "sha512-Ld2g8rrAyMYFXBhEqMz8ZAHBi4J4uS1i/CxGMDnjyFWddMXLVcDp051DZfu+t7+ab7Wv6SMqpWmyFIj5UbfFvg==", - "license": "ISC", - "dependencies": { - "cross-spawn": "^7.0.0", - "signal-exit": "^4.0.1" - }, - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/sucrase/node_modules/glob": { - "version": "10.4.5", - "resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz", - "integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==", - "license": "ISC", - "dependencies": { - "foreground-child": "^3.1.0", - "jackspeak": "^3.1.2", - "minimatch": "^9.0.4", - "minipass": "^7.1.2", - "package-json-from-dist": "^1.0.0", - "path-scurry": "^1.11.1" - }, - "bin": { - "glob": "dist/esm/bin.mjs" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/sucrase/node_modules/is-fullwidth-code-point": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", - "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/sucrase/node_modules/isexe": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", - "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", - "license": "ISC" - }, - "node_modules/sucrase/node_modules/jackspeak": { - "version": "3.4.3", - "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-3.4.3.tgz", - "integrity": "sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==", - "license": "BlueOak-1.0.0", 
- "dependencies": { - "@isaacs/cliui": "^8.0.2" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - }, - "optionalDependencies": { - "@pkgjs/parseargs": "^0.11.0" - } - }, - "node_modules/sucrase/node_modules/lines-and-columns": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", - "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==", - "license": "MIT" - }, - "node_modules/sucrase/node_modules/lru-cache": { - "version": "10.4.3", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz", - "integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==", - "license": "ISC" - }, - "node_modules/sucrase/node_modules/minimatch": { - "version": "9.0.5", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", - "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", - "license": "ISC", - "dependencies": { - "brace-expansion": "^2.0.1" - }, - "engines": { - "node": ">=16 || 14 >=14.17" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/sucrase/node_modules/minipass": { - "version": "7.1.2", - "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", - "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==", - "license": "ISC", - "engines": { - "node": ">=16 || 14 >=14.17" - } - }, - "node_modules/sucrase/node_modules/mz": { - "version": "2.7.0", - "resolved": "https://registry.npmjs.org/mz/-/mz-2.7.0.tgz", - "integrity": "sha512-z81GNO7nnYMEhrGh9LeymoE4+Yr0Wn5McHIZMK5cfQCl+NDX08sCZgUc9/6MHni9IWuFLm1Z3HTCXu2z9fN62Q==", - "license": "MIT", - "dependencies": { - "any-promise": "^1.0.0", - "object-assign": "^4.0.1", - "thenify-all": "^1.0.0" - } - }, - "node_modules/sucrase/node_modules/object-assign": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", - "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/sucrase/node_modules/package-json-from-dist": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz", - "integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==", - "license": "BlueOak-1.0.0" - }, - "node_modules/sucrase/node_modules/path-key": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", - "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/sucrase/node_modules/path-scurry": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.11.1.tgz", - "integrity": "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==", - "license": "BlueOak-1.0.0", - "dependencies": { - "lru-cache": "^10.2.0", - "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0" - }, - "engines": { - "node": ">=16 || 14 >=14.18" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/sucrase/node_modules/pirates": { - "version": "4.0.6", - "resolved": 
"https://registry.npmjs.org/pirates/-/pirates-4.0.6.tgz", - "integrity": "sha512-saLsH7WeYYPiD25LDuLRRY/i+6HaPYr6G1OUlN39otzkSTxKnubR9RTxS3/Kk50s1g2JTgFwWQDQyplC5/SHZg==", - "license": "MIT", - "engines": { - "node": ">= 6" - } - }, - "node_modules/sucrase/node_modules/shebang-command": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", - "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", - "license": "MIT", - "dependencies": { - "shebang-regex": "^3.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/sucrase/node_modules/shebang-regex": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", - "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/sucrase/node_modules/signal-exit": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", - "integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==", - "license": "ISC", - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/sucrase/node_modules/string-width": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz", - "integrity": "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==", - "license": "MIT", - "dependencies": { - "eastasianwidth": "^0.2.0", - "emoji-regex": "^9.2.2", - "strip-ansi": "^7.0.1" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/sucrase/node_modules/string-width-cjs": { - "name": "string-width", - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "license": "MIT", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/sucrase/node_modules/string-width-cjs/node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/sucrase/node_modules/string-width-cjs/node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "license": "MIT" - }, - "node_modules/sucrase/node_modules/string-width-cjs/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/sucrase/node_modules/strip-ansi": { - "version": "7.1.0", - "resolved": 
"https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz", - "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==", - "license": "MIT", - "dependencies": { - "ansi-regex": "^6.0.1" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/strip-ansi?sponsor=1" - } - }, - "node_modules/sucrase/node_modules/strip-ansi-cjs": { - "name": "strip-ansi", - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/sucrase/node_modules/strip-ansi-cjs/node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/sucrase/node_modules/thenify": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/thenify/-/thenify-3.3.1.tgz", - "integrity": "sha512-RVZSIV5IG10Hk3enotrhvz0T9em6cyHBLkH/YAZuKqd8hRkKhSfCGIcP2KUY0EPxndzANBmNllzWPwak+bheSw==", - "license": "MIT", - "dependencies": { - "any-promise": "^1.0.0" - } - }, - "node_modules/sucrase/node_modules/thenify-all": { - "version": "1.6.0", - "resolved": "https://registry.npmjs.org/thenify-all/-/thenify-all-1.6.0.tgz", - "integrity": "sha512-RNxQH/qI8/t3thXJDwcstUO4zeqo64+Uy/+sNVRBx4Xn2OX+OZ9oP+iJnNFqplFra2ZUVeKCSa2oVWi3T4uVmA==", - "license": "MIT", - "dependencies": { - "thenify": ">= 3.1.0 < 4" - }, - "engines": { - "node": ">=0.8" - } - }, - "node_modules/sucrase/node_modules/ts-interface-checker": { - "version": "0.1.13", - "resolved": "https://registry.npmjs.org/ts-interface-checker/-/ts-interface-checker-0.1.13.tgz", - "integrity": "sha512-Y/arvbn+rrz3JCKl9C4kVNfTfSm2/mEp5FSz5EsZSANGPSlQrpRI5M4PKF+mJnE52jOO90PnPSc3Ur3bTQw0gA==", - "license": "Apache-2.0" - }, - "node_modules/sucrase/node_modules/which": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", - "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", - "license": "ISC", - "dependencies": { - "isexe": "^2.0.0" - }, - "bin": { - "node-which": "bin/node-which" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/sucrase/node_modules/wrap-ansi": { - "version": "8.1.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz", - "integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==", - "license": "MIT", - "dependencies": { - "ansi-styles": "^6.1.0", - "string-width": "^5.0.1", - "strip-ansi": "^7.0.1" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/wrap-ansi?sponsor=1" - } - }, - "node_modules/sucrase/node_modules/wrap-ansi-cjs": { - "name": "wrap-ansi", - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", - "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", - "license": "MIT", - "dependencies": { - "ansi-styles": "^4.0.0", - "string-width": "^4.1.0", - "strip-ansi": "^6.0.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - 
"url": "https://github.com/chalk/wrap-ansi?sponsor=1" - } - }, - "node_modules/sucrase/node_modules/wrap-ansi-cjs/node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/sucrase/node_modules/wrap-ansi-cjs/node_modules/ansi-styles": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "license": "MIT", - "dependencies": { - "color-convert": "^2.0.1" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/sucrase/node_modules/wrap-ansi-cjs/node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "license": "MIT" - }, - "node_modules/sucrase/node_modules/wrap-ansi-cjs/node_modules/string-width": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "license": "MIT", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/sucrase/node_modules/wrap-ansi-cjs/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/supports-color": { + "node_modules/sass-embedded/node_modules/supports-color": { "version": "8.1.1", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-8.1.1.tgz", "integrity": "sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==", @@ -3123,6 +5605,122 @@ "url": "https://github.com/chalk/supports-color?sponsor=1" } }, + "node_modules/scheduler": { + "version": "0.23.2", + "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.23.2.tgz", + "integrity": "sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==", + "license": "MIT", + "dependencies": { + "loose-envify": "^1.1.0" + } + }, + "node_modules/semver": { + "version": "6.3.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", + "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", + "dev": true, + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + } + }, + "node_modules/set-cookie-parser": { + "version": "2.7.1", + "resolved": "https://registry.npmjs.org/set-cookie-parser/-/set-cookie-parser-2.7.1.tgz", + "integrity": "sha512-IOc8uWeOZgnb3ptbCURJWNjWUPcO3ZnTTdzsurqERrP6nPyv+paC55vJM0LpOlT2ne+Ix+9+CRG1MNLlyZ4GjQ==", + "license": "MIT" + }, + "node_modules/shebang-command": { + "version": "2.0.0", + "resolved": 
"https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dev": true, + "license": "MIT", + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/source-map-js": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", + "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/space-separated-tokens": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/space-separated-tokens/-/space-separated-tokens-2.0.2.tgz", + "integrity": "sha512-PEGlAwrG8yXGXRjW32fGbg66JAlOAwbObuqVoJpv/mRgoWDQfgH1wDPvtzWyUSNAXBGSk8h755YDbbcEy3SH2Q==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/stringify-entities": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/stringify-entities/-/stringify-entities-4.0.4.tgz", + "integrity": "sha512-IwfBptatlO+QCJUo19AqvrPNqlVMpW9YEL2LIVY+Rpv2qsjCGxaDLNRgeGsQWJhfItebuJhsGSLjaBbNSQ+ieg==", + "license": "MIT", + "dependencies": { + "character-entities-html4": "^2.0.0", + "character-entities-legacy": "^3.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/strip-json-comments": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", + "integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/style-to-object": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/style-to-object/-/style-to-object-1.0.8.tgz", + "integrity": "sha512-xT47I/Eo0rwJmaXC4oilDGDWLohVhR6o/xAQcPQN8q6QBuZVL8qMYL85kLmST5cPjAorwvqIA4qXTRQoYHaL6g==", + "license": "MIT", + "dependencies": { + "inline-style-parser": "0.2.4" + } + }, + "node_modules/supports-color": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dev": true, + "license": "MIT", + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/sync-child-process": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/sync-child-process/-/sync-child-process-1.0.2.tgz", @@ -3147,40 +5745,18 @@ } }, "node_modules/tailwindcss": { - "version": "3.4.15", - "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.15.tgz", - "integrity": "sha512-r4MeXnfBmSOuKUWmXe6h2CcyfzJCEk4F0pptO5jlnYSIViUkVmsawj80N5h2lO3gwcmSb4n3PuN+e+GC1Guylw==", + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.1.tgz", + "integrity": 
"sha512-QNbdmeS979Efzim2g/bEvfuh+fTcIdp1y7gA+sb6OYSW74rt7Cr7M78AKdf6HqWT3d5AiTb7SwTT3sLQxr4/qw==", + "license": "MIT" + }, + "node_modules/tapable": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/tapable/-/tapable-2.2.1.tgz", + "integrity": "sha512-GNzQvQTOIP6RyTfE2Qxb8ZVlNmw0n88vp1szwWRimP02mnTsx3Wtn5qRdqY9w2XduFNUgvOwhNnQsjwCp+kqaQ==", "license": "MIT", - "dependencies": { - "@alloc/quick-lru": "^5.2.0", - "arg": "^5.0.2", - "chokidar": "^3.6.0", - "didyoumean": "^1.2.2", - "dlv": "^1.1.3", - "fast-glob": "^3.3.2", - "glob-parent": "^6.0.2", - "is-glob": "^4.0.3", - "jiti": "^1.21.6", - "lilconfig": "^2.1.0", - "micromatch": "^4.0.8", - "normalize-path": "^3.0.0", - "object-hash": "^3.0.0", - "picocolors": "^1.1.1", - "postcss": "^8.4.47", - "postcss-import": "^15.1.0", - "postcss-js": "^4.0.1", - "postcss-load-config": "^4.0.2", - "postcss-nested": "^6.2.0", - "postcss-selector-parser": "^6.1.2", - "resolve": "^1.22.8", - "sucrase": "^3.35.0" - }, - "bin": { - "tailwind": "lib/cli.js", - "tailwindcss": "lib/cli.js" - }, "engines": { - "node": ">=14.0.0" + "node": ">=6" } }, "node_modules/textlinestream": { @@ -3189,6 +5765,51 @@ "integrity": "sha512-iBHbi7BQxrFmwZUQJsT0SjNzlLLsXhvW/kg7EyOMVMBIrlnj/qYofwo1LVLZi+3GbUEo96Iu2eqToI2+lZoAEQ==", "license": "MIT" }, + "node_modules/to-regex-range": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", + "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", + "license": "MIT", + "dependencies": { + "is-number": "^7.0.0" + }, + "engines": { + "node": ">=8.0" + } + }, + "node_modules/trim-lines": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/trim-lines/-/trim-lines-3.0.1.tgz", + "integrity": "sha512-kRj8B+YHZCc9kQYdWfJB2/oUl9rA99qbowYYBtr4ui4mZyAQ2JpvVBd/6U2YloATfqBhBTSMhTpgBHtU0Mf3Rg==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/trough": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/trough/-/trough-2.2.0.tgz", + "integrity": "sha512-tmMpK00BjZiUyVyvrBK7knerNgmgvcV/KLVyuma/SC+TQN167GrMRciANTz09+k3zW8L8t60jWO1GpfkZdjTaw==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/ts-api-utils": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.0.1.tgz", + "integrity": "sha512-dnlgjFSVetynI8nzgJ+qF62efpglpWRk8isUEWZGWlJYySCTD6aKvbUDu+zbPeDakk3bg5H4XpitHukgfL1m9w==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18.12" + }, + "peerDependencies": { + "typescript": ">=4.8.4" + } + }, "node_modules/tslib": { "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", @@ -3196,12 +5817,224 @@ "devOptional": true, "license": "0BSD" }, - "node_modules/uc.micro": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/uc.micro/-/uc.micro-2.1.0.tgz", - "integrity": "sha512-ARDJmphmdvUk6Glw7y9DQ2bFkKBHwQHLi2lsaH6PPmz/Ka9sFOBsBluozhDltWmnv9u/cF6Rt87znRTPV+yp/A==", + "node_modules/turbo-stream": { + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/turbo-stream/-/turbo-stream-2.4.0.tgz", + "integrity": "sha512-FHncC10WpBd2eOmGwpmQsWLDoK4cqsA/UT/GqNoaKOQnT8uzhtCbg3EoUDMvqpOSAI0S26mr0rkjzbOO6S3v1g==", + "license": "ISC" + }, + "node_modules/type-check": { + "version": "0.4.0", + "resolved": 
"https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz", + "integrity": "sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==", + "dev": true, + "license": "MIT", + "dependencies": { + "prelude-ls": "^1.2.1" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/typescript": { + "version": "5.6.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.6.3.tgz", + "integrity": "sha512-hjcS1mhfuyi4WW8IWtjP7brDrG2cuDZukyrYrSauoXGNgx0S7zceP07adYkJycEr56BOUTNPzbInooiN3fn1qw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/typescript-eslint": { + "version": "8.23.0", + "resolved": "https://registry.npmjs.org/typescript-eslint/-/typescript-eslint-8.23.0.tgz", + "integrity": "sha512-/LBRo3HrXr5LxmrdYSOCvoAMm7p2jNizNfbIpCgvG4HMsnoprRUOce/+8VJ9BDYWW68rqIENE/haVLWPeFZBVQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@typescript-eslint/eslint-plugin": "8.23.0", + "@typescript-eslint/parser": "8.23.0", + "@typescript-eslint/utils": "8.23.0" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "eslint": "^8.57.0 || ^9.0.0", + "typescript": ">=4.8.4 <5.8.0" + } + }, + "node_modules/undici-types": { + "version": "6.20.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz", + "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==", + "devOptional": true, "license": "MIT" }, + "node_modules/unified": { + "version": "11.0.5", + "resolved": "https://registry.npmjs.org/unified/-/unified-11.0.5.tgz", + "integrity": "sha512-xKvGhPWw3k84Qjh8bI3ZeJjqnyadK+GEFtazSfZv/rKeTkTjOJho6mFqh2SM96iIcZokxiOpg78GazTSg8+KHA==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0", + "bail": "^2.0.0", + "devlop": "^1.0.0", + "extend": "^3.0.0", + "is-plain-obj": "^4.0.0", + "trough": "^2.0.0", + "vfile": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-find-after": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/unist-util-find-after/-/unist-util-find-after-5.0.0.tgz", + "integrity": "sha512-amQa0Ep2m6hE2g72AugUItjbuM8X8cGQnFoHk0pGfrFeT9GZhzN5SW8nRsiGKK7Aif4CrACPENkA6P/Lw6fHGQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0", + "unist-util-is": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-is": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-6.0.0.tgz", + "integrity": "sha512-2qCTHimwdxLfz+YzdGfkqNlH0tLi9xjTnHddPmJwtIG9MGsdbutfTc4P+haPD7l7Cjxf/WZj+we5qfVPvvxfYw==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-position": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/unist-util-position/-/unist-util-position-5.0.0.tgz", + "integrity": "sha512-fucsC7HjXvkB5R3kTCO7kUjRdrS0BJt3M/FPxmHMBOm8JQi2BsHAHFsy27E0EolP8rp0NzXsJ+jNPyDWvOJZPA==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0" + }, + "funding": { + "type": "opencollective", 
+ "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-remove-position": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/unist-util-remove-position/-/unist-util-remove-position-5.0.0.tgz", + "integrity": "sha512-Hp5Kh3wLxv0PHj9m2yZhhLt58KzPtEYKQQ4yxfYFEO7EvHwzyDYnduhHnY1mDxoqr7VUwVuHXk9RXKIiYS1N8Q==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0", + "unist-util-visit": "^5.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-stringify-position": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/unist-util-stringify-position/-/unist-util-stringify-position-4.0.0.tgz", + "integrity": "sha512-0ASV06AAoKCDkS2+xw5RXJywruurpbC4JZSm7nr7MOt1ojAzvyyaO+UxZf18j8FCF6kmzCZKcAgN/yu2gm2XgQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-visit": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-5.0.0.tgz", + "integrity": "sha512-MR04uvD+07cwl/yhVuVWAtw+3GOR/knlL55Nd/wAdblk27GCVt3lqpTivy/tkJcZoNPzTwS1Y+KMojlLDhoTzg==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0", + "unist-util-is": "^6.0.0", + "unist-util-visit-parents": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-visit-parents": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/unist-util-visit-parents/-/unist-util-visit-parents-6.0.1.tgz", + "integrity": "sha512-L/PqWzfTP9lzzEa6CKs0k2nARxTdZduw3zyh8d2NVBnsyvHjSX4TWse388YrrQKbvI8w20fGjGlhgT96WwKykw==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0", + "unist-util-is": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/update-browserslist-db": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.1.2.tgz", + "integrity": "sha512-PPypAm5qvlD7XMZC3BujecnaOxwhrtoFR+Dqkk5Aa/6DssiH0ibKoketaj9w8LP7Bont1rYeoV5plxD7RTEPRg==", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/browserslist" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "escalade": "^3.2.0", + "picocolors": "^1.1.1" + }, + "bin": { + "update-browserslist-db": "cli.js" + }, + "peerDependencies": { + "browserslist": ">= 4.21.0" + } + }, + "node_modules/uri-js": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", + "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "punycode": "^2.1.0" + } + }, "node_modules/varint": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/varint/-/varint-6.0.0.tgz", @@ -3209,21 +6042,63 @@ "devOptional": true, "license": "MIT" }, - "node_modules/vite": { - "version": "5.4.11", - "resolved": "https://registry.npmjs.org/vite/-/vite-5.4.11.tgz", - "integrity": "sha512-c7jFQRklXua0mTzneGW9QVyxFjUgwcihC4bXEtujIo2ouWCe1Ajt/amn2PCxYnhYfd5k09JX3SB7OYWFKYqj8Q==", + "node_modules/vfile": { + "version": 
"6.0.3", + "resolved": "https://registry.npmjs.org/vfile/-/vfile-6.0.3.tgz", + "integrity": "sha512-KzIbH/9tXat2u30jf+smMwFCsno4wHVdNmzFyL+T/L3UGqqk6JKfVqOFOZEpZSHADH1k40ab6NUIXZq422ov3Q==", "license": "MIT", "dependencies": { - "esbuild": "^0.21.3", - "postcss": "^8.4.43", - "rollup": "^4.20.0" + "@types/unist": "^3.0.0", + "vfile-message": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/vfile-location": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/vfile-location/-/vfile-location-5.0.3.tgz", + "integrity": "sha512-5yXvWDEgqeiYiBe1lbxYF7UMAIm/IcopxMHrMQDq3nvKcjPKIhZklUKL+AE7J7uApI4kwe2snsK+eI6UTj9EHg==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0", + "vfile": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/vfile-message": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/vfile-message/-/vfile-message-4.0.2.tgz", + "integrity": "sha512-jRDZ1IMLttGj41KcZvlrYAaI3CfqpLpfpf+Mfig13viT6NKvRzWZ+lXz0Y5D60w6uJIBAOGq9mSHf0gktF0duw==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0", + "unist-util-stringify-position": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/vite": { + "version": "6.0.11", + "resolved": "https://registry.npmjs.org/vite/-/vite-6.0.11.tgz", + "integrity": "sha512-4VL9mQPKoHy4+FE0NnRE/kbY51TOfaknxAjt3fJbGJxhIpBZiqVzlZDEesWWsuREXHwNdAoOFZ9MkPEVXczHwg==", + "license": "MIT", + "dependencies": { + "esbuild": "^0.24.2", + "postcss": "^8.4.49", + "rollup": "^4.23.0" }, "bin": { "vite": "bin/vite.js" }, "engines": { - "node": "^18.0.0 || >=20.0.0" + "node": "^18.0.0 || ^20.0.0 || >=22.0.0" }, "funding": { "url": "https://github.com/vitejs/vite?sponsor=1" @@ -3232,19 +6107,25 @@ "fsevents": "~2.3.3" }, "peerDependencies": { - "@types/node": "^18.0.0 || >=20.0.0", + "@types/node": "^18.0.0 || ^20.0.0 || >=22.0.0", + "jiti": ">=1.21.0", "less": "*", "lightningcss": "^1.21.0", "sass": "*", "sass-embedded": "*", "stylus": "*", "sugarss": "*", - "terser": "^5.4.0" + "terser": "^5.16.0", + "tsx": "^4.8.1", + "yaml": "^2.4.2" }, "peerDependenciesMeta": { "@types/node": { "optional": true }, + "jiti": { + "optional": true + }, "less": { "optional": true }, @@ -3265,13 +6146,19 @@ }, "terser": { "optional": true + }, + "tsx": { + "optional": true + }, + "yaml": { + "optional": true } } }, "node_modules/vite-plugin-singlefile": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/vite-plugin-singlefile/-/vite-plugin-singlefile-2.0.3.tgz", - "integrity": "sha512-OEBEwdX8nCGPSdtaB1D7rryYnT+YfPTS8ojL1TDyeUF+bWDCTfRriQqw6T0vl9EbKI/KMg7szN3awst6cLrKkA==", + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/vite-plugin-singlefile/-/vite-plugin-singlefile-2.1.0.tgz", + "integrity": "sha512-7tJo+UgZABlKpY/nubth/wxJ4+pUGREPnEwNOknxwl2MM0zTvF14KTU4Ln1lc140gjLLV5mjDrvuoquU7OZqCg==", "license": "MIT", "dependencies": { "micromatch": "^4.0.8" @@ -3280,29 +6167,88 @@ "node": ">18.0.0" }, "peerDependencies": { - "rollup": "^4.24.3", - "vite": "^5.4.10" + "rollup": "^4.28.1", + "vite": "^5.4.11 || ^6.0.0" } }, - "node_modules/vue": { - "version": "3.5.13", - "resolved": "https://registry.npmjs.org/vue/-/vue-3.5.13.tgz", - "integrity": "sha512-wmeiSMxkZCSc+PM2w2VRsOYAZC8GdipNFRTsLSfodVqI9mbejKeXEGr8SckuLnrQPGe3oJN5c3K0vpoU9q/wCQ==", + "node_modules/web-namespaces": { + 
"version": "2.0.1", + "resolved": "https://registry.npmjs.org/web-namespaces/-/web-namespaces-2.0.1.tgz", + "integrity": "sha512-bKr1DkiNa2krS7qxNtdrtHAmzuYGFQLiQ13TsorsdT6ULTkPLKuu5+GsFpDlg6JFjUTwX2DyhMPG2be8uPrqsQ==", "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "dev": true, + "license": "ISC", "dependencies": { - "@vue/compiler-dom": "3.5.13", - "@vue/compiler-sfc": "3.5.13", - "@vue/runtime-dom": "3.5.13", - "@vue/server-renderer": "3.5.13", - "@vue/shared": "3.5.13" + "isexe": "^2.0.0" }, - "peerDependencies": { - "typescript": "*" + "bin": { + "node-which": "bin/node-which" }, - "peerDependenciesMeta": { - "typescript": { - "optional": true - } + "engines": { + "node": ">= 8" + } + }, + "node_modules/word-wrap": { + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.5.tgz", + "integrity": "sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/yallist": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.1.1.tgz", + "integrity": "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==", + "dev": true, + "license": "ISC" + }, + "node_modules/yaml": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.7.0.tgz", + "integrity": "sha512-+hSoy/QHluxmC9kCIJyL/uyFmLmc+e5CFR5Wa+bpIhIj85LVb9ZH2nVnqrHoSvKogwODv0ClqZkmiSSaIH5LTA==", + "license": "ISC", + "optional": true, + "peer": true, + "bin": { + "yaml": "bin.mjs" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/yocto-queue": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", + "integrity": "sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/zwitch": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/zwitch/-/zwitch-2.0.4.tgz", + "integrity": "sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" } } } diff --git a/examples/server/webui/package.json b/examples/server/webui/package.json index 2836cce00d..6ac06b1a49 100644 --- a/examples/server/webui/package.json +++ b/examples/server/webui/package.json @@ -5,26 +5,58 @@ "type": "module", "scripts": { "dev": "vite", - "build": "vite build", - "preview": "vite preview", - "analyze": "ANALYZE=1 npx vite-bundle-visualizer" - }, - "devDependencies": { - "sass-embedded": "^1.83.0", - "vite": "^5.4.10" + "build": "tsc -b && vite build", + "format": "eslint . 
&& prettier --write .", + "lint": "eslint .", + "preview": "vite preview" }, "dependencies": { + "@heroicons/react": "^2.2.0", "@sec-ant/readable-stream": "^0.6.0", + "@tailwindcss/postcss": "^4.1.1", + "@tailwindcss/vite": "^4.1.1", "@vscode/markdown-it-katex": "^1.1.1", "autoprefixer": "^10.4.20", - "daisyui": "^4.12.14", + "daisyui": "^5.0.12", + "dexie": "^4.0.11", "highlight.js": "^11.10.0", "katex": "^0.16.15", - "markdown-it": "^14.1.0", "postcss": "^8.4.49", - "tailwindcss": "^3.4.15", + "react": "^18.3.1", + "react-dom": "^18.3.1", + "react-markdown": "^9.0.3", + "react-router": "^7.1.5", + "rehype-highlight": "^7.0.2", + "rehype-katex": "^7.0.1", + "remark-breaks": "^4.0.0", + "remark-gfm": "^4.0.0", + "remark-math": "^6.0.0", + "tailwindcss": "^4.1.1", "textlinestream": "^1.1.1", - "vite-plugin-singlefile": "^2.0.3", - "vue": "^3.5.13" + "vite-plugin-singlefile": "^2.0.3" + }, + "devDependencies": { + "@eslint/js": "^9.17.0", + "@types/markdown-it": "^14.1.2", + "@types/node": "^22.13.1", + "@types/react": "^18.3.18", + "@types/react-dom": "^18.3.5", + "@vitejs/plugin-react": "^4.3.4", + "eslint": "^9.17.0", + "eslint-plugin-react-hooks": "^5.0.0", + "eslint-plugin-react-refresh": "^0.4.16", + "globals": "^15.14.0", + "prettier": "^3.4.2", + "sass-embedded": "^1.83.4", + "typescript": "~5.6.2", + "typescript-eslint": "^8.18.2", + "vite": "^6.0.5" + }, + "prettier": { + "trailingComma": "es5", + "tabWidth": 2, + "semi": true, + "singleQuote": true, + "bracketSameLine": false } } diff --git a/examples/server/webui/postcss.config.js b/examples/server/webui/postcss.config.js index 2e7af2b7f1..fb05b5692b 100644 --- a/examples/server/webui/postcss.config.js +++ b/examples/server/webui/postcss.config.js @@ -1,6 +1,5 @@ export default { plugins: { - tailwindcss: {}, - autoprefixer: {}, + "@tailwindcss/postcss": {}, }, } diff --git a/examples/server/webui/public/demo-conversation.json b/examples/server/webui/public/demo-conversation.json index 75ab599dd6..338b4aea59 100644 --- a/examples/server/webui/public/demo-conversation.json +++ b/examples/server/webui/public/demo-conversation.json @@ -11,7 +11,7 @@ { "id": 1734087548327, "role": "assistant", - "content": "This is the formula:\n\n$\\frac{e^{x_i}}{\\sum_{j=1}^{n}e^{x_j}}$\n\nGiven an input vector \\(\\mathbf{x} = [x_1, x_2, \\ldots, x_n]\\)\n\n\\[\ny_i = \\frac{e^{x_i}}{\\sum_{j=1}^n e^{x_j}}\n\\]\n\nCode block latex:\n```latex\n\\frac{e^{x_i}}{\\sum_{j=1}^{n}e^{x_j}}\n```\n\nTest dollar sign: $1234 $4567\n\nInvalid latex syntax: $E = mc^$ and $$E = mc^$$", + "content": "This is the formula:\n\n$\\frac{e^{x_i}}{\\sum_{j=1}^{n}e^{x_j}}$\n\nGiven an input vector \\(\\mathbf{x} = [x_1, x_2, \\ldots, x_n]\\)\n\n\\[\ny_i = \\frac{e^{x_i}}{\\sum_{j=1}^n e^{x_j}}\n\\]\n\n$2x + y = z$\n\nCode block latex:\n```latex\n\\frac{e^{x_i}}{\\sum_{j=1}^{n}e^{x_j}}\n```\n\nTest dollar sign: $1234 $4567\n\nInvalid latex syntax: $E = mc^$ and $$E = mc^$$", "timings": { "prompt_n": 1, "prompt_ms": 28.923, diff --git a/examples/server/webui/src/App.tsx b/examples/server/webui/src/App.tsx new file mode 100644 index 0000000000..cc4659e152 --- /dev/null +++ b/examples/server/webui/src/App.tsx @@ -0,0 +1,47 @@ +import { HashRouter, Outlet, Route, Routes } from 'react-router'; +import Header from './components/Header'; +import Sidebar from './components/Sidebar'; +import { AppContextProvider, useAppContext } from './utils/app.context'; +import ChatScreen from './components/ChatScreen'; +import SettingDialog from './components/SettingDialog'; + +function App() 
{ + return ( + +
+ + + }> + } /> + } /> + + + +
+
+ ); +} + +function AppLayout() { + const { showSettings, setShowSettings } = useAppContext(); + return ( + <> + +
+
+ +
+ { + setShowSettings(false)} + /> + } + + ); +} + +export default App; diff --git a/examples/server/webui/src/Config.ts b/examples/server/webui/src/Config.ts new file mode 100644 index 0000000000..dd1cc0e100 --- /dev/null +++ b/examples/server/webui/src/Config.ts @@ -0,0 +1,92 @@ +import daisyuiThemes from 'daisyui/theme/object'; +import { isNumeric } from './utils/misc'; + +export const isDev = import.meta.env.MODE === 'development'; + +// constants +export const BASE_URL = new URL('.', document.baseURI).href + .toString() + .replace(/\/$/, ''); + +export const CONFIG_DEFAULT = { + // Note: in order not to introduce breaking changes, please keep the same data type (number, string, etc) if you want to change the default value. Do not use null or undefined for default value. + // Do not use nested objects, keep it single level. Prefix the key if you need to group them. + apiKey: '', + systemMessage: 'You are a helpful assistant.', + showTokensPerSecond: false, + showThoughtInProgress: false, + excludeThoughtOnReq: true, + // make sure these default values are in sync with `common.h` + samplers: 'edkypmxt', + temperature: 0.8, + dynatemp_range: 0.0, + dynatemp_exponent: 1.0, + top_k: 40, + top_p: 0.95, + min_p: 0.05, + xtc_probability: 0.0, + xtc_threshold: 0.1, + typical_p: 1.0, + repeat_last_n: 64, + repeat_penalty: 1.0, + presence_penalty: 0.0, + frequency_penalty: 0.0, + dry_multiplier: 0.0, + dry_base: 1.75, + dry_allowed_length: 2, + dry_penalty_last_n: -1, + max_tokens: -1, + custom: '', // custom json-stringified object + // experimental features + pyIntepreterEnabled: false, +}; +export const CONFIG_INFO: Record = { + apiKey: 'Set the API Key if you are using --api-key option for the server.', + systemMessage: 'The starting message that defines how model should behave.', + samplers: + 'The order at which samplers are applied, in simplified way. Default is "dkypmxt": dry->top_k->typ_p->top_p->min_p->xtc->temperature', + temperature: + 'Controls the randomness of the generated text by affecting the probability distribution of the output tokens. Higher = more random, lower = more focused.', + dynatemp_range: + 'Addon for the temperature sampler. The added value to the range of dynamic temperature, which adjusts probabilities by entropy of tokens.', + dynatemp_exponent: + 'Addon for the temperature sampler. Smoothes out the probability redistribution based on the most probable token.', + top_k: 'Keeps only k top tokens.', + top_p: + 'Limits tokens to those that together have a cumulative probability of at least p', + min_p: + 'Limits tokens based on the minimum probability for a token to be considered, relative to the probability of the most likely token.', + xtc_probability: + 'XTC sampler cuts out top tokens; this parameter controls the chance of cutting tokens at all. 0 disables XTC.', + xtc_threshold: + 'XTC sampler cuts out top tokens; this parameter controls the token probability that is required to cut that token.', + typical_p: + 'Sorts and limits tokens based on the difference between log-probability and entropy.', + repeat_last_n: 'Last n tokens to consider for penalizing repetition', + repeat_penalty: + 'Controls the repetition of token sequences in the generated text', + presence_penalty: + 'Limits tokens based on whether they appear in the output or not.', + frequency_penalty: + 'Limits tokens based on how often they appear in the output.', + dry_multiplier: + 'DRY sampling reduces repetition in generated text even across long contexts. 
This parameter sets the DRY sampling multiplier.', + dry_base: + 'DRY sampling reduces repetition in generated text even across long contexts. This parameter sets the DRY sampling base value.', + dry_allowed_length: + 'DRY sampling reduces repetition in generated text even across long contexts. This parameter sets the allowed length for DRY sampling.', + dry_penalty_last_n: + 'DRY sampling reduces repetition in generated text even across long contexts. This parameter sets DRY penalty for the last n tokens.', + max_tokens: 'The maximum number of token per output.', + custom: '', // custom json-stringified object +}; +// config keys having numeric value (i.e. temperature, top_k, top_p, etc) +export const CONFIG_NUMERIC_KEYS = Object.entries(CONFIG_DEFAULT) + .filter((e) => isNumeric(e[1])) + .map((e) => e[0]); +// list of themes supported by daisyui +export const THEMES = ['light', 'dark'] + // make sure light & dark are always at the beginning + .concat( + Object.keys(daisyuiThemes).filter((t) => t !== 'light' && t !== 'dark') + ); diff --git a/examples/server/webui/src/components/CanvasPyInterpreter.tsx b/examples/server/webui/src/components/CanvasPyInterpreter.tsx new file mode 100644 index 0000000000..c2707fe20f --- /dev/null +++ b/examples/server/webui/src/components/CanvasPyInterpreter.tsx @@ -0,0 +1,195 @@ +import { useEffect, useState } from 'react'; +import { useAppContext } from '../utils/app.context'; +import { OpenInNewTab, XCloseButton } from '../utils/common'; +import { CanvasType } from '../utils/types'; +import { PlayIcon, StopIcon } from '@heroicons/react/24/outline'; +import StorageUtils from '../utils/storage'; + +const canInterrupt = typeof SharedArrayBuffer === 'function'; + +// adapted from https://pyodide.org/en/stable/usage/webworker.html +const WORKER_CODE = ` +importScripts("https://cdn.jsdelivr.net/pyodide/v0.27.2/full/pyodide.js"); + +let stdOutAndErr = []; + +let pyodideReadyPromise = loadPyodide({ + stdout: (data) => stdOutAndErr.push(data), + stderr: (data) => stdOutAndErr.push(data), +}); + +let alreadySetBuff = false; + +self.onmessage = async (event) => { + stdOutAndErr = []; + + // make sure loading is done + const pyodide = await pyodideReadyPromise; + const { id, python, context, interruptBuffer } = event.data; + + if (interruptBuffer && !alreadySetBuff) { + pyodide.setInterruptBuffer(interruptBuffer); + alreadySetBuff = true; + } + + // Now load any packages we need, run the code, and send the result back. + await pyodide.loadPackagesFromImports(python); + + // make a Python dictionary with the data from content + const dict = pyodide.globals.get("dict"); + const globals = dict(Object.entries(context)); + try { + self.postMessage({ id, running: true }); + // Execute the python code in this context + const result = pyodide.runPython(python, { globals }); + self.postMessage({ result, id, stdOutAndErr }); + } catch (error) { + self.postMessage({ error: error.message, id }); + } + interruptBuffer[0] = 0; +}; +`; + +let worker: Worker; +const interruptBuffer = canInterrupt + ? 
new Uint8Array(new SharedArrayBuffer(1)) + : null; + +const startWorker = () => { + if (!worker) { + worker = new Worker( + URL.createObjectURL(new Blob([WORKER_CODE], { type: 'text/javascript' })) + ); + } +}; + +if (StorageUtils.getConfig().pyIntepreterEnabled) { + startWorker(); +} + +const runCodeInWorker = ( + pyCode: string, + callbackRunning: () => void +): { + donePromise: Promise; + interrupt: () => void; +} => { + startWorker(); + const id = Math.random() * 1e8; + const context = {}; + if (interruptBuffer) { + interruptBuffer[0] = 0; + } + + const donePromise = new Promise((resolve) => { + worker.onmessage = (event) => { + const { error, stdOutAndErr, running } = event.data; + if (id !== event.data.id) return; + if (running) { + callbackRunning(); + return; + } else if (error) { + resolve(error.toString()); + } else { + resolve(stdOutAndErr.join('\n')); + } + }; + worker.postMessage({ id, python: pyCode, context, interruptBuffer }); + }); + + const interrupt = () => { + console.log('Interrupting...'); + console.trace(); + if (interruptBuffer) { + interruptBuffer[0] = 2; + } + }; + + return { donePromise, interrupt }; +}; + +export default function CanvasPyInterpreter() { + const { canvasData, setCanvasData } = useAppContext(); + + const [code, setCode] = useState(canvasData?.content ?? ''); // copy to avoid direct mutation + const [running, setRunning] = useState(false); + const [output, setOutput] = useState(''); + const [interruptFn, setInterruptFn] = useState<() => void>(); + const [showStopBtn, setShowStopBtn] = useState(false); + + const runCode = async (pycode: string) => { + interruptFn?.(); + setRunning(true); + setOutput('Loading Pyodide...'); + const { donePromise, interrupt } = runCodeInWorker(pycode, () => { + setOutput('Running...'); + setShowStopBtn(canInterrupt); + }); + setInterruptFn(() => interrupt); + const out = await donePromise; + setOutput(out); + setRunning(false); + setShowStopBtn(false); + }; + + // run code on mount + useEffect(() => { + setCode(canvasData?.content ?? ''); + runCode(canvasData?.content ?? ''); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [canvasData?.content]); + + if (canvasData?.type !== CanvasType.PY_INTERPRETER) { + return null; + } + + return ( +
+
+
+ Python Interpreter + setCanvasData(null)} + /> +
+
+ +
+
+ + {showStopBtn && ( + + )} + + + Report a bug + + +
+ +
+
+
+
+ ); +} diff --git a/examples/server/webui/src/components/ChatMessage.tsx b/examples/server/webui/src/components/ChatMessage.tsx new file mode 100644 index 0000000000..40ea74711f --- /dev/null +++ b/examples/server/webui/src/components/ChatMessage.tsx @@ -0,0 +1,296 @@ +import { useMemo, useState } from 'react'; +import { useAppContext } from '../utils/app.context'; +import { Message, PendingMessage } from '../utils/types'; +import { classNames } from '../utils/misc'; +import MarkdownDisplay, { CopyButton } from './MarkdownDisplay'; +import { ChevronLeftIcon, ChevronRightIcon } from '@heroicons/react/24/outline'; + +interface SplitMessage { + content: PendingMessage['content']; + thought?: string; + isThinking?: boolean; +} + +export default function ChatMessage({ + msg, + siblingLeafNodeIds, + siblingCurrIdx, + id, + onRegenerateMessage, + onEditMessage, + onChangeSibling, + isPending, +}: { + msg: Message | PendingMessage; + siblingLeafNodeIds: Message['id'][]; + siblingCurrIdx: number; + id?: string; + onRegenerateMessage(msg: Message): void; + onEditMessage(msg: Message, content: string): void; + onChangeSibling(sibling: Message['id']): void; + isPending?: boolean; +}) { + const { viewingChat, config } = useAppContext(); + const [editingContent, setEditingContent] = useState(null); + const timings = useMemo( + () => + msg.timings + ? { + ...msg.timings, + prompt_per_second: + (msg.timings.prompt_n / msg.timings.prompt_ms) * 1000, + predicted_per_second: + (msg.timings.predicted_n / msg.timings.predicted_ms) * 1000, + } + : null, + [msg.timings] + ); + const nextSibling = siblingLeafNodeIds[siblingCurrIdx + 1]; + const prevSibling = siblingLeafNodeIds[siblingCurrIdx - 1]; + + // for reasoning model, we split the message into content and thought + // TODO: implement this as remark/rehype plugin in the future + const { content, thought, isThinking }: SplitMessage = useMemo(() => { + if (msg.content === null || msg.role !== 'assistant') { + return { content: msg.content }; + } + let actualContent = ''; + let thought = ''; + let isThinking = false; + let thinkSplit = msg.content.split('', 2); + actualContent += thinkSplit[0]; + while (thinkSplit[1] !== undefined) { + // tag found + thinkSplit = thinkSplit[1].split('', 2); + thought += thinkSplit[0]; + isThinking = true; + if (thinkSplit[1] !== undefined) { + // closing tag found + isThinking = false; + thinkSplit = thinkSplit[1].split('', 2); + actualContent += thinkSplit[0]; + } + } + return { content: actualContent, thought, isThinking }; + }, [msg]); + + if (!viewingChat) return null; + + return ( +
+
+
+ {/* textarea for editing message */} + {editingContent !== null && ( + <> + +
+ + + + )} + {/* not editing content, render message */} + {editingContent === null && ( + <> + {content === null ? ( + <> + {/* show loading dots for pending message */} + + + ) : ( + <> + {/* render message as markdown */} +
+ {thought && ( +
+ + {isPending && isThinking ? ( + + + Thinking + + ) : ( + Thought Process + )} + +
+ +
+
+ )} + + {msg.extra && msg.extra.length > 0 && ( +
+ + Extra content + +
+ {msg.extra.map( + (extra, i) => + extra.type === 'textFile' ? ( +
+ {extra.name} +
{extra.content}
+
+ ) : extra.type === 'context' ? ( +
+
{extra.content}
+
+ ) : null // TODO: support other extra types + )} +
+
+ )} + + +
+ + )} + {/* render timings if enabled */} + {timings && config.showTokensPerSecond && ( +
+
+ Speed: {timings.predicted_per_second.toFixed(1)} t/s +
+
+ Prompt +
- Tokens: {timings.prompt_n} +
- Time: {timings.prompt_ms} ms +
- Speed: {timings.prompt_per_second.toFixed(1)} t/s +
+ Generation +
- Tokens: {timings.predicted_n} +
- Time: {timings.predicted_ms} ms +
- Speed: {timings.predicted_per_second.toFixed(1)} t/s +
+
+
+ )} + + )} +
+
+ + {/* actions for each message */} + {msg.content !== null && ( +
+ {siblingLeafNodeIds && siblingLeafNodeIds.length > 1 && ( +
+ + + {siblingCurrIdx + 1} / {siblingLeafNodeIds.length} + + +
+ )} + {/* user message */} + {msg.role === 'user' && ( + + )} + {/* assistant message */} + {msg.role === 'assistant' && ( + <> + {!isPending && ( + + )} + + )} + +
+ )} +
+ ); +} diff --git a/examples/server/webui/src/components/ChatScreen.tsx b/examples/server/webui/src/components/ChatScreen.tsx new file mode 100644 index 0000000000..29ab5ea64f --- /dev/null +++ b/examples/server/webui/src/components/ChatScreen.tsx @@ -0,0 +1,296 @@ +import { useEffect, useMemo, useState } from 'react'; +import { CallbackGeneratedChunk, useAppContext } from '../utils/app.context'; +import ChatMessage from './ChatMessage'; +import { CanvasType, Message, PendingMessage } from '../utils/types'; +import { classNames, cleanCurrentUrl, throttle } from '../utils/misc'; +import CanvasPyInterpreter from './CanvasPyInterpreter'; +import StorageUtils from '../utils/storage'; +import { useVSCodeContext } from '../utils/llama-vscode'; +import { useChatTextarea, ChatTextareaApi } from './useChatTextarea.ts'; + +/** + * A message display is a message node with additional information for rendering. + * For example, siblings of the message node are stored as their last node (aka leaf node). + */ +export interface MessageDisplay { + msg: Message | PendingMessage; + siblingLeafNodeIds: Message['id'][]; + siblingCurrIdx: number; + isPending?: boolean; +} + +/** + * If the current URL contains "?m=...", prefill the message input with the value. + * If the current URL contains "?q=...", prefill and SEND the message. + */ +const prefilledMsg = { + content() { + const url = new URL(window.location.href); + return url.searchParams.get('m') ?? url.searchParams.get('q') ?? ''; + }, + shouldSend() { + const url = new URL(window.location.href); + return url.searchParams.has('q'); + }, + clear() { + cleanCurrentUrl(['m', 'q']); + }, +}; + +function getListMessageDisplay( + msgs: Readonly, + leafNodeId: Message['id'] +): MessageDisplay[] { + const currNodes = StorageUtils.filterByLeafNodeId(msgs, leafNodeId, true); + const res: MessageDisplay[] = []; + const nodeMap = new Map(); + for (const msg of msgs) { + nodeMap.set(msg.id, msg); + } + // find leaf node from a message node + const findLeafNode = (msgId: Message['id']): Message['id'] => { + let currNode: Message | undefined = nodeMap.get(msgId); + while (currNode) { + if (currNode.children.length === 0) break; + currNode = nodeMap.get(currNode.children.at(-1) ?? -1); + } + return currNode?.id ?? -1; + }; + // traverse the current nodes + for (const msg of currNodes) { + const parentNode = nodeMap.get(msg.parent ?? 
-1); + if (!parentNode) continue; + const siblings = parentNode.children; + if (msg.type !== 'root') { + res.push({ + msg, + siblingLeafNodeIds: siblings.map(findLeafNode), + siblingCurrIdx: siblings.indexOf(msg.id), + }); + } + } + return res; +} + +const scrollToBottom = throttle( + (requiresNearBottom: boolean, delay: number = 80) => { + const mainScrollElem = document.getElementById('main-scroll'); + if (!mainScrollElem) return; + const spaceToBottom = + mainScrollElem.scrollHeight - + mainScrollElem.scrollTop - + mainScrollElem.clientHeight; + if (!requiresNearBottom || spaceToBottom < 50) { + setTimeout( + () => mainScrollElem.scrollTo({ top: mainScrollElem.scrollHeight }), + delay + ); + } + }, + 80 +); + +export default function ChatScreen() { + const { + viewingChat, + sendMessage, + isGenerating, + stopGenerating, + pendingMessages, + canvasData, + replaceMessageAndGenerate, + } = useAppContext(); + + const textarea: ChatTextareaApi = useChatTextarea(prefilledMsg.content()); + + const { extraContext, clearExtraContext } = useVSCodeContext(textarea); + // TODO: improve this when we have "upload file" feature + const currExtra: Message['extra'] = extraContext ? [extraContext] : undefined; + + // keep track of leaf node for rendering + const [currNodeId, setCurrNodeId] = useState(-1); + const messages: MessageDisplay[] = useMemo(() => { + if (!viewingChat) return []; + else return getListMessageDisplay(viewingChat.messages, currNodeId); + }, [currNodeId, viewingChat]); + + const currConvId = viewingChat?.conv.id ?? null; + const pendingMsg: PendingMessage | undefined = + pendingMessages[currConvId ?? '']; + + useEffect(() => { + // reset to latest node when conversation changes + setCurrNodeId(-1); + // scroll to bottom when conversation changes + scrollToBottom(false, 1); + }, [currConvId]); + + const onChunk: CallbackGeneratedChunk = (currLeafNodeId?: Message['id']) => { + if (currLeafNodeId) { + setCurrNodeId(currLeafNodeId); + } + scrollToBottom(true); + }; + + const sendNewMessage = async () => { + const lastInpMsg = textarea.value(); + if (lastInpMsg.trim().length === 0 || isGenerating(currConvId ?? '')) + return; + textarea.setValue(''); + scrollToBottom(false); + setCurrNodeId(-1); + // get the last message node + const lastMsgNodeId = messages.at(-1)?.msg.id ?? 
null; + if ( + !(await sendMessage( + currConvId, + lastMsgNodeId, + lastInpMsg, + currExtra, + onChunk + )) + ) { + // restore the input message if failed + textarea.setValue(lastInpMsg); + } + // OK + clearExtraContext(); + }; + + const handleEditMessage = async (msg: Message, content: string) => { + if (!viewingChat) return; + setCurrNodeId(msg.id); + scrollToBottom(false); + await replaceMessageAndGenerate( + viewingChat.conv.id, + msg.parent, + content, + msg.extra, + onChunk + ); + setCurrNodeId(-1); + scrollToBottom(false); + }; + + const handleRegenerateMessage = async (msg: Message) => { + if (!viewingChat) return; + setCurrNodeId(msg.parent); + scrollToBottom(false); + await replaceMessageAndGenerate( + viewingChat.conv.id, + msg.parent, + null, + msg.extra, + onChunk + ); + setCurrNodeId(-1); + scrollToBottom(false); + }; + + const hasCanvas = !!canvasData; + + useEffect(() => { + if (prefilledMsg.shouldSend()) { + // send the prefilled message if needed + sendNewMessage(); + } else { + // otherwise, focus on the input + textarea.focus(); + } + prefilledMsg.clear(); + // no need to keep track of sendNewMessage + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [textarea.ref]); + + // due to some timing issues of StorageUtils.appendMsg(), we need to make sure the pendingMsg is not duplicated upon rendering (i.e. appears once in the saved conversation and once in the pendingMsg) + const pendingMsgDisplay: MessageDisplay[] = + pendingMsg && messages.at(-1)?.msg.id !== pendingMsg.id + ? [ + { + msg: pendingMsg, + siblingLeafNodeIds: [], + siblingCurrIdx: 0, + isPending: true, + }, + ] + : []; + + return ( +
+
+ {/* chat messages */} +
+
+ {/* placeholder to shift the message to the bottom */} + {viewingChat ? '' : 'Send a message to start'} +
+ {[...messages, ...pendingMsgDisplay].map((msg) => ( + + ))} +
+ + {/* chat input */} +
+ + + {isGenerating(currConvId ?? '') ? ( + + ) : ( + + )} +
+
+
+ {canvasData?.type === CanvasType.PY_INTERPRETER && ( + + )} +
+
+ ); +} diff --git a/examples/server/webui/src/components/Header.tsx b/examples/server/webui/src/components/Header.tsx new file mode 100644 index 0000000000..4c6b291e61 --- /dev/null +++ b/examples/server/webui/src/components/Header.tsx @@ -0,0 +1,178 @@ +import { useEffect, useState } from 'react'; +import StorageUtils from '../utils/storage'; +import { useAppContext } from '../utils/app.context'; +import { classNames } from '../utils/misc'; +import daisyuiThemes from 'daisyui/theme/object'; +import { THEMES } from '../Config'; +import { useNavigate } from 'react-router'; + +export default function Header() { + const navigate = useNavigate(); + const [selectedTheme, setSelectedTheme] = useState(StorageUtils.getTheme()); + const { setShowSettings } = useAppContext(); + + const setTheme = (theme: string) => { + StorageUtils.setTheme(theme); + setSelectedTheme(theme); + }; + + useEffect(() => { + document.body.setAttribute('data-theme', selectedTheme); + document.body.setAttribute( + 'data-color-scheme', + daisyuiThemes[selectedTheme]?.['color-scheme'] ?? 'auto' + ); + }, [selectedTheme]); + + const { isGenerating, viewingChat } = useAppContext(); + const isCurrConvGenerating = isGenerating(viewingChat?.conv.id ?? ''); + + const removeConversation = () => { + if (isCurrConvGenerating || !viewingChat) return; + const convId = viewingChat?.conv.id; + if (window.confirm('Are you sure to delete this conversation?')) { + StorageUtils.remove(convId); + navigate('/'); + } + }; + + const downloadConversation = () => { + if (isCurrConvGenerating || !viewingChat) return; + const convId = viewingChat?.conv.id; + const conversationJson = JSON.stringify(viewingChat, null, 2); + const blob = new Blob([conversationJson], { type: 'application/json' }); + const url = URL.createObjectURL(blob); + const a = document.createElement('a'); + a.href = url; + a.download = `conversation_${convId}.json`; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + URL.revokeObjectURL(url); + }; + + return ( +
+ {/* open sidebar button */} + + +
llama.cpp
+ + {/* action buttons (top right) */} +
+ {viewingChat && ( +
+ {/* "..." button */} + + {/* dropdown menu */} + +
+ )} + +
+ +
+ + {/* theme controller is copied from https://daisyui.com/components/theme-controller/ */} +
+
+
+ + + +
+
    +
+ +
+ {THEMES.map((theme) => ( +
+ e.target.checked && setTheme(theme)} + /> +
+ ))} +
+
+
+
+
+ ); +} diff --git a/examples/server/webui/src/components/MarkdownDisplay.tsx b/examples/server/webui/src/components/MarkdownDisplay.tsx new file mode 100644 index 0000000000..5b7a725914 --- /dev/null +++ b/examples/server/webui/src/components/MarkdownDisplay.tsx @@ -0,0 +1,310 @@ +import React, { useMemo, useState } from 'react'; +import Markdown, { ExtraProps } from 'react-markdown'; +import remarkGfm from 'remark-gfm'; +import rehypeHightlight from 'rehype-highlight'; +import rehypeKatex from 'rehype-katex'; +import remarkMath from 'remark-math'; +import remarkBreaks from 'remark-breaks'; +import 'katex/dist/katex.min.css'; +import { classNames, copyStr } from '../utils/misc'; +import { ElementContent, Root } from 'hast'; +import { visit } from 'unist-util-visit'; +import { useAppContext } from '../utils/app.context'; +import { CanvasType } from '../utils/types'; + +export default function MarkdownDisplay({ + content, + isGenerating, +}: { + content: string; + isGenerating?: boolean; +}) { + const preprocessedContent = useMemo( + () => preprocessLaTeX(content), + [content] + ); + return ( + ( + + ), + // note: do not use "pre", "p" or other basic html elements here, it will cause the node to re-render when the message is being generated (this should be a bug with react-markdown, not sure how to fix it) + }} + > + {preprocessedContent} + + ); +} + +const CodeBlockButtons: React.ElementType< + React.ClassAttributes & + React.HTMLAttributes & + ExtraProps & { origContent: string; isGenerating?: boolean } +> = ({ node, origContent, isGenerating }) => { + const { config } = useAppContext(); + const startOffset = node?.position?.start.offset ?? 0; + const endOffset = node?.position?.end.offset ?? 0; + + const copiedContent = useMemo( + () => + origContent + .substring(startOffset, endOffset) + .replace(/^```[^\n]+\n/g, '') + .replace(/```$/g, ''), + [origContent, startOffset, endOffset] + ); + + const codeLanguage = useMemo( + () => + origContent + .substring(startOffset, startOffset + 10) + .match(/^```([^\n]+)\n/)?.[1] ?? '', + [origContent, startOffset] + ); + + const canRunCode = + !isGenerating && + config.pyIntepreterEnabled && + codeLanguage.startsWith('py'); + + return ( +
+ + {canRunCode && ( + + )} +
+ ); +}; + +export const CopyButton = ({ + content, + className, +}: { + content: string; + className?: string; +}) => { + const [copied, setCopied] = useState(false); + return ( + + ); +}; + +export const RunPyCodeButton = ({ + content, + className, +}: { + content: string; + className?: string; +}) => { + const { setCanvasData } = useAppContext(); + return ( + <> + + + ); +}; + +/** + * This injects the "button" element before each "pre" element. + * The actual button will be replaced with a react component in the MarkdownDisplay. + * We don't replace "pre" node directly because it will cause the node to re-render, which causes this bug: https://github.com/ggerganov/llama.cpp/issues/9608 + */ +function rehypeCustomCopyButton() { + return function (tree: Root) { + visit(tree, 'element', function (node) { + if (node.tagName === 'pre' && !node.properties.visited) { + const preNode = { ...node }; + // replace current node + preNode.properties.visited = 'true'; + node.tagName = 'div'; + node.properties = {}; + // add node for button + const btnNode: ElementContent = { + type: 'element', + tagName: 'button', + properties: {}, + children: [], + position: node.position, + }; + node.children = [btnNode, preNode]; + } + }); + }; +} + +/** + * The part below is copied and adapted from: + * https://github.com/danny-avila/LibreChat/blob/main/client/src/utils/latex.ts + * (MIT License) + */ + +// Regex to check if the processed content contains any potential LaTeX patterns +const containsLatexRegex = + /\\\(.*?\\\)|\\\[.*?\\\]|\$.*?\$|\\begin\{equation\}.*?\\end\{equation\}/; + +// Regex for inline and block LaTeX expressions +const inlineLatex = new RegExp(/\\\((.+?)\\\)/, 'g'); +const blockLatex = new RegExp(/\\\[(.*?[^\\])\\\]/, 'gs'); + +// Function to restore code blocks +const restoreCodeBlocks = (content: string, codeBlocks: string[]) => { + return content.replace( + /<>/g, + (_, index) => codeBlocks[index] + ); +}; + +// Regex to identify code blocks and inline code +const codeBlockRegex = /(```[\s\S]*?```|`.*?`)/g; + +export const processLaTeX = (_content: string) => { + let content = _content; + // Temporarily replace code blocks and inline code with placeholders + const codeBlocks: string[] = []; + let index = 0; + content = content.replace(codeBlockRegex, (match) => { + codeBlocks[index] = match; + return `<>`; + }); + + // Escape dollar signs followed by a digit or space and digit + let processedContent = content.replace(/(\$)(?=\s?\d)/g, '\\$'); + + // If no LaTeX patterns are found, restore code blocks and return the processed content + if (!containsLatexRegex.test(processedContent)) { + return restoreCodeBlocks(processedContent, codeBlocks); + } + + // Convert LaTeX expressions to a markdown compatible format + processedContent = processedContent + .replace(inlineLatex, (_: string, equation: string) => `$${equation}$`) // Convert inline LaTeX + .replace(blockLatex, (_: string, equation: string) => `$$${equation}$$`); // Convert block LaTeX + + // Restore code blocks + return restoreCodeBlocks(processedContent, codeBlocks); +}; + +/** + * Preprocesses LaTeX content by replacing delimiters and escaping certain characters. + * + * @param content The input string containing LaTeX expressions. + * @returns The processed string with replaced delimiters and escaped characters. 
+ */ +export function preprocessLaTeX(content: string): string { + // Step 1: Protect code blocks + const codeBlocks: string[] = []; + content = content.replace(/(```[\s\S]*?```|`[^`\n]+`)/g, (_, code) => { + codeBlocks.push(code); + return `<>`; + }); + + // Step 2: Protect existing LaTeX expressions + const latexExpressions: string[] = []; + + // Protect block math ($$...$$), \[...\], and \(...\) as before. + content = content.replace( + /(\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]|\\\(.*?\\\))/g, + (match) => { + latexExpressions.push(match); + return `<>`; + } + ); + + // Protect inline math ($...$) only if it does NOT match a currency pattern. + // We assume a currency pattern is one where the inner content is purely numeric (with optional decimals). + content = content.replace(/\$([^$]+)\$/g, (match, inner) => { + if (/^\s*\d+(?:\.\d+)?\s*$/.test(inner)) { + // This looks like a currency value (e.g. "$123" or "$12.34"), + // so don't protect it. + return match; + } else { + // Otherwise, treat it as a LaTeX expression. + latexExpressions.push(match); + return `<>`; + } + }); + + // Step 3: Escape dollar signs that are likely currency indicators. + // (Now that inline math is protected, this will only escape dollars not already protected) + content = content.replace(/\$(?=\d)/g, '\\$'); + + // Step 4: Restore LaTeX expressions + content = content.replace( + /<>/g, + (_, index) => latexExpressions[parseInt(index)] + ); + + // Step 5: Restore code blocks + content = content.replace( + /<>/g, + (_, index) => codeBlocks[parseInt(index)] + ); + + // Step 6: Apply additional escaping functions + content = escapeBrackets(content); + content = escapeMhchem(content); + + return content; +} + +export function escapeBrackets(text: string): string { + const pattern = + /(```[\S\s]*?```|`.*?`)|\\\[([\S\s]*?[^\\])\\]|\\\((.*?)\\\)/g; + return text.replace( + pattern, + ( + match: string, + codeBlock: string | undefined, + squareBracket: string | undefined, + roundBracket: string | undefined + ): string => { + if (codeBlock != null) { + return codeBlock; + } else if (squareBracket != null) { + return `$$${squareBracket}$$`; + } else if (roundBracket != null) { + return `$${roundBracket}$`; + } + return match; + } + ); +} + +export function escapeMhchem(text: string) { + return text.replaceAll('$\\ce{', '$\\\\ce{').replaceAll('$\\pu{', '$\\\\pu{'); +} diff --git a/examples/server/webui/src/components/SettingDialog.tsx b/examples/server/webui/src/components/SettingDialog.tsx new file mode 100644 index 0000000000..b65e73ae16 --- /dev/null +++ b/examples/server/webui/src/components/SettingDialog.tsx @@ -0,0 +1,536 @@ +import { useState } from 'react'; +import { useAppContext } from '../utils/app.context'; +import { CONFIG_DEFAULT, CONFIG_INFO } from '../Config'; +import { isDev } from '../Config'; +import StorageUtils from '../utils/storage'; +import { classNames, isBoolean, isNumeric, isString } from '../utils/misc'; +import { + BeakerIcon, + ChatBubbleOvalLeftEllipsisIcon, + Cog6ToothIcon, + FunnelIcon, + HandRaisedIcon, + SquaresPlusIcon, +} from '@heroicons/react/24/outline'; +import { OpenInNewTab } from '../utils/common'; + +type SettKey = keyof typeof CONFIG_DEFAULT; + +const BASIC_KEYS: SettKey[] = [ + 'temperature', + 'top_k', + 'top_p', + 'min_p', + 'max_tokens', +]; +const SAMPLER_KEYS: SettKey[] = [ + 'dynatemp_range', + 'dynatemp_exponent', + 'typical_p', + 'xtc_probability', + 'xtc_threshold', +]; +const PENALTY_KEYS: SettKey[] = [ + 'repeat_last_n', + 'repeat_penalty', + 'presence_penalty', + 
'frequency_penalty', + 'dry_multiplier', + 'dry_base', + 'dry_allowed_length', + 'dry_penalty_last_n', +]; + +enum SettingInputType { + SHORT_INPUT, + LONG_INPUT, + CHECKBOX, + CUSTOM, +} + +interface SettingFieldInput { + type: Exclude; + label: string | React.ReactElement; + help?: string | React.ReactElement; + key: SettKey; +} + +interface SettingFieldCustom { + type: SettingInputType.CUSTOM; + key: SettKey; + component: + | string + | React.FC<{ + value: string | boolean | number; + onChange: (value: string) => void; + }>; +} + +interface SettingSection { + title: React.ReactElement; + fields: (SettingFieldInput | SettingFieldCustom)[]; +} + +const ICON_CLASSNAME = 'w-4 h-4 mr-1 inline'; + +const SETTING_SECTIONS: SettingSection[] = [ + { + title: ( + <> + + General + + ), + fields: [ + { + type: SettingInputType.SHORT_INPUT, + label: 'API Key', + key: 'apiKey', + }, + { + type: SettingInputType.LONG_INPUT, + label: 'System Message (will be disabled if left empty)', + key: 'systemMessage', + }, + ...BASIC_KEYS.map( + (key) => + ({ + type: SettingInputType.SHORT_INPUT, + label: key, + key, + }) as SettingFieldInput + ), + ], + }, + { + title: ( + <> + + Samplers + + ), + fields: [ + { + type: SettingInputType.SHORT_INPUT, + label: 'Samplers queue', + key: 'samplers', + }, + ...SAMPLER_KEYS.map( + (key) => + ({ + type: SettingInputType.SHORT_INPUT, + label: key, + key, + }) as SettingFieldInput + ), + ], + }, + { + title: ( + <> + + Penalties + + ), + fields: PENALTY_KEYS.map((key) => ({ + type: SettingInputType.SHORT_INPUT, + label: key, + key, + })), + }, + { + title: ( + <> + + Reasoning + + ), + fields: [ + { + type: SettingInputType.CHECKBOX, + label: 'Expand thought process by default when generating messages', + key: 'showThoughtInProgress', + }, + { + type: SettingInputType.CHECKBOX, + label: + 'Exclude thought process when sending requests to API (Recommended for DeepSeek-R1)', + key: 'excludeThoughtOnReq', + }, + ], + }, + { + title: ( + <> + + Advanced + + ), + fields: [ + { + type: SettingInputType.CUSTOM, + key: 'custom', // dummy key, won't be used + component: () => { + const debugImportDemoConv = async () => { + const res = await fetch('/demo-conversation.json'); + const demoConv = await res.json(); + StorageUtils.remove(demoConv.id); + for (const msg of demoConv.messages) { + StorageUtils.appendMsg(demoConv.id, msg); + } + }; + return ( + + ); + }, + }, + { + type: SettingInputType.CHECKBOX, + label: 'Show tokens per second', + key: 'showTokensPerSecond', + }, + { + type: SettingInputType.LONG_INPUT, + label: ( + <> + Custom JSON config (For more info, refer to{' '} + + server documentation + + ) + + ), + key: 'custom', + }, + ], + }, + { + title: ( + <> + + Experimental + + ), + fields: [ + { + type: SettingInputType.CUSTOM, + key: 'custom', // dummy key, won't be used + component: () => ( + <> +

+ Experimental features are not guaranteed to work correctly. +
+
+ If you encounter any problems, create a{' '} + + Bug (misc.) + {' '} + report on GitHub. Please also specify webui/experimental in + the report title and include screenshots. +
+
+ Some features may require packages downloaded from a CDN, so they + need an internet connection. +

+ + ), + }, + { + type: SettingInputType.CHECKBOX, + label: ( + <> + Enable Python interpreter +
+ + This feature uses{' '} + pyodide, + downloaded from CDN. To use this feature, ask the LLM to generate + Python code inside a Markdown code block. You will see a "Run" + button on the code block, near the "Copy" button. + + + ), + key: 'pyIntepreterEnabled', + }, + ], + }, +]; + +export default function SettingDialog({ + show, + onClose, +}: { + show: boolean; + onClose: () => void; +}) { + const { config, saveConfig } = useAppContext(); + const [sectionIdx, setSectionIdx] = useState(0); + + // clone the config object to prevent direct mutation + const [localConfig, setLocalConfig] = useState( + JSON.parse(JSON.stringify(config)) + ); + + const resetConfig = () => { + if (window.confirm('Are you sure you want to reset all settings?')) { + setLocalConfig(CONFIG_DEFAULT); + } + }; + + const handleSave = () => { + // copy the local config to prevent direct mutation + const newConfig: typeof CONFIG_DEFAULT = JSON.parse( + JSON.stringify(localConfig) + ); + // validate the config + for (const key in newConfig) { + const value = newConfig[key as SettKey]; + const mustBeBoolean = isBoolean(CONFIG_DEFAULT[key as SettKey]); + const mustBeString = isString(CONFIG_DEFAULT[key as SettKey]); + const mustBeNumeric = isNumeric(CONFIG_DEFAULT[key as SettKey]); + if (mustBeString) { + if (!isString(value)) { + alert(`Value for ${key} must be string`); + return; + } + } else if (mustBeNumeric) { + const trimmedValue = value.toString().trim(); + const numVal = Number(trimmedValue); + if (isNaN(numVal) || !isNumeric(numVal) || trimmedValue.length === 0) { + alert(`Value for ${key} must be numeric`); + return; + } + // force conversion to number + // @ts-expect-error this is safe + newConfig[key] = numVal; + } else if (mustBeBoolean) { + if (!isBoolean(value)) { + alert(`Value for ${key} must be boolean`); + return; + } + } else { + console.error(`Unknown default type for key ${key}`); + } + } + if (isDev) console.log('Saving config', newConfig); + saveConfig(newConfig); + onClose(); + }; + + const onChange = (key: SettKey) => (value: string | boolean) => { + // note: we do not perform validation here, because we may get incomplete value as user is still typing it + setLocalConfig({ ...localConfig, [key]: value }); + }; + + return ( + +
+

Settings

+
+ {/* Left panel, showing sections - Desktop version */} +
+ {SETTING_SECTIONS.map((section, idx) => ( +
setSectionIdx(idx)} + dir="auto" + > + {section.title} +
+ ))} +
+ + {/* Left panel, showing sections - Mobile version */} +
+
+ + {SETTING_SECTIONS[sectionIdx].title} + +
    + {SETTING_SECTIONS.map((section, idx) => ( +
    setSectionIdx(idx)} + dir="auto" + > + {section.title} +
    + ))} +
+
+
+ + {/* Right panel, showing setting fields */} +
+ {SETTING_SECTIONS[sectionIdx].fields.map((field, idx) => { + const key = `${sectionIdx}-${idx}`; + if (field.type === SettingInputType.SHORT_INPUT) { + return ( + + ); + } else if (field.type === SettingInputType.LONG_INPUT) { + return ( + + ); + } else if (field.type === SettingInputType.CHECKBOX) { + return ( + + ); + } else if (field.type === SettingInputType.CUSTOM) { + return ( +
+ {typeof field.component === 'string' + ? field.component + : field.component({ + value: localConfig[field.key], + onChange: onChange(field.key), + })} +
+ ); + } + })} + +

+ Settings are saved in the browser's localStorage +

+
+
+ +
+ + + +
+
+
+ ); +} + +function SettingsModalLongInput({ + configKey, + value, + onChange, + label, +}: { + configKey: SettKey; + value: string; + onChange: (value: string) => void; + label?: string; +}) { + return ( +