diff --git a/.devops/cpu.Dockerfile b/.devops/cpu.Dockerfile index 522ee8147d..aa2aa03120 100644 --- a/.devops/cpu.Dockerfile +++ b/.devops/cpu.Dockerfile @@ -14,9 +14,9 @@ WORKDIR /app COPY . . RUN if [ "$TARGETARCH" = "amd64" ]; then \ - cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \ + cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \ elif [ "$TARGETARCH" = "arm64" ]; then \ - cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \ + cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \ else \ echo "Unsupported architecture"; \ exit 1; \ diff --git a/.devops/cuda.Dockerfile b/.devops/cuda.Dockerfile index 0bc4e7ee13..8ae57d2e28 100644 --- a/.devops/cuda.Dockerfile +++ b/.devops/cuda.Dockerfile @@ -21,7 +21,7 @@ COPY . . RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \ export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \ fi && \ - cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ + cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ cmake --build build --config Release -j$(nproc) RUN mkdir -p /app/lib && \ diff --git a/.devops/intel.Dockerfile b/.devops/intel.Dockerfile index af783f5e99..091e1dc5d8 100644 --- a/.devops/intel.Dockerfile +++ b/.devops/intel.Dockerfile @@ -17,7 +17,7 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \ && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \ fi && \ echo "Building with dynamic libs" && \ - cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \ + cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${OPT_SYCL_F16} && \ cmake --build build --config Release -j$(nproc) RUN mkdir -p /app/lib && \ diff --git a/.devops/llama-cli-cann.Dockerfile b/.devops/llama-cli-cann.Dockerfile index 02dce501ce..0eb1af87cb 100644 --- a/.devops/llama-cli-cann.Dockerfile +++ b/.devops/llama-cli-cann.Dockerfile @@ -1,4 +1,4 @@ -ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8 +ARG ASCEND_VERSION=8.1.RC1.alpha001-910b-openeuler22.03-py3.10 FROM ascendai/cann:$ASCEND_VERSION AS build @@ -6,7 +6,7 @@ WORKDIR /app COPY . . -RUN yum install -y gcc g++ cmake make +RUN yum install -y gcc g++ cmake make libcurl-devel ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH} diff --git a/.devops/musa.Dockerfile b/.devops/musa.Dockerfile index 1e87737abf..261a2823a0 100644 --- a/.devops/musa.Dockerfile +++ b/.devops/musa.Dockerfile @@ -35,7 +35,7 @@ COPY . . 
RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \ export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \ fi && \ - cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ + cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ cmake --build build --config Release -j$(nproc) RUN mkdir -p /app/lib && \ diff --git a/.devops/rocm.Dockerfile b/.devops/rocm.Dockerfile index 48e7e6aaa5..a1b34723a4 100644 --- a/.devops/rocm.Dockerfile +++ b/.devops/rocm.Dockerfile @@ -17,8 +17,8 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build # gfx906 is deprecated #check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html -#ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102' -ARG ROCM_DOCKER_ARCH=gfx1100 +ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102' +#ARG ROCM_DOCKER_ARCH=gfx1100 # Set nvcc architectured ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH} @@ -40,7 +40,7 @@ WORKDIR /app COPY . . RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \ - cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON \ + cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON \ && cmake --build build --config Release -j$(nproc) RUN mkdir -p /app/lib \ diff --git a/.devops/vulkan.Dockerfile b/.devops/vulkan.Dockerfile index 9064f38385..f8f3072e95 100644 --- a/.devops/vulkan.Dockerfile +++ b/.devops/vulkan.Dockerfile @@ -16,7 +16,7 @@ WORKDIR /app COPY . . 
-RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \ +RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON && \ cmake --build build --config Release -j$(nproc) RUN mkdir -p /app/lib && \ diff --git a/.github/actions/windows-setup-curl/action.yml b/.github/actions/windows-setup-curl/action.yml new file mode 100644 index 0000000000..5d76da3d79 --- /dev/null +++ b/.github/actions/windows-setup-curl/action.yml @@ -0,0 +1,25 @@ +name: 'Windows - Setup CURL' +description: 'Composite action, to be reused in other workflow' +inputs: + curl_version: + description: 'CURL version' + required: false + default: '8.6.0_6' +outputs: + curl_path: + description: "Path to the downloaded libcurl" + value: ${{ steps.get_libcurl.outputs.curl_path }} + +runs: + using: "composite" + steps: + - name: libCURL + id: get_libcurl + shell: powershell + env: + CURL_VERSION: ${{ inputs.curl_version }} + run: | + curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip" + mkdir $env:RUNNER_TEMP/libcurl + tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl + echo "curl_path=$env:RUNNER_TEMP/libcurl" >> $env:GITHUB_OUTPUT diff --git a/.github/workflows/bench.yml.disabled b/.github/workflows/bench.yml.disabled index 0370c8943f..75d2714792 100644 --- a/.github/workflows/bench.yml.disabled +++ b/.github/workflows/bench.yml.disabled @@ -104,7 +104,6 @@ jobs: cmake -B build \ -DGGML_NATIVE=OFF \ -DLLAMA_BUILD_SERVER=ON \ - -DLLAMA_CURL=ON \ -DLLAMA_CUBLAS=ON \ -DCUDAToolkit_ROOT=/usr/local/cuda \ -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \ diff --git a/.github/workflows/build-linux-cross.yml b/.github/workflows/build-linux-cross.yml new file mode 100644 index 0000000000..e8639913ea --- /dev/null +++ b/.github/workflows/build-linux-cross.yml @@ -0,0 +1,124 @@ +name: Build on Linux using cross-compiler +on: + workflow_dispatch: + workflow_call: + +jobs: + ubuntu-latest-riscv64-cpu-cross: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Setup Riscv + run: | + sudo dpkg --add-architecture riscv64 + sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \ + /etc/apt/sources.list /etc/apt/apt-mirrors.txt + sudo apt-get clean + sudo apt-get update + sudo apt-get install -y --no-install-recommends \ + build-essential \ + gcc-14-riscv64-linux-gnu \ + g++-14-riscv64-linux-gnu \ + libcurl4-openssl-dev:riscv64 + + - name: Build + run: | + cmake -B build -DCMAKE_BUILD_TYPE=Release \ + -DGGML_OPENMP=OFF \ + -DLLAMA_BUILD_EXAMPLES=ON \ + -DLLAMA_BUILD_TESTS=OFF \ + -DCMAKE_SYSTEM_NAME=Linux \ + -DCMAKE_SYSTEM_PROCESSOR=riscv64 \ + -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ + -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \ + -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ + -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \ + -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ + -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ + -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH + + cmake --build build --config Release -j $(nproc) + + ubuntu-latest-riscv64-vulkan-cross: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Riscv + run: | + sudo dpkg --add-architecture riscv64 + sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \ + /etc/apt/sources.list /etc/apt/apt-mirrors.txt + sudo apt-get clean + sudo apt-get 
update + sudo apt-get install -y --no-install-recommends \ + build-essential \ + glslc \ + gcc-14-riscv64-linux-gnu \ + g++-14-riscv64-linux-gnu \ + libvulkan-dev:riscv64 \ + libcurl4-openssl-dev:riscv64 + + - name: Build + run: | + cmake -B build -DCMAKE_BUILD_TYPE=Release \ + -DGGML_VULKAN=ON \ + -DGGML_OPENMP=OFF \ + -DLLAMA_BUILD_EXAMPLES=ON \ + -DLLAMA_BUILD_TESTS=OFF \ + -DCMAKE_SYSTEM_NAME=Linux \ + -DCMAKE_SYSTEM_PROCESSOR=riscv64 \ + -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ + -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \ + -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ + -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \ + -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ + -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ + -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH + + cmake --build build --config Release -j $(nproc) + + ubuntu-latest-arm64-vulkan-cross: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Arm64 + run: | + sudo dpkg --add-architecture arm64 + sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \ + /etc/apt/sources.list /etc/apt/apt-mirrors.txt + sudo apt-get clean + sudo apt-get update + sudo apt-get install -y --no-install-recommends \ + build-essential \ + glslc \ + crossbuild-essential-arm64 \ + libvulkan-dev:arm64 \ + libcurl4-openssl-dev:arm64 + + - name: Build + run: | + cmake -B build -DCMAKE_BUILD_TYPE=Release \ + -DGGML_VULKAN=ON \ + -DGGML_OPENMP=OFF \ + -DLLAMA_BUILD_EXAMPLES=ON \ + -DLLAMA_BUILD_TESTS=OFF \ + -DCMAKE_SYSTEM_NAME=Linux \ + -DCMAKE_SYSTEM_PROCESSOR=aarch64 \ + -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc \ + -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++ \ + -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ + -DCMAKE_FIND_ROOT_PATH=/usr/lib/aarch64-linux-gnu \ + -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ + -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ + -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH + + cmake --build build --config Release -j $(nproc) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9b955f905b..34417985d2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -10,7 +10,7 @@ on: push: branches: - master - paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp'] + paths: ['.github/workflows/build.yml', '.github/workflows/build-linux-cross.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp'] pull_request: types: [opened, synchronize, reopened] paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp'] @@ -54,6 +54,7 @@ jobs: continue-on-error: true run: | brew update + brew install curl - name: Build id: cmake_build @@ -62,7 +63,6 @@ jobs: cmake -B build \ -DCMAKE_BUILD_RPATH="@loader_path" \ -DLLAMA_FATAL_WARNINGS=ON \ - -DLLAMA_CURL=ON \ -DGGML_METAL_USE_BF16=ON \ -DGGML_METAL_EMBED_LIBRARY=ON \ -DGGML_RPC=ON @@ -92,7 +92,6 @@ jobs: if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} run: | cp LICENSE ./build/bin/ - cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/* - name: 
Upload artifacts @@ -123,6 +122,7 @@ jobs: continue-on-error: true run: | brew update + brew install curl - name: Build id: cmake_build @@ -133,7 +133,6 @@ jobs: cmake -B build \ -DCMAKE_BUILD_RPATH="@loader_path" \ -DLLAMA_FATAL_WARNINGS=ON \ - -DLLAMA_CURL=ON \ -DGGML_METAL=OFF \ -DGGML_RPC=ON cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) @@ -162,7 +161,6 @@ jobs: if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} run: | cp LICENSE ./build/bin/ - cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/* - name: Upload artifacts @@ -207,7 +205,6 @@ jobs: run: | cmake -B build \ -DLLAMA_FATAL_WARNINGS=ON \ - -DLLAMA_CURL=ON \ -DGGML_RPC=ON cmake --build build --config Release -j $(nproc) @@ -246,7 +243,6 @@ jobs: if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} run: | cp LICENSE ./build/bin/ - cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip ./build/bin/* - name: Upload artifacts @@ -281,7 +277,7 @@ jobs: id: depends run: | sudo apt-get update - sudo apt-get install build-essential + sudo apt-get install build-essential libcurl4-openssl-dev - name: Build id: cmake_build @@ -322,7 +318,7 @@ jobs: id: depends run: | sudo apt-get update - sudo apt-get install build-essential + sudo apt-get install build-essential libcurl4-openssl-dev - name: Build id: cmake_build @@ -360,7 +356,7 @@ jobs: id: depends run: | sudo apt-get update - sudo apt-get install build-essential + sudo apt-get install build-essential libcurl4-openssl-dev - name: Build id: cmake_build @@ -397,7 +393,7 @@ jobs: wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list sudo apt-get update -y - sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk + sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk libcurl4-openssl-dev - name: Build id: cmake_build @@ -431,7 +427,6 @@ jobs: if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} run: | cp LICENSE ./build/bin/ - cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip ./build/bin/* - name: Upload artifacts @@ -454,7 +449,7 @@ jobs: id: depends run: | sudo apt-get update - sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev + sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev libcurl4-openssl-dev - name: ccache uses: hendrikmuhs/ccache-action@v1.2.16 @@ -530,7 +525,7 @@ jobs: shell: bash run: | sudo apt update - sudo apt install intel-oneapi-compiler-dpcpp-cpp + sudo apt install intel-oneapi-compiler-dpcpp-cpp libcurl4-openssl-dev - name: install oneAPI MKL library shell: bash @@ -578,7 +573,7 @@ jobs: shell: bash run: | sudo apt update - sudo apt install intel-oneapi-compiler-dpcpp-cpp + sudo apt install intel-oneapi-compiler-dpcpp-cpp libcurl4-openssl-dev - name: install oneAPI MKL library shell: bash @@ -606,6 +601,9 @@ jobs: -DGGML_SYCL_F16=ON cmake --build build --config Release -j $(nproc) + build-linux-cross: + uses: 
./.github/workflows/build-linux-cross.yml + macOS-latest-cmake-ios: runs-on: macos-latest @@ -633,6 +631,7 @@ jobs: cmake -B build -G Xcode \ -DGGML_METAL_USE_BF16=ON \ -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_BUILD_COMMON=OFF \ -DLLAMA_BUILD_EXAMPLES=OFF \ -DLLAMA_BUILD_TESTS=OFF \ -DLLAMA_BUILD_SERVER=OFF \ @@ -668,6 +667,7 @@ jobs: cmake -B build -G Xcode \ -DGGML_METAL_USE_BF16=ON \ -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_BUILD_COMMON=OFF \ -DLLAMA_BUILD_EXAMPLES=OFF \ -DLLAMA_BUILD_TESTS=OFF \ -DLLAMA_BUILD_SERVER=OFF \ @@ -697,6 +697,7 @@ jobs: cmake -B build -G Xcode \ -DGGML_METAL_USE_BF16=ON \ -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_BUILD_COMMON=OFF \ -DLLAMA_BUILD_EXAMPLES=OFF \ -DLLAMA_BUILD_TESTS=OFF \ -DLLAMA_BUILD_SERVER=OFF \ @@ -736,6 +737,7 @@ jobs: cmake -B build -G Xcode \ -DGGML_METAL_USE_BF16=ON \ -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_CURL=OFF \ -DLLAMA_BUILD_EXAMPLES=OFF \ -DLLAMA_BUILD_TESTS=OFF \ -DLLAMA_BUILD_SERVER=OFF \ @@ -896,10 +898,17 @@ jobs: -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release" cmake --build build-arm64-release --target install --config release + - name: libCURL + id: get_libcurl + uses: ./.github/actions/windows-setup-curl + - name: Build id: cmake_build + env: + CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} run: | - cmake -S . -B build ${{ matrix.defines }} + cmake -S . -B build ${{ matrix.defines }} ` + -DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include" cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} - name: Add libopenblas.dll @@ -959,9 +968,10 @@ jobs: - name: Pack artifacts id: pack_artifacts if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + env: + CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} run: | - Copy-Item LICENSE .\build\bin\Release\llama.cpp.txt - Copy-Item .\examples\run\linenoise.cpp\LICENSE .\build\bin\Release\linenoise.cpp.txt + Copy-Item $env:CURL_PATH\bin\libcurl-x64.dll .\build\bin\Release\libcurl-x64.dll 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip .\build\bin\Release\* - name: Upload artifacts @@ -987,7 +997,7 @@ jobs: DEBIAN_FRONTEND: noninteractive run: | apt update - apt install -y cmake build-essential ninja-build libgomp1 git + apt install -y cmake build-essential ninja-build libgomp1 git libcurl4-openssl-dev - name: ccache uses: hendrikmuhs/ccache-action@v1.2.16 @@ -1089,16 +1099,23 @@ jobs: run: | choco install ninja + - name: libCURL + id: get_libcurl + uses: ./.github/actions/windows-setup-curl + - name: Build id: cmake_build shell: cmd + env: + CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} run: | call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat" cmake -S . 
-B build -G "Ninja Multi-Config" ^ -DLLAMA_BUILD_SERVER=ON ^ -DGGML_NATIVE=OFF ^ -DGGML_CUDA=ON ^ - -DGGML_RPC=ON + -DGGML_RPC=ON ^ + -DCURL_LIBRARY="%CURL_PATH%/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="%CURL_PATH%/include" set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1 cmake --build build --config Release -j %NINJA_JOBS% -t ggml cmake --build build --config Release @@ -1119,7 +1136,10 @@ jobs: - name: Pack artifacts id: pack_artifacts if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + env: + CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} run: | + cp $env:CURL_PATH\bin\libcurl-x64.dll .\build\bin\Release\libcurl-x64.dll 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip .\build\bin\Release\* - name: Upload artifacts @@ -1174,6 +1194,8 @@ jobs: run: | scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL + # TODO: add libcurl support ; we will also need to modify win-build-sycl.bat to accept user-specified args + - name: Build id: cmake_build run: examples/sycl/win-build-sycl.bat @@ -1259,8 +1281,14 @@ jobs: key: ${{ github.job }} evict-old-files: 1d + - name: libCURL + id: get_libcurl + uses: ./.github/actions/windows-setup-curl + - name: Build id: cmake_build + env: + CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} run: | $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path) $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}" @@ -1271,9 +1299,11 @@ jobs: -DCMAKE_BUILD_TYPE=Release ` -DGGML_HIP=ON ` -DGGML_HIP_ROCWMMA_FATTN=ON ` - -DGGML_RPC=ON + -DGGML_RPC=ON ` + -DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include" cmake --build build -j ${env:NUMBER_OF_PROCESSORS} + # TODO: reuse windows-latest-cmake-hip instead of duplicating this job windows-latest-cmake-hip-release: if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} runs-on: windows-latest @@ -1315,8 +1345,14 @@ jobs: run: | & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version + - name: libCURL + id: get_libcurl + uses: ./.github/actions/windows-setup-curl + - name: Build id: cmake_build + env: + CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} run: | $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path) $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}" @@ -1328,7 +1364,8 @@ jobs: -DAMDGPU_TARGETS=${{ matrix.gpu_target }} ` -DGGML_HIP_ROCWMMA_FATTN=ON ` -DGGML_HIP=ON ` - -DGGML_RPC=ON + -DGGML_RPC=ON ` + -DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include" cmake --build build -j ${env:NUMBER_OF_PROCESSORS} md "build\bin\rocblas\library\" cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\" @@ -1350,7 +1387,10 @@ jobs: - name: Pack artifacts id: pack_artifacts + env: + CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} run: | + cp $env:CURL_PATH\bin\libcurl-x64.dll .\build\bin\libcurl-x64.dll 7z a llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip .\build\bin\* - name: Upload artifacts @@ -1375,6 +1415,7 @@ jobs: cmake -B build -G Xcode \ -DGGML_METAL_USE_BF16=ON \ -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_CURL=OFF \ -DLLAMA_BUILD_EXAMPLES=OFF \ -DLLAMA_BUILD_TESTS=OFF \ -DLLAMA_BUILD_SERVER=OFF \ @@ -1725,16 +1766,17 @@ jobs: if: ${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 
'Ascend NPU') }} defaults: run: - shell: bash -el {0} - runs-on: ubuntu-24.04-arm + shell: bash -el {0} strategy: matrix: + arch: [x86, aarch64] cann: - - '8.0.rc3.beta1-910b-openeuler22.03-py3.10' + - '8.1.RC1.alpha001-910b-openeuler22.03-py3.10' device: - 'ascend910b3' build: - 'Release' + runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }} container: ascendai/cann:${{ matrix.cann }} steps: - name: Checkout @@ -1743,7 +1785,7 @@ jobs: - name: Dependencies run: | yum update -y - yum install -y git gcc gcc-c++ make cmake + yum install -y git gcc gcc-c++ make cmake libcurl-devel - name: Build run: | diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index c81d21fcd7..114cbf83ed 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -36,13 +36,13 @@ jobs: matrix: config: # Multi-stage build - - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: false} - - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false} - - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false} - - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false} - - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false} + - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: false } + - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false } + - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true } + - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false } + - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false } # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete - #- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: true } + #- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: true } steps: - name: Check out the repo uses: actions/checkout@v4 diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index 092c928bdf..6c9b513227 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -129,7 +129,6 @@ jobs: cmake -B build \ -DGGML_NATIVE=OFF \ -DLLAMA_BUILD_SERVER=ON \ - -DLLAMA_CURL=ON \ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ -DGGML_OPENMP=OFF ; @@ -142,7 +141,6 @@ jobs: cmake -B build \ -DGGML_NATIVE=OFF \ -DLLAMA_BUILD_SERVER=ON \ - -DLLAMA_CURL=ON \ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ; cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target 
llama-server @@ -154,7 +152,6 @@ jobs: cmake -B build \ -DGGML_NATIVE=OFF \ -DLLAMA_BUILD_SERVER=ON \ - -DLLAMA_CURL=ON \ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ; cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server @@ -195,17 +192,14 @@ jobs: - name: libCURL id: get_libcurl - env: - CURL_VERSION: 8.6.0_6 - run: | - curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip" - mkdir $env:RUNNER_TEMP/libcurl - tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl + uses: ./.github/actions/windows-setup-curl - name: Build id: cmake_build + env: + CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} run: | - cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include" + cmake -B build -DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include" cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target llama-server - name: Python setup @@ -221,8 +215,10 @@ jobs: - name: Copy Libcurl id: prepare_libcurl + env: + CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} run: | - cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll + cp $env:CURL_PATH/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll - name: Tests id: server_integration_tests diff --git a/CMakeLists.txt b/CMakeLists.txt index a5eb992db6..44a930b98a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -81,7 +81,7 @@ option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE}) option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE}) # 3rd party libs -option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF) +option(LLAMA_CURL "llama: use libcurl to download model from an URL" ON) option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF) # Required for relocatable CMake package @@ -169,6 +169,11 @@ add_subdirectory(src) # utils, programs, examples and tests # +if (NOT LLAMA_BUILD_COMMON) + message(STATUS "LLAMA_BUILD_COMMON is OFF, disabling LLAMA_CURL") + set(LLAMA_CURL OFF) +endif() + if (LLAMA_BUILD_COMMON) add_subdirectory(common) endif() @@ -243,3 +248,20 @@ configure_file(cmake/llama.pc.in install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc" DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) + +# +# copy the license files +# + +# Check if running in GitHub Actions +if(DEFINED ENV{GITHUB_ACTIONS} AND "$ENV{GITHUB_ACTIONS}" STREQUAL "true") + message(STATUS "Running inside GitHub Actions - copying license files") + + # Copy all files from licenses/ to build/bin/ + file(GLOB LICENSE_FILES "${CMAKE_SOURCE_DIR}/licenses/*") + foreach(LICENSE_FILE ${LICENSE_FILES}) + get_filename_component(FILENAME ${LICENSE_FILE} NAME) + configure_file(${LICENSE_FILE} "${CMAKE_BINARY_DIR}/bin/${FILENAME}" COPYONLY) + endforeach() +endif() + diff --git a/Makefile b/Makefile index 1f9455eff0..772993ada2 100644 --- a/Makefile +++ b/Makefile @@ -780,10 +780,6 @@ ifdef GGML_HIP MK_CPPFLAGS += -DGGML_USE_HIP -DGGML_USE_CUDA -ifdef GGML_HIP_UMA - MK_CPPFLAGS += -DGGML_HIP_UMA -endif # GGML_HIP_UMA - MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64 MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas diff --git a/README.md b/README.md index f8ed423c4e..cf45f23cf4 100644 --- a/README.md 
+++ b/README.md @@ -9,13 +9,6 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) in pure C/C++ -> [!IMPORTANT] -> New `llama.cpp` package location: [ggml-org/llama.cpp](https://github.com/ggml-org/llama.cpp/pkgs/container/llama.cpp) -> -> Update your container URLs to: `ghcr.io/ggml-org/llama.cpp` -> -> More info: https://github.com/ggml-org/llama.cpp/discussions/11801 - ## Recent API changes - [Changelog for `libllama` API](https://github.com/ggml-org/llama.cpp/issues/9289) @@ -104,6 +97,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo - [x] [Flan T5](https://huggingface.co/models?search=flan-t5) - [x] [Open Elm models](https://huggingface.co/collections/apple/openelm-instruct-models-6619ad295d7ae9f868b759ca) - [x] [ChatGLM3-6b](https://huggingface.co/THUDM/chatglm3-6b) + [ChatGLM4-9b](https://huggingface.co/THUDM/glm-4-9b) + [GLMEdge-1.5b](https://huggingface.co/THUDM/glm-edge-1.5b-chat) + [GLMEdge-4b](https://huggingface.co/THUDM/glm-edge-4b-chat) +- [x] [GLM-4-0414](https://huggingface.co/collections/THUDM/glm-4-0414-67f3cbcb34dd9d252707cb2e) - [x] [SmolLM](https://huggingface.co/collections/HuggingFaceTB/smollm-6695016cad7167254ce15966) - [x] [EXAONE-3.0-7.8B-Instruct](https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct) - [x] [FalconMamba Models](https://huggingface.co/collections/tiiuae/falconmamba-7b-66b9a580324dd1598b0f6d4a) @@ -247,6 +241,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo | [Vulkan](docs/build.md#vulkan) | GPU | | [CANN](docs/build.md#cann) | Ascend NPU | | [OpenCL](docs/backend/OPENCL.md) | Adreno GPU | +| [RPC](https://github.com/ggml-org/llama.cpp/tree/master/examples/rpc) | All | ## Building the project @@ -265,7 +260,9 @@ The [Hugging Face](https://huggingface.co) platform hosts a [number of LLMs](htt - [Trending](https://huggingface.co/models?library=gguf&sort=trending) - [LLaMA](https://huggingface.co/models?sort=trending&search=llama+gguf) -You can either manually download the GGUF file or directly use any `llama.cpp`-compatible models from Hugging Face by using this CLI argument: `-hf /[:quant]` +You can either manually download the GGUF file or directly use any `llama.cpp`-compatible models from [Hugging Face](https://huggingface.co/) or other model hosting sites, such as [ModelScope](https://modelscope.cn/), by using this CLI argument: `-hf /[:quant]`. + +By default, the CLI would download from Hugging Face, you can switch to other options with the environment variable `MODEL_ENDPOINT`. For example, you may opt to downloading model checkpoints from ModelScope or other model sharing communities by setting the environment variable, e.g. `MODEL_ENDPOINT=https://www.modelscope.cn/`. After downloading a model, use the CLI tools to run it locally - see below. @@ -530,6 +527,35 @@ If your issue is with model generation quality, then please at least scan the fo - [Aligning language models to follow instructions](https://openai.com/research/instruction-following) - [Training language models to follow instructions with human feedback](https://arxiv.org/abs/2203.02155) +## XCFramework +The XCFramework is a precompiled version of the library for iOS, visionOS, tvOS, +and macOS. It can be used in Swift projects without the need to compile the +library from source. For example: +```swift +// swift-tools-version: 5.10 +// The swift-tools-version declares the minimum version of Swift required to build this package. 
+ +import PackageDescription + +let package = Package( + name: "MyLlamaPackage", + targets: [ + .executableTarget( + name: "MyLlamaPackage", + dependencies: [ + "LlamaFramework" + ]), + .binaryTarget( + name: "LlamaFramework", + url: "https://github.com/ggml-org/llama.cpp/releases/download/b5046/llama-b5046-xcframework.zip", + checksum: "c19be78b5f00d8d29a25da41042cb7afa094cbf6280a225abe614b03b20029ab" + ) + ] +) +``` +The above example is using an intermediate build `b5046` of the library. This can be modified +to use a different version by changing the URL and checksum. + ## Completions Command-line completion is available for some environments. diff --git a/build-xcframework.sh b/build-xcframework.sh index 2ce3939c43..97001b5f7f 100755 --- a/build-xcframework.sh +++ b/build-xcframework.sh @@ -41,6 +41,11 @@ COMMON_CMAKE_ARGS=( -DGGML_OPENMP=${GGML_OPENMP} ) +XCODE_VERSION=$(xcodebuild -version 2>/dev/null | head -n1 | awk '{ print $2 }') +MAJOR_VERSION=$(echo $XCODE_VERSION | cut -d. -f1) +MINOR_VERSION=$(echo $XCODE_VERSION | cut -d. -f2) +echo "Detected Xcode version: $XCODE_VERSION" + check_required_tool() { local tool=$1 local install_message=$2 @@ -325,21 +330,28 @@ combine_static_libraries() { # Platform-specific post-processing for device builds if [[ "$is_simulator" == "false" ]]; then - if command -v vtool &>/dev/null; then + if command -v xcrun vtool &>/dev/null; then case "$platform" in "ios") echo "Marking binary as a framework binary for iOS..." - vtool -set-build-version ios ${IOS_MIN_OS_VERSION} ${IOS_MIN_OS_VERSION} -replace \ + xcrun vtool -set-build-version ios ${IOS_MIN_OS_VERSION} ${IOS_MIN_OS_VERSION} -replace \ -output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}" ;; "visionos") echo "Marking binary as a framework binary for visionOS..." - vtool -set-build-version xros ${VISIONOS_MIN_OS_VERSION} ${VISIONOS_MIN_OS_VERSION} -replace \ + if [[ "$MAJOR_VERSION" -gt 16 ]] || [[ "$MAJOR_VERSION" -eq 16 && "$MINOR_VERSION" -gt 2 ]]; then + echo "Xcode version greater than 16.2, using visionOS." + VISION_OS_BUILD_VERSION="visionos" + else + echo "Xcode version less than or equal to 16.2, using xros." + VISION_OS_BUILD_VERSION="xros" + fi + xcrun vtool -set-build-version ${VISION_OS_BUILD_VERSION} ${VISIONOS_MIN_OS_VERSION} ${VISIONOS_MIN_OS_VERSION} -replace \ -output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}" ;; "tvos") echo "Marking binary as a framework binary for tvOS..." - vtool -set-build-version tvos ${TVOS_MIN_OS_VERSION} ${TVOS_MIN_OS_VERSION} -replace \ + xcrun vtool -set-build-version tvos ${TVOS_MIN_OS_VERSION} ${TVOS_MIN_OS_VERSION} -replace \ -output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}" ;; esac @@ -399,6 +411,7 @@ cmake -B build-ios-sim -G Xcode \ -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=iphonesimulator \ -DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \ -DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \ + -DLLAMA_CURL=OFF \ -S . cmake --build build-ios-sim --config Release -- -quiet @@ -411,6 +424,7 @@ cmake -B build-ios-device -G Xcode \ -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=iphoneos \ -DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \ -DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \ + -DLLAMA_CURL=OFF \ -S . cmake --build build-ios-device --config Release -- -quiet @@ -421,6 +435,7 @@ cmake -B build-macos -G Xcode \ -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \ -DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \ -DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \ + -DLLAMA_CURL=OFF \ -S . 
cmake --build build-macos --config Release -- -quiet @@ -434,6 +449,7 @@ cmake -B build-visionos -G Xcode \ -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xros \ -DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \ -DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \ + -DLLAMA_CURL=OFF \ -S . cmake --build build-visionos --config Release -- -quiet @@ -447,6 +463,7 @@ cmake -B build-visionos-sim -G Xcode \ -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xrsimulator \ -DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \ -DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \ + -DLLAMA_CURL=OFF \ -S . cmake --build build-visionos-sim --config Release -- -quiet @@ -462,6 +479,7 @@ cmake -B build-tvos-sim -G Xcode \ -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=appletvsimulator \ -DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \ -DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \ + -DLLAMA_CURL=OFF \ -S . cmake --build build-tvos-sim --config Release -- -quiet @@ -476,6 +494,7 @@ cmake -B build-tvos-device -G Xcode \ -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=appletvos \ -DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \ -DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \ + -DLLAMA_CURL=OFF \ -S . cmake --build build-tvos-device --config Release -- -quiet diff --git a/ci/run.sh b/ci/run.sh index ebd662b38b..f463d7a8b2 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -39,7 +39,7 @@ sd=`dirname $0` cd $sd/../ SRC=`pwd` -CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON" +CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=OFF" if [ ! -z ${GG_BUILD_METAL} ]; then CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON -DGGML_METAL_USE_BF16=ON" diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 829eb5b723..43533fc86a 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -85,7 +85,10 @@ set(LLAMA_COMMON_EXTRA_LIBS build_info) # Use curl to download model url if (LLAMA_CURL) - find_package(CURL REQUIRED) + find_package(CURL) + if (NOT CURL_FOUND) + message(FATAL_ERROR "Could NOT find CURL. 
Hint: to disable this feature, set -DLLAMA_CURL=OFF") + endif() target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL) include_directories(${CURL_INCLUDE_DIRS}) find_library(CURL_LIBRARY curl REQUIRED) diff --git a/common/arg.cpp b/common/arg.cpp index fa22e86cd1..0b57f9da1e 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -162,6 +163,8 @@ struct common_hf_file_res { # if !defined(PATH_MAX) # define PATH_MAX MAX_PATH # endif +#elif defined(_AIX) +#include #else #include #endif @@ -225,12 +228,13 @@ static bool common_download_file_single(const std::string & url, const std::stri curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str()); curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L); + http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp"); // Check if hf-token or bearer-token was specified if (!bearer_token.empty()) { std::string auth_header = "Authorization: Bearer " + bearer_token; http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str()); - curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr); } + curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr); #if defined(_WIN32) // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of @@ -541,7 +545,10 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_ curl_ptr curl(curl_easy_init(), &curl_easy_cleanup); curl_slist_ptr http_headers; std::string res_str; - std::string url = "https://huggingface.co/v2/" + hf_repo + "/manifests/" + tag; + + std::string model_endpoint = get_model_endpoint(); + + std::string url = model_endpoint + "v2/" + hf_repo + "/manifests/" + tag; curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str()); curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data); @@ -656,9 +663,8 @@ static void common_params_handle_model( } } - // TODO: allow custom host - model.url = "https://huggingface.co/" + model.hf_repo + "/resolve/main/" + model.hf_file; - + std::string model_endpoint = get_model_endpoint(); + model.url = model_endpoint + model.hf_repo + "/resolve/main/" + model.hf_file; // make sure model path is present (for caching purposes) if (model.path.empty()) { // this is to avoid different repo having same file name, or same file name in different subdirs diff --git a/common/chat.cpp b/common/chat.cpp index 62ca26ad76..bbc5f087cd 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1622,7 +1622,7 @@ static common_chat_params common_chat_templates_apply_jinja( } // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools) - if (src.find("") != std::string::npos && params.json_schema.is_null()) { + if (src.find("") != std::string::npos && params.json_schema.is_null() && params.tools.is_array() && params.json_schema.is_null()) { return common_chat_params_init_hermes_2_pro(tmpl, params); } diff --git a/common/common.cpp b/common/common.cpp index d4882c5123..94f545f815 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -830,7 +830,7 @@ std::string fs_get_cache_directory() { if (getenv("LLAMA_CACHE")) { cache_directory = std::getenv("LLAMA_CACHE"); } else { -#ifdef __linux__ +#if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX) if (std::getenv("XDG_CACHE_HOME")) { cache_directory = std::getenv("XDG_CACHE_HOME"); } else { @@ -840,7 +840,9 @@ std::string fs_get_cache_directory() { cache_directory = std::getenv("HOME") + 
std::string("/Library/Caches/"); #elif defined(_WIN32) cache_directory = std::getenv("LOCALAPPDATA"); -#endif // __linux__ +#else +# error Unknown architecture +#endif cache_directory = ensure_trailing_slash(cache_directory); cache_directory += "llama.cpp"; } @@ -1027,6 +1029,19 @@ struct common_init_result common_init_from_params(common_params & params) { return iparams; } +std::string get_model_endpoint() { + const char * model_endpoint_env = getenv("MODEL_ENDPOINT"); + // We still respect the use of environment-variable "HF_ENDPOINT" for backward-compatibility. + const char * hf_endpoint_env = getenv("HF_ENDPOINT"); + const char * endpoint_env = model_endpoint_env ? model_endpoint_env : hf_endpoint_env; + std::string model_endpoint = "https://huggingface.co/"; + if (endpoint_env) { + model_endpoint = endpoint_env; + if (model_endpoint.back() != '/') model_endpoint += '/'; + } + return model_endpoint; +} + void common_set_adapter_lora(struct llama_context * ctx, std::vector & lora) { llama_clear_adapter_lora(ctx); for (auto & la : lora) { diff --git a/common/common.h b/common/common.h index 725b5123d2..e6eaa8e80c 100644 --- a/common/common.h +++ b/common/common.h @@ -543,6 +543,8 @@ struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_p // clear LoRA adapters from context, then apply new list of adapters void common_set_adapter_lora(struct llama_context * ctx, std::vector & lora); +std::string get_model_endpoint(); + // // Batch utils // diff --git a/common/minja/chat-template.hpp b/common/minja/chat-template.hpp index 882ba41bd1..237a7625cd 100644 --- a/common/minja/chat-template.hpp +++ b/common/minja/chat-template.hpp @@ -9,10 +9,19 @@ #pragma once #include "minja.hpp" -#include + +#include +#include +#include +#include +#include +#include +#include #include #include +#include + using json = nlohmann::ordered_json; namespace minja { @@ -425,7 +434,7 @@ class chat_template { auto obj = json { {"tool_calls", tool_calls}, }; - if (!content.is_null() && content != "") { + if (!content.is_null() && !content.empty()) { obj["content"] = content; } message["content"] = obj.dump(2); @@ -435,13 +444,12 @@ class chat_template { if (polyfill_tool_responses && role == "tool") { message["role"] = "user"; auto obj = json { - {"tool_response", { - {"content", message.at("content")}, - }}, + {"tool_response", json::object()}, }; if (message.contains("name")) { - obj["tool_response"]["name"] = message.at("name"); + obj["tool_response"]["tool"] = message.at("name"); } + obj["tool_response"]["content"] = message.at("content"); if (message.contains("tool_call_id")) { obj["tool_response"]["tool_call_id"] = message.at("tool_call_id"); } @@ -510,7 +518,7 @@ class chat_template { static nlohmann::ordered_json add_system(const nlohmann::ordered_json & messages, const std::string & system_prompt) { json messages_with_system = messages; - if (messages_with_system.size() > 0 && messages_with_system[0].at("role") == "system") { + if (!messages_with_system.empty() && messages_with_system[0].at("role") == "system") { std::string existing_system = messages_with_system.at(0).at("content"); messages_with_system[0] = json { {"role", "system"}, diff --git a/common/minja/minja.hpp b/common/minja/minja.hpp index c4f40b17e6..e52e792d84 100644 --- a/common/minja/minja.hpp +++ b/common/minja/minja.hpp @@ -8,14 +8,26 @@ // SPDX-License-Identifier: MIT #pragma once +#include +#include +#include +#include +#include +#include #include -#include -#include -#include +#include +#include +#include #include 
-#include +#include #include +#include +#include +#include #include +#include +#include + #include using json = nlohmann::ordered_json; @@ -240,7 +252,7 @@ public: auto index = key.get(); return array_->at(index < 0 ? array_->size() + index : index); } else if (object_) { - if (!key.is_hashable()) throw std::runtime_error("Unhashable type: " + dump()); + if (!key.is_hashable()) throw std::runtime_error("Unashable type: " + dump()); auto it = object_->find(key.primitive_); if (it == object_->end()) return Value(); return it->second; @@ -249,7 +261,7 @@ public: } void set(const Value& key, const Value& value) { if (!object_) throw std::runtime_error("Value is not an object: " + dump()); - if (!key.is_hashable()) throw std::runtime_error("Unhashable type: " + dump()); + if (!key.is_hashable()) throw std::runtime_error("Unashable type: " + dump()); (*object_)[key.primitive_] = value; } Value call(const std::shared_ptr & context, ArgumentsValue & args) const { @@ -731,51 +743,51 @@ public: struct TextTemplateToken : public TemplateToken { std::string text; - TextTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post, const std::string& t) : TemplateToken(Type::Text, location, pre, post), text(t) {} + TextTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post, const std::string& t) : TemplateToken(Type::Text, loc, pre, post), text(t) {} }; struct ExpressionTemplateToken : public TemplateToken { std::shared_ptr expr; - ExpressionTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post, std::shared_ptr && e) : TemplateToken(Type::Expression, location, pre, post), expr(std::move(e)) {} + ExpressionTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post, std::shared_ptr && e) : TemplateToken(Type::Expression, loc, pre, post), expr(std::move(e)) {} }; struct IfTemplateToken : public TemplateToken { std::shared_ptr condition; - IfTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post, std::shared_ptr && c) : TemplateToken(Type::If, location, pre, post), condition(std::move(c)) {} + IfTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post, std::shared_ptr && c) : TemplateToken(Type::If, loc, pre, post), condition(std::move(c)) {} }; struct ElifTemplateToken : public TemplateToken { std::shared_ptr condition; - ElifTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post, std::shared_ptr && c) : TemplateToken(Type::Elif, location, pre, post), condition(std::move(c)) {} + ElifTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post, std::shared_ptr && c) : TemplateToken(Type::Elif, loc, pre, post), condition(std::move(c)) {} }; struct ElseTemplateToken : public TemplateToken { - ElseTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::Else, location, pre, post) {} + ElseTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::Else, loc, pre, post) {} }; struct EndIfTemplateToken : public TemplateToken { - EndIfTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndIf, location, pre, post) {} + EndIfTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndIf, loc, pre, post) {} }; struct MacroTemplateToken : public TemplateToken { std::shared_ptr name; Expression::Parameters params; - MacroTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling 
post, std::shared_ptr && n, Expression::Parameters && p) - : TemplateToken(Type::Macro, location, pre, post), name(std::move(n)), params(std::move(p)) {} + MacroTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post, std::shared_ptr && n, Expression::Parameters && p) + : TemplateToken(Type::Macro, loc, pre, post), name(std::move(n)), params(std::move(p)) {} }; struct EndMacroTemplateToken : public TemplateToken { - EndMacroTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndMacro, location, pre, post) {} + EndMacroTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndMacro, loc, pre, post) {} }; struct FilterTemplateToken : public TemplateToken { std::shared_ptr filter; - FilterTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post, std::shared_ptr && filter) - : TemplateToken(Type::Filter, location, pre, post), filter(std::move(filter)) {} + FilterTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post, std::shared_ptr && filter) + : TemplateToken(Type::Filter, loc, pre, post), filter(std::move(filter)) {} }; struct EndFilterTemplateToken : public TemplateToken { - EndFilterTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndFilter, location, pre, post) {} + EndFilterTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndFilter, loc, pre, post) {} }; struct ForTemplateToken : public TemplateToken { @@ -783,38 +795,38 @@ struct ForTemplateToken : public TemplateToken { std::shared_ptr iterable; std::shared_ptr condition; bool recursive; - ForTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post, const std::vector & vns, std::shared_ptr && iter, + ForTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post, const std::vector & vns, std::shared_ptr && iter, std::shared_ptr && c, bool r) - : TemplateToken(Type::For, location, pre, post), var_names(vns), iterable(std::move(iter)), condition(std::move(c)), recursive(r) {} + : TemplateToken(Type::For, loc, pre, post), var_names(vns), iterable(std::move(iter)), condition(std::move(c)), recursive(r) {} }; struct EndForTemplateToken : public TemplateToken { - EndForTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndFor, location, pre, post) {} + EndForTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndFor, loc, pre, post) {} }; struct GenerationTemplateToken : public TemplateToken { - GenerationTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::Generation, location, pre, post) {} + GenerationTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::Generation, loc, pre, post) {} }; struct EndGenerationTemplateToken : public TemplateToken { - EndGenerationTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndGeneration, location, pre, post) {} + EndGenerationTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndGeneration, loc, pre, post) {} }; struct SetTemplateToken : public TemplateToken { std::string ns; std::vector var_names; std::shared_ptr value; - SetTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post, const std::string & ns, const 
std::vector & vns, std::shared_ptr && v) - : TemplateToken(Type::Set, location, pre, post), ns(ns), var_names(vns), value(std::move(v)) {} + SetTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post, const std::string & ns, const std::vector & vns, std::shared_ptr && v) + : TemplateToken(Type::Set, loc, pre, post), ns(ns), var_names(vns), value(std::move(v)) {} }; struct EndSetTemplateToken : public TemplateToken { - EndSetTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndSet, location, pre, post) {} + EndSetTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndSet, loc, pre, post) {} }; struct CommentTemplateToken : public TemplateToken { std::string text; - CommentTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post, const std::string& t) : TemplateToken(Type::Comment, location, pre, post), text(t) {} + CommentTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post, const std::string& t) : TemplateToken(Type::Comment, loc, pre, post), text(t) {} }; enum class LoopControlType { Break, Continue }; @@ -830,7 +842,7 @@ public: struct LoopControlTemplateToken : public TemplateToken { LoopControlType control_type; - LoopControlTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post, LoopControlType control_type) : TemplateToken(Type::Break, location, pre, post), control_type(control_type) {} + LoopControlTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post, LoopControlType control_type) : TemplateToken(Type::Break, loc, pre, post), control_type(control_type) {} }; class TemplateNode { @@ -868,8 +880,8 @@ public: class SequenceNode : public TemplateNode { std::vector> children; public: - SequenceNode(const Location & location, std::vector> && c) - : TemplateNode(location), children(std::move(c)) {} + SequenceNode(const Location & loc, std::vector> && c) + : TemplateNode(loc), children(std::move(c)) {} void do_render(std::ostringstream & out, const std::shared_ptr & context) const override { for (const auto& child : children) child->render(out, context); } @@ -878,7 +890,7 @@ public: class TextNode : public TemplateNode { std::string text; public: - TextNode(const Location & location, const std::string& t) : TemplateNode(location), text(t) {} + TextNode(const Location & loc, const std::string& t) : TemplateNode(loc), text(t) {} void do_render(std::ostringstream & out, const std::shared_ptr &) const override { out << text; } @@ -887,7 +899,7 @@ public: class ExpressionNode : public TemplateNode { std::shared_ptr expr; public: - ExpressionNode(const Location & location, std::shared_ptr && e) : TemplateNode(location), expr(std::move(e)) {} + ExpressionNode(const Location & loc, std::shared_ptr && e) : TemplateNode(loc), expr(std::move(e)) {} void do_render(std::ostringstream & out, const std::shared_ptr & context) const override { if (!expr) throw std::runtime_error("ExpressionNode.expr is null"); auto result = expr->evaluate(context); @@ -904,8 +916,8 @@ public: class IfNode : public TemplateNode { std::vector, std::shared_ptr>> cascade; public: - IfNode(const Location & location, std::vector, std::shared_ptr>> && c) - : TemplateNode(location), cascade(std::move(c)) {} + IfNode(const Location & loc, std::vector, std::shared_ptr>> && c) + : TemplateNode(loc), cascade(std::move(c)) {} void do_render(std::ostringstream & out, const std::shared_ptr & context) const override { for (const auto& 
branch : cascade) { auto enter_branch = true; @@ -924,7 +936,7 @@ public: class LoopControlNode : public TemplateNode { LoopControlType control_type_; public: - LoopControlNode(const Location & location, LoopControlType control_type) : TemplateNode(location), control_type_(control_type) {} + LoopControlNode(const Location & loc, LoopControlType control_type) : TemplateNode(loc), control_type_(control_type) {} void do_render(std::ostringstream &, const std::shared_ptr &) const override { throw LoopControlException(control_type_); } @@ -938,9 +950,9 @@ class ForNode : public TemplateNode { bool recursive; std::shared_ptr else_body; public: - ForNode(const Location & location, std::vector && var_names, std::shared_ptr && iterable, + ForNode(const Location & loc, std::vector && var_names, std::shared_ptr && iterable, std::shared_ptr && condition, std::shared_ptr && body, bool recursive, std::shared_ptr && else_body) - : TemplateNode(location), var_names(var_names), iterable(std::move(iterable)), condition(std::move(condition)), body(std::move(body)), recursive(recursive), else_body(std::move(else_body)) {} + : TemplateNode(loc), var_names(var_names), iterable(std::move(iterable)), condition(std::move(condition)), body(std::move(body)), recursive(recursive), else_body(std::move(else_body)) {} void do_render(std::ostringstream & out, const std::shared_ptr & context) const override { // https://jinja.palletsprojects.com/en/3.0.x/templates/#for @@ -1025,8 +1037,8 @@ class MacroNode : public TemplateNode { std::shared_ptr body; std::unordered_map named_param_positions; public: - MacroNode(const Location & location, std::shared_ptr && n, Expression::Parameters && p, std::shared_ptr && b) - : TemplateNode(location), name(std::move(n)), params(std::move(p)), body(std::move(b)) { + MacroNode(const Location & loc, std::shared_ptr && n, Expression::Parameters && p, std::shared_ptr && b) + : TemplateNode(loc), name(std::move(n)), params(std::move(p)), body(std::move(b)) { for (size_t i = 0; i < params.size(); ++i) { const auto & name = params[i].first; if (!name.empty()) { @@ -1072,8 +1084,8 @@ class FilterNode : public TemplateNode { std::shared_ptr body; public: - FilterNode(const Location & location, std::shared_ptr && f, std::shared_ptr && b) - : TemplateNode(location), filter(std::move(f)), body(std::move(b)) {} + FilterNode(const Location & loc, std::shared_ptr && f, std::shared_ptr && b) + : TemplateNode(loc), filter(std::move(f)), body(std::move(b)) {} void do_render(std::ostringstream & out, const std::shared_ptr & context) const override { if (!filter) throw std::runtime_error("FilterNode.filter is null"); @@ -1095,8 +1107,8 @@ class SetNode : public TemplateNode { std::vector var_names; std::shared_ptr value; public: - SetNode(const Location & location, const std::string & ns, const std::vector & vns, std::shared_ptr && v) - : TemplateNode(location), ns(ns), var_names(vns), value(std::move(v)) {} + SetNode(const Location & loc, const std::string & ns, const std::vector & vns, std::shared_ptr && v) + : TemplateNode(loc), ns(ns), var_names(vns), value(std::move(v)) {} void do_render(std::ostringstream &, const std::shared_ptr & context) const override { if (!value) throw std::runtime_error("SetNode.value is null"); if (!ns.empty()) { @@ -1118,8 +1130,8 @@ class SetTemplateNode : public TemplateNode { std::string name; std::shared_ptr template_value; public: - SetTemplateNode(const Location & location, const std::string & name, std::shared_ptr && tv) - : TemplateNode(location), name(name), 
template_value(std::move(tv)) {} + SetTemplateNode(const Location & loc, const std::string & name, std::shared_ptr && tv) + : TemplateNode(loc), name(name), template_value(std::move(tv)) {} void do_render(std::ostringstream &, const std::shared_ptr & context) const override { if (!template_value) throw std::runtime_error("SetTemplateNode.template_value is null"); Value value { template_value->render(context) }; @@ -1132,8 +1144,8 @@ class IfExpr : public Expression { std::shared_ptr then_expr; std::shared_ptr else_expr; public: - IfExpr(const Location & location, std::shared_ptr && c, std::shared_ptr && t, std::shared_ptr && e) - : Expression(location), condition(std::move(c)), then_expr(std::move(t)), else_expr(std::move(e)) {} + IfExpr(const Location & loc, std::shared_ptr && c, std::shared_ptr && t, std::shared_ptr && e) + : Expression(loc), condition(std::move(c)), then_expr(std::move(t)), else_expr(std::move(e)) {} Value do_evaluate(const std::shared_ptr & context) const override { if (!condition) throw std::runtime_error("IfExpr.condition is null"); if (!then_expr) throw std::runtime_error("IfExpr.then_expr is null"); @@ -1150,16 +1162,16 @@ public: class LiteralExpr : public Expression { Value value; public: - LiteralExpr(const Location & location, const Value& v) - : Expression(location), value(v) {} + LiteralExpr(const Location & loc, const Value& v) + : Expression(loc), value(v) {} Value do_evaluate(const std::shared_ptr &) const override { return value; } }; class ArrayExpr : public Expression { std::vector> elements; public: - ArrayExpr(const Location & location, std::vector> && e) - : Expression(location), elements(std::move(e)) {} + ArrayExpr(const Location & loc, std::vector> && e) + : Expression(loc), elements(std::move(e)) {} Value do_evaluate(const std::shared_ptr & context) const override { auto result = Value::array(); for (const auto& e : elements) { @@ -1173,8 +1185,8 @@ public: class DictExpr : public Expression { std::vector, std::shared_ptr>> elements; public: - DictExpr(const Location & location, std::vector, std::shared_ptr>> && e) - : Expression(location), elements(std::move(e)) {} + DictExpr(const Location & loc, std::vector, std::shared_ptr>> && e) + : Expression(loc), elements(std::move(e)) {} Value do_evaluate(const std::shared_ptr & context) const override { auto result = Value::object(); for (const auto& [key, value] : elements) { @@ -1189,8 +1201,8 @@ public: class SliceExpr : public Expression { public: std::shared_ptr start, end; - SliceExpr(const Location & location, std::shared_ptr && s, std::shared_ptr && e) - : Expression(location), start(std::move(s)), end(std::move(e)) {} + SliceExpr(const Location & loc, std::shared_ptr && s, std::shared_ptr && e) + : Expression(loc), start(std::move(s)), end(std::move(e)) {} Value do_evaluate(const std::shared_ptr &) const override { throw std::runtime_error("SliceExpr not implemented"); } @@ -1200,8 +1212,8 @@ class SubscriptExpr : public Expression { std::shared_ptr base; std::shared_ptr index; public: - SubscriptExpr(const Location & location, std::shared_ptr && b, std::shared_ptr && i) - : Expression(location), base(std::move(b)), index(std::move(i)) {} + SubscriptExpr(const Location & loc, std::shared_ptr && b, std::shared_ptr && i) + : Expression(loc), base(std::move(b)), index(std::move(i)) {} Value do_evaluate(const std::shared_ptr & context) const override { if (!base) throw std::runtime_error("SubscriptExpr.base is null"); if (!index) throw std::runtime_error("SubscriptExpr.index is null"); @@ -1243,8 
+1255,8 @@ public: enum class Op { Plus, Minus, LogicalNot, Expansion, ExpansionDict }; std::shared_ptr expr; Op op; - UnaryOpExpr(const Location & location, std::shared_ptr && e, Op o) - : Expression(location), expr(std::move(e)), op(o) {} + UnaryOpExpr(const Location & loc, std::shared_ptr && e, Op o) + : Expression(loc), expr(std::move(e)), op(o) {} Value do_evaluate(const std::shared_ptr & context) const override { if (!expr) throw std::runtime_error("UnaryOpExpr.expr is null"); auto e = expr->evaluate(context); @@ -1269,8 +1281,8 @@ private: std::shared_ptr right; Op op; public: - BinaryOpExpr(const Location & location, std::shared_ptr && l, std::shared_ptr && r, Op o) - : Expression(location), left(std::move(l)), right(std::move(r)), op(o) {} + BinaryOpExpr(const Location & loc, std::shared_ptr && l, std::shared_ptr && r, Op o) + : Expression(loc), left(std::move(l)), right(std::move(r)), op(o) {} Value do_evaluate(const std::shared_ptr & context) const override { if (!left) throw std::runtime_error("BinaryOpExpr.left is null"); if (!right) throw std::runtime_error("BinaryOpExpr.right is null"); @@ -1427,8 +1439,8 @@ class MethodCallExpr : public Expression { std::shared_ptr method; ArgumentsExpression args; public: - MethodCallExpr(const Location & location, std::shared_ptr && obj, std::shared_ptr && m, ArgumentsExpression && a) - : Expression(location), object(std::move(obj)), method(std::move(m)), args(std::move(a)) {} + MethodCallExpr(const Location & loc, std::shared_ptr && obj, std::shared_ptr && m, ArgumentsExpression && a) + : Expression(loc), object(std::move(obj)), method(std::move(m)), args(std::move(a)) {} Value do_evaluate(const std::shared_ptr & context) const override { if (!object) throw std::runtime_error("MethodCallExpr.object is null"); if (!method) throw std::runtime_error("MethodCallExpr.method is null"); @@ -1526,8 +1538,8 @@ class CallExpr : public Expression { public: std::shared_ptr object; ArgumentsExpression args; - CallExpr(const Location & location, std::shared_ptr && obj, ArgumentsExpression && a) - : Expression(location), object(std::move(obj)), args(std::move(a)) {} + CallExpr(const Location & loc, std::shared_ptr && obj, ArgumentsExpression && a) + : Expression(loc), object(std::move(obj)), args(std::move(a)) {} Value do_evaluate(const std::shared_ptr & context) const override { if (!object) throw std::runtime_error("CallExpr.object is null"); auto obj = object->evaluate(context); @@ -1542,8 +1554,8 @@ public: class FilterExpr : public Expression { std::vector> parts; public: - FilterExpr(const Location & location, std::vector> && p) - : Expression(location), parts(std::move(p)) {} + FilterExpr(const Location & loc, std::vector> && p) + : Expression(loc), parts(std::move(p)) {} Value do_evaluate(const std::shared_ptr & context) const override { Value result; bool first = true; @@ -2460,7 +2472,7 @@ private: static std::regex leading_space_regex(R"(^\s+)"); text = std::regex_replace(text, leading_space_regex, ""); } else if (options.trim_blocks && (it - 1) != begin && !dynamic_cast((*(it - 2)).get())) { - if (text.length() > 0 && text[0] == '\n') { + if (!text.empty() && text[0] == '\n') { text.erase(0, 1); } } @@ -2538,7 +2550,7 @@ public: TemplateTokenIterator begin = tokens.begin(); auto it = begin; TemplateTokenIterator end = tokens.end(); - return parser.parseTemplate(begin, it, end, /* full= */ true); + return parser.parseTemplate(begin, it, end, /* fully= */ true); } }; @@ -2577,7 +2589,7 @@ inline std::shared_ptr Context::builtins() { throw 
std::runtime_error(args.at("message").get()); })); globals.set("tojson", simple_function("tojson", { "value", "indent" }, [](const std::shared_ptr &, Value & args) { - return Value(args.at("value").dump(args.get("indent", -1), /* tojson= */ true)); + return Value(args.at("value").dump(args.get("indent", -1), /* to_json= */ true)); })); globals.set("items", simple_function("items", { "object" }, [](const std::shared_ptr &, Value & args) { auto items = Value::array(); @@ -2599,7 +2611,7 @@ inline std::shared_ptr Context::builtins() { globals.set("last", simple_function("last", { "items" }, [](const std::shared_ptr &, Value & args) { auto items = args.at("items"); if (!items.is_array()) throw std::runtime_error("object is not a list"); - if (items.size() == 0) return Value(); + if (items.empty()) return Value(); return items.at(items.size() - 1); })); globals.set("trim", simple_function("trim", { "text" }, [](const std::shared_ptr &, Value & args) { @@ -2747,12 +2759,17 @@ inline std::shared_ptr Context::builtins() { return Value::callable([=](const std::shared_ptr & context, ArgumentsValue & args) { args.expectArgs(is_select ? "select" : "reject", {2, (std::numeric_limits::max)()}, {0, 0}); auto & items = args.args[0]; - if (items.is_null()) + if (items.is_null()) { return Value::array(); - if (!items.is_array()) throw std::runtime_error("object is not iterable: " + items.dump()); + } + if (!items.is_array()) { + throw std::runtime_error("object is not iterable: " + items.dump()); + } auto filter_fn = context->get(args.args[1]); - if (filter_fn.is_null()) throw std::runtime_error("Undefined filter: " + args.args[1].dump()); + if (filter_fn.is_null()) { + throw std::runtime_error("Undefined filter: " + args.args[1].dump()); + } auto filter_args = Value::array(); for (size_t i = 2, n = args.args.size(); i < n; i++) { @@ -2874,20 +2891,25 @@ inline std::shared_ptr Context::builtins() { auto v = arg.get(); startEndStep[i] = v; param_set[i] = true; - } } - for (auto & [name, value] : args.kwargs) { - size_t i; - if (name == "start") i = 0; - else if (name == "end") i = 1; - else if (name == "step") i = 2; - else throw std::runtime_error("Unknown argument " + name + " for function range"); + } + for (auto & [name, value] : args.kwargs) { + size_t i; + if (name == "start") { + i = 0; + } else if (name == "end") { + i = 1; + } else if (name == "step") { + i = 2; + } else { + throw std::runtime_error("Unknown argument " + name + " for function range"); + } - if (param_set[i]) { - throw std::runtime_error("Duplicate argument " + name + " for function range"); - } - startEndStep[i] = value.get(); - param_set[i] = true; + if (param_set[i]) { + throw std::runtime_error("Duplicate argument " + name + " for function range"); + } + startEndStep[i] = value.get(); + param_set[i] = true; } if (!param_set[1]) { throw std::runtime_error("Missing required argument 'end' for function range"); diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index cfe94deaf7..89522dee8b 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -65,6 +65,7 @@ class Model: model_name: str | None metadata_override: Path | None dir_model_card: Path + remote_hf_model_id: str | None # subclasses should define this! 
model_arch: gguf.MODEL_ARCH @@ -73,7 +74,7 @@ class Model: use_temp_file: bool = False, eager: bool = False, metadata_override: Path | None = None, model_name: str | None = None, split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, - small_first_shard: bool = False, hparams: dict[str, Any] | None = None): + small_first_shard: bool = False, hparams: dict[str, Any] | None = None, remote_hf_model_id: str | None = None): if type(self) is Model: raise TypeError(f"{type(self).__name__!r} should not be directly instantiated") @@ -83,11 +84,24 @@ class Model: self.is_big_endian = is_big_endian self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE self.use_temp_file = use_temp_file - self.lazy = not eager - self.part_names = Model.get_model_part_names(self.dir_model, "model", ".safetensors") - self.is_safetensors = len(self.part_names) > 0 - if not self.is_safetensors: - self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin") + self.lazy = not eager or (remote_hf_model_id is not None) + self.remote_hf_model_id = remote_hf_model_id + if remote_hf_model_id is not None: + self.is_safetensors = True + + def get_remote_tensors() -> Iterator[tuple[str, Tensor]]: + logger.info(f"Using remote model with HuggingFace id: {remote_hf_model_id}") + remote_tensors = gguf.utility.SafetensorRemote.get_list_tensors_hf_model(remote_hf_model_id) + self.tensor_names = set(name for name in remote_tensors.keys()) + for name, remote_tensor in gguf.utility.SafetensorRemote.get_list_tensors_hf_model(remote_hf_model_id).items(): + yield (name, LazyTorchTensor.from_remote_tensor(remote_tensor)) + + self.get_tensors = get_remote_tensors + else: + self.part_names = Model.get_model_part_names(self.dir_model, "model", ".safetensors") + self.is_safetensors = len(self.part_names) > 0 + if not self.is_safetensors: + self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin") self.hparams = Model.load_hparams(self.dir_model) if hparams is None else hparams self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"]) self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count) @@ -393,6 +407,10 @@ class Model: self.metadata = gguf.Metadata.load(self.metadata_override, self.dir_model_card, self.model_name, total_params) + # If we are using HF model id, set the metadata name to the model id + if self.remote_hf_model_id: + self.metadata.name = self.remote_hf_model_id + # Fallback to model directory name if metadata name is still missing if self.metadata.name is None: self.metadata.name = self.dir_model.name @@ -714,6 +732,12 @@ class Model: if chkhsh == "96a5f08be6259352137b512d4157e333e21df7edd3fcd152990608735a65b224": # ref: https://huggingface.co/inclusionAI/Ling-lite res = "bailingmoe" + if chkhsh == "d353350c764d8c3b39c763113960e4fb4919bea5fbf208a0e3b22e8469dc7406": + # ref: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct + res = "llama4" + if chkhsh == "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2": + # ref: https://huggingface.co/THUDM/glm-4-9b-hf + res = "glm4" if res is None: logger.warning("\n") @@ -1608,6 +1632,7 @@ class StableLMModel(Model): @Model.register("LLaMAForCausalLM", "LlamaForCausalLM", "MistralForCausalLM", "MixtralForCausalLM") class LlamaModel(Model): model_arch = gguf.MODEL_ARCH.LLAMA + undo_permute = True def set_vocab(self): try: @@ -1672,10 +1697,11 @@ class LlamaModel(Model): n_head = 
self.hparams["num_attention_heads"] n_kv_head = self.hparams.get("num_key_value_heads") - if name.endswith(("q_proj.weight", "q_proj.bias")): - data_torch = LlamaModel.permute(data_torch, n_head, n_head) - if name.endswith(("k_proj.weight", "k_proj.bias")): - data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head) + if self.undo_permute: + if name.endswith(("q_proj.weight", "q_proj.bias")): + data_torch = LlamaModel.permute(data_torch, n_head, n_head) + if name.endswith(("k_proj.weight", "k_proj.bias")): + data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head) # process the experts separately if name.find("block_sparse_moe.experts") != -1: @@ -1727,7 +1753,7 @@ class LlamaModel(Model): low_freq_wavelen = old_context_len / low_freq_factor high_freq_wavelen = old_context_len / high_freq_factor - assert low_freq_wavelen != high_freq_wavelen + # assert low_freq_wavelen != high_freq_wavelen # Errors for Llama4 rope_factors = [] for freq in freqs: @@ -1752,6 +1778,57 @@ class LlamaModel(Model): raise ValueError(f"Unprocessed experts: {experts}") +@Model.register("Llama4ForConditionalGeneration") +class Llama4Model(LlamaModel): + model_arch = gguf.MODEL_ARCH.LLAMA4 + has_vision: bool = False + undo_permute = False + + # TODO @ngxson : avoid duplicate this code everywhere by at least support "text_config" + # same with llama, but we need to merge the text_config into the root level of hparams + def __init__(self, *args, **kwargs): + hparams = kwargs["hparams"] if "hparams" in kwargs else Model.load_hparams(args[0]) + if "text_config" in hparams: + hparams = {**hparams, **hparams["text_config"]} + kwargs["hparams"] = hparams + super().__init__(*args, **kwargs) + if "vision_config" in hparams: + logger.info("Has vision encoder, but it will be ignored") + self.has_vision = True + # IMPORTANT: the normal "intermediate_size" is renamed to "intermediate_size_mlp", we need to undo this + self.hparams["intermediate_size_moe"] = self.hparams["intermediate_size"] + self.hparams["intermediate_size"] = self.hparams["intermediate_size_mlp"] + + def set_vocab(self): + self._set_vocab_gpt2() + self.gguf_writer.add_add_bos_token(True) + + def set_gguf_parameters(self): + super().set_gguf_parameters() + self.gguf_writer.add_interleave_moe_layer_step(self.hparams["interleave_moe_layer_step"]) + self.gguf_writer.add_expert_feed_forward_length(self.hparams["intermediate_size_moe"]) + + def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None): + # split the gate_up into gate and up + if "gate_up_proj" in name: + name_up = name.replace("gate_up_proj", "up_proj.weight") + name_gate = name.replace("gate_up_proj", "gate_proj.weight") + dim_half = data_torch.shape[-1] // 2 + gate_proj_weight, up_proj_weight = data_torch.transpose(-1, -2).split(dim_half, dim=-2) + return [ + (self.map_tensor_name(name_gate), gate_proj_weight), + (self.map_tensor_name(name_up), up_proj_weight) + ] + + if name.endswith("down_proj"): + name += ".weight" + data_torch = data_torch.transpose(-1, -2) + + if "multi_modal_projector" in name or "vision_model" in name: + return [] + return super().modify_tensors(data_torch, name, bid) + + @Model.register("Mistral3ForConditionalGeneration") class Mistral3Model(LlamaModel): model_arch = gguf.MODEL_ARCH.LLAMA @@ -2399,6 +2476,16 @@ class Qwen2MoeModel(Model): raise ValueError(f"Unprocessed experts: {experts}") +@Model.register("Qwen3ForCausalLM") +class Qwen3Model(Qwen2Model): + model_arch = gguf.MODEL_ARCH.QWEN3 + + +@Model.register("Qwen3MoeForCausalLM") +class 
Qwen3MoeModel(Qwen2MoeModel): + model_arch = gguf.MODEL_ARCH.QWEN3MOE + + @Model.register("GPT2LMHeadModel") class GPT2Model(Model): model_arch = gguf.MODEL_ARCH.GPT2 @@ -4335,6 +4422,10 @@ class DeepseekV2Model(Model): self._set_vocab_gpt2() def set_gguf_parameters(self): + + # note: deepseek2 using MLA converts into MQA (ie: GQA with 1 group) + self.hparams["num_key_value_heads"] = 1 + super().set_gguf_parameters() hparams = self.hparams @@ -4343,8 +4434,13 @@ class DeepseekV2Model(Model): if "q_lora_rank" in hparams and hparams["q_lora_rank"] is not None: self.gguf_writer.add_q_lora_rank(hparams["q_lora_rank"]) self.gguf_writer.add_kv_lora_rank(hparams["kv_lora_rank"]) - self.gguf_writer.add_key_length(hparams["qk_nope_head_dim"] + hparams["qk_rope_head_dim"]) - self.gguf_writer.add_value_length(hparams["v_head_dim"]) + + # note: deepseek2 using MLA converts into MQA with larger heads, then decompresses to MHA + self.gguf_writer.add_key_length(hparams["kv_lora_rank"] + hparams["qk_rope_head_dim"]) + self.gguf_writer.add_value_length(hparams["kv_lora_rank"]) + self.gguf_writer.add_key_length_mla(hparams["qk_nope_head_dim"] + hparams["qk_rope_head_dim"]) + self.gguf_writer.add_value_length_mla(hparams["v_head_dim"]) + self.gguf_writer.add_expert_feed_forward_length(hparams["moe_intermediate_size"]) self.gguf_writer.add_expert_count(hparams["n_routed_experts"]) self.gguf_writer.add_expert_shared_count(hparams["n_shared_experts"]) @@ -4413,6 +4509,26 @@ class DeepseekV2Model(Model): else: return [] + # note: MLA with the absorption optimization, needs these two split and k_b_proj transposed + if name.endswith("kv_b_proj.weight"): + name_kb = name.replace("kv_b_proj", "k_b_proj") + name_vb = name.replace("kv_b_proj", "v_b_proj") + + n_head_kv = self.hparams["num_key_value_heads"] + v_head_dim = self.hparams["v_head_dim"] + qk_nope_head_dim = self.hparams["qk_nope_head_dim"] + + assert data_torch.shape[0] == n_head_kv * (v_head_dim + qk_nope_head_dim) + + kv_b = data_torch.view(n_head_kv, v_head_dim + qk_nope_head_dim, data_torch.shape[-1]) + k_b, v_b = torch.split(kv_b, [qk_nope_head_dim, v_head_dim], dim=1) + k_b = k_b.transpose(1, 2) + + return [ + (self.map_tensor_name(name_kb), k_b), + (self.map_tensor_name(name_vb), v_b) + ] + return [(self.map_tensor_name(name), data_torch)] def prepare_tensors(self): @@ -4813,6 +4929,22 @@ class JaisModel(Model): self.gguf_writer.add_max_alibi_bias(self.max_alibi_bias) +@Model.register("Glm4ForCausalLM") +class Glm4Model(Model): + model_arch = gguf.MODEL_ARCH.GLM4 + + def set_vocab(self): + self._set_vocab_gpt2() + + def set_gguf_parameters(self): + super().set_gguf_parameters() + if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]: + if self.hparams["rope_scaling"].get("type") == "yarn": + self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN) + self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"]) + self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"]) + + @Model.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration") class ChatGLMModel(Model): model_arch = gguf.MODEL_ARCH.CHATGLM @@ -5333,6 +5465,14 @@ class LazyTorchTensor(gguf.LazyBase): lazy = cls(meta=cls.meta_with_dtype_and_shape(dtype, shape), args=(st_slice,), func=lambda s: s[:]) return cast(torch.Tensor, lazy) + @classmethod + def from_remote_tensor(cls, remote_tensor: gguf.utility.RemoteTensor): + dtype = 
cls._dtype_str_map[remote_tensor.dtype] + shape = remote_tensor.shape + meta = cls.meta_with_dtype_and_shape(dtype, shape) + lazy = cls(meta=meta, args=(remote_tensor,), func=lambda r: torch.frombuffer(r.data(), dtype=dtype).reshape(shape)) + return cast(torch.Tensor, lazy) + @classmethod def __torch_function__(cls, func, types, args=(), kwargs=None): del types # unused @@ -5410,6 +5550,10 @@ def parse_args() -> argparse.Namespace: "--print-supported-models", action="store_true", help="Print the supported models" ) + parser.add_argument( + "--remote", action="store_true", + help="(Experimental) Read safetensors file remotely without downloading to disk. Config and tokenizer files will still be downloaded. To use this feature, you need to specify Hugging Face model repo name instead of a local directory. For example: 'HuggingFaceTB/SmolLM2-1.7B-Instruct'. Note: To access gated repo, set HF_TOKEN environment variable to your Hugging Face token.", + ) args = parser.parse_args() if not args.print_supported_models and args.model is None: @@ -5450,6 +5594,14 @@ def main() -> None: dir_model = args.model + if args.remote: + from huggingface_hub import snapshot_download + local_dir = snapshot_download( + repo_id=str(dir_model), + allow_patterns=["LICENSE", "*.json", "*.md", "*.txt", "tokenizer.model"]) + dir_model = Path(local_dir) + logger.info(f"Downloaded config and tokenizer to {local_dir}") + if not dir_model.is_dir(): logger.error(f'Error: {args.model} is not a directory') sys.exit(1) @@ -5471,6 +5623,9 @@ def main() -> None: if args.outfile is not None: fname_out = args.outfile + elif args.remote: + # if remote, use the model ID as the output file name + fname_out = Path("./" + str(args.model).replace("/", "-") + "-{ftype}.gguf") else: fname_out = dir_model @@ -5481,7 +5636,6 @@ def main() -> None: with torch.inference_mode(): output_type = ftype_map[args.outtype] model_architecture = hparams["architectures"][0] - try: model_class = Model.from_model_architecture(model_architecture) except NotImplementedError: @@ -5494,7 +5648,8 @@ def main() -> None: metadata_override=args.metadata, model_name=args.model_name, split_max_tensors=args.split_max_tensors, split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run, - small_first_shard=args.no_tensor_first_split) + small_first_shard=args.no_tensor_first_split, + remote_hf_model_id=str(args.model) if args.remote else None) if args.vocab_only: logger.info("Exporting model vocab...") diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py index 1b86f4c90a..160c9fe0e6 100755 --- a/convert_hf_to_gguf_update.py +++ b/convert_hf_to_gguf_update.py @@ -113,6 +113,8 @@ models = [ {"name": "superbpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/UW/OLMo2-8B-SuperBPE-t180k", }, {"name": "trillion", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/trillionlabs/Trillion-7B-preview", }, {"name": "bailingmoe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inclusionAI/Ling-lite", }, + {"name": "llama4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct", }, + {"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-hf", }, ] diff --git a/docs/backend/OPENCL.md b/docs/backend/OPENCL.md index 2a946dc8df..07146f7102 100644 --- a/docs/backend/OPENCL.md +++ b/docs/backend/OPENCL.md @@ -145,8 +145,13 @@ A Snapdragon X Elite device with Windows 11 Arm64 is used. 
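The `--remote` conversion path added above never downloads the weight files up front: it lists the tensors in the remote safetensors shards and wraps each entry in a lazy tensor that is only materialized (via `torch.frombuffer`) when the tensor is actually written out. The sketch below illustrates that idea in isolation; it is a minimal, hypothetical stand-in rather than the converter's code. `RemoteTensor` and its `fetch` callback merely fake what `gguf.utility.SafetensorRemote` provides (dtype, shape, and on-demand byte access).

```python
# Minimal sketch of the lazy remote-tensor idea behind --remote (illustrative only).
from dataclasses import dataclass
from typing import Callable

import torch


@dataclass
class RemoteTensor:
    # Hypothetical stand-in for the descriptor returned by the remote safetensors listing.
    dtype: torch.dtype
    shape: tuple
    fetch: Callable[[], bytes]  # in the real converter this would be an HTTP range request


def lazy_remote_tensor(rt: RemoteTensor) -> Callable[[], torch.Tensor]:
    # Nothing is transferred here; bytes are fetched only when the closure is called,
    # mirroring how LazyTorchTensor.from_remote_tensor defers the download.
    return lambda: torch.frombuffer(bytearray(rt.fetch()), dtype=rt.dtype).reshape(rt.shape)


if __name__ == "__main__":
    raw = torch.arange(6, dtype=torch.float32).numpy().tobytes()
    rt = RemoteTensor(dtype=torch.float32, shape=(2, 3), fetch=lambda: raw)
    materialize = lazy_remote_tensor(rt)
    print(materialize())  # the "download" happens only at this point
```

This is also why the `--remote` flag forces lazy evaluation in the `Model` constructor: eager mode would otherwise try to materialize every tensor immediately.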
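Two of the conversion hunks earlier in this patch perform non-obvious tensor surgery: Llama 4's fused `gate_up_proj` is transposed and split into separate gate/up projections, and DeepSeek-V2's `kv_b_proj` is split into `k_b_proj`/`v_b_proj` (with `k_b` transposed) for the MLA absorption path. The following toy check, with invented dimensions and assuming the fused dimension is laid out as in the hunks above, replays the same reshape/split arithmetic so the resulting shapes are easy to verify; it is not part of the converter.

```python
# Toy shape check (illustrative only) for the gate_up_proj and kv_b_proj splits above.
import torch

# Llama 4: a fused gate_up_proj assumed shaped (hidden, 2*ffn); transposing the last two
# dims and splitting in half yields gate and up weights of shape (ffn, hidden).
hidden, ffn = 8, 16
gate_up = torch.randn(hidden, 2 * ffn)
dim_half = gate_up.shape[-1] // 2
gate_w, up_w = gate_up.transpose(-1, -2).split(dim_half, dim=-2)
assert gate_w.shape == (ffn, hidden)
assert up_w.shape == (ffn, hidden)

# DeepSeek-V2 MLA: kv_b_proj packs, per KV head, the no-RoPE key part followed by the
# value part. It is viewed as (n_head_kv, qk_nope + v_dim, kv_lora_rank), split into
# k_b and v_b, and k_b is transposed before being stored as k_b_proj.
n_head_kv, qk_nope, v_dim, kv_lora_rank = 4, 6, 5, 3
kv_b = torch.randn(n_head_kv * (qk_nope + v_dim), kv_lora_rank)
kv_b = kv_b.view(n_head_kv, qk_nope + v_dim, kv_lora_rank)
k_b, v_b = torch.split(kv_b, [qk_nope, v_dim], dim=1)
k_b = k_b.transpose(1, 2)
assert k_b.shape == (n_head_kv, kv_lora_rank, qk_nope)
assert v_b.shape == (n_head_kv, v_dim, kv_lora_rank)
print("gate/up and k_b/v_b shapes check out")
```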
Make sure the followi * Clang 19 * Ninja * Visual Studio 2022 +* Powershell 7 -Powershell is used for the following instructions. +Visual Studio provides necessary headers and libraries although it is not directly used for building. +Alternatively, Visual Studio Build Tools can be installed instead of the full Visual Studio. + +Powershell 7 is used for the following commands. +If an older version of Powershell is used, these commands may not work as they are. ### I. Setup Environment @@ -196,10 +201,9 @@ ninja ## Known Issues -- Qwen2.5 0.5B model produces gibberish output with Adreno kernels. +- Currently OpenCL backend does not work on Adreno 6xx GPUs. ## TODO -- Fix Qwen2.5 0.5B - Optimization for Q6_K - Support and optimization for Q4_K diff --git a/docs/backend/SYCL.md b/docs/backend/SYCL.md index f1204dded0..20aefec2f3 100644 --- a/docs/backend/SYCL.md +++ b/docs/backend/SYCL.md @@ -425,13 +425,13 @@ Examples: - Use device 0: ```sh -ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m models/llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 -sm none -mg 0 +ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -no-cnv -m models/llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 -sm none -mg 0 ``` - Use multiple devices: ```sh -ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m models/llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 -sm layer +ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -no-cnv -m models/llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 -sm layer ``` *Notes:* @@ -475,6 +475,12 @@ b. Enable oneAPI running environment: "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 ``` +- if you are using Powershell, enable the runtime environment with the following: + +``` +cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell' +``` + c. Verify installation In the oneAPI command line, run the following to print the available SYCL devices: @@ -505,13 +511,13 @@ You could download the release package for Windows directly, which including bin Choose one of following methods to build from source code. -1. Script +#### 1. Script ```sh .\examples\sycl\win-build-sycl.bat ``` -2. CMake +#### 2. CMake On the oneAPI command line window, step into the llama.cpp main directory and run the following: @@ -540,13 +546,84 @@ cmake --preset x64-windows-sycl-debug cmake --build build-x64-windows-sycl-debug -j --target llama-cli ``` -3. Visual Studio +#### 3. Visual Studio -You can use Visual Studio to open llama.cpp folder as a CMake project. Choose the sycl CMake presets (`x64-windows-sycl-release` or `x64-windows-sycl-debug`) before you compile the project. +You have two options to use Visual Studio to build llama.cpp: +- As CMake Project using CMake presets. +- Creating a Visual Studio solution to handle the project. + +**Note**: + +All following commands are executed in PowerShell. + +##### - Open as a CMake Project + +You can use Visual Studio to open the `llama.cpp` folder directly as a CMake project. Before compiling, select one of the SYCL CMake presets: + +- `x64-windows-sycl-release` + +- `x64-windows-sycl-debug` *Notes:* +- For a minimal experimental setup, you can build only the inference executable using: -- In case of a minimal experimental setup, the user can build the inference executable only through `cmake --build build --config Release -j --target llama-cli`. 
+ ```Powershell + cmake --build build --config Release -j --target llama-cli + ``` + +##### - Generating a Visual Studio Solution + +You can use Visual Studio solution to build and work on llama.cpp on Windows. You need to convert the CMake Project into a `.sln` file. + +If you want to use the Intel C++ Compiler for the entire `llama.cpp` project, run the following command: + +```Powershell +cmake -B build -G "Visual Studio 17 2022" -T "Intel C++ Compiler 2025" -A x64 -DGGML_SYCL=ON -DCMAKE_BUILD_TYPE=Release +``` + +If you prefer to use the Intel C++ Compiler only for `ggml-sycl`, ensure that `ggml` and its backend libraries are built as shared libraries ( i.e. `-DBUILD_SHARED_LIBRARIES=ON`, this is default behaviour): + +```Powershell +cmake -B build -G "Visual Studio 17 2022" -A x64 -DGGML_SYCL=ON -DCMAKE_BUILD_TYPE=Release \ + -DSYCL_INCLUDE_DIR="C:\Program Files (x86)\Intel\oneAPI\compiler\latest\include" \ + -DSYCL_LIBRARY_DIR="C:\Program Files (x86)\Intel\oneAPI\compiler\latest\lib" +``` + +If successful the build files have been written to: *path/to/llama.cpp/build* +Open the project file **build/llama.cpp.sln** with Visual Studio. + +Once the Visual Studio solution is created, follow these steps: + +1. Open the solution in Visual Studio. + +2. Right-click on `ggml-sycl` and select **Properties**. + +3. In the left column, expand **C/C++** and select **DPC++**. + +4. In the right panel, find **Enable SYCL Offload** and set it to `Yes`. + +5. Apply the changes and save. + + +*Navigation Path:* + +``` +Properties -> C/C++ -> DPC++ -> Enable SYCL Offload (Yes) +``` + +Now, you can build `llama.cpp` with the SYCL backend as a Visual Studio project. +To do it from menu: `Build -> Build Solution`. +Once it is completed, final results will be in **build/Release/bin** + +*Additional Note* + +- You can avoid specifying `SYCL_INCLUDE_DIR` and `SYCL_LIBRARY_DIR` in the CMake command by setting the environment variables: + + - `SYCL_INCLUDE_DIR_HINT` + + - `SYCL_LIBRARY_DIR_HINT` + +- Above instruction has been tested with Visual Studio 17 Community edition and oneAPI 2025.0. We expect them to work also with future version if the instructions are adapted accordingly. ### III. Run the inference @@ -620,13 +697,13 @@ Examples: - Use device 0: ``` -build\bin\llama-cli.exe -m models\llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:\nStep 1:" -n 400 -e -ngl 33 -s 0 -sm none -mg 0 +build\bin\llama-cli.exe -no-cnv -m models\llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:\nStep 1:" -n 400 -e -ngl 33 -s 0 -sm none -mg 0 ``` - Use multiple devices: ``` -build\bin\llama-cli.exe -m models\llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:\nStep 1:" -n 400 -e -ngl 33 -s 0 -sm layer +build\bin\llama-cli.exe -no-cnv -m models\llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:\nStep 1:" -n 400 -e -ngl 33 -s 0 -sm layer ``` diff --git a/docs/build.md b/docs/build.md index 9c1314a294..c9027c0b58 100644 --- a/docs/build.md +++ b/docs/build.md @@ -259,8 +259,6 @@ You can download it from your Linux distro's package manager or from here: [ROCm cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release \ && cmake --build build --config Release -- -j 16 ``` - On Linux it is also possible to use unified memory architecture (UMA) to share main memory between the CPU and integrated GPU by setting `-DGGML_HIP_UMA=ON`. 
- However, this hurts performance for non-integrated GPUs (but enables working with integrated GPUs). To enhance flash attention performance on RDNA3+ or CDNA architectures, you can utilize the rocWMMA library by enabling the `-DGGML_HIP_ROCWMMA_FATTN=ON` option. This requires rocWMMA headers to be installed on the build system. @@ -296,6 +294,10 @@ You can download it from your Linux distro's package manager or from here: [ROCm The environment variable [`HIP_VISIBLE_DEVICES`](https://rocm.docs.amd.com/en/latest/understand/gpu_isolation.html#hip-visible-devices) can be used to specify which GPU(s) will be used. If your GPU is not officially supported you can use the environment variable [`HSA_OVERRIDE_GFX_VERSION`] set to a similar GPU, for example 10.3.0 on RDNA2 (e.g. gfx1030, gfx1031, or gfx1035) or 11.0.0 on RDNA3. +### Unified Memory + +On Linux it is possible to use unified memory architecture (UMA) to share main memory between the CPU and integrated GPU by setting environment variable `GGML_CUDA_ENABLE_UNIFIED_MEMORY=1`. However, this hurts performance for non-integrated GPUs (but enables working with integrated GPUs). + ## Vulkan **Windows** @@ -456,6 +458,96 @@ KleidiAI's microkernels implement optimized tensor operations using Arm CPU feat Depending on your build target, other higher priority backends may be enabled by default. To ensure the CPU backend is used, you must disable the higher priority backends either at compile time, e.g. -DGGML_METAL=OFF, or during run-time using the command line option `--device none`. +## OpenCL + +This provides GPU acceleration through OpenCL on recent Adreno GPU. +More information about OpenCL backend can be found in [OPENCL.md](./backend/OPENCL.md) for more information. + +### Android + +Assume NDK is available in `$ANDROID_NDK`. First, install OpenCL headers and ICD loader library if not available, + +```sh +mkdir -p ~/dev/llm +cd ~/dev/llm + +git clone https://github.com/KhronosGroup/OpenCL-Headers && \ +cd OpenCL-Headers && \ +cp -r CL $ANDROID_NDK/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include + +cd ~/dev/llm + +git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader && \ +cd OpenCL-ICD-Loader && \ +mkdir build_ndk && cd build_ndk && \ +cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \ + -DOPENCL_ICD_LOADER_HEADERS_DIR=$ANDROID_NDK/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include \ + -DANDROID_ABI=arm64-v8a \ + -DANDROID_PLATFORM=24 \ + -DANDROID_STL=c++_shared && \ +ninja && \ +cp libOpenCL.so $ANDROID_NDK/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android +``` + +Then build llama.cpp with OpenCL enabled, + +```sh +cd ~/dev/llm + +git clone https://github.com/ggml-org/llama.cpp && \ +cd llama.cpp && \ +mkdir build-android && cd build-android + +cmake .. -G Ninja \ + -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \ + -DANDROID_ABI=arm64-v8a \ + -DANDROID_PLATFORM=android-28 \ + -DBUILD_SHARED_LIBS=OFF \ + -DGGML_OPENCL=ON + +ninja +``` + +### Windows Arm64 + +First, install OpenCL headers and ICD loader library if not available, + +```powershell +mkdir -p ~/dev/llm + +cd ~/dev/llm +git clone https://github.com/KhronosGroup/OpenCL-Headers && cd OpenCL-Headers +mkdir build && cd build +cmake .. -G Ninja ` + -DBUILD_TESTING=OFF ` + -DOPENCL_HEADERS_BUILD_TESTING=OFF ` + -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF ` + -DCMAKE_INSTALL_PREFIX="$HOME/dev/llm/opencl" +cmake --build . 
--target install + +cd ~/dev/llm +git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader && cd OpenCL-ICD-Loader +mkdir build && cd build +cmake .. -G Ninja ` + -DCMAKE_BUILD_TYPE=Release ` + -DCMAKE_PREFIX_PATH="$HOME/dev/llm/opencl" ` + -DCMAKE_INSTALL_PREFIX="$HOME/dev/llm/opencl" +cmake --build . --target install +``` + +Then build llama.cpp with OpenCL enabled, + +```powershell +cmake .. -G Ninja ` + -DCMAKE_TOOLCHAIN_FILE="$HOME/dev/llm/llama.cpp/cmake/arm64-windows-llvm.cmake" ` + -DCMAKE_BUILD_TYPE=Release ` + -DCMAKE_PREFIX_PATH="$HOME/dev/llm/opencl" ` + -DBUILD_SHARED_LIBS=OFF ` + -DGGML_OPENCL=ON +ninja +``` + ## Android To read documentation for how to build on Android, [click here](./android.md) diff --git a/examples/gguf-split/gguf-split.cpp b/examples/gguf-split/gguf-split.cpp index ef3ceb686f..30e771564e 100644 --- a/examples/gguf-split/gguf-split.cpp +++ b/examples/gguf-split/gguf-split.cpp @@ -408,8 +408,6 @@ static void gguf_merge(const split_params & split_params) { exit(EXIT_FAILURE); } - std::ofstream fout(split_params.output.c_str(), std::ios::binary); - fout.exceptions(std::ofstream::failbit); // fail fast on write errors auto * ctx_out = gguf_init_empty(); @@ -453,7 +451,6 @@ static void gguf_merge(const split_params & split_params) { gguf_free(ctx_gguf); ggml_free(ctx_meta); gguf_free(ctx_out); - fout.close(); exit(EXIT_FAILURE); } @@ -466,7 +463,6 @@ static void gguf_merge(const split_params & split_params) { gguf_free(ctx_gguf); ggml_free(ctx_meta); gguf_free(ctx_out); - fout.close(); exit(EXIT_FAILURE); } @@ -479,7 +475,6 @@ static void gguf_merge(const split_params & split_params) { gguf_free(ctx_gguf); ggml_free(ctx_meta); gguf_free(ctx_out); - fout.close(); exit(EXIT_FAILURE); } @@ -500,9 +495,11 @@ static void gguf_merge(const split_params & split_params) { fprintf(stderr, "\033[3Ddone\n"); } - - // placeholder for the meta data - { + std::ofstream fout; + if (!split_params.dry_run) { + fout.open(split_params.output.c_str(), std::ios::binary); + fout.exceptions(std::ofstream::failbit); // fail fast on write errors + // placeholder for the meta data auto meta_size = gguf_get_meta_size(ctx_out); ::zeros(fout, meta_size); } @@ -518,7 +515,9 @@ static void gguf_merge(const split_params & split_params) { ggml_free(ctx_metas[i]); } gguf_free(ctx_out); - fout.close(); + if (!split_params.dry_run) { + fout.close(); + } exit(EXIT_FAILURE); } fprintf(stderr, "%s: writing tensors %s ...", __func__, split_path); @@ -540,10 +539,11 @@ static void gguf_merge(const split_params & split_params) { auto offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i_tensor); f_input.seekg(offset); f_input.read((char *)read_data.data(), n_bytes); - - // write tensor data + padding - fout.write((const char *)read_data.data(), n_bytes); - zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes); + if (!split_params.dry_run) { + // write tensor data + padding + fout.write((const char *)read_data.data(), n_bytes); + zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes); + } } gguf_free(ctx_gguf); @@ -552,16 +552,15 @@ static void gguf_merge(const split_params & split_params) { fprintf(stderr, "\033[3Ddone\n"); } - { + if (!split_params.dry_run) { // go back to beginning of file and write the updated metadata fout.seekp(0); std::vector data(gguf_get_meta_size(ctx_out)); gguf_get_meta_data(ctx_out, data.data()); fout.write((const char *)data.data(), data.size()); - fout.close(); - gguf_free(ctx_out); } + gguf_free(ctx_out); fprintf(stderr, "%s: 
%s merged from %d split with %d tensors.\n", __func__, split_params.output.c_str(), n_split, total_tensors); diff --git a/examples/llama.android/llama/build.gradle.kts b/examples/llama.android/llama/build.gradle.kts index 28dbc19048..5bb6478022 100644 --- a/examples/llama.android/llama/build.gradle.kts +++ b/examples/llama.android/llama/build.gradle.kts @@ -18,6 +18,7 @@ android { } externalNativeBuild { cmake { + arguments += "-DLLAMA_CURL=OFF" arguments += "-DLLAMA_BUILD_COMMON=ON" arguments += "-DGGML_LLAMAFILE=OFF" arguments += "-DCMAKE_BUILD_TYPE=Release" diff --git a/examples/llava/CMakeLists.txt b/examples/llava/CMakeLists.txt index f275ce1ccd..2d5061de46 100644 --- a/examples/llava/CMakeLists.txt +++ b/examples/llava/CMakeLists.txt @@ -1,3 +1,5 @@ +# llava (legacy) + add_library(llava OBJECT llava.cpp llava.h @@ -22,12 +24,41 @@ if (BUILD_SHARED_LIBS) install(TARGETS llava_shared LIBRARY) endif() +# mtmd + +add_library(mtmd OBJECT + mtmd.cpp + mtmd.h + clip.cpp + clip.h + clip-impl.h + ) + +target_link_libraries(mtmd PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT}) + +target_include_directories(mtmd PUBLIC .) +target_include_directories(mtmd PRIVATE ../..) +target_include_directories(mtmd PRIVATE ../../common) # for stb_image.h + +target_compile_features(mtmd PRIVATE cxx_std_17) + +add_library(mtmd_static STATIC $) +if (BUILD_SHARED_LIBS) + set_target_properties(mtmd PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_compile_definitions(mtmd PRIVATE LLAMA_SHARED LLAMA_BUILD) + add_library(mtmd_shared SHARED $) + target_link_libraries(mtmd_shared PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT}) + install(TARGETS mtmd_shared LIBRARY) +endif() + if (NOT MSVC) target_compile_options(llava PRIVATE -Wno-cast-qual) # stb_image.h + target_compile_options(mtmd PRIVATE -Wno-cast-qual) # stb_image.h endif() if(TARGET BUILD_INFO) add_dependencies(llava BUILD_INFO) + add_dependencies(mtmd BUILD_INFO) endif() set(TARGET llama-llava-cli) @@ -55,7 +86,7 @@ set(TARGET llama-gemma3-cli) add_executable(${TARGET} gemma3-cli.cpp) set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-gemma3-cli) install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE common mtmd ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) set(TARGET llama-llava-clip-quantize-cli) diff --git a/examples/llava/clip-impl.h b/examples/llava/clip-impl.h new file mode 100644 index 0000000000..4d7340a56b --- /dev/null +++ b/examples/llava/clip-impl.h @@ -0,0 +1,344 @@ +#include "ggml.h" +#include "gguf.h" +#include "clip.h" + +#include "clip.h" + +#include +#include +#include +#include +#include +#include +#include + +// Internal header for clip.cpp + +#define KEY_FTYPE "general.file_type" +#define KEY_NAME "general.name" +#define KEY_DESCRIPTION "general.description" +#define KEY_HAS_TEXT_ENC "clip.has_text_encoder" +#define KEY_HAS_VIS_ENC "clip.has_vision_encoder" +#define KEY_HAS_LLAVA_PROJ "clip.has_llava_projector" +#define KEY_HAS_MINICPMV_PROJ "clip.has_minicpmv_projector" +#define KEY_HAS_GLM_PROJ "clip.has_glm_projector" +#define KEY_MINICPMV_VERSION "clip.minicpmv_version" +#define KEY_HAS_QWEN2VL_MERGER "clip.has_qwen2vl_merger" +#define KEY_USE_GELU "clip.use_gelu" +#define KEY_USE_SILU "clip.use_silu" +#define KEY_N_EMBD "clip.%s.embedding_length" +#define KEY_N_FF "clip.%s.feed_forward_length" +#define KEY_N_BLOCK "clip.%s.block_count" +#define KEY_N_HEAD "clip.%s.attention.head_count" +#define 
KEY_LAYER_NORM_EPS "clip.%s.attention.layer_norm_epsilon" +#define KEY_PROJ_DIM "clip.%s.projection_dim" +#define KEY_TOKENS "tokenizer.ggml.tokens" +#define KEY_N_POSITIONS "clip.text.context_length" +#define KEY_IMAGE_SIZE "clip.vision.image_size" +#define KEY_PATCH_SIZE "clip.vision.patch_size" +#define KEY_IMAGE_MEAN "clip.vision.image_mean" +#define KEY_IMAGE_STD "clip.vision.image_std" +#define KEY_PROJ_TYPE "clip.projector_type" +#define KEY_FEATURE_LAYER "clip.vision.feature_layer" + +#define KEY_MM_PATCH_MERGE_TYPE "clip.vision.mm_patch_merge_type" +#define KEY_IMAGE_GRID_PINPOINTS "clip.vision.image_grid_pinpoints" +#define KEY_IMAGE_CROP_RESOLUTION "clip.vision.image_crop_resolution" + + +// +// tensor name constants +// + +#define TN_TOKEN_EMBD "%s.token_embd.weight" +#define TN_POS_EMBD "%s.position_embd.weight" +#define TN_CLASS_EMBD "v.class_embd" +#define TN_PATCH_EMBD "v.patch_embd.weight" // not rename tensor with ".0" postfix for backwrad compat +#define TN_PATCH_EMBD_1 "v.patch_embd.weight.1" +#define TN_PATCH_BIAS "v.patch_embd.bias" +#define TN_ATTN_K "%s.blk.%d.attn_k.%s" +#define TN_ATTN_Q "%s.blk.%d.attn_q.%s" +#define TN_ATTN_V "%s.blk.%d.attn_v.%s" +#define TN_ATTN_OUTPUT "%s.blk.%d.attn_out.%s" +#define TN_FFN_DOWN "%s.blk.%d.ffn_down.%s" +#define TN_FFN_UP "%s.blk.%d.ffn_up.%s" +#define TN_LN_1 "%s.blk.%d.ln1.%s" +#define TN_LN_2 "%s.blk.%d.ln2.%s" +#define TN_LN_PRE "%s.pre_ln.%s" +#define TN_LN_POST "%s.post_ln.%s" +#define TN_TEXT_PROJ "text_projection.weight" +#define TN_VIS_PROJ "visual_projection.weight" +#define TN_LLAVA_PROJ "mm.%d.%s" +#define TN_MVLM_PROJ_MLP "mm.model.mlp.%d.%s" +#define TN_MVLM_PROJ_BLOCK "mm.model.mb_block.%d.block.%d.%s" +#define TN_MVLM_PROJ_PEG "mm.model.peg.%d.%s" +#define TN_IMAGE_NEWLINE "model.image_newline" +#define TN_MM_INP_PROJ "mm.input_projection.weight" // gemma3 +#define TN_MM_SOFT_EMB_N "mm.soft_emb_norm.weight" // gemma3 + +// mimicpmv +#define TN_MINICPMV_POS_EMBD_K "resampler.pos_embed_k" +#define TN_MINICPMV_QUERY "resampler.query" +#define TN_MINICPMV_PROJ "resampler.proj.weight" +#define TN_MINICPMV_KV_PROJ "resampler.kv.weight" +#define TN_MINICPMV_ATTN "resampler.attn.%s.%s" +#define TN_MINICPMV_LN "resampler.ln_%s.%s" + +#define TN_GLM_ADAPER_CONV "adapter.conv.%s" +#define TN_GLM_ADAPTER_LINEAR "adapter.linear.linear.%s" +#define TN_GLM_ADAPTER_NORM_1 "adapter.linear.norm1.%s" +#define TN_GLM_ADAPTER_D_H_2_4H "adapter.linear.dense_h_to_4h.%s" +#define TN_GLM_ADAPTER_GATE "adapter.linear.gate.%s" +#define TN_GLM_ADAPTER_D_4H_2_H "adapter.linear.dense_4h_to_h.%s" +#define TN_GLM_BOI_W "adapter.boi" +#define TN_GLM_EOI_W "adapter.eoi" + +enum projector_type { + PROJECTOR_TYPE_MLP, + PROJECTOR_TYPE_MLP_NORM, + PROJECTOR_TYPE_LDP, + PROJECTOR_TYPE_LDPV2, + PROJECTOR_TYPE_RESAMPLER, + PROJECTOR_TYPE_GLM_EDGE, + PROJECTOR_TYPE_MERGER, + PROJECTOR_TYPE_GEMMA3, + PROJECTOR_TYPE_UNKNOWN, +}; + +static std::map PROJECTOR_TYPE_NAMES = { + { PROJECTOR_TYPE_MLP, "mlp" }, + { PROJECTOR_TYPE_LDP, "ldp" }, + { PROJECTOR_TYPE_LDPV2, "ldpv2"}, + { PROJECTOR_TYPE_RESAMPLER, "resampler"}, + { PROJECTOR_TYPE_GLM_EDGE, "adapter"}, + { PROJECTOR_TYPE_MERGER, "qwen2vl_merger"}, + { PROJECTOR_TYPE_GEMMA3, "gemma3"}, +}; + +static projector_type clip_projector_type_from_string(const std::string & str) { + for (const auto & pair : PROJECTOR_TYPE_NAMES) { + if (pair.second == str) { + return pair.first; + } + } + return PROJECTOR_TYPE_UNKNOWN; +} + +// RGB uint8 image +struct clip_image_u8 { + int nx; + int ny; + + std::vector buf; 
+}; + +// RGB float32 image (NHWC) +// Memory layout: RGBRGBRGB... +struct clip_image_f32 { + int nx; + int ny; + + std::vector buf; +}; + +// +// logging +// + +static void clip_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) { + (void) level; + (void) user_data; + fputs(text, stderr); + fflush(stderr); +} + +struct clip_logger_state { + ggml_log_level verbosity_thold; + ggml_log_callback log_callback; + void * log_callback_user_data; +}; + +extern struct clip_logger_state g_logger_state; + +static void clip_log_internal_v(enum ggml_log_level level, const char * format, va_list args) { + if (format == NULL) { + return; + } + va_list args_copy; + va_copy(args_copy, args); + char buffer[128]; + int len = vsnprintf(buffer, 128, format, args); + if (len < 128) { + g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data); + } else { + char * buffer2 = (char *) calloc(len + 1, sizeof(char)); + vsnprintf(buffer2, len + 1, format, args_copy); + buffer2[len] = 0; + g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data); + free(buffer2); + } + va_end(args_copy); +} + +static void clip_log_internal(enum ggml_log_level level, const char * format, ...) { + va_list args; + va_start(args, format); + clip_log_internal_v(level, format, args); + va_end(args); +} + +#define LOG_TMPL(level, ...) \ + do { \ + if ((level) >= g_logger_state.verbosity_thold) { \ + clip_log_internal((level), __VA_ARGS__); \ + } \ + } while (0) +#define LOG_INF(...) LOG_TMPL(GGML_LOG_LEVEL_INFO, __VA_ARGS__) +#define LOG_WRN(...) LOG_TMPL(GGML_LOG_LEVEL_WARN, __VA_ARGS__) +#define LOG_ERR(...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) +#define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__) +#define LOG_CNT(...) LOG_TMPL(GGML_LOG_LEVEL_CONT, __VA_ARGS__) + +// +// cpp wrappers +// + +// wrapper for clip_image_size +struct clip_image_size_deleter { + void operator()(clip_image_size * val) { clip_image_size_free(val); } +}; +typedef std::unique_ptr clip_image_size_ptr; + +// wrapper for clip_image_u8 +struct clip_image_u8_deleter { + void operator()(clip_image_u8 * val) { clip_image_u8_free(val); } +}; +typedef std::unique_ptr clip_image_u8_ptr; + +// wrapper for clip_image_f32 +struct clip_image_f32_deleter { + void operator()(clip_image_f32 * val) { clip_image_f32_free(val); } +}; +typedef std::unique_ptr clip_image_f32_ptr; + +struct clip_image_u8_batch { + std::vector entries; +}; + +struct clip_image_f32_batch { + std::vector entries; +}; + +// +// common utils +// + +static std::string string_format(const char * fmt, ...) 
{ + va_list ap; + va_list ap2; + va_start(ap, fmt); + va_copy(ap2, ap); + int size = vsnprintf(NULL, 0, fmt, ap); + GGML_ASSERT(size >= 0 && size < INT_MAX); // NOLINT + std::vector buf(size + 1); + int size2 = vsnprintf(buf.data(), size + 1, fmt, ap2); + GGML_ASSERT(size2 == size); + va_end(ap2); + va_end(ap); + return std::string(buf.data(), buf.size()); +} + +static void string_replace_all(std::string & s, const std::string & search, const std::string & replace) { + if (search.empty()) { + return; + } + std::string builder; + builder.reserve(s.length()); + size_t pos = 0; + size_t last_pos = 0; + while ((pos = s.find(search, last_pos)) != std::string::npos) { + builder.append(s, last_pos, pos - last_pos); + builder.append(replace); + last_pos = pos + search.length(); + } + builder.append(s, last_pos, std::string::npos); + s = std::move(builder); +} + +// split string by a `std::string delim` instead of `char delim` +static std::vector string_split_str(std::string s, const std::string & delimiter) { + std::vector tokens; + size_t pos = 0; + std::string token; + while ((pos = s.find(delimiter)) != std::string::npos) { + token = s.substr(0, pos); + tokens.push_back(token); + s.erase(0, pos + delimiter.length()); + } + tokens.push_back(s); + return tokens; +} + +// +// gguf utils +// + +static std::string gguf_data_to_str(enum gguf_type type, const void * data, int i) { + switch (type) { + case GGUF_TYPE_UINT8: return std::to_string(((const uint8_t *)data)[i]); + case GGUF_TYPE_INT8: return std::to_string(((const int8_t *)data)[i]); + case GGUF_TYPE_UINT16: return std::to_string(((const uint16_t *)data)[i]); + case GGUF_TYPE_INT16: return std::to_string(((const int16_t *)data)[i]); + case GGUF_TYPE_UINT32: return std::to_string(((const uint32_t *)data)[i]); + case GGUF_TYPE_INT32: return std::to_string(((const int32_t *)data)[i]); + case GGUF_TYPE_UINT64: return std::to_string(((const uint64_t *)data)[i]); + case GGUF_TYPE_INT64: return std::to_string(((const int64_t *)data)[i]); + case GGUF_TYPE_FLOAT32: return std::to_string(((const float *)data)[i]); + case GGUF_TYPE_FLOAT64: return std::to_string(((const double *)data)[i]); + case GGUF_TYPE_BOOL: return ((const bool *)data)[i] ? "true" : "false"; + default: return string_format("unknown type %d", type); + } +} + +static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) { + const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i); + + switch (type) { + case GGUF_TYPE_STRING: + return gguf_get_val_str(ctx_gguf, i); + case GGUF_TYPE_ARRAY: + { + const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i); + int arr_n = gguf_get_arr_n(ctx_gguf, i); + const void * data = arr_type == GGUF_TYPE_STRING ? 
nullptr : gguf_get_arr_data(ctx_gguf, i); + std::stringstream ss; + ss << "["; + for (int j = 0; j < arr_n; j++) { + if (arr_type == GGUF_TYPE_STRING) { + std::string val = gguf_get_arr_str(ctx_gguf, i, j); + // escape quotes + string_replace_all(val, "\\", "\\\\"); + string_replace_all(val, "\"", "\\\""); + ss << '"' << val << '"'; + } else if (arr_type == GGUF_TYPE_ARRAY) { + ss << "???"; + } else { + ss << gguf_data_to_str(arr_type, data, j); + } + if (j < arr_n - 1) { + ss << ", "; + } + } + ss << "]"; + return ss.str(); + } + default: + return gguf_data_to_str(type, gguf_get_val_data(ctx_gguf, i), 0); + } +} + +// +// API used internally with mtmd +// + +projector_type clip_get_projector_type(const struct clip_ctx * ctx); diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp index e315ef571f..49c90b7506 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp @@ -3,6 +3,7 @@ // I'll gradually clean and extend it // Note: Even when using identical normalized image inputs (see normalize_image_u8_to_f32()) we have a significant difference in resulting embeddings compared to pytorch #include "clip.h" +#include "clip-impl.h" #include "ggml.h" #include "ggml-cpp.h" #include "ggml-cpu.h" @@ -27,284 +28,10 @@ #include #include -#if defined(LLAVA_LOG_OFF) -# define LOG_INF(...) -# define LOG_WRN(...) -# define LOG_ERR(...) -# define LOG_DBG(...) -#else // defined(LLAVA_LOG_OFF) -# define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0) -# define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0) -# define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0) -# define LOG_DBG(...) do { fprintf(stdout, __VA_ARGS__); } while (0) -#endif // defined(LLAVA_LOG_OFF) +struct clip_logger_state g_logger_state = {GGML_LOG_LEVEL_CONT, clip_log_callback_default, NULL}; //#define CLIP_DEBUG_FUNCTIONS -// RGB uint8 image -struct clip_image_u8 { - int nx; - int ny; - - std::vector buf; -}; - -// RGB float32 image (NHWC) -// Memory layout: RGBRGBRGB... -struct clip_image_f32 { - int nx; - int ny; - - std::vector buf; -}; - -static std::string format(const char * fmt, ...) 
{ - va_list ap; - va_list ap2; - va_start(ap, fmt); - va_copy(ap2, ap); - int size = vsnprintf(NULL, 0, fmt, ap); - GGML_ASSERT(size >= 0 && size < INT_MAX); // NOLINT - std::vector buf(size + 1); - int size2 = vsnprintf(buf.data(), size + 1, fmt, ap2); - GGML_ASSERT(size2 == size); - va_end(ap2); - va_end(ap); - return std::string(buf.data(), buf.size()); -} - -// -// key constants -// - -#define KEY_FTYPE "general.file_type" -#define KEY_NAME "general.name" -#define KEY_DESCRIPTION "general.description" -#define KEY_HAS_TEXT_ENC "clip.has_text_encoder" -#define KEY_HAS_VIS_ENC "clip.has_vision_encoder" -#define KEY_HAS_LLAVA_PROJ "clip.has_llava_projector" -#define KEY_HAS_MINICPMV_PROJ "clip.has_minicpmv_projector" -#define KEY_HAS_GLM_PROJ "clip.has_glm_projector" -#define KEY_MINICPMV_VERSION "clip.minicpmv_version" -#define KEY_HAS_QWEN2VL_MERGER "clip.has_qwen2vl_merger" -#define KEY_USE_GELU "clip.use_gelu" -#define KEY_USE_SILU "clip.use_silu" -#define KEY_N_EMBD "clip.%s.embedding_length" -#define KEY_N_FF "clip.%s.feed_forward_length" -#define KEY_N_BLOCK "clip.%s.block_count" -#define KEY_N_HEAD "clip.%s.attention.head_count" -#define KEY_LAYER_NORM_EPS "clip.%s.attention.layer_norm_epsilon" -#define KEY_PROJ_DIM "clip.%s.projection_dim" -#define KEY_TOKENS "tokenizer.ggml.tokens" -#define KEY_N_POSITIONS "clip.text.context_length" -#define KEY_IMAGE_SIZE "clip.vision.image_size" -#define KEY_PATCH_SIZE "clip.vision.patch_size" -#define KEY_IMAGE_MEAN "clip.vision.image_mean" -#define KEY_IMAGE_STD "clip.vision.image_std" -#define KEY_PROJ_TYPE "clip.projector_type" -#define KEY_FEATURE_LAYER "clip.vision.feature_layer" - -#define KEY_MM_PATCH_MERGE_TYPE "clip.vision.mm_patch_merge_type" -#define KEY_IMAGE_GRID_PINPOINTS "clip.vision.image_grid_pinpoints" -#define KEY_IMAGE_CROP_RESOLUTION "clip.vision.image_crop_resolution" - - -// -// tensor name constants -// - -#define TN_TOKEN_EMBD "%s.token_embd.weight" -#define TN_POS_EMBD "%s.position_embd.weight" -#define TN_CLASS_EMBD "v.class_embd" -#define TN_PATCH_EMBD "v.patch_embd.weight" // not rename tensor with ".0" postfix for backwrad compat -#define TN_PATCH_EMBD_1 "v.patch_embd.weight.1" -#define TN_PATCH_BIAS "v.patch_embd.bias" -#define TN_ATTN_K "%s.blk.%d.attn_k.%s" -#define TN_ATTN_Q "%s.blk.%d.attn_q.%s" -#define TN_ATTN_V "%s.blk.%d.attn_v.%s" -#define TN_ATTN_OUTPUT "%s.blk.%d.attn_out.%s" -#define TN_FFN_DOWN "%s.blk.%d.ffn_down.%s" -#define TN_FFN_UP "%s.blk.%d.ffn_up.%s" -#define TN_LN_1 "%s.blk.%d.ln1.%s" -#define TN_LN_2 "%s.blk.%d.ln2.%s" -#define TN_LN_PRE "%s.pre_ln.%s" -#define TN_LN_POST "%s.post_ln.%s" -#define TN_TEXT_PROJ "text_projection.weight" -#define TN_VIS_PROJ "visual_projection.weight" -#define TN_LLAVA_PROJ "mm.%d.%s" -#define TN_MVLM_PROJ_MLP "mm.model.mlp.%d.%s" -#define TN_MVLM_PROJ_BLOCK "mm.model.mb_block.%d.block.%d.%s" -#define TN_MVLM_PROJ_PEG "mm.model.peg.%d.%s" -#define TN_IMAGE_NEWLINE "model.image_newline" -#define TN_MM_INP_PROJ "mm.input_projection.weight" // gemma3 -#define TN_MM_SOFT_EMB_N "mm.soft_emb_norm.weight" // gemma3 - -#define TN_MINICPMV_POS_EMBD_K "resampler.pos_embed_k" -#define TN_MINICPMV_QUERY "resampler.query" -#define TN_MINICPMV_PROJ "resampler.proj.weight" -#define TN_MINICPMV_KV_PROJ "resampler.kv.weight" -#define TN_MINICPMV_ATTN "resampler.attn.%s.%s" -#define TN_MINICPMV_LN "resampler.ln_%s.%s" - -#define TN_GLM_ADAPER_CONV "adapter.conv.%s" -#define TN_GLM_ADAPTER_LINEAR "adapter.linear.linear.%s" -#define TN_GLM_ADAPTER_NORM_1 "adapter.linear.norm1.%s" 
-#define TN_GLM_ADAPTER_D_H_2_4H "adapter.linear.dense_h_to_4h.%s" -#define TN_GLM_ADAPTER_GATE "adapter.linear.gate.%s" -#define TN_GLM_ADAPTER_D_4H_2_H "adapter.linear.dense_4h_to_h.%s" -#define TN_GLM_BOI_W "adapter.boi" -#define TN_GLM_EOI_W "adapter.eoi" - - -enum projector_type { - PROJECTOR_TYPE_MLP, - PROJECTOR_TYPE_MLP_NORM, - PROJECTOR_TYPE_LDP, - PROJECTOR_TYPE_LDPV2, - PROJECTOR_TYPE_RESAMPLER, - PROJECTOR_TYPE_GLM_EDGE, - PROJECTOR_TYPE_MERGER, - PROJECTOR_TYPE_GEMMA3, - PROJECTOR_TYPE_UNKNOWN, -}; - -static std::map PROJECTOR_TYPE_NAMES = { - { PROJECTOR_TYPE_MLP, "mlp" }, - { PROJECTOR_TYPE_LDP, "ldp" }, - { PROJECTOR_TYPE_LDPV2, "ldpv2"}, - { PROJECTOR_TYPE_RESAMPLER, "resampler"}, - { PROJECTOR_TYPE_GLM_EDGE, "adapter"}, - { PROJECTOR_TYPE_MERGER, "qwen2vl_merger"}, - { PROJECTOR_TYPE_GEMMA3, "gemma3"}, -}; - - -// -// utilities to get data from a gguf file -// - -static int get_key_idx(const gguf_context * ctx, const char * key) { - int i = gguf_find_key(ctx, key); - if (i == -1) { - LOG_ERR("key %s not found in file\n", key); - throw std::runtime_error(format("Missing required key: %s", key)); - } - - return i; -} - -static uint32_t get_u32(const gguf_context * ctx, const std::string & key) { - const int i = get_key_idx(ctx, key.c_str()); - - return gguf_get_val_u32(ctx, i); -} - -static float get_f32(const gguf_context * ctx, const std::string & key) { - const int i = get_key_idx(ctx, key.c_str()); - - return gguf_get_val_f32(ctx, i); -} - -static struct ggml_tensor * get_tensor(struct ggml_context * ctx, const std::string & name) { - struct ggml_tensor * cur = ggml_get_tensor(ctx, name.c_str()); - if (!cur) { - throw std::runtime_error(format("%s: unable to find tensor %s\n", __func__, name.c_str())); - } - - return cur; -} - -static std::string get_ftype(int ftype) { - return ggml_type_name(static_cast(ftype)); -} - -static std::string gguf_data_to_str(enum gguf_type type, const void * data, int i) { - switch (type) { - case GGUF_TYPE_UINT8: return std::to_string(((const uint8_t *)data)[i]); - case GGUF_TYPE_INT8: return std::to_string(((const int8_t *)data)[i]); - case GGUF_TYPE_UINT16: return std::to_string(((const uint16_t *)data)[i]); - case GGUF_TYPE_INT16: return std::to_string(((const int16_t *)data)[i]); - case GGUF_TYPE_UINT32: return std::to_string(((const uint32_t *)data)[i]); - case GGUF_TYPE_INT32: return std::to_string(((const int32_t *)data)[i]); - case GGUF_TYPE_UINT64: return std::to_string(((const uint64_t *)data)[i]); - case GGUF_TYPE_INT64: return std::to_string(((const int64_t *)data)[i]); - case GGUF_TYPE_FLOAT32: return std::to_string(((const float *)data)[i]); - case GGUF_TYPE_FLOAT64: return std::to_string(((const double *)data)[i]); - case GGUF_TYPE_BOOL: return ((const bool *)data)[i] ? 
"true" : "false"; - default: return format("unknown type %d", type); - } -} - -static void replace_all(std::string & s, const std::string & search, const std::string & replace) { - if (search.empty()) { - return; - } - std::string builder; - builder.reserve(s.length()); - size_t pos = 0; - size_t last_pos = 0; - while ((pos = s.find(search, last_pos)) != std::string::npos) { - builder.append(s, last_pos, pos - last_pos); - builder.append(replace); - last_pos = pos + search.length(); - } - builder.append(s, last_pos, std::string::npos); - s = std::move(builder); -} - -static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) { - const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i); - - switch (type) { - case GGUF_TYPE_STRING: - return gguf_get_val_str(ctx_gguf, i); - case GGUF_TYPE_ARRAY: - { - const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i); - int arr_n = gguf_get_arr_n(ctx_gguf, i); - const void * data = arr_type == GGUF_TYPE_STRING ? nullptr : gguf_get_arr_data(ctx_gguf, i); - std::stringstream ss; - ss << "["; - for (int j = 0; j < arr_n; j++) { - if (arr_type == GGUF_TYPE_STRING) { - std::string val = gguf_get_arr_str(ctx_gguf, i, j); - // escape quotes - replace_all(val, "\\", "\\\\"); - replace_all(val, "\"", "\\\""); - ss << '"' << val << '"'; - } else if (arr_type == GGUF_TYPE_ARRAY) { - ss << "???"; - } else { - ss << gguf_data_to_str(arr_type, data, j); - } - if (j < arr_n - 1) { - ss << ", "; - } - } - ss << "]"; - return ss.str(); - } - default: - return gguf_data_to_str(type, gguf_get_val_data(ctx_gguf, i), 0); - } -} - -static void print_tensor_info(const ggml_tensor * tensor, const char * prefix = "") { - size_t tensor_size = ggml_nbytes(tensor); - LOG_INF("%s: n_dims = %d, name = %s, tensor_size=%zu, shape:[%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 "], type = %s\n", - prefix, ggml_n_dims(tensor), tensor->name, tensor_size, - tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3], ggml_type_name(tensor->type)); -} - -static projector_type clip_projector_type_from_string(const std::string & name) { - for (const auto & kv : PROJECTOR_TYPE_NAMES) { // NOLINT - if (kv.second == name) { - return kv.first; - } - } - throw std::runtime_error(format("Unknown projector type: %s", name.c_str())); -} - #ifdef CLIP_DEBUG_FUNCTIONS static void clip_image_write_image_to_ppm(const clip_image_u8& img, const std::string& filename) { std::ofstream file(filename, std::ios::binary); @@ -418,6 +145,11 @@ static void clip_image_convert_f32_to_u8(const clip_image_f32& src, clip_image_u // clip layers // +enum patch_merge_type { + PATCH_MERGE_FLAT, + PATCH_MERGE_SPATIAL_UNPAD, +}; + struct clip_hparams { int32_t image_size; int32_t patch_size; @@ -427,9 +159,9 @@ struct clip_hparams { int32_t n_head; int32_t n_layer; - float eps; + patch_merge_type mm_patch_merge_type = PATCH_MERGE_FLAT; - char mm_patch_merge_type[32] = "flat"; // spatial_unpad or flat (default) + float eps; std::vector image_grid_pinpoints; int32_t image_crop_resolution; @@ -438,44 +170,44 @@ struct clip_hparams { struct clip_layer { // attention - struct ggml_tensor * k_w; - struct ggml_tensor * k_b; - struct ggml_tensor * q_w; - struct ggml_tensor * q_b; - struct ggml_tensor * v_w; - struct ggml_tensor * v_b; + struct ggml_tensor * k_w = nullptr; + struct ggml_tensor * k_b = nullptr; + struct ggml_tensor * q_w = nullptr; + struct ggml_tensor * q_b = nullptr; + struct ggml_tensor * v_w = nullptr; + struct ggml_tensor * v_b = nullptr; - struct ggml_tensor * o_w; - struct 
ggml_tensor * o_b; + struct ggml_tensor * o_w = nullptr; + struct ggml_tensor * o_b = nullptr; // layernorm 1 - struct ggml_tensor * ln_1_w; - struct ggml_tensor * ln_1_b; + struct ggml_tensor * ln_1_w = nullptr; + struct ggml_tensor * ln_1_b = nullptr; // ff - struct ggml_tensor * ff_i_w; - struct ggml_tensor * ff_i_b; + struct ggml_tensor * ff_i_w = nullptr; + struct ggml_tensor * ff_i_b = nullptr; - struct ggml_tensor * ff_o_w; - struct ggml_tensor * ff_o_b; + struct ggml_tensor * ff_o_w = nullptr; + struct ggml_tensor * ff_o_b = nullptr; // layernorm 2 - struct ggml_tensor * ln_2_w; - struct ggml_tensor * ln_2_b; + struct ggml_tensor * ln_2_w = nullptr; + struct ggml_tensor * ln_2_b = nullptr; }; struct clip_vision_model { struct clip_hparams hparams; // embeddings - struct ggml_tensor * class_embedding; - struct ggml_tensor * patch_embeddings_0; - struct ggml_tensor * patch_embeddings_1; // second Conv2D kernel when we decouple Conv3D along temproal dimension (Qwen2VL) - struct ggml_tensor * patch_bias; - struct ggml_tensor * position_embeddings; + struct ggml_tensor * class_embedding = nullptr; + struct ggml_tensor * patch_embeddings_0 = nullptr; + struct ggml_tensor * patch_embeddings_1 = nullptr; // second Conv2D kernel when we decouple Conv3D along temproal dimension (Qwen2VL) + struct ggml_tensor * patch_bias = nullptr; + struct ggml_tensor * position_embeddings = nullptr; - struct ggml_tensor * pre_ln_w; - struct ggml_tensor * pre_ln_b; + struct ggml_tensor * pre_ln_w = nullptr; + struct ggml_tensor * pre_ln_b = nullptr; std::vector layers; @@ -485,84 +217,84 @@ struct clip_vision_model { struct ggml_tensor * projection; // LLaVA projection - struct ggml_tensor * mm_0_w = NULL; - struct ggml_tensor * mm_0_b = NULL; - struct ggml_tensor * mm_2_w = NULL; - struct ggml_tensor * mm_2_b = NULL; + struct ggml_tensor * mm_0_w = nullptr; + struct ggml_tensor * mm_0_b = nullptr; + struct ggml_tensor * mm_2_w = nullptr; + struct ggml_tensor * mm_2_b = nullptr; - struct ggml_tensor * image_newline = NULL; + struct ggml_tensor * image_newline = nullptr; // Yi type models with mlp+normalization projection - struct ggml_tensor * mm_1_w = NULL; // Yi type models have 0, 1, 3, 4 - struct ggml_tensor * mm_1_b = NULL; - struct ggml_tensor * mm_3_w = NULL; - struct ggml_tensor * mm_3_b = NULL; - struct ggml_tensor * mm_4_w = NULL; - struct ggml_tensor * mm_4_b = NULL; + struct ggml_tensor * mm_1_w = nullptr; // Yi type models have 0, 1, 3, 4 + struct ggml_tensor * mm_1_b = nullptr; + struct ggml_tensor * mm_3_w = nullptr; + struct ggml_tensor * mm_3_b = nullptr; + struct ggml_tensor * mm_4_w = nullptr; + struct ggml_tensor * mm_4_b = nullptr; //GLMV-Edge projection - struct ggml_tensor * mm_model_adapter_conv_w; - struct ggml_tensor * mm_model_adapter_conv_b; - struct ggml_tensor * boi_w; - struct ggml_tensor * eoi_w; + struct ggml_tensor * mm_model_adapter_conv_w = nullptr; + struct ggml_tensor * mm_model_adapter_conv_b = nullptr; + struct ggml_tensor * boi_w = nullptr; + struct ggml_tensor * eoi_w = nullptr; // MobileVLM projection - struct ggml_tensor * mm_model_mlp_1_w; - struct ggml_tensor * mm_model_mlp_1_b; - struct ggml_tensor * mm_model_mlp_3_w; - struct ggml_tensor * mm_model_mlp_3_b; - struct ggml_tensor * mm_model_block_1_block_0_0_w; - struct ggml_tensor * mm_model_block_1_block_0_1_w; - struct ggml_tensor * mm_model_block_1_block_0_1_b; - struct ggml_tensor * mm_model_block_1_block_1_fc1_w; - struct ggml_tensor * mm_model_block_1_block_1_fc1_b; - struct ggml_tensor * 
mm_model_block_1_block_1_fc2_w; - struct ggml_tensor * mm_model_block_1_block_1_fc2_b; - struct ggml_tensor * mm_model_block_1_block_2_0_w; - struct ggml_tensor * mm_model_block_1_block_2_1_w; - struct ggml_tensor * mm_model_block_1_block_2_1_b; - struct ggml_tensor * mm_model_block_2_block_0_0_w; - struct ggml_tensor * mm_model_block_2_block_0_1_w; - struct ggml_tensor * mm_model_block_2_block_0_1_b; - struct ggml_tensor * mm_model_block_2_block_1_fc1_w; - struct ggml_tensor * mm_model_block_2_block_1_fc1_b; - struct ggml_tensor * mm_model_block_2_block_1_fc2_w; - struct ggml_tensor * mm_model_block_2_block_1_fc2_b; - struct ggml_tensor * mm_model_block_2_block_2_0_w; - struct ggml_tensor * mm_model_block_2_block_2_1_w; - struct ggml_tensor * mm_model_block_2_block_2_1_b; + struct ggml_tensor * mm_model_mlp_1_w = nullptr; + struct ggml_tensor * mm_model_mlp_1_b = nullptr; + struct ggml_tensor * mm_model_mlp_3_w = nullptr; + struct ggml_tensor * mm_model_mlp_3_b = nullptr; + struct ggml_tensor * mm_model_block_1_block_0_0_w = nullptr; + struct ggml_tensor * mm_model_block_1_block_0_1_w = nullptr; + struct ggml_tensor * mm_model_block_1_block_0_1_b = nullptr; + struct ggml_tensor * mm_model_block_1_block_1_fc1_w = nullptr; + struct ggml_tensor * mm_model_block_1_block_1_fc1_b = nullptr; + struct ggml_tensor * mm_model_block_1_block_1_fc2_w = nullptr; + struct ggml_tensor * mm_model_block_1_block_1_fc2_b = nullptr; + struct ggml_tensor * mm_model_block_1_block_2_0_w = nullptr; + struct ggml_tensor * mm_model_block_1_block_2_1_w = nullptr; + struct ggml_tensor * mm_model_block_1_block_2_1_b = nullptr; + struct ggml_tensor * mm_model_block_2_block_0_0_w = nullptr; + struct ggml_tensor * mm_model_block_2_block_0_1_w = nullptr; + struct ggml_tensor * mm_model_block_2_block_0_1_b = nullptr; + struct ggml_tensor * mm_model_block_2_block_1_fc1_w = nullptr; + struct ggml_tensor * mm_model_block_2_block_1_fc1_b = nullptr; + struct ggml_tensor * mm_model_block_2_block_1_fc2_w = nullptr; + struct ggml_tensor * mm_model_block_2_block_1_fc2_b = nullptr; + struct ggml_tensor * mm_model_block_2_block_2_0_w = nullptr; + struct ggml_tensor * mm_model_block_2_block_2_1_w = nullptr; + struct ggml_tensor * mm_model_block_2_block_2_1_b = nullptr; // MobileVLM_V2 projection - struct ggml_tensor * mm_model_mlp_0_w; - struct ggml_tensor * mm_model_mlp_0_b; - struct ggml_tensor * mm_model_mlp_2_w; - struct ggml_tensor * mm_model_mlp_2_b; - struct ggml_tensor * mm_model_peg_0_w; - struct ggml_tensor * mm_model_peg_0_b; + struct ggml_tensor * mm_model_mlp_0_w = nullptr; + struct ggml_tensor * mm_model_mlp_0_b = nullptr; + struct ggml_tensor * mm_model_mlp_2_w = nullptr; + struct ggml_tensor * mm_model_mlp_2_b = nullptr; + struct ggml_tensor * mm_model_peg_0_w = nullptr; + struct ggml_tensor * mm_model_peg_0_b = nullptr; // MINICPMV projection - struct ggml_tensor * mm_model_pos_embed_k; - struct ggml_tensor * mm_model_query; - struct ggml_tensor * mm_model_proj; - struct ggml_tensor * mm_model_kv_proj; - struct ggml_tensor * mm_model_attn_q_w; - struct ggml_tensor * mm_model_attn_q_b; - struct ggml_tensor * mm_model_attn_k_w; - struct ggml_tensor * mm_model_attn_k_b; - struct ggml_tensor * mm_model_attn_v_w; - struct ggml_tensor * mm_model_attn_v_b; - struct ggml_tensor * mm_model_attn_o_w; - struct ggml_tensor * mm_model_attn_o_b; - struct ggml_tensor * mm_model_ln_q_w; - struct ggml_tensor * mm_model_ln_q_b; - struct ggml_tensor * mm_model_ln_kv_w; - struct ggml_tensor * mm_model_ln_kv_b; - struct ggml_tensor * 
mm_model_ln_post_w; - struct ggml_tensor * mm_model_ln_post_b; + struct ggml_tensor * mm_model_pos_embed_k = nullptr; + struct ggml_tensor * mm_model_query = nullptr; + struct ggml_tensor * mm_model_proj = nullptr; + struct ggml_tensor * mm_model_kv_proj = nullptr; + struct ggml_tensor * mm_model_attn_q_w = nullptr; + struct ggml_tensor * mm_model_attn_q_b = nullptr; + struct ggml_tensor * mm_model_attn_k_w = nullptr; + struct ggml_tensor * mm_model_attn_k_b = nullptr; + struct ggml_tensor * mm_model_attn_v_w = nullptr; + struct ggml_tensor * mm_model_attn_v_b = nullptr; + struct ggml_tensor * mm_model_attn_o_w = nullptr; + struct ggml_tensor * mm_model_attn_o_b = nullptr; + struct ggml_tensor * mm_model_ln_q_w = nullptr; + struct ggml_tensor * mm_model_ln_q_b = nullptr; + struct ggml_tensor * mm_model_ln_kv_w = nullptr; + struct ggml_tensor * mm_model_ln_kv_b = nullptr; + struct ggml_tensor * mm_model_ln_post_w = nullptr; + struct ggml_tensor * mm_model_ln_post_b = nullptr; // gemma3 - struct ggml_tensor * mm_input_proj_w; - struct ggml_tensor * mm_soft_emb_norm_w; + struct ggml_tensor * mm_input_proj_w = nullptr; + struct ggml_tensor * mm_soft_emb_norm_w = nullptr; }; struct clip_ctx { @@ -582,28 +314,22 @@ struct clip_ctx { float image_std[3]; bool use_gelu = false; bool use_silu = false; - int32_t ftype = 1; - bool has_class_embedding = true; - bool has_pre_norm = true; - bool has_post_norm = false; - bool has_patch_bias = false; - - struct gguf_context * ctx_gguf = nullptr; - struct ggml_context * ctx_data = nullptr; + gguf_context_ptr ctx_gguf; + ggml_context_ptr ctx_data; std::vector buf_compute_meta; std::vector backend_ptrs; std::vector backend_buft; - ggml_backend_t backend = nullptr; - ggml_backend_t backend_cpu = nullptr; - ggml_backend_buffer_t buf = nullptr; + ggml_backend_t backend; + ggml_backend_t backend_cpu; + ggml_backend_buffer_ptr buf; ggml_backend_sched_ptr sched; - struct clip_image_size * load_image_size = nullptr; + clip_image_size load_image_size; clip_ctx(clip_context_params & ctx_params) { backend_cpu = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr); @@ -629,17 +355,14 @@ struct clip_ctx { } ~clip_ctx() { - ggml_free(ctx_data); - gguf_free(ctx_gguf); - ggml_backend_buffer_free(buf); ggml_backend_free(backend); - if (backend_cpu != backend) { + if (backend != backend_cpu) { ggml_backend_free(backend_cpu); } } }; -static ggml_cgraph * clip_image_build_graph_siglip(clip_ctx * ctx, const clip_image_f32_batch * imgs) { +static ggml_cgraph * clip_image_build_graph_siglip(clip_ctx * ctx, const clip_image_f32_batch & imgs) { const auto & model = ctx->vision_model; const auto & hparams = model.hparams; @@ -655,7 +378,7 @@ static ggml_cgraph * clip_image_build_graph_siglip(clip_ctx * ctx, const clip_im const int n_layer = hparams.n_layer; const float eps = hparams.eps; - GGML_ASSERT(imgs->size == 1); // batch_size == 1 + GGML_ASSERT(imgs.entries.size() == 1); // batch_size == 1 struct ggml_init_params params = { /*.mem_size =*/ ctx->buf_compute_meta.size(), @@ -663,7 +386,9 @@ static ggml_cgraph * clip_image_build_graph_siglip(clip_ctx * ctx, const clip_im /*.no_alloc =*/ true, }; - struct ggml_context * ctx0 = ggml_init(params); + ggml_context_ptr ctx0_ptr(ggml_init(params)); + auto ctx0 = ctx0_ptr.get(); + struct ggml_cgraph * gf = ggml_new_graph(ctx0); // input raw @@ -711,8 +436,7 @@ static ggml_cgraph * clip_image_build_graph_siglip(clip_ctx * ctx, const clip_im V = ggml_cont(ctx0, ggml_permute(ctx0, V, 1, 2, 0, 3)); struct ggml_tensor * KQ = 
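// Illustrative sketch, not from the patch: clip_ctx above replaces the raw
// gguf_context* / ggml_context* / ggml_backend_buffer_t members (and the manual
// gguf_free / ggml_free / ggml_backend_buffer_free calls in ~clip_ctx) with the RAII
// aliases gguf_context_ptr, ggml_context_ptr and ggml_backend_buffer_ptr. Those aliases
// come from ggml's C++ header; in essence they are std::unique_ptr with a custom
// deleter, roughly like this (names with a _sketch suffix are assumptions):
#include <memory>

struct ggml_context_deleter_sketch {
    void operator()(ggml_context * ctx) const { ggml_free(ctx); }
};
using ggml_context_ptr_sketch = std::unique_ptr<ggml_context, ggml_context_deleter_sketch>;

// usage: the context is freed automatically when the owning object goes out of scope
//   ggml_context_ptr_sketch ctx(ggml_init(params));
//   ggml_tensor * t = ggml_new_tensor_1d(ctx.get(), GGML_TYPE_F32, 16);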
ggml_mul_mat(ctx0, K, Q); - KQ = ggml_scale_inplace(ctx0, KQ, 1.0f / sqrtf((float)d_head)); - KQ = ggml_soft_max_inplace(ctx0, KQ); + KQ = ggml_soft_max_ext(ctx0, KQ, nullptr, 1.0f / sqrtf((float)d_head), 0.0f); struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V, KQ); KQV = ggml_reshape_3d(ctx0, KQV, d_head, num_patches, n_head); @@ -751,7 +475,7 @@ static ggml_cgraph * clip_image_build_graph_siglip(clip_ctx * ctx, const clip_im } // post-layernorm - if (ctx->has_post_norm) { + if (model.post_ln_w) { embeddings = ggml_norm(ctx0, embeddings, eps); ggml_set_name(embeddings, "post_ln"); @@ -786,12 +510,10 @@ static ggml_cgraph * clip_image_build_graph_siglip(clip_ctx * ctx, const clip_im // build the graph ggml_build_forward_expand(gf, embeddings); - ggml_free(ctx0); - return gf; } -static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_image_f32_batch * imgs, struct clip_image_size * load_image_size, bool is_inf = false) { +static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_image_f32_batch & imgs, struct clip_image_size load_image_size, bool is_inf = false) { if (!ctx->has_vision_encoder) { LOG_ERR("This gguf file seems to have no vision encoder\n"); return nullptr; @@ -804,30 +526,27 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im int image_size_width = image_size; int image_size_height = image_size; if (ctx->has_minicpmv_projector) { - if (load_image_size == nullptr) { - load_image_size = clip_image_size_init(); - } - LOG_DBG("%s: %d %d\n", __func__, load_image_size->width, load_image_size->height); - image_size_width = load_image_size->width; - image_size_height = load_image_size->height; + LOG_DBG("%s: %d %d\n", __func__, load_image_size.width, load_image_size.height); + image_size_width = load_image_size.width; + image_size_height = load_image_size.height; if (is_inf) { - image_size_width = imgs->data->nx; - image_size_height = imgs->data->ny; + image_size_width = imgs.entries[0]->nx; + image_size_height = imgs.entries[0]->ny; } } else if (ctx->has_qwen2vl_merger) { // use the image's native resolution when image is avaible if (is_inf) { // if (imgs->data->nx && imgs->data->ny) { - image_size_width = imgs->data->nx; - image_size_height = imgs->data->ny; + image_size_width = imgs.entries[0]->nx; + image_size_height = imgs.entries[0]->ny; } } const int patch_size = hparams.patch_size; const int num_patches = ((image_size_width / patch_size) * (image_size_height / patch_size)); const int patches_w = image_size_width / patch_size; const int patches_h = image_size_height / patch_size; - const int num_positions = num_patches + (ctx->has_class_embedding ? 1 : 0); + const int num_positions = num_patches + (model.class_embedding ? 1 : 0); const int num_position_ids = ctx->has_qwen2vl_merger ? 
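// Illustrative sketch, not from the patch: the attention hunk above (and the same change
// in the legacy graph further on) folds the 1/sqrt(d_head) scaling into the softmax by
// calling ggml_soft_max_ext(ctx, KQ, mask, scale, max_bias) instead of
// ggml_scale_inplace followed by ggml_soft_max_inplace; the result is identical, but one
// extra pass over KQ is avoided. Assumed helper name; needs ggml.h and <cmath>.
static ggml_tensor * scaled_softmax_sketch(ggml_context * ctx0, ggml_tensor * KQ, int d_head) {
    // equivalent of: KQ = ggml_scale_inplace(ctx0, KQ, 1.0f / sqrtf((float) d_head));
    //                KQ = ggml_soft_max_inplace(ctx0, KQ);
    return ggml_soft_max_ext(ctx0, KQ, /*mask=*/nullptr, 1.0f / sqrtf((float) d_head), /*max_bias=*/0.0f);
}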
num_positions * 4 : num_positions; const int hidden_size = hparams.hidden_size; const int n_head = hparams.n_head; @@ -835,7 +554,7 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im const float eps = hparams.eps; int mrope_sections[4] = {d_head/4, d_head/4, d_head/4, d_head/4}; - const int batch_size = imgs->size; + const int batch_size = imgs.entries.size(); if (ctx->has_llava_projector || ctx->has_minicpmv_projector || ctx->has_glm_projector) { GGML_ASSERT(batch_size == 1); @@ -847,7 +566,9 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im /*.no_alloc =*/ true, }; - struct ggml_context * ctx0 = ggml_init(params); + ggml_context_ptr ctx0_ptr(ggml_init(params)); + auto ctx0 = ctx0_ptr.get(); + struct ggml_cgraph * gf = ggml_new_graph(ctx0); struct ggml_tensor * inp_raw = ggml_new_tensor_4d(ctx0, GGML_TYPE_F32, image_size_width, image_size_height, 3, batch_size); @@ -879,7 +600,7 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im inp = ggml_cont(ctx0, ggml_permute(ctx0, inp, 1, 0, 2, 3)); } - if (ctx->has_patch_bias) { + if (model.patch_bias) { // inp = ggml_add(ctx0, inp, ggml_repeat(ctx0, model.patch_bias, inp)); inp = ggml_add(ctx0, inp, model.patch_bias); } @@ -888,7 +609,7 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im if (ctx->has_llava_projector) { // concat class_embeddings and patch_embeddings - if (ctx->has_class_embedding) { + if (model.class_embedding) { embeddings = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, hidden_size, num_positions, batch_size); ggml_set_name(embeddings, "embeddings"); ggml_set_input(embeddings); @@ -925,7 +646,7 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im } // pre-layernorm - if (ctx->has_pre_norm) { + if (model.pre_ln_w) { embeddings = ggml_norm(ctx0, embeddings, eps); ggml_set_name(embeddings, "pre_ln"); @@ -967,7 +688,6 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im ctx0, Q, positions, nullptr, d_head/2, mrope_sections, GGML_ROPE_TYPE_VISION, 32768, 10000, 1, 0, 1, 32, 1); } - Q = ggml_scale_inplace(ctx0, Q, 1.0f / sqrt((float)d_head)); Q = ggml_cont(ctx0, ggml_permute(ctx0, Q, 0, 2, 1, 3)); Q = ggml_reshape_3d(ctx0, Q, d_head, num_positions, n_head * batch_size); @@ -991,7 +711,7 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im V = ggml_reshape_3d(ctx0, V, num_positions, d_head, n_head * batch_size); struct ggml_tensor * KQ = ggml_mul_mat(ctx0, K, Q); - KQ = ggml_soft_max_inplace(ctx0, KQ); + KQ = ggml_soft_max_ext(ctx0, KQ, nullptr, 1.0f / sqrtf((float)d_head), 0.0f); struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V, KQ); KQV = ggml_reshape_4d(ctx0, KQV, d_head, num_positions, n_head, batch_size); KQV = ggml_permute(ctx0, KQV, 0, 2, 1, 3); @@ -1035,7 +755,7 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im } // post-layernorm - if (ctx->has_post_norm) { + if (model.post_ln_w) { embeddings = ggml_norm(ctx0, embeddings, eps); ggml_set_name(embeddings, "post_ln"); @@ -1075,8 +795,10 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im embeddings = ggml_add(ctx0, embeddings, model.mm_0_b); embeddings = ggml_gelu(ctx0, embeddings); - embeddings = ggml_mul_mat(ctx0, model.mm_2_w, embeddings); - embeddings = ggml_add(ctx0, embeddings, model.mm_2_b); + if (model.mm_2_w) { + embeddings = ggml_mul_mat(ctx0, model.mm_2_w, embeddings); + embeddings = 
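// Note, not from the patch: with the optional tensor members default-initialized to
// nullptr in the struct hunks earlier, the has_class_embedding / has_pre_norm /
// has_post_norm / has_patch_bias flags become redundant, and presence is a plain
// pointer check, exactly as the hunks above and below do:
//   if (model.patch_bias)      { inp = ggml_add(ctx0, inp, model.patch_bias); }
//   if (model.class_embedding) { /* concat class and patch embeddings */ }
//   if (model.pre_ln_w)        { /* apply pre-layernorm  */ }
//   if (model.post_ln_w)       { /* apply post-layernorm */ }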
ggml_add(ctx0, embeddings, model.mm_2_b); + } } else if (ctx->proj_type == PROJECTOR_TYPE_MLP_NORM) { embeddings = ggml_mul_mat(ctx0, model.mm_0_w, embeddings); @@ -1279,7 +1001,6 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im } struct ggml_tensor * Q = ggml_add(ctx0, ggml_mul_mat(ctx0, model.mm_model_attn_q_w, q), model.mm_model_attn_q_b); - Q = ggml_scale_inplace(ctx0, Q, 1.0f / sqrt((float)d_head)); struct ggml_tensor * K = ggml_add(ctx0, ggml_mul_mat(ctx0, model.mm_model_attn_k_w, k), model.mm_model_attn_k_b); struct ggml_tensor * V = ggml_add(ctx0, ggml_mul_mat(ctx0, model.mm_model_attn_v_w, v), model.mm_model_attn_v_b); // permute @@ -1293,7 +1014,7 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im V = ggml_cont(ctx0, ggml_permute(ctx0, V, 1, 2, 0, 3)); V = ggml_reshape_3d(ctx0, V, num_positions, d_head, n_head * batch_size); struct ggml_tensor * KQ = ggml_mul_mat(ctx0, K, Q); - KQ = ggml_soft_max_inplace(ctx0, KQ); + KQ = ggml_soft_max_ext(ctx0, KQ, nullptr, 1.0f / sqrtf((float)d_head), 0.0f); struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V, KQ); KQV = ggml_reshape_4d(ctx0, KQV, d_head, num_query, n_head, batch_size); KQV = ggml_permute(ctx0, KQV, 0, 2, 1, 3); @@ -1335,7 +1056,7 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im embeddings = ggml_mul_mat(ctx0, model.mm_model_mlp_3_w, embeddings); } } else { - GGML_ABORT("fatel error"); + GGML_ABORT("fatal error"); } } else if (ctx->proj_type == PROJECTOR_TYPE_MERGER) { @@ -1355,12 +1076,10 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im // build the graph ggml_build_forward_expand(gf, embeddings); - ggml_free(ctx0); - return gf; } -static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32_batch * imgs, struct clip_image_size * load_image_size, bool is_inf = false) { +static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32_batch & imgs, struct clip_image_size load_image_size, bool is_inf = false) { if (ctx->proj_type == PROJECTOR_TYPE_GEMMA3) { return clip_image_build_graph_siglip(ctx, imgs); } else { @@ -1369,549 +1088,401 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 } } -// read and create ggml_context containing the tensors and their data -struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { - return clip_init(fname, clip_context_params{ - /* use_gpu */ true, - /* verbosity */ verbosity, - }); -} +struct clip_model_loader { + ggml_context_ptr ctx_meta; + gguf_context_ptr ctx_gguf; -struct clip_ctx * clip_init(const char * fname, struct clip_context_params ctx_params) { - int verbosity = ctx_params.verbosity; - struct ggml_context * meta = NULL; + clip_ctx & ctx_clip; + std::string fname; - struct gguf_init_params params = { - /*.no_alloc = */ true, - /*.ctx = */ &meta, - }; + size_t model_size; // in bytes - struct gguf_context * ctx = gguf_init_from_file(fname, params); - if (!ctx) { - throw std::runtime_error(format("%s: failed to load CLIP model from %s. 
Does this file exist?\n", __func__, fname)); - } + // TODO @ngxson : we should not pass clip_ctx here, it should be clip_vision_model + clip_model_loader(const char * fname, clip_ctx & ctx_clip) : ctx_clip(ctx_clip), fname(fname) { + struct ggml_context * meta = nullptr; - if (verbosity >= 1) { - const int n_tensors = gguf_get_n_tensors(ctx); - const int n_kv = gguf_get_n_kv(ctx); - const int ftype = get_u32(ctx, KEY_FTYPE); - const std::string ftype_str = get_ftype(ftype); - const int idx_name = gguf_find_key(ctx, KEY_NAME); - if (idx_name != -1) { // make name optional temporarily as some of the uploaded models missing it due to a bug - const std::string name = gguf_get_val_str(ctx, idx_name); - LOG_INF("%s: model name: %s\n", __func__, name.c_str()); - } - const int idx_desc = gguf_find_key(ctx, KEY_DESCRIPTION); - if (idx_desc != -1) { // ditto - const std::string description = gguf_get_val_str(ctx, idx_desc); - LOG_INF("%s: description: %s\n", __func__, description.c_str()); - } - LOG_INF("%s: GGUF version: %d\n", __func__, gguf_get_version(ctx)); - LOG_INF("%s: alignment: %zu\n", __func__, gguf_get_alignment(ctx)); - LOG_INF("%s: n_tensors: %d\n", __func__, n_tensors); - LOG_INF("%s: n_kv: %d\n", __func__, n_kv); - LOG_INF("%s: ftype: %s\n", __func__, ftype_str.c_str()); - LOG_INF("\n"); - } - const int n_tensors = gguf_get_n_tensors(ctx); + struct gguf_init_params params = { + /*.no_alloc = */ true, + /*.ctx = */ &meta, + }; - // kv - const int n_kv = gguf_get_n_kv(ctx); - LOG_INF("%s: loaded meta data with %d key-value pairs and %d tensors from %s\n", - __func__, n_kv, n_tensors, fname); - { - std::map n_type; - - for (int i = 0; i < n_tensors; i++) { - enum ggml_type type = gguf_get_tensor_type(ctx, i); - - n_type[type]++; + ctx_gguf = gguf_context_ptr(gguf_init_from_file(fname, params)); + if (!ctx_gguf.get()) { + throw std::runtime_error(string_format("%s: failed to load CLIP model from %s. Does this file exist?\n", __func__, fname)); } - LOG_INF("%s: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n", __func__); - for (int i = 0; i < n_kv; i++) { - const char * name = gguf_get_key(ctx, i); - const enum gguf_type type = gguf_get_kv_type(ctx, i); - const std::string type_name = - type == GGUF_TYPE_ARRAY - ? 
format("%s[%s,%d]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(ctx, i)), gguf_get_arr_n(ctx, i)) - : gguf_type_name(type); + ctx_meta.reset(meta); - std::string value = gguf_kv_to_str(ctx, i); - const size_t MAX_VALUE_LEN = 40; - if (value.size() > MAX_VALUE_LEN) { - value = format("%s...", value.substr(0, MAX_VALUE_LEN - 3).c_str()); - } - replace_all(value, "\n", "\\n"); + const int n_tensors = gguf_get_n_tensors(ctx_gguf.get()); - LOG_INF("%s: - kv %3d: %42s %-16s = %s\n", __func__, i, name, type_name.c_str(), value.c_str()); + // print gguf info + { + std::string name; + get_string(KEY_NAME, name, false); + std::string description; + get_string(KEY_DESCRIPTION, description, false); + LOG_INF("%s: model name: %s\n", __func__, name.c_str()); + LOG_INF("%s: description: %s\n", __func__, description.c_str()); + LOG_INF("%s: GGUF version: %d\n", __func__, gguf_get_version(ctx_gguf.get())); + LOG_INF("%s: alignment: %zu\n", __func__, gguf_get_alignment(ctx_gguf.get())); + LOG_INF("%s: n_tensors: %d\n", __func__, n_tensors); + LOG_INF("%s: n_kv: %d\n", __func__, (int)gguf_get_n_kv(ctx_gguf.get())); + LOG_INF("\n"); } - // print type counts - for (auto & kv : n_type) { - if (kv.second == 0) { - continue; - } - - LOG_INF("%s: - type %4s: %4d tensors\n", __func__, ggml_type_name(kv.first), kv.second); - } - } - - // data - size_t model_size = 0; - { - for (int i = 0; i < n_tensors; ++i) { - const char * name = gguf_get_tensor_name(ctx, i); - const size_t offset = gguf_get_tensor_offset(ctx, i); - enum ggml_type type = gguf_get_tensor_type(ctx, i); - struct ggml_tensor * cur = ggml_get_tensor(meta, name); - size_t tensor_size = ggml_nbytes(cur); - model_size += tensor_size; - if (verbosity >= 3) { - LOG_INF("%s: tensor[%d]: n_dims = %d, name = %s, tensor_size=%zu, offset=%zu, shape:[%" PRIu64 ", %" PRIu64 ", %" PRIu64 ", %" PRIu64 "], type = %s\n", - __func__, i, ggml_n_dims(cur), cur->name, tensor_size, offset, cur->ne[0], cur->ne[1], cur->ne[2], cur->ne[3], ggml_type_name(type)); + // tensors + { + for (int i = 0; i < n_tensors; ++i) { + const char * name = gguf_get_tensor_name(ctx_gguf.get(), i); + const size_t offset = gguf_get_tensor_offset(ctx_gguf.get(), i); + enum ggml_type type = gguf_get_tensor_type(ctx_gguf.get(), i); + struct ggml_tensor * cur = ggml_get_tensor(meta, name); + size_t tensor_size = ggml_nbytes(cur); + model_size += tensor_size; + LOG_DBG("%s: tensor[%d]: n_dims = %d, name = %s, tensor_size=%zu, offset=%zu, shape:[%" PRIu64 ", %" PRIu64 ", %" PRIu64 ", %" PRIu64 "], type = %s\n", + __func__, i, ggml_n_dims(cur), cur->name, tensor_size, offset, cur->ne[0], cur->ne[1], cur->ne[2], cur->ne[3], ggml_type_name(type)); } } } - clip_ctx * new_clip = new clip_ctx(ctx_params); - - // update projector type - { - int idx = gguf_find_key(ctx, KEY_PROJ_TYPE); - if (idx != -1) { - const std::string proj_type = gguf_get_val_str(ctx, idx); - new_clip->proj_type = clip_projector_type_from_string(proj_type); - } else { - new_clip->proj_type = PROJECTOR_TYPE_MLP; - } - - if (new_clip->proj_type == PROJECTOR_TYPE_MLP) { - if (gguf_find_tensor(ctx, format(TN_LLAVA_PROJ, 3, "weight").c_str()) != -1) { - new_clip->proj_type = PROJECTOR_TYPE_MLP_NORM; + void load_hparams() { + // projector type + { + std::string proj_type; + get_string(KEY_PROJ_TYPE, proj_type, false); + if (!proj_type.empty()) { + ctx_clip.proj_type = clip_projector_type_from_string(proj_type); + } + if (ctx_clip.proj_type == PROJECTOR_TYPE_UNKNOWN) { + throw std::runtime_error(string_format("%s: unknown projector 
type: %s\n", __func__, proj_type.c_str())); } } - } - // model size and capabilities - { - int idx = get_key_idx(ctx, KEY_HAS_TEXT_ENC); - new_clip->has_text_encoder = gguf_get_val_bool(ctx, idx); + // other hparams + { + get_bool(KEY_HAS_TEXT_ENC, ctx_clip.has_text_encoder, false); + get_bool(KEY_HAS_VIS_ENC, ctx_clip.has_vision_encoder, false); + GGML_ASSERT(ctx_clip.has_vision_encoder); + GGML_ASSERT(!ctx_clip.has_text_encoder); - idx = get_key_idx(ctx, KEY_HAS_VIS_ENC); - new_clip->has_vision_encoder = gguf_get_val_bool(ctx, idx); + // legacy keys, use KEY_PROJ_TYPE instead + get_bool(KEY_HAS_LLAVA_PROJ, ctx_clip.has_llava_projector, false); + get_bool(KEY_HAS_MINICPMV_PROJ, ctx_clip.has_minicpmv_projector, false); + get_i32(KEY_MINICPMV_VERSION, ctx_clip.minicpmv_version, false); + get_bool(KEY_HAS_GLM_PROJ, ctx_clip.has_glm_projector, false); + get_bool(KEY_HAS_QWEN2VL_MERGER, ctx_clip.has_qwen2vl_merger, false); + // !!! do NOT extend the list above, use KEY_PROJ_TYPE instead - idx = gguf_find_key(ctx, KEY_HAS_LLAVA_PROJ); - if (idx != -1) { - new_clip->has_llava_projector = gguf_get_val_bool(ctx, idx); - } + get_bool(KEY_USE_GELU, ctx_clip.use_gelu, false); + get_bool(KEY_USE_SILU, ctx_clip.use_silu, false); - idx = gguf_find_key(ctx, KEY_HAS_MINICPMV_PROJ); - if (idx != -1) { - new_clip->has_minicpmv_projector = gguf_get_val_bool(ctx, idx); - } + auto & hparams = ctx_clip.vision_model.hparams; + get_u32(string_format(KEY_N_EMBD, "vision"), hparams.hidden_size); + get_u32(string_format(KEY_N_HEAD, "vision"), hparams.n_head); + get_u32(string_format(KEY_N_FF, "vision"), hparams.n_intermediate); + get_u32(string_format(KEY_N_BLOCK, "vision"), hparams.n_layer); + get_u32(string_format(KEY_PROJ_DIM, "vision"), hparams.projection_dim); + get_f32(string_format(KEY_LAYER_NORM_EPS, "vision"), hparams.eps); + get_u32(KEY_IMAGE_SIZE, hparams.image_size); + get_u32(KEY_PATCH_SIZE, hparams.patch_size); + get_u32(KEY_IMAGE_CROP_RESOLUTION, hparams.image_crop_resolution, false); + get_arr_int(KEY_IMAGE_GRID_PINPOINTS, hparams.image_grid_pinpoints, false); - idx = gguf_find_key(ctx, KEY_MINICPMV_VERSION); - if (idx != -1) { - new_clip->minicpmv_version = gguf_get_val_i32(ctx, idx); - } + { + std::string mm_patch_merge_type; + get_string(KEY_MM_PATCH_MERGE_TYPE, mm_patch_merge_type, false); + if (mm_patch_merge_type == "spatial_unpad") { + hparams.mm_patch_merge_type = PATCH_MERGE_SPATIAL_UNPAD; + } + } - idx = gguf_find_key(ctx, KEY_HAS_GLM_PROJ); - if (idx != -1) { - new_clip->has_glm_projector = gguf_get_val_bool(ctx, idx); - } + { + int idx_mean = gguf_find_key(ctx_gguf.get(), KEY_IMAGE_MEAN); + int idx_std = gguf_find_key(ctx_gguf.get(), KEY_IMAGE_STD); + GGML_ASSERT(idx_mean >= 0 && "image_mean not found"); + GGML_ASSERT(idx_std >= 0 && "image_std not found"); + const float * mean_data = (const float *) gguf_get_arr_data(ctx_gguf.get(), idx_mean); + const float * std_data = (const float *) gguf_get_arr_data(ctx_gguf.get(), idx_std); + for (int i = 0; i < 3; ++i) { + ctx_clip.image_mean[i] = mean_data[i]; + ctx_clip.image_std[i] = std_data[i]; + } + } - idx = gguf_find_key(ctx, KEY_HAS_QWEN2VL_MERGER); - if (idx != -1) { - new_clip->has_qwen2vl_merger = gguf_get_val_bool(ctx, idx); - } - // GGML_ASSERT(new_clip->has_llava_projector); // see monatis/clip.cpp for image and/or text encoding for semantic search + // Load the vision feature layer indices if they are explicitly provided; + // if multiple vision feature layers are present, the values will be concatenated + // to form the final 
visual features. + // NOTE: gguf conversions should standardize the values of the vision feature layer to + // be non-negative, since we use -1 to mark values as unset here. + std::vector vision_feature_layer; + get_arr_int(KEY_FEATURE_LAYER, vision_feature_layer, false); + // convert std::vector to std::unordered_set + for (auto & layer : vision_feature_layer) { + hparams.vision_feature_layer.insert(layer); + } + // Calculate the deepest feature layer based on hparams and projector type + ctx_clip.max_feature_layer = get_deepest_feature_layer(&ctx_clip); - GGML_ASSERT(new_clip->has_vision_encoder); - GGML_ASSERT(!new_clip->has_text_encoder); - - try { - idx = get_key_idx(ctx, KEY_USE_GELU); - new_clip->use_gelu = gguf_get_val_bool(ctx, idx); - } catch (std::runtime_error & /*e*/) { - new_clip->use_gelu = false; - } - - try { - idx = get_key_idx(ctx, KEY_USE_SILU); - new_clip->use_silu = gguf_get_val_bool(ctx, idx); - } catch (std::runtime_error & /*e*/) { - new_clip->use_silu = false; - } - - if (verbosity >= 1) { - LOG_INF("%s: text_encoder: %d\n", __func__, new_clip->has_text_encoder); - LOG_INF("%s: vision_encoder: %d\n", __func__, new_clip->has_vision_encoder); - LOG_INF("%s: llava_projector: %d\n", __func__, new_clip->has_llava_projector); - LOG_INF("%s: minicpmv_projector: %d\n", __func__, new_clip->has_minicpmv_projector); - LOG_INF("%s: minicpmv_version: %d\n", __func__, new_clip->minicpmv_version); - LOG_INF("%s: glm_projector: %d\n", __func__, new_clip->has_glm_projector); - LOG_INF("%s: model size: %.2f MB\n", __func__, model_size / 1024.0 / 1024.0); - LOG_INF("%s: metadata size: %.2f MB\n", __func__, ggml_get_mem_size(meta) / 1024.0 / 1024.0); + LOG_INF("%s: text_encoder: %d\n", __func__, ctx_clip.has_text_encoder); + LOG_INF("%s: vision_encoder: %d\n", __func__, ctx_clip.has_vision_encoder); + LOG_INF("%s: llava_projector: %d\n", __func__, ctx_clip.has_llava_projector); + LOG_INF("%s: minicpmv_projector: %d\n", __func__, ctx_clip.has_minicpmv_projector); + LOG_INF("%s: minicpmv_version: %d\n", __func__, ctx_clip.minicpmv_version); + LOG_INF("%s: glm_projector: %d\n", __func__, ctx_clip.has_glm_projector); + LOG_INF("%s: model size: %.2f MiB\n", __func__, model_size / 1024.0 / 1024.0); + LOG_INF("%s: metadata size: %.2f MiB\n", __func__, ggml_get_mem_size(ctx_meta.get()) / 1024.0 / 1024.0); } } - LOG_INF("%s: params backend buffer size = % 6.2f MB (%i tensors)\n", __func__, model_size / (1024.0 * 1024.0), n_tensors); + void load_tensors() { + std::map tensor_offset; + std::vector tensors_to_load; - // load tensors - { - std::vector read_buf; + // get offsets + for (int64_t i = 0; i < gguf_get_n_tensors(ctx_gguf.get()); ++i) { + const char * name = gguf_get_tensor_name(ctx_gguf.get(), i); + tensor_offset[name] = gguf_get_data_offset(ctx_gguf.get()) + gguf_get_tensor_offset(ctx_gguf.get(), i); + } + + // create data context struct ggml_init_params params = { - /*.mem_size =*/ (n_tensors + 1) * ggml_tensor_overhead(), + /*.mem_size =*/ (gguf_get_n_tensors(ctx_gguf.get()) + 1) * ggml_tensor_overhead(), /*.mem_buffer =*/ NULL, /*.no_alloc =*/ true, }; - - new_clip->ctx_data = ggml_init(params); - if (!new_clip->ctx_data) { - LOG_ERR("%s: ggml_init() failed\n", __func__); - clip_free(new_clip); - gguf_free(ctx); - return nullptr; + ctx_clip.ctx_data.reset(ggml_init(params)); + if (!ctx_clip.ctx_data) { + throw std::runtime_error(string_format("%s: failed to init ggml context\n", __func__)); } - auto fin = std::ifstream(fname, std::ios::binary); - if (!fin) { - LOG_ERR("cannot open 
model file for loading tensors\n"); - clip_free(new_clip); - gguf_free(ctx); - return nullptr; - } - - // add tensors to context - for (int i = 0; i < n_tensors; ++i) { - const char * name = gguf_get_tensor_name(ctx, i); - struct ggml_tensor * t = ggml_get_tensor(meta, name); - struct ggml_tensor * cur = ggml_dup_tensor(new_clip->ctx_data, t); - ggml_set_name(cur, name); - } - - // alloc memory and offload data - ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(new_clip->backend); - new_clip->buf = ggml_backend_alloc_ctx_tensors_from_buft(new_clip->ctx_data, buft); - ggml_backend_buffer_set_usage(new_clip->buf, GGML_BACKEND_BUFFER_USAGE_WEIGHTS); - for (int i = 0; i < n_tensors; ++i) { - const char * name = gguf_get_tensor_name(ctx, i); - struct ggml_tensor * cur = ggml_get_tensor(new_clip->ctx_data, name); - const size_t offset = gguf_get_data_offset(ctx) + gguf_get_tensor_offset(ctx, i); - fin.seekg(offset, std::ios::beg); - if (!fin) { - LOG_ERR("%s: failed to seek for tensor %s\n", __func__, name); - clip_free(new_clip); - gguf_free(ctx); - return nullptr; + // helper function + auto get_tensor = [&](const std::string & name, bool required = true) { + struct ggml_tensor * cur = ggml_get_tensor(ctx_meta.get(), name.c_str()); + if (!cur && required) { + throw std::runtime_error(string_format("%s: unable to find tensor %s\n", __func__, name.c_str())); } - int num_bytes = ggml_nbytes(cur); - if (ggml_backend_buft_is_host(buft)) { - // for the CPU and Metal backend, we can read directly into the tensor - fin.read(reinterpret_cast(cur->data), num_bytes); - } else { - // read into a temporary buffer first, then copy to device memory - read_buf.resize(num_bytes); - fin.read(reinterpret_cast(read_buf.data()), num_bytes); - ggml_backend_tensor_set(cur, read_buf.data(), 0, num_bytes); + if (cur) { + tensors_to_load.push_back(cur); + // add tensors to context + struct ggml_tensor * data_tensor = ggml_dup_tensor(ctx_clip.ctx_data.get(), cur); + ggml_set_name(data_tensor, cur->name); + cur = data_tensor; } - } - fin.close(); - } + return cur; + }; - // vision model - if (new_clip->has_vision_encoder) { - // load vision model - auto & vision_model = new_clip->vision_model; - auto & hparams = vision_model.hparams; - hparams.hidden_size = get_u32(ctx, format(KEY_N_EMBD, "vision")); - hparams.n_head = get_u32(ctx, format(KEY_N_HEAD, "vision")); - hparams.n_intermediate = get_u32(ctx, format(KEY_N_FF, "vision")); - hparams.n_layer = get_u32(ctx, format(KEY_N_BLOCK, "vision")); - hparams.image_size = get_u32(ctx, KEY_IMAGE_SIZE); - hparams.patch_size = get_u32(ctx, KEY_PATCH_SIZE); - hparams.projection_dim = get_u32(ctx, format(KEY_PROJ_DIM, "vision")); - hparams.eps = get_f32(ctx, format(KEY_LAYER_NORM_EPS, "vision")); + auto & vision_model = ctx_clip.vision_model; - try { - int idx = get_key_idx(ctx, KEY_IMAGE_GRID_PINPOINTS); - int n = gguf_get_arr_n(ctx, idx); - const int32_t * pinpoints = (const int32_t *)gguf_get_arr_data(ctx, idx); - for (int i = 0; i < n; ++i) { - hparams.image_grid_pinpoints.push_back(pinpoints[i]); - } - } catch (std::runtime_error & /*e*/) { } + vision_model.class_embedding = get_tensor(TN_CLASS_EMBD, false); - // Load the vision feature layer indices if they are explicitly provided; - // if multiple vision feature layers are present, the values will be concatenated - // to form the final visual features. 
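// Illustrative sketch, not from the patch: load_tensors() above first duplicates each
// metadata tensor into ctx_data via the get_tensor(name, required) lambda (optional
// tensors simply stay nullptr), then, once the backend buffer is allocated, streams each
// tensor's bytes from the file at its gguf offset, the same way the removed loop did.
// Roughly (assumed helper name; needs ggml-backend.h, <fstream>, <vector>):
static void read_tensor_data_sketch(std::ifstream & fin, size_t offset, ggml_tensor * cur, bool buffer_is_host) {
    const size_t nbytes = ggml_nbytes(cur);
    fin.seekg(offset, std::ios::beg);
    if (buffer_is_host) {
        fin.read(reinterpret_cast<char *>(cur->data), nbytes);  // host buffer: read in place
    } else {
        std::vector<char> staging(nbytes);                       // device buffer: stage, then upload
        fin.read(staging.data(), nbytes);
        ggml_backend_tensor_set(cur, staging.data(), 0, nbytes);
    }
}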
- // NOTE: gguf conversions should standardize the values of the vision feature layer to - // be non-negative, since we use -1 to mark values as unset here. - try { - int idx = get_key_idx(ctx, KEY_FEATURE_LAYER); - int n = gguf_get_arr_n(ctx, idx); + vision_model.pre_ln_w = get_tensor(string_format(TN_LN_PRE, "v", "weight"), false); + vision_model.pre_ln_b = get_tensor(string_format(TN_LN_PRE, "v", "bias"), false); - const int32_t * vision_feature_layer = (const int32_t *)gguf_get_arr_data(ctx, idx); + vision_model.post_ln_w = get_tensor(string_format(TN_LN_POST, "v", "weight"), false); + vision_model.post_ln_b = get_tensor(string_format(TN_LN_POST, "v", "bias"), false); - for (int i = 0; i < n; ++i) { - hparams.vision_feature_layer.insert(vision_feature_layer[i]); - } - } catch (std::runtime_error & /*e*/) { } - - try { - int idx = get_key_idx(ctx, KEY_MM_PATCH_MERGE_TYPE); - strcpy(hparams.mm_patch_merge_type, gguf_get_val_str(ctx, idx)); - } catch (std::runtime_error & /*e*/) { - strcpy(hparams.mm_patch_merge_type, "flat"); + vision_model.patch_bias = get_tensor(TN_PATCH_BIAS, false); + vision_model.patch_embeddings_0 = get_tensor(TN_PATCH_EMBD, false); + vision_model.patch_embeddings_1 = get_tensor(TN_PATCH_EMBD_1, false); + if (vision_model.patch_embeddings_1 == nullptr) { + ctx_clip.has_qwen2vl_merger = false; } - try { - hparams.image_crop_resolution = get_u32(ctx, KEY_IMAGE_CROP_RESOLUTION); // llava-1.6 - } catch(const std::exception& /*e*/) { - hparams.image_crop_resolution = hparams.image_size; - } + vision_model.position_embeddings = get_tensor(string_format(TN_POS_EMBD, "v"), false); - int idx_mean = get_key_idx(ctx, KEY_IMAGE_MEAN); - int idx_std = get_key_idx(ctx, KEY_IMAGE_STD); - - const float * mean_data = (const float *)gguf_get_arr_data(ctx, idx_mean); - const float * std_data = (const float *)gguf_get_arr_data(ctx, idx_std); - - for (int i = 0; i < 3; ++i) { - new_clip->image_mean[i] = mean_data[i]; - new_clip->image_std[i] = std_data[i]; - } - - // Calculate the deepest feature layer based on hparams and projector type - new_clip->max_feature_layer = get_deepest_feature_layer(new_clip); - - if (verbosity >= 2) { - LOG_INF("\n%s: vision model hparams\n", __func__); - LOG_INF("image_size %d\n", hparams.image_size); - LOG_INF("patch_size %d\n", hparams.patch_size); - LOG_INF("v_hidden_size %d\n", hparams.hidden_size); - LOG_INF("v_n_intermediate %d\n", hparams.n_intermediate); - LOG_INF("v_projection_dim %d\n", hparams.projection_dim); - LOG_INF("v_n_head %d\n", hparams.n_head); - LOG_INF("v_n_layer %d\n", hparams.n_layer); - LOG_INF("v_eps %f\n", hparams.eps); - LOG_INF("v_image_mean %f %f %f\n", new_clip->image_mean[0], new_clip->image_mean[1], new_clip->image_mean[2]); - LOG_INF("v_image_std %f %f %f\n", new_clip->image_std[0], new_clip->image_std[1], new_clip->image_std[2]); - LOG_INF("v_image_grid_pinpoints: "); - for (const auto & pp : hparams.image_grid_pinpoints) { - LOG_INF("%d ", pp); - } - LOG_INF("\n"); - LOG_INF("v_vision_feature_layer: "); - for (const auto & feature_layer: hparams.vision_feature_layer) { - LOG_INF("%d ", feature_layer); - } - LOG_INF("\n"); - LOG_INF("v_mm_patch_merge_type: %s\n", hparams.mm_patch_merge_type); - - } - - try { - vision_model.class_embedding = get_tensor(new_clip->ctx_data, TN_CLASS_EMBD); - new_clip->has_class_embedding = true; - } catch (const std::exception& /*e*/) { - new_clip->has_class_embedding = false; - } - - try { - vision_model.pre_ln_w = get_tensor(new_clip->ctx_data, format(TN_LN_PRE, "v", "weight")); - 
vision_model.pre_ln_b = get_tensor(new_clip->ctx_data, format(TN_LN_PRE, "v", "bias")); - new_clip->has_pre_norm = true; - } catch (std::exception & /*e*/) { - new_clip->has_pre_norm = false; - } - - try { - vision_model.post_ln_w = get_tensor(new_clip->ctx_data, format(TN_LN_POST, "v", "weight")); - vision_model.post_ln_b = get_tensor(new_clip->ctx_data, format(TN_LN_POST, "v", "bias")); - new_clip->has_post_norm = true; - } catch (std::exception & /*e*/) { - new_clip->has_post_norm = false; - } - - try { - vision_model.patch_bias = get_tensor(new_clip->ctx_data, TN_PATCH_BIAS); - new_clip->has_patch_bias = true; - } catch (std::exception & /*e*/) { - new_clip->has_patch_bias = false; - } - - try { - vision_model.patch_embeddings_0 = get_tensor(new_clip->ctx_data, TN_PATCH_EMBD); - } catch(const std::exception& /*e*/) { - vision_model.patch_embeddings_0 = nullptr; - } - - try { - vision_model.position_embeddings = get_tensor(new_clip->ctx_data, format(TN_POS_EMBD, "v")); - } catch(const std::exception& /*e*/) { - vision_model.position_embeddings = nullptr; - } - - try { - vision_model.patch_embeddings_1 = get_tensor(new_clip->ctx_data, TN_PATCH_EMBD_1); - } catch(const std::exception& /*e*/) { - new_clip->has_qwen2vl_merger = false; - } - - // LLaVA projection - if (new_clip->proj_type == PROJECTOR_TYPE_MLP || new_clip->proj_type == PROJECTOR_TYPE_MLP_NORM) { - vision_model.mm_0_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 0, "weight")); - vision_model.mm_0_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 0, "bias")); - try { - // Yi-type llava - vision_model.mm_1_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 1, "weight")); - vision_model.mm_1_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 1, "bias")); - } catch (std::runtime_error & /*e*/) { } - try { - // missing in Yi-type llava - vision_model.mm_2_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 2, "weight")); - vision_model.mm_2_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 2, "bias")); - } catch (std::runtime_error & /*e*/) { } - try { - // Yi-type llava - vision_model.mm_3_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 3, "weight")); - vision_model.mm_3_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 3, "bias")); - } catch (std::runtime_error & /*e*/) { } - try { - // Yi-type llava - vision_model.mm_4_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 4, "weight")); - vision_model.mm_4_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 4, "bias")); - } catch (std::runtime_error & /*e*/) { } - try { - vision_model.image_newline = get_tensor(new_clip->ctx_data, TN_IMAGE_NEWLINE); - // LOG_INF("%s: image_newline tensor (llava-1.6) found\n", __func__); - } catch (std::runtime_error & /*e*/) { } - } else if (new_clip->proj_type == PROJECTOR_TYPE_LDP) { - // MobileVLM projection - vision_model.mm_model_mlp_1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 1, "weight")); - vision_model.mm_model_mlp_1_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 1, "bias")); - vision_model.mm_model_mlp_3_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 3, "weight")); - vision_model.mm_model_mlp_3_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 3, "bias")); - vision_model.mm_model_block_1_block_0_0_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 0, "0.weight")); - vision_model.mm_model_block_1_block_0_1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 0, "1.weight")); - 
vision_model.mm_model_block_1_block_0_1_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 0, "1.bias")); - vision_model.mm_model_block_1_block_1_fc1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 1, "fc1.weight")); - vision_model.mm_model_block_1_block_1_fc1_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 1, "fc1.bias")); - vision_model.mm_model_block_1_block_1_fc2_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 1, "fc2.weight")); - vision_model.mm_model_block_1_block_1_fc2_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 1, "fc2.bias")); - vision_model.mm_model_block_1_block_2_0_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 2, "0.weight")); - vision_model.mm_model_block_1_block_2_1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 2, "1.weight")); - vision_model.mm_model_block_1_block_2_1_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 1, 2, "1.bias")); - vision_model.mm_model_block_2_block_0_0_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 0, "0.weight")); - vision_model.mm_model_block_2_block_0_1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 0, "1.weight")); - vision_model.mm_model_block_2_block_0_1_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 0, "1.bias")); - vision_model.mm_model_block_2_block_1_fc1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 1, "fc1.weight")); - vision_model.mm_model_block_2_block_1_fc1_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 1, "fc1.bias")); - vision_model.mm_model_block_2_block_1_fc2_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 1, "fc2.weight")); - vision_model.mm_model_block_2_block_1_fc2_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 1, "fc2.bias")); - vision_model.mm_model_block_2_block_2_0_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 2, "0.weight")); - vision_model.mm_model_block_2_block_2_1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 2, "1.weight")); - vision_model.mm_model_block_2_block_2_1_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 2, "1.bias")); - } - else if (new_clip->proj_type == PROJECTOR_TYPE_LDPV2) - { - // MobilVLM_V2 projection - vision_model.mm_model_mlp_0_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 0, "weight")); - vision_model.mm_model_mlp_0_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 0, "bias")); - vision_model.mm_model_mlp_2_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 2, "weight")); - vision_model.mm_model_mlp_2_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 2, "bias")); - vision_model.mm_model_peg_0_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_PEG, 0, "weight")); - vision_model.mm_model_peg_0_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_PEG, 0, "bias")); - } - else if (new_clip->proj_type == PROJECTOR_TYPE_RESAMPLER) { - // vision_model.mm_model_pos_embed = get_tensor(new_clip->ctx_data, TN_MINICPMV_POS_EMBD); - vision_model.mm_model_pos_embed_k = get_tensor(new_clip->ctx_data, TN_MINICPMV_POS_EMBD_K); - vision_model.mm_model_query = get_tensor(new_clip->ctx_data, TN_MINICPMV_QUERY); - vision_model.mm_model_proj = get_tensor(new_clip->ctx_data, TN_MINICPMV_PROJ); - vision_model.mm_model_kv_proj = get_tensor(new_clip->ctx_data, TN_MINICPMV_KV_PROJ); - vision_model.mm_model_attn_q_w = get_tensor(new_clip->ctx_data, 
format(TN_MINICPMV_ATTN, "q", "weight")); - vision_model.mm_model_attn_k_w = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_ATTN, "k", "weight")); - vision_model.mm_model_attn_v_w = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_ATTN, "v", "weight")); - vision_model.mm_model_attn_q_b = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_ATTN, "q", "bias")); - vision_model.mm_model_attn_k_b = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_ATTN, "k", "bias")); - vision_model.mm_model_attn_v_b = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_ATTN, "v", "bias")); - vision_model.mm_model_attn_o_w = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_ATTN, "out", "weight")); - vision_model.mm_model_attn_o_b = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_ATTN, "out", "bias")); - vision_model.mm_model_ln_q_w = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_LN, "q", "weight")); - vision_model.mm_model_ln_q_b = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_LN, "q", "bias")); - vision_model.mm_model_ln_kv_w = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_LN, "kv", "weight")); - vision_model.mm_model_ln_kv_b = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_LN, "kv", "bias")); - vision_model.mm_model_ln_post_w = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_LN, "post", "weight")); - vision_model.mm_model_ln_post_b = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_LN, "post", "bias")); - } - else if (new_clip->proj_type == PROJECTOR_TYPE_GLM_EDGE) { - vision_model.mm_model_adapter_conv_w = get_tensor(new_clip->ctx_data, format(TN_GLM_ADAPER_CONV, "weight")); - vision_model.mm_model_adapter_conv_b = get_tensor(new_clip->ctx_data, format(TN_GLM_ADAPER_CONV, "bias")); - vision_model.mm_model_mlp_0_w = get_tensor(new_clip->ctx_data, format(TN_GLM_ADAPTER_LINEAR,"weight")); - vision_model.mm_model_ln_q_w = get_tensor(new_clip->ctx_data, format(TN_GLM_ADAPTER_NORM_1,"weight")); - vision_model.mm_model_ln_q_b = get_tensor(new_clip->ctx_data, format(TN_GLM_ADAPTER_NORM_1,"bias")); - vision_model.mm_model_mlp_1_w = get_tensor(new_clip->ctx_data, format(TN_GLM_ADAPTER_D_H_2_4H,"weight")); - vision_model.mm_model_mlp_2_w = get_tensor(new_clip->ctx_data, format(TN_GLM_ADAPTER_GATE,"weight")); - vision_model.mm_model_mlp_3_w = get_tensor(new_clip->ctx_data, format(TN_GLM_ADAPTER_D_4H_2_H,"weight")); - vision_model.boi_w = get_tensor(new_clip->ctx_data, TN_GLM_BOI_W); - vision_model.eoi_w = get_tensor(new_clip->ctx_data, TN_GLM_EOI_W); - } - else if (new_clip->proj_type == PROJECTOR_TYPE_MERGER) { - vision_model.mm_0_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 0, "weight")); - vision_model.mm_0_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 0, "bias")); - vision_model.mm_1_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 2, "weight")); - vision_model.mm_1_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 2, "bias")); - } - else if (new_clip->proj_type == PROJECTOR_TYPE_GEMMA3) { - vision_model.mm_input_proj_w = get_tensor(new_clip->ctx_data, TN_MM_INP_PROJ); - vision_model.mm_soft_emb_norm_w = get_tensor(new_clip->ctx_data, TN_MM_SOFT_EMB_N); - } - else { - std::string proj_type = PROJECTOR_TYPE_NAMES[new_clip->proj_type]; - throw std::runtime_error(format("%s: don't support projector with: %s currently\n", __func__, proj_type.c_str())); - } - - vision_model.layers.resize(hparams.n_layer); - - for (int il = 0; il < hparams.n_layer; ++il) { + // layers + vision_model.layers.resize(vision_model.hparams.n_layer); + for (int il = 0; il < 
vision_model.hparams.n_layer; ++il) { auto & layer = vision_model.layers[il]; - layer.k_w = get_tensor(new_clip->ctx_data, format(TN_ATTN_K, "v", il, "weight")); - layer.q_w = get_tensor(new_clip->ctx_data, format(TN_ATTN_Q, "v", il, "weight")); - layer.v_w = get_tensor(new_clip->ctx_data, format(TN_ATTN_V, "v", il, "weight")); - layer.o_w = get_tensor(new_clip->ctx_data, format(TN_ATTN_OUTPUT, "v", il, "weight")); - layer.ln_1_w = get_tensor(new_clip->ctx_data, format(TN_LN_1, "v", il, "weight")); - layer.ln_2_w = get_tensor(new_clip->ctx_data, format(TN_LN_2, "v", il, "weight")); - layer.ff_i_w = get_tensor(new_clip->ctx_data, format(TN_FFN_DOWN, "v", il, "weight")); - layer.ff_o_w = get_tensor(new_clip->ctx_data, format(TN_FFN_UP, "v", il, "weight")); - layer.k_b = get_tensor(new_clip->ctx_data, format(TN_ATTN_K, "v", il, "bias")); - layer.q_b = get_tensor(new_clip->ctx_data, format(TN_ATTN_Q, "v", il, "bias")); - layer.v_b = get_tensor(new_clip->ctx_data, format(TN_ATTN_V, "v", il, "bias")); - layer.o_b = get_tensor(new_clip->ctx_data, format(TN_ATTN_OUTPUT, "v", il, "bias")); - layer.ln_1_b = get_tensor(new_clip->ctx_data, format(TN_LN_1, "v", il, "bias")); - layer.ln_2_b = get_tensor(new_clip->ctx_data, format(TN_LN_2, "v", il, "bias")); - layer.ff_i_b = get_tensor(new_clip->ctx_data, format(TN_FFN_DOWN, "v", il, "bias")); - layer.ff_o_b = get_tensor(new_clip->ctx_data, format(TN_FFN_UP, "v", il, "bias")); + layer.k_w = get_tensor(string_format(TN_ATTN_K, "v", il, "weight")); + layer.q_w = get_tensor(string_format(TN_ATTN_Q, "v", il, "weight")); + layer.v_w = get_tensor(string_format(TN_ATTN_V, "v", il, "weight")); + layer.o_w = get_tensor(string_format(TN_ATTN_OUTPUT, "v", il, "weight")); + layer.ln_1_w = get_tensor(string_format(TN_LN_1, "v", il, "weight"), false); + layer.ln_2_w = get_tensor(string_format(TN_LN_2, "v", il, "weight"), false); + layer.ff_i_w = get_tensor(string_format(TN_FFN_DOWN, "v", il, "weight")); + layer.ff_o_w = get_tensor(string_format(TN_FFN_UP, "v", il, "weight")); + layer.k_b = get_tensor(string_format(TN_ATTN_K, "v", il, "bias"), false); + layer.q_b = get_tensor(string_format(TN_ATTN_Q, "v", il, "bias"), false); + layer.v_b = get_tensor(string_format(TN_ATTN_V, "v", il, "bias"), false); + layer.o_b = get_tensor(string_format(TN_ATTN_OUTPUT, "v", il, "bias"), false); + layer.ln_1_b = get_tensor(string_format(TN_LN_1, "v", il, "bias"), false); + layer.ln_2_b = get_tensor(string_format(TN_LN_2, "v", il, "bias"), false); + layer.ff_i_b = get_tensor(string_format(TN_FFN_DOWN, "v", il, "bias"), false); + layer.ff_o_b = get_tensor(string_format(TN_FFN_UP, "v", il, "bias"), false); + } + + switch (ctx_clip.proj_type) { + case PROJECTOR_TYPE_MLP: + case PROJECTOR_TYPE_MLP_NORM: + { + // LLaVA projection + vision_model.mm_0_w = get_tensor(string_format(TN_LLAVA_PROJ, 0, "weight"), false); + vision_model.mm_0_b = get_tensor(string_format(TN_LLAVA_PROJ, 0, "bias"), false); + // Yi-type llava + vision_model.mm_1_w = get_tensor(string_format(TN_LLAVA_PROJ, 1, "weight"), false); + vision_model.mm_1_b = get_tensor(string_format(TN_LLAVA_PROJ, 1, "bias"), false); + // missing in Yi-type llava + vision_model.mm_2_w = get_tensor(string_format(TN_LLAVA_PROJ, 2, "weight"), false); + vision_model.mm_2_b = get_tensor(string_format(TN_LLAVA_PROJ, 2, "bias"), false); + // Yi-type llava + vision_model.mm_3_w = get_tensor(string_format(TN_LLAVA_PROJ, 3, "weight"), false); + vision_model.mm_3_b = get_tensor(string_format(TN_LLAVA_PROJ, 3, "bias"), false); + vision_model.mm_4_w = 
get_tensor(string_format(TN_LLAVA_PROJ, 4, "weight"), false); + vision_model.mm_4_b = get_tensor(string_format(TN_LLAVA_PROJ, 4, "bias"), false); + if (vision_model.mm_3_w) { + // TODO: this is a hack to support Yi-type llava + ctx_clip.proj_type = PROJECTOR_TYPE_MLP_NORM; + } + vision_model.image_newline = get_tensor(TN_IMAGE_NEWLINE, false); + } break; + case PROJECTOR_TYPE_LDP: + { + // MobileVLM projection + vision_model.mm_model_mlp_1_w = get_tensor(string_format(TN_MVLM_PROJ_MLP, 1, "weight")); + vision_model.mm_model_mlp_1_b = get_tensor(string_format(TN_MVLM_PROJ_MLP, 1, "bias")); + vision_model.mm_model_mlp_3_w = get_tensor(string_format(TN_MVLM_PROJ_MLP, 3, "weight")); + vision_model.mm_model_mlp_3_b = get_tensor(string_format(TN_MVLM_PROJ_MLP, 3, "bias")); + vision_model.mm_model_block_1_block_0_0_w = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 1, 0, "0.weight")); + vision_model.mm_model_block_1_block_0_1_w = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 1, 0, "1.weight")); + vision_model.mm_model_block_1_block_0_1_b = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 1, 0, "1.bias")); + vision_model.mm_model_block_1_block_1_fc1_w = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 1, 1, "fc1.weight")); + vision_model.mm_model_block_1_block_1_fc1_b = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 1, 1, "fc1.bias")); + vision_model.mm_model_block_1_block_1_fc2_w = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 1, 1, "fc2.weight")); + vision_model.mm_model_block_1_block_1_fc2_b = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 1, 1, "fc2.bias")); + vision_model.mm_model_block_1_block_2_0_w = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 1, 2, "0.weight")); + vision_model.mm_model_block_1_block_2_1_w = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 1, 2, "1.weight")); + vision_model.mm_model_block_1_block_2_1_b = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 1, 2, "1.bias")); + vision_model.mm_model_block_2_block_0_0_w = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 2, 0, "0.weight")); + vision_model.mm_model_block_2_block_0_1_w = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 2, 0, "1.weight")); + vision_model.mm_model_block_2_block_0_1_b = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 2, 0, "1.bias")); + vision_model.mm_model_block_2_block_1_fc1_w = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 2, 1, "fc1.weight")); + vision_model.mm_model_block_2_block_1_fc1_b = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 2, 1, "fc1.bias")); + vision_model.mm_model_block_2_block_1_fc2_w = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 2, 1, "fc2.weight")); + vision_model.mm_model_block_2_block_1_fc2_b = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 2, 1, "fc2.bias")); + vision_model.mm_model_block_2_block_2_0_w = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 2, 2, "0.weight")); + vision_model.mm_model_block_2_block_2_1_w = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 2, 2, "1.weight")); + vision_model.mm_model_block_2_block_2_1_b = get_tensor(string_format(TN_MVLM_PROJ_BLOCK, 2, 2, "1.bias")); + } break; + case PROJECTOR_TYPE_LDPV2: + { + // MobilVLM_V2 projection + vision_model.mm_model_mlp_0_w = get_tensor(string_format(TN_MVLM_PROJ_MLP, 0, "weight")); + vision_model.mm_model_mlp_0_b = get_tensor(string_format(TN_MVLM_PROJ_MLP, 0, "bias")); + vision_model.mm_model_mlp_2_w = get_tensor(string_format(TN_MVLM_PROJ_MLP, 2, "weight")); + vision_model.mm_model_mlp_2_b = get_tensor(string_format(TN_MVLM_PROJ_MLP, 2, "bias")); + vision_model.mm_model_peg_0_w = get_tensor(string_format(TN_MVLM_PROJ_PEG, 0, "weight")); + 
vision_model.mm_model_peg_0_b = get_tensor(string_format(TN_MVLM_PROJ_PEG, 0, "bias"));
+                } break;
+            case PROJECTOR_TYPE_RESAMPLER:
+                {
+                    // vision_model.mm_model_pos_embed = get_tensor(new_clip->ctx_data, TN_MINICPMV_POS_EMBD);
+                    vision_model.mm_model_pos_embed_k = get_tensor(TN_MINICPMV_POS_EMBD_K);
+                    vision_model.mm_model_query = get_tensor(TN_MINICPMV_QUERY);
+                    vision_model.mm_model_proj = get_tensor(TN_MINICPMV_PROJ);
+                    vision_model.mm_model_kv_proj = get_tensor(TN_MINICPMV_KV_PROJ);
+                    vision_model.mm_model_attn_q_w = get_tensor(string_format(TN_MINICPMV_ATTN, "q", "weight"));
+                    vision_model.mm_model_attn_k_w = get_tensor(string_format(TN_MINICPMV_ATTN, "k", "weight"));
+                    vision_model.mm_model_attn_v_w = get_tensor(string_format(TN_MINICPMV_ATTN, "v", "weight"));
+                    vision_model.mm_model_attn_q_b = get_tensor(string_format(TN_MINICPMV_ATTN, "q", "bias"));
+                    vision_model.mm_model_attn_k_b = get_tensor(string_format(TN_MINICPMV_ATTN, "k", "bias"));
+                    vision_model.mm_model_attn_v_b = get_tensor(string_format(TN_MINICPMV_ATTN, "v", "bias"));
+                    vision_model.mm_model_attn_o_w = get_tensor(string_format(TN_MINICPMV_ATTN, "out", "weight"));
+                    vision_model.mm_model_attn_o_b = get_tensor(string_format(TN_MINICPMV_ATTN, "out", "bias"));
+                    vision_model.mm_model_ln_q_w = get_tensor(string_format(TN_MINICPMV_LN, "q", "weight"));
+                    vision_model.mm_model_ln_q_b = get_tensor(string_format(TN_MINICPMV_LN, "q", "bias"));
+                    vision_model.mm_model_ln_kv_w = get_tensor(string_format(TN_MINICPMV_LN, "kv", "weight"));
+                    vision_model.mm_model_ln_kv_b = get_tensor(string_format(TN_MINICPMV_LN, "kv", "bias"));
+                    vision_model.mm_model_ln_post_w = get_tensor(string_format(TN_MINICPMV_LN, "post", "weight"));
+                    vision_model.mm_model_ln_post_b = get_tensor(string_format(TN_MINICPMV_LN, "post", "bias"));
+                } break;
+            case PROJECTOR_TYPE_GLM_EDGE:
+                {
+                    vision_model.mm_model_adapter_conv_w = get_tensor(string_format(TN_GLM_ADAPER_CONV, "weight"));
+                    vision_model.mm_model_adapter_conv_b = get_tensor(string_format(TN_GLM_ADAPER_CONV, "bias"));
+                    vision_model.mm_model_mlp_0_w = get_tensor(string_format(TN_GLM_ADAPTER_LINEAR,"weight"));
+                    vision_model.mm_model_ln_q_w = get_tensor(string_format(TN_GLM_ADAPTER_NORM_1,"weight"));
+                    vision_model.mm_model_ln_q_b = get_tensor(string_format(TN_GLM_ADAPTER_NORM_1,"bias"));
+                    vision_model.mm_model_mlp_1_w = get_tensor(string_format(TN_GLM_ADAPTER_D_H_2_4H,"weight"));
+                    vision_model.mm_model_mlp_2_w = get_tensor(string_format(TN_GLM_ADAPTER_GATE,"weight"));
+                    vision_model.mm_model_mlp_3_w = get_tensor(string_format(TN_GLM_ADAPTER_D_4H_2_H,"weight"));
+                    vision_model.boi_w = get_tensor(TN_GLM_BOI_W);
+                    vision_model.eoi_w = get_tensor(TN_GLM_EOI_W);
+                } break;
+            case PROJECTOR_TYPE_MERGER:
+                {
+                    vision_model.mm_0_w = get_tensor(string_format(TN_LLAVA_PROJ, 0, "weight"));
+                    vision_model.mm_0_b = get_tensor(string_format(TN_LLAVA_PROJ, 0, "bias"));
+                    vision_model.mm_1_w = get_tensor(string_format(TN_LLAVA_PROJ, 2, "weight"));
+                    vision_model.mm_1_b = get_tensor(string_format(TN_LLAVA_PROJ, 2, "bias"));
+                } break;
+            case PROJECTOR_TYPE_GEMMA3:
+                {
+                    vision_model.mm_input_proj_w = get_tensor(TN_MM_INP_PROJ);
+                    vision_model.mm_soft_emb_norm_w = get_tensor(TN_MM_SOFT_EMB_N);
+                } break;
+            default:
+                GGML_ASSERT(false && "unknown projector type");
+        }
+
+        // load data
+        {
+            std::vector<uint8_t> read_buf;
+
+            auto fin = std::ifstream(fname, std::ios::binary);
+            if (!fin) {
+                throw std::runtime_error(string_format("%s: failed to open %s\n", __func__, fname.c_str()));
+            }
+
+            // alloc memory and offload
data + ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(ctx_clip.backend); + ctx_clip.buf.reset(ggml_backend_alloc_ctx_tensors_from_buft(ctx_clip.ctx_data.get(), buft)); + ggml_backend_buffer_set_usage(ctx_clip.buf.get(), GGML_BACKEND_BUFFER_USAGE_WEIGHTS); + for (auto & t : tensors_to_load) { + struct ggml_tensor * cur = ggml_get_tensor(ctx_clip.ctx_data.get(), t->name); + const size_t offset = tensor_offset[t->name]; + fin.seekg(offset, std::ios::beg); + if (!fin) { + throw std::runtime_error(string_format("%s: failed to seek for tensor %s\n", __func__, t->name)); + } + size_t num_bytes = ggml_nbytes(cur); + if (ggml_backend_buft_is_host(buft)) { + // for the CPU and Metal backend, we can read directly into the tensor + fin.read(reinterpret_cast(cur->data), num_bytes); + } else { + // read into a temporary buffer first, then copy to device memory + read_buf.resize(num_bytes); + fin.read(reinterpret_cast(read_buf.data()), num_bytes); + ggml_backend_tensor_set(cur, read_buf.data(), 0, num_bytes); + } + } + fin.close(); + + LOG_DBG("%s: loaded %zu tensors from %s\n", __func__, tensors_to_load.size(), fname.c_str()); } } - ggml_free(meta); + void alloc_compute_meta() { + ctx_clip.buf_compute_meta.resize(GGML_DEFAULT_GRAPH_SIZE * ggml_tensor_overhead() + ggml_graph_overhead()); - new_clip->ctx_gguf = ctx; - - // measure mem requirement and allocate - { - new_clip->buf_compute_meta.resize(GGML_DEFAULT_GRAPH_SIZE * ggml_tensor_overhead() + ggml_graph_overhead()); + // create a fake batch clip_image_f32_batch batch; - batch.size = 1; - batch.data = nullptr; - ggml_cgraph * gf = clip_image_build_graph(new_clip, &batch, nullptr, false); - ggml_backend_sched_reserve(new_clip->sched.get(), gf); - for (size_t i = 0; i < new_clip->backend_ptrs.size(); ++i) { - ggml_backend_t backend = new_clip->backend_ptrs[i]; - ggml_backend_buffer_type_t buft = new_clip->backend_buft[i]; - size_t size = ggml_backend_sched_get_buffer_size(new_clip->sched.get(), backend); + clip_image_f32_ptr img(clip_image_f32_init()); + clip_image_size image_size; + image_size.width = clip_get_image_size(&ctx_clip); + image_size.height = clip_get_image_size(&ctx_clip); + int n_patches = clip_get_image_size(&ctx_clip) / image_size.width; + img->nx = n_patches; + img->ny = n_patches; + img->buf.resize(n_patches * image_size.width * image_size.height * 3); + batch.entries.push_back(std::move(img)); + + ggml_cgraph * gf = clip_image_build_graph(&ctx_clip, batch, image_size, false); + ggml_backend_sched_reserve(ctx_clip.sched.get(), gf); + for (size_t i = 0; i < ctx_clip.backend_ptrs.size(); ++i) { + ggml_backend_t backend = ctx_clip.backend_ptrs[i]; + ggml_backend_buffer_type_t buft = ctx_clip.backend_buft[i]; + size_t size = ggml_backend_sched_get_buffer_size(ctx_clip.sched.get(), backend); if (size > 1) { LOG_INF("%s: %10s compute buffer size = %8.2f MiB\n", __func__, ggml_backend_buft_name(buft), @@ -1920,15 +1491,98 @@ struct clip_ctx * clip_init(const char * fname, struct clip_context_params ctx_p } } - return new_clip; + void get_bool(const std::string & key, bool & output, bool required = true) { + const int i = gguf_find_key(ctx_gguf.get(), key.c_str()); + if (i < 0) { + if (required) throw std::runtime_error("Key not found: " + key); + return; + } + output = gguf_get_val_bool(ctx_gguf.get(), i); + } + + void get_i32(const std::string & key, int & output, bool required = true) { + const int i = gguf_find_key(ctx_gguf.get(), key.c_str()); + if (i < 0) { + if (required) throw std::runtime_error("Key not 
found: " + key); + return; + } + output = gguf_get_val_i32(ctx_gguf.get(), i); + } + + void get_u32(const std::string & key, int & output, bool required = true) { + const int i = gguf_find_key(ctx_gguf.get(), key.c_str()); + if (i < 0) { + if (required) throw std::runtime_error("Key not found: " + key); + return; + } + output = gguf_get_val_u32(ctx_gguf.get(), i); + } + + void get_f32(const std::string & key, float & output, bool required = true) { + const int i = gguf_find_key(ctx_gguf.get(), key.c_str()); + if (i < 0) { + if (required) throw std::runtime_error("Key not found: " + key); + return; + } + output = gguf_get_val_f32(ctx_gguf.get(), i); + } + + void get_string(const std::string & key, std::string & output, bool required = true) { + const int i = gguf_find_key(ctx_gguf.get(), key.c_str()); + if (i < 0) { + if (required) throw std::runtime_error("Key not found: " + key); + return; + } + output = std::string(gguf_get_val_str(ctx_gguf.get(), i)); + } + + void get_arr_int(const std::string & key, std::vector & output, bool required = true) { + const int i = gguf_find_key(ctx_gguf.get(), key.c_str()); + if (i < 0) { + if (required) throw std::runtime_error("Key not found: " + key); + return; + } + int n = gguf_get_arr_n(ctx_gguf.get(), i); + output.resize(n); + const int32_t * values = (const int32_t *)gguf_get_arr_data(ctx_gguf.get(), i); + for (int i = 0; i < n; ++i) { + output[i] = values[i]; + } + } +}; + +// read and create ggml_context containing the tensors and their data +struct clip_ctx * clip_model_load(const char * fname, const int verbosity) { + return clip_init(fname, clip_context_params{ + /* use_gpu */ true, + /* verbosity */ static_cast(verbosity), + }); +} + +struct clip_ctx * clip_init(const char * fname, struct clip_context_params ctx_params) { + g_logger_state.verbosity_thold = ctx_params.verbosity; + clip_ctx * ctx_clip = new clip_ctx(ctx_params); + + try { + clip_model_loader loader(fname, *ctx_clip); + loader.load_hparams(); + loader.load_tensors(); + loader.alloc_compute_meta(); + } catch (const std::exception & e) { + LOG_ERR("%s: failed to load model '%s': %s\n", __func__, fname, e.what()); + delete ctx_clip; + return nullptr; + } + + return ctx_clip; } void clip_add_load_image_size(struct clip_ctx * ctx_clip, struct clip_image_size * load_image_size) { - ctx_clip->load_image_size = load_image_size; + ctx_clip->load_image_size = *load_image_size; // copy } struct clip_image_size * clip_get_load_image_size(struct clip_ctx * ctx_clip) { - return ctx_clip->load_image_size; + return &ctx_clip->load_image_size; } struct clip_image_size * clip_image_size_init() { @@ -1946,19 +1600,53 @@ struct clip_image_f32 * clip_image_f32_init() { return new clip_image_f32(); } -void clip_image_u8_free(struct clip_image_u8 * img) { delete img; } -void clip_image_f32_free(struct clip_image_f32 * img) { delete img; } -void clip_image_u8_batch_free(struct clip_image_u8_batch * batch) { - if (batch->size > 0) { - delete[] batch->data; - batch->size = 0; - } +struct clip_image_f32_batch * clip_image_f32_batch_init() { + return new clip_image_f32_batch(); } -void clip_image_f32_batch_free(struct clip_image_f32_batch * batch) { - if (batch->size > 0) { - delete[] batch->data; - batch->size = 0; + +unsigned char * clip_image_u8_get_data(struct clip_image_u8 * img, uint32_t * nx, uint32_t * ny) { + if (nx) *nx = img->nx; + if (ny) *ny = img->ny; + return img->buf.data(); +} + +void clip_image_size_free(struct clip_image_size * load_image_size) { + if (load_image_size == nullptr) { + 
return; } + delete load_image_size; +} +void clip_image_u8_free(struct clip_image_u8 * img) { if (img) delete img; } +void clip_image_f32_free(struct clip_image_f32 * img) { if (img) delete img; } +void clip_image_u8_batch_free(struct clip_image_u8_batch * batch) { if (batch) delete batch; } +void clip_image_f32_batch_free(struct clip_image_f32_batch * batch) { if (batch) delete batch; } + +size_t clip_image_f32_batch_n_images(const struct clip_image_f32_batch * batch) { + return batch->entries.size(); +} + +size_t clip_image_f32_batch_nx(const struct clip_image_f32_batch * batch, int idx) { + if (idx < 0 || idx >= (int)batch->entries.size()) { + LOG_ERR("%s: invalid index %d\n", __func__, idx); + return 0; + } + return batch->entries[idx]->nx; +} + +size_t clip_image_f32_batch_ny(const struct clip_image_f32_batch * batch, int idx) { + if (idx < 0 || idx >= (int)batch->entries.size()) { + LOG_ERR("%s: invalid index %d\n", __func__, idx); + return 0; + } + return batch->entries[idx]->ny; +} + +clip_image_f32 * clip_image_f32_get_img(const struct clip_image_f32_batch * batch, int idx) { + if (idx < 0 || idx >= (int)batch->entries.size()) { + LOG_ERR("%s: invalid index %d\n", __func__, idx); + return nullptr; + } + return batch->entries[idx].get(); } void clip_build_img_from_pixels(const unsigned char * rgb_pixels, int nx, int ny, clip_image_u8 * img) { @@ -2032,14 +1720,15 @@ static void bilinear_resize(const clip_image_u8& src, clip_image_u8& dst, int ta } // Normalize image to float32 - careful with pytorch .to(model.device, dtype=torch.float16) - this sometimes reduces precision (32>16>32), sometimes not -static void normalize_image_u8_to_f32(const clip_image_u8* src, clip_image_f32* dst, const float mean[3], const float std[3]) { - dst->nx = src->nx; - dst->ny = src->ny; - dst->buf.resize(src->buf.size()); +static void normalize_image_u8_to_f32(const clip_image_u8 & src, clip_image_f32 & dst, const float mean[3], const float std[3]) { + dst.nx = src.nx; + dst.ny = src.ny; + dst.buf.resize(src.buf.size()); - for (size_t i = 0; i < src->buf.size(); ++i) { + // TODO @ngxson : seems like this could be done more efficiently on cgraph + for (size_t i = 0; i < src.buf.size(); ++i) { int c = i % 3; // rgb - dst->buf[i] = (static_cast(src->buf[i]) / 255.0f - mean[c]) / std[c]; + dst.buf[i] = (static_cast(src.buf[i]) / 255.0f - mean[c]) / std[c]; } } @@ -2047,7 +1736,7 @@ inline int clip(int x, int lower, int upper) { return std::max(lower, std::min(x, upper)); } -static bool bicubic_resize(const clip_image_u8 &img, clip_image_u8 &dst, int target_width, int target_height) { +static bool bicubic_resize(const clip_image_u8 & img, clip_image_u8 & dst, int target_width, int target_height) { const int nx = img.nx; const int ny = img.ny; @@ -2185,13 +1874,13 @@ static std::pair select_best_resolution(const std::pair & or return best_fit; } -static std::vector divide_to_patches_u8(const clip_image_u8 & image, int patch_size) { - std::vector patches; +static std::vector divide_to_patches_u8(const clip_image_u8 & image, int patch_size) { + std::vector patches; int width = image.nx; int height = image.ny; for (int i = 0; i < height; i += patch_size) { for (int j = 0; j < width; j += patch_size) { - clip_image_u8 *patch = clip_image_u8_init(); + clip_image_u8_ptr patch(clip_image_u8_init()); patch->nx = std::min(patch_size, width - j); patch->ny = std::min(patch_size, height - i); patch->buf.resize(3 * patch->nx * patch->ny); @@ -2202,7 +1891,7 @@ static std::vector divide_to_patches_u8(const clip_image_u8 & 
im } } } - patches.push_back(patch); + patches.push_back(std::move(patch)); } } return patches; @@ -2283,7 +1972,7 @@ static std::pair uhd_best_grid(const int max_slice_nums, const int mul // -> https://arxiv.org/pdf/2403.11703 // -> https://github.com/thunlp/LLaVA-UHD // -> https://github.com/thunlp/LLaVA-UHD/blob/302301bc2175f7e717fb8548516188e89f649753/llava_uhd/train/llava-uhd/slice_logic.py#L118 -static std::vector> uhd_slice_image(const clip_image_u8 * img, const int max_slice_nums=9, const int scale_resolution=448, const int patch_size=14) { +static std::vector> uhd_slice_image(const clip_image_u8 * img, const int max_slice_nums=9, const int scale_resolution=448, const int patch_size=14) { const std::pair original_size={img->nx,img->ny}; const int original_width = img->nx; const int original_height = img->ny; @@ -2291,33 +1980,33 @@ static std::vector> uhd_slice_image(const clip_imag const float ratio = 1.0 * original_width * original_height/ (scale_resolution * scale_resolution); const int multiple = fmin(ceil(ratio), max_slice_nums); - std::vector> images; - LOG_INF("%s: multiple %d\n", __func__, multiple); - images.push_back(std::vector()); + std::vector> images; + LOG_DBG("%s: multiple %d\n", __func__, multiple); + images.push_back(std::vector()); if (multiple <= 1) { auto best_size = uhd_find_best_resize(original_size, scale_resolution, patch_size, true); - clip_image_u8 * source_image = clip_image_u8_init(); + clip_image_u8_ptr source_image(clip_image_u8_init()); bicubic_resize(*img, *source_image, best_size.first, best_size.second); // source_image = image.resize(best_size, Image.Resampling.BICUBIC) - images[images.size()-1].push_back(source_image); + images.back().push_back(std::move(source_image)); } else if (multiple > 1) { auto best_size = uhd_find_best_resize(original_size, scale_resolution, patch_size); - clip_image_u8 * source_image = clip_image_u8_init(); + clip_image_u8_ptr source_image(clip_image_u8_init()); bicubic_resize(*img, *source_image, best_size.first, best_size.second); // source_image = image.copy().resize(best_resize, Image.Resampling.BICUBIC) - LOG_INF("%s: image_size: %d %d; source_image size: %d %d\n", __func__, img->nx, img->ny, best_size.first, best_size.second); - images[images.size()-1].push_back(source_image); + LOG_DBG("%s: image_size: %d %d; source_image size: %d %d\n", __func__, img->nx, img->ny, best_size.first, best_size.second); + images.back().push_back(std::move(source_image)); std::pair best_grid = uhd_best_grid(max_slice_nums, multiple, log_ratio); - LOG_INF("%s: image_size: %d %d; best_grid: %d %d\n", __func__, img->nx, img->ny, best_grid.first, best_grid.second); + LOG_DBG("%s: image_size: %d %d; best_grid: %d %d\n", __func__, img->nx, img->ny, best_grid.first, best_grid.second); auto refine_size = uhd_get_refine_size(original_size, best_grid, scale_resolution, patch_size, true); - clip_image_u8 * refine_image = clip_image_u8_init(); + clip_image_u8_ptr refine_image(clip_image_u8_init()); bicubic_resize(*img, *refine_image, refine_size.first, refine_size.second); - LOG_INF("%s: refine_image_size: %d %d; refine_size: %d %d\n", __func__, refine_image->nx, refine_image->ny, refine_size.first, refine_size.second); + LOG_DBG("%s: refine_image_size: %d %d; refine_size: %d %d\n", __func__, refine_image->nx, refine_image->ny, refine_size.first, refine_size.second); // split_to_patches int width = refine_image->nx; @@ -2325,9 +2014,9 @@ static std::vector> uhd_slice_image(const clip_imag int grid_x = int(width / best_grid.first); int grid_y = 
int(height / best_grid.second); for (int patches_i = 0, ic = 0; patches_i < height && ic < best_grid.second; patches_i += grid_y, ic += 1){ - images.push_back(std::vector()); + images.push_back(std::vector()); for(int patches_j = 0, jc = 0; patches_j < width && jc < best_grid.first; patches_j += grid_x, jc += 1){ - clip_image_u8 * patch = clip_image_u8_init(); + clip_image_u8_ptr patch(clip_image_u8_init()); patch->nx = grid_x; patch->ny = grid_y; patch->buf.resize(3 * patch->nx * patch->ny); @@ -2340,10 +2029,9 @@ static std::vector> uhd_slice_image(const clip_imag patch->buf[j+2] = refine_image->buf[i+2]; } } - images[images.size()-1].push_back(patch); + images.back().push_back(std::move(patch)); } } - clip_image_u8_free(refine_image); } return images; } @@ -2351,8 +2039,8 @@ static std::vector> uhd_slice_image(const clip_imag int clip_uhd_num_image_embeds_col(struct clip_ctx * ctx_clip) { const int max_slice_nums=9; const int scale_resolution=448; - const int original_width = ctx_clip->load_image_size->width; - const int original_height = ctx_clip->load_image_size->height; + const int original_width = ctx_clip->load_image_size.width; + const int original_height = ctx_clip->load_image_size.height; const float log_ratio = log(1.0*original_width/original_height); const float ratio = 1.0 * original_width * original_height/ (scale_resolution * scale_resolution); const int multiple = fmin(ceil(ratio), max_slice_nums); @@ -2362,88 +2050,64 @@ int clip_uhd_num_image_embeds_col(struct clip_ctx * ctx_clip) { // returns the normalized float tensor for llava-1.5, for spatial_unpad with anyres processing for llava-1.6 it returns the normalized image patch tensors as a vector // res_imgs memory is being allocated here, previous allocations will be freed if found -bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch * res_imgs) { +bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, struct clip_image_f32_batch * res_imgs) { - if(clip_is_minicpmv(ctx)){ + if (clip_is_minicpmv(ctx)) { int max_slice_nums = 9; - std::vector> imgs = uhd_slice_image(img, max_slice_nums); - res_imgs->size = 0; - for (size_t i = 0; i < imgs.size(); ++i){ - res_imgs->size += imgs[i].size(); - } - res_imgs->data = new clip_image_f32[res_imgs->size]; - int idx = 0; + std::vector> imgs = uhd_slice_image(img, max_slice_nums); for (size_t i = 0; i < imgs.size(); ++i) { for (size_t j = 0; j < imgs[i].size(); ++j) { LOG_DBG("%s: %d %d\n", __func__,imgs[i][j]->nx,imgs[i][j]->ny); - clip_image_f32 * res = clip_image_f32_init(); - normalize_image_u8_to_f32(imgs[i][j], res, ctx->image_mean, ctx->image_std); - res_imgs->data[idx++] = *res; - clip_image_f32_free(res); - } - } - for (size_t i = 0; i < imgs.size(); ++i) { - for (size_t j = 0; j < imgs[i].size(); ++j) { - if (imgs[i][j] != nullptr) { - clip_image_u8_free(imgs[i][j]); - } + clip_image_f32_ptr res(clip_image_f32_init()); + normalize_image_u8_to_f32(*imgs[i][j], *res, ctx->image_mean, ctx->image_std); + res_imgs->entries.push_back(std::move(res)); } } return true; } else if (ctx->has_qwen2vl_merger) { - clip_image_u8 * resized = clip_image_u8_init(); - auto patch_size = clip_patch_size(ctx) * 2; + clip_image_u8 resized; + auto patch_size = clip_get_patch_size(ctx) * 2; int nx = ceil((float)img->nx / patch_size) * patch_size; int ny = ceil((float)img->ny / patch_size) * patch_size; - bicubic_resize(*img, *resized, nx, ny); + bicubic_resize(*img, resized, nx, ny); - res_imgs->data = new clip_image_f32[1]; - // 
clip_image_f32 * res = clip_image_f32_init(); - normalize_image_u8_to_f32(resized, res_imgs->data, ctx->image_mean, ctx->image_std); + clip_image_f32_ptr img_f32(clip_image_f32_init()); + // clip_image_f32_ptr res(clip_image_f32_init()); + normalize_image_u8_to_f32(resized, *img_f32, ctx->image_mean, ctx->image_std); // res_imgs->data[0] = *res; - res_imgs->size = 1; - - // clip_image_f32_free(res); - clip_image_u8_free(resized); + res_imgs->entries.push_back(std::move(img_f32)); return true; } if (ctx->has_glm_projector || ctx->proj_type == PROJECTOR_TYPE_GEMMA3) { - res_imgs->size = 1; - res_imgs->data = new clip_image_f32[res_imgs->size]; clip_image_u8 resized_image; int32_t sz=ctx->vision_model.hparams.image_size; bicubic_resize(*img, resized_image,sz,sz); - clip_image_f32 * res = clip_image_f32_init(); + clip_image_f32_ptr img_f32(clip_image_f32_init()); //clip_image_save_to_bmp(resized_image, "resized.bmp"); - normalize_image_u8_to_f32(&resized_image, res, ctx->image_mean, ctx->image_std); - res_imgs->data[0] = *res; - clip_image_f32_free(res); + normalize_image_u8_to_f32(resized_image, *img_f32, ctx->image_mean, ctx->image_std); + res_imgs->entries.push_back(std::move(img_f32)); return true; } bool pad_to_square = true; if (!ctx->has_vision_encoder) { - LOG_ERR("This gguf file seems to have no vision encoder\n"); + LOG_ERR("%s: This gguf file seems to have no vision encoder\n", __func__); return false; } auto & params = ctx->vision_model.hparams; // The model config actually contains all we need to decide on how to preprocess, here we automatically switch to the new llava-1.6 preprocessing - if (strcmp(params.mm_patch_merge_type, "spatial_unpad") == 0) { + if (params.mm_patch_merge_type == PATCH_MERGE_SPATIAL_UNPAD) { pad_to_square = false; } // free the previous res_imgs if any set - if (res_imgs->size > 0) { - clip_image_f32_batch_free(res_imgs); - } - res_imgs->data = nullptr; - res_imgs->size = 0; + res_imgs->entries.clear(); // the logic below is to pad the shorter side to the longer side with a background color: rgb(122, 116, 104) // see https://github.com/haotian-liu/LLaVA/blob/e854a2bf85118c504f6f16bf5c3c7c92f8fa8c6b/llava/conversation.py#L113-L156 - clip_image_u8 * temp = clip_image_u8_init(); // we will keep the input image data here temporarily + clip_image_u8_ptr temp(clip_image_u8_init()); // we will keep the input image data here temporarily if (pad_to_square && img->nx != img->ny) { int longer_side = std::max(img->nx, img->ny); temp->nx = longer_side; @@ -2486,28 +2150,18 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli // clip_image_u8_free(temp2); // } - std::vector patches = divide_to_patches_u8(*temp, params.image_size); // prepare spatial sorted main patches of image_size each (336 in llava-1.6) + std::vector patches = divide_to_patches_u8(*temp, params.image_size); // prepare spatial sorted main patches of image_size each (336 in llava-1.6) - clip_image_u8 *image_original_resize = clip_image_u8_init(); + clip_image_u8_ptr image_original_resize(clip_image_u8_init()); // bilinear_resize(*img, *image_original_resize, params.image_size, params.image_size); // in python this is "shortest_edge", but all CLIP are square bicubic_resize(*img, *image_original_resize, params.image_size, params.image_size); // in python this is "shortest_edge", but all CLIP are square - patches.insert(patches.begin(), image_original_resize); - // clip_image_f32_batch_init(patches.size()); - res_imgs->size = patches.size(); - res_imgs->data = new 
clip_image_f32[res_imgs->size]; - int num=0; - for (auto& patch : patches) { - normalize_image_u8_to_f32(patch, &res_imgs->data[num], ctx->image_mean, ctx->image_std); - num++; + patches.insert(patches.begin(), std::move(image_original_resize)); + for (auto & patch : patches) { + clip_image_f32_ptr res(clip_image_f32_init()); + normalize_image_u8_to_f32(*patch, *res, ctx->image_mean, ctx->image_std); + res_imgs->entries.push_back(std::move(res)); } - for (size_t i = 0; i < patches.size(); i++) { - // LOG_DBG("patch %d: %d %d\n", i, patches[i]->nx, patches[i]->ny); - clip_image_u8_free(patches[i]); - } - - clip_image_u8_free(temp); - return true; } else { temp->nx = img->nx; @@ -2523,7 +2177,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli const int nx2 = ctx->vision_model.hparams.image_size; const int ny2 = ctx->vision_model.hparams.image_size; - clip_image_f32 * res = clip_image_f32_init(); + clip_image_f32_ptr res(clip_image_f32_init()); res->nx = nx2; res->ny = ny2; res->buf.resize(3 * nx2 * ny2); @@ -2575,7 +2229,6 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli } } } - clip_image_u8_free(temp); // { // clip_image_u8 * temp2 = clip_image_u8_init(); @@ -2585,10 +2238,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli // } // res_imgs.push_back(res); - res_imgs->size = 1; - res_imgs->data = new clip_image_f32[res_imgs->size]; - res_imgs->data[0] = *res; - clip_image_f32_free(res); + res_imgs->entries.push_back(std::move(res)); return true; } @@ -2598,6 +2248,9 @@ ggml_tensor * clip_get_newline_tensor(const struct clip_ctx * ctx) { } void clip_free(clip_ctx * ctx) { + if (ctx == nullptr) { + return; + } delete ctx; } @@ -2613,20 +2266,20 @@ size_t clip_embd_nbytes_by_img(const struct clip_ctx * ctx, int img_h, int img_w return clip_n_patches_by_img(ctx, &img) * clip_n_mmproj_embd(ctx) * sizeof(float); } -int32_t clip_image_size(const struct clip_ctx * ctx) { +int32_t clip_get_image_size(const struct clip_ctx * ctx) { return ctx->vision_model.hparams.image_size; } -int32_t clip_patch_size(const struct clip_ctx * ctx) { +int32_t clip_get_patch_size(const struct clip_ctx * ctx) { return ctx->vision_model.hparams.patch_size; } -int32_t clip_hidden_size(const struct clip_ctx * ctx) { +int32_t clip_get_hidden_size(const struct clip_ctx * ctx) { return ctx->vision_model.hparams.hidden_size; } const char * clip_patch_merge_type(const struct clip_ctx * ctx) { - return ctx->vision_model.hparams.mm_patch_merge_type; + return ctx->vision_model.hparams.mm_patch_merge_type == PATCH_MERGE_SPATIAL_UNPAD ? 
"spatial_unpad" : "flat"; } const int32_t * clip_image_grid(const struct clip_ctx * ctx) { @@ -2669,6 +2322,8 @@ int clip_n_patches_by_img(const struct clip_ctx * ctx, struct clip_image_f32 * i int x_patch = img->nx / patch_size + (int)(img->nx % patch_size > 0); int y_patch = img->ny / patch_size + (int)(img->ny % patch_size > 0); n_patches = x_patch * y_patch; + } else if (ctx->proj_type == PROJECTOR_TYPE_GEMMA3) { + n_patches = 256; } return n_patches; @@ -2762,23 +2417,27 @@ static std::vector> get_2d_sincos_pos_embed(int embed_dim, co bool clip_image_encode(struct clip_ctx * ctx, const int n_threads, clip_image_f32 * img, float * vec) { if (!ctx->has_vision_encoder) { - LOG_ERR("This gguf file seems to have no vision encoder\n"); + LOG_ERR("%s: This gguf file seems to have no vision encoder\n", __func__); return false; } - clip_image_f32_batch imgs{}; - imgs.size = 1; - imgs.data = img; + clip_image_f32_batch imgs; + clip_image_f32_ptr img_copy(clip_image_f32_init()); + *img_copy = *img; + imgs.entries.push_back(std::move(img_copy)); + return clip_image_batch_encode(ctx, n_threads, &imgs, vec); } -bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_image_f32_batch * imgs, float * vec) { +bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_image_f32_batch * imgs_c_ptr, float * vec) { + const clip_image_f32_batch & imgs = *imgs_c_ptr; + if (!ctx->has_vision_encoder) { - LOG_ERR("This gguf file seems to have no vision encoder\n"); + LOG_ERR("%s: This gguf file seems to have no vision encoder\n", __func__); return false; } - int batch_size = imgs->size; + int batch_size = imgs.entries.size(); if (ctx->has_llava_projector) { GGML_ASSERT(batch_size == 1); // TODO: support multiple images } @@ -2805,25 +2464,22 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima int image_size_width = image_size; int image_size_height = image_size; if (ctx->has_minicpmv_projector | ctx->has_qwen2vl_merger) { - image_size_width = imgs->data[0].nx; - image_size_height = imgs->data[0].ny; + image_size_width = imgs.entries[0]->nx; + image_size_height = imgs.entries[0]->ny; } const int patch_size = hparams.patch_size; const int num_patches = ((image_size_width / patch_size) * (image_size_height / patch_size)); - const int num_positions = num_patches + (ctx->has_class_embedding ? 1 : 0); - if(ctx->load_image_size==nullptr){ - ctx->load_image_size= clip_image_size_init(); - } - const int pos_w = ctx->load_image_size->width/patch_size; - const int pos_h = ctx->load_image_size->height/patch_size; + const int num_positions = num_patches + (model.class_embedding ? 
1 : 0); + const int pos_w = ctx->load_image_size.width / patch_size; + const int pos_h = ctx->load_image_size.height / patch_size; { struct ggml_tensor * inp_raw = ggml_graph_get_tensor(gf, "inp_raw"); float * data = (float *)malloc(ggml_nbytes(inp_raw)); - for (size_t i = 0; i < imgs->size; i++) { - const int nx = imgs->data[i].nx; - const int ny = imgs->data[i].ny; + for (size_t i = 0; i < imgs.entries.size(); i++) { + const int nx = imgs.entries[i]->nx; + const int ny = imgs.entries[i]->ny; if (!(ctx->has_minicpmv_projector | ctx->has_qwen2vl_merger)) { GGML_ASSERT(nx == image_size && ny == image_size); } @@ -2834,7 +2490,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima for (int k = 0; k < 3; k++) { for (int y = 0; y < ny; y++) { for (int x = 0; x < nx; x++) { - data[(b * 3 * n) + k * n + y * nx + x] = imgs->data[b].buf[3 * (y * nx + x) + k]; + data[(b * 3 * n) + k * n + y * nx + x] = imgs.entries[b]->buf[3 * (y * nx + x) + k]; } } } @@ -2895,16 +2551,14 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima free(pos_embed_data); } } - else{ - { - if (ctx->has_class_embedding) { - struct ggml_tensor * embeddings = ggml_graph_get_tensor(gf, "embeddings"); + else { + if (model.class_embedding) { + struct ggml_tensor * embeddings = ggml_graph_get_tensor(gf, "embeddings"); - void* zero_mem = malloc(ggml_nbytes(embeddings)); - memset(zero_mem, 0, ggml_nbytes(embeddings)); - ggml_backend_tensor_set(embeddings, zero_mem, 0, ggml_nbytes(embeddings)); - free(zero_mem); - } + void* zero_mem = malloc(ggml_nbytes(embeddings)); + memset(zero_mem, 0, ggml_nbytes(embeddings)); + ggml_backend_tensor_set(embeddings, zero_mem, 0, ggml_nbytes(embeddings)); + free(zero_mem); } if (ctx->has_qwen2vl_merger) { @@ -2952,7 +2606,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima // The patches vector is used to get rows to index into the embeds with; // we should skip dim 0 only if we have CLS to avoid going out of bounds // when retrieving the rows. - int patch_offset = ctx->has_class_embedding ? 1 : 0; + int patch_offset = model.class_embedding ? 
1 : 0; int* patches_data = (int*)malloc(ggml_nbytes(patches)); for (int i = 0; i < num_patches; i++) { patches_data[i] = i + patch_offset; @@ -2993,11 +2647,11 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i auto * ctx_clip = clip_init(fname_inp, clip_context_params{ /* use_gpu */ false, - /* verbosity */ 2, + /* verbosity */ GGML_LOG_LEVEL_ERROR, }); - const auto & ctx_src = ctx_clip->ctx_gguf; - const auto & ctx_data = ctx_clip->ctx_data; + const auto & ctx_src = ctx_clip->ctx_gguf.get(); + const auto & ctx_data = ctx_clip->ctx_data.get(); auto * ctx_out = gguf_init_empty(); gguf_set_kv(ctx_out, ctx_src); @@ -3071,7 +2725,7 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i f32_data = (float *)conv_buf.data(); break; default: - LOG_ERR("Please use an input file in f32 or f16\n"); + LOG_ERR("%s: Please use an input file in f32 or f16\n", __func__); gguf_free(ctx_out); return false; } @@ -3157,7 +2811,7 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) { } std::string proj_type = PROJECTOR_TYPE_NAMES[ctx->proj_type]; - throw std::runtime_error(format("%s: don't support projector with: %s currently\n", __func__, proj_type.c_str())); + throw std::runtime_error(string_format("%s: don't support projector with: %s currently\n", __func__, proj_type.c_str())); } int clip_is_minicpmv(const struct clip_ctx * ctx) { @@ -3170,10 +2824,19 @@ int clip_is_minicpmv(const struct clip_ctx * ctx) { bool clip_is_glm(const struct clip_ctx * ctx) { return ctx->has_glm_projector; } + bool clip_is_qwen2vl(const struct clip_ctx * ctx) { return ctx->has_qwen2vl_merger; } +bool clip_is_llava(const struct clip_ctx * ctx) { + return ctx->has_llava_projector; +} + +bool clip_is_gemma3(const struct clip_ctx * ctx) { + return ctx->proj_type == PROJECTOR_TYPE_GEMMA3; +} + // Determine the number of encoder layers to iterate over int get_deepest_feature_layer(const struct clip_ctx * ctx) { // Get the index of the second to last layer; this is the @@ -3209,3 +2872,11 @@ bool clip_encode_float_image (struct clip_ctx * ctx, int n_threads, float * img, clip_image_encode(ctx, n_threads, &clip_img, vec); return true; } + +// +// API used internally with mtmd +// + +projector_type clip_get_projector_type(const struct clip_ctx * ctx) { + return ctx->proj_type; +} diff --git a/examples/llava/clip.h b/examples/llava/clip.h index 47059ca1b9..cc133a58de 100644 --- a/examples/llava/clip.h +++ b/examples/llava/clip.h @@ -1,6 +1,7 @@ #ifndef CLIP_H #define CLIP_H +#include "ggml.h" #include #include @@ -29,19 +30,12 @@ struct clip_image_size { int height; }; -struct clip_image_u8_batch { - struct clip_image_u8 * data; - size_t size; -}; - -struct clip_image_f32_batch { - struct clip_image_f32 * data; - size_t size; -}; +struct clip_image_u8_batch; +struct clip_image_f32_batch; struct clip_context_params { bool use_gpu; - int verbosity; + ggml_log_level verbosity; }; // deprecated, use clip_init @@ -54,9 +48,9 @@ CLIP_API void clip_free(struct clip_ctx * ctx); CLIP_API size_t clip_embd_nbytes(const struct clip_ctx * ctx); CLIP_API size_t clip_embd_nbytes_by_img(const struct clip_ctx * ctx, int img_h, int img_w); -CLIP_API int32_t clip_image_size (const struct clip_ctx * ctx); -CLIP_API int32_t clip_patch_size (const struct clip_ctx * ctx); -CLIP_API int32_t clip_hidden_size(const struct clip_ctx * ctx); +CLIP_API int32_t clip_get_image_size (const struct clip_ctx * ctx); +CLIP_API int32_t clip_get_patch_size (const struct clip_ctx * ctx); +CLIP_API int32_t 
clip_get_hidden_size(const struct clip_ctx * ctx); // TODO: should be enum, not string CLIP_API const char * clip_patch_merge_type(const struct clip_ctx * ctx); @@ -72,15 +66,26 @@ CLIP_API int clip_uhd_num_image_embeds_col(struct clip_ctx * ctx_clip); CLIP_API void clip_add_load_image_size(struct clip_ctx * ctx_clip, struct clip_image_size * load_image_size); CLIP_API struct clip_image_size * clip_get_load_image_size(struct clip_ctx * ctx_clip); -CLIP_API struct clip_image_size * clip_image_size_init(); -CLIP_API struct clip_image_u8 * clip_image_u8_init (); -CLIP_API struct clip_image_f32 * clip_image_f32_init(); +CLIP_API struct clip_image_size * clip_image_size_init(); +CLIP_API struct clip_image_u8 * clip_image_u8_init (); +CLIP_API struct clip_image_f32 * clip_image_f32_init(); +CLIP_API struct clip_image_f32_batch * clip_image_f32_batch_init(); // only used by libllava +// nx, ny are the output image dimensions +CLIP_API unsigned char * clip_image_u8_get_data(struct clip_image_u8 * img, uint32_t * nx, uint32_t * ny); + +CLIP_API void clip_image_size_free (struct clip_image_size * img_size); CLIP_API void clip_image_u8_free (struct clip_image_u8 * img); CLIP_API void clip_image_f32_free(struct clip_image_f32 * img); CLIP_API void clip_image_u8_batch_free (struct clip_image_u8_batch * batch); CLIP_API void clip_image_f32_batch_free(struct clip_image_f32_batch * batch); +// use for accessing underlay data of clip_image_f32_batch +CLIP_API size_t clip_image_f32_batch_n_images(const struct clip_image_f32_batch * batch); // equivalent to batch->size() +CLIP_API size_t clip_image_f32_batch_nx(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->nx +CLIP_API size_t clip_image_f32_batch_ny(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->ny +CLIP_API clip_image_f32 * clip_image_f32_get_img(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->data + /** * Build image from pixels decoded by other libraries instead of stb_image.h for better performance. 
* The memory layout is RGBRGBRGB..., input buffer length must be 3*nx*ny bytes @@ -105,6 +110,8 @@ CLIP_API bool clip_model_quantize(const char * fname_inp, const char * fname_out CLIP_API int clip_is_minicpmv(const struct clip_ctx * ctx); CLIP_API bool clip_is_glm(const struct clip_ctx * ctx); CLIP_API bool clip_is_qwen2vl(const struct clip_ctx * ctx); +CLIP_API bool clip_is_llava(const struct clip_ctx * ctx); +CLIP_API bool clip_is_gemma3(const struct clip_ctx * ctx); CLIP_API int get_deepest_feature_layer(const struct clip_ctx * ctx); diff --git a/examples/llava/gemma3-cli.cpp b/examples/llava/gemma3-cli.cpp index 7813ac19f5..693b738a80 100644 --- a/examples/llava/gemma3-cli.cpp +++ b/examples/llava/gemma3-cli.cpp @@ -2,15 +2,15 @@ #include "log.h" #include "common.h" #include "sampling.h" -#include "clip.h" -#include "stb_image.h" #include "llama.h" #include "ggml.h" #include "console.h" +#include "chat.h" +#include "mtmd.h" #include #include -#include +#include #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) #include @@ -57,13 +57,18 @@ static void sigint_handler(int signo) { #endif struct gemma3_context { - struct clip_ctx * ctx_clip = NULL; - common_init_result llama_init; + mtmd_context_ptr ctx_vision; + common_init_result llama_init; llama_model * model; llama_context * lctx; const llama_vocab * vocab; llama_batch batch; + int n_batch; + + // note: we know that gemma3 template is "linear", meaning each turn is completely separated to another + // so here we don't need to keep track of chat history + common_chat_templates_ptr tmpls; int n_threads = 1; llama_pos n_past = 0; @@ -74,16 +79,23 @@ struct gemma3_context { vocab = llama_model_get_vocab(model); n_threads = params.cpuparams.n_threads; batch = llama_batch_init(params.n_batch, 0, 1); - init_clip_model(params); + n_batch = params.n_batch; + tmpls = common_chat_templates_init(model, params.chat_template); + init_vision_context(params); } - void init_clip_model(common_params & params) { + void init_vision_context(common_params & params) { const char * clip_path = params.mmproj.path.c_str(); - ctx_clip = clip_model_load(clip_path, params.verbosity > 1); - } - - ~gemma3_context() { - clip_free(ctx_clip); + ctx_vision.reset(mtmd_init_from_file(clip_path, model, mtmd_context_params{ + /* use_gpu */ true, + /* timings */ true, + /* n_threads */ params.cpuparams.n_threads, + /* verbosity */ GGML_LOG_LEVEL_INFO, + })); + if (!ctx_vision.get()) { + LOG_ERR("Failed to load vision model from %s\n", clip_path); + exit(1); + } } }; @@ -120,77 +132,6 @@ struct decode_embd_batch { } }; -static int eval_text(gemma3_context & ctx, std::string input, bool logits_last = false) { - llama_tokens tokens = common_tokenize(ctx.lctx, input, false, true); - common_batch_clear(ctx.batch); - for (llama_token & t : tokens) { - common_batch_add(ctx.batch, t, ctx.n_past++, {0}, false); - } - if (logits_last) { - ctx.batch.logits[ctx.batch.n_tokens - 1] = true; - } - // LOG("eval_text (n_tokens = %d): %s\n", (int)tokens.size(), input.c_str()); - if (llama_decode(ctx.lctx, ctx.batch)) { - LOG_ERR("Failed to decode text\n"); - return 1; - } - return 0; -} - -static int eval_image(gemma3_context & ctx, std::string & fname) { - std::vector image_embd_v; - int n_embd = llama_model_n_embd(ctx.model); - int n_tokens = 256; - image_embd_v.resize(n_tokens * n_embd); - - bool ok; - struct clip_image_u8 * img_u8 = clip_image_u8_init(); - ok = clip_image_load_from_file(fname.c_str(), img_u8); - if (!ok) { - LOG_ERR("Unable to load image %s\n", 
fname.c_str()); - clip_image_u8_free(img_u8); - return 2; // non-fatal error - } - - clip_image_f32_batch batch_f32; - ok = clip_image_preprocess(ctx.ctx_clip, img_u8, &batch_f32); - if (!ok) { - LOG_ERR("Unable to preprocess image\n"); - clip_image_f32_batch_free(&batch_f32); - clip_image_u8_free(img_u8); - return 1; - } - - int64_t t0 = ggml_time_ms(); - LOG("Encoding image %s\n", fname.c_str()); - ok = clip_image_batch_encode(ctx.ctx_clip, ctx.n_threads, &batch_f32, image_embd_v.data()); - if (!ok) { - LOG_ERR("Unable to encode image\n"); - clip_image_f32_batch_free(&batch_f32); - clip_image_u8_free(img_u8); - return 1; - } - LOG("Image encoded in %" PRId64 " ms\n", ggml_time_ms() - t0); - - clip_image_f32_batch_free(&batch_f32); - clip_image_u8_free(img_u8); - - // decode image embeddings - int64_t t1 = ggml_time_ms(); - eval_text(ctx, ""); - llama_set_causal_attn(ctx.lctx, false); - decode_embd_batch batch_img(image_embd_v.data(), n_tokens, ctx.n_past, 0); - if (llama_decode(ctx.lctx, batch_img.batch)) { - LOG_ERR("failed to decode image\n"); - return 1; - } - ctx.n_past += n_tokens; - llama_set_causal_attn(ctx.lctx, true); - eval_text(ctx, ""); - LOG("Image decoded in %" PRId64 " ms\n", ggml_time_ms() - t1); - return 0; -} - static int generate_response(gemma3_context & ctx, common_sampler * smpl, int n_predict) { for (int i = 0; i < n_predict; i++) { if (i > n_predict || !g_is_generating) { @@ -220,6 +161,45 @@ static int generate_response(gemma3_context & ctx, common_sampler * smpl, int n_ return 0; } +static int eval_message(gemma3_context & ctx, common_chat_msg & msg, std::vector & images_fname, bool add_bos = false) { + std::vector bitmaps; + + common_chat_templates_inputs tmpl_inputs; + tmpl_inputs.messages = {msg}; + tmpl_inputs.add_generation_prompt = true; + tmpl_inputs.use_jinja = false; // jinja is buggy here + auto formatted_chat = common_chat_templates_apply(ctx.tmpls.get(), tmpl_inputs); + LOG_DBG("formatted_chat.prompt: %s\n", formatted_chat.prompt.c_str()); + + for (auto & fname : images_fname) { + mtmd_bitmap bitmap; + if (mtmd_helper_bitmap_init_from_file(fname.c_str(), bitmap)) { + LOG_ERR("Unable to load image %s\n", fname.c_str()); + return 2; // image not found + } + bitmaps.push_back(std::move(bitmap)); + } + + mtmd_input_text text; + text.text = formatted_chat.prompt; + text.add_special = add_bos; + text.parse_special = true; + mtmd_input_chunks_ptr chunks(mtmd_tokenize(ctx.ctx_vision.get(), text, bitmaps)); + if (chunks == nullptr) { + LOG_ERR("Unable to tokenize prompt\n"); + return 1; + } + + if (mtmd_helper_eval(ctx.ctx_vision.get(), ctx.lctx, chunks.get(), ctx.n_past, 0, ctx.n_batch)) { + LOG_ERR("Unable to eval prompt\n"); + return 1; + } + + ctx.n_past += mtmd_helper_get_n_tokens(chunks.get()); + + return 0; +} + int main(int argc, char ** argv) { ggml_time_init(); @@ -261,21 +241,15 @@ int main(int argc, char ** argv) { #endif } - if (eval_text(ctx, "")) { - return 1; - } - if (is_single_turn) { g_is_generating = true; - if (eval_text(ctx, "user\n")) { - return 1; + if (params.prompt.find("<__image__>") == std::string::npos) { + params.prompt += " <__image__>"; } - for (auto & fname : params.image) { - if (eval_image(ctx, fname)) { - return 1; - } - } - if (eval_text(ctx, params.prompt + "model\n", true)) { + common_chat_msg msg; + msg.role = "user"; + msg.content = params.prompt; + if (eval_message(ctx, msg, params.image, true)) { return 1; } if (generate_response(ctx, smpl, n_predict)) { @@ -289,9 +263,9 @@ int main(int argc, char ** argv) { LOG("\n 
/quit or /exit exit the program"); LOG("\n"); - if (eval_text(ctx, "user\n")) { - return 1; - } + bool is_first_msg = true; + std::vector images_fname; + std::string content; while (true) { g_is_generating = false; @@ -316,26 +290,33 @@ int main(int argc, char ** argv) { g_is_generating = true; if (line.find("/image") == 0) { std::string image = line.substr(7); - int res = eval_image(ctx, image); - if (res == 2) { - continue; // image not found - } - if (res) { - return 1; - } + images_fname.push_back(string_strip(image)); + content += "<__image__>"; + continue; + } else { + content += line; + } + common_chat_msg msg; + msg.role = "user"; + msg.content = content; + int ret = eval_message(ctx, msg, images_fname, is_first_msg); + if (ret == 2) { + // non-fatal error + images_fname.clear(); + content.clear(); continue; } - if (eval_text(ctx, line + "model\n", true)) { + if (ret) { return 1; } if (generate_response(ctx, smpl, n_predict)) { return 1; } - if (eval_text(ctx, "user\n")) { - return 1; - } + images_fname.clear(); + content.clear(); + is_first_msg = false; } } - + llama_perf_context_print(ctx.lctx); return 0; } diff --git a/examples/llava/llava-cli.cpp b/examples/llava/llava-cli.cpp index a151313436..0fe0e333a5 100644 --- a/examples/llava/llava-cli.cpp +++ b/examples/llava/llava-cli.cpp @@ -241,7 +241,7 @@ static struct llava_context * llava_init_context(common_params * params, llama_m prompt = "describe the image in detail."; } - auto ctx_clip = clip_model_load(clip_path, /*verbosity=*/ 1); + auto ctx_clip = clip_model_load(clip_path, GGML_LOG_LEVEL_INFO); llama_context_params ctx_params = common_context_params_to_llama(*params); ctx_params.n_ctx = params->n_ctx < 2048 ? 2048 : params->n_ctx; // we need a longer context size to process image embeddings diff --git a/examples/llava/llava.cpp b/examples/llava/llava.cpp index 518aad3f1f..03a22cbb4c 100644 --- a/examples/llava/llava.cpp +++ b/examples/llava/llava.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #if defined(LLAVA_LOG_OFF) # define LOG_INF(...) @@ -45,6 +46,17 @@ struct clip_image_grid_shape { int second; }; +// convenience cpp wrapper +struct clip_image_f32_batch_deleter { + void operator()(clip_image_f32_batch * val) { clip_image_f32_batch_free(val); } +}; +typedef std::unique_ptr clip_image_f32_batch_ptr; + +struct clip_image_size_deleter { + void operator()(clip_image_f32_batch * val) { clip_image_f32_batch_free(val); } +}; +typedef std::unique_ptr clip_image_size_ptr; + /** * Selects the best resolution from a list of possible resolutions based on the original size. 
* @@ -105,8 +117,8 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector struct ggml_context * ctx; } model; - const int32_t image_size = clip_image_size(ctx_clip); - const int32_t patch_size = clip_patch_size(ctx_clip); + const int32_t image_size = clip_get_image_size(ctx_clip); + const int32_t patch_size = clip_get_patch_size(ctx_clip); int32_t num_patches_per_side = image_size / patch_size; // 336 / 14 = 24 - used for embedding-patching boxes (24*24 = 576 patches) @@ -246,12 +258,9 @@ static clip_image_f32 * reshape_by_patch(clip_image_f32 * image, int patch_size) static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float * image_embd, int * n_img_pos) { // std::vector img_res_v; // format VectN x H x W x RGB (N x 336 x 336 x 3), so interleaved RGB - different to the python implementation which is N x 3 x 336 x 336 - clip_image_f32_batch img_res_v; - img_res_v.size = 0; - img_res_v.data = nullptr; - if (!clip_image_preprocess(ctx_clip, img, &img_res_v)) { + clip_image_f32_batch_ptr img_res_v(clip_image_f32_batch_init()); + if (!clip_image_preprocess(ctx_clip, img, img_res_v.get())) { LOG_ERR("%s: unable to preprocess image\n", __func__); - delete[] img_res_v.data; return false; } @@ -259,66 +268,72 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli const char * mm_patch_merge_type = clip_patch_merge_type(ctx_clip); + const size_t n_imgs = clip_image_f32_batch_n_images(img_res_v.get()); + if (clip_is_minicpmv(ctx_clip) || clip_is_qwen2vl(ctx_clip)) { std::vector image_embd_v; - image_embd_v.resize(img_res_v.size); - struct clip_image_size * load_image_size = clip_image_size_init(); + image_embd_v.resize(n_imgs); + clip_image_size load_image_size; - for (size_t i = 0; i < img_res_v.size; i++) { + for (size_t i = 0; i < n_imgs; i++) { const int64_t t_img_enc_step_start_us = ggml_time_us(); - image_embd_v[i] = (float *)malloc(clip_embd_nbytes_by_img(ctx_clip, img_res_v.data[i].nx, img_res_v.data[i].ny)); - int patch_size=14; - load_image_size->width = img_res_v.data[i].nx; - load_image_size->height = img_res_v.data[i].ny; - clip_add_load_image_size(ctx_clip, load_image_size); + int nx = clip_image_f32_batch_nx(img_res_v.get(), i); + int ny = clip_image_f32_batch_ny(img_res_v.get(), i); + image_embd_v[i] = (float *)malloc(clip_embd_nbytes_by_img(ctx_clip, nx, ny)); + int patch_size = 14; + load_image_size.width = nx; + load_image_size.height = ny; + clip_add_load_image_size(ctx_clip, &load_image_size); bool encoded = false; + clip_image_f32 * img_res = clip_image_f32_get_img(img_res_v.get(), i); if (clip_is_qwen2vl(ctx_clip)) { - encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[i], image_embd_v[i]); + encoded = clip_image_encode(ctx_clip, n_threads, img_res, image_embd_v[i]); } else { - encoded = clip_image_encode(ctx_clip, n_threads, reshape_by_patch(&img_res_v.data[i], patch_size), image_embd_v[i]); + encoded = clip_image_encode(ctx_clip, n_threads, reshape_by_patch(img_res, patch_size), image_embd_v[i]); } if (!encoded) { - LOG_ERR("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) img_res_v.size); + LOG_ERR("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) n_imgs); return false; } const int64_t t_img_enc_steop_batch_us = ggml_time_us(); - LOG_INF("%s: step %d of %d encoded in %8.2f ms\n", __func__, (int)i+1, (int)img_res_v.size, (t_img_enc_steop_batch_us - t_img_enc_step_start_us) / 1000.0); + LOG_INF("%s: step %d 
of %d encoded in %8.2f ms\n", __func__, (int)i+1, (int)n_imgs, (t_img_enc_steop_batch_us - t_img_enc_step_start_us) / 1000.0); } const int64_t t_img_enc_batch_us = ggml_time_us(); - LOG_INF("%s: all %d segments encoded in %8.2f ms\n", __func__, (int)img_res_v.size, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0); + LOG_INF("%s: all %d segments encoded in %8.2f ms\n", __func__, (int)n_imgs, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0); int n_img_pos_out = 0; for (size_t i = 0; i < image_embd_v.size(); i++) { + int nx = clip_image_f32_batch_nx(img_res_v.get(), i); + int ny = clip_image_f32_batch_ny(img_res_v.get(), i); + clip_image_f32 * img_res = clip_image_f32_get_img(img_res_v.get(), i); std::memcpy( image_embd + n_img_pos_out * clip_n_mmproj_embd(ctx_clip), image_embd_v[i], - clip_embd_nbytes_by_img(ctx_clip, img_res_v.data[i].nx, img_res_v.data[i].ny)); - n_img_pos_out += clip_n_patches_by_img(ctx_clip, &img_res_v.data[i]); + clip_embd_nbytes_by_img(ctx_clip, nx, ny)); + n_img_pos_out += clip_n_patches_by_img(ctx_clip, img_res); } *n_img_pos = n_img_pos_out; for (size_t i = 0; i < image_embd_v.size(); i++) { free(image_embd_v[i]); } image_embd_v.clear(); - load_image_size->width = img->nx; - load_image_size->height = img->ny; - clip_add_load_image_size(ctx_clip, load_image_size); - LOG_INF("%s: load_image_size %d %d\n", __func__, load_image_size->width, load_image_size->height); - delete[] img_res_v.data; - img_res_v.size = 0; - img_res_v.data = nullptr; + load_image_size.width = img->nx; + load_image_size.height = img->ny; + clip_add_load_image_size(ctx_clip, &load_image_size); + LOG_INF("%s: load_image_size %d %d\n", __func__, load_image_size.width, load_image_size.height); } else if (clip_is_glm(ctx_clip)){ struct clip_image_size * load_image_size = clip_image_size_init(); - load_image_size->width = img_res_v.data[0].nx; - load_image_size->height = img_res_v.data[0].ny; + load_image_size->width = clip_image_f32_batch_nx(img_res_v.get(), 0); + load_image_size->height = clip_image_f32_batch_ny(img_res_v.get(), 0); clip_add_load_image_size(ctx_clip, load_image_size); - bool encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[0], image_embd); - int pos = int(load_image_size->width/clip_patch_size(ctx_clip)/2); + clip_image_f32 * img_res = clip_image_f32_get_img(img_res_v.get(), 0); + bool encoded = clip_image_encode(ctx_clip, n_threads, img_res, image_embd); + int pos = int(load_image_size->width/clip_get_patch_size(ctx_clip)/2); *n_img_pos = (pos * pos + 2); if (!encoded){ LOG_ERR("Unable to encode image \n"); @@ -328,8 +343,8 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli else if (strcmp(mm_patch_merge_type, "spatial_unpad") != 0) { // flat / default llava-1.5 type embedding *n_img_pos = clip_n_patches(ctx_clip); - bool encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[0], image_embd); // image_embd shape is 576 x 4096 - delete[] img_res_v.data; + clip_image_f32 * img_res = clip_image_f32_get_img(img_res_v.get(), 0); + bool encoded = clip_image_encode(ctx_clip, n_threads, img_res, image_embd); // image_embd shape is 576 x 4096 if (!encoded) { LOG_ERR("Unable to encode image\n"); @@ -340,17 +355,18 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli // spatial_unpad llava-1.6 type embedding // TODO: CLIP needs batching support - in HF the llm projection is separate after encoding, which might be a solution to quickly get batching working std::vector image_embd_v; - 
image_embd_v.resize(img_res_v.size); - for (size_t i = 0; i < img_res_v.size; i++) { + image_embd_v.resize(n_imgs); + for (size_t i = 0; i < n_imgs; i++) { + clip_image_f32 * img_res = clip_image_f32_get_img(img_res_v.get(), i); image_embd_v[i] = (float *)malloc(clip_embd_nbytes(ctx_clip)); // 576 patches * 4096 embeddings * 4 bytes = 9437184 - const bool encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[i], image_embd_v[i]); // image data is in 3x336x336 format and will be converted to 336x336x3 inside + const bool encoded = clip_image_encode(ctx_clip, n_threads, img_res, image_embd_v[i]); // image data is in 3x336x336 format and will be converted to 336x336x3 inside if (!encoded) { - LOG_ERR("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) img_res_v.size); + LOG_ERR("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) n_imgs); return false; } } const int64_t t_img_enc_batch_us = ggml_time_us(); - LOG_INF("%s: %d segments encoded in %8.2f ms\n", __func__, (int)img_res_v.size, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0); + LOG_INF("%s: %d segments encoded in %8.2f ms\n", __func__, (int)n_imgs, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0); const int32_t * image_grid = clip_image_grid(ctx_clip); const size_t num_gridpoints = get_clip_image_grid_size(ctx_clip); @@ -360,12 +376,7 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli grid_pinpoints.push_back({image_grid[i], image_grid[i+1]}); } - // free all img_res_v - not needed anymore - delete[] img_res_v.data; - img_res_v.size = 0; - img_res_v.data = nullptr; - - const int32_t image_size = clip_image_size(ctx_clip); + const int32_t image_size = clip_get_image_size(ctx_clip); struct clip_image_grid_shape grid_shape = get_anyres_image_grid_shape({img->nx,img->ny}, grid_pinpoints, image_size); diff --git a/examples/llava/minicpmv-cli.cpp b/examples/llava/minicpmv-cli.cpp index 48fddeaa4d..5ad970c220 100644 --- a/examples/llava/minicpmv-cli.cpp +++ b/examples/llava/minicpmv-cli.cpp @@ -88,7 +88,7 @@ static struct clip_ctx * clip_init_context(common_params * params) { } struct clip_context_params clip_params = { /* use_gpu */ params->n_gpu_layers != 0, - /* verbosity */ params->verbosity, + /* verbosity */ GGML_LOG_LEVEL_INFO, // TODO: make this configurable }; auto * ctx_clip = clip_init(clip_path, clip_params); return ctx_clip; diff --git a/examples/llava/mtmd.cpp b/examples/llava/mtmd.cpp new file mode 100644 index 0000000000..114c274bc1 --- /dev/null +++ b/examples/llava/mtmd.cpp @@ -0,0 +1,341 @@ +#include "clip.h" +#include "clip-impl.h" +#include "mtmd.h" + +#include "llama.h" + +#include +#include +#include +#include +#include +#include +#include + +struct mtmd_context { + struct clip_ctx * ctx_clip; + const struct llama_model * text_model; + std::vector image_embd_v; // image embedding vector + bool print_timings; + int n_threads; + std::string image_marker; + + // TODO @ngxson : add timings + + mtmd_context(const char * mmproj_fname, + const llama_model * text_model, + const mtmd_context_params & ctx_params) : print_timings(ctx_params.print_timings), n_threads(ctx_params.n_threads), image_marker(ctx_params.image_marker) { + clip_context_params ctx_clip_params; + ctx_clip_params.use_gpu = ctx_params.use_gpu; + ctx_clip_params.verbosity = ctx_params.verbosity; + ctx_clip = clip_init(mmproj_fname, ctx_clip_params); + if (!ctx_clip) { + throw std::runtime_error(string_format("Failed to load CLIP model from %s\n", 
mmproj_fname)); + } + this->text_model = text_model; + } + + ~mtmd_context() { + clip_free(ctx_clip); + } +}; + +struct mtmd_image_tokens_data { + clip_image_f32_batch batch_f32; // preprocessed image patches +}; + +struct mtmd_image_tokens { + uint32_t nx; // number of tokens in x direction + uint32_t ny; // number of tokens in y direction + uint32_t n_tokens() const { return nx * ny; } + clip_image_f32_batch batch_f32; // preprocessed image patches +}; + +mtmd_context * mtmd_init_from_file(const char * mmproj_fname, + const struct llama_model * text_model, + const struct mtmd_context_params ctx_params) { + try { + return new mtmd_context(mmproj_fname, text_model, ctx_params); + } catch (const std::exception & e) { + LOG_ERR("%s: error: %s\n", __func__, e.what()); + return nullptr; + } +} + +void mtmd_free(mtmd_context * ctx) { + if (ctx) { + delete ctx; + } +} + +// copied from common_tokenize +static std::vector mtmd_tokenize_text_internal( + const struct llama_vocab * vocab, + const std::string & text, + bool add_special, + bool parse_special) { + // upper limit for the number of tokens + int n_tokens = text.length() + 2 * add_special; + std::vector result(n_tokens); + n_tokens = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special); + if (n_tokens < 0) { + result.resize(-n_tokens); + int check = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special); + GGML_ASSERT(check == -n_tokens); + } else { + result.resize(n_tokens); + } + return result; +} + +mtmd_input_chunks * mtmd_tokenize(mtmd_context * ctx, + const mtmd_input_text & text, + const std::vector & bitmaps) { + mtmd_input_chunks * output = new mtmd_input_chunks; + auto vocab = llama_model_get_vocab(ctx->text_model); + + std::string prompt_modified(text.text); + std::string marker_modified(ctx->image_marker); + projector_type proj_type = clip_get_projector_type(ctx->ctx_clip); + // a bit hacky here, but works for now + // for some models, we need to add prefix and suffix to the image embeddings + if (proj_type == PROJECTOR_TYPE_GEMMA3) { + // ... (image embeddings) ... 
+ marker_modified = "" + ctx->image_marker + ""; + string_replace_all(prompt_modified, ctx->image_marker, marker_modified); + } + + std::vector parts = string_split_str(text.text, ctx->image_marker); + output->clear(); + output->reserve(parts.size()); + + size_t i_img = 0; + + for (const auto & part : parts) { + //printf("tokenizing part: %s\n", part.c_str()); + bool add_bos = &parts.front() == ∂ + auto tokens = mtmd_tokenize_text_internal(vocab, part, text.add_special && add_bos, text.parse_special); + if (tokens.empty()) { + continue; + } + mtmd_input_chunk chunk{ + MTMD_INPUT_CHUNK_TYPE_TEXT, + std::move(tokens), + {}, + }; + output->emplace_back(std::move(chunk)); + + if (&parts.back() != &part) { + // add image token to middle of 2 parts + + if (i_img >= bitmaps.size()) { + LOG_ERR("%s: error: not enough images for %d parts\n", __func__, (int)parts.size()); + return nullptr; + } + + // shim layer + clip_image_u8_ptr img_u8(clip_image_u8_init()); + img_u8->nx = bitmaps[i_img].nx; + img_u8->ny = bitmaps[i_img].ny; + img_u8->buf.resize(bitmaps[i_img].data.size()); + std::memcpy(img_u8->buf.data(), bitmaps[i_img].data.data(), img_u8->nx * img_u8->ny * 3); + + // preprocess image + clip_image_f32_batch batch_f32; + bool ok = clip_image_preprocess(ctx->ctx_clip, img_u8.get(), &batch_f32); + if (!ok) { + LOG_ERR("Unable to preprocess image\n"); + return nullptr; + } + + mtmd_image_tokens * image_tokens = new mtmd_image_tokens; + image_tokens->nx = clip_n_patches(ctx->ctx_clip); // TODO @ngxson : use clip_n_patches_by_image + image_tokens->ny = 1; // TODO + image_tokens->batch_f32 = std::move(batch_f32); + + mtmd_input_chunk chunk{ + MTMD_INPUT_CHUNK_TYPE_IMAGE, + {}, + image_tokens, + }; + output->emplace_back(std::move(chunk)); + i_img++; + } + } + + return output; +} + +void mtmd_input_chunks_free(mtmd_input_chunks * chunks) { + for (auto & chunk : *chunks) { + if (chunk.type == MTMD_INPUT_CHUNK_TYPE_IMAGE && chunk.tokens_image) { + delete chunk.tokens_image; + } + } + delete chunks; +} + +int32_t mtmd_encode(mtmd_context * ctx, const mtmd_image_tokens * image_tokens) { + int n_mmproj_embd = clip_n_mmproj_embd(ctx->ctx_clip); + ctx->image_embd_v.resize(image_tokens->n_tokens() * n_mmproj_embd); + bool ok = clip_image_batch_encode( + ctx->ctx_clip, + ctx->n_threads, + &image_tokens->batch_f32, + ctx->image_embd_v.data()); + return ok ? 
0 : 1; +} + +float * mtmd_get_output_embd(mtmd_context * ctx) { + return ctx->image_embd_v.data(); +} + +size_t mtmd_helper_get_n_tokens(mtmd_input_chunks * chunks) { + size_t n_tokens = 0; + for (auto & chunk : *chunks) { + if (chunk.type == MTMD_INPUT_CHUNK_TYPE_TEXT) { + n_tokens += chunk.tokens_text.size(); + } else if (chunk.type == MTMD_INPUT_CHUNK_TYPE_IMAGE) { + n_tokens += chunk.tokens_image->n_tokens(); + } else { + GGML_ASSERT(false && "chunk type not supported"); + } + } + return n_tokens; +} + +// helper struct to make working with embd batch easier +// note: this will be removed after llama_batch_ext refactoring +struct decode_embd_batch { + std::vector pos; + std::vector n_seq_id; + std::vector seq_id_0; + std::vector seq_ids; + std::vector logits; + llama_batch batch; + decode_embd_batch(float * embd, int32_t n_tokens, llama_pos pos_0, llama_seq_id seq_id) { + pos .resize(n_tokens); + n_seq_id.resize(n_tokens); + seq_ids .resize(n_tokens + 1); + logits .resize(n_tokens); + seq_id_0.resize(1); + seq_id_0[0] = seq_id; + seq_ids [n_tokens] = nullptr; + batch = { + /*n_tokens =*/ n_tokens, + /*tokens =*/ nullptr, + /*embd =*/ embd, + /*pos =*/ pos.data(), + /*n_seq_id =*/ n_seq_id.data(), + /*seq_id =*/ seq_ids.data(), + /*logits =*/ logits.data(), + }; + for (int i = 0; i < n_tokens; i++) { + batch.pos [i] = pos_0 + i; + batch.n_seq_id[i] = 1; + batch.seq_id [i] = seq_id_0.data(); + batch.logits [i] = false; + } + } +}; + +int32_t mtmd_helper_eval(mtmd_context * ctx, + llama_context * lctx, + mtmd_input_chunks * chunks, + llama_pos pos0, + llama_seq_id seq_id, + int32_t n_batch) { + int32_t ret; + llama_pos n_past = pos0; + llama_batch text_batch = llama_batch_init(n_batch, 0, 1); + + for (auto & chunk : *chunks) { + bool is_last = &chunk == &chunks->back(); + if (chunk.type == MTMD_INPUT_CHUNK_TYPE_TEXT) { + // TODO @ngxson : may need to split into smaller batches + text_batch.n_tokens = chunk.tokens_text.size(); + for (size_t i = 0; i < chunk.tokens_text.size(); i++) { + text_batch.token [i] = chunk.tokens_text[i]; + text_batch.pos [i] = n_past++; + text_batch.n_seq_id[i] = 1; + text_batch.seq_id [i][0] = seq_id; + text_batch.logits [i] = false; + } + if (is_last) { + // always get logits for last input chunk + text_batch.logits[text_batch.n_tokens - 1] = true; + } + ret = llama_decode(lctx, text_batch); + if (ret != 0) { + LOG_ERR("failed to decode text\n"); + llama_batch_free(text_batch); + return ret; + } + + } else if (chunk.type == MTMD_INPUT_CHUNK_TYPE_IMAGE) { + GGML_ASSERT(!is_last && "logits for last image chunk is not yet support"); + GGML_ASSERT(chunk.tokens_image != nullptr); + int64_t t0 = ggml_time_ms(); + if (ctx->print_timings) { + LOG_INF("encoding image...\n"); + } + ret = mtmd_encode(ctx, chunk.tokens_image); + if (ret != 0) { + LOG_ERR("failed to encode image\n"); + llama_batch_free(text_batch); + return ret; + } + if (ctx->print_timings) { + LOG_INF("image encoded in %" PRId64 " ms\n", ggml_time_ms() - t0); + } + + int32_t n_tokens = chunk.tokens_image->n_tokens(); + float * embd = mtmd_get_output_embd(ctx); + decode_embd_batch batch_img(embd, n_tokens, n_past, 0); + int64_t t1 = ggml_time_ms(); + ret = llama_decode(lctx, batch_img.batch); + if (ret != 0) { + LOG_ERR("failed to decode image\n"); + llama_batch_free(text_batch); + return ret; + } + if (ctx->print_timings) { + LOG_INF("image decoded in %" PRId64 " ms\n", ggml_time_ms() - t1); + } + + n_past += n_tokens; + + } else { + GGML_ASSERT(false && "chunk type not supported"); + } + } + + 
llama_batch_free(text_batch); + return 0; +} + +int32_t mtmd_helper_bitmap_init_from_buf(const unsigned char * buf, size_t len, mtmd_bitmap & output) { + clip_image_u8_ptr img_u8(clip_image_u8_init()); + bool ok = clip_image_load_from_bytes(buf, len, img_u8.get()); + if (!ok) { + LOG_ERR("Unable to load image from buffer\n"); + return 1; + } + unsigned char * data = clip_image_u8_get_data(img_u8.get(), &output.nx, &output.ny); + output.data.resize(output.nx * output.ny * 3); + std::memcpy(output.data.data(), data, output.nx * output.ny * 3); + return 0; +} + +int32_t mtmd_helper_bitmap_init_from_file(const char * fname, mtmd_bitmap & output) { + clip_image_u8_ptr img_u8(clip_image_u8_init()); + bool ok = clip_image_load_from_file(fname, img_u8.get()); + if (!ok) { + LOG_ERR("Unable to load image %s\n", fname); + return 1; + } + unsigned char * data = clip_image_u8_get_data(img_u8.get(), &output.nx, &output.ny); + output.data.resize(output.nx * output.ny * 3); + std::memcpy(output.data.data(), data, output.nx * output.ny * 3); + return 0; +} diff --git a/examples/llava/mtmd.h b/examples/llava/mtmd.h new file mode 100644 index 0000000000..598f6947bb --- /dev/null +++ b/examples/llava/mtmd.h @@ -0,0 +1,146 @@ +#ifndef MTMD_H +#define MTMD_H + +#include "ggml.h" +#include "llama.h" +#include "clip.h" + +#include +#include +#include + +#ifdef LLAMA_SHARED +# if defined(_WIN32) && !defined(__MINGW32__) +# ifdef LLAMA_BUILD +# define MTMD_API __declspec(dllexport) +# else +# define MTMD_API __declspec(dllimport) +# endif +# else +# define MTMD_API __attribute__ ((visibility ("default"))) +# endif +#else +# define MTMD_API +#endif + +#ifdef __cplusplus + +enum mtmd_input_chunk_type { + MTMD_INPUT_CHUNK_TYPE_TEXT, + MTMD_INPUT_CHUNK_TYPE_IMAGE, +}; + +struct mtmd_context; +struct mtmd_image_tokens; + +// represents raw image data, layout is RGBRGBRGB... +// length of data must be nx * ny * 3 +struct mtmd_bitmap { + uint32_t nx; + uint32_t ny; + std::vector data; +}; + +struct mtmd_input_chunk { + mtmd_input_chunk_type type; + std::vector tokens_text; + mtmd_image_tokens * tokens_image = nullptr; +}; + +using mtmd_input_chunks = std::vector; + +struct mtmd_context_params { + bool use_gpu = true; + bool print_timings = true; + int n_threads = 4; + enum ggml_log_level verbosity = GGML_LOG_LEVEL_INFO; + const char * image_marker = "<__image__>"; +}; + +struct mtmd_input_text { + std::string text; + bool add_special; + bool parse_special; +}; + +// initialize the mtmd context +// return nullptr on failure +MTMD_API mtmd_context * mtmd_init_from_file(const char * mmproj_fname, + const llama_model * text_model, + const mtmd_context_params ctx_params); + +MTMD_API void mtmd_free(mtmd_context * ctx); + +// tokenize an input text prompt and an image +// the prompt must have the input image marker (default: "<__image__>") in it +// the marker will be replaced with the image tokens +// for example: +// "here is an image: <__image__>\ndescribe it in detail." +// this will gives 3 chunks: +// 1. "here is an image: " +// 2. (image tokens) +// 3. "\ndescribe it in detail." 
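
To make the flow described above concrete, here is a minimal usage sketch. It assumes only the declarations in this header plus an already loaded llama_model and llama_context; model loading, generation of the text reply, and sampling are omitted, and the function name describe_image as well as the n_batch value of 512 are illustrative choices, not part of the API.

// sketch only: error handling is reduced to early returns
static bool describe_image(const llama_model * model,
                           llama_context     * lctx,
                           const char        * mmproj_path,
                           const char        * image_path) {
    mtmd_context_params mparams; // defaults: GPU on, 4 threads, "<__image__>" marker
    mtmd_context_ptr ctx(mtmd_init_from_file(mmproj_path, model, mparams));
    if (!ctx) {
        return false;
    }

    mtmd_bitmap bitmap;
    if (mtmd_helper_bitmap_init_from_file(image_path, bitmap) != 0) {
        return false;
    }

    mtmd_input_text text;
    text.text          = "here is an image: <__image__>\ndescribe it in detail.";
    text.add_special   = true;
    text.parse_special = true;

    // yields the 3 chunks from the example above: text, image tokens, text
    mtmd_input_chunks_ptr chunks(mtmd_tokenize(ctx.get(), text, {bitmap}));
    if (!chunks) {
        return false;
    }

    // encodes the image chunk and llama_decode()s all chunks in order
    if (mtmd_helper_eval(ctx.get(), lctx, chunks.get(), /*pos0*/ 0, /*seq_id*/ 0, /*n_batch*/ 512) != 0) {
        return false;
    }

    // continue generating from position mtmd_helper_get_n_tokens(chunks.get())
    return true;
}
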
+// number of bitmaps must be equal to the number of image markers in the prompt +// this function is thread-safe (shared ctx) +MTMD_API mtmd_input_chunks * mtmd_tokenize(mtmd_context * ctx, + const mtmd_input_text & text, + const std::vector & bitmaps); + +// free image chunk data +MTMD_API void mtmd_input_chunks_free(mtmd_input_chunks * chunks); + +// returns 0 on success +MTMD_API int32_t mtmd_encode(mtmd_context * ctx, + const mtmd_image_tokens * image_tokens); + +// get output embeddings from the last encode pass +MTMD_API float * mtmd_get_output_embd(mtmd_context * ctx); + +// +// helper functions (can be implemented based on other functions) +// + +// helper to count the total number of tokens from a list of chunks, useful to keep track of n_past +MTMD_API size_t mtmd_helper_get_n_tokens(mtmd_input_chunks * chunks); + +// helper function that automatically: +// 1. run llama_decode() on text chunks +// 2. run mtmd_encode() on image chunks, then mtmd_get_output_embd() and then llama_decode() +// if any of the mtmd_encode() or llama_decode() calls return non-zero, stop and forward the error +// otherwise, returns 0 on success +MTMD_API int32_t mtmd_helper_eval(mtmd_context * ctx, + llama_context * lctx, + mtmd_input_chunks * chunks, + llama_pos pos0, + llama_seq_id seq_id, + int32_t n_batch); + +// helper function to construct a mtmd_bitmap from a file +// returns 0 on success +// this function is thread-safe +MTMD_API int32_t mtmd_helper_bitmap_init_from_file(const char * fname, mtmd_bitmap & output); + +// helper function to construct a mtmd_bitmap from a buffer +// the buffer must be an image in format supported by stb_image (jpg, png, bmp, gif, etc.) +// returns 0 on success +// this function is thread-safe +MTMD_API int32_t mtmd_helper_bitmap_init_from_buf(const unsigned char * buf, size_t len, mtmd_bitmap & output); + +// convenient unique_ptr wrappers +struct mtmd_context_deleter { + void operator()(mtmd_context * val) { mtmd_free(val); } +}; +using mtmd_context_ptr = std::unique_ptr; + +struct mtmd_input_chunks_deleter { + void operator()(mtmd_input_chunks * val) { mtmd_input_chunks_free(val); } +}; +using mtmd_input_chunks_ptr = std::unique_ptr; + +#else + +static_assert(false && "C header is not yet supported by this library"); + +#endif + +#endif diff --git a/examples/llava/qwen2vl-cli.cpp b/examples/llava/qwen2vl-cli.cpp index c6481e482a..eca7b7f10b 100644 --- a/examples/llava/qwen2vl-cli.cpp +++ b/examples/llava/qwen2vl-cli.cpp @@ -330,7 +330,7 @@ static struct llava_context * llava_init_context(common_params * params, llama_m prompt = "describe the image in detail."; } - auto ctx_clip = clip_model_load(clip_path, /*verbosity=*/ 1); + auto ctx_clip = clip_model_load(clip_path, GGML_LOG_LEVEL_INFO); llama_context_params ctx_params = common_context_params_to_llama(*params); ctx_params.n_ctx = params->n_ctx < 2048 ? 
2048 : params->n_ctx; // we need a longer context size to process image embeddings diff --git a/examples/llava/test-1.jpeg b/examples/llava/test-1.jpeg new file mode 100644 index 0000000000..7fdcaaf04b Binary files /dev/null and b/examples/llava/test-1.jpeg differ diff --git a/examples/llava/tests.sh b/examples/llava/tests.sh new file mode 100755 index 0000000000..cc9bda8769 --- /dev/null +++ b/examples/llava/tests.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +# make sure we are in the right directory +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +cd $SCRIPT_DIR + +#export LLAMA_CACHE="$SCRIPT_DIR/tmp" + +set -eux + +mkdir -p $SCRIPT_DIR/output + +PROJ_ROOT="$SCRIPT_DIR/../.." +cd $PROJ_ROOT + +############### + +arr_bin=() +arr_hf=() + +add_test() { + local bin=$1 + local hf=$2 + arr_bin+=("$bin") + arr_hf+=("$hf") +} + +add_test "llama-gemma3-cli" "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M" +add_test "llama-llava-cli" "cmp-nct/Yi-VL-6B-GGUF:Q5_K" +add_test "llama-llava-cli" "guinmoon/MobileVLM-3B-GGUF:Q4_K_M" +add_test "llama-llava-cli" "THUDM/glm-edge-v-5b-gguf:Q4_K_M" +add_test "llama-llava-cli" "second-state/Llava-v1.5-7B-GGUF:Q2_K" +add_test "llama-llava-cli" "cjpais/llava-1.6-mistral-7b-gguf:Q3_K" +add_test "llama-llava-cli" "ibm-research/granite-vision-3.2-2b-GGUF:Q4_K_M" +add_test "llama-minicpmv-cli" "second-state/MiniCPM-Llama3-V-2_5-GGUF:Q2_K" # model from openbmb is corrupted +add_test "llama-minicpmv-cli" "openbmb/MiniCPM-V-2_6-gguf:Q2_K" +add_test "llama-minicpmv-cli" "openbmb/MiniCPM-o-2_6-gguf:Q4_0" +add_test "llama-qwen2vl-cli" "bartowski/Qwen2-VL-2B-Instruct-GGUF:Q4_K_M" + +############### + +cmake --build build -j --target "${arr_bin[@]}" + +arr_res=() + +for i in "${!arr_bin[@]}"; do + bin="${arr_bin[$i]}" + hf="${arr_hf[$i]}" + + echo "Running test with binary: $bin and HF model: $hf" + echo "" + echo "" + + output=$("$PROJ_ROOT/build/bin/$bin" -hf "$hf" --image $SCRIPT_DIR/test-1.jpeg -p "what is the publisher name of the newspaper?" 
--temp 0 2>&1 | tee /dev/tty) + + echo "$output" > $SCRIPT_DIR/output/$bin-$(echo "$hf" | tr '/' '-').log + + if echo "$output" | grep -iq "new york"; then + result="\033[32mOK\033[0m: $bin $hf" + else + result="\033[31mFAIL\033[0m: $bin $hf" + fi + echo -e "$result" + arr_res+=("$result") + + echo "" + echo "" + echo "" + echo "#################################################" + echo "#################################################" + echo "" + echo "" +done + +set +x + +for i in "${!arr_res[@]}"; do + echo -e "${arr_res[$i]}" +done +echo "" +echo "Output logs are saved in $SCRIPT_DIR/output" diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp index 8c413f7d66..175f2804b5 100644 --- a/examples/perplexity/perplexity.cpp +++ b/examples/perplexity/perplexity.cpp @@ -851,7 +851,7 @@ static void hellaswag_score(llama_context * ctx, const common_params & params) { LOG_INF("%s : calculating hellaswag score over selected tasks.\n", __func__); - LOG("\ntask\tacc_norm\n"); + LOG("\ntask\tacc_norm\t95%% confidence interval\n"); double acc = 0.0f; @@ -985,8 +985,22 @@ static void hellaswag_score(llama_context * ctx, const common_params & params) { acc += 1.0; } - // Print the accumulated accuracy mean x 100 - LOG("%zu\t%.8lf\n", i + 1, acc/double(i + 1)*100.0); + double freq = acc / double(i + 1); + + const double za = 1.95996398454; + + // // Wald normal approx + // double conf =za*sqrt(freq*(1-freq)/double(i + 1)); + // LOG("%zu\t%.8lf +/- %.8lf\n", i + 1, freq*100.0, conf*100.0); + + // Wilson score interval, more accurate + double z = za * za / double(i + 1); + double cnf = z * sqrt(double(i + 1) * (4.0 * freq * (1 - freq) + z)) / (za + za); + double a = (freq + z * 0.5 - cnf) / (1.0 + z); + double b = (freq + z * 0.5 + cnf) / (1.0 + z); + + // Print the accumulated accuracy mean x 100 and confidence interval + LOG("%zu\t%3.8lf%%\t[%3.4lf%%, %3.4lf%%]\n", i + 1, freq * 100.0, a * 100.0, b * 100.0); } i0 = i1 - 1; diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp index a4468b1698..0355311dc5 100644 --- a/examples/quantize/quantize.cpp +++ b/examples/quantize/quantize.cpp @@ -9,6 +9,7 @@ #include #include #include +#include struct quant_option { std::string name; @@ -16,7 +17,7 @@ struct quant_option { std::string desc; }; -static const std::vector QUANT_OPTIONS = { +static const std::vector QUANT_OPTIONS = { { "Q4_0", LLAMA_FTYPE_MOSTLY_Q4_0, " 4.34G, +0.4685 ppl @ Llama-3-8B", }, { "Q4_1", LLAMA_FTYPE_MOSTLY_Q4_1, " 4.78G, +0.4511 ppl @ Llama-3-8B", }, { "Q5_0", LLAMA_FTYPE_MOSTLY_Q5_0, " 5.21G, +0.1316 ppl @ Llama-3-8B", }, @@ -105,7 +106,8 @@ static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftyp // [[noreturn]] static void usage(const char * executable) { - printf("usage: %s [--help] [--allow-requantize] [--leave-output-tensor] [--pure] [--imatrix] [--include-weights] [--exclude-weights] [--output-tensor-type] [--token-embedding-type] [--override-kv] model-f32.gguf [model-quant.gguf] type [nthreads]\n\n", executable); + printf("usage: %s [--help] [--allow-requantize] [--leave-output-tensor] [--pure] [--imatrix] [--include-weights] [--exclude-weights] [--output-tensor-type]\n", executable); + printf(" [--token-embedding-type] [--tensor-type] [--keep-split] [--override-kv] model-f32.gguf [model-quant.gguf] type [nthreads]\n\n"); printf(" --allow-requantize: Allows requantizing tensors that have already been quantized. 
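
For reference, the confidence interval added to the hellaswag output above is the standard Wilson score interval. With $n$ scored tasks, observed accuracy $\hat{p} = \mathrm{acc}/n$, and $z \approx 1.95996$ (the two-sided 95% normal quantile), the printed bounds are

\[
\frac{\hat{p} + \dfrac{z^2}{2n} \;\pm\; z \sqrt{\dfrac{\hat{p}(1-\hat{p})}{n} + \dfrac{z^2}{4n^2}}}{1 + \dfrac{z^2}{n}}
\]

In the code, `za` is the quantile $z$, the local variable `z` holds $z^2/n$, `cnf` is the $z\sqrt{\cdot}$ half-width, and `a` and `b` are the lower and upper bounds, printed as percentages next to the running accuracy.
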
Warning: This can severely reduce quality compared to quantizing from 16bit or 32bit\n"); printf(" --leave-output-tensor: Will leave output.weight un(re)quantized. Increases model size but may also increase quality, especially when requantizing\n"); printf(" --pure: Disable k-quant mixtures and quantize all tensors to the same type\n"); @@ -114,6 +116,8 @@ static void usage(const char * executable) { printf(" --exclude-weights tensor_name: use importance matrix for this/these tensor(s)\n"); printf(" --output-tensor-type ggml_type: use this ggml_type for the output.weight tensor\n"); printf(" --token-embedding-type ggml_type: use this ggml_type for the token embeddings tensor\n"); + printf(" --tensor-type TENSOR=TYPE: quantize this tensor to this ggml_type. example: --tensor-type attn_q=q8_0\n"); + printf(" Advanced option to selectively quantize tensors. May be specified multiple times.\n"); printf(" --keep-split: will generate quantized model in the same shards as input\n"); printf(" --override-kv KEY=TYPE:VALUE\n"); printf(" Advanced option to override model metadata by key in the quantized model. May be specified multiple times.\n"); @@ -244,6 +248,107 @@ static ggml_type parse_ggml_type(const char * arg) { return GGML_TYPE_COUNT; } +// Allowed tensors for arbitrary quantization with --tensor-type option +static const std::vector ALLOWED_TENSOR_TYPE = { + "attn_k", + "attn_kv_a_mqa", + "attn_kv_b", + "attn_o", + "attn_output", + "attn_q", + "attn_q_a", + "attn_q_b", + "attn_qkv", + "attn_v", + "channel_mix_key", + "channel_mix_receptance", + "channel_mix_value", + "cls", + "cls.output", + "cross_attn_k", + "cross_attn_o", + "cross_attn_q", + "cross_attn_v", + "ffn_act", + "ffn_down", + "ffn_down_exps", + "ffn_down_shexp", + "ffn_gate", + "ffn_gate_exps", + "ffn_gate_shexp", + "ffn_up", + "ffn_up_exps", + "ffn_up_shexp", + "ssm_in", + "ssm_out", + "time_mix_gate", + "time_mix_key", + "time_mix_output", + "time_mix_receptance", + "time_mix_value", +}; + +// changes to this struct must be replicated in llama-quant.cpp +struct tensor_quantization { + std::string name; + ggml_type quant = GGML_TYPE_COUNT; +}; + +static bool parse_tensor_type(const char * data, std::vector & tensor_type) { + const char * sep = strchr(data, '='); + if (sep == nullptr) { + printf("\n%s: malformed tensor type '%s'\n\n", __func__, data); + return false; + } + + const size_t tn_len = sep - data; + if (tn_len == 0) { + printf("\n%s: missing tensor name\n\n", __func__); + return false; + } + + if (const size_t qt_len = strlen(sep); qt_len == 1) { + printf("\n%s: missing quantization type\n\n", __func__); + return false; + } + + std::string tn(data, tn_len); + std::transform(tn.begin(), tn.end(), tn.begin(), tolower); + sep++; + const std::string qt(sep); + + bool found = false; + for (const auto & allowed : ALLOWED_TENSOR_TYPE) { + std::string tensor; + tensor = tn.rfind('.') != std::string::npos ? 
tn.substr(tn.rfind('.') + 1) : tn; + // handle special case of cls.output + std::string cls_output = "cls.output"; + if (tn.find(cls_output) != std::string::npos) { + tensor = "cls.output"; + } + // check if an allowed tensor exists and it's at the end of the kv string + if (tensor == allowed) { + found = true; + break; + } + } + if (!found) { + printf("\n%s: invalid tensor name '%s'\n\n", __func__, tn.c_str()); + return false; + } + + if (parse_ggml_type(qt.c_str()) == GGML_TYPE_COUNT) { + printf("\n%s: invalid quantization type '%s'\n\n", __func__, qt.c_str()); + return false; + } + + tensor_quantization tqz; + tqz.name = tn; + tqz.quant = parse_ggml_type(qt.c_str()); + tensor_type.emplace_back(std::move(tqz)); + return true; +} + int main(int argc, char ** argv) { if (argc < 3) { usage(argv[0]); @@ -255,6 +360,7 @@ int main(int argc, char ** argv) { std::string imatrix_file; std::vector included_weights, excluded_weights; std::vector kv_overrides; + std::vector tensor_types; for (; arg_idx < argc && strncmp(argv[arg_idx], "--", 2) == 0; arg_idx++) { if (strcmp(argv[arg_idx], "--leave-output-tensor") == 0) { @@ -277,6 +383,10 @@ int main(int argc, char ** argv) { } else { usage(argv[0]); } + } else if (strcmp(argv[arg_idx], "--tensor-type") == 0) { + if (arg_idx == argc-1 || !parse_tensor_type(argv[++arg_idx], tensor_types)) { + usage(argv[0]); + } } else if (strcmp(argv[arg_idx], "--override-kv") == 0) { if (arg_idx == argc-1 || !string_parse_kv_override(argv[++arg_idx], kv_overrides)) { usage(argv[0]); @@ -361,6 +471,9 @@ int main(int argc, char ** argv) { kv_overrides.back().key[0] = 0; params.kv_overrides = &kv_overrides; } + if (!tensor_types.empty()) { + params.tensor_types = &tensor_types; + } llama_backend_init(); diff --git a/examples/rpc/rpc-server.cpp b/examples/rpc/rpc-server.cpp index 3d590feb08..f8f2cb90ea 100644 --- a/examples/rpc/rpc-server.cpp +++ b/examples/rpc/rpc-server.cpp @@ -126,7 +126,7 @@ static std::string fs_get_cache_directory() { if (getenv("LLAMA_CACHE")) { cache_directory = std::getenv("LLAMA_CACHE"); } else { -#ifdef __linux__ +#if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX) if (std::getenv("XDG_CACHE_HOME")) { cache_directory = std::getenv("XDG_CACHE_HOME"); } else { @@ -136,7 +136,9 @@ static std::string fs_get_cache_directory() { cache_directory = std::getenv("HOME") + std::string("/Library/Caches/"); #elif defined(_WIN32) cache_directory = std::getenv("LOCALAPPDATA"); -#endif // __linux__ +#else +# error Unknown architecture +#endif cache_directory = ensure_trailing_slash(cache_directory); cache_directory += "llama.cpp"; } diff --git a/examples/run/CMakeLists.txt b/examples/run/CMakeLists.txt index cd6b0520e0..7cff188ca6 100644 --- a/examples/run/CMakeLists.txt +++ b/examples/run/CMakeLists.txt @@ -1,5 +1,16 @@ set(TARGET llama-run) add_executable(${TARGET} run.cpp linenoise.cpp/linenoise.cpp) + +# TODO: avoid copying this code block from common/CMakeLists.txt +set(LLAMA_RUN_EXTRA_LIBS "") +if (LLAMA_CURL) + find_package(CURL REQUIRED) + target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL) + include_directories(${CURL_INCLUDE_DIRS}) + find_library(CURL_LIBRARY curl REQUIRED) + set(LLAMA_RUN_EXTRA_LIBS ${LLAMA_RUN_EXTRA_LIBS} ${CURL_LIBRARY}) +endif () + install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT} ${LLAMA_RUN_EXTRA_LIBS}) target_compile_features(${TARGET} PRIVATE cxx_std_17) diff 
--git a/examples/run/run.cpp b/examples/run/run.cpp index 68e94b0b3c..e63c2aac33 100644 --- a/examples/run/run.cpp +++ b/examples/run/run.cpp @@ -697,8 +697,10 @@ class LlamaData { std::vector headers = { "User-Agent: llama-cpp", "Accept: application/json" }; std::string url; + std::string model_endpoint = get_model_endpoint(); + if (pos == std::string::npos) { - auto [model_name, manifest_url] = extract_model_and_tag(model, "https://huggingface.co/v2/"); + auto [model_name, manifest_url] = extract_model_and_tag(model, model_endpoint + "v2/"); hfr = model_name; nlohmann::json manifest; @@ -713,7 +715,7 @@ class LlamaData { hff = model.substr(pos + 1); } - url = "https://huggingface.co/" + hfr + "/resolve/main/" + hff; + url = model_endpoint + hfr + "/resolve/main/" + hff; return download(url, bn, true, headers); } diff --git a/examples/server/public/index.html.gz b/examples/server/public/index.html.gz index d0e6da8e4a..674e227571 100644 Binary files a/examples/server/public/index.html.gz and b/examples/server/public/index.html.gz differ diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 760c364643..d87bda1a0d 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1705,6 +1705,8 @@ private: }; struct server_response { + bool running = true; + // for keeping track of all tasks waiting for the result std::unordered_set waiting_task_ids; @@ -1759,6 +1761,10 @@ struct server_response { while (true) { std::unique_lock lock(mutex_results); condition_results.wait(lock, [&]{ + if (!running) { + SRV_DBG("%s : queue result stop\n", __func__); + std::terminate(); // we cannot return here since the caller is HTTP code + } return !queue_results.empty(); }); @@ -1789,6 +1795,10 @@ struct server_response { } std::cv_status cr_res = condition_results.wait_for(lock, std::chrono::seconds(timeout)); + if (!running) { + SRV_DBG("%s : queue result stop\n", __func__); + std::terminate(); // we cannot return here since the caller is HTTP code + } if (cr_res == std::cv_status::timeout) { return nullptr; } @@ -1818,6 +1828,12 @@ struct server_response { } } } + + // terminate the waiting loop + void terminate() { + running = false; + condition_results.notify_all(); + } }; struct server_context { @@ -3891,6 +3907,21 @@ int main(int argc, char ** argv) { res_ok(res, {{ "success", true }}); }; + const auto handle_api_show = [&ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) { + json data = { + { + "template", common_chat_templates_source(ctx_server.chat_templates.get()), + }, + { + "model_info", { + { "llama.context_length", ctx_server.slots.back().n_ctx, }, + } + }, + }; + + res_ok(res, data); + }; + // handle completion-like requests (completion, chat, infill) // we can optionally provide a custom format for partial results and final results const auto handle_completions_impl = [&ctx_server, &res_error, &res_ok]( @@ -4455,6 +4486,7 @@ int main(int argc, char ** argv) { svr->Get ("/metrics", handle_metrics); svr->Get ("/props", handle_props); svr->Post("/props", handle_props_change); + svr->Post("/api/show", handle_api_show); svr->Get ("/models", handle_models); // public endpoint (no API key check) svr->Get ("/v1/models", handle_models); // public endpoint (no API key check) svr->Post("/completion", handle_completions); // legacy @@ -4491,9 +4523,10 @@ int main(int argc, char ** argv) { svr->new_task_queue = [¶ms] { return new httplib::ThreadPool(params.n_threads_http); }; // clean up function, to be called before exit - auto clean_up = [&svr]() { 
+ auto clean_up = [&svr, &ctx_server]() { SRV_INF("%s: cleaning up before exit...\n", __func__); svr->stop(); + ctx_server.queue_results.terminate(); llama_backend_free(); }; @@ -4534,7 +4567,7 @@ int main(int argc, char ** argv) { if (!ctx_server.load_model(params)) { clean_up(); - // t.join(); // FIXME: see below + t.join(); LOG_ERR("%s: exiting due to model loading error\n", __func__); return 1; } @@ -4582,7 +4615,7 @@ int main(int argc, char ** argv) { ctx_server.queue_tasks.start_loop(); clean_up(); - // t.join(); // FIXME: http thread may stuck if there is an on-going request. we don't need to care about this for now as the HTTP connection will already be closed at this point, but it's better to fix this + t.join(); return 0; } diff --git a/examples/server/tests/README.md b/examples/server/tests/README.md index 12816bfa87..652dea0382 100644 --- a/examples/server/tests/README.md +++ b/examples/server/tests/README.md @@ -17,7 +17,7 @@ To mitigate it, you can increase values in `n_predict`, `kv_size`. ```shell cd ../../.. -cmake -B build -DLLAMA_CURL=ON +cmake -B build cmake --build build --target llama-server ``` diff --git a/examples/server/tests/unit/test_embedding.py b/examples/server/tests/unit/test_embedding.py index 8b0eb42b09..0feb452ccf 100644 --- a/examples/server/tests/unit/test_embedding.py +++ b/examples/server/tests/unit/test_embedding.py @@ -49,6 +49,26 @@ def test_embedding_multiple(): assert len(d['embedding']) > 1 +def test_embedding_multiple_with_fa(): + server = ServerPreset.bert_bge_small_with_fa() + server.pooling = 'last' + server.start() + # one of these should trigger the FA branch (i.e. context size % 256 == 0) + res = server.make_request("POST", "/v1/embeddings", data={ + "input": [ + "a "*253, + "b "*254, + "c "*255, + "d "*256, + ], + }) + assert res.status_code == 200 + assert len(res.body['data']) == 4 + for d in res.body['data']: + assert 'embedding' in d + assert len(d['embedding']) > 1 + + @pytest.mark.parametrize( "input,is_multi_prompt", [ diff --git a/examples/server/tests/utils.py b/examples/server/tests/utils.py index 30aa866095..4dc2062a8e 100644 --- a/examples/server/tests/utils.py +++ b/examples/server/tests/utils.py @@ -323,6 +323,21 @@ class ServerPreset: server.server_embeddings = True return server + @staticmethod + def bert_bge_small_with_fa() -> ServerProcess: + server = ServerProcess() + server.model_hf_repo = "ggml-org/models" + server.model_hf_file = "bert-bge-small/ggml-model-f16.gguf" + server.model_alias = "bert-bge-small" + server.n_ctx = 1024 + server.n_batch = 300 + server.n_ubatch = 300 + server.n_slots = 2 + server.fa = True + server.seed = 42 + server.server_embeddings = True + return server + @staticmethod def tinyllama_infill() -> ServerProcess: server = ServerProcess() diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index 55cf3230d9..aba2f27f9b 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -3,7 +3,7 @@ #include "common.h" #include "log.h" #include "llama.h" -#include "common/base64.hpp" +#include "base64.hpp" // increase max payload length to allow use of larger context size #define CPPHTTPLIB_FORM_URL_ENCODED_PAYLOAD_MAX_LENGTH 1048576 diff --git a/examples/server/webui/package-lock.json b/examples/server/webui/package-lock.json index 056592dd47..b2e3cf94ac 100644 --- a/examples/server/webui/package-lock.json +++ b/examples/server/webui/package-lock.json @@ -10,9 +10,11 @@ "dependencies": { "@heroicons/react": "^2.2.0", "@sec-ant/readable-stream": "^0.6.0", + 
"@tailwindcss/postcss": "^4.1.1", + "@tailwindcss/vite": "^4.1.1", "@vscode/markdown-it-katex": "^1.1.1", "autoprefixer": "^10.4.20", - "daisyui": "^4.12.14", + "daisyui": "^5.0.12", "dexie": "^4.0.11", "highlight.js": "^11.10.0", "katex": "^0.16.15", @@ -26,7 +28,7 @@ "remark-breaks": "^4.0.0", "remark-gfm": "^4.0.0", "remark-math": "^6.0.0", - "tailwindcss": "^3.4.15", + "tailwindcss": "^4.1.1", "textlinestream": "^1.1.1", "vite-plugin-singlefile": "^2.0.3" }, @@ -979,27 +981,11 @@ "url": "https://github.com/sponsors/nzakas" } }, - "node_modules/@isaacs/cliui": { - "version": "8.0.2", - "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", - "integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==", - "license": "ISC", - "dependencies": { - "string-width": "^5.1.2", - "string-width-cjs": "npm:string-width@^4.2.0", - "strip-ansi": "^7.0.1", - "strip-ansi-cjs": "npm:strip-ansi@^6.0.1", - "wrap-ansi": "^8.1.0", - "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0" - }, - "engines": { - "node": ">=12" - } - }, "node_modules/@jridgewell/gen-mapping": { "version": "0.3.8", "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.8.tgz", "integrity": "sha512-imAbBGkb+ebQyxKgzv5Hu2nmROxoDOXHh80evxdoXNOrvAnVx7zimzc1Oo5h9RlfV4vPXaE2iM5pOFbvOCClWA==", + "dev": true, "license": "MIT", "dependencies": { "@jridgewell/set-array": "^1.2.1", @@ -1014,6 +1000,7 @@ "version": "3.1.2", "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", + "dev": true, "license": "MIT", "engines": { "node": ">=6.0.0" @@ -1023,6 +1010,7 @@ "version": "1.2.1", "resolved": "https://registry.npmjs.org/@jridgewell/set-array/-/set-array-1.2.1.tgz", "integrity": "sha512-R8gLRTZeyp03ymzP/6Lil/28tGeGEzhx1q2k703KGWRAI1VdvPIXdG70VJc2pAMw3NA6JKL5hhFu1sJX0Mnn/A==", + "dev": true, "license": "MIT", "engines": { "node": ">=6.0.0" @@ -1032,12 +1020,14 @@ "version": "1.5.0", "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.0.tgz", "integrity": "sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==", + "dev": true, "license": "MIT" }, "node_modules/@jridgewell/trace-mapping": { "version": "0.3.25", "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.25.tgz", "integrity": "sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ==", + "dev": true, "license": "MIT", "dependencies": { "@jridgewell/resolve-uri": "^3.1.0", @@ -1048,6 +1038,7 @@ "version": "2.1.5", "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", "integrity": "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==", + "dev": true, "license": "MIT", "dependencies": { "@nodelib/fs.stat": "2.0.5", @@ -1061,6 +1052,7 @@ "version": "2.0.5", "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz", "integrity": "sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==", + "dev": true, "license": "MIT", "engines": { "node": ">= 8" @@ -1070,6 +1062,7 @@ "version": "1.2.8", "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz", "integrity": "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==", + "dev": 
true, "license": "MIT", "dependencies": { "@nodelib/fs.scandir": "2.1.5", @@ -1079,16 +1072,6 @@ "node": ">= 8" } }, - "node_modules/@pkgjs/parseargs": { - "version": "0.11.0", - "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", - "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==", - "license": "MIT", - "optional": true, - "engines": { - "node": ">=14" - } - }, "node_modules/@rollup/rollup-android-arm-eabi": { "version": "4.34.2", "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.34.2.tgz", @@ -1342,6 +1325,243 @@ "integrity": "sha512-uiBh8DrB5FN35gP6/o8JEhEQ7/ci1jUsOZO/VMUjyvTpjtV54VstOXVj1TvTj/wsT23pfX6butxxh3qufsW3+g==", "license": "MIT" }, + "node_modules/@tailwindcss/node": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/node/-/node-4.1.1.tgz", + "integrity": "sha512-xvlh4pvfG/bkv0fEtJDABAm1tjtSmSyi2QmS4zyj1EKNI1UiOYiUq1IphSwDsNJ5vJ9cWEGs4rJXpUdCN2kujQ==", + "license": "MIT", + "dependencies": { + "enhanced-resolve": "^5.18.1", + "jiti": "^2.4.2", + "lightningcss": "1.29.2", + "tailwindcss": "4.1.1" + } + }, + "node_modules/@tailwindcss/oxide": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide/-/oxide-4.1.1.tgz", + "integrity": "sha512-7+YBgnPQ4+jv6B6WVOerJ6WOzDzNJXrRKDts674v6TKAqFqYRr9+EBtSziO7nNcwQ8JtoZNMeqA+WJDjtCM/7w==", + "license": "MIT", + "engines": { + "node": ">= 10" + }, + "optionalDependencies": { + "@tailwindcss/oxide-android-arm64": "4.1.1", + "@tailwindcss/oxide-darwin-arm64": "4.1.1", + "@tailwindcss/oxide-darwin-x64": "4.1.1", + "@tailwindcss/oxide-freebsd-x64": "4.1.1", + "@tailwindcss/oxide-linux-arm-gnueabihf": "4.1.1", + "@tailwindcss/oxide-linux-arm64-gnu": "4.1.1", + "@tailwindcss/oxide-linux-arm64-musl": "4.1.1", + "@tailwindcss/oxide-linux-x64-gnu": "4.1.1", + "@tailwindcss/oxide-linux-x64-musl": "4.1.1", + "@tailwindcss/oxide-win32-arm64-msvc": "4.1.1", + "@tailwindcss/oxide-win32-x64-msvc": "4.1.1" + } + }, + "node_modules/@tailwindcss/oxide-android-arm64": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-android-arm64/-/oxide-android-arm64-4.1.1.tgz", + "integrity": "sha512-gTyRzfdParpoCU1yyUC/iN6XK6T0Ra4bDlF8Aeul5NP9cLzKEZDogdNVNGv5WZmCDkVol7qlex7TMmcfytMmmw==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@tailwindcss/oxide-darwin-arm64": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-arm64/-/oxide-darwin-arm64-4.1.1.tgz", + "integrity": "sha512-dI0QbdMWBvLB3MtaTKetzUKG9CUUQow8JSP4Nm+OxVokeZ+N+f1OmZW/hW1LzMxpx9RQCBgSRL+IIvKRat5Wdg==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@tailwindcss/oxide-darwin-x64": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-x64/-/oxide-darwin-x64-4.1.1.tgz", + "integrity": "sha512-2Y+NPQOTRBCItshPgY/CWg4bKi7E9evMg4bgdb6h9iZObCZLOe3doPcuSxGS3DB0dKyMFKE8pTdWtFUbxZBMSA==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@tailwindcss/oxide-freebsd-x64": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-freebsd-x64/-/oxide-freebsd-x64-4.1.1.tgz", + "integrity": 
"sha512-N97NGMsB/7CHShbc5ube4dcsW/bYENkBrg8yWi8ieN9boYVRdw3cZviVryV/Nfu9bKbBV9kUvduFF2qBI7rEqg==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@tailwindcss/oxide-linux-arm-gnueabihf": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm-gnueabihf/-/oxide-linux-arm-gnueabihf-4.1.1.tgz", + "integrity": "sha512-33Lk6KbHnUZbXqza6RWNFo9wqPQ4+H5BAn1CkUUfC1RZ1vYbyDN6+iJPj53wmnWJ3mhRI8jWt3Jt1fO02IVdUQ==", + "cpu": [ + "arm" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@tailwindcss/oxide-linux-arm64-gnu": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-gnu/-/oxide-linux-arm64-gnu-4.1.1.tgz", + "integrity": "sha512-LyW35RzSUy+80WYScv03HKasAUmMFDaSbNpWfk1gG5gEE9kuRGnDzSrqMoLAmY/kzMCYP/1kqmUiAx8EFLkI2A==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@tailwindcss/oxide-linux-arm64-musl": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-musl/-/oxide-linux-arm64-musl-4.1.1.tgz", + "integrity": "sha512-1KPnDMlHdqjPTUSFjx55pafvs8RZXRgxfeRgUrukwDKkuj7gFk28vW3Mx65YdiugAc9NWs3VgueZWaM1Po6uGw==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@tailwindcss/oxide-linux-x64-gnu": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-gnu/-/oxide-linux-x64-gnu-4.1.1.tgz", + "integrity": "sha512-4WdzA+MRlsinEEE6yxNMLJxpw0kE9XVipbAKdTL8BeUpyC2TdA3TL46lBulXzKp3BIxh3nqyR/UCqzl5o+3waQ==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@tailwindcss/oxide-linux-x64-musl": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-musl/-/oxide-linux-x64-musl-4.1.1.tgz", + "integrity": "sha512-q7Ugbw3ARcjCW2VMUYrcMbJ6aMQuWPArBBE2EqC/swPZTdGADvMQSlvR0VKusUM4HoSsO7ZbvcZ53YwR57+AKw==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@tailwindcss/oxide-win32-arm64-msvc": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.1.1.tgz", + "integrity": "sha512-0KpqsovgHcIzm7eAGzzEZsEs0/nPYXnRBv+aPq/GehpNQuE/NAQu+YgZXIIof+VflDFuyXOEnaFr7T5MZ1INhA==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@tailwindcss/oxide-win32-x64-msvc": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-x64-msvc/-/oxide-win32-x64-msvc-4.1.1.tgz", + "integrity": "sha512-B1mjeXNS26kBOHv5sXARf6Wd0PWHV9x1TDlW0ummrBUOUAxAy5wcy4Nii1wzNvCdvC448hgiL06ylhwAbNthmg==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@tailwindcss/postcss": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/postcss/-/postcss-4.1.1.tgz", + "integrity": "sha512-GX9AEM+msH0i2Yh1b6CuDRaZRo3kmbvIrLbSfvJ53C3uaAgsQ//fTQAh9HMQ6t1a9zvoUptlYqG//plWsBQTCw==", + 
"license": "MIT", + "dependencies": { + "@alloc/quick-lru": "^5.2.0", + "@tailwindcss/node": "4.1.1", + "@tailwindcss/oxide": "4.1.1", + "postcss": "^8.4.41", + "tailwindcss": "4.1.1" + } + }, + "node_modules/@tailwindcss/vite": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/vite/-/vite-4.1.1.tgz", + "integrity": "sha512-tFTkRZwXq4XKr3S2dUZBxy80wbWYHdDSsu4QOB1yE1HJFKjfxKVpXtup4dyTVdQcLInoHC9lZXFPHnjoBP774g==", + "license": "MIT", + "dependencies": { + "@tailwindcss/node": "4.1.1", + "@tailwindcss/oxide": "4.1.1", + "tailwindcss": "4.1.1" + }, + "peerDependencies": { + "vite": "^5.2.0 || ^6" + } + }, "node_modules/@types/babel__core": { "version": "7.20.5", "resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.5.tgz", @@ -1815,22 +2035,11 @@ "url": "https://github.com/sponsors/epoberezkin" } }, - "node_modules/ansi-regex": { - "version": "6.1.0", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.1.0.tgz", - "integrity": "sha512-7HSX4QQb4CspciLpVFwyRe79O3xsIZDDLER21kERQ71oaPodF8jL725AgJMFAYbooIqolJoRLuM81SpeUkpkvA==", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/ansi-regex?sponsor=1" - } - }, "node_modules/ansi-styles": { "version": "4.3.0", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, "license": "MIT", "dependencies": { "color-convert": "^2.0.1" @@ -1842,31 +2051,6 @@ "url": "https://github.com/chalk/ansi-styles?sponsor=1" } }, - "node_modules/any-promise": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/any-promise/-/any-promise-1.3.0.tgz", - "integrity": "sha512-7UvmKalWRt1wgjL1RrGxoSJW/0QZFIegpeGvZG9kjp8vrRu55XTHbwnqq2GpXm9uLbcuhxm3IqX9OB4MZR1b2A==", - "license": "MIT" - }, - "node_modules/anymatch": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.3.tgz", - "integrity": "sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==", - "license": "ISC", - "dependencies": { - "normalize-path": "^3.0.0", - "picomatch": "^2.0.4" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/arg": { - "version": "5.0.2", - "resolved": "https://registry.npmjs.org/arg/-/arg-5.0.2.tgz", - "integrity": "sha512-PYjyFOLKQ9y57JvQ6QLo8dAgNqswh8M1RMJYdQduT6xbWSgK36P/Z/v+p888pM69jMMfS8Xd8F6I1kQ/I9HUGg==", - "license": "MIT" - }, "node_modules/argparse": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", @@ -1925,20 +2109,9 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "dev": true, "license": "MIT" }, - "node_modules/binary-extensions": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.3.0.tgz", - "integrity": "sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==", - "license": "MIT", - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/brace-expansion": { "version": "1.1.11", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", @@ -2011,15 +2184,6 @@ "node": ">=6" } }, - "node_modules/camelcase-css": { - 
"version": "2.0.1", - "resolved": "https://registry.npmjs.org/camelcase-css/-/camelcase-css-2.0.1.tgz", - "integrity": "sha512-QOSvevhslijgYwRx6Rv7zKdMF8lbRmx+uQGx2+vDc+KI/eBnsy9kit5aj23AgGu3pa4t9AgwbnXWqS+iOY+2aA==", - "license": "MIT", - "engines": { - "node": ">= 6" - } - }, "node_modules/caniuse-lite": { "version": "1.0.30001697", "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001697.tgz", @@ -2107,46 +2271,11 @@ "url": "https://github.com/sponsors/wooorm" } }, - "node_modules/chokidar": { - "version": "3.6.0", - "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz", - "integrity": "sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==", - "license": "MIT", - "dependencies": { - "anymatch": "~3.1.2", - "braces": "~3.0.2", - "glob-parent": "~5.1.2", - "is-binary-path": "~2.1.0", - "is-glob": "~4.0.1", - "normalize-path": "~3.0.0", - "readdirp": "~3.6.0" - }, - "engines": { - "node": ">= 8.10.0" - }, - "funding": { - "url": "https://paulmillr.com/funding/" - }, - "optionalDependencies": { - "fsevents": "~2.3.2" - } - }, - "node_modules/chokidar/node_modules/glob-parent": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", - "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", - "license": "ISC", - "dependencies": { - "is-glob": "^4.0.1" - }, - "engines": { - "node": ">= 6" - } - }, "node_modules/color-convert": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, "license": "MIT", "dependencies": { "color-name": "~1.1.4" @@ -2159,6 +2288,7 @@ "version": "1.1.4", "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true, "license": "MIT" }, "node_modules/colorjs.io": { @@ -2214,6 +2344,7 @@ "version": "7.0.6", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", + "dev": true, "license": "MIT", "dependencies": { "path-key": "^3.1.0", @@ -2224,60 +2355,19 @@ "node": ">= 8" } }, - "node_modules/css-selector-tokenizer": { - "version": "0.8.0", - "resolved": "https://registry.npmjs.org/css-selector-tokenizer/-/css-selector-tokenizer-0.8.0.tgz", - "integrity": "sha512-Jd6Ig3/pe62/qe5SBPTN8h8LeUg/pT4lLgtavPf7updwwHpvFzxvOQBHYj2LZDMjUnBzgvIUSjRcf6oT5HzHFg==", - "license": "MIT", - "dependencies": { - "cssesc": "^3.0.0", - "fastparse": "^1.1.2" - } - }, - "node_modules/cssesc": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/cssesc/-/cssesc-3.0.0.tgz", - "integrity": "sha512-/Tb/JcjK111nNScGob5MNtsntNM1aCNUDipB/TkwZFhyDrrE47SOx/18wF2bbjgc3ZzCSKW1T5nt5EbFoAz/Vg==", - "license": "MIT", - "bin": { - "cssesc": "bin/cssesc" - }, - "engines": { - "node": ">=4" - } - }, "node_modules/csstype": { "version": "3.1.3", "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz", "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==", "license": "MIT" }, - "node_modules/culori": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/culori/-/culori-3.3.0.tgz", - 
"integrity": "sha512-pHJg+jbuFsCjz9iclQBqyL3B2HLCBF71BwVNujUYEvCeQMvV97R59MNK3R2+jgJ3a1fcZgI9B3vYgz8lzr/BFQ==", - "license": "MIT", - "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - } - }, "node_modules/daisyui": { - "version": "4.12.23", - "resolved": "https://registry.npmjs.org/daisyui/-/daisyui-4.12.23.tgz", - "integrity": "sha512-EM38duvxutJ5PD65lO/AFMpcw+9qEy6XAZrTpzp7WyaPeO/l+F/Qiq0ECHHmFNcFXh5aVoALY4MGrrxtCiaQCQ==", + "version": "5.0.12", + "resolved": "https://registry.npmjs.org/daisyui/-/daisyui-5.0.12.tgz", + "integrity": "sha512-01DU0eYBcHgPtuf5fxcrkGkIN6/Uyaqmkle5Yo3ZyW9YVAu036ALZbjv2KH5euvUbeQ4r9q3gAarGcf7Tywhng==", "license": "MIT", - "dependencies": { - "css-selector-tokenizer": "^0.8", - "culori": "^3", - "picocolors": "^1", - "postcss-js": "^4" - }, - "engines": { - "node": ">=16.9.0" - }, "funding": { - "type": "opencollective", - "url": "https://opencollective.com/daisyui" + "url": "https://github.com/saadeghi/daisyui?sponsor=1" } }, "node_modules/debug": { @@ -2326,6 +2416,15 @@ "node": ">=6" } }, + "node_modules/detect-libc": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.0.3.tgz", + "integrity": "sha512-bwy0MGW55bG41VqxxypOsdSdGqLwXPI/focwgTYCFMbdUiBAxLg9CFzG08sz2aqzknwiX7Hkl0bQENjg8iLByw==", + "license": "Apache-2.0", + "engines": { + "node": ">=8" + } + }, "node_modules/devlop": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/devlop/-/devlop-1.1.0.tgz", @@ -2345,35 +2444,24 @@ "integrity": "sha512-SOKO002EqlvBYYKQSew3iymBoN2EQ4BDw/3yprjh7kAfFzjBYkaMNa/pZvcA7HSWlcKSQb9XhPe3wKyQ0x4A8A==", "license": "Apache-2.0" }, - "node_modules/didyoumean": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/didyoumean/-/didyoumean-1.2.2.tgz", - "integrity": "sha512-gxtyfqMg7GKyhQmb056K7M3xszy/myH8w+B4RT+QXBQsvAOdc3XymqDDPHx1BgPgsdAA5SIifona89YtRATDzw==", - "license": "Apache-2.0" - }, - "node_modules/dlv": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/dlv/-/dlv-1.1.3.tgz", - "integrity": "sha512-+HlytyjlPKnIG8XuRG8WvmBP8xs8P71y+SKKS6ZXWoEgLuePxtDoUEiH7WkdePWrQ5JBpE6aoVqfZfJUQkjXwA==", - "license": "MIT" - }, - "node_modules/eastasianwidth": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz", - "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==", - "license": "MIT" - }, "node_modules/electron-to-chromium": { "version": "1.5.91", "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.91.tgz", "integrity": "sha512-sNSHHyq048PFmZY4S90ax61q+gLCs0X0YmcOII9wG9S2XwbVr+h4VW2wWhnbp/Eys3cCwTxVF292W3qPaxIapQ==", "license": "ISC" }, - "node_modules/emoji-regex": { - "version": "9.2.2", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", - "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", - "license": "MIT" + "node_modules/enhanced-resolve": { + "version": "5.18.1", + "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.18.1.tgz", + "integrity": "sha512-ZSW3ma5GkcQBIpwZTSRAI8N71Uuwgs93IezB7mf7R60tC8ZbJideoDNKjHn2O9KIlx6rkGTTEk1xUCK2E1Y2Yg==", + "license": "MIT", + "dependencies": { + "graceful-fs": "^4.2.4", + "tapable": "^2.2.0" + }, + "engines": { + "node": ">=10.13.0" + } }, "node_modules/entities": { "version": "4.5.0", @@ -2653,6 +2741,7 @@ "version": "3.3.3", "resolved": 
"https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.3.tgz", "integrity": "sha512-7MptL8U0cqcFdzIzwOTHoilX9x5BrNqye7Z/LuC7kCMRio1EMSyqRK3BEAUD7sXRq4iT4AzTVuZdhgQ2TCvYLg==", + "dev": true, "license": "MIT", "dependencies": { "@nodelib/fs.stat": "^2.0.2", @@ -2669,6 +2758,7 @@ "version": "5.1.2", "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", + "dev": true, "license": "ISC", "dependencies": { "is-glob": "^4.0.1" @@ -2691,16 +2781,11 @@ "dev": true, "license": "MIT" }, - "node_modules/fastparse": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/fastparse/-/fastparse-1.1.2.tgz", - "integrity": "sha512-483XLLxTVIwWK3QTrMGRqUfUpoOs/0hbQrl2oz4J0pAcm3A3bu84wxTFqGqkJzewCLdME38xJLJAxBABfQT8sQ==", - "license": "MIT" - }, "node_modules/fastq": { "version": "1.19.0", "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.19.0.tgz", "integrity": "sha512-7SFSRCNjBQIZH/xZR3iy5iQYR8aGBE0h3VG6/cwlbrpdciNYBMotQav8c1XI3HjHH+NikUpP53nPdlZSdWmFzA==", + "dev": true, "license": "ISC", "dependencies": { "reusify": "^1.0.4" @@ -2769,22 +2854,6 @@ "dev": true, "license": "ISC" }, - "node_modules/foreground-child": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.0.tgz", - "integrity": "sha512-Ld2g8rrAyMYFXBhEqMz8ZAHBi4J4uS1i/CxGMDnjyFWddMXLVcDp051DZfu+t7+ab7Wv6SMqpWmyFIj5UbfFvg==", - "license": "ISC", - "dependencies": { - "cross-spawn": "^7.0.0", - "signal-exit": "^4.0.1" - }, - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, "node_modules/fraction.js": { "version": "4.3.7", "resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-4.3.7.tgz", @@ -2812,15 +2881,6 @@ "node": "^8.16.0 || ^10.6.0 || >=11.0.0" } }, - "node_modules/function-bind": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", - "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, "node_modules/gensync": { "version": "1.0.0-beta.2", "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz", @@ -2831,30 +2891,11 @@ "node": ">=6.9.0" } }, - "node_modules/glob": { - "version": "10.4.5", - "resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz", - "integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==", - "license": "ISC", - "dependencies": { - "foreground-child": "^3.1.0", - "jackspeak": "^3.1.2", - "minimatch": "^9.0.4", - "minipass": "^7.1.2", - "package-json-from-dist": "^1.0.0", - "path-scurry": "^1.11.1" - }, - "bin": { - "glob": "dist/esm/bin.mjs" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, "node_modules/glob-parent": { "version": "6.0.2", "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz", "integrity": "sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==", + "dev": true, "license": "ISC", "dependencies": { "is-glob": "^4.0.3" @@ -2863,30 +2904,6 @@ "node": ">=10.13.0" } }, - "node_modules/glob/node_modules/brace-expansion": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", - "integrity": 
"sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", - "license": "MIT", - "dependencies": { - "balanced-match": "^1.0.0" - } - }, - "node_modules/glob/node_modules/minimatch": { - "version": "9.0.5", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", - "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", - "license": "ISC", - "dependencies": { - "brace-expansion": "^2.0.1" - }, - "engines": { - "node": ">=16 || 14 >=14.17" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, "node_modules/globals": { "version": "15.14.0", "resolved": "https://registry.npmjs.org/globals/-/globals-15.14.0.tgz", @@ -2900,6 +2917,12 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/graceful-fs": { + "version": "4.2.11", + "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", + "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", + "license": "ISC" + }, "node_modules/graphemer": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/graphemer/-/graphemer-1.4.0.tgz", @@ -2917,18 +2940,6 @@ "node": ">=8" } }, - "node_modules/hasown": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", - "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", - "license": "MIT", - "dependencies": { - "function-bind": "^1.1.2" - }, - "engines": { - "node": ">= 0.4" - } - }, "node_modules/hast-util-from-dom": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/hast-util-from-dom/-/hast-util-from-dom-5.0.1.tgz", @@ -3190,33 +3201,6 @@ "url": "https://github.com/sponsors/wooorm" } }, - "node_modules/is-binary-path": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", - "integrity": "sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==", - "license": "MIT", - "dependencies": { - "binary-extensions": "^2.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/is-core-module": { - "version": "2.16.1", - "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.16.1.tgz", - "integrity": "sha512-UfoeMA6fIJ8wTYFEUjelnaGI67v6+N7qXJEvQuIGa99l4xsCruSYOVSQ0uPANn4dAzm8lkYPaKLrrijLq7x23w==", - "license": "MIT", - "dependencies": { - "hasown": "^2.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, "node_modules/is-decimal": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/is-decimal/-/is-decimal-2.0.1.tgz", @@ -3231,24 +3215,17 @@ "version": "2.1.1", "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==", + "dev": true, "license": "MIT", "engines": { "node": ">=0.10.0" } }, - "node_modules/is-fullwidth-code-point": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", - "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, "node_modules/is-glob": { "version": "4.0.3", "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz", "integrity": 
"sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==", + "dev": true, "license": "MIT", "dependencies": { "is-extglob": "^2.1.1" @@ -3292,30 +3269,16 @@ "version": "2.0.0", "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "dev": true, "license": "ISC" }, - "node_modules/jackspeak": { - "version": "3.4.3", - "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-3.4.3.tgz", - "integrity": "sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==", - "license": "BlueOak-1.0.0", - "dependencies": { - "@isaacs/cliui": "^8.0.2" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - }, - "optionalDependencies": { - "@pkgjs/parseargs": "^0.11.0" - } - }, "node_modules/jiti": { - "version": "1.21.7", - "resolved": "https://registry.npmjs.org/jiti/-/jiti-1.21.7.tgz", - "integrity": "sha512-/imKNG4EbWNrVjoNC/1H5/9GFy+tqjGBHCaSsN+P2RnPqjsLmv6UD3Ej+Kj8nBWaRAwyk7kK5ZUc+OEatnTR3A==", + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/jiti/-/jiti-2.4.2.tgz", + "integrity": "sha512-rg9zJN+G4n2nfJl5MW3BMygZX56zKPNVEYYqq7adpmMh4Jn2QNEwhvQlFy6jPVdcod7txZtKHWnyZiA3a0zP7A==", "license": "MIT", "bin": { - "jiti": "bin/jiti.js" + "jiti": "lib/jiti-cli.mjs" } }, "node_modules/js-tokens": { @@ -3424,23 +3387,233 @@ "node": ">= 0.8.0" } }, - "node_modules/lilconfig": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.3.tgz", - "integrity": "sha512-/vlFKAoH5Cgt3Ie+JLhRbwOsCQePABiU3tJ1egGvyQ+33R/vcwM2Zl2QR/LzjsBeItPt3oSVXapn+m4nQDvpzw==", - "license": "MIT", + "node_modules/lightningcss": { + "version": "1.29.2", + "resolved": "https://registry.npmjs.org/lightningcss/-/lightningcss-1.29.2.tgz", + "integrity": "sha512-6b6gd/RUXKaw5keVdSEtqFVdzWnU5jMxTUjA2bVcMNPLwSQ08Sv/UodBVtETLCn7k4S1Ibxwh7k68IwLZPgKaA==", + "license": "MPL-2.0", + "dependencies": { + "detect-libc": "^2.0.3" + }, "engines": { - "node": ">=14" + "node": ">= 12.0.0" }, "funding": { - "url": "https://github.com/sponsors/antonk52" + "type": "opencollective", + "url": "https://opencollective.com/parcel" + }, + "optionalDependencies": { + "lightningcss-darwin-arm64": "1.29.2", + "lightningcss-darwin-x64": "1.29.2", + "lightningcss-freebsd-x64": "1.29.2", + "lightningcss-linux-arm-gnueabihf": "1.29.2", + "lightningcss-linux-arm64-gnu": "1.29.2", + "lightningcss-linux-arm64-musl": "1.29.2", + "lightningcss-linux-x64-gnu": "1.29.2", + "lightningcss-linux-x64-musl": "1.29.2", + "lightningcss-win32-arm64-msvc": "1.29.2", + "lightningcss-win32-x64-msvc": "1.29.2" } }, - "node_modules/lines-and-columns": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", - "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==", - "license": "MIT" + "node_modules/lightningcss-darwin-arm64": { + "version": "1.29.2", + "resolved": "https://registry.npmjs.org/lightningcss-darwin-arm64/-/lightningcss-darwin-arm64-1.29.2.tgz", + "integrity": "sha512-cK/eMabSViKn/PG8U/a7aCorpeKLMlK0bQeNHmdb7qUnBkNPnL+oV5DjJUo0kqWsJUapZsM4jCfYItbqBDvlcA==", + "cpu": [ + "arm64" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + 
"node_modules/lightningcss-darwin-x64": { + "version": "1.29.2", + "resolved": "https://registry.npmjs.org/lightningcss-darwin-x64/-/lightningcss-darwin-x64-1.29.2.tgz", + "integrity": "sha512-j5qYxamyQw4kDXX5hnnCKMf3mLlHvG44f24Qyi2965/Ycz829MYqjrVg2H8BidybHBp9kom4D7DR5VqCKDXS0w==", + "cpu": [ + "x64" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-freebsd-x64": { + "version": "1.29.2", + "resolved": "https://registry.npmjs.org/lightningcss-freebsd-x64/-/lightningcss-freebsd-x64-1.29.2.tgz", + "integrity": "sha512-wDk7M2tM78Ii8ek9YjnY8MjV5f5JN2qNVO+/0BAGZRvXKtQrBC4/cn4ssQIpKIPP44YXw6gFdpUF+Ps+RGsCwg==", + "cpu": [ + "x64" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-arm-gnueabihf": { + "version": "1.29.2", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm-gnueabihf/-/lightningcss-linux-arm-gnueabihf-1.29.2.tgz", + "integrity": "sha512-IRUrOrAF2Z+KExdExe3Rz7NSTuuJ2HvCGlMKoquK5pjvo2JY4Rybr+NrKnq0U0hZnx5AnGsuFHjGnNT14w26sg==", + "cpu": [ + "arm" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-arm64-gnu": { + "version": "1.29.2", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-gnu/-/lightningcss-linux-arm64-gnu-1.29.2.tgz", + "integrity": "sha512-KKCpOlmhdjvUTX/mBuaKemp0oeDIBBLFiU5Fnqxh1/DZ4JPZi4evEH7TKoSBFOSOV3J7iEmmBaw/8dpiUvRKlQ==", + "cpu": [ + "arm64" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-arm64-musl": { + "version": "1.29.2", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-musl/-/lightningcss-linux-arm64-musl-1.29.2.tgz", + "integrity": "sha512-Q64eM1bPlOOUgxFmoPUefqzY1yV3ctFPE6d/Vt7WzLW4rKTv7MyYNky+FWxRpLkNASTnKQUaiMJ87zNODIrrKQ==", + "cpu": [ + "arm64" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-x64-gnu": { + "version": "1.29.2", + "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-gnu/-/lightningcss-linux-x64-gnu-1.29.2.tgz", + "integrity": "sha512-0v6idDCPG6epLXtBH/RPkHvYx74CVziHo6TMYga8O2EiQApnUPZsbR9nFNrg2cgBzk1AYqEd95TlrsL7nYABQg==", + "cpu": [ + "x64" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-x64-musl": { + "version": "1.29.2", + "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-musl/-/lightningcss-linux-x64-musl-1.29.2.tgz", + "integrity": "sha512-rMpz2yawkgGT8RULc5S4WiZopVMOFWjiItBT7aSfDX4NQav6M44rhn5hjtkKzB+wMTRlLLqxkeYEtQ3dd9696w==", + "cpu": [ + "x64" + ], + "license": "MPL-2.0", + "optional": true, + 
"os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-win32-arm64-msvc": { + "version": "1.29.2", + "resolved": "https://registry.npmjs.org/lightningcss-win32-arm64-msvc/-/lightningcss-win32-arm64-msvc-1.29.2.tgz", + "integrity": "sha512-nL7zRW6evGQqYVu/bKGK+zShyz8OVzsCotFgc7judbt6wnB2KbiKKJwBE4SGoDBQ1O94RjW4asrCjQL4i8Fhbw==", + "cpu": [ + "arm64" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-win32-x64-msvc": { + "version": "1.29.2", + "resolved": "https://registry.npmjs.org/lightningcss-win32-x64-msvc/-/lightningcss-win32-x64-msvc-1.29.2.tgz", + "integrity": "sha512-EdIUW3B2vLuHmv7urfzMI/h2fmlnOQBk1xlsDxkN1tCWKjNFjfLhGxYk8C8mzpSfr+A6jFFIi8fU6LbQGsRWjA==", + "cpu": [ + "x64" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } }, "node_modules/locate-path": { "version": "6.0.0", @@ -3841,6 +4014,7 @@ "version": "1.4.1", "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==", + "dev": true, "license": "MIT", "engines": { "node": ">= 8" @@ -4454,32 +4628,12 @@ "node": "*" } }, - "node_modules/minipass": { - "version": "7.1.2", - "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", - "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==", - "license": "ISC", - "engines": { - "node": ">=16 || 14 >=14.17" - } - }, "node_modules/ms": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", "license": "MIT" }, - "node_modules/mz": { - "version": "2.7.0", - "resolved": "https://registry.npmjs.org/mz/-/mz-2.7.0.tgz", - "integrity": "sha512-z81GNO7nnYMEhrGh9LeymoE4+Yr0Wn5McHIZMK5cfQCl+NDX08sCZgUc9/6MHni9IWuFLm1Z3HTCXu2z9fN62Q==", - "license": "MIT", - "dependencies": { - "any-promise": "^1.0.0", - "object-assign": "^4.0.1", - "thenify-all": "^1.0.0" - } - }, "node_modules/nanoid": { "version": "3.3.8", "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.8.tgz", @@ -4511,15 +4665,6 @@ "integrity": "sha512-xxOWJsBKtzAq7DY0J+DTzuz58K8e7sJbdgwkbMWQe8UYB6ekmsQ45q0M/tJDsGaZmbC+l7n57UV8Hl5tHxO9uw==", "license": "MIT" }, - "node_modules/normalize-path": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", - "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/normalize-range": { "version": "0.1.2", "resolved": "https://registry.npmjs.org/normalize-range/-/normalize-range-0.1.2.tgz", @@ -4529,24 +4674,6 @@ "node": ">=0.10.0" } }, - "node_modules/object-assign": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", - "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==", - "license": "MIT", - 
"engines": { - "node": ">=0.10.0" - } - }, - "node_modules/object-hash": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/object-hash/-/object-hash-3.0.0.tgz", - "integrity": "sha512-RSn9F68PjH9HqtltsSnqYC1XXoWe9Bju5+213R98cNGttag9q9yAOTzdbsqvIa7aNm5WffBZFpWYr2aWrklWAw==", - "license": "MIT", - "engines": { - "node": ">= 6" - } - }, "node_modules/optionator": { "version": "0.9.4", "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz", @@ -4597,12 +4724,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/package-json-from-dist": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz", - "integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==", - "license": "BlueOak-1.0.0" - }, "node_modules/parent-module": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", @@ -4667,39 +4788,12 @@ "version": "3.1.1", "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "dev": true, "license": "MIT", "engines": { "node": ">=8" } }, - "node_modules/path-parse": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz", - "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==", - "license": "MIT" - }, - "node_modules/path-scurry": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.11.1.tgz", - "integrity": "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==", - "license": "BlueOak-1.0.0", - "dependencies": { - "lru-cache": "^10.2.0", - "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0" - }, - "engines": { - "node": ">=16 || 14 >=14.18" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/path-scurry/node_modules/lru-cache": { - "version": "10.4.3", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz", - "integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==", - "license": "ISC" - }, "node_modules/picocolors": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", @@ -4718,24 +4812,6 @@ "url": "https://github.com/sponsors/jonschlinkert" } }, - "node_modules/pify": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/pify/-/pify-2.3.0.tgz", - "integrity": "sha512-udgsAY+fTnvv7kI7aaxbqwWNb0AHiB0qBO89PZKPkoTmGOgdbrHDKD+0B2X4uTfJ/FT1R09r9gTsjUjNJotuog==", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/pirates": { - "version": "4.0.6", - "resolved": "https://registry.npmjs.org/pirates/-/pirates-4.0.6.tgz", - "integrity": "sha512-saLsH7WeYYPiD25LDuLRRY/i+6HaPYr6G1OUlN39otzkSTxKnubR9RTxS3/Kk50s1g2JTgFwWQDQyplC5/SHZg==", - "license": "MIT", - "engines": { - "node": ">= 6" - } - }, "node_modules/postcss": { "version": "8.5.1", "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.1.tgz", @@ -4764,115 +4840,6 @@ "node": "^10 || ^12 || >=14" } }, - "node_modules/postcss-import": { - "version": "15.1.0", - "resolved": "https://registry.npmjs.org/postcss-import/-/postcss-import-15.1.0.tgz", - "integrity": 
"sha512-hpr+J05B2FVYUAXHeK1YyI267J/dDDhMU6B6civm8hSY1jYJnBXxzKDKDswzJmtLHryrjhnDjqqp/49t8FALew==", - "license": "MIT", - "dependencies": { - "postcss-value-parser": "^4.0.0", - "read-cache": "^1.0.0", - "resolve": "^1.1.7" - }, - "engines": { - "node": ">=14.0.0" - }, - "peerDependencies": { - "postcss": "^8.0.0" - } - }, - "node_modules/postcss-js": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/postcss-js/-/postcss-js-4.0.1.tgz", - "integrity": "sha512-dDLF8pEO191hJMtlHFPRa8xsizHaM82MLfNkUHdUtVEV3tgTp5oj+8qbEqYM57SLfc74KSbw//4SeJma2LRVIw==", - "license": "MIT", - "dependencies": { - "camelcase-css": "^2.0.1" - }, - "engines": { - "node": "^12 || ^14 || >= 16" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - }, - "peerDependencies": { - "postcss": "^8.4.21" - } - }, - "node_modules/postcss-load-config": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/postcss-load-config/-/postcss-load-config-4.0.2.tgz", - "integrity": "sha512-bSVhyJGL00wMVoPUzAVAnbEoWyqRxkjv64tUl427SKnPrENtq6hJwUojroMz2VB+Q1edmi4IfrAPpami5VVgMQ==", - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "lilconfig": "^3.0.0", - "yaml": "^2.3.4" - }, - "engines": { - "node": ">= 14" - }, - "peerDependencies": { - "postcss": ">=8.0.9", - "ts-node": ">=9.0.0" - }, - "peerDependenciesMeta": { - "postcss": { - "optional": true - }, - "ts-node": { - "optional": true - } - } - }, - "node_modules/postcss-nested": { - "version": "6.2.0", - "resolved": "https://registry.npmjs.org/postcss-nested/-/postcss-nested-6.2.0.tgz", - "integrity": "sha512-HQbt28KulC5AJzG+cZtj9kvKB93CFCdLvog1WFLf1D+xmMvPGlBstkpTEZfK5+AN9hfJocyBFCNiqyS48bpgzQ==", - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "postcss-selector-parser": "^6.1.1" - }, - "engines": { - "node": ">=12.0" - }, - "peerDependencies": { - "postcss": "^8.2.14" - } - }, - "node_modules/postcss-selector-parser": { - "version": "6.1.2", - "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-6.1.2.tgz", - "integrity": "sha512-Q8qQfPiZ+THO/3ZrOrO0cJJKfpYCagtMUkXbnEfmgUjwXg6z/WBeOyS9APBBPCTSiDV+s4SwQGu8yFsiMRIudg==", - "license": "MIT", - "dependencies": { - "cssesc": "^3.0.0", - "util-deprecate": "^1.0.2" - }, - "engines": { - "node": ">=4" - } - }, "node_modules/postcss-value-parser": { "version": "4.2.0", "resolved": "https://registry.npmjs.org/postcss-value-parser/-/postcss-value-parser-4.2.0.tgz", @@ -4929,6 +4896,7 @@ "version": "1.2.3", "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", "integrity": "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==", + "dev": true, "funding": [ { "type": "github", @@ -5030,27 +4998,6 @@ } } }, - "node_modules/read-cache": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/read-cache/-/read-cache-1.0.0.tgz", - "integrity": "sha512-Owdv/Ft7IjOgm/i0xvNDZ1LrRANRfew4b2prF3OWMQLxLfu3bS8FVhCsrSCMK4lR56Y9ya+AThoTpDCTxCmpRA==", - "license": "MIT", - "dependencies": { - "pify": "^2.3.0" - } - }, - "node_modules/readdirp": { - "version": "3.6.0", - "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", - 
"integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==", - "license": "MIT", - "dependencies": { - "picomatch": "^2.2.1" - }, - "engines": { - "node": ">=8.10.0" - } - }, "node_modules/rehype-highlight": { "version": "7.0.2", "resolved": "https://registry.npmjs.org/rehype-highlight/-/rehype-highlight-7.0.2.tgz", @@ -5184,26 +5131,6 @@ "url": "https://opencollective.com/unified" } }, - "node_modules/resolve": { - "version": "1.22.10", - "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.10.tgz", - "integrity": "sha512-NPRy+/ncIMeDlTAsuqwKIiferiawhefFJtkNSW0qZJEqMEb+qBt/77B/jGeeek+F0uOeN05CDa6HXbbIgtVX4w==", - "license": "MIT", - "dependencies": { - "is-core-module": "^2.16.0", - "path-parse": "^1.0.7", - "supports-preserve-symlinks-flag": "^1.0.0" - }, - "bin": { - "resolve": "bin/resolve" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, "node_modules/resolve-from": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", @@ -5218,6 +5145,7 @@ "version": "1.0.4", "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.0.4.tgz", "integrity": "sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw==", + "dev": true, "license": "MIT", "engines": { "iojs": ">=1.0.0", @@ -5266,6 +5194,7 @@ "version": "1.2.0", "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", "integrity": "sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==", + "dev": true, "funding": [ { "type": "github", @@ -5705,6 +5634,7 @@ "version": "2.0.0", "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dev": true, "license": "MIT", "dependencies": { "shebang-regex": "^3.0.0" @@ -5717,23 +5647,12 @@ "version": "3.0.0", "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "dev": true, "license": "MIT", "engines": { "node": ">=8" } }, - "node_modules/signal-exit": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", - "integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==", - "license": "ISC", - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, "node_modules/source-map-js": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", @@ -5753,65 +5672,6 @@ "url": "https://github.com/sponsors/wooorm" } }, - "node_modules/string-width": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz", - "integrity": "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==", - "license": "MIT", - "dependencies": { - "eastasianwidth": "^0.2.0", - "emoji-regex": "^9.2.2", - "strip-ansi": "^7.0.1" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/string-width-cjs": { - "name": "string-width", - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - 
"integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "license": "MIT", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/string-width-cjs/node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/string-width-cjs/node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "license": "MIT" - }, - "node_modules/string-width-cjs/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, "node_modules/stringify-entities": { "version": "4.0.4", "resolved": "https://registry.npmjs.org/stringify-entities/-/stringify-entities-4.0.4.tgz", @@ -5826,43 +5686,6 @@ "url": "https://github.com/sponsors/wooorm" } }, - "node_modules/strip-ansi": { - "version": "7.1.0", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz", - "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==", - "license": "MIT", - "dependencies": { - "ansi-regex": "^6.0.1" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/strip-ansi?sponsor=1" - } - }, - "node_modules/strip-ansi-cjs": { - "name": "strip-ansi", - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/strip-ansi-cjs/node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, "node_modules/strip-json-comments": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", @@ -5885,37 +5708,6 @@ "inline-style-parser": "0.2.4" } }, - "node_modules/sucrase": { - "version": "3.35.0", - "resolved": "https://registry.npmjs.org/sucrase/-/sucrase-3.35.0.tgz", - "integrity": "sha512-8EbVDiu9iN/nESwxeSxDKe0dunta1GOlHufmSSXxMD2z2/tMZpDMpvXQGsc+ajGo8y2uYUmixaSRUc/QPoQ0GA==", - "license": "MIT", - "dependencies": { - "@jridgewell/gen-mapping": "^0.3.2", - "commander": "^4.0.0", - "glob": "^10.3.10", - "lines-and-columns": "^1.1.6", - "mz": "^2.7.0", - "pirates": "^4.0.1", - "ts-interface-checker": "^0.1.9" - }, - "bin": { - "sucrase": "bin/sucrase", - "sucrase-node": "bin/sucrase-node" - }, - "engines": { - "node": ">=16 || 14 >=14.17" - } - }, - "node_modules/sucrase/node_modules/commander": 
{ - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/commander/-/commander-4.1.1.tgz", - "integrity": "sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA==", - "license": "MIT", - "engines": { - "node": ">= 6" - } - }, "node_modules/supports-color": { "version": "7.2.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", @@ -5929,18 +5721,6 @@ "node": ">=8" } }, - "node_modules/supports-preserve-symlinks-flag": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz", - "integrity": "sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, "node_modules/sync-child-process": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/sync-child-process/-/sync-child-process-1.0.2.tgz", @@ -5965,40 +5745,18 @@ } }, "node_modules/tailwindcss": { - "version": "3.4.17", - "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.17.tgz", - "integrity": "sha512-w33E2aCvSDP0tW9RZuNXadXlkHXqFzSkQew/aIa2i/Sj8fThxwovwlXHSPXTbAHwEIhBFXAedUhP2tueAKP8Og==", + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.1.tgz", + "integrity": "sha512-QNbdmeS979Efzim2g/bEvfuh+fTcIdp1y7gA+sb6OYSW74rt7Cr7M78AKdf6HqWT3d5AiTb7SwTT3sLQxr4/qw==", + "license": "MIT" + }, + "node_modules/tapable": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/tapable/-/tapable-2.2.1.tgz", + "integrity": "sha512-GNzQvQTOIP6RyTfE2Qxb8ZVlNmw0n88vp1szwWRimP02mnTsx3Wtn5qRdqY9w2XduFNUgvOwhNnQsjwCp+kqaQ==", "license": "MIT", - "dependencies": { - "@alloc/quick-lru": "^5.2.0", - "arg": "^5.0.2", - "chokidar": "^3.6.0", - "didyoumean": "^1.2.2", - "dlv": "^1.1.3", - "fast-glob": "^3.3.2", - "glob-parent": "^6.0.2", - "is-glob": "^4.0.3", - "jiti": "^1.21.6", - "lilconfig": "^3.1.3", - "micromatch": "^4.0.8", - "normalize-path": "^3.0.0", - "object-hash": "^3.0.0", - "picocolors": "^1.1.1", - "postcss": "^8.4.47", - "postcss-import": "^15.1.0", - "postcss-js": "^4.0.1", - "postcss-load-config": "^4.0.2", - "postcss-nested": "^6.2.0", - "postcss-selector-parser": "^6.1.2", - "resolve": "^1.22.8", - "sucrase": "^3.35.0" - }, - "bin": { - "tailwind": "lib/cli.js", - "tailwindcss": "lib/cli.js" - }, "engines": { - "node": ">=14.0.0" + "node": ">=6" } }, "node_modules/textlinestream": { @@ -6007,27 +5765,6 @@ "integrity": "sha512-iBHbi7BQxrFmwZUQJsT0SjNzlLLsXhvW/kg7EyOMVMBIrlnj/qYofwo1LVLZi+3GbUEo96Iu2eqToI2+lZoAEQ==", "license": "MIT" }, - "node_modules/thenify": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/thenify/-/thenify-3.3.1.tgz", - "integrity": "sha512-RVZSIV5IG10Hk3enotrhvz0T9em6cyHBLkH/YAZuKqd8hRkKhSfCGIcP2KUY0EPxndzANBmNllzWPwak+bheSw==", - "license": "MIT", - "dependencies": { - "any-promise": "^1.0.0" - } - }, - "node_modules/thenify-all": { - "version": "1.6.0", - "resolved": "https://registry.npmjs.org/thenify-all/-/thenify-all-1.6.0.tgz", - "integrity": "sha512-RNxQH/qI8/t3thXJDwcstUO4zeqo64+Uy/+sNVRBx4Xn2OX+OZ9oP+iJnNFqplFra2ZUVeKCSa2oVWi3T4uVmA==", - "license": "MIT", - "dependencies": { - "thenify": ">= 3.1.0 < 4" - }, - "engines": { - "node": ">=0.8" - } - }, "node_modules/to-regex-range": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", @@ 
-6073,12 +5810,6 @@ "typescript": ">=4.8.4" } }, - "node_modules/ts-interface-checker": { - "version": "0.1.13", - "resolved": "https://registry.npmjs.org/ts-interface-checker/-/ts-interface-checker-0.1.13.tgz", - "integrity": "sha512-Y/arvbn+rrz3JCKl9C4kVNfTfSm2/mEp5FSz5EsZSANGPSlQrpRI5M4PKF+mJnE52jOO90PnPSc3Ur3bTQw0gA==", - "license": "Apache-2.0" - }, "node_modules/tslib": { "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", @@ -6304,12 +6035,6 @@ "punycode": "^2.1.0" } }, - "node_modules/util-deprecate": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", - "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", - "license": "MIT" - }, "node_modules/varint": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/varint/-/varint-6.0.0.tgz", @@ -6460,6 +6185,7 @@ "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "dev": true, "license": "ISC", "dependencies": { "isexe": "^2.0.0" @@ -6481,94 +6207,6 @@ "node": ">=0.10.0" } }, - "node_modules/wrap-ansi": { - "version": "8.1.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz", - "integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==", - "license": "MIT", - "dependencies": { - "ansi-styles": "^6.1.0", - "string-width": "^5.0.1", - "strip-ansi": "^7.0.1" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/wrap-ansi?sponsor=1" - } - }, - "node_modules/wrap-ansi-cjs": { - "name": "wrap-ansi", - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", - "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", - "license": "MIT", - "dependencies": { - "ansi-styles": "^4.0.0", - "string-width": "^4.1.0", - "strip-ansi": "^6.0.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/wrap-ansi?sponsor=1" - } - }, - "node_modules/wrap-ansi-cjs/node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi-cjs/node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "license": "MIT" - }, - "node_modules/wrap-ansi-cjs/node_modules/string-width": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "license": "MIT", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi-cjs/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": 
"sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi/node_modules/ansi-styles": { - "version": "6.2.1", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz", - "integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, "node_modules/yallist": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.1.1.tgz", @@ -6581,6 +6219,8 @@ "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.7.0.tgz", "integrity": "sha512-+hSoy/QHluxmC9kCIJyL/uyFmLmc+e5CFR5Wa+bpIhIj85LVb9ZH2nVnqrHoSvKogwODv0ClqZkmiSSaIH5LTA==", "license": "ISC", + "optional": true, + "peer": true, "bin": { "yaml": "bin.mjs" }, diff --git a/examples/server/webui/package.json b/examples/server/webui/package.json index 8c833d0241..6ac06b1a49 100644 --- a/examples/server/webui/package.json +++ b/examples/server/webui/package.json @@ -13,9 +13,11 @@ "dependencies": { "@heroicons/react": "^2.2.0", "@sec-ant/readable-stream": "^0.6.0", + "@tailwindcss/postcss": "^4.1.1", + "@tailwindcss/vite": "^4.1.1", "@vscode/markdown-it-katex": "^1.1.1", "autoprefixer": "^10.4.20", - "daisyui": "^4.12.14", + "daisyui": "^5.0.12", "dexie": "^4.0.11", "highlight.js": "^11.10.0", "katex": "^0.16.15", @@ -29,7 +31,7 @@ "remark-breaks": "^4.0.0", "remark-gfm": "^4.0.0", "remark-math": "^6.0.0", - "tailwindcss": "^3.4.15", + "tailwindcss": "^4.1.1", "textlinestream": "^1.1.1", "vite-plugin-singlefile": "^2.0.3" }, diff --git a/examples/server/webui/postcss.config.js b/examples/server/webui/postcss.config.js index 2e7af2b7f1..fb05b5692b 100644 --- a/examples/server/webui/postcss.config.js +++ b/examples/server/webui/postcss.config.js @@ -1,6 +1,5 @@ export default { plugins: { - tailwindcss: {}, - autoprefixer: {}, + "@tailwindcss/postcss": {}, }, } diff --git a/examples/server/webui/src/App.tsx b/examples/server/webui/src/App.tsx index 2ce734682c..cc4659e152 100644 --- a/examples/server/webui/src/App.tsx +++ b/examples/server/webui/src/App.tsx @@ -28,7 +28,7 @@ function AppLayout() { <>
diff --git a/examples/server/webui/src/Config.ts b/examples/server/webui/src/Config.ts
index 779ed9bf78..dd1cc0e100 100644
--- a/examples/server/webui/src/Config.ts
+++ b/examples/server/webui/src/Config.ts
@@ -1,4 +1,4 @@
-import daisyuiThemes from 'daisyui/src/theming/themes';
+import daisyuiThemes from 'daisyui/theme/object';
 import { isNumeric } from './utils/misc';
 
 export const isDev = import.meta.env.MODE === 'development';
diff --git a/examples/server/webui/src/components/ChatScreen.tsx b/examples/server/webui/src/components/ChatScreen.tsx
index d12b06e125..29ab5ea64f 100644
--- a/examples/server/webui/src/components/ChatScreen.tsx
+++ b/examples/server/webui/src/components/ChatScreen.tsx
@@ -1,4 +1,4 @@
-import { useEffect, useMemo, useRef, useState } from 'react';
+import { useEffect, useMemo, useState } from 'react';
 import { CallbackGeneratedChunk, useAppContext } from '../utils/app.context';
 import ChatMessage from './ChatMessage';
 import { CanvasType, Message, PendingMessage } from '../utils/types';
@@ -6,6 +6,7 @@ import { classNames, cleanCurrentUrl, throttle } from '../utils/misc';
 import CanvasPyInterpreter from './CanvasPyInterpreter';
 import StorageUtils from '../utils/storage';
 import { useVSCodeContext } from '../utils/llama-vscode';
+import { useChatTextarea, ChatTextareaApi } from './useChatTextarea.ts';
 
 /**
  * A message display is a message node with additional information for rendering.
@@ -99,7 +100,8 @@ export default function ChatScreen() {
     canvasData,
     replaceMessageAndGenerate,
   } = useAppContext();
-  const textarea = useOptimizedTextarea(prefilledMsg.content());
+
+  const textarea: ChatTextareaApi = useChatTextarea(prefilledMsg.content());
   const { extraContext, clearExtraContext } = useVSCodeContext(textarea);
 
   // TODO: improve this when we have "upload file" feature
@@ -248,14 +250,16 @@ export default function ChatScreen() {
{/* chat input */} -
+
+ {isGenerating(currConvId ?? '') ? (