diff --git a/.devops/cuda-new.Dockerfile b/.devops/cuda-new.Dockerfile new file mode 100644 index 0000000000..62443e17f2 --- /dev/null +++ b/.devops/cuda-new.Dockerfile @@ -0,0 +1,95 @@ +ARG UBUNTU_VERSION=24.04 +# This needs to generally match the container host's environment. +ARG CUDA_VERSION=13.1.0 +# Target the CUDA build image +ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} + +ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} + +FROM ${BASE_CUDA_DEV_CONTAINER} AS build + +# CUDA architecture to build for (defaults to all supported archs) +ARG CUDA_DOCKER_ARCH=default + +RUN apt-get update && \ + apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1 + +WORKDIR /app + +COPY . . + +RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \ + export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \ + fi && \ + cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ + cmake --build build --config Release -j$(nproc) + +RUN mkdir -p /app/lib && \ + find build -name "*.so*" -exec cp -P {} /app/lib \; + +RUN mkdir -p /app/full \ + && cp build/bin/* /app/full \ + && cp *.py /app/full \ + && cp -r gguf-py /app/full \ + && cp -r requirements /app/full \ + && cp requirements.txt /app/full \ + && cp .devops/tools.sh /app/full/tools.sh + +## Base image +FROM ${BASE_CUDA_RUN_CONTAINER} AS base + +RUN apt-get update \ + && apt-get install -y libgomp1 curl\ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +COPY --from=build /app/lib/ /app + +### Full +FROM base AS full + +COPY --from=build /app/full /app + +WORKDIR /app + +RUN apt-get update \ + && apt-get install -y \ + git \ + python3 \ + python3-pip \ + python3-wheel \ + && pip install --break-system-packages --upgrade setuptools \ + && pip install --break-system-packages -r requirements.txt \ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + + +ENTRYPOINT ["/app/tools.sh"] + +### Light, CLI only +FROM base AS light + +COPY --from=build /app/full/llama-cli /app/full/llama-completion /app + +WORKDIR /app + +ENTRYPOINT [ "/app/llama-cli" ] + +### Server, Server only +FROM base AS server + +ENV LLAMA_ARG_HOST=0.0.0.0 + +COPY --from=build /app/full/llama-server /app + +WORKDIR /app + +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + +ENTRYPOINT [ "/app/llama-server" ] diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index bfd1270716..d9fe0686d3 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -40,7 +40,8 @@ jobs: # https://github.com/ggml-org/llama.cpp/issues/11888 #- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: false } - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" } - - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" } + - { tag: "cuda cuda12", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04", cuda_version: "12.4.0", ubuntu_version: "22.04" } + - { tag: "cuda13", dockerfile: ".devops/cuda-new.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04", cuda_version: "13.1.0", ubuntu_version: "24.04" } - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" } - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" } - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" } @@ -80,18 +81,21 @@ jobs: run: | REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case REPO_NAME="${{ github.event.repository.name }}" + PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:" # list all tags possible - if [[ "${{ matrix.config.tag }}" == "cpu" ]]; then - TYPE="" - else - TYPE="-${{ matrix.config.tag }}" - fi - PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:" - CACHETAGS="${PREFIX}buildcache${TYPE}" - FULLTAGS="${PREFIX}full${TYPE},${PREFIX}full${TYPE}-${{ steps.srctag.outputs.name }}" - LIGHTTAGS="${PREFIX}light${TYPE},${PREFIX}light${TYPE}-${{ steps.srctag.outputs.name }}" - SERVERTAGS="${PREFIX}server${TYPE},${PREFIX}server${TYPE}-${{ steps.srctag.outputs.name }}" + tags="${{ matrix.config.tag }}" + for tag in $tags; do + if [[ "$tag" == "cpu" ]]; then + TYPE="" + else + TYPE="-$tag" + fi + CACHETAGS="${PREFIX}buildcache${TYPE}" + FULLTAGS="${FULLTAGS:+$FULLTAGS,}${PREFIX}full${TYPE},${PREFIX}full${TYPE}-${{ steps.srctag.outputs.name }}" + LIGHTTAGS="${LIGHTTAGS:+$LIGHTTAGS,}${PREFIX}light${TYPE},${PREFIX}light${TYPE}-${{ steps.srctag.outputs.name }}" + SERVERTAGS="${SERVERTAGS:+$SERVERTAGS,}${PREFIX}server${TYPE},${PREFIX}server${TYPE}-${{ steps.srctag.outputs.name }}" + done echo "cache_output_tags=$CACHETAGS" >> $GITHUB_OUTPUT echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT @@ -132,6 +136,9 @@ jobs: file: ${{ matrix.config.dockerfile }} target: full provenance: false + build-args: | + ${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }} + ${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }} # using github experimental cache #cache-from: type=gha #cache-to: type=gha,mode=max @@ -154,6 +161,9 @@ jobs: file: ${{ matrix.config.dockerfile }} target: light provenance: false + build-args: | + ${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }} + ${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }} # using github experimental cache #cache-from: type=gha #cache-to: type=gha,mode=max @@ -176,6 +186,9 @@ jobs: file: ${{ matrix.config.dockerfile }} target: server provenance: false + build-args: | + ${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }} + ${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }} # using github experimental cache #cache-from: type=gha #cache-to: type=gha,mode=max diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index f893b24c75..edc0ed539d 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -3503,7 +3503,7 @@ class QwenModel(TextModel): self._set_vocab_qwen() -@ModelBase.register("Qwen2Model", "Qwen2ForCausalLM", "Qwen2AudioForConditionalGeneration", "KORMoForCausalLM") +@ModelBase.register("Qwen2Model", "Qwen2ForCausalLM", "Qwen2AudioForConditionalGeneration", "KORMoForCausalLM", "AudioFlamingo3ForConditionalGeneration") class Qwen2Model(TextModel): model_arch = gguf.MODEL_ARCH.QWEN2 @@ -9292,6 +9292,19 @@ class VoxtralWhisperEncoderModel(WhisperEncoderModel): self.gguf_writer.add_audio_stack_factor(4) # == intermediate_size // hidden_size +@ModelBase.register("AudioFlamingo3ForConditionalGeneration") +class AudioFlamingo3WhisperEncoderModel(WhisperEncoderModel): + def set_gguf_parameters(self): + super().set_gguf_parameters() + self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.MUSIC_FLAMINGO) + + def tensor_force_quant(self, name, new_name, bid, n_dims): + if ".conv" in name and ".weight" in name: + # Was trained in BF16, being safe, avoiding quantizing to FP16 + return gguf.GGMLQuantizationType.F32 + return super().tensor_force_quant(name, new_name, bid, n_dims) + + @ModelBase.register("FalconH1ForCausalLM") class FalconH1Model(Mamba2Model): model_arch = gguf.MODEL_ARCH.FALCON_H1 diff --git a/docs/ops.md b/docs/ops.md index b395d2315c..2b2770cb76 100644 --- a/docs/ops.md +++ b/docs/ops.md @@ -32,7 +32,7 @@ Legend: | CONV_TRANSPOSE_1D | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | | CONV_TRANSPOSE_2D | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | | COS | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | 🟡 | ❌ | ❌ | ❌ | -| COUNT_EQUAL | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | +| COUNT_EQUAL | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | | CPY | ❌ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ | ❌ | | CROSS_ENTROPY_LOSS | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | | CROSS_ENTROPY_LOSS_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | diff --git a/docs/ops/Metal.csv b/docs/ops/Metal.csv index 5f7450e91f..02fd75fdbf 100644 --- a/docs/ops/Metal.csv +++ b/docs/ops/Metal.csv @@ -965,6 +965,7 @@ "Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,2560],ne_kernel=[3,3,1,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","yes","Metal" "Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,2560],ne_kernel=[3,3,2,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","yes","Metal" "Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[5,5,1,32],ne_kernel=[3,4,1,32],s0=1,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","1","yes","Metal" +"Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[2,2,1536,729],ne_kernel=[2,2,1536,4096],s0=1,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","1","yes","Metal" "Metal","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","0","no","Metal" "Metal","IM2COL_3D","type_input=f32,type_kernel=f16,dst_type=f32,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","0","no","Metal" "Metal","IM2COL_3D","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","0","no","Metal" @@ -4964,8 +4965,9 @@ "Metal","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","yes","Metal" "Metal","CONV_TRANSPOSE_2D","ne_input=[3,2,3,1],ne_kernel=[2,2,1,3],stride=1","support","1","yes","Metal" "Metal","CONV_TRANSPOSE_2D","ne_input=[10,10,9,1],ne_kernel=[3,3,1,9],stride=2","support","1","yes","Metal" -"Metal","COUNT_EQUAL","type=f32,ne=[4,500,1,1]","support","0","no","Metal" -"Metal","COUNT_EQUAL","type=f32,ne=[4,5000,1,1]","support","0","no","Metal" +"Metal","CONV_TRANSPOSE_2D","ne_input=[129,63,35,1],ne_kernel=[3,3,48,35],stride=1","support","1","yes","Metal" +"Metal","COUNT_EQUAL","type=f32,ne=[4,500,1,1]","support","1","yes","Metal" +"Metal","COUNT_EQUAL","type=f32,ne=[4,5000,1,1]","support","1","yes","Metal" "Metal","ARGMAX","type=f32,ne=[32,1,1,1]","support","1","yes","Metal" "Metal","ARGMAX","type=f32,ne=[32,513,1,1]","support","1","yes","Metal" "Metal","ARGMAX","type=f32,ne=[100,10,1,1]","support","1","yes","Metal" @@ -5715,15 +5717,15 @@ "Metal","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","Metal" "Metal","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001,inplace=1","support","1","yes","Metal" "Metal","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","Metal" -"Metal","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","Metal" -"Metal","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","Metal" -"Metal","SSM_CONV","type=f32,ne_a=[4,1024,4,1],ne_b=[3,1024,1,1]","support","1","yes","Metal" -"Metal","SSM_CONV","type=f32,ne_a=[4,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","Metal" -"Metal","SSM_CONV","type=f32,ne_a=[8,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","Metal" -"Metal","SSM_CONV","type=f32,ne_a=[4,1536,4,1],ne_b=[3,1536,1,1]","support","1","yes","Metal" -"Metal","SSM_CONV","type=f32,ne_a=[4,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","Metal" -"Metal","SSM_CONV","type=f32,ne_a=[8,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","Metal" -"Metal","SSM_CONV","type=f32,ne_a=[4,2048,4,1],ne_b=[3,2048,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[3,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[6,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[3,1024,4,1],ne_b=[3,1024,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[3,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[6,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[3,1536,4,1],ne_b=[3,1536,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[3,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[6,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[3,2048,4,1],ne_b=[3,2048,1,1]","support","1","yes","Metal" "Metal","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[4,1024,1,1]","support","1","yes","Metal" "Metal","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[4,1024,1,1]","support","1","yes","Metal" "Metal","SSM_CONV","type=f32,ne_a=[4,1024,4,1],ne_b=[4,1024,1,1]","support","1","yes","Metal" @@ -5733,6 +5735,15 @@ "Metal","SSM_CONV","type=f32,ne_a=[4,2048,1,1],ne_b=[4,2048,1,1]","support","1","yes","Metal" "Metal","SSM_CONV","type=f32,ne_a=[8,2048,1,1],ne_b=[4,2048,1,1]","support","1","yes","Metal" "Metal","SSM_CONV","type=f32,ne_a=[4,2048,4,1],ne_b=[4,2048,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[9,1024,1,1],ne_b=[9,1024,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[18,1024,1,1],ne_b=[9,1024,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[9,1024,4,1],ne_b=[9,1024,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[9,1536,1,1],ne_b=[9,1536,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[18,1536,1,1],ne_b=[9,1536,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[9,1536,4,1],ne_b=[9,1536,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[9,2048,1,1],ne_b=[9,2048,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[18,2048,1,1],ne_b=[9,2048,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[9,2048,4,1],ne_b=[9,2048,1,1]","support","1","yes","Metal" "Metal","SSM_SCAN","type=f32,d_state=16,head_dim=1,n_head=1024,n_group=1,n_seq_tokens=32,n_seqs=4","support","1","yes","Metal" "Metal","SSM_SCAN","type=f32,d_state=128,head_dim=64,n_head=16,n_group=2,n_seq_tokens=32,n_seqs=4","support","1","yes","Metal" "Metal","SSM_SCAN","type=f32,d_state=256,head_dim=64,n_head=8,n_group=2,n_seq_tokens=32,n_seqs=4","support","1","yes","Metal" @@ -8916,6 +8927,8 @@ "Metal","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","Metal" "Metal","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","Metal" "Metal","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","Metal" +"Metal","SOFT_MAX","type=f32,ne=[200001,2,3,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","Metal" +"Metal","SOFT_MAX","type=f32,ne=[200001,2,3,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","Metal" "Metal","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=0.000000","support","0","no","Metal" "Metal","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=0.000000","support","0","no","Metal" "Metal","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=1.000000,max_bias=0.000000","support","0","no","Metal" @@ -9542,311 +9555,311 @@ "Metal","ARGSORT","type=f32,ne=[2048,2,1,3],order=1","support","1","yes","Metal" "Metal","ARGSORT","type=f32,ne=[2049,2,1,3],order=1","support","1","yes","Metal" "Metal","ARGSORT","type=f32,ne=[2,8,8192,1],order=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[12,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[13,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[13,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=9999","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=9999","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=9999","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=9999","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=9999","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=9999","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=9999","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=9999","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=9999","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=9999","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=9999","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=9999","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=15","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[12,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[13,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[13,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=9999,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=9999,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=9999,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=9999,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=9999,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=9999,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=9999,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=9999,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=9999,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=9999,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=9999,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=9999,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=15,ties=0","support","1","yes","Metal" "Metal","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=0","support","1","yes","Metal" "Metal","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=1","support","1","yes","Metal" "Metal","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=nearest,flags=none","support","1","yes","Metal" @@ -9891,8 +9904,9 @@ "Metal","GROUP_NORM","type=f32,ne=[64,64,320,1],num_groups=32,eps=0.000001","support","1","yes","Metal" "Metal","GROUP_NORM","type=f32,ne=[9,9,1280,1],num_groups=32,eps=0.000001","support","1","yes","Metal" "Metal","ACC","type=f32,ne_a=[256,17,1,1],ne_b=[256,16,1,1]","support","1","yes","Metal" -"Metal","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1","support","1","yes","Metal" -"Metal","PAD","type=f32,ne_a=[512,512,3,1],lp0=1,rp0=1,lp1=1,rp1=1,lp2=1,rp2=1,lp3=1,rp3=1,v=0","support","0","no","Metal" +"Metal","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1,circular=0","support","1","yes","Metal" +"Metal","PAD","type=f32,ne_a=[33,17,2,1],pad_0=4,pad_1=3,circular=1","support","0","no","Metal" +"Metal","PAD","type=f32,ne_a=[512,512,3,1],lp0=1,rp0=1,lp1=1,rp1=1,lp2=1,rp2=1,lp3=1,rp3=1,v=0,circular=0","support","0","no","Metal" "Metal","PAD_REFLECT_1D","type=f32,ne_a=[512,34,2,1],pad_0=10,pad_1=9","support","1","yes","Metal" "Metal","PAD_REFLECT_1D","type=f32,ne_a=[3000,384,4,1],pad_0=10,pad_1=9","support","1","yes","Metal" "Metal","ROLL","shift0=3,shift1=-2,shift3=1,shift4=-1","support","0","no","Metal" @@ -9923,17 +9937,41 @@ "Metal","FILL","type=f32,ne=[303,207,11,3],c=2.000000","support","1","yes","Metal" "Metal","FILL","type=f32,ne=[800,600,4,4],c=-152.000000","support","1","yes","Metal" "Metal","FILL","type=f32,ne=[2048,512,2,2],c=3.500000","support","1","yes","Metal" +"Metal","DIAG","type=f32,ne=[10,1,4,3]","support","0","no","Metal" +"Metal","DIAG","type=f32,ne=[79,1,19,13]","support","0","no","Metal" +"Metal","DIAG","type=f32,ne=[256,1,8,16]","support","0","no","Metal" "Metal","SOLVE_TRI","type=f32,ne_lhs=[10,10,4,3],ne_rhs=[3,10,4,3]","support","0","no","Metal" "Metal","SOLVE_TRI","type=f32,ne_lhs=[11,11,1,1],ne_rhs=[5,11,1,1]","support","0","no","Metal" "Metal","SOLVE_TRI","type=f32,ne_lhs=[17,17,2,4],ne_rhs=[9,17,2,4]","support","0","no","Metal" "Metal","SOLVE_TRI","type=f32,ne_lhs=[30,30,7,1],ne_rhs=[8,30,7,1]","support","0","no","Metal" "Metal","SOLVE_TRI","type=f32,ne_lhs=[42,42,5,2],ne_rhs=[10,42,5,2]","support","0","no","Metal" "Metal","SOLVE_TRI","type=f32,ne_lhs=[64,64,2,2],ne_rhs=[10,64,2,2]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[64,64,2,2],ne_rhs=[64,64,2,2]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[79,79,5,3],ne_rhs=[417,79,5,3]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,2],ne_rhs=[32,128,4,2]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,2,8],ne_rhs=[80,80,2,8]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,2,8],ne_rhs=[79,80,2,8]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,2,8],ne_rhs=[81,80,2,8]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,8,8],ne_rhs=[80,80,8,8]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,8,8],ne_rhs=[79,80,8,8]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,8,8],ne_rhs=[81,80,8,8]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[84,84,4,4],ne_rhs=[32,84,4,4]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[95,95,8,8],ne_rhs=[40,95,8,8]","support","0","no","Metal" "Metal","SOLVE_TRI","type=f32,ne_lhs=[100,100,4,4],ne_rhs=[41,100,4,4]","support","0","no","Metal" -"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0","support","1","yes","Metal" -"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0","support","0","no","Metal" -"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1","support","1","yes","Metal" -"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,4],ne_rhs=[31,128,4,4]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,4],ne_rhs=[32,128,4,4]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[128,128,3,4],ne_rhs=[32,128,3,4]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,1],ne_rhs=[32,128,4,1]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[64,64,4,4],ne_rhs=[200,64,4,4]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[64,64,4,4],ne_rhs=[384,64,4,4]","support","0","no","Metal" +"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0,circular=0","support","1","yes","Metal" +"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0,circular=0","support","0","no","Metal" +"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0,circular=1","support","0","no","Metal" +"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0,circular=1","support","0","no","Metal" +"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1,circular=0","support","1","yes","Metal" +"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1,circular=0","support","0","no","Metal" +"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1,circular=1","support","0","no","Metal" +"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1,circular=1","support","0","no","Metal" "Metal","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","Metal" "Metal","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","Metal" "Metal","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","Metal" diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index cb46c32100..0176ca1ce9 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -4,7 +4,7 @@ project("ggml" C CXX ASM) ### GGML Version set(GGML_VERSION_MAJOR 0) set(GGML_VERSION_MINOR 9) -set(GGML_VERSION_PATCH 4) +set(GGML_VERSION_PATCH 5) set(GGML_VERSION_BASE "${GGML_VERSION_MAJOR}.${GGML_VERSION_MINOR}.${GGML_VERSION_PATCH}") find_program(GIT_EXE NAMES git git.exe NO_CMAKE_FIND_ROOT_PATH) diff --git a/ggml/src/ggml-cuda/fattn-mma-f16.cuh b/ggml/src/ggml-cuda/fattn-mma-f16.cuh index 7bd1044c19..856291dc3c 100644 --- a/ggml/src/ggml-cuda/fattn-mma-f16.cuh +++ b/ggml/src/ggml-cuda/fattn-mma-f16.cuh @@ -531,7 +531,7 @@ static __device__ __forceinline__ void flash_attn_ext_f16_iter( for (int k0 = 0; k0 < nbatch_fa; k0 += np*T_C_KQ::I) { #pragma unroll for (int l = 0; l < T_C_KQ::ne; ++l) { - if (!oob_check || k0 + T_C_KQ::get_i(l) < k_VKQ_sup) { + if (!oob_check || k0 + (threadIdx.y % np)*T_C_KQ::I + T_C_KQ::get_i(l) < k_VKQ_sup) { KQ_max_new[l % 2] = fmaxf(KQ_max_new[l % 2], KQ_C[k0/(np*T_C_KQ::I)].x[l] + FATTN_KQ_MAX_OFFSET); } } @@ -583,7 +583,7 @@ static __device__ __forceinline__ void flash_attn_ext_f16_iter( for (int k0 = 0; k0 < nbatch_fa; k0 += np*T_C_KQ::J) { #pragma unroll for (int l = 0; l < T_C_KQ::ne; ++l) { - if (!oob_check || k0 + T_C_KQ::get_j(l) < k_VKQ_sup) { + if (!oob_check || k0 + (threadIdx.y % np)*T_C_KQ::J + T_C_KQ::get_j(l) < k_VKQ_sup) { // Turing + Volta: KQ_max_new[(l/2) % 2] = fmaxf(KQ_max_new[(l/2) % 2], KQ_C[(k0/(np*T_C_KQ::J))].x[l] + FATTN_KQ_MAX_OFFSET); } diff --git a/ggml/src/ggml-metal/ggml-metal-device.cpp b/ggml/src/ggml-metal/ggml-metal-device.cpp index 680904d132..b0734797f1 100644 --- a/ggml/src/ggml-metal/ggml-metal-device.cpp +++ b/ggml/src/ggml-metal/ggml-metal-device.cpp @@ -1684,3 +1684,60 @@ ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_opt_step_sgd(ggm return res; } + +ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_memset(ggml_metal_library_t lib, const ggml_tensor * op) { + GGML_ASSERT(op->type == GGML_TYPE_I64); + + char base[256]; + char name[256]; + + snprintf(base, 256, "kernel_memset_%s", ggml_type_name(op->type)); + snprintf(name, 256, "%s", base); + + ggml_metal_pipeline_with_params res = ggml_metal_library_get_pipeline(lib, name); + if (!res.pipeline) { + res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr); + } + + return res; +} + +ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_count_equal(ggml_metal_library_t lib, const ggml_tensor * op) { + assert(op->op == GGML_OP_COUNT_EQUAL); + + GGML_TENSOR_LOCALS(int64_t, ne0, op->src[0], ne); + + GGML_ASSERT(op->src[0]->type == op->src[1]->type); + GGML_ASSERT(op->src[0]->type == GGML_TYPE_I32); + GGML_ASSERT(op->type == GGML_TYPE_I64); + + // note: the kernel only supports i32 output due to metal atomic add only supporting atomic_int + GGML_ASSERT(ggml_nelements(op->src[0]) < (1LL << 31)); + + char base[256]; + char name[256]; + + int nsg = 1; + while (32*nsg < ne00 && nsg < 32) { + nsg *= 2; + } + + snprintf(base, 256, "kernel_count_equal_%s", ggml_type_name(op->src[0]->type)); + snprintf(name, 256, "%s_nsg=%d", base, nsg); + + ggml_metal_pipeline_with_params res = ggml_metal_library_get_pipeline(lib, name); + if (!res.pipeline) { + ggml_metal_cv_t cv = ggml_metal_cv_init(); + + ggml_metal_cv_set_int16(cv, nsg, FC_COUNT_EQUAL + 0); + + res = ggml_metal_library_compile_pipeline(lib, base, name, cv); + + ggml_metal_cv_free(cv); + } + + res.smem = 32 * sizeof(int32_t); + res.nsg = nsg; + + return res; +} diff --git a/ggml/src/ggml-metal/ggml-metal-device.h b/ggml/src/ggml-metal/ggml-metal-device.h index 0a8b9211a7..d983b666ca 100644 --- a/ggml/src/ggml-metal/ggml-metal-device.h +++ b/ggml/src/ggml-metal/ggml-metal-device.h @@ -147,6 +147,8 @@ struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_arange struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_timestep_embedding(ggml_metal_library_t lib, const struct ggml_tensor * op); struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_opt_step_adamw (ggml_metal_library_t lib, const struct ggml_tensor * op); struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_opt_step_sgd (ggml_metal_library_t lib, const struct ggml_tensor * op); +struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_memset (ggml_metal_library_t lib, const struct ggml_tensor * op); +struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_count_equal (ggml_metal_library_t lib, const struct ggml_tensor * op); struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_flash_attn_ext_pad( ggml_metal_library_t lib, diff --git a/ggml/src/ggml-metal/ggml-metal-device.m b/ggml/src/ggml-metal/ggml-metal-device.m index f24270bb1c..59badd0043 100644 --- a/ggml/src/ggml-metal/ggml-metal-device.m +++ b/ggml/src/ggml-metal/ggml-metal-device.m @@ -1023,6 +1023,11 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te return has_simdgroup_reduction && ggml_is_contiguous_rows(op->src[0]); case GGML_OP_L2_NORM: return has_simdgroup_reduction && (op->ne[0] % 4 == 0 && ggml_is_contiguous_1(op->src[0])); + case GGML_OP_COUNT_EQUAL: + return has_simdgroup_reduction && + op->src[0]->type == GGML_TYPE_I32 && + op->src[1]->type == GGML_TYPE_I32 && + op->type == GGML_TYPE_I64; case GGML_OP_ARGMAX: return has_simdgroup_reduction; case GGML_OP_NORM: diff --git a/ggml/src/ggml-metal/ggml-metal-impl.h b/ggml/src/ggml-metal/ggml-metal-impl.h index 8944b07e90..d3b0e732ec 100644 --- a/ggml/src/ggml-metal/ggml-metal-impl.h +++ b/ggml/src/ggml-metal/ggml-metal-impl.h @@ -78,6 +78,7 @@ #define FC_MUL_MM 700 #define FC_ROPE 800 #define FC_SSM_CONV 900 +#define FC_COUNT_EQUAL 1000 // op-specific constants #define OP_FLASH_ATTN_EXT_NQPTG 8 @@ -894,6 +895,25 @@ typedef struct { float step; } ggml_metal_kargs_arange; +typedef struct { + int64_t val; +} ggml_metal_kargs_memset; + +typedef struct { + int32_t ne00; + int32_t ne01; + int32_t ne02; + int32_t ne03; + uint64_t nb00; + uint64_t nb01; + uint64_t nb02; + uint64_t nb03; + uint64_t nb10; + uint64_t nb11; + uint64_t nb12; + uint64_t nb13; +} ggml_metal_kargs_count_equal; + typedef struct { int32_t k0; int32_t k1; diff --git a/ggml/src/ggml-metal/ggml-metal-ops.cpp b/ggml/src/ggml-metal/ggml-metal-ops.cpp index e99c1763f6..acf2aa9184 100644 --- a/ggml/src/ggml-metal/ggml-metal-ops.cpp +++ b/ggml/src/ggml-metal/ggml-metal-ops.cpp @@ -448,7 +448,11 @@ static int ggml_metal_op_encode_impl(ggml_metal_op_t ctx, int idx) { { n_fuse = ggml_metal_op_opt_step_sgd(ctx, idx); } break; - default: + case GGML_OP_COUNT_EQUAL: + { + n_fuse = ggml_metal_op_count_equal(ctx, idx); + } break; + default: { GGML_LOG_ERROR("%s: error: node %3d, op = %8s not implemented\n", __func__, idx, ggml_op_name(node->op)); GGML_ABORT("fatal error"); @@ -4090,3 +4094,64 @@ int ggml_metal_op_opt_step_sgd(ggml_metal_op_t ctx, int idx) { return 1; } + +int ggml_metal_op_count_equal(ggml_metal_op_t ctx, int idx) { + ggml_tensor * op = ctx->node(idx); + + ggml_metal_library_t lib = ctx->lib; + ggml_metal_encoder_t enc = ctx->enc; + + GGML_TENSOR_LOCALS(int32_t, ne0, op->src[0], ne); + GGML_TENSOR_LOCALS(uint64_t, nb0, op->src[0], nb); + GGML_TENSOR_LOCALS(uint64_t, nb1, op->src[1], nb); + + { + ggml_metal_kargs_memset args = { /*.val =*/ 0 }; + + auto pipeline = ggml_metal_library_get_pipeline_memset(lib, op); + + ggml_metal_encoder_set_pipeline(enc, pipeline); + ggml_metal_encoder_set_bytes(enc, &args, sizeof(args), 0); + ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op), 1); + + ggml_metal_encoder_dispatch_threadgroups(enc, 1, 1, 1, 1, 1, 1); + } + + ggml_metal_op_concurrency_reset(ctx); + + { + ggml_metal_kargs_count_equal args = { + /*.ne00 =*/ ne00, + /*.ne01 =*/ ne01, + /*.ne02 =*/ ne02, + /*.ne03 =*/ ne03, + /*.nb00 =*/ nb00, + /*.nb01 =*/ nb01, + /*.nb02 =*/ nb02, + /*.nb03 =*/ nb03, + /*.nb10 =*/ nb10, + /*.nb11 =*/ nb11, + /*.nb12 =*/ nb12, + /*.nb13 =*/ nb13, + }; + + auto pipeline = ggml_metal_library_get_pipeline_count_equal(lib, op); + + const size_t smem = pipeline.smem; + + const int nth = 32*pipeline.nsg; + + GGML_ASSERT(nth <= ggml_metal_pipeline_max_theads_per_threadgroup(pipeline)); + + ggml_metal_encoder_set_pipeline(enc, pipeline); + ggml_metal_encoder_set_bytes(enc, &args, sizeof(args), 0); + ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op->src[0]), 1); + ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op->src[1]), 2); + ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op), 3); + + ggml_metal_encoder_set_threadgroup_memory_size(enc, smem, 0); + ggml_metal_encoder_dispatch_threadgroups(enc, ne01, ne02, ne03, nth, 1, 1); + } + + return 1; +} diff --git a/ggml/src/ggml-metal/ggml-metal-ops.h b/ggml/src/ggml-metal/ggml-metal-ops.h index 902b544523..c1025d3567 100644 --- a/ggml/src/ggml-metal/ggml-metal-ops.h +++ b/ggml/src/ggml-metal/ggml-metal-ops.h @@ -87,6 +87,7 @@ int ggml_metal_op_leaky_relu (ggml_metal_op_t ctx, int idx); int ggml_metal_op_tri (ggml_metal_op_t ctx, int idx); int ggml_metal_op_opt_step_adamw (ggml_metal_op_t ctx, int idx); int ggml_metal_op_opt_step_sgd (ggml_metal_op_t ctx, int idx); +int ggml_metal_op_count_equal (ggml_metal_op_t ctx, int idx); #ifdef __cplusplus } diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal index 51bcbae309..67b30e0d93 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal @@ -1790,6 +1790,7 @@ kernel void kernel_op_sum_f32( return; } + // TODO: become function constant const uint nsg = (ntg.x + 31) / 32; float sumf = 0; @@ -9557,9 +9558,6 @@ template [[host_name("kernel_mul_mm_iq4_xs_f32")]] kernel mul_mm_t kernel_mul_m template [[host_name("kernel_mul_mm_f32_f16")]] kernel mul_mm_t kernel_mul_mm; template [[host_name("kernel_mul_mm_f16_f16")]] kernel mul_mm_t kernel_mul_mm; -#if defined(GGML_METAL_HAS_BF16) -template [[host_name("kernel_mul_mm_bf16_f16")]] kernel mul_mm_t kernel_mul_mm; -#endif template [[host_name("kernel_mul_mm_q4_0_f16")]] kernel mul_mm_t kernel_mul_mm; template [[host_name("kernel_mul_mm_q4_1_f16")]] kernel mul_mm_t kernel_mul_mm; template [[host_name("kernel_mul_mm_q5_0_f16")]] kernel mul_mm_t kernel_mul_mm; @@ -9615,9 +9613,6 @@ template [[host_name("kernel_mul_mm_id_iq4_xs_f32")]] kernel mul_mm_id kernel_m template [[host_name("kernel_mul_mm_id_f32_f16")]] kernel mul_mm_id kernel_mul_mm_id; template [[host_name("kernel_mul_mm_id_f16_f16")]] kernel mul_mm_id kernel_mul_mm_id; -#if defined(GGML_METAL_HAS_BF16) -template [[host_name("kernel_mul_mm_id_bf16_f16")]] kernel mul_mm_id kernel_mul_mm_id; -#endif template [[host_name("kernel_mul_mm_id_q4_0_f16")]] kernel mul_mm_id kernel_mul_mm_id; template [[host_name("kernel_mul_mm_id_q4_1_f16")]] kernel mul_mm_id kernel_mul_mm_id; template [[host_name("kernel_mul_mm_id_q5_0_f16")]] kernel mul_mm_id kernel_mul_mm_id; @@ -9920,3 +9915,75 @@ kernel void kernel_opt_step_sgd_f32( x[gid] = x[gid] * (1.0f - pars[0] * pars[1]) - pars[0] * g[gid]; } + +template +kernel void kernel_memset( + constant ggml_metal_kargs_fill & args, + device T * dst, + uint tpig[[thread_position_in_grid]]) { + dst[tpig] = args.val; +} + +typedef decltype(kernel_memset) kernel_memset_t; + +template [[host_name("kernel_memset_i64")]] kernel kernel_memset_t kernel_memset; + +constant short FC_count_equal_nsg [[function_constant(FC_COUNT_EQUAL + 0)]]; + +template +kernel void kernel_count_equal( + constant ggml_metal_kargs_count_equal & args, + device const char * src0, + device const char * src1, + device atomic_int * dst, + threadgroup int32_t * shmem_i32 [[threadgroup(0)]], + uint3 tgpig[[threadgroup_position_in_grid]], + ushort3 tpitg[[thread_position_in_threadgroup]], + ushort sgitg[[simdgroup_index_in_threadgroup]], + ushort tiisg[[thread_index_in_simdgroup]], + ushort3 ntg[[threads_per_threadgroup]]) { + const short NSG = FC_count_equal_nsg; + + const int i3 = tgpig.z; + const int i2 = tgpig.y; + const int i1 = tgpig.x; + + if (i3 >= args.ne03 || i2 >= args.ne02 || i1 >= args.ne01) { + return; + } + + int sum = 0; + + device const char * base0 = src0 + i1*args.nb01 + i2*args.nb02 + i3*args.nb03; + device const char * base1 = src1 + i1*args.nb11 + i2*args.nb12 + i3*args.nb13; + + for (int64_t i0 = tpitg.x; i0 < args.ne00; i0 += ntg.x) { + const T v0 = *(device const T *)(base0 + i0*args.nb00); + const T v1 = *(device const T *)(base1 + i0*args.nb10); + sum += (v0 == v1); + } + + sum = simd_sum(sum); + + if (tiisg == 0) { + shmem_i32[sgitg] = sum; + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + if (sgitg == 0) { + float v = 0.0f; + if (tpitg.x < NSG) { + v = shmem_i32[tpitg.x]; + } + + float total = simd_sum(v); + if (tpitg.x == 0) { + atomic_fetch_add_explicit(dst, (int32_t) total, memory_order_relaxed); + } + } +} + +typedef decltype(kernel_count_equal) kernel_count_equal_t; + +template [[host_name("kernel_count_equal_i32")]] kernel kernel_count_equal_t kernel_count_equal; diff --git a/ggml/src/ggml-sycl/CMakeLists.txt b/ggml/src/ggml-sycl/CMakeLists.txt index 88f29221bb..5a89d8dd68 100644 --- a/ggml/src/ggml-sycl/CMakeLists.txt +++ b/ggml/src/ggml-sycl/CMakeLists.txt @@ -36,7 +36,47 @@ if (WIN32) endif() endif() -find_package(IntelSYCL) +macro(detect_and_find_package package_name) + set(test_source " + cmake_minimum_required(VERSION ${CMAKE_VERSION}) + project(check_package LANGUAGES CXX) + find_package(${package_name} QUIET) + ") + + set(test_dir "${CMAKE_CURRENT_BINARY_DIR}/check_package_${package_name}") + file(WRITE "${test_dir}/CMakeLists.txt" "${test_source}") + + set(cmake_args "") + if(CMAKE_GENERATOR) + list(APPEND cmake_args "-G" "${CMAKE_GENERATOR}") + endif() + if(CMAKE_GENERATOR_PLATFORM) + list(APPEND cmake_args "-A" "${CMAKE_GENERATOR_PLATFORM}") + endif() + if(CMAKE_GENERATOR_TOOLSET) + list(APPEND cmake_args "-T" "${CMAKE_GENERATOR_TOOLSET}") + endif() + if(CMAKE_CXX_COMPILER) + list(APPEND cmake_args "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}") + endif() + + execute_process( + COMMAND ${CMAKE_COMMAND} ${cmake_args} . + WORKING_DIRECTORY "${test_dir}" + RESULT_VARIABLE result + OUTPUT_QUIET + ERROR_QUIET + ) + + if(result EQUAL 0) + find_package(${package_name} ${ARGN}) + else() + message(WARNING "Detection of ${package_name} failed. The package might be broken or incompatible.") + set(${package_name}_FOUND FALSE) + endif() +endmacro() + +detect_and_find_package(IntelSYCL) if (IntelSYCL_FOUND) # Use oneAPI CMake when possible target_link_libraries(ggml-sycl PRIVATE IntelSYCL::SYCL_CXX) @@ -191,3 +231,4 @@ if (GGML_SYCL_DEVICE_ARCH) target_compile_options(ggml-sycl PRIVATE -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH}) target_link_options(ggml-sycl PRIVATE -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH}) endif() + diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 616b8add36..c2a0f41c1b 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -3492,6 +3492,7 @@ class VisionProjectorType: COGVLM = "cogvlm" JANUS_PRO = "janus_pro" LFM2A = "lfm2a" # audio + MUSIC_FLAMINGO = "musicflamingo" # audio GLM4V = "glm4v" diff --git a/grammars/README.md b/grammars/README.md index daac7f4d8d..dcd28648b1 100644 --- a/grammars/README.md +++ b/grammars/README.md @@ -150,6 +150,9 @@ You can use GBNF grammars: - in CLI, with [examples/json_schema_to_grammar.py](../examples/json_schema_to_grammar.py) - in JavaScript with [json-schema-to-grammar.mjs](../tools/server/public_legacy/json-schema-to-grammar.mjs) (this is used by the [server](../tools/server)'s Web UI) +> [!NOTE] +> The JSON schema is only used to constrain the model output and is not injected into the prompt. The model has no visibility into the schema, so if you want it to understand the expected structure, describe it explicitly in your prompt. This does not apply to tool calling, where schemas are injected into the prompt. + Take a look at [tests](../tests/test-json-schema-to-grammar.cpp) to see which features are likely supported (you'll also find usage examples in https://github.com/ggml-org/llama.cpp/pull/5978, https://github.com/ggml-org/llama.cpp/pull/6659 & https://github.com/ggml-org/llama.cpp/pull/6555). ```bash diff --git a/scripts/sync-ggml.last b/scripts/sync-ggml.last index 5823efac2d..c838276158 100644 --- a/scripts/sync-ggml.last +++ b/scripts/sync-ggml.last @@ -1 +1 @@ -130bc125a88bb57664b88932c48c38a1cb316fac +ebc3a0f4a56be1c9424a89fbec09962ac34fde85 diff --git a/tools/mtmd/clip-impl.h b/tools/mtmd/clip-impl.h index a0939865e3..1ed0741883 100644 --- a/tools/mtmd/clip-impl.h +++ b/tools/mtmd/clip-impl.h @@ -180,6 +180,7 @@ enum projector_type { PROJECTOR_TYPE_GLMA, PROJECTOR_TYPE_QWEN25O, // will be replaced by QWEN2A or QWEN25VL depending on clip_ctx PROJECTOR_TYPE_VOXTRAL, + PROJECTOR_TYPE_MUSIC_FLAMINGO, PROJECTOR_TYPE_LFM2, PROJECTOR_TYPE_KIMIVL, PROJECTOR_TYPE_LIGHTONOCR, @@ -209,6 +210,7 @@ static std::map PROJECTOR_TYPE_NAMES = { { PROJECTOR_TYPE_GLMA, "glma"}, { PROJECTOR_TYPE_QWEN25O, "qwen2.5o"}, { PROJECTOR_TYPE_VOXTRAL, "voxtral"}, + { PROJECTOR_TYPE_MUSIC_FLAMINGO, "musicflamingo"}, { PROJECTOR_TYPE_LFM2, "lfm2"}, { PROJECTOR_TYPE_KIMIVL, "kimivl"}, { PROJECTOR_TYPE_LIGHTONOCR,"lightonocr"}, diff --git a/tools/mtmd/clip-model.h b/tools/mtmd/clip-model.h index b4c31cdde6..1e5aa87b98 100644 --- a/tools/mtmd/clip-model.h +++ b/tools/mtmd/clip-model.h @@ -319,7 +319,8 @@ struct clip_model { bool audio_has_avgpool() const { return proj_type == PROJECTOR_TYPE_QWEN2A - || proj_type == PROJECTOR_TYPE_VOXTRAL; + || proj_type == PROJECTOR_TYPE_VOXTRAL + || proj_type == PROJECTOR_TYPE_MUSIC_FLAMINGO; } bool audio_has_stack_frames() const { diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp index 3ba0823def..fb08dd258c 100644 --- a/tools/mtmd/clip.cpp +++ b/tools/mtmd/clip.cpp @@ -818,6 +818,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 case PROJECTOR_TYPE_VOXTRAL: case PROJECTOR_TYPE_QWEN2A: case PROJECTOR_TYPE_GLMA: + case PROJECTOR_TYPE_MUSIC_FLAMINGO: { builder = std::make_unique(ctx, img); } break; @@ -1176,6 +1177,7 @@ struct clip_model_loader { case PROJECTOR_TYPE_QWEN2A: case PROJECTOR_TYPE_GLMA: case PROJECTOR_TYPE_VOXTRAL: + case PROJECTOR_TYPE_MUSIC_FLAMINGO: { bool require_stack = model.proj_type == PROJECTOR_TYPE_ULTRAVOX || model.proj_type == PROJECTOR_TYPE_VOXTRAL || @@ -1576,6 +1578,17 @@ struct clip_model_loader { model.mm_1_w = get_tensor(string_format(TN_MM_AUDIO_MLP, 1, "weight")); model.mm_2_w = get_tensor(string_format(TN_MM_AUDIO_MLP, 2, "weight")); } break; + case PROJECTOR_TYPE_MUSIC_FLAMINGO: + { + model.conv1d_1_w = get_tensor(string_format(TN_CONV1D, 1, "weight")); + model.conv1d_1_b = get_tensor(string_format(TN_CONV1D, 1, "bias")); + model.conv1d_2_w = get_tensor(string_format(TN_CONV1D, 2, "weight")); + model.conv1d_2_b = get_tensor(string_format(TN_CONV1D, 2, "bias")); + model.mm_1_w = get_tensor(string_format(TN_MM_AUDIO_MLP, 1, "weight")); + model.mm_1_b = get_tensor(string_format(TN_MM_AUDIO_MLP, 1, "bias")); + model.mm_2_w = get_tensor(string_format(TN_MM_AUDIO_MLP, 2, "weight")); + model.mm_2_b = get_tensor(string_format(TN_MM_AUDIO_MLP, 2, "bias")); + } break; case PROJECTOR_TYPE_INTERNVL: { model.mm_0_w = get_tensor(string_format(TN_MVLM_PROJ_MLP, 0, "weight")); @@ -3031,6 +3044,7 @@ int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * im case PROJECTOR_TYPE_VOXTRAL: case PROJECTOR_TYPE_ULTRAVOX: case PROJECTOR_TYPE_QWEN2A: + case PROJECTOR_TYPE_MUSIC_FLAMINGO: { n_patches = img->nx; @@ -3403,6 +3417,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima case PROJECTOR_TYPE_ULTRAVOX: case PROJECTOR_TYPE_LFM2: case PROJECTOR_TYPE_VOXTRAL: + case PROJECTOR_TYPE_MUSIC_FLAMINGO: case PROJECTOR_TYPE_JANUS_PRO: case PROJECTOR_TYPE_COGVLM: { @@ -3526,6 +3541,7 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) { return ctx->model.projection->ne[1]; case PROJECTOR_TYPE_ULTRAVOX: case PROJECTOR_TYPE_VOXTRAL: + case PROJECTOR_TYPE_MUSIC_FLAMINGO: return ctx->model.mm_2_w->ne[1]; case PROJECTOR_TYPE_INTERNVL: return ctx->model.mm_3_w->ne[1]; @@ -3587,7 +3603,8 @@ bool clip_has_whisper_encoder(const struct clip_ctx * ctx) { return ctx->proj_type() == PROJECTOR_TYPE_ULTRAVOX || ctx->proj_type() == PROJECTOR_TYPE_QWEN2A || ctx->proj_type() == PROJECTOR_TYPE_GLMA - || ctx->proj_type() == PROJECTOR_TYPE_VOXTRAL; + || ctx->proj_type() == PROJECTOR_TYPE_VOXTRAL + || ctx->proj_type() == PROJECTOR_TYPE_MUSIC_FLAMINGO; } bool clip_encode_float_image (struct clip_ctx * ctx, int n_threads, float * img, int h, int w, float * vec) { diff --git a/tools/mtmd/models/whisper-enc.cpp b/tools/mtmd/models/whisper-enc.cpp index 2870d854ab..2f2b127755 100644 --- a/tools/mtmd/models/whisper-enc.cpp +++ b/tools/mtmd/models/whisper-enc.cpp @@ -86,6 +86,15 @@ ggml_cgraph * clip_graph_whisper_enc::build() { FFN_GELU_ERF, -1); + } else if (proj_type == PROJECTOR_TYPE_MUSIC_FLAMINGO) { + // projector + cur = build_ffn(cur, + model.mm_1_w, model.mm_1_b, + nullptr, nullptr, + model.mm_2_w, model.mm_2_b, + FFN_GELU_ERF, + -1); + } else if (proj_type == PROJECTOR_TYPE_GLMA) { cur = ggml_norm(ctx0, cur, hparams.eps); cur = ggml_mul(ctx0, cur, model.mm_norm_pre_w); diff --git a/tools/mtmd/mtmd.cpp b/tools/mtmd/mtmd.cpp index b9c4fa9098..b0b5ab42ab 100644 --- a/tools/mtmd/mtmd.cpp +++ b/tools/mtmd/mtmd.cpp @@ -330,6 +330,7 @@ struct mtmd_context { case PROJECTOR_TYPE_ULTRAVOX: case PROJECTOR_TYPE_VOXTRAL: case PROJECTOR_TYPE_GLMA: + case PROJECTOR_TYPE_MUSIC_FLAMINGO: audio_preproc = std::make_unique(ctx_a); break; case PROJECTOR_TYPE_LFM2A: @@ -352,6 +353,9 @@ struct mtmd_context { // [BEGIN_AUDIO] ... (embeddings) ... aud_beg = "[BEGIN_AUDIO]"; + } else if (proj == PROJECTOR_TYPE_MUSIC_FLAMINGO) { + // ... (embeddings) ... + aud_beg = ""; } } diff --git a/tools/quantize/quantize.cpp b/tools/quantize/quantize.cpp index 470dc3d916..881f4b3dd9 100644 --- a/tools/quantize/quantize.cpp +++ b/tools/quantize/quantize.cpp @@ -12,6 +12,7 @@ #include #include #include +#include struct quant_option { std::string name; @@ -643,6 +644,11 @@ int main(int argc, char ** argv) { return 1; } + if (std::error_code ec; std::filesystem::equivalent(fname_inp, fname_out, ec)) { + fprintf(stderr, "%s: error: input and output files are the same: '%s'\n", __func__, fname_inp.c_str()); + return 1; + } + print_build_info(); fprintf(stderr, "%s: quantizing '%s' to '%s' as %s", __func__, fname_inp.c_str(), fname_out.c_str(), ftype_str.c_str());