Merge branch 'ggml-org:master' into pe/ggml-runtime-opt-x86-cpu-q4_k-q8_k
This commit is contained in:
commit
efa8dd0816
|
|
@ -0,0 +1,95 @@
|
|||
ARG UBUNTU_VERSION=24.04
|
||||
# This needs to generally match the container host's environment.
|
||||
ARG CUDA_VERSION=13.1.0
|
||||
# Target the CUDA build image
|
||||
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
|
||||
|
||||
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
|
||||
|
||||
FROM ${BASE_CUDA_DEV_CONTAINER} AS build
|
||||
|
||||
# CUDA architecture to build for (defaults to all supported archs)
|
||||
ARG CUDA_DOCKER_ARCH=default
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
|
||||
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
|
||||
fi && \
|
||||
cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
|
||||
cmake --build build --config Release -j$(nproc)
|
||||
|
||||
RUN mkdir -p /app/lib && \
|
||||
find build -name "*.so*" -exec cp -P {} /app/lib \;
|
||||
|
||||
RUN mkdir -p /app/full \
|
||||
&& cp build/bin/* /app/full \
|
||||
&& cp *.py /app/full \
|
||||
&& cp -r gguf-py /app/full \
|
||||
&& cp -r requirements /app/full \
|
||||
&& cp requirements.txt /app/full \
|
||||
&& cp .devops/tools.sh /app/full/tools.sh
|
||||
|
||||
## Base image
|
||||
FROM ${BASE_CUDA_RUN_CONTAINER} AS base
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y libgomp1 curl\
|
||||
&& apt autoremove -y \
|
||||
&& apt clean -y \
|
||||
&& rm -rf /tmp/* /var/tmp/* \
|
||||
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
||||
&& find /var/cache -type f -delete
|
||||
|
||||
COPY --from=build /app/lib/ /app
|
||||
|
||||
### Full
|
||||
FROM base AS full
|
||||
|
||||
COPY --from=build /app/full /app
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y \
|
||||
git \
|
||||
python3 \
|
||||
python3-pip \
|
||||
python3-wheel \
|
||||
&& pip install --break-system-packages --upgrade setuptools \
|
||||
&& pip install --break-system-packages -r requirements.txt \
|
||||
&& apt autoremove -y \
|
||||
&& apt clean -y \
|
||||
&& rm -rf /tmp/* /var/tmp/* \
|
||||
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
||||
&& find /var/cache -type f -delete
|
||||
|
||||
|
||||
ENTRYPOINT ["/app/tools.sh"]
|
||||
|
||||
### Light, CLI only
|
||||
FROM base AS light
|
||||
|
||||
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
ENTRYPOINT [ "/app/llama-cli" ]
|
||||
|
||||
### Server, Server only
|
||||
FROM base AS server
|
||||
|
||||
ENV LLAMA_ARG_HOST=0.0.0.0
|
||||
|
||||
COPY --from=build /app/full/llama-server /app
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
||||
|
||||
ENTRYPOINT [ "/app/llama-server" ]
|
||||
|
|
@ -40,7 +40,8 @@ jobs:
|
|||
# https://github.com/ggml-org/llama.cpp/issues/11888
|
||||
#- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: false }
|
||||
- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" }
|
||||
- { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" }
|
||||
- { tag: "cuda cuda12", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04", cuda_version: "12.4.0", ubuntu_version: "22.04" }
|
||||
- { tag: "cuda13", dockerfile: ".devops/cuda-new.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04", cuda_version: "13.1.0", ubuntu_version: "24.04" }
|
||||
- { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" }
|
||||
- { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" }
|
||||
- { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" }
|
||||
|
|
@ -80,18 +81,21 @@ jobs:
|
|||
run: |
|
||||
REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case
|
||||
REPO_NAME="${{ github.event.repository.name }}"
|
||||
PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:"
|
||||
|
||||
# list all tags possible
|
||||
if [[ "${{ matrix.config.tag }}" == "cpu" ]]; then
|
||||
TYPE=""
|
||||
else
|
||||
TYPE="-${{ matrix.config.tag }}"
|
||||
fi
|
||||
PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:"
|
||||
CACHETAGS="${PREFIX}buildcache${TYPE}"
|
||||
FULLTAGS="${PREFIX}full${TYPE},${PREFIX}full${TYPE}-${{ steps.srctag.outputs.name }}"
|
||||
LIGHTTAGS="${PREFIX}light${TYPE},${PREFIX}light${TYPE}-${{ steps.srctag.outputs.name }}"
|
||||
SERVERTAGS="${PREFIX}server${TYPE},${PREFIX}server${TYPE}-${{ steps.srctag.outputs.name }}"
|
||||
tags="${{ matrix.config.tag }}"
|
||||
for tag in $tags; do
|
||||
if [[ "$tag" == "cpu" ]]; then
|
||||
TYPE=""
|
||||
else
|
||||
TYPE="-$tag"
|
||||
fi
|
||||
CACHETAGS="${PREFIX}buildcache${TYPE}"
|
||||
FULLTAGS="${FULLTAGS:+$FULLTAGS,}${PREFIX}full${TYPE},${PREFIX}full${TYPE}-${{ steps.srctag.outputs.name }}"
|
||||
LIGHTTAGS="${LIGHTTAGS:+$LIGHTTAGS,}${PREFIX}light${TYPE},${PREFIX}light${TYPE}-${{ steps.srctag.outputs.name }}"
|
||||
SERVERTAGS="${SERVERTAGS:+$SERVERTAGS,}${PREFIX}server${TYPE},${PREFIX}server${TYPE}-${{ steps.srctag.outputs.name }}"
|
||||
done
|
||||
echo "cache_output_tags=$CACHETAGS" >> $GITHUB_OUTPUT
|
||||
echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT
|
||||
echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT
|
||||
|
|
@ -132,6 +136,9 @@ jobs:
|
|||
file: ${{ matrix.config.dockerfile }}
|
||||
target: full
|
||||
provenance: false
|
||||
build-args: |
|
||||
${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }}
|
||||
${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }}
|
||||
# using github experimental cache
|
||||
#cache-from: type=gha
|
||||
#cache-to: type=gha,mode=max
|
||||
|
|
@ -154,6 +161,9 @@ jobs:
|
|||
file: ${{ matrix.config.dockerfile }}
|
||||
target: light
|
||||
provenance: false
|
||||
build-args: |
|
||||
${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }}
|
||||
${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }}
|
||||
# using github experimental cache
|
||||
#cache-from: type=gha
|
||||
#cache-to: type=gha,mode=max
|
||||
|
|
@ -176,6 +186,9 @@ jobs:
|
|||
file: ${{ matrix.config.dockerfile }}
|
||||
target: server
|
||||
provenance: false
|
||||
build-args: |
|
||||
${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }}
|
||||
${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }}
|
||||
# using github experimental cache
|
||||
#cache-from: type=gha
|
||||
#cache-to: type=gha,mode=max
|
||||
|
|
|
|||
|
|
@ -319,7 +319,7 @@ json common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msg
|
|||
}
|
||||
}
|
||||
} else {
|
||||
jmsg["content"] = json(); // null
|
||||
jmsg["content"] = "";
|
||||
}
|
||||
if (!msg.reasoning_content.empty()) {
|
||||
jmsg["reasoning_content"] = msg.reasoning_content;
|
||||
|
|
|
|||
|
|
@ -1109,6 +1109,25 @@ common_init_result::common_init_result(common_params & params) :
|
|||
|
||||
const llama_vocab * vocab = llama_model_get_vocab(model);
|
||||
|
||||
// load and optionally apply lora adapters (must be loaded before context creation)
|
||||
for (auto & la : params.lora_adapters) {
|
||||
llama_adapter_lora_ptr lora;
|
||||
lora.reset(llama_adapter_lora_init(model, la.path.c_str()));
|
||||
if (lora == nullptr) {
|
||||
LOG_ERR("%s: failed to load lora adapter '%s'\n", __func__, la.path.c_str());
|
||||
pimpl->model.reset(model);
|
||||
return;
|
||||
}
|
||||
|
||||
char buf[1024];
|
||||
la.ptr = lora.get();
|
||||
llama_adapter_meta_val_str(la.ptr, "adapter.lora.task_name", buf, sizeof(buf));
|
||||
la.task_name = buf;
|
||||
llama_adapter_meta_val_str(la.ptr, "adapter.lora.prompt_prefix", buf, sizeof(buf));
|
||||
la.prompt_prefix = buf;
|
||||
pimpl->lora.emplace_back(std::move(lora)); // copy to list of loaded adapters
|
||||
}
|
||||
|
||||
// updates params.sampling
|
||||
// TODO: fix naming
|
||||
common_init_sampler_from_model(model, params.sampling);
|
||||
|
|
@ -1245,24 +1264,6 @@ common_init_result_ptr common_init_from_params(common_params & params) {
|
|||
}
|
||||
}
|
||||
|
||||
// load and optionally apply lora adapters
|
||||
for (auto & la : params.lora_adapters) {
|
||||
llama_adapter_lora_ptr lora;
|
||||
lora.reset(llama_adapter_lora_init(model, la.path.c_str()));
|
||||
if (lora == nullptr) {
|
||||
LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
|
||||
return res;
|
||||
}
|
||||
|
||||
char buf[1024];
|
||||
la.ptr = lora.get();
|
||||
llama_adapter_meta_val_str(la.ptr, "adapter.lora.task_name", buf, sizeof(buf));
|
||||
la.task_name = buf;
|
||||
llama_adapter_meta_val_str(la.ptr, "adapter.lora.prompt_prefix", buf, sizeof(buf));
|
||||
la.prompt_prefix = buf;
|
||||
res->lora().emplace_back(std::move(lora)); // copy to list of loaded adapters
|
||||
}
|
||||
|
||||
if (!params.lora_init_without_apply) {
|
||||
common_set_adapter_lora(lctx, params.lora_adapters);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ Legend:
|
|||
| CONV_TRANSPOSE_1D | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ |
|
||||
| CONV_TRANSPOSE_2D | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ |
|
||||
| COS | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | 🟡 | ❌ | ❌ | ❌ |
|
||||
| COUNT_EQUAL | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ |
|
||||
| COUNT_EQUAL | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ |
|
||||
| CPY | ❌ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ | ❌ |
|
||||
| CROSS_ENTROPY_LOSS | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| CROSS_ENTROPY_LOSS_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
|
|
|
|||
|
|
@ -965,6 +965,7 @@
|
|||
"Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,2560],ne_kernel=[3,3,1,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","yes","Metal"
|
||||
"Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,2560],ne_kernel=[3,3,2,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","yes","Metal"
|
||||
"Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[5,5,1,32],ne_kernel=[3,4,1,32],s0=1,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","1","yes","Metal"
|
||||
"Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[2,2,1536,729],ne_kernel=[2,2,1536,4096],s0=1,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","1","yes","Metal"
|
||||
"Metal","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","0","no","Metal"
|
||||
"Metal","IM2COL_3D","type_input=f32,type_kernel=f16,dst_type=f32,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","0","no","Metal"
|
||||
"Metal","IM2COL_3D","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","0","no","Metal"
|
||||
|
|
@ -4964,8 +4965,9 @@
|
|||
"Metal","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","yes","Metal"
|
||||
"Metal","CONV_TRANSPOSE_2D","ne_input=[3,2,3,1],ne_kernel=[2,2,1,3],stride=1","support","1","yes","Metal"
|
||||
"Metal","CONV_TRANSPOSE_2D","ne_input=[10,10,9,1],ne_kernel=[3,3,1,9],stride=2","support","1","yes","Metal"
|
||||
"Metal","COUNT_EQUAL","type=f32,ne=[4,500,1,1]","support","0","no","Metal"
|
||||
"Metal","COUNT_EQUAL","type=f32,ne=[4,5000,1,1]","support","0","no","Metal"
|
||||
"Metal","CONV_TRANSPOSE_2D","ne_input=[129,63,35,1],ne_kernel=[3,3,48,35],stride=1","support","1","yes","Metal"
|
||||
"Metal","COUNT_EQUAL","type=f32,ne=[4,500,1,1]","support","1","yes","Metal"
|
||||
"Metal","COUNT_EQUAL","type=f32,ne=[4,5000,1,1]","support","1","yes","Metal"
|
||||
"Metal","ARGMAX","type=f32,ne=[32,1,1,1]","support","1","yes","Metal"
|
||||
"Metal","ARGMAX","type=f32,ne=[32,513,1,1]","support","1","yes","Metal"
|
||||
"Metal","ARGMAX","type=f32,ne=[100,10,1,1]","support","1","yes","Metal"
|
||||
|
|
@ -5715,15 +5717,15 @@
|
|||
"Metal","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","Metal"
|
||||
"Metal","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001,inplace=1","support","1","yes","Metal"
|
||||
"Metal","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[4,1024,4,1],ne_b=[3,1024,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[4,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[8,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[4,1536,4,1],ne_b=[3,1536,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[4,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[8,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[4,2048,4,1],ne_b=[3,2048,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[3,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[6,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[3,1024,4,1],ne_b=[3,1024,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[3,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[6,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[3,1536,4,1],ne_b=[3,1536,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[3,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[6,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[3,2048,4,1],ne_b=[3,2048,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[4,1024,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[4,1024,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[4,1024,4,1],ne_b=[4,1024,1,1]","support","1","yes","Metal"
|
||||
|
|
@ -5733,6 +5735,15 @@
|
|||
"Metal","SSM_CONV","type=f32,ne_a=[4,2048,1,1],ne_b=[4,2048,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[8,2048,1,1],ne_b=[4,2048,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[4,2048,4,1],ne_b=[4,2048,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[9,1024,1,1],ne_b=[9,1024,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[18,1024,1,1],ne_b=[9,1024,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[9,1024,4,1],ne_b=[9,1024,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[9,1536,1,1],ne_b=[9,1536,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[18,1536,1,1],ne_b=[9,1536,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[9,1536,4,1],ne_b=[9,1536,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[9,2048,1,1],ne_b=[9,2048,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[18,2048,1,1],ne_b=[9,2048,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[9,2048,4,1],ne_b=[9,2048,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_SCAN","type=f32,d_state=16,head_dim=1,n_head=1024,n_group=1,n_seq_tokens=32,n_seqs=4","support","1","yes","Metal"
|
||||
"Metal","SSM_SCAN","type=f32,d_state=128,head_dim=64,n_head=16,n_group=2,n_seq_tokens=32,n_seqs=4","support","1","yes","Metal"
|
||||
"Metal","SSM_SCAN","type=f32,d_state=256,head_dim=64,n_head=8,n_group=2,n_seq_tokens=32,n_seqs=4","support","1","yes","Metal"
|
||||
|
|
@ -8916,6 +8927,8 @@
|
|||
"Metal","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","Metal"
|
||||
"Metal","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","Metal"
|
||||
"Metal","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","Metal"
|
||||
"Metal","SOFT_MAX","type=f32,ne=[200001,2,3,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","Metal"
|
||||
"Metal","SOFT_MAX","type=f32,ne=[200001,2,3,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","Metal"
|
||||
"Metal","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=0.000000","support","0","no","Metal"
|
||||
"Metal","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=0.000000","support","0","no","Metal"
|
||||
"Metal","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=1.000000,max_bias=0.000000","support","0","no","Metal"
|
||||
|
|
@ -9542,311 +9555,311 @@
|
|||
"Metal","ARGSORT","type=f32,ne=[2048,2,1,3],order=1","support","1","yes","Metal"
|
||||
"Metal","ARGSORT","type=f32,ne=[2049,2,1,3],order=1","support","1","yes","Metal"
|
||||
"Metal","ARGSORT","type=f32,ne=[2,8,8192,1],order=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[12,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[13,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[13,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=9999","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=9999","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=9999","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=9999","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=9999","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=9999","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=9999","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=9999","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=9999","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=9999","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=9999","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=9999","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[12,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[13,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[13,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=9999,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=9999,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=9999,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=9999,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=9999,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=9999,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=9999,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=9999,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=9999,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=9999,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=9999,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=9999,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=0","support","1","yes","Metal"
|
||||
"Metal","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=1","support","1","yes","Metal"
|
||||
"Metal","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=nearest,flags=none","support","1","yes","Metal"
|
||||
|
|
@ -9891,8 +9904,9 @@
|
|||
"Metal","GROUP_NORM","type=f32,ne=[64,64,320,1],num_groups=32,eps=0.000001","support","1","yes","Metal"
|
||||
"Metal","GROUP_NORM","type=f32,ne=[9,9,1280,1],num_groups=32,eps=0.000001","support","1","yes","Metal"
|
||||
"Metal","ACC","type=f32,ne_a=[256,17,1,1],ne_b=[256,16,1,1]","support","1","yes","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1","support","1","yes","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[512,512,3,1],lp0=1,rp0=1,lp1=1,rp1=1,lp2=1,rp2=1,lp3=1,rp3=1,v=0","support","0","no","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1,circular=0","support","1","yes","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[33,17,2,1],pad_0=4,pad_1=3,circular=1","support","0","no","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[512,512,3,1],lp0=1,rp0=1,lp1=1,rp1=1,lp2=1,rp2=1,lp3=1,rp3=1,v=0,circular=0","support","0","no","Metal"
|
||||
"Metal","PAD_REFLECT_1D","type=f32,ne_a=[512,34,2,1],pad_0=10,pad_1=9","support","1","yes","Metal"
|
||||
"Metal","PAD_REFLECT_1D","type=f32,ne_a=[3000,384,4,1],pad_0=10,pad_1=9","support","1","yes","Metal"
|
||||
"Metal","ROLL","shift0=3,shift1=-2,shift3=1,shift4=-1","support","0","no","Metal"
|
||||
|
|
@ -9923,17 +9937,41 @@
|
|||
"Metal","FILL","type=f32,ne=[303,207,11,3],c=2.000000","support","1","yes","Metal"
|
||||
"Metal","FILL","type=f32,ne=[800,600,4,4],c=-152.000000","support","1","yes","Metal"
|
||||
"Metal","FILL","type=f32,ne=[2048,512,2,2],c=3.500000","support","1","yes","Metal"
|
||||
"Metal","DIAG","type=f32,ne=[10,1,4,3]","support","0","no","Metal"
|
||||
"Metal","DIAG","type=f32,ne=[79,1,19,13]","support","0","no","Metal"
|
||||
"Metal","DIAG","type=f32,ne=[256,1,8,16]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[10,10,4,3],ne_rhs=[3,10,4,3]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[11,11,1,1],ne_rhs=[5,11,1,1]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[17,17,2,4],ne_rhs=[9,17,2,4]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[30,30,7,1],ne_rhs=[8,30,7,1]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[42,42,5,2],ne_rhs=[10,42,5,2]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[64,64,2,2],ne_rhs=[10,64,2,2]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[64,64,2,2],ne_rhs=[64,64,2,2]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[79,79,5,3],ne_rhs=[417,79,5,3]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,2],ne_rhs=[32,128,4,2]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,2,8],ne_rhs=[80,80,2,8]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,2,8],ne_rhs=[79,80,2,8]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,2,8],ne_rhs=[81,80,2,8]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,8,8],ne_rhs=[80,80,8,8]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,8,8],ne_rhs=[79,80,8,8]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,8,8],ne_rhs=[81,80,8,8]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[84,84,4,4],ne_rhs=[32,84,4,4]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[95,95,8,8],ne_rhs=[40,95,8,8]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[100,100,4,4],ne_rhs=[41,100,4,4]","support","0","no","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0","support","1","yes","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0","support","0","no","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1","support","1","yes","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,4],ne_rhs=[31,128,4,4]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,4],ne_rhs=[32,128,4,4]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[128,128,3,4],ne_rhs=[32,128,3,4]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,1],ne_rhs=[32,128,4,1]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[64,64,4,4],ne_rhs=[200,64,4,4]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[64,64,4,4],ne_rhs=[384,64,4,4]","support","0","no","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0,circular=0","support","1","yes","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0,circular=0","support","0","no","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0,circular=1","support","0","no","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0,circular=1","support","0","no","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1,circular=0","support","1","yes","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1,circular=0","support","0","no","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1,circular=1","support","0","no","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1,circular=1","support","0","no","Metal"
|
||||
"Metal","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","Metal"
|
||||
"Metal","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","Metal"
|
||||
"Metal","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","Metal"
|
||||
|
|
|
|||
|
Can't render this file because it is too large.
|
|
|
@ -401,8 +401,8 @@ if (GGML_CPU_ALL_VARIANTS)
|
|||
ggml_add_cpu_backend_variant(android_armv8.2_2 DOTPROD FP16_VECTOR_ARITHMETIC)
|
||||
ggml_add_cpu_backend_variant(android_armv8.6_1 DOTPROD FP16_VECTOR_ARITHMETIC MATMUL_INT8)
|
||||
ggml_add_cpu_backend_variant(android_armv9.0_1 DOTPROD MATMUL_INT8 FP16_VECTOR_ARITHMETIC SVE2)
|
||||
ggml_add_cpu_backend_variant(android_armv9.2_1 DOTPROD MATMUL_INT8 FP16_VECTOR_ARITHMETIC SME)
|
||||
ggml_add_cpu_backend_variant(android_armv9.2_2 DOTPROD MATMUL_INT8 FP16_VECTOR_ARITHMETIC SVE SME)
|
||||
ggml_add_cpu_backend_variant(android_armv9.2_1 DOTPROD MATMUL_INT8 FP16_VECTOR_ARITHMETIC SVE SME)
|
||||
ggml_add_cpu_backend_variant(android_armv9.2_2 DOTPROD MATMUL_INT8 FP16_VECTOR_ARITHMETIC SVE SVE2 SME)
|
||||
elseif (APPLE)
|
||||
ggml_add_cpu_backend_variant(apple_m1 DOTPROD)
|
||||
ggml_add_cpu_backend_variant(apple_m2_m3 DOTPROD MATMUL_INT8)
|
||||
|
|
|
|||
|
|
@ -561,9 +561,9 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|||
|
||||
# Fetch KleidiAI sources:
|
||||
include(FetchContent)
|
||||
set(KLEIDIAI_COMMIT_TAG "v1.14.0")
|
||||
set(KLEIDIAI_COMMIT_TAG "v1.16.0")
|
||||
set(KLEIDIAI_DOWNLOAD_URL "https://github.com/ARM-software/kleidiai/archive/refs/tags/${KLEIDIAI_COMMIT_TAG}.tar.gz")
|
||||
set(KLEIDIAI_ARCHIVE_MD5 "45e110675d93f99f82c23a1afcca76bc")
|
||||
set(KLEIDIAI_ARCHIVE_MD5 "0a9e9008adb6031f9e8cf70dff4a3321")
|
||||
|
||||
if (POLICY CMP0135)
|
||||
cmake_policy(SET CMP0135 NEW)
|
||||
|
|
@ -615,6 +615,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|||
string(FIND "${ARCH_FLAGS_TEMP}" "+dotprod" DOTPROD_ENABLED)
|
||||
string(FIND "${ARCH_FLAGS_TEMP}" "+i8mm" I8MM_ENABLED)
|
||||
string(FIND "${ARCH_FLAGS_TEMP}" "+sme" SME_ENABLED)
|
||||
string(FIND "${ARCH_FLAGS_TEMP}" "+sve" SVE_ENABLED)
|
||||
|
||||
set(PRIVATE_ARCH_FLAGS ${ARCH_FLAGS_TEMP})
|
||||
|
||||
|
|
@ -659,6 +660,15 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|||
set(PRIVATE_ARCH_FLAGS "-fno-tree-vectorize;${PRIVATE_ARCH_FLAGS}+sve+sve2")
|
||||
endif()
|
||||
|
||||
if (NOT SVE_ENABLED MATCHES -1)
|
||||
list(APPEND GGML_KLEIDIAI_SOURCES
|
||||
${KLEIDIAI_SRC}/kai/kai_common_sve_asm.S
|
||||
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod_asm.S
|
||||
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod.c
|
||||
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm_asm.S
|
||||
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm.c)
|
||||
endif()
|
||||
|
||||
set_source_files_properties(${GGML_KLEIDIAI_SOURCES} PROPERTIES COMPILE_OPTIONS "${PRIVATE_ARCH_FLAGS}")
|
||||
list(APPEND GGML_CPU_SOURCES ${GGML_KLEIDIAI_SOURCES})
|
||||
endif()
|
||||
|
|
|
|||
|
|
@ -18,6 +18,8 @@
|
|||
#include "kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod.h"
|
||||
#include "kai_matmul_clamp_f32_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod.h"
|
||||
#include "kai_matmul_clamp_f32_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm.h"
|
||||
#include "kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm.h"
|
||||
#include "kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod.h"
|
||||
|
||||
#include "kai_lhs_pack_bf16p2vlx2_f32_sme.h"
|
||||
#include "kai_lhs_quant_pack_qsi8d32p_f32.h"
|
||||
|
|
@ -69,9 +71,9 @@ static inline void kernel_run_fn10(size_t m, size_t n, size_t k, size_t /*bl*/,
|
|||
|
||||
template<void(*Fn)(size_t,size_t,size_t,const void*,const void*,float*,size_t,size_t,float,float)>
|
||||
static inline void kernel_run_float_fn10(size_t m, size_t n, size_t k, size_t /*bl*/,
|
||||
const void* lhs, const void* rhs, void* dst,
|
||||
size_t dst_stride_row, size_t dst_stride_col,
|
||||
float clamp_min, float clamp_max) {
|
||||
const void* lhs, const void* rhs, void* dst,
|
||||
size_t dst_stride_row, size_t dst_stride_col,
|
||||
float clamp_min, float clamp_max) {
|
||||
Fn(m, n, k, lhs, rhs, static_cast<float*>(dst), dst_stride_row, dst_stride_col, clamp_min, clamp_max);
|
||||
}
|
||||
|
||||
|
|
@ -152,8 +154,8 @@ static inline void rhs_pack_fn12(size_t num_groups, size_t n, size_t k, size_t n
|
|||
|
||||
template<void(*Fn)(size_t,size_t,size_t,size_t,size_t,size_t,const int8_t*,const float*,const float*,void*,size_t,const struct kai_rhs_pack_qsi8cx_params*)>
|
||||
static inline void rhs_pack_scale_fn12(size_t num_groups, size_t n, size_t k, size_t nr, size_t kr, size_t sr, size_t /*bl*/,
|
||||
size_t /*rhs_stride*/, const void* rhs, const void* bias, const void* scale,
|
||||
void* rhs_packed, size_t extra_bytes, const void* params) {
|
||||
size_t /*rhs_stride*/, const void* rhs, const void* bias, const void* scale,
|
||||
void* rhs_packed, size_t extra_bytes, const void* params) {
|
||||
Fn(num_groups, n, k, nr, kr, sr,
|
||||
static_cast<const int8_t*>(rhs),
|
||||
static_cast<const float*>(bias),
|
||||
|
|
@ -524,6 +526,61 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
|
|||
},
|
||||
#endif
|
||||
#else
|
||||
#if defined(__ARM_FEATURE_SVE)
|
||||
{
|
||||
/* SVE i8mm GEMM */
|
||||
/* .kern_info = */ {
|
||||
/* .get_m_step = */ kai_get_m_step_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm,
|
||||
/* .get_n_step = */ kai_get_n_step_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm,
|
||||
/* .get_mr = */ kai_get_mr_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm,
|
||||
/* .get_nr = */ kai_get_nr_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm,
|
||||
/* .get_kr = */ kai_get_kr_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm,
|
||||
/* .get_sr = */ kai_get_sr_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm,
|
||||
/* .get_dst_offset = */ kai_get_dst_offset_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm,
|
||||
/* .get_dst_size = */ kai_get_dst_size_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm,
|
||||
/* .get_lhs_offset_ex = */ &kernel_offs_fn3<kai_get_lhs_packed_offset_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm>,
|
||||
/* .get_rhs_packed_offset_ex = */ &kernel_offs_fn3<kai_get_rhs_packed_offset_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm>,
|
||||
/* .run_kernel_ex = */ &kernel_run_fn11<kai_run_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm>,
|
||||
},
|
||||
/* .gemm_lhs_info = */ {
|
||||
/* .get_offset = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p4x8sb_f32_neon,
|
||||
/* .get_packed_offset_ex = */ &lhs_offs_fn6<kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p4x8sb_f32_neon>,
|
||||
/* .packed_size_ex = */ &lhs_ps_fn6<kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p4x8sb_f32_neon>,
|
||||
/* .pack_func_ex = */ &lhs_pack_float_fn10<kai_run_lhs_quant_pack_qsi8d32p4x8sb_f32_neon>,
|
||||
},
|
||||
/* SVE dotprod GEMV */
|
||||
/* .kern_info = */ {
|
||||
/* .get_m_step = */ kai_get_m_step_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod,
|
||||
/* .get_n_step = */ kai_get_n_step_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod,
|
||||
/* .get_mr = */ kai_get_mr_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod,
|
||||
/* .get_nr = */ kai_get_nr_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod,
|
||||
/* .get_kr = */ kai_get_kr_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod,
|
||||
/* .get_sr = */ kai_get_sr_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod,
|
||||
/* .get_dst_offset = */ kai_get_dst_offset_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod,
|
||||
/* .get_dst_size = */ kai_get_dst_size_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod,
|
||||
/* .get_lhs_offset_ex = */ &kernel_offs_fn3<kai_get_lhs_packed_offset_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod>,
|
||||
/* .get_rhs_packed_offset_ex = */ &kernel_offs_fn3<kai_get_rhs_packed_offset_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod>,
|
||||
/* .run_kernel_ex = */ &kernel_run_fn11<kai_run_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod>,
|
||||
},
|
||||
/* .gemv_lhs_info = */ {
|
||||
/* .get_offset = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32,
|
||||
/* .get_packed_offset_ex = */ &lhs_offs_fn6<kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32>,
|
||||
/* .packed_size_ex = */ &lhs_ps_fn6<kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32>,
|
||||
/* .pack_func_ex = */ &lhs_pack_float_fn10<kai_run_lhs_quant_pack_qsi8d32p_f32>,
|
||||
},
|
||||
/* .rhs_info = */ {
|
||||
/* .packed_stride = */ kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0,
|
||||
/* .to_float = */ dequantize_row_qsi4c32pscalef16,
|
||||
/* .packed_size_ex = */ &rhs_ps_fn5<kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0>,
|
||||
/* .packed_stride_ex = */ &rhs_stride_fn4<kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0>,
|
||||
/* .pack_func_ex = */ &rhs_pack_fn12<kai_run_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0>,
|
||||
},
|
||||
/* .required_cpu = */ CPU_FEATURE_SVE | CPU_FEATURE_I8MM | CPU_FEATURE_DOTPROD,
|
||||
/* .lhs_type = */ GGML_TYPE_F32,
|
||||
/* .rhs_type = */ GGML_TYPE_Q4_0,
|
||||
/* .op_type = */ GGML_TYPE_F32,
|
||||
},
|
||||
#endif
|
||||
#if defined(__ARM_FEATURE_MATMUL_INT8)
|
||||
{
|
||||
/* i8mm GEMM */
|
||||
|
|
@ -578,7 +635,7 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
|
|||
/* .rhs_type = */ GGML_TYPE_Q4_0,
|
||||
/* .op_type = */ GGML_TYPE_F32,
|
||||
},
|
||||
#endif
|
||||
#endif // __ARM_FEATURE_MATMUL_INT8
|
||||
#if defined(__ARM_FEATURE_DOTPROD)
|
||||
{
|
||||
/* DOTPROD GEMM */
|
||||
|
|
@ -811,26 +868,27 @@ ggml_kleidiai_kernels * ggml_kleidiai_select_kernels(cpu_feature cpu_features, c
|
|||
ggml_kleidiai_kernels * kernel = nullptr;
|
||||
|
||||
if (tensor->op == GGML_OP_MUL_MAT && tensor->src[0] != nullptr && tensor->src[1] != nullptr) {
|
||||
#if defined(__ARM_FEATURE_SME) || defined(__ARM_FEATURE_DOTPROD) || defined(__ARM_FEATURE_MATMUL_INT8)
|
||||
for (size_t i = 0; i < NELEMS(gemm_gemv_kernels) - 1; ++i) {
|
||||
if ((cpu_features & gemm_gemv_kernels[i].required_cpu) == gemm_gemv_kernels[i].required_cpu &&
|
||||
gemm_gemv_kernels[i].lhs_type == tensor->src[1]->type &&
|
||||
gemm_gemv_kernels[i].rhs_type == tensor->src[0]->type &&
|
||||
gemm_gemv_kernels[i].op_type == tensor->type) {
|
||||
kernel = &gemm_gemv_kernels[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!kernel) {
|
||||
for (size_t i = 0; i < NELEMS(gemm_gemv_kernels_q8) - 1; ++i) {
|
||||
if ((cpu_features & gemm_gemv_kernels_q8[i].required_cpu) == gemm_gemv_kernels_q8[i].required_cpu &&
|
||||
gemm_gemv_kernels_q8[i].lhs_type == tensor->src[1]->type &&
|
||||
gemm_gemv_kernels_q8[i].rhs_type == tensor->src[0]->type &&
|
||||
gemm_gemv_kernels_q8[i].op_type == tensor->type) {
|
||||
kernel = &gemm_gemv_kernels_q8[i];
|
||||
break;
|
||||
#if defined(__ARM_FEATURE_SME) || \
|
||||
defined(__ARM_FEATURE_DOTPROD) || \
|
||||
defined(__ARM_FEATURE_MATMUL_INT8) || \
|
||||
defined(__ARM_FEATURE_SVE)
|
||||
auto try_table = [&](auto & table) {
|
||||
for (size_t i = 0; i < NELEMS(table) - 1; ++i) {
|
||||
if ((cpu_features & table[i].required_cpu) == table[i].required_cpu &&
|
||||
table[i].lhs_type == tensor->src[1]->type &&
|
||||
table[i].rhs_type == tensor->src[0]->type &&
|
||||
table[i].op_type == tensor->type) {
|
||||
kernel = &table[i];
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
if (tensor->src[0]->type == GGML_TYPE_Q8_0) {
|
||||
try_table(gemm_gemv_kernels_q8);
|
||||
} else {
|
||||
try_table(gemm_gemv_kernels);
|
||||
}
|
||||
#else
|
||||
GGML_UNUSED(gemm_gemv_kernels);
|
||||
|
|
@ -845,7 +903,10 @@ ggml_kleidiai_kernels * ggml_kleidiai_select_kernels(cpu_feature cpu_features, c
|
|||
ggml_kleidiai_kernels * ggml_kleidiai_select_kernels_q4_0(cpu_feature features) {
|
||||
ggml_kleidiai_kernels * kernels = nullptr;
|
||||
|
||||
#if defined(__ARM_FEATURE_SME) || defined(__ARM_FEATURE_DOTPROD) || defined(__ARM_FEATURE_MATMUL_INT8)
|
||||
#if defined(__ARM_FEATURE_SME) || \
|
||||
defined(__ARM_FEATURE_DOTPROD) || \
|
||||
defined(__ARM_FEATURE_MATMUL_INT8) || \
|
||||
defined(__ARM_FEATURE_SVE)
|
||||
for (size_t i = 0; i < NELEMS(gemm_gemv_kernels) - 1; ++i) {
|
||||
if ((features & gemm_gemv_kernels[i].required_cpu) == gemm_gemv_kernels[i].required_cpu) {
|
||||
kernels = &gemm_gemv_kernels[i];
|
||||
|
|
|
|||
|
|
@ -46,13 +46,20 @@ struct ggml_kleidiai_context {
|
|||
} static ctx = { CPU_FEATURE_NONE, NULL, NULL };
|
||||
|
||||
static const char* cpu_feature_to_string(cpu_feature f) {
|
||||
switch (f) {
|
||||
case CPU_FEATURE_NONE: return "NONE";
|
||||
case CPU_FEATURE_DOTPROD: return "DOTPROD";
|
||||
case CPU_FEATURE_I8MM: return "I8MM";
|
||||
case CPU_FEATURE_SVE: return "SVE";
|
||||
case CPU_FEATURE_SME: return "SME";
|
||||
default: return "UNKNOWN";
|
||||
if (f == CPU_FEATURE_NONE) {
|
||||
return "NONE";
|
||||
} else if ((f & CPU_FEATURE_SME) == CPU_FEATURE_SME) {
|
||||
return "SME";
|
||||
} else if ((f & CPU_FEATURE_SVE) == CPU_FEATURE_SVE) {
|
||||
return "SVE";
|
||||
}
|
||||
else if ((f & CPU_FEATURE_I8MM) == CPU_FEATURE_I8MM) {
|
||||
return "I8MM";
|
||||
} else if ((f & CPU_FEATURE_DOTPROD) == CPU_FEATURE_DOTPROD) {
|
||||
return "DOTPROD";
|
||||
}
|
||||
else {
|
||||
return "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -68,7 +75,7 @@ static void init_kleidiai_context(void) {
|
|||
|
||||
ctx.features = (ggml_cpu_has_dotprod() ? CPU_FEATURE_DOTPROD : CPU_FEATURE_NONE) |
|
||||
(ggml_cpu_has_matmul_int8() ? CPU_FEATURE_I8MM : CPU_FEATURE_NONE) |
|
||||
(ggml_cpu_has_sve() ? CPU_FEATURE_SVE : CPU_FEATURE_NONE);
|
||||
((ggml_cpu_has_sve() && ggml_cpu_get_sve_cnt() == QK8_0) ? CPU_FEATURE_SVE : CPU_FEATURE_NONE);
|
||||
|
||||
if (env_var) {
|
||||
sme_enabled = atoi(env_var);
|
||||
|
|
|
|||
|
|
@ -531,7 +531,7 @@ static __device__ __forceinline__ void flash_attn_ext_f16_iter(
|
|||
for (int k0 = 0; k0 < nbatch_fa; k0 += np*T_C_KQ::I) {
|
||||
#pragma unroll
|
||||
for (int l = 0; l < T_C_KQ::ne; ++l) {
|
||||
if (!oob_check || k0 + T_C_KQ::get_i(l) < k_VKQ_sup) {
|
||||
if (!oob_check || k0 + (threadIdx.y % np)*T_C_KQ::I + T_C_KQ::get_i(l) < k_VKQ_sup) {
|
||||
KQ_max_new[l % 2] = fmaxf(KQ_max_new[l % 2], KQ_C[k0/(np*T_C_KQ::I)].x[l] + FATTN_KQ_MAX_OFFSET);
|
||||
}
|
||||
}
|
||||
|
|
@ -583,7 +583,7 @@ static __device__ __forceinline__ void flash_attn_ext_f16_iter(
|
|||
for (int k0 = 0; k0 < nbatch_fa; k0 += np*T_C_KQ::J) {
|
||||
#pragma unroll
|
||||
for (int l = 0; l < T_C_KQ::ne; ++l) {
|
||||
if (!oob_check || k0 + T_C_KQ::get_j(l) < k_VKQ_sup) {
|
||||
if (!oob_check || k0 + (threadIdx.y % np)*T_C_KQ::J + T_C_KQ::get_j(l) < k_VKQ_sup) {
|
||||
// Turing + Volta:
|
||||
KQ_max_new[(l/2) % 2] = fmaxf(KQ_max_new[(l/2) % 2], KQ_C[(k0/(np*T_C_KQ::J))].x[l] + FATTN_KQ_MAX_OFFSET);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1684,3 +1684,60 @@ ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_opt_step_sgd(ggm
|
|||
|
||||
return res;
|
||||
}
|
||||
|
||||
ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_memset(ggml_metal_library_t lib, const ggml_tensor * op) {
|
||||
GGML_ASSERT(op->type == GGML_TYPE_I64);
|
||||
|
||||
char base[256];
|
||||
char name[256];
|
||||
|
||||
snprintf(base, 256, "kernel_memset_%s", ggml_type_name(op->type));
|
||||
snprintf(name, 256, "%s", base);
|
||||
|
||||
ggml_metal_pipeline_with_params res = ggml_metal_library_get_pipeline(lib, name);
|
||||
if (!res.pipeline) {
|
||||
res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_count_equal(ggml_metal_library_t lib, const ggml_tensor * op) {
|
||||
assert(op->op == GGML_OP_COUNT_EQUAL);
|
||||
|
||||
GGML_TENSOR_LOCALS(int64_t, ne0, op->src[0], ne);
|
||||
|
||||
GGML_ASSERT(op->src[0]->type == op->src[1]->type);
|
||||
GGML_ASSERT(op->src[0]->type == GGML_TYPE_I32);
|
||||
GGML_ASSERT(op->type == GGML_TYPE_I64);
|
||||
|
||||
// note: the kernel only supports i32 output due to metal atomic add only supporting atomic_int
|
||||
GGML_ASSERT(ggml_nelements(op->src[0]) < (1LL << 31));
|
||||
|
||||
char base[256];
|
||||
char name[256];
|
||||
|
||||
int nsg = 1;
|
||||
while (32*nsg < ne00 && nsg < 32) {
|
||||
nsg *= 2;
|
||||
}
|
||||
|
||||
snprintf(base, 256, "kernel_count_equal_%s", ggml_type_name(op->src[0]->type));
|
||||
snprintf(name, 256, "%s_nsg=%d", base, nsg);
|
||||
|
||||
ggml_metal_pipeline_with_params res = ggml_metal_library_get_pipeline(lib, name);
|
||||
if (!res.pipeline) {
|
||||
ggml_metal_cv_t cv = ggml_metal_cv_init();
|
||||
|
||||
ggml_metal_cv_set_int16(cv, nsg, FC_COUNT_EQUAL + 0);
|
||||
|
||||
res = ggml_metal_library_compile_pipeline(lib, base, name, cv);
|
||||
|
||||
ggml_metal_cv_free(cv);
|
||||
}
|
||||
|
||||
res.smem = 32 * sizeof(int32_t);
|
||||
res.nsg = nsg;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -147,6 +147,8 @@ struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_arange
|
|||
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_timestep_embedding(ggml_metal_library_t lib, const struct ggml_tensor * op);
|
||||
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_opt_step_adamw (ggml_metal_library_t lib, const struct ggml_tensor * op);
|
||||
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_opt_step_sgd (ggml_metal_library_t lib, const struct ggml_tensor * op);
|
||||
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_memset (ggml_metal_library_t lib, const struct ggml_tensor * op);
|
||||
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_count_equal (ggml_metal_library_t lib, const struct ggml_tensor * op);
|
||||
|
||||
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_flash_attn_ext_pad(
|
||||
ggml_metal_library_t lib,
|
||||
|
|
|
|||
|
|
@ -1023,6 +1023,11 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
|
|||
return has_simdgroup_reduction && ggml_is_contiguous_rows(op->src[0]);
|
||||
case GGML_OP_L2_NORM:
|
||||
return has_simdgroup_reduction && (op->ne[0] % 4 == 0 && ggml_is_contiguous_1(op->src[0]));
|
||||
case GGML_OP_COUNT_EQUAL:
|
||||
return has_simdgroup_reduction &&
|
||||
op->src[0]->type == GGML_TYPE_I32 &&
|
||||
op->src[1]->type == GGML_TYPE_I32 &&
|
||||
op->type == GGML_TYPE_I64;
|
||||
case GGML_OP_ARGMAX:
|
||||
return has_simdgroup_reduction;
|
||||
case GGML_OP_NORM:
|
||||
|
|
|
|||
|
|
@ -78,6 +78,7 @@
|
|||
#define FC_MUL_MM 700
|
||||
#define FC_ROPE 800
|
||||
#define FC_SSM_CONV 900
|
||||
#define FC_COUNT_EQUAL 1000
|
||||
|
||||
// op-specific constants
|
||||
#define OP_FLASH_ATTN_EXT_NQPTG 8
|
||||
|
|
@ -894,6 +895,25 @@ typedef struct {
|
|||
float step;
|
||||
} ggml_metal_kargs_arange;
|
||||
|
||||
typedef struct {
|
||||
int64_t val;
|
||||
} ggml_metal_kargs_memset;
|
||||
|
||||
typedef struct {
|
||||
int32_t ne00;
|
||||
int32_t ne01;
|
||||
int32_t ne02;
|
||||
int32_t ne03;
|
||||
uint64_t nb00;
|
||||
uint64_t nb01;
|
||||
uint64_t nb02;
|
||||
uint64_t nb03;
|
||||
uint64_t nb10;
|
||||
uint64_t nb11;
|
||||
uint64_t nb12;
|
||||
uint64_t nb13;
|
||||
} ggml_metal_kargs_count_equal;
|
||||
|
||||
typedef struct {
|
||||
int32_t k0;
|
||||
int32_t k1;
|
||||
|
|
|
|||
|
|
@ -448,7 +448,11 @@ static int ggml_metal_op_encode_impl(ggml_metal_op_t ctx, int idx) {
|
|||
{
|
||||
n_fuse = ggml_metal_op_opt_step_sgd(ctx, idx);
|
||||
} break;
|
||||
default:
|
||||
case GGML_OP_COUNT_EQUAL:
|
||||
{
|
||||
n_fuse = ggml_metal_op_count_equal(ctx, idx);
|
||||
} break;
|
||||
default:
|
||||
{
|
||||
GGML_LOG_ERROR("%s: error: node %3d, op = %8s not implemented\n", __func__, idx, ggml_op_name(node->op));
|
||||
GGML_ABORT("fatal error");
|
||||
|
|
@ -4090,3 +4094,64 @@ int ggml_metal_op_opt_step_sgd(ggml_metal_op_t ctx, int idx) {
|
|||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int ggml_metal_op_count_equal(ggml_metal_op_t ctx, int idx) {
|
||||
ggml_tensor * op = ctx->node(idx);
|
||||
|
||||
ggml_metal_library_t lib = ctx->lib;
|
||||
ggml_metal_encoder_t enc = ctx->enc;
|
||||
|
||||
GGML_TENSOR_LOCALS(int32_t, ne0, op->src[0], ne);
|
||||
GGML_TENSOR_LOCALS(uint64_t, nb0, op->src[0], nb);
|
||||
GGML_TENSOR_LOCALS(uint64_t, nb1, op->src[1], nb);
|
||||
|
||||
{
|
||||
ggml_metal_kargs_memset args = { /*.val =*/ 0 };
|
||||
|
||||
auto pipeline = ggml_metal_library_get_pipeline_memset(lib, op);
|
||||
|
||||
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
||||
ggml_metal_encoder_set_bytes(enc, &args, sizeof(args), 0);
|
||||
ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op), 1);
|
||||
|
||||
ggml_metal_encoder_dispatch_threadgroups(enc, 1, 1, 1, 1, 1, 1);
|
||||
}
|
||||
|
||||
ggml_metal_op_concurrency_reset(ctx);
|
||||
|
||||
{
|
||||
ggml_metal_kargs_count_equal args = {
|
||||
/*.ne00 =*/ ne00,
|
||||
/*.ne01 =*/ ne01,
|
||||
/*.ne02 =*/ ne02,
|
||||
/*.ne03 =*/ ne03,
|
||||
/*.nb00 =*/ nb00,
|
||||
/*.nb01 =*/ nb01,
|
||||
/*.nb02 =*/ nb02,
|
||||
/*.nb03 =*/ nb03,
|
||||
/*.nb10 =*/ nb10,
|
||||
/*.nb11 =*/ nb11,
|
||||
/*.nb12 =*/ nb12,
|
||||
/*.nb13 =*/ nb13,
|
||||
};
|
||||
|
||||
auto pipeline = ggml_metal_library_get_pipeline_count_equal(lib, op);
|
||||
|
||||
const size_t smem = pipeline.smem;
|
||||
|
||||
const int nth = 32*pipeline.nsg;
|
||||
|
||||
GGML_ASSERT(nth <= ggml_metal_pipeline_max_theads_per_threadgroup(pipeline));
|
||||
|
||||
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
||||
ggml_metal_encoder_set_bytes(enc, &args, sizeof(args), 0);
|
||||
ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op->src[0]), 1);
|
||||
ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op->src[1]), 2);
|
||||
ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op), 3);
|
||||
|
||||
ggml_metal_encoder_set_threadgroup_memory_size(enc, smem, 0);
|
||||
ggml_metal_encoder_dispatch_threadgroups(enc, ne01, ne02, ne03, nth, 1, 1);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -87,6 +87,7 @@ int ggml_metal_op_leaky_relu (ggml_metal_op_t ctx, int idx);
|
|||
int ggml_metal_op_tri (ggml_metal_op_t ctx, int idx);
|
||||
int ggml_metal_op_opt_step_adamw (ggml_metal_op_t ctx, int idx);
|
||||
int ggml_metal_op_opt_step_sgd (ggml_metal_op_t ctx, int idx);
|
||||
int ggml_metal_op_count_equal (ggml_metal_op_t ctx, int idx);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1790,6 +1790,7 @@ kernel void kernel_op_sum_f32(
|
|||
return;
|
||||
}
|
||||
|
||||
// TODO: become function constant
|
||||
const uint nsg = (ntg.x + 31) / 32;
|
||||
|
||||
float sumf = 0;
|
||||
|
|
@ -9557,9 +9558,6 @@ template [[host_name("kernel_mul_mm_iq4_xs_f32")]] kernel mul_mm_t kernel_mul_m
|
|||
|
||||
template [[host_name("kernel_mul_mm_f32_f16")]] kernel mul_mm_t kernel_mul_mm<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, float4x4, 1, dequantize_f32, float, float4x4, half, half2x4>;
|
||||
template [[host_name("kernel_mul_mm_f16_f16")]] kernel mul_mm_t kernel_mul_mm<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, half4x4, 1, dequantize_f16, half, half4x4, half, half2x4>;
|
||||
#if defined(GGML_METAL_HAS_BF16)
|
||||
template [[host_name("kernel_mul_mm_bf16_f16")]] kernel mul_mm_t kernel_mul_mm<bfloat, bfloat4x4, simdgroup_bfloat8x8, half, half2x4, simdgroup_half8x8, bfloat4x4, 1, dequantize_bf16, bfloat, bfloat4x4, half, half2x4>;
|
||||
#endif
|
||||
template [[host_name("kernel_mul_mm_q4_0_f16")]] kernel mul_mm_t kernel_mul_mm<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, block_q4_0, 2, dequantize_q4_0, float, float4x4, half, half2x4>;
|
||||
template [[host_name("kernel_mul_mm_q4_1_f16")]] kernel mul_mm_t kernel_mul_mm<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, block_q4_1, 2, dequantize_q4_1, float, float4x4, half, half2x4>;
|
||||
template [[host_name("kernel_mul_mm_q5_0_f16")]] kernel mul_mm_t kernel_mul_mm<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, block_q5_0, 2, dequantize_q5_0, float, float4x4, half, half2x4>;
|
||||
|
|
@ -9615,9 +9613,6 @@ template [[host_name("kernel_mul_mm_id_iq4_xs_f32")]] kernel mul_mm_id kernel_m
|
|||
|
||||
template [[host_name("kernel_mul_mm_id_f32_f16")]] kernel mul_mm_id kernel_mul_mm_id<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, float4x4, 1, dequantize_f32, float, float4x4, half, half2x4>;
|
||||
template [[host_name("kernel_mul_mm_id_f16_f16")]] kernel mul_mm_id kernel_mul_mm_id<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, half4x4, 1, dequantize_f16, half, half4x4, half, half2x4>;
|
||||
#if defined(GGML_METAL_HAS_BF16)
|
||||
template [[host_name("kernel_mul_mm_id_bf16_f16")]] kernel mul_mm_id kernel_mul_mm_id<bfloat, bfloat4x4, simdgroup_bfloat8x8, half, half2x4, simdgroup_half8x8, bfloat4x4, 1, dequantize_bf16, bfloat, bfloat4x4, half, half2x4>;
|
||||
#endif
|
||||
template [[host_name("kernel_mul_mm_id_q4_0_f16")]] kernel mul_mm_id kernel_mul_mm_id<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, block_q4_0, 2, dequantize_q4_0, float, float4x4, half, half2x4>;
|
||||
template [[host_name("kernel_mul_mm_id_q4_1_f16")]] kernel mul_mm_id kernel_mul_mm_id<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, block_q4_1, 2, dequantize_q4_1, float, float4x4, half, half2x4>;
|
||||
template [[host_name("kernel_mul_mm_id_q5_0_f16")]] kernel mul_mm_id kernel_mul_mm_id<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, block_q5_0, 2, dequantize_q5_0, float, float4x4, half, half2x4>;
|
||||
|
|
@ -9920,3 +9915,75 @@ kernel void kernel_opt_step_sgd_f32(
|
|||
|
||||
x[gid] = x[gid] * (1.0f - pars[0] * pars[1]) - pars[0] * g[gid];
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
kernel void kernel_memset(
|
||||
constant ggml_metal_kargs_fill & args,
|
||||
device T * dst,
|
||||
uint tpig[[thread_position_in_grid]]) {
|
||||
dst[tpig] = args.val;
|
||||
}
|
||||
|
||||
typedef decltype(kernel_memset<int64_t>) kernel_memset_t;
|
||||
|
||||
template [[host_name("kernel_memset_i64")]] kernel kernel_memset_t kernel_memset<int64_t>;
|
||||
|
||||
constant short FC_count_equal_nsg [[function_constant(FC_COUNT_EQUAL + 0)]];
|
||||
|
||||
template<typename T>
|
||||
kernel void kernel_count_equal(
|
||||
constant ggml_metal_kargs_count_equal & args,
|
||||
device const char * src0,
|
||||
device const char * src1,
|
||||
device atomic_int * dst,
|
||||
threadgroup int32_t * shmem_i32 [[threadgroup(0)]],
|
||||
uint3 tgpig[[threadgroup_position_in_grid]],
|
||||
ushort3 tpitg[[thread_position_in_threadgroup]],
|
||||
ushort sgitg[[simdgroup_index_in_threadgroup]],
|
||||
ushort tiisg[[thread_index_in_simdgroup]],
|
||||
ushort3 ntg[[threads_per_threadgroup]]) {
|
||||
const short NSG = FC_count_equal_nsg;
|
||||
|
||||
const int i3 = tgpig.z;
|
||||
const int i2 = tgpig.y;
|
||||
const int i1 = tgpig.x;
|
||||
|
||||
if (i3 >= args.ne03 || i2 >= args.ne02 || i1 >= args.ne01) {
|
||||
return;
|
||||
}
|
||||
|
||||
int sum = 0;
|
||||
|
||||
device const char * base0 = src0 + i1*args.nb01 + i2*args.nb02 + i3*args.nb03;
|
||||
device const char * base1 = src1 + i1*args.nb11 + i2*args.nb12 + i3*args.nb13;
|
||||
|
||||
for (int64_t i0 = tpitg.x; i0 < args.ne00; i0 += ntg.x) {
|
||||
const T v0 = *(device const T *)(base0 + i0*args.nb00);
|
||||
const T v1 = *(device const T *)(base1 + i0*args.nb10);
|
||||
sum += (v0 == v1);
|
||||
}
|
||||
|
||||
sum = simd_sum(sum);
|
||||
|
||||
if (tiisg == 0) {
|
||||
shmem_i32[sgitg] = sum;
|
||||
}
|
||||
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
||||
if (sgitg == 0) {
|
||||
float v = 0.0f;
|
||||
if (tpitg.x < NSG) {
|
||||
v = shmem_i32[tpitg.x];
|
||||
}
|
||||
|
||||
float total = simd_sum(v);
|
||||
if (tpitg.x == 0) {
|
||||
atomic_fetch_add_explicit(dst, (int32_t) total, memory_order_relaxed);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
typedef decltype(kernel_count_equal<int32_t>) kernel_count_equal_t;
|
||||
|
||||
template [[host_name("kernel_count_equal_i32")]] kernel kernel_count_equal_t kernel_count_equal<int32_t>;
|
||||
|
|
|
|||
|
|
@ -36,7 +36,47 @@ if (WIN32)
|
|||
endif()
|
||||
endif()
|
||||
|
||||
find_package(IntelSYCL)
|
||||
macro(detect_and_find_package package_name)
|
||||
set(test_source "
|
||||
cmake_minimum_required(VERSION ${CMAKE_VERSION})
|
||||
project(check_package LANGUAGES CXX)
|
||||
find_package(${package_name} QUIET)
|
||||
")
|
||||
|
||||
set(test_dir "${CMAKE_CURRENT_BINARY_DIR}/check_package_${package_name}")
|
||||
file(WRITE "${test_dir}/CMakeLists.txt" "${test_source}")
|
||||
|
||||
set(cmake_args "")
|
||||
if(CMAKE_GENERATOR)
|
||||
list(APPEND cmake_args "-G" "${CMAKE_GENERATOR}")
|
||||
endif()
|
||||
if(CMAKE_GENERATOR_PLATFORM)
|
||||
list(APPEND cmake_args "-A" "${CMAKE_GENERATOR_PLATFORM}")
|
||||
endif()
|
||||
if(CMAKE_GENERATOR_TOOLSET)
|
||||
list(APPEND cmake_args "-T" "${CMAKE_GENERATOR_TOOLSET}")
|
||||
endif()
|
||||
if(CMAKE_CXX_COMPILER)
|
||||
list(APPEND cmake_args "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}")
|
||||
endif()
|
||||
|
||||
execute_process(
|
||||
COMMAND ${CMAKE_COMMAND} ${cmake_args} .
|
||||
WORKING_DIRECTORY "${test_dir}"
|
||||
RESULT_VARIABLE result
|
||||
OUTPUT_QUIET
|
||||
ERROR_QUIET
|
||||
)
|
||||
|
||||
if(result EQUAL 0)
|
||||
find_package(${package_name} ${ARGN})
|
||||
else()
|
||||
message(WARNING "Detection of ${package_name} failed. The package might be broken or incompatible.")
|
||||
set(${package_name}_FOUND FALSE)
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
detect_and_find_package(IntelSYCL)
|
||||
if (IntelSYCL_FOUND)
|
||||
# Use oneAPI CMake when possible
|
||||
target_link_libraries(ggml-sycl PRIVATE IntelSYCL::SYCL_CXX)
|
||||
|
|
@ -191,3 +231,4 @@ if (GGML_SYCL_DEVICE_ARCH)
|
|||
target_compile_options(ggml-sycl PRIVATE -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH})
|
||||
target_link_options(ggml-sycl PRIVATE -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH})
|
||||
endif()
|
||||
|
||||
|
|
|
|||
|
|
@ -150,6 +150,9 @@ You can use GBNF grammars:
|
|||
- in CLI, with [examples/json_schema_to_grammar.py](../examples/json_schema_to_grammar.py)
|
||||
- in JavaScript with [json-schema-to-grammar.mjs](../tools/server/public_legacy/json-schema-to-grammar.mjs) (this is used by the [server](../tools/server)'s Web UI)
|
||||
|
||||
> [!NOTE]
|
||||
> The JSON schema is only used to constrain the model output and is not injected into the prompt. The model has no visibility into the schema, so if you want it to understand the expected structure, describe it explicitly in your prompt. This does not apply to tool calling, where schemas are injected into the prompt.
|
||||
|
||||
Take a look at [tests](../tests/test-json-schema-to-grammar.cpp) to see which features are likely supported (you'll also find usage examples in https://github.com/ggml-org/llama.cpp/pull/5978, https://github.com/ggml-org/llama.cpp/pull/6659 & https://github.com/ggml-org/llama.cpp/pull/6555).
|
||||
|
||||
```bash
|
||||
|
|
|
|||
|
|
@ -607,6 +607,8 @@ extern "C" {
|
|||
//
|
||||
|
||||
// Load a LoRA adapter from file
|
||||
// The adapter is valid as long as the associated model is not freed
|
||||
// All adapters must be loaded before context creation
|
||||
LLAMA_API struct llama_adapter_lora * llama_adapter_lora_init(
|
||||
struct llama_model * model,
|
||||
const char * path_lora);
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ Example function tool call syntax:
|
|||
{%- if message['role'] == 'user' -%}
|
||||
{{- '<|User|>' + message['content'] + '<|end▁of▁sentence|>' -}}
|
||||
{%- endif -%}
|
||||
{%- if message['role'] == 'assistant' and message['content'] is none -%}
|
||||
{%- if message['role'] == 'assistant' and not message['content'] -%}
|
||||
{{- '<|Assistant|><|tool▁calls▁begin|>' -}}
|
||||
{%- set ns.is_first = true -%}
|
||||
{%- for tc in message['tool_calls'] -%}
|
||||
|
|
@ -53,7 +53,7 @@ Example function tool call syntax:
|
|||
{%- endfor -%}
|
||||
{{- '<|tool▁calls▁end|><|end▁of▁sentence|>' -}}
|
||||
{%- endif -%}
|
||||
{%- if message['role'] == 'assistant' and message['content'] is not none -%}
|
||||
{%- if message['role'] == 'assistant' and message['content'] -%}
|
||||
{{- flush_tool_outputs() -}}
|
||||
{%- set content = message['content'] -%}
|
||||
{%- if '</think>' in content -%}
|
||||
|
|
@ -73,4 +73,4 @@ Example function tool call syntax:
|
|||
{{- flush_tool_outputs() -}}
|
||||
{%- if add_generation_prompt and not ns.is_tool_outputs -%}
|
||||
{{- '<|Assistant|><think>\n' -}}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
|
|
|
|||
|
|
@ -146,9 +146,11 @@ llama_adapter_lora_weight * llama_adapter_lora::get_weight(ggml_tensor * w) {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
static void llama_adapter_lora_init_impl(llama_model & model, const char * path_lora, llama_adapter_lora & adapter) {
|
||||
static void llama_adapter_lora_init_impl(const char * path_lora, llama_adapter_lora & adapter) {
|
||||
LLAMA_LOG_INFO("%s: loading lora adapter from '%s' ...\n", __func__, path_lora);
|
||||
|
||||
llama_model & model = adapter.model;
|
||||
|
||||
ggml_context * ctx_init;
|
||||
gguf_init_params meta_gguf_params = {
|
||||
/* .no_alloc = */ true,
|
||||
|
|
@ -411,14 +413,17 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
|
|||
}
|
||||
}
|
||||
|
||||
// update number of nodes used
|
||||
model.n_lora_nodes += adapter.get_n_nodes();
|
||||
|
||||
LLAMA_LOG_INFO("%s: loaded %zu tensors from lora file\n", __func__, adapter.ab_map.size()*2);
|
||||
}
|
||||
|
||||
llama_adapter_lora * llama_adapter_lora_init(llama_model * model, const char * path_lora) {
|
||||
llama_adapter_lora * adapter = new llama_adapter_lora();
|
||||
llama_adapter_lora * adapter = new llama_adapter_lora(*model);
|
||||
|
||||
try {
|
||||
llama_adapter_lora_init_impl(*model, path_lora, *adapter);
|
||||
llama_adapter_lora_init_impl(path_lora, *adapter);
|
||||
return adapter;
|
||||
} catch (const std::exception & err) {
|
||||
LLAMA_LOG_ERROR("%s: failed to apply lora adapter: %s\n", __func__, err.what());
|
||||
|
|
@ -469,6 +474,10 @@ int32_t llama_adapter_meta_val_str_by_index(const llama_adapter_lora * adapter,
|
|||
}
|
||||
|
||||
void llama_adapter_lora_free(llama_adapter_lora * adapter) {
|
||||
// update number of nodes used
|
||||
GGML_ASSERT(adapter->model.n_lora_nodes >= adapter->get_n_nodes());
|
||||
adapter->model.n_lora_nodes -= adapter->get_n_nodes();
|
||||
|
||||
delete adapter;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -59,6 +59,8 @@ struct llama_adapter_lora_weight {
|
|||
};
|
||||
|
||||
struct llama_adapter_lora {
|
||||
llama_model & model;
|
||||
|
||||
// map tensor name to lora_a_b
|
||||
std::unordered_map<std::string, llama_adapter_lora_weight> ab_map;
|
||||
|
||||
|
|
@ -73,10 +75,14 @@ struct llama_adapter_lora {
|
|||
// activated lora (aLoRA)
|
||||
std::vector<llama_token> alora_invocation_tokens;
|
||||
|
||||
llama_adapter_lora() = default;
|
||||
llama_adapter_lora(llama_model & model) : model(model) {}
|
||||
~llama_adapter_lora() = default;
|
||||
|
||||
llama_adapter_lora_weight * get_weight(ggml_tensor * w);
|
||||
|
||||
uint32_t get_n_nodes() const {
|
||||
return ab_map.size() * 6u; // a, b, scale, add, 2 x mul_mat
|
||||
}
|
||||
};
|
||||
|
||||
using llama_adapter_loras = std::unordered_map<llama_adapter_lora *, float>;
|
||||
|
|
|
|||
|
|
@ -1442,7 +1442,9 @@ uint32_t llama_context::graph_max_nodes(uint32_t n_tokens) const {
|
|||
if (model.arch == LLM_ARCH_QWEN3NEXT) {
|
||||
return std::max<uint32_t>(n_tokens * 40, 32u * model.n_tensors());
|
||||
}
|
||||
return std::max<uint32_t>(1024u, 8u*model.n_tensors());
|
||||
uint32_t res = std::max<uint32_t>(1024u, 8u*model.n_tensors());
|
||||
res += model.n_lora_nodes;
|
||||
return res;
|
||||
}
|
||||
|
||||
llm_graph_result * llama_context::get_gf_res_reserve() const {
|
||||
|
|
|
|||
|
|
@ -305,7 +305,7 @@ public:
|
|||
bool do_shift,
|
||||
stream_copy_info sc_info);
|
||||
|
||||
// used to create a batch procesing context from a batch
|
||||
// used to create a batch processing context from a batch
|
||||
llama_kv_cache_context(
|
||||
llama_kv_cache * kv,
|
||||
slot_info_vec_t sinfos,
|
||||
|
|
|
|||
|
|
@ -475,6 +475,9 @@ struct llama_model {
|
|||
// for quantize-stats only
|
||||
std::vector<std::pair<std::string, struct ggml_tensor *>> tensors_by_name;
|
||||
|
||||
// for keeping track of extra nodes used by lora adapters
|
||||
uint32_t n_lora_nodes = 0;
|
||||
|
||||
int64_t t_load_us = 0;
|
||||
int64_t t_start_us = 0;
|
||||
|
||||
|
|
|
|||
|
|
@ -421,39 +421,6 @@ void llama_sampler_free(struct llama_sampler * smpl) {
|
|||
delete smpl;
|
||||
}
|
||||
|
||||
llama_token llama_sampler_sample(struct llama_sampler * smpl, struct llama_context * ctx, int32_t idx) {
|
||||
const auto * logits = llama_get_logits_ith(ctx, idx);
|
||||
|
||||
const llama_model * model = llama_get_model(ctx);
|
||||
const llama_vocab * vocab = llama_model_get_vocab(model);
|
||||
|
||||
const int n_vocab = llama_vocab_n_tokens(vocab);
|
||||
|
||||
// TODO: do not allocate each time
|
||||
std::vector<llama_token_data> cur;
|
||||
cur.reserve(n_vocab);
|
||||
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
|
||||
cur.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f});
|
||||
}
|
||||
|
||||
llama_token_data_array cur_p = {
|
||||
/* .data = */ cur.data(),
|
||||
/* .size = */ cur.size(),
|
||||
/* .selected = */ -1,
|
||||
/* .sorted = */ false,
|
||||
};
|
||||
|
||||
llama_sampler_apply(smpl, &cur_p);
|
||||
|
||||
GGML_ASSERT(cur_p.selected >= 0 && cur_p.selected < (int32_t) cur_p.size);
|
||||
|
||||
auto token = cur_p.data[cur_p.selected].id;
|
||||
|
||||
llama_sampler_accept(smpl, token);
|
||||
|
||||
return token;
|
||||
}
|
||||
|
||||
// sampler chain
|
||||
|
||||
static const char * llama_sampler_chain_name(const struct llama_sampler * /*smpl*/) {
|
||||
|
|
@ -527,12 +494,56 @@ struct llama_sampler * llama_sampler_chain_init(struct llama_sampler_chain_param
|
|||
/* .ctx = */ new llama_sampler_chain {
|
||||
/* .params = */ params,
|
||||
/* .samplers = */ {},
|
||||
/* .cur = */ {},
|
||||
/* .t_sample_us = */ 0,
|
||||
/* .n_sample = */ 0,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
llama_token llama_sampler_sample(struct llama_sampler * smpl, struct llama_context * ctx, int32_t idx) {
|
||||
const auto * logits = llama_get_logits_ith(ctx, idx);
|
||||
|
||||
const llama_model * model = llama_get_model(ctx);
|
||||
const llama_vocab * vocab = llama_model_get_vocab(model);
|
||||
|
||||
const int n_vocab = llama_vocab_n_tokens(vocab);
|
||||
|
||||
// use pre-allocated buffer from chain if available, otherwise allocate locally
|
||||
std::vector<llama_token_data> * cur_ptr;
|
||||
std::vector<llama_token_data> cur_local;
|
||||
|
||||
if (smpl->iface == &llama_sampler_chain_i) {
|
||||
auto * chain = (llama_sampler_chain *) smpl->ctx;
|
||||
cur_ptr = &chain->cur;
|
||||
} else {
|
||||
cur_ptr = &cur_local;
|
||||
}
|
||||
|
||||
auto & cur = *cur_ptr;
|
||||
cur.resize(n_vocab);
|
||||
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
|
||||
cur[token_id] = llama_token_data{token_id, logits[token_id], 0.0f};
|
||||
}
|
||||
|
||||
llama_token_data_array cur_p = {
|
||||
/* .data = */ cur.data(),
|
||||
/* .size = */ cur.size(),
|
||||
/* .selected = */ -1,
|
||||
/* .sorted = */ false,
|
||||
};
|
||||
|
||||
llama_sampler_apply(smpl, &cur_p);
|
||||
|
||||
GGML_ASSERT(cur_p.selected >= 0 && cur_p.selected < (int32_t) cur_p.size);
|
||||
|
||||
auto token = cur_p.data[cur_p.selected].id;
|
||||
|
||||
llama_sampler_accept(smpl, token);
|
||||
|
||||
return token;
|
||||
}
|
||||
|
||||
void llama_sampler_chain_add(struct llama_sampler * chain, struct llama_sampler * smpl) {
|
||||
auto * p = (llama_sampler_chain *) chain->ctx;
|
||||
p->samplers.push_back(smpl);
|
||||
|
|
|
|||
|
|
@ -16,6 +16,9 @@ struct llama_sampler_chain {
|
|||
|
||||
std::vector<struct llama_sampler *> samplers;
|
||||
|
||||
// pre-allocated buffer for llama_sampler_sample to avoid repeated allocations
|
||||
std::vector<llama_token_data> cur;
|
||||
|
||||
// timing
|
||||
|
||||
mutable int64_t t_sample_us;
|
||||
|
|
|
|||
|
|
@ -650,7 +650,7 @@ static void test_msgs_oaicompat_json_conversion() {
|
|||
"[\n"
|
||||
" {\n"
|
||||
" \"role\": \"assistant\",\n"
|
||||
" \"content\": null,\n"
|
||||
" \"content\": \"\",\n"
|
||||
" \"tool_calls\": [\n"
|
||||
" {\n"
|
||||
" \"type\": \"function\",\n"
|
||||
|
|
@ -906,7 +906,8 @@ static void test_template_output_parsers() {
|
|||
" },\n"
|
||||
" \"id\": \"123456789\"\n"
|
||||
" }\n"
|
||||
" ]\n"
|
||||
" ],\n"
|
||||
" \"content\": \"\"\n"
|
||||
"}");
|
||||
}
|
||||
{
|
||||
|
|
@ -1713,7 +1714,8 @@ static void test_template_output_parsers() {
|
|||
" },\n"
|
||||
" \"id\": \"123456789\"\n"
|
||||
" }\n"
|
||||
" ]\n"
|
||||
" ],\n"
|
||||
" \"content\": \"\"\n"
|
||||
"}",
|
||||
/* expect_grammar_triggered= */ false
|
||||
);
|
||||
|
|
|
|||
|
|
@ -38,14 +38,6 @@ set(TARGET_SRCS
|
|||
server-http.h
|
||||
server-models.cpp
|
||||
server-models.h
|
||||
server-task.cpp
|
||||
server-task.h
|
||||
server-queue.cpp
|
||||
server-queue.h
|
||||
server-common.cpp
|
||||
server-common.h
|
||||
server-context.cpp
|
||||
server-context.h
|
||||
)
|
||||
set(PUBLIC_ASSETS
|
||||
index.html.gz
|
||||
|
|
|
|||
Loading…
Reference in New Issue