Merge branch 'master' into HEAD
This commit is contained in:
commit
8071a57c9b
|
|
@ -0,0 +1,95 @@
|
|||
ARG UBUNTU_VERSION=24.04
# CUDA release to build against; keep it broadly in line with the container host's environment.
ARG CUDA_VERSION=13.1.0
# CUDA development image used by the build stage
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# Value forwarded as CMAKE_CUDA_ARCHITECTURES; "default" passes no architecture flag to CMake
ARG CUDA_DOCKER_ARCH=default

# Toolchain and libraries needed to configure and compile the project
RUN apt-get update \
    && apt-get install -y \
        build-essential \
        cmake \
        git \
        libcurl4-openssl-dev \
        libgomp1 \
        python3 \
        python3-pip

WORKDIR /app

COPY . .

# Configure (optionally pinning the CUDA architectures) and build in Release mode
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
    fi \
    && cmake -B build \
        -DGGML_NATIVE=OFF \
        -DGGML_CUDA=ON \
        -DGGML_BACKEND_DL=ON \
        -DGGML_CPU_ALL_VARIANTS=ON \
        -DLLAMA_BUILD_TESTS=OFF \
        ${CMAKE_ARGS} \
        -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \
        . \
    && cmake --build build --config Release -j$(nproc)

# Gather every shared object (symlinks preserved by -P) into /app/lib
RUN mkdir -p /app/lib \
    && find build -name "*.so*" -exec cp -P {} /app/lib \;

# Assemble the payload of the "full" image: binaries, Python scripts, requirements and tools.sh
RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh
|
||||
|
||||
## Base image
FROM ${BASE_CUDA_RUN_CONTAINER} AS base

# Runtime dependencies shared by every final stage.
# apt-get is used for all steps: the `apt` front end does not guarantee a stable
# command-line interface for scripts and prints a warning when used non-interactively.
# Caches and package index files are deleted in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt-get autoremove -y \
    && apt-get clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

# Shared libraries collected by the build stage
COPY --from=build /app/lib/ /app
|
||||
|
||||
### Full
FROM base AS full

# Binaries, Python scripts, requirements and tools.sh assembled by the build stage
COPY --from=build /app/full /app

WORKDIR /app

# Python tooling plus the project's Python requirements.
# --no-cache-dir keeps pip's download/wheel cache out of the layer; the cleanup below
# only covers /tmp, /var/tmp and /var/cache, so that cache would otherwise stay in the image.
# apt-get replaces `apt`, whose CLI is not meant for scripted use.
RUN apt-get update \
    && apt-get install -y \
        git \
        python3 \
        python3-pip \
        python3-wheel \
    && pip install --no-cache-dir --break-system-packages --upgrade setuptools \
    && pip install --no-cache-dir --break-system-packages -r requirements.txt \
    && apt-get autoremove -y \
    && apt-get clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]
|
||||
|
||||
### Light, CLI only
FROM base AS light

# With more than one source, the destination must be a directory and end with "/"
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app/

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]
|
||||
|
||||
### Server, Server only
FROM base AS server

# Listen on all interfaces so the server is reachable from outside the container
ENV LLAMA_ARG_HOST=0.0.0.0

# Trailing slash marks /app as a directory, so the binary always lands at /app/llama-server
COPY --from=build /app/full/llama-server /app/

WORKDIR /app

# Probe the HTTP health endpoint; curl -f turns HTTP error statuses into a non-zero exit code
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
|
||||
|
|
@ -40,7 +40,8 @@ jobs:
|
|||
# https://github.com/ggml-org/llama.cpp/issues/11888
|
||||
#- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: false }
|
||||
- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" }
|
||||
- { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" }
|
||||
- { tag: "cuda cuda12", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04", cuda_version: "12.4.0", ubuntu_version: "22.04" }
|
||||
- { tag: "cuda13", dockerfile: ".devops/cuda-new.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04", cuda_version: "13.1.0", ubuntu_version: "24.04" }
|
||||
- { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" }
|
||||
- { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" }
|
||||
- { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" }
|
||||
|
|
@ -80,18 +81,21 @@ jobs:
|
|||
run: |
|
||||
REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case
|
||||
REPO_NAME="${{ github.event.repository.name }}"
|
||||
PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:"
|
||||
|
||||
# list all tags possible
|
||||
if [[ "${{ matrix.config.tag }}" == "cpu" ]]; then
|
||||
TYPE=""
|
||||
else
|
||||
TYPE="-${{ matrix.config.tag }}"
|
||||
fi
|
||||
PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:"
|
||||
CACHETAGS="${PREFIX}buildcache${TYPE}"
|
||||
FULLTAGS="${PREFIX}full${TYPE},${PREFIX}full${TYPE}-${{ steps.srctag.outputs.name }}"
|
||||
LIGHTTAGS="${PREFIX}light${TYPE},${PREFIX}light${TYPE}-${{ steps.srctag.outputs.name }}"
|
||||
SERVERTAGS="${PREFIX}server${TYPE},${PREFIX}server${TYPE}-${{ steps.srctag.outputs.name }}"
|
||||
tags="${{ matrix.config.tag }}"
|
||||
for tag in $tags; do
|
||||
if [[ "$tag" == "cpu" ]]; then
|
||||
TYPE=""
|
||||
else
|
||||
TYPE="-$tag"
|
||||
fi
|
||||
CACHETAGS="${PREFIX}buildcache${TYPE}"
|
||||
FULLTAGS="${FULLTAGS:+$FULLTAGS,}${PREFIX}full${TYPE},${PREFIX}full${TYPE}-${{ steps.srctag.outputs.name }}"
|
||||
LIGHTTAGS="${LIGHTTAGS:+$LIGHTTAGS,}${PREFIX}light${TYPE},${PREFIX}light${TYPE}-${{ steps.srctag.outputs.name }}"
|
||||
SERVERTAGS="${SERVERTAGS:+$SERVERTAGS,}${PREFIX}server${TYPE},${PREFIX}server${TYPE}-${{ steps.srctag.outputs.name }}"
|
||||
done
|
||||
echo "cache_output_tags=$CACHETAGS" >> $GITHUB_OUTPUT
|
||||
echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT
|
||||
echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT
|
||||
|
|
@ -132,6 +136,9 @@ jobs:
|
|||
file: ${{ matrix.config.dockerfile }}
|
||||
target: full
|
||||
provenance: false
|
||||
build-args: |
|
||||
${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }}
|
||||
${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }}
|
||||
# using github experimental cache
|
||||
#cache-from: type=gha
|
||||
#cache-to: type=gha,mode=max
|
||||
|
|
@ -154,6 +161,9 @@ jobs:
|
|||
file: ${{ matrix.config.dockerfile }}
|
||||
target: light
|
||||
provenance: false
|
||||
build-args: |
|
||||
${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }}
|
||||
${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }}
|
||||
# using github experimental cache
|
||||
#cache-from: type=gha
|
||||
#cache-to: type=gha,mode=max
|
||||
|
|
@ -176,6 +186,9 @@ jobs:
|
|||
file: ${{ matrix.config.dockerfile }}
|
||||
target: server
|
||||
provenance: false
|
||||
build-args: |
|
||||
${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }}
|
||||
${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }}
|
||||
# using github experimental cache
|
||||
#cache-from: type=gha
|
||||
#cache-to: type=gha,mode=max
|
||||
|
|
|
|||
|
|
@ -1395,6 +1395,14 @@ static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
|
|||
builder.consume_reasoning_with_xml_tool_calls(form, "<seed:think>", "</seed:think>");
|
||||
}
|
||||
|
||||
// Solar Open: try to parse a reasoning section, then treat everything left as content.
static void common_chat_parse_solar_open(common_chat_msg_parser & builder) {
    // Literal markers handed to the reasoning parser as opening / closing delimiters
    static const char * const reasoning_open  = "<|think|>";
    static const char * const reasoning_close = "<|end|><|begin|>assistant<|content|>";

    builder.try_parse_reasoning(reasoning_open, reasoning_close);

    // TODO: Tool calling

    // The remainder of the input becomes the message content
    const auto rest = builder.consume_rest();
    builder.add_content(rest);
}
|
||||
|
||||
static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
|
||||
builder.try_parse_reasoning("<think>", "</think>");
|
||||
builder.add_content(builder.consume_rest());
|
||||
|
|
@ -1479,6 +1487,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
|
|||
case COMMON_CHAT_FORMAT_XIAOMI_MIMO:
|
||||
common_chat_parse_xiaomi_mimo(builder);
|
||||
break;
|
||||
case COMMON_CHAT_FORMAT_SOLAR_OPEN:
|
||||
common_chat_parse_solar_open(builder);
|
||||
break;
|
||||
default:
|
||||
throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -319,7 +319,7 @@ json common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msg
|
|||
}
|
||||
}
|
||||
} else {
|
||||
jmsg["content"] = json(); // null
|
||||
jmsg["content"] = "";
|
||||
}
|
||||
if (!msg.reasoning_content.empty()) {
|
||||
jmsg["reasoning_content"] = msg.reasoning_content;
|
||||
|
|
@ -380,8 +380,8 @@ std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const json & too
|
|||
const auto & function = tool.at("function");
|
||||
result.push_back({
|
||||
/* .name = */ function.at("name"),
|
||||
/* .description = */ function.at("description"),
|
||||
/* .parameters = */ function.at("parameters").dump(),
|
||||
/* .description = */ function.value("description", ""),
|
||||
/* .parameters = */ function.value("parameters", json::object()).dump(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -669,6 +669,7 @@ const char * common_chat_format_name(common_chat_format format) {
|
|||
case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder";
|
||||
case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
|
||||
case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
|
||||
case COMMON_CHAT_FORMAT_SOLAR_OPEN: return "Solar Open";
|
||||
case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple";
|
||||
case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native";
|
||||
case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed";
|
||||
|
|
@ -2517,6 +2518,27 @@ static common_chat_params common_chat_params_init_granite(const common_chat_temp
|
|||
return data;
|
||||
}
|
||||
|
||||
// Chat params for the Solar Open template: rendered prompt, format tag and preserved tokens.
static common_chat_params common_chat_params_init_solar_open(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;
    data.format = COMMON_CHAT_FORMAT_SOLAR_OPEN;

    // TODO: Reasoning effort
    json additional_context = {};

    // Render the prompt with no overrides besides the (currently empty) extra context
    data.prompt = apply(tmpl, inputs, std::nullopt, std::nullopt, additional_context);

    // Tokens listed as preserved for this format
    data.preserved_tokens = { "<|think|>", "<|content|>", "<|begin|>", "<|end|>" };

    // TODO: Tool calling

    return data;
}
|
||||
|
||||
static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
||||
common_chat_params data;
|
||||
data.prompt = apply(tmpl, inputs);
|
||||
|
|
@ -2780,6 +2802,13 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|||
return common_chat_params_init_magistral(tmpl, params);
|
||||
}
|
||||
|
||||
// Solar Open
|
||||
if (src.find("<|tool_response:begin|>") != std::string::npos &&
|
||||
src.find("<|tool_response:name|>") != std::string::npos &&
|
||||
src.find("<|tool_response:result|>") != std::string::npos) {
|
||||
return common_chat_params_init_solar_open(tmpl, params);
|
||||
}
|
||||
|
||||
// Plain handler (no tools)
|
||||
if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
|
||||
return common_chat_params_init_without_tools(tmpl, params);
|
||||
|
|
|
|||
|
|
@ -124,6 +124,7 @@ enum common_chat_format {
|
|||
COMMON_CHAT_FORMAT_QWEN3_CODER_XML,
|
||||
COMMON_CHAT_FORMAT_APRIEL_1_5,
|
||||
COMMON_CHAT_FORMAT_XIAOMI_MIMO,
|
||||
COMMON_CHAT_FORMAT_SOLAR_OPEN,
|
||||
|
||||
// These are intended to be parsed by the PEG parser
|
||||
COMMON_CHAT_FORMAT_PEG_SIMPLE,
|
||||
|
|
|
|||
|
|
@ -1110,6 +1110,25 @@ common_init_result::common_init_result(common_params & params) :
|
|||
|
||||
const llama_vocab * vocab = llama_model_get_vocab(model);
|
||||
|
||||
// load and optionally apply lora adapters (must be loaded before context creation)
|
||||
for (auto & la : params.lora_adapters) {
|
||||
llama_adapter_lora_ptr lora;
|
||||
lora.reset(llama_adapter_lora_init(model, la.path.c_str()));
|
||||
if (lora == nullptr) {
|
||||
LOG_ERR("%s: failed to load lora adapter '%s'\n", __func__, la.path.c_str());
|
||||
pimpl->model.reset(model);
|
||||
return;
|
||||
}
|
||||
|
||||
char buf[1024];
|
||||
la.ptr = lora.get();
|
||||
llama_adapter_meta_val_str(la.ptr, "adapter.lora.task_name", buf, sizeof(buf));
|
||||
la.task_name = buf;
|
||||
llama_adapter_meta_val_str(la.ptr, "adapter.lora.prompt_prefix", buf, sizeof(buf));
|
||||
la.prompt_prefix = buf;
|
||||
pimpl->lora.emplace_back(std::move(lora)); // copy to list of loaded adapters
|
||||
}
|
||||
|
||||
// updates params.sampling
|
||||
// TODO: fix naming
|
||||
common_init_sampler_from_model(model, params.sampling);
|
||||
|
|
@ -1261,24 +1280,6 @@ common_init_result_ptr common_init_from_params(common_params & params) {
|
|||
}
|
||||
}
|
||||
|
||||
// load and optionally apply lora adapters
|
||||
for (auto & la : params.lora_adapters) {
|
||||
llama_adapter_lora_ptr lora;
|
||||
lora.reset(llama_adapter_lora_init(model, la.path.c_str()));
|
||||
if (lora == nullptr) {
|
||||
LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
|
||||
return res;
|
||||
}
|
||||
|
||||
char buf[1024];
|
||||
la.ptr = lora.get();
|
||||
llama_adapter_meta_val_str(la.ptr, "adapter.lora.task_name", buf, sizeof(buf));
|
||||
la.task_name = buf;
|
||||
llama_adapter_meta_val_str(la.ptr, "adapter.lora.prompt_prefix", buf, sizeof(buf));
|
||||
la.prompt_prefix = buf;
|
||||
res->lora().emplace_back(std::move(lora)); // copy to list of loaded adapters
|
||||
}
|
||||
|
||||
if (!params.lora_init_without_apply) {
|
||||
common_set_adapter_lora(lctx, params.lora_adapters);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1062,6 +1062,9 @@ class TextModel(ModelBase):
|
|||
if chkhsh == "66b8d4e19ab16c3bfd89bce5d785fb7e0155e8648708a1f42077cb9fe002c273":
|
||||
# ref: https://huggingface.co/alvarobartt/grok-2-tokenizer
|
||||
res = "grok-2"
|
||||
if chkhsh == "b3d1dd861f1d4c5c0d2569ce36baf3f90fe8a102db3de50dd71ff860d91be3df":
|
||||
# ref: https://huggingface.co/aari1995/German_Semantic_V3
|
||||
res = "jina-v2-de"
|
||||
if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5":
|
||||
# ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
|
||||
res = "llama-bpe"
|
||||
|
|
@ -1230,6 +1233,9 @@ class TextModel(ModelBase):
|
|||
if chkhsh == "4a2e2abae11ca2b86d570fc5b44be4d5eb5e72cc8f22dd136a94b37da83ab665":
|
||||
# ref: https://huggingface.co/KORMo-Team/KORMo-tokenizer
|
||||
res = "kormo"
|
||||
if chkhsh == "16389f0a1f51ee53e562ffd51c371dc508639ab0e4261502071836e50e223e91":
|
||||
# ref: https://huggingface.co/upstage/Solar-Open-100B
|
||||
res = "solar-open"
|
||||
|
||||
if res is None:
|
||||
logger.warning("\n")
|
||||
|
|
@ -2486,6 +2492,7 @@ class StableLMModel(TextModel):
|
|||
"VLlama3ForCausalLM",
|
||||
"LlavaForConditionalGeneration",
|
||||
"VoxtralForConditionalGeneration",
|
||||
"IQuestCoderForCausalLM",
|
||||
"LlamaModel")
|
||||
class LlamaModel(TextModel):
|
||||
model_arch = gguf.MODEL_ARCH.LLAMA
|
||||
|
|
@ -3503,7 +3510,7 @@ class QwenModel(TextModel):
|
|||
self._set_vocab_qwen()
|
||||
|
||||
|
||||
@ModelBase.register("Qwen2Model", "Qwen2ForCausalLM", "Qwen2AudioForConditionalGeneration", "KORMoForCausalLM")
|
||||
@ModelBase.register("Qwen2Model", "Qwen2ForCausalLM", "Qwen2AudioForConditionalGeneration", "KORMoForCausalLM", "AudioFlamingo3ForConditionalGeneration")
|
||||
class Qwen2Model(TextModel):
|
||||
model_arch = gguf.MODEL_ARCH.QWEN2
|
||||
|
||||
|
|
@ -5284,13 +5291,14 @@ class BertModel(TextModel):
|
|||
self.gguf_writer.add_token_type_count(self.hparams.get("type_vocab_size", 1))
|
||||
|
||||
# convert to phantom space vocab
|
||||
def phantom(tok):
|
||||
if tok.startswith("[") and tok.endswith("]"):
|
||||
def phantom(tok, toktype):
|
||||
if toktype == gguf.TokenType.CONTROL:
|
||||
return tok
|
||||
if tok.startswith("##"):
|
||||
return tok[2:]
|
||||
return "\u2581" + tok
|
||||
tokens = list(map(phantom, tokens))
|
||||
assert len(tokens) == len(toktypes)
|
||||
tokens = list(map(phantom, tokens, toktypes))
|
||||
|
||||
# add vocab to gguf
|
||||
self.gguf_writer.add_tokenizer_model("bert")
|
||||
|
|
@ -9292,6 +9300,19 @@ class VoxtralWhisperEncoderModel(WhisperEncoderModel):
|
|||
self.gguf_writer.add_audio_stack_factor(4) # == intermediate_size // hidden_size
|
||||
|
||||
|
||||
@ModelBase.register("AudioFlamingo3ForConditionalGeneration")
class AudioFlamingo3WhisperEncoderModel(WhisperEncoderModel):
    def set_gguf_parameters(self):
        # parent parameters first, then tag the projector type as MUSIC_FLAMINGO
        super().set_gguf_parameters()
        self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.MUSIC_FLAMINGO)

    def tensor_force_quant(self, name, new_name, bid, n_dims):
        is_conv_weight = ".conv" in name and ".weight" in name
        if not is_conv_weight:
            # everything else follows the parent's quantization choice
            return super().tensor_force_quant(name, new_name, bid, n_dims)
        # Was trained in BF16, being safe, avoiding quantizing to FP16
        return gguf.GGMLQuantizationType.F32
|
||||
|
||||
|
||||
@ModelBase.register("FalconH1ForCausalLM")
|
||||
class FalconH1Model(Mamba2Model):
|
||||
model_arch = gguf.MODEL_ARCH.FALCON_H1
|
||||
|
|
@ -10604,6 +10625,26 @@ class JanusProVisionModel(MmprojModel):
|
|||
return []
|
||||
|
||||
|
||||
@ModelBase.register("SolarOpenForCausalLM")
class SolarOpenModel(Glm4MoeModel):
    model_arch = gguf.MODEL_ARCH.GLM4_MOE

    def set_vocab(self):
        # write a gpt2-style vocab and pin the special token ids from the tokenizer's added vocab
        from transformers import AutoTokenizer

        hf_tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
        tokens, toktypes, tokpre = self.get_vocab_base()

        writer = self.gguf_writer
        writer.add_tokenizer_model("gpt2")
        writer.add_tokenizer_pre(tokpre)
        writer.add_token_list(tokens)
        writer.add_token_types(toktypes)

        # special token role -> token text looked up in the added vocab
        added_vocab = hf_tokenizer.get_added_vocab()
        special_tokens = (
            ("eos", "<|endoftext|>"),
            ("eot", "<|endoftext|>"),
            ("unk", "<unk>"),
            ("bos", "<|startoftext|>"),
        )
        for role, text in special_tokens:
            special_vocab._set_special_token(role, added_vocab[text])

        special_vocab.add_to_gguf(writer)
|
||||
|
||||
|
||||
###### CONVERSION LOGIC ######
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -145,6 +145,7 @@ models = [
|
|||
{"name": "granite-docling", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ibm-granite/granite-docling-258M", },
|
||||
{"name": "minimax-m2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/MiniMaxAI/MiniMax-M2", },
|
||||
{"name": "kormo", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/KORMo-Team/KORMo-tokenizer", },
|
||||
{"name": "solar-open", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/upstage/Solar-Open-100B", },
|
||||
]
|
||||
|
||||
# some models are known to be broken upstream, so we will skip them as exceptions
|
||||
|
|
@ -165,6 +166,8 @@ pre_computed_hashes = [
|
|||
{"name": "kimi-k2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/moonshotai/Kimi-K2-Base", "chkhsh": "81212dc7cdb7e0c1074ca62c5aeab0d43c9f52b8a737be7b12a777c953027890"},
|
||||
{"name": "qwen2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen3-Embedding-0.6B", "chkhsh": "d4540891389ea895b53b399da6ac824becc30f2fba0e9ddbb98f92e55ca0e97c"},
|
||||
{"name": "grok-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/alvarobartt/grok-2-tokenizer", "chkhsh": "66b8d4e19ab16c3bfd89bce5d785fb7e0155e8648708a1f42077cb9fe002c273"},
|
||||
# jina-v2-de variants
|
||||
{"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/aari1995/German_Semantic_V3", "chkhsh": "b3d1dd861f1d4c5c0d2569ce36baf3f90fe8a102db3de50dd71ff860d91be3df"},
|
||||
]
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ Legend:
|
|||
| CONV_TRANSPOSE_1D | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ |
|
||||
| CONV_TRANSPOSE_2D | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ |
|
||||
| COS | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | 🟡 | ❌ | ❌ | ❌ |
|
||||
| COUNT_EQUAL | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ |
|
||||
| COUNT_EQUAL | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ |
|
||||
| CPY | ❌ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ | ❌ |
|
||||
| CROSS_ENTROPY_LOSS | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| CROSS_ENTROPY_LOSS_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
|
|
|
|||
|
|
@ -965,6 +965,7 @@
|
|||
"Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,2560],ne_kernel=[3,3,1,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","yes","Metal"
|
||||
"Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,2560],ne_kernel=[3,3,2,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","yes","Metal"
|
||||
"Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[5,5,1,32],ne_kernel=[3,4,1,32],s0=1,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","1","yes","Metal"
|
||||
"Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[2,2,1536,729],ne_kernel=[2,2,1536,4096],s0=1,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","1","yes","Metal"
|
||||
"Metal","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","0","no","Metal"
|
||||
"Metal","IM2COL_3D","type_input=f32,type_kernel=f16,dst_type=f32,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","0","no","Metal"
|
||||
"Metal","IM2COL_3D","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","0","no","Metal"
|
||||
|
|
@ -4964,8 +4965,9 @@
|
|||
"Metal","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","yes","Metal"
|
||||
"Metal","CONV_TRANSPOSE_2D","ne_input=[3,2,3,1],ne_kernel=[2,2,1,3],stride=1","support","1","yes","Metal"
|
||||
"Metal","CONV_TRANSPOSE_2D","ne_input=[10,10,9,1],ne_kernel=[3,3,1,9],stride=2","support","1","yes","Metal"
|
||||
"Metal","COUNT_EQUAL","type=f32,ne=[4,500,1,1]","support","0","no","Metal"
|
||||
"Metal","COUNT_EQUAL","type=f32,ne=[4,5000,1,1]","support","0","no","Metal"
|
||||
"Metal","CONV_TRANSPOSE_2D","ne_input=[129,63,35,1],ne_kernel=[3,3,48,35],stride=1","support","1","yes","Metal"
|
||||
"Metal","COUNT_EQUAL","type=f32,ne=[4,500,1,1]","support","1","yes","Metal"
|
||||
"Metal","COUNT_EQUAL","type=f32,ne=[4,5000,1,1]","support","1","yes","Metal"
|
||||
"Metal","ARGMAX","type=f32,ne=[32,1,1,1]","support","1","yes","Metal"
|
||||
"Metal","ARGMAX","type=f32,ne=[32,513,1,1]","support","1","yes","Metal"
|
||||
"Metal","ARGMAX","type=f32,ne=[100,10,1,1]","support","1","yes","Metal"
|
||||
|
|
@ -5715,15 +5717,15 @@
|
|||
"Metal","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","Metal"
|
||||
"Metal","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001,inplace=1","support","1","yes","Metal"
|
||||
"Metal","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[4,1024,4,1],ne_b=[3,1024,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[4,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[8,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[4,1536,4,1],ne_b=[3,1536,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[4,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[8,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[4,2048,4,1],ne_b=[3,2048,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[3,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[6,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[3,1024,4,1],ne_b=[3,1024,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[3,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[6,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[3,1536,4,1],ne_b=[3,1536,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[3,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[6,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[3,2048,4,1],ne_b=[3,2048,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[4,1024,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[4,1024,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[4,1024,4,1],ne_b=[4,1024,1,1]","support","1","yes","Metal"
|
||||
|
|
@ -5733,6 +5735,15 @@
|
|||
"Metal","SSM_CONV","type=f32,ne_a=[4,2048,1,1],ne_b=[4,2048,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[8,2048,1,1],ne_b=[4,2048,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[4,2048,4,1],ne_b=[4,2048,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[9,1024,1,1],ne_b=[9,1024,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[18,1024,1,1],ne_b=[9,1024,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[9,1024,4,1],ne_b=[9,1024,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[9,1536,1,1],ne_b=[9,1536,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[18,1536,1,1],ne_b=[9,1536,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[9,1536,4,1],ne_b=[9,1536,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[9,2048,1,1],ne_b=[9,2048,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[18,2048,1,1],ne_b=[9,2048,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_CONV","type=f32,ne_a=[9,2048,4,1],ne_b=[9,2048,1,1]","support","1","yes","Metal"
|
||||
"Metal","SSM_SCAN","type=f32,d_state=16,head_dim=1,n_head=1024,n_group=1,n_seq_tokens=32,n_seqs=4","support","1","yes","Metal"
|
||||
"Metal","SSM_SCAN","type=f32,d_state=128,head_dim=64,n_head=16,n_group=2,n_seq_tokens=32,n_seqs=4","support","1","yes","Metal"
|
||||
"Metal","SSM_SCAN","type=f32,d_state=256,head_dim=64,n_head=8,n_group=2,n_seq_tokens=32,n_seqs=4","support","1","yes","Metal"
|
||||
|
|
@ -8916,6 +8927,8 @@
|
|||
"Metal","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","Metal"
|
||||
"Metal","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","Metal"
|
||||
"Metal","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","Metal"
|
||||
"Metal","SOFT_MAX","type=f32,ne=[200001,2,3,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","Metal"
|
||||
"Metal","SOFT_MAX","type=f32,ne=[200001,2,3,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","Metal"
|
||||
"Metal","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=0.000000","support","0","no","Metal"
|
||||
"Metal","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=0.000000","support","0","no","Metal"
|
||||
"Metal","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=1.000000,max_bias=0.000000","support","0","no","Metal"
|
||||
|
|
@ -9542,311 +9555,311 @@
|
|||
"Metal","ARGSORT","type=f32,ne=[2048,2,1,3],order=1","support","1","yes","Metal"
|
||||
"Metal","ARGSORT","type=f32,ne=[2049,2,1,3],order=1","support","1","yes","Metal"
|
||||
"Metal","ARGSORT","type=f32,ne=[2,8,8192,1],order=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[12,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[13,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[13,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=9999","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=9999","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=9999","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=9999","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=9999","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=9999","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=9999","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=9999","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=9999","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=9999","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=100","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=500","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=1023","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=9999","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=9999","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=1","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=2","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=3","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=7","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=15","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[12,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[13,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[13,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=9999,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=9999,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=9999,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=9999,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=9999,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=9999,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=9999,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=9999,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=9999,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=9999,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=9999,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=9999,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=1,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=2,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=3,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=7,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=15,ties=0","support","1","yes","Metal"
|
||||
"Metal","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=0","support","1","yes","Metal"
|
||||
"Metal","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=1","support","1","yes","Metal"
|
||||
"Metal","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=nearest,flags=none","support","1","yes","Metal"
|
||||
|
|
@ -9891,8 +9904,9 @@
|
|||
"Metal","GROUP_NORM","type=f32,ne=[64,64,320,1],num_groups=32,eps=0.000001","support","1","yes","Metal"
|
||||
"Metal","GROUP_NORM","type=f32,ne=[9,9,1280,1],num_groups=32,eps=0.000001","support","1","yes","Metal"
|
||||
"Metal","ACC","type=f32,ne_a=[256,17,1,1],ne_b=[256,16,1,1]","support","1","yes","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1","support","1","yes","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[512,512,3,1],lp0=1,rp0=1,lp1=1,rp1=1,lp2=1,rp2=1,lp3=1,rp3=1,v=0","support","0","no","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1,circular=0","support","1","yes","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[33,17,2,1],pad_0=4,pad_1=3,circular=1","support","0","no","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[512,512,3,1],lp0=1,rp0=1,lp1=1,rp1=1,lp2=1,rp2=1,lp3=1,rp3=1,v=0,circular=0","support","0","no","Metal"
|
||||
"Metal","PAD_REFLECT_1D","type=f32,ne_a=[512,34,2,1],pad_0=10,pad_1=9","support","1","yes","Metal"
|
||||
"Metal","PAD_REFLECT_1D","type=f32,ne_a=[3000,384,4,1],pad_0=10,pad_1=9","support","1","yes","Metal"
|
||||
"Metal","ROLL","shift0=3,shift1=-2,shift3=1,shift4=-1","support","0","no","Metal"
|
||||
|
|
@ -9923,17 +9937,41 @@
|
|||
"Metal","FILL","type=f32,ne=[303,207,11,3],c=2.000000","support","1","yes","Metal"
|
||||
"Metal","FILL","type=f32,ne=[800,600,4,4],c=-152.000000","support","1","yes","Metal"
|
||||
"Metal","FILL","type=f32,ne=[2048,512,2,2],c=3.500000","support","1","yes","Metal"
|
||||
"Metal","DIAG","type=f32,ne=[10,1,4,3]","support","0","no","Metal"
|
||||
"Metal","DIAG","type=f32,ne=[79,1,19,13]","support","0","no","Metal"
|
||||
"Metal","DIAG","type=f32,ne=[256,1,8,16]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[10,10,4,3],ne_rhs=[3,10,4,3]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[11,11,1,1],ne_rhs=[5,11,1,1]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[17,17,2,4],ne_rhs=[9,17,2,4]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[30,30,7,1],ne_rhs=[8,30,7,1]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[42,42,5,2],ne_rhs=[10,42,5,2]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[64,64,2,2],ne_rhs=[10,64,2,2]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[64,64,2,2],ne_rhs=[64,64,2,2]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[79,79,5,3],ne_rhs=[417,79,5,3]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,2],ne_rhs=[32,128,4,2]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,2,8],ne_rhs=[80,80,2,8]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,2,8],ne_rhs=[79,80,2,8]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,2,8],ne_rhs=[81,80,2,8]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,8,8],ne_rhs=[80,80,8,8]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,8,8],ne_rhs=[79,80,8,8]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,8,8],ne_rhs=[81,80,8,8]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[84,84,4,4],ne_rhs=[32,84,4,4]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[95,95,8,8],ne_rhs=[40,95,8,8]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[100,100,4,4],ne_rhs=[41,100,4,4]","support","0","no","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0","support","1","yes","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0","support","0","no","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1","support","1","yes","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,4],ne_rhs=[31,128,4,4]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,4],ne_rhs=[32,128,4,4]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[128,128,3,4],ne_rhs=[32,128,3,4]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,1],ne_rhs=[32,128,4,1]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[64,64,4,4],ne_rhs=[200,64,4,4]","support","0","no","Metal"
|
||||
"Metal","SOLVE_TRI","type=f32,ne_lhs=[64,64,4,4],ne_rhs=[384,64,4,4]","support","0","no","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0,circular=0","support","1","yes","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0,circular=0","support","0","no","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0,circular=1","support","0","no","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0,circular=1","support","0","no","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1,circular=0","support","1","yes","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1,circular=0","support","0","no","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1,circular=1","support","0","no","Metal"
|
||||
"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1,circular=1","support","0","no","Metal"
|
||||
"Metal","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","Metal"
|
||||
"Metal","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","Metal"
|
||||
"Metal","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","Metal"
|
||||
|
|
|
|||
|
Can't render this file because it is too large.
|
|
|
@ -4,7 +4,7 @@ project("ggml" C CXX ASM)
|
|||
### GGML Version
|
||||
set(GGML_VERSION_MAJOR 0)
|
||||
set(GGML_VERSION_MINOR 9)
|
||||
set(GGML_VERSION_PATCH 4)
|
||||
set(GGML_VERSION_PATCH 5)
|
||||
set(GGML_VERSION_BASE "${GGML_VERSION_MAJOR}.${GGML_VERSION_MINOR}.${GGML_VERSION_PATCH}")
|
||||
|
||||
find_program(GIT_EXE NAMES git git.exe NO_CMAKE_FIND_ROOT_PATH)
|
||||
|
|
|
|||
|
|
@ -358,7 +358,7 @@ extern "C" {
|
|||
typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
|
||||
|
||||
// Compare the output of two backends
|
||||
GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data, struct ggml_tensor * test_node);
|
||||
GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data, struct ggml_tensor const * const * test_nodes, size_t num_test_nodes);
|
||||
|
||||
// Tensor initialization
|
||||
GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
|
||||
|
|
|
|||
|
|
@ -2053,7 +2053,7 @@ void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy) {
|
|||
ggml_free(copy.ctx_unallocated);
|
||||
}
|
||||
|
||||
bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data, struct ggml_tensor * test_node) {
|
||||
bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data, struct ggml_tensor const * const * test_nodes, size_t num_test_nodes) {
|
||||
struct ggml_backend_graph_copy copy = ggml_backend_graph_copy(backend2, graph);
|
||||
if (copy.buffer == NULL) {
|
||||
return false;
|
||||
|
|
@ -2064,22 +2064,22 @@ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t
|
|||
|
||||
assert(g1->n_nodes == g2->n_nodes);
|
||||
|
||||
if (test_node != nullptr) {
|
||||
// Compute the whole graph and only test the output for a specific tensor
|
||||
if (num_test_nodes != 0) {
|
||||
GGML_ASSERT(test_nodes);
|
||||
// Compute the whole graph and only test the output for specific tensors
|
||||
ggml_backend_graph_compute(backend1, g1);
|
||||
ggml_backend_graph_compute(backend2, g2);
|
||||
|
||||
int test_node_idx = -1;
|
||||
bool verified = false;
|
||||
for (int i = 0; i < g1->n_nodes; i++) {
|
||||
struct ggml_tensor * t1 = g1->nodes[i];
|
||||
if (t1 == test_node) {
|
||||
test_node_idx = i;
|
||||
break;
|
||||
for (size_t j = 0; j < num_test_nodes; ++j) {
|
||||
if (g1->nodes[i] == test_nodes[j]) {
|
||||
callback(i, g1->nodes[i], g2->nodes[i], user_data);
|
||||
verified = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
GGML_ASSERT(test_node_idx != -1);
|
||||
|
||||
callback(test_node_idx, g1->nodes[test_node_idx], g2->nodes[test_node_idx], user_data);
|
||||
GGML_ASSERT(verified);
|
||||
} else {
|
||||
for (int i = 0; i < g1->n_nodes; i++) {
|
||||
struct ggml_tensor * t1 = g1->nodes[i];
|
||||
|
|
|
|||
|
|
@ -531,7 +531,7 @@ static __device__ __forceinline__ void flash_attn_ext_f16_iter(
|
|||
for (int k0 = 0; k0 < nbatch_fa; k0 += np*T_C_KQ::I) {
|
||||
#pragma unroll
|
||||
for (int l = 0; l < T_C_KQ::ne; ++l) {
|
||||
if (!oob_check || k0 + T_C_KQ::get_i(l) < k_VKQ_sup) {
|
||||
if (!oob_check || k0 + (threadIdx.y % np)*T_C_KQ::I + T_C_KQ::get_i(l) < k_VKQ_sup) {
|
||||
KQ_max_new[l % 2] = fmaxf(KQ_max_new[l % 2], KQ_C[k0/(np*T_C_KQ::I)].x[l] + FATTN_KQ_MAX_OFFSET);
|
||||
}
|
||||
}
|
||||
|
|
@ -583,7 +583,7 @@ static __device__ __forceinline__ void flash_attn_ext_f16_iter(
|
|||
for (int k0 = 0; k0 < nbatch_fa; k0 += np*T_C_KQ::J) {
|
||||
#pragma unroll
|
||||
for (int l = 0; l < T_C_KQ::ne; ++l) {
|
||||
if (!oob_check || k0 + T_C_KQ::get_j(l) < k_VKQ_sup) {
|
||||
if (!oob_check || k0 + (threadIdx.y % np)*T_C_KQ::J + T_C_KQ::get_j(l) < k_VKQ_sup) {
|
||||
// Turing + Volta:
|
||||
KQ_max_new[(l/2) % 2] = fmaxf(KQ_max_new[(l/2) % 2], KQ_C[(k0/(np*T_C_KQ::J))].x[l] + FATTN_KQ_MAX_OFFSET);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -203,16 +203,6 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
|||
GGML_ASSERT(info.device_count <= GGML_CUDA_MAX_DEVICES);
|
||||
|
||||
int64_t total_vram = 0;
|
||||
#ifdef GGML_CUDA_FORCE_MMQ
|
||||
GGML_LOG_INFO("%s: GGML_CUDA_FORCE_MMQ: yes\n", __func__);
|
||||
#else
|
||||
GGML_LOG_INFO("%s: GGML_CUDA_FORCE_MMQ: no\n", __func__);
|
||||
#endif // GGML_CUDA_FORCE_MMQ
|
||||
#ifdef GGML_CUDA_FORCE_CUBLAS
|
||||
GGML_LOG_INFO("%s: GGML_CUDA_FORCE_CUBLAS: yes\n", __func__);
|
||||
#else
|
||||
GGML_LOG_INFO("%s: GGML_CUDA_FORCE_CUBLAS: no\n", __func__);
|
||||
#endif // GGML_CUDA_FORCE_CUBLAS
|
||||
GGML_LOG_INFO("%s: found %d " GGML_CUDA_NAME " devices:\n", __func__, info.device_count);
|
||||
|
||||
std::vector<std::pair<int, std::string>> turing_devices_without_mma;
|
||||
|
|
|
|||
|
|
@ -1684,3 +1684,60 @@ ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_opt_step_sgd(ggm
|
|||
|
||||
return res;
|
||||
}
|
||||
|
||||
// Returns the pipeline for the memset kernel matching the type of `op`.
//
// lib: library the pipeline is looked up in and, if needed, compiled into
// op:  tensor whose type selects the kernel; only GGML_TYPE_I64 is accepted
//
// The kernel is named "kernel_memset_<type name>"; no function constants are
// used, so the base name and the pipeline name are the same string.
ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_memset(ggml_metal_library_t lib, const ggml_tensor * op) {
    GGML_ASSERT(op->type == GGML_TYPE_I64);

    char base[256];
    char name[256];

    snprintf(base, sizeof(base), "kernel_memset_%s", ggml_type_name(op->type));
    snprintf(name, sizeof(name), "%s", base);

    // reuse the pipeline when the lookup by name succeeds
    ggml_metal_pipeline_with_params found = ggml_metal_library_get_pipeline(lib, name);
    if (found.pipeline) {
        return found;
    }

    // otherwise compile it without any function constants
    return ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
}
|
||||
|
||||
// Returns the pipeline for GGML_OP_COUNT_EQUAL, specialized on the number of
// simdgroups per threadgroup.
//
// lib: library the pipeline is looked up in and, if needed, compiled into
// op:  the COUNT_EQUAL node; both sources must be I32 and the result I64
//
// Besides the pipeline itself, the returned value carries the launch
// parameters chosen here: `nsg` and the threadgroup memory size `smem`.
ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_count_equal(ggml_metal_library_t lib, const ggml_tensor * op) {
    assert(op->op == GGML_OP_COUNT_EQUAL);

    const ggml_tensor * src0 = op->src[0];
    const ggml_tensor * src1 = op->src[1];

    GGML_ASSERT(src0->type == src1->type);
    GGML_ASSERT(src0->type == GGML_TYPE_I32);
    GGML_ASSERT(op->type == GGML_TYPE_I64);

    // note: the kernel only supports i32 output due to metal atomic add only supporting atomic_int
    GGML_ASSERT(ggml_nelements(src0) < (1LL << 31));

    const int64_t ne00 = src0->ne[0];

    // double the simdgroup count until 32*nsg reaches the row length, capped at 32
    int nsg = 1;
    for (; nsg < 32 && 32*nsg < ne00; nsg *= 2) {
    }

    char base[256];
    char name[256];

    snprintf(base, sizeof(base), "kernel_count_equal_%s", ggml_type_name(src0->type));
    snprintf(name, sizeof(name), "%s_nsg=%d", base, nsg);

    ggml_metal_pipeline_with_params res = ggml_metal_library_get_pipeline(lib, name);
    if (!res.pipeline) {
        // nsg is baked into the kernel as a function constant
        ggml_metal_cv_t cv = ggml_metal_cv_init();
        ggml_metal_cv_set_int16(cv, nsg, FC_COUNT_EQUAL + 0);

        res = ggml_metal_library_compile_pipeline(lib, base, name, cv);

        ggml_metal_cv_free(cv);
    }

    res.nsg  = nsg;
    res.smem = 32 * sizeof(int32_t); // 32 int32 slots, i.e. one per simdgroup at the nsg cap

    return res;
}
|
||||
|
|
|
|||
|
|
@ -147,6 +147,8 @@ struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_arange
|
|||
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_timestep_embedding(ggml_metal_library_t lib, const struct ggml_tensor * op);
|
||||
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_opt_step_adamw (ggml_metal_library_t lib, const struct ggml_tensor * op);
|
||||
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_opt_step_sgd (ggml_metal_library_t lib, const struct ggml_tensor * op);
|
||||
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_memset (ggml_metal_library_t lib, const struct ggml_tensor * op);
|
||||
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_count_equal (ggml_metal_library_t lib, const struct ggml_tensor * op);
|
||||
|
||||
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_flash_attn_ext_pad(
|
||||
ggml_metal_library_t lib,
|
||||
|
|
|
|||
|
|
@ -1023,6 +1023,11 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
|
|||
return has_simdgroup_reduction && ggml_is_contiguous_rows(op->src[0]);
|
||||
case GGML_OP_L2_NORM:
|
||||
return has_simdgroup_reduction && (op->ne[0] % 4 == 0 && ggml_is_contiguous_1(op->src[0]));
|
||||
case GGML_OP_COUNT_EQUAL:
|
||||
return has_simdgroup_reduction &&
|
||||
op->src[0]->type == GGML_TYPE_I32 &&
|
||||
op->src[1]->type == GGML_TYPE_I32 &&
|
||||
op->type == GGML_TYPE_I64;
|
||||
case GGML_OP_ARGMAX:
|
||||
return has_simdgroup_reduction;
|
||||
case GGML_OP_NORM:
|
||||
|
|
|
|||
|
|
@ -78,6 +78,7 @@
|
|||
#define FC_MUL_MM 700
|
||||
#define FC_ROPE 800
|
||||
#define FC_SSM_CONV 900
|
||||
#define FC_COUNT_EQUAL 1000
|
||||
|
||||
// op-specific constants
|
||||
#define OP_FLASH_ATTN_EXT_NQPTG 8
|
||||
|
|
@ -894,6 +895,25 @@ typedef struct {
|
|||
float step;
|
||||
} ggml_metal_kargs_arange;
|
||||
|
||||
// Argument block the host binds at index 0 when dispatching the memset pipeline.
typedef struct {
    int64_t val; // value to store; the COUNT_EQUAL encoder passes 0 to clear its output
} ggml_metal_kargs_memset;
|
||||
|
||||
// Argument block for the COUNT_EQUAL kernel.
// The host fills ne0x/nb0x from src[0] and nb1x from src[1].
typedef struct {
    // src[0] extents
    int32_t  ne00;
    int32_t  ne01;
    int32_t  ne02;
    int32_t  ne03;
    // src[0] strides (the kernel applies them to a char pointer, i.e. in bytes)
    uint64_t nb00;
    uint64_t nb01;
    uint64_t nb02;
    uint64_t nb03;
    // src[1] strides (same convention)
    uint64_t nb10;
    uint64_t nb11;
    uint64_t nb12;
    uint64_t nb13;
} ggml_metal_kargs_count_equal;
|
||||
|
||||
typedef struct {
|
||||
int32_t k0;
|
||||
int32_t k1;
|
||||
|
|
|
|||
|
|
@ -448,7 +448,11 @@ static int ggml_metal_op_encode_impl(ggml_metal_op_t ctx, int idx) {
|
|||
{
|
||||
n_fuse = ggml_metal_op_opt_step_sgd(ctx, idx);
|
||||
} break;
|
||||
default:
|
||||
case GGML_OP_COUNT_EQUAL:
|
||||
{
|
||||
n_fuse = ggml_metal_op_count_equal(ctx, idx);
|
||||
} break;
|
||||
default:
|
||||
{
|
||||
GGML_LOG_ERROR("%s: error: node %3d, op = %8s not implemented\n", __func__, idx, ggml_op_name(node->op));
|
||||
GGML_ABORT("fatal error");
|
||||
|
|
@ -4090,3 +4094,64 @@ int ggml_metal_op_opt_step_sgd(ggml_metal_op_t ctx, int idx) {
|
|||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Encodes GGML_OP_COUNT_EQUAL for graph node `idx` as two dispatches:
//   1. a single-thread memset that stores 0 into the first element of the output
//   2. the counting kernel, one threadgroup per (i1, i2, i3) row of src[0],
//      which atomically adds its partial count into that element
//
// Returns 1; the caller stores it in `n_fuse` (presumably the number of graph
// nodes consumed - confirm against ggml_metal_op_encode_impl).
int ggml_metal_op_count_equal(ggml_metal_op_t ctx, int idx) {
    ggml_tensor * op = ctx->node(idx);

    ggml_metal_library_t lib = ctx->lib;
    ggml_metal_encoder_t enc = ctx->enc;

    GGML_TENSOR_LOCALS( int32_t, ne0, op->src[0], ne);
    GGML_TENSOR_LOCALS(uint64_t, nb0, op->src[0], nb);
    GGML_TENSOR_LOCALS(uint64_t, nb1, op->src[1], nb);

    // pass 1: clear the accumulator
    {
        ggml_metal_kargs_memset args_zero = { /*.val =*/ 0 };

        auto pipeline_zero = ggml_metal_library_get_pipeline_memset(lib, op);

        ggml_metal_encoder_set_pipeline(enc, pipeline_zero);
        ggml_metal_encoder_set_bytes   (enc, &args_zero, sizeof(args_zero), 0);
        ggml_metal_encoder_set_buffer  (enc, ggml_metal_get_buffer_id(op),  1);

        // one threadgroup with one thread: only element 0 is written
        ggml_metal_encoder_dispatch_threadgroups(enc, 1, 1, 1, 1, 1, 1);
    }

    // NOTE(review): presumably orders the clear before the atomic adds below - confirm
    ggml_metal_op_concurrency_reset(ctx);

    // pass 2: count the equal elements
    {
        ggml_metal_kargs_count_equal args_count = {
            /*.ne00 =*/ ne00,
            /*.ne01 =*/ ne01,
            /*.ne02 =*/ ne02,
            /*.ne03 =*/ ne03,
            /*.nb00 =*/ nb00,
            /*.nb01 =*/ nb01,
            /*.nb02 =*/ nb02,
            /*.nb03 =*/ nb03,
            /*.nb10 =*/ nb10,
            /*.nb11 =*/ nb11,
            /*.nb12 =*/ nb12,
            /*.nb13 =*/ nb13,
        };

        auto pipeline_count = ggml_metal_library_get_pipeline_count_equal(lib, op);

        // 32 threads for each simdgroup selected by the pipeline getter
        const int nth = 32*pipeline_count.nsg;

        GGML_ASSERT(nth <= ggml_metal_pipeline_max_theads_per_threadgroup(pipeline_count));

        ggml_metal_encoder_set_pipeline(enc, pipeline_count);
        ggml_metal_encoder_set_bytes   (enc, &args_count, sizeof(args_count),    0);
        ggml_metal_encoder_set_buffer  (enc, ggml_metal_get_buffer_id(op->src[0]), 1);
        ggml_metal_encoder_set_buffer  (enc, ggml_metal_get_buffer_id(op->src[1]), 2);
        ggml_metal_encoder_set_buffer  (enc, ggml_metal_get_buffer_id(op),         3);

        ggml_metal_encoder_set_threadgroup_memory_size(enc, pipeline_count.smem, 0);
        ggml_metal_encoder_dispatch_threadgroups(enc, ne01, ne02, ne03, nth, 1, 1);
    }

    return 1;
}
|
||||
|
|
|
|||
|
|
@ -87,6 +87,7 @@ int ggml_metal_op_leaky_relu (ggml_metal_op_t ctx, int idx);
|
|||
int ggml_metal_op_tri (ggml_metal_op_t ctx, int idx);
|
||||
int ggml_metal_op_opt_step_adamw (ggml_metal_op_t ctx, int idx);
|
||||
int ggml_metal_op_opt_step_sgd (ggml_metal_op_t ctx, int idx);
|
||||
int ggml_metal_op_count_equal (ggml_metal_op_t ctx, int idx);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1790,6 +1790,7 @@ kernel void kernel_op_sum_f32(
|
|||
return;
|
||||
}
|
||||
|
||||
// TODO: become function constant
|
||||
const uint nsg = (ntg.x + 31) / 32;
|
||||
|
||||
float sumf = 0;
|
||||
|
|
@ -9557,9 +9558,6 @@ template [[host_name("kernel_mul_mm_iq4_xs_f32")]] kernel mul_mm_t kernel_mul_m
|
|||
|
||||
template [[host_name("kernel_mul_mm_f32_f16")]] kernel mul_mm_t kernel_mul_mm<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, float4x4, 1, dequantize_f32, float, float4x4, half, half2x4>;
|
||||
template [[host_name("kernel_mul_mm_f16_f16")]] kernel mul_mm_t kernel_mul_mm<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, half4x4, 1, dequantize_f16, half, half4x4, half, half2x4>;
|
||||
#if defined(GGML_METAL_HAS_BF16)
|
||||
template [[host_name("kernel_mul_mm_bf16_f16")]] kernel mul_mm_t kernel_mul_mm<bfloat, bfloat4x4, simdgroup_bfloat8x8, half, half2x4, simdgroup_half8x8, bfloat4x4, 1, dequantize_bf16, bfloat, bfloat4x4, half, half2x4>;
|
||||
#endif
|
||||
template [[host_name("kernel_mul_mm_q4_0_f16")]] kernel mul_mm_t kernel_mul_mm<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, block_q4_0, 2, dequantize_q4_0, float, float4x4, half, half2x4>;
|
||||
template [[host_name("kernel_mul_mm_q4_1_f16")]] kernel mul_mm_t kernel_mul_mm<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, block_q4_1, 2, dequantize_q4_1, float, float4x4, half, half2x4>;
|
||||
template [[host_name("kernel_mul_mm_q5_0_f16")]] kernel mul_mm_t kernel_mul_mm<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, block_q5_0, 2, dequantize_q5_0, float, float4x4, half, half2x4>;
|
||||
|
|
@ -9615,9 +9613,6 @@ template [[host_name("kernel_mul_mm_id_iq4_xs_f32")]] kernel mul_mm_id kernel_m
|
|||
|
||||
template [[host_name("kernel_mul_mm_id_f32_f16")]] kernel mul_mm_id kernel_mul_mm_id<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, float4x4, 1, dequantize_f32, float, float4x4, half, half2x4>;
|
||||
template [[host_name("kernel_mul_mm_id_f16_f16")]] kernel mul_mm_id kernel_mul_mm_id<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, half4x4, 1, dequantize_f16, half, half4x4, half, half2x4>;
|
||||
#if defined(GGML_METAL_HAS_BF16)
|
||||
template [[host_name("kernel_mul_mm_id_bf16_f16")]] kernel mul_mm_id kernel_mul_mm_id<bfloat, bfloat4x4, simdgroup_bfloat8x8, half, half2x4, simdgroup_half8x8, bfloat4x4, 1, dequantize_bf16, bfloat, bfloat4x4, half, half2x4>;
|
||||
#endif
|
||||
template [[host_name("kernel_mul_mm_id_q4_0_f16")]] kernel mul_mm_id kernel_mul_mm_id<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, block_q4_0, 2, dequantize_q4_0, float, float4x4, half, half2x4>;
|
||||
template [[host_name("kernel_mul_mm_id_q4_1_f16")]] kernel mul_mm_id kernel_mul_mm_id<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, block_q4_1, 2, dequantize_q4_1, float, float4x4, half, half2x4>;
|
||||
template [[host_name("kernel_mul_mm_id_q5_0_f16")]] kernel mul_mm_id kernel_mul_mm_id<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, block_q5_0, 2, dequantize_q5_0, float, float4x4, half, half2x4>;
|
||||
|
|
@ -9920,3 +9915,75 @@ kernel void kernel_opt_step_sgd_f32(
|
|||
|
||||
x[gid] = x[gid] * (1.0f - pars[0] * pars[1]) - pars[0] * g[gid];
|
||||
}
|
||||
|
||||
// Stores args.val into dst, one element per thread.
//
// args: ggml_metal_kargs_memset - the struct the host actually binds at index 0.
//       The previous declaration used ggml_metal_kargs_fill, so `val` was
//       decoded through a different struct than the one the host wrote.
// dst:  destination buffer, indexed by the thread's position in the grid
//
// There is no bounds check: the host must launch exactly as many threads as
// there are elements to write.
template<typename T>
kernel void kernel_memset(
        constant ggml_metal_kargs_memset & args,
        device T * dst,
        uint tpig[[thread_position_in_grid]]) {
    dst[tpig] = args.val;
}
|
||||
|
||||
typedef decltype(kernel_memset<int64_t>) kernel_memset_t;
|
||||
|
||||
template [[host_name("kernel_memset_i64")]] kernel kernel_memset_t kernel_memset<int64_t>;
|
||||
|
||||
constant short FC_count_equal_nsg [[function_constant(FC_COUNT_EQUAL + 0)]];
|
||||
|
||||
// Counts the positions at which src0 and src1 hold equal values and atomically
// adds the count to *dst.
//
// Each threadgroup handles one (i1, i2, i3) row taken from its position in the
// grid; its threads stride over the ne00 elements of that row. Partial counts
// are reduced in two steps: within each simdgroup via simd_sum, then across
// simdgroups through threadgroup memory.
//
// args:      extents of src0 and the strides of both sources
// src0/src1: inputs, addressed as char pointers plus the nb* strides
// dst:       accumulator; the host clears it before this kernel runs
// shmem_i32: one slot per simdgroup
//
// The whole reduction is done in int32. Going through float, as before, is
// inexact for counts above 2^24, while the host allows up to 2^31 elements.
template<typename T>
kernel void kernel_count_equal(
        constant ggml_metal_kargs_count_equal & args,
        device   const char * src0,
        device   const char * src1,
        device   atomic_int * dst,
        threadgroup int32_t * shmem_i32 [[threadgroup(0)]],
        uint3   tgpig[[threadgroup_position_in_grid]],
        ushort3 tpitg[[thread_position_in_threadgroup]],
        ushort  sgitg[[simdgroup_index_in_threadgroup]],
        ushort  tiisg[[thread_index_in_simdgroup]],
        ushort3   ntg[[threads_per_threadgroup]]) {
    const short NSG = FC_count_equal_nsg;

    const int i3 = tgpig.z;
    const int i2 = tgpig.y;
    const int i1 = tgpig.x;

    // uniform across the threadgroup, so no thread is left waiting at the barrier below
    if (i3 >= args.ne03 || i2 >= args.ne02 || i1 >= args.ne01) {
        return;
    }

    int sum = 0;

    device const char * base0 = src0 + i1*args.nb01 + i2*args.nb02 + i3*args.nb03;
    device const char * base1 = src1 + i1*args.nb11 + i2*args.nb12 + i3*args.nb13;

    for (int64_t i0 = tpitg.x; i0 < args.ne00; i0 += ntg.x) {
        const T v0 = *(device const T *)(base0 + i0*args.nb00);
        const T v1 = *(device const T *)(base1 + i0*args.nb10);
        sum += (v0 == v1);
    }

    // step 1: reduce within the simdgroup
    sum = simd_sum(sum);

    if (tiisg == 0) {
        shmem_i32[sgitg] = sum;
    }

    threadgroup_barrier(mem_flags::mem_threadgroup);

    // step 2: the first simdgroup reduces the per-simdgroup partials
    if (sgitg == 0) {
        int32_t v = 0;
        if (tpitg.x < NSG) {
            v = shmem_i32[tpitg.x];
        }

        const int32_t total = simd_sum(v);

        if (tpitg.x == 0) {
            atomic_fetch_add_explicit(dst, total, memory_order_relaxed);
        }
    }
}
|
||||
|
||||
typedef decltype(kernel_count_equal<int32_t>) kernel_count_equal_t;
|
||||
|
||||
template [[host_name("kernel_count_equal_i32")]] kernel kernel_count_equal_t kernel_count_equal<int32_t>;
|
||||
|
|
|
|||
|
|
@ -36,7 +36,47 @@ if (WIN32)
|
|||
endif()
|
||||
endif()
|
||||
|
||||
find_package(IntelSYCL)
|
||||
# detect_and_find_package(<package_name> [<find_package arguments>...])
#
# Probes <package_name> in a throw-away CMake project before loading it into
# this one. A scratch CMakeLists.txt that only calls find_package(... QUIET) is
# configured in a child cmake process using the current generator, platform,
# toolset and C++ compiler. If the child exits with 0, find_package() is called
# here with the extra arguments forwarded. Otherwise a warning is printed and
# <package_name>_FOUND is set to FALSE.
#
# This is a macro, so test_source, test_dir, cmake_args and result stay set in
# the caller's scope afterwards.
macro(detect_and_find_package package_name)
    # scratch project written under the current binary dir
    set(test_dir "${CMAKE_CURRENT_BINARY_DIR}/check_package_${package_name}")
    set(test_source "
cmake_minimum_required(VERSION ${CMAKE_VERSION})
project(check_package LANGUAGES CXX)
find_package(${package_name} QUIET)
")
    file(WRITE "${test_dir}/CMakeLists.txt" "${test_source}")

    # pass on whatever toolchain selection the parent configure has set
    set(cmake_args "")
    if(CMAKE_GENERATOR)
        list(APPEND cmake_args "-G" "${CMAKE_GENERATOR}")
    endif()
    if(CMAKE_GENERATOR_PLATFORM)
        list(APPEND cmake_args "-A" "${CMAKE_GENERATOR_PLATFORM}")
    endif()
    if(CMAKE_GENERATOR_TOOLSET)
        list(APPEND cmake_args "-T" "${CMAKE_GENERATOR_TOOLSET}")
    endif()
    if(CMAKE_CXX_COMPILER)
        list(APPEND cmake_args "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}")
    endif()

    # configure the scratch project in place; only the exit status is of interest
    execute_process(
        COMMAND           ${CMAKE_COMMAND} ${cmake_args} .
        WORKING_DIRECTORY "${test_dir}"
        RESULT_VARIABLE   result
        OUTPUT_QUIET
        ERROR_QUIET
    )

    if(NOT result EQUAL 0)
        message(WARNING "Detection of ${package_name} failed. The package might be broken or incompatible.")
        set(${package_name}_FOUND FALSE)
    else()
        find_package(${package_name} ${ARGN})
    endif()
endmacro()
|
||||
|
||||
detect_and_find_package(IntelSYCL)
|
||||
if (IntelSYCL_FOUND)
|
||||
# Use oneAPI CMake when possible
|
||||
target_link_libraries(ggml-sycl PRIVATE IntelSYCL::SYCL_CXX)
|
||||
|
|
@ -191,3 +231,4 @@ if (GGML_SYCL_DEVICE_ARCH)
|
|||
target_compile_options(ggml-sycl PRIVATE -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH})
|
||||
target_link_options(ggml-sycl PRIVATE -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH})
|
||||
endif()
|
||||
|
||||
|
|
|
|||
|
|
@ -434,8 +434,15 @@ static constexpr std::initializer_list<ggml_op> topk_moe_early_softmax_norm{ GGM
|
|||
GGML_OP_VIEW, GGML_OP_GET_ROWS, GGML_OP_RESHAPE,
|
||||
GGML_OP_SUM_ROWS, GGML_OP_CLAMP, GGML_OP_DIV,
|
||||
GGML_OP_RESHAPE };
|
||||
|
||||
// Op sequence of the top-k MoE routing pattern with a unary gate, a bias add
// ahead of the argsort, and normalization of the selected weights.
// The unary op is only accepted as sigmoid by a separate check; this list
// records the op kinds alone. The indices in the comments are the offsets
// used by the matching edge table.
static constexpr std::initializer_list<ggml_op> topk_moe_sigmoid_norm_bias{
    GGML_OP_UNARY,    //  0
    GGML_OP_RESHAPE,  //  1
    GGML_OP_ADD,      //  2
    GGML_OP_ARGSORT,  //  3
    GGML_OP_VIEW,     //  4
    GGML_OP_GET_ROWS, //  5
    GGML_OP_RESHAPE,  //  6
    GGML_OP_SUM_ROWS, //  7
    GGML_OP_CLAMP,    //  8
    GGML_OP_DIV,      //  9
    GGML_OP_RESHAPE,  // 10
};
|
||||
|
||||
// Op sequence of the top-k MoE routing pattern that applies softmax first and
// stops after the get_rows (no normalization of the selected weights).
static constexpr std::initializer_list<ggml_op> topk_moe_early_softmax{
    GGML_OP_SOFT_MAX, // 0
    GGML_OP_RESHAPE,  // 1
    GGML_OP_ARGSORT,  // 2
    GGML_OP_VIEW,     // 3
    GGML_OP_GET_ROWS, // 4
};
|
||||
|
||||
// Op sequence of the top-k MoE routing pattern that sorts and gathers first
// and applies softmax afterwards.
static constexpr std::initializer_list<ggml_op> topk_moe_late_softmax{
    GGML_OP_ARGSORT,  // 0
    GGML_OP_VIEW,     // 1
    GGML_OP_GET_ROWS, // 2
    GGML_OP_RESHAPE,  // 3
    GGML_OP_SOFT_MAX, // 4
    GGML_OP_RESHAPE,  // 5
};
|
||||
|
|
@ -464,6 +471,32 @@ static constexpr std::initializer_list<std::array<int, 3>> topk_moe_early_softma
|
|||
{ 9, 0, 8 }, // reshape->src[0] == div
|
||||
};
|
||||
|
||||
//node #436 ( UNARY): ffn_moe_probs-10 ( 256K) [Vulka ] use=2: ffn_moe_logits-10 ( 256K) [Vulka ]
|
||||
//node #437 ( RESHAPE): ffn_moe_probs-10 (re ( 256K) [Vulka ] use=1: ffn_moe_probs-10 ( 256K) [Vulka ]
|
||||
//node #438 ( ADD): ffn_moe_probs_biased ( 256K) [Vulka ] use=1: ffn_moe_probs-10 ( 256K) [Vulka ] blk.10.exp_probs_b.b ( 0K) [Vulka ]
|
||||
//node #439 ( ARGSORT): ffn_moe_argsort-10 ( 256K) [Vulka ] use=1: ffn_moe_probs_biased ( 256K) [Vulka ]
|
||||
//node #440 ( VIEW): ffn_moe_topk-10 ( 255K) [Vulka ] use=3: ffn_moe_argsort-10 ( 256K) [Vulka ]
|
||||
//node #441 ( GET_ROWS): ffn_moe_weights-10 ( 12K) [Vulka ] use=1: ffn_moe_probs-10 (re ( 256K) [Vulka ] ffn_moe_topk-10 ( 255K) [Vulka ]
|
||||
//node #442 ( RESHAPE): ffn_moe_weights-10 ( ( 12K) [Vulka ] use=2: ffn_moe_weights-10 ( 12K) [Vulka ]
|
||||
//node #443 ( SUM_ROWS): ffn_moe_weights_sum- ( 2K) [Vulka ] use=1: ffn_moe_weights-10 ( ( 12K) [Vulka ]
|
||||
//node #444 ( CLAMP): ffn_moe_weights_sum_ ( 2K) [Vulka ] use=1: ffn_moe_weights_sum- ( 2K) [Vulka ]
|
||||
//node #445 ( DIV): ffn_moe_weights_norm ( 12K) [Vulka ] use=1: ffn_moe_weights-10 ( ( 12K) [Vulka ] ffn_moe_weights_sum_ ( 2K) [Vulka ]
|
||||
//node #446 ( RESHAPE): ffn_moe_weights_norm ( 12K) [Vulka ] use=1: ffn_moe_weights_norm ( 12K) [Vulka ]
|
||||
// Required data-flow edges of the sigmoid + bias + norm top-k MoE pattern.
// Each entry { n, s, m } states that node n takes node m as its src[s]; the
// node numbers are offsets into the matched op sequence (0 = the sigmoid).
static constexpr std::initializer_list<std::array<int, 3>> topk_moe_sigmoid_norm_bias_edges {
    {  1, 0, 0 }, // node 1 (reshape)  reads node 0 (sigmoid)
    {  2, 0, 0 }, // node 2 (add)      reads node 0 (sigmoid) as src[0]
    {  3, 0, 2 }, // node 3 (argsort)  reads node 2 (add)
    {  4, 0, 3 }, // node 4 (view)     reads node 3 (argsort)
    {  5, 0, 1 }, // node 5 (get_rows) reads node 1 (reshape) as src[0]
    {  5, 1, 4 }, // node 5 (get_rows) reads node 4 (view) as src[1]
    {  6, 0, 5 }, // node 6 (reshape)  reads node 5 (get_rows)
    {  7, 0, 6 }, // node 7 (sum_rows) reads node 6 (reshape)
    {  8, 0, 7 }, // node 8 (clamp)    reads node 7 (sum_rows)
    {  9, 0, 6 }, // node 9 (div)      reads node 6 (reshape) as src[0]
    {  9, 1, 8 }, // node 9 (div)      reads node 8 (clamp) as src[1]
    { 10, 0, 9 }, // node 10 (reshape) reads node 9 (div)
};
|
||||
|
||||
// same as early_softmax_norm but ending after the get_rows
|
||||
static constexpr std::initializer_list<std::array<int, 3>> topk_moe_early_softmax_edges {
|
||||
{ 1, 0, 0 }, // reshape->src[0] == softmax
|
||||
|
|
@ -491,16 +524,10 @@ enum topk_moe_mode {
|
|||
TOPK_MOE_EARLY_SOFTMAX,
|
||||
TOPK_MOE_EARLY_SOFTMAX_NORM,
|
||||
TOPK_MOE_LATE_SOFTMAX,
|
||||
TOPK_MOE_SIGMOID_NORM_BIAS,
|
||||
TOPK_MOE_COUNT,
|
||||
};
|
||||
|
||||
static topk_moe_mode ggml_vk_num_additional_ops_to_topk_moe_mode(uint32_t num) {
|
||||
topk_moe_mode mode = num == topk_moe_early_softmax_norm.size() - 1 ? TOPK_MOE_EARLY_SOFTMAX_NORM :
|
||||
num == topk_moe_early_softmax.size() - 1 ? TOPK_MOE_EARLY_SOFTMAX :
|
||||
TOPK_MOE_LATE_SOFTMAX;
|
||||
return mode;
|
||||
}
|
||||
|
||||
static constexpr std::initializer_list<std::array<int, 3>> rope_view_set_rows_edges {
|
||||
{ 1, 0, 0 }, // view->src[0] == rope
|
||||
{ 2, 0, 1 }, // set_rows->src[0] == view
|
||||
|
|
@ -766,7 +793,7 @@ struct vk_device_struct {
|
|||
vk_pipeline pipeline_count_experts;
|
||||
|
||||
// [2] is for whether to take n_experts from spec constant (0) or push constant (1)
|
||||
vk_pipeline pipeline_topk_moe[num_topk_moe_pipelines][TOPK_MOE_COUNT][2];
|
||||
vk_pipeline pipeline_topk_moe[num_topk_moe_pipelines][2];
|
||||
|
||||
std::vector<vk_pipeline_ref> all_pipelines;
|
||||
|
||||
|
|
@ -1181,6 +1208,11 @@ struct vk_op_topk_moe_push_constants {
|
|||
uint32_t n_expert_used;
|
||||
float clamp_min;
|
||||
float clamp_max;
|
||||
uint32_t gating_func;
|
||||
uint32_t has_bias;
|
||||
uint32_t with_norm;
|
||||
float output_scale;
|
||||
float output_bias;
|
||||
};
|
||||
|
||||
struct vk_op_add_id_push_constants {
|
||||
|
|
@ -1771,6 +1803,8 @@ struct ggml_backend_vk_context {
|
|||
// Bit 'i' means nodes[start_of_fusion + i] writes to memory.
|
||||
// If there's no fusion, bit 0 is still set.
|
||||
int fused_ops_write_mask {};
|
||||
topk_moe_mode fused_topk_moe_mode {};
|
||||
bool fused_topk_moe_scale {};
|
||||
|
||||
// for GGML_VK_PERF_LOGGER
|
||||
std::unique_ptr<vk_perf_logger> perf_logger;
|
||||
|
|
@ -4291,9 +4325,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
|
|||
|
||||
for (uint32_t use_push = 0; use_push < 2; ++use_push) {
|
||||
for (uint32_t i = 0; i < num_topk_moe_pipelines; ++i) {
|
||||
ggml_vk_create_pipeline2(device, device->pipeline_topk_moe[i][TOPK_MOE_EARLY_SOFTMAX][use_push], "topk_moe_f32_early_softmax_"+std::to_string(i), topk_moe_f32_len, topk_moe_f32_data, "main", 3, sizeof(vk_op_topk_moe_push_constants), {1, 1, 1}, {device->subgroup_size, 1u<<i, 0, 0, use_push}, 1, true, true, device->subgroup_size);
|
||||
ggml_vk_create_pipeline2(device, device->pipeline_topk_moe[i][TOPK_MOE_EARLY_SOFTMAX_NORM][use_push], "topk_moe_f32_early_softmax_norm"+std::to_string(i), topk_moe_f32_len, topk_moe_f32_data, "main", 3, sizeof(vk_op_topk_moe_push_constants), {1, 1, 1}, {device->subgroup_size, 1u<<i, 1, 0, use_push}, 1, true, true, device->subgroup_size);
|
||||
ggml_vk_create_pipeline2(device, device->pipeline_topk_moe[i][TOPK_MOE_LATE_SOFTMAX][use_push], "topk_moe_f32_late_softmax"+std::to_string(i), topk_moe_f32_len, topk_moe_f32_data, "main", 3, sizeof(vk_op_topk_moe_push_constants), {1, 1, 1}, {device->subgroup_size, 1u<<i, 0, 1, use_push}, 1, true, true, device->subgroup_size);
|
||||
ggml_vk_create_pipeline2(device, device->pipeline_topk_moe[i][use_push], "topk_moe_f32_"+std::to_string(i), topk_moe_f32_len, topk_moe_f32_data, "main", 4, sizeof(vk_op_topk_moe_push_constants), {1, 1, 1}, {device->subgroup_size, 1u<<i, use_push}, 1, true, true, device->subgroup_size);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -8684,10 +8716,9 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
|
|||
if (ctx->num_additional_fused_ops) {
|
||||
uint32_t idx = (uint32_t)ceilf(log2f(float(dst->ne[0])));
|
||||
GGML_ASSERT(idx < num_topk_moe_pipelines);
|
||||
topk_moe_mode mode = ggml_vk_num_additional_ops_to_topk_moe_mode(ctx->num_additional_fused_ops);
|
||||
// use n_experts from push constant if it's not equal to the power of two spec constant
|
||||
bool use_push = dst->ne[0] != (1u << idx);
|
||||
return ctx->device->pipeline_topk_moe[idx][mode][use_push];
|
||||
return ctx->device->pipeline_topk_moe[idx][use_push];
|
||||
}
|
||||
|
||||
if (src0->type == GGML_TYPE_F32 && (src1 == nullptr || src1->type == GGML_TYPE_F32) && dst->type == GGML_TYPE_F32) {
|
||||
|
|
@ -10346,14 +10377,16 @@ static void ggml_vk_soft_max_back(ggml_backend_vk_context * ctx, vk_context& sub
|
|||
}
|
||||
|
||||
static void ggml_vk_topk_moe(ggml_backend_vk_context * ctx, vk_context& subctx, ggml_cgraph * cgraph, int node_idx) {
|
||||
topk_moe_mode mode = ggml_vk_num_additional_ops_to_topk_moe_mode(ctx->num_additional_fused_ops);
|
||||
topk_moe_mode mode = ctx->fused_topk_moe_mode;
|
||||
ggml_tensor * logits = cgraph->nodes[node_idx + 0]->src[0];
|
||||
ggml_tensor * weights = (mode == TOPK_MOE_EARLY_SOFTMAX_NORM) ? cgraph->nodes[node_idx + 9] :
|
||||
(mode == TOPK_MOE_EARLY_SOFTMAX) ? cgraph->nodes[node_idx + 4] :
|
||||
cgraph->nodes[node_idx + 5];
|
||||
ggml_tensor * ids = (mode == TOPK_MOE_LATE_SOFTMAX) ? cgraph->nodes[node_idx + 1] : cgraph->nodes[node_idx + 3];
|
||||
ggml_tensor * bias = (mode == TOPK_MOE_SIGMOID_NORM_BIAS) ? cgraph->nodes[node_idx + 2]->src[1] : logits;
|
||||
ggml_tensor * weights = cgraph->nodes[node_idx + ctx->num_additional_fused_ops];
|
||||
ggml_tensor * ids = (mode == TOPK_MOE_SIGMOID_NORM_BIAS) ? cgraph->nodes[node_idx + 4] :
|
||||
(mode == TOPK_MOE_LATE_SOFTMAX) ? cgraph->nodes[node_idx + 1] :
|
||||
cgraph->nodes[node_idx + 3];
|
||||
|
||||
GGML_ASSERT(logits->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT(bias->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT(weights->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT(ids->type == GGML_TYPE_I32);
|
||||
|
||||
|
|
@ -10368,6 +10401,7 @@ static void ggml_vk_topk_moe(ggml_backend_vk_context * ctx, vk_context& subctx,
|
|||
ggml_pipeline_request_descriptor_sets(ctx, pipeline, 1);
|
||||
|
||||
vk_subbuffer logits_buf = ggml_vk_tensor_subbuffer(ctx, logits);
|
||||
vk_subbuffer bias_buf = ggml_vk_tensor_subbuffer(ctx, bias);
|
||||
vk_subbuffer weights_buf = ggml_vk_tensor_subbuffer(ctx, weights);
|
||||
vk_subbuffer ids_buf = ggml_vk_tensor_subbuffer(ctx, ids);
|
||||
|
||||
|
|
@ -10375,18 +10409,45 @@ static void ggml_vk_topk_moe(ggml_backend_vk_context * ctx, vk_context& subctx,
|
|||
pc.n_rows = n_rows;
|
||||
pc.n_experts_push = n_experts;
|
||||
pc.n_expert_used = n_expert_used;
|
||||
pc.clamp_min = -std::numeric_limits<float>::infinity();
|
||||
pc.clamp_max = std::numeric_limits<float>::infinity();
|
||||
if (mode == TOPK_MOE_EARLY_SOFTMAX_NORM) {
|
||||
ggml_tensor * clamp = cgraph->nodes[node_idx + 7];
|
||||
GGML_ASSERT(clamp->op == GGML_OP_CLAMP);
|
||||
pc.clamp_min = ggml_get_op_params_f32(clamp, 0);
|
||||
pc.clamp_max = ggml_get_op_params_f32(clamp, 1);
|
||||
}
|
||||
if (mode == TOPK_MOE_SIGMOID_NORM_BIAS) {
|
||||
ggml_tensor * clamp = cgraph->nodes[node_idx + 8];
|
||||
GGML_ASSERT(clamp->op == GGML_OP_CLAMP);
|
||||
pc.clamp_min = ggml_get_op_params_f32(clamp, 0);
|
||||
pc.clamp_max = ggml_get_op_params_f32(clamp, 1);
|
||||
}
|
||||
|
||||
#define GATING_FUNC_SOFTMAX 0
|
||||
#define GATING_FUNC_SIGMOID 1
|
||||
#define GATING_FUNC_SOFTMAX_WEIGHT 2
|
||||
|
||||
pc.gating_func = mode == TOPK_MOE_SIGMOID_NORM_BIAS ? GATING_FUNC_SIGMOID :
|
||||
mode == TOPK_MOE_LATE_SOFTMAX ? GATING_FUNC_SOFTMAX_WEIGHT :
|
||||
GATING_FUNC_SOFTMAX;
|
||||
pc.has_bias = mode == TOPK_MOE_SIGMOID_NORM_BIAS;
|
||||
pc.with_norm = mode == TOPK_MOE_EARLY_SOFTMAX_NORM || mode == TOPK_MOE_SIGMOID_NORM_BIAS;
|
||||
if (ctx->fused_topk_moe_scale) {
|
||||
GGML_ASSERT(weights->op == GGML_OP_SCALE);
|
||||
pc.output_scale = ggml_get_op_params_f32(weights, 0);
|
||||
pc.output_bias = ggml_get_op_params_f32(weights, 1);
|
||||
} else {
|
||||
pc.output_scale = 1.0f;
|
||||
pc.output_bias = 0.0f;
|
||||
}
|
||||
|
||||
GGML_ASSERT(n_expert_used <= n_experts);
|
||||
|
||||
const uint32_t rows_per_block = 4;
|
||||
std::array<uint32_t, 3> elements = { CEIL_DIV(n_rows, rows_per_block), 1, 1 };
|
||||
|
||||
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, {logits_buf, weights_buf, ids_buf}, pc, elements);
|
||||
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, {logits_buf, bias_buf, weights_buf, ids_buf}, pc, elements);
|
||||
}
|
||||
|
||||
static void ggml_vk_rope(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_cgraph * cgraph, int node_idx, bool backprop) {
|
||||
|
|
@ -12128,6 +12189,11 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_cgraph * cgr
|
|||
|
||||
break;
|
||||
case GGML_OP_UNARY:
|
||||
if (ctx->fused_topk_moe_mode != TOPK_MOE_COUNT) {
|
||||
ggml_vk_topk_moe(ctx, compute_ctx, cgraph, node_idx);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (ggml_get_unary_op(node)) {
|
||||
case GGML_UNARY_OP_EXP:
|
||||
case GGML_UNARY_OP_SILU:
|
||||
|
|
@ -12175,7 +12241,7 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_cgraph * cgr
|
|||
|
||||
break;
|
||||
case GGML_OP_SOFT_MAX:
|
||||
if (ctx->num_additional_fused_ops) {
|
||||
if (ctx->fused_topk_moe_mode != TOPK_MOE_COUNT) {
|
||||
ggml_vk_topk_moe(ctx, compute_ctx, cgraph, node_idx);
|
||||
} else {
|
||||
ggml_vk_soft_max(ctx, compute_ctx, src0, src1, src2, node);
|
||||
|
|
@ -12195,7 +12261,7 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_cgraph * cgr
|
|||
|
||||
break;
|
||||
case GGML_OP_ARGSORT:
|
||||
if (ctx->num_additional_fused_ops) {
|
||||
if (ctx->fused_topk_moe_mode != TOPK_MOE_COUNT) {
|
||||
ggml_vk_topk_moe(ctx, compute_ctx, cgraph, node_idx);
|
||||
} else {
|
||||
ggml_vk_argsort(ctx, compute_ctx, src0, node);
|
||||
|
|
@ -13048,6 +13114,24 @@ static bool ggml_vk_can_fuse_topk_moe(ggml_backend_vk_context * ctx, const struc
|
|||
get_rows = cgraph->nodes[node_idx + 4];
|
||||
argsort = cgraph->nodes[node_idx + 2];
|
||||
break;
|
||||
case TOPK_MOE_SIGMOID_NORM_BIAS:
|
||||
softmax = cgraph->nodes[node_idx + 0]; // really sigmoid
|
||||
weights = cgraph->nodes[node_idx + 10];
|
||||
get_rows = cgraph->nodes[node_idx + 5];
|
||||
argsort = cgraph->nodes[node_idx + 3];
|
||||
if (ggml_get_unary_op(softmax) != GGML_UNARY_OP_SIGMOID) {
|
||||
return false;
|
||||
}
|
||||
// bias is expected to be 1D
|
||||
if (ggml_nrows(cgraph->nodes[node_idx + 2]->src[1]) != 1 ||
|
||||
!ggml_is_contiguous(cgraph->nodes[node_idx + 2]->src[1])) {
|
||||
return false;
|
||||
}
|
||||
// sigmoid fusion seems to generate infinities on moltenvk
|
||||
if (ctx->device->driver_id == vk::DriverId::eMoltenvk) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case TOPK_MOE_EARLY_SOFTMAX:
|
||||
softmax = cgraph->nodes[node_idx + 0];
|
||||
weights = cgraph->nodes[node_idx + 4];
|
||||
|
|
@ -13071,26 +13155,28 @@ static bool ggml_vk_can_fuse_topk_moe(ggml_backend_vk_context * ctx, const struc
|
|||
probs = probs->src[0];
|
||||
ggml_tensor * selection_probs = argsort->src[0];
|
||||
|
||||
if (probs != selection_probs) {
|
||||
if (probs != selection_probs && mode != TOPK_MOE_SIGMOID_NORM_BIAS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const float * op_params = (const float *)softmax->op_params;
|
||||
|
||||
float scale = op_params[0];
|
||||
float max_bias = op_params[1];
|
||||
|
||||
if (!ggml_is_contiguous(softmax->src[0]) || !ggml_is_contiguous(weights)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (scale != 1.0f || max_bias != 0.0f) {
|
||||
return false;
|
||||
}
|
||||
if (softmax->op == GGML_OP_SOFT_MAX) {
|
||||
const float * op_params = (const float *)softmax->op_params;
|
||||
|
||||
// don't fuse when masks or sinks are present
|
||||
if (softmax->src[1] || softmax->src[2]) {
|
||||
return false;
|
||||
float scale = op_params[0];
|
||||
float max_bias = op_params[1];
|
||||
|
||||
if (scale != 1.0f || max_bias != 0.0f) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// don't fuse when masks or sinks are present
|
||||
if (softmax->src[1] || softmax->src[2]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
const int n_expert = softmax->ne[0];
|
||||
|
|
@ -13363,6 +13449,8 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
|
|||
total_mul_mat_bytes += bytes;
|
||||
}
|
||||
|
||||
ctx->fused_topk_moe_mode = TOPK_MOE_COUNT;
|
||||
ctx->fused_topk_moe_scale = false;
|
||||
const char *fusion_string {};
|
||||
if (!ctx->device->disable_fusion) {
|
||||
uint32_t num_adds = ggml_vk_fuse_multi_add(ctx, cgraph, i);
|
||||
|
|
@ -13408,13 +13496,23 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
|
|||
ctx->num_additional_fused_ops = topk_moe_early_softmax_norm.size() - 1;
|
||||
// view of argsort writes to memory
|
||||
ctx->fused_ops_write_mask |= 1 << 3;
|
||||
ctx->fused_topk_moe_mode = TOPK_MOE_EARLY_SOFTMAX_NORM;
|
||||
fusion_string = "TOPK_MOE_EARLY_SOFTMAX_NORM";
|
||||
} else if (ggml_can_fuse_subgraph(cgraph, i, topk_moe_sigmoid_norm_bias, { i + 4, i + 10 }) &&
|
||||
ggml_check_edges(cgraph, i, topk_moe_sigmoid_norm_bias_edges) &&
|
||||
ggml_vk_can_fuse_topk_moe(ctx, cgraph, i, TOPK_MOE_SIGMOID_NORM_BIAS)) {
|
||||
ctx->num_additional_fused_ops = topk_moe_sigmoid_norm_bias.size() - 1;
|
||||
// view of argsort writes to memory
|
||||
ctx->fused_ops_write_mask |= 1 << 4;
|
||||
ctx->fused_topk_moe_mode = TOPK_MOE_SIGMOID_NORM_BIAS;
|
||||
fusion_string = "TOPK_MOE_SIGMOID_NORM_BIAS";
|
||||
} else if (ggml_can_fuse_subgraph(cgraph, i, topk_moe_early_softmax, { i + 3, i + 4 }) &&
|
||||
ggml_check_edges(cgraph, i, topk_moe_early_softmax_edges) &&
|
||||
ggml_vk_can_fuse_topk_moe(ctx, cgraph, i, TOPK_MOE_EARLY_SOFTMAX)) {
|
||||
ctx->num_additional_fused_ops = topk_moe_early_softmax.size() - 1;
|
||||
// view of argsort writes to memory
|
||||
ctx->fused_ops_write_mask |= 1 << 3;
|
||||
ctx->fused_topk_moe_mode = TOPK_MOE_EARLY_SOFTMAX;
|
||||
fusion_string = "TOPK_MOE_EARLY_SOFTMAX";
|
||||
} else if (ggml_can_fuse_subgraph(cgraph, i, topk_moe_late_softmax, { i + 1, i + 5 }) &&
|
||||
ggml_check_edges(cgraph, i, topk_moe_late_softmax_edges) &&
|
||||
|
|
@ -13422,8 +13520,17 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
|
|||
ctx->num_additional_fused_ops = topk_moe_late_softmax.size() - 1;
|
||||
// view of argsort writes to memory
|
||||
ctx->fused_ops_write_mask |= 1 << 1;
|
||||
ctx->fused_topk_moe_mode = TOPK_MOE_LATE_SOFTMAX;
|
||||
fusion_string = "TOPK_MOE_LATE_SOFTMAX";
|
||||
}
|
||||
if (ctx->fused_topk_moe_mode != TOPK_MOE_COUNT) {
|
||||
// Look for an additional scale op to fuse - occurs in deepseek2 and nemotron3 nano.
|
||||
if (ggml_can_fuse_subgraph(cgraph, i + ctx->num_additional_fused_ops - 1, { GGML_OP_DIV, GGML_OP_RESHAPE, GGML_OP_SCALE }, { i + ctx->num_additional_fused_ops + 1 }) ||
|
||||
ggml_can_fuse_subgraph(cgraph, i + ctx->num_additional_fused_ops, { GGML_OP_GET_ROWS, GGML_OP_SCALE }, { i + ctx->num_additional_fused_ops + 1 })) {
|
||||
ctx->fused_topk_moe_scale = true;
|
||||
ctx->num_additional_fused_ops++;
|
||||
}
|
||||
}
|
||||
}
|
||||
ctx->fused_ops_write_mask |= 1 << ctx->num_additional_fused_ops;
|
||||
|
||||
|
|
@ -13602,6 +13709,9 @@ static void ggml_vk_graph_optimize(ggml_backend_t backend, struct ggml_cgraph *
|
|||
if (keep_pattern(topk_moe_early_softmax_norm)) {
|
||||
continue;
|
||||
}
|
||||
if (keep_pattern(topk_moe_sigmoid_norm_bias)) {
|
||||
continue;
|
||||
}
|
||||
if (keep_pattern(topk_moe_early_softmax)) {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -13628,6 +13738,7 @@ static void ggml_vk_graph_optimize(ggml_backend_t backend, struct ggml_cgraph *
|
|||
}
|
||||
// Don't pull forward nodes from fusion patterns
|
||||
if (match_pattern(topk_moe_early_softmax_norm, j) ||
|
||||
match_pattern(topk_moe_sigmoid_norm_bias, j) ||
|
||||
match_pattern(topk_moe_early_softmax, j) ||
|
||||
match_pattern(topk_moe_late_softmax, j)) {
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -7,6 +7,10 @@
|
|||
|
||||
#include "types.glsl"
|
||||
|
||||
#define GATING_FUNC_SOFTMAX 0
|
||||
#define GATING_FUNC_SIGMOID 1
|
||||
#define GATING_FUNC_SOFTMAX_WEIGHT 2
|
||||
|
||||
layout (push_constant) uniform parameter
|
||||
{
|
||||
uint n_rows;
|
||||
|
|
@ -14,15 +18,18 @@ layout (push_constant) uniform parameter
|
|||
uint n_expert_used;
|
||||
float clamp_min;
|
||||
float clamp_max;
|
||||
uint gating_func;
|
||||
uint has_bias;
|
||||
uint with_norm;
|
||||
float output_scale;
|
||||
float output_bias;
|
||||
};
|
||||
|
||||
layout(local_size_x_id = 0, local_size_y = 4, local_size_z = 1) in;
|
||||
|
||||
layout(constant_id = 0) const uint WARP_SIZE = 32;
|
||||
layout(constant_id = 1) const uint n_experts_spec = 512;
|
||||
layout(constant_id = 2) const bool with_norm = true;
|
||||
layout(constant_id = 3) const bool late_softmax = false;
|
||||
layout(constant_id = 4) const bool nexperts_use_push = false;
|
||||
layout(constant_id = 2) const bool nexperts_use_push = false;
|
||||
|
||||
uint n_experts = nexperts_use_push ? n_experts_push : n_experts_spec;
|
||||
|
||||
|
|
@ -31,8 +38,9 @@ uint n_experts = nexperts_use_push ? n_experts_push : n_experts_spec;
|
|||
const uint experts_per_thread = CEIL_DIV(n_experts_spec, WARP_SIZE);
|
||||
|
||||
layout (binding = 0, std430) readonly buffer Logits {float logits[];};
|
||||
layout (binding = 1, std430) writeonly buffer Weights {float weights[];};
|
||||
layout (binding = 2, std430) writeonly buffer Ids {uint ids[];};
|
||||
layout (binding = 1, std430) readonly buffer BiasProbs {float bias[];};
|
||||
layout (binding = 2, std430) writeonly buffer Weights {float weights[];};
|
||||
layout (binding = 3, std430) writeonly buffer Ids {uint ids[];};
|
||||
|
||||
const float INFINITY = 1.0 / 0.0;
|
||||
|
||||
|
|
@ -87,20 +95,40 @@ void main() {
|
|||
}
|
||||
|
||||
const uint logits_offset = n_experts * row;
|
||||
const uint bias_offset = 0; // 1D
|
||||
const uint weights_offset = n_expert_used * row;
|
||||
const uint ids_offset = n_experts * row;
|
||||
const uint lane = gl_SubgroupInvocationID;
|
||||
|
||||
float wt[experts_per_thread];
|
||||
float probs[experts_per_thread];
|
||||
|
||||
[[unroll]]
|
||||
for (uint i = 0; i < n_experts; i += WARP_SIZE) {
|
||||
const uint expert = i + lane;
|
||||
wt[i / WARP_SIZE] = (n_experts % WARP_SIZE == 0 || expert < n_experts) ? logits[logits_offset + expert] : -INFINITY;
|
||||
probs[i / WARP_SIZE] = (n_experts % WARP_SIZE == 0 || expert < n_experts) ? logits[logits_offset + expert] : -INFINITY;
|
||||
}
|
||||
|
||||
if (!late_softmax) {
|
||||
softmax_warp_inplace(wt, n_experts, lane, nexperts_use_push);
|
||||
if (gating_func == GATING_FUNC_SOFTMAX) {
|
||||
softmax_warp_inplace(probs, n_experts, lane, nexperts_use_push);
|
||||
} else if (gating_func == GATING_FUNC_SIGMOID) {
|
||||
[[unroll]]
|
||||
for (int i = 0; i < experts_per_thread; i++) {
|
||||
probs[i] = 1.f / (1.f + exp(-probs[i]));
|
||||
}
|
||||
}
|
||||
|
||||
float selection_probs[experts_per_thread];
|
||||
if (has_bias != 0) {
|
||||
[[unroll]]
|
||||
for (uint i = 0; i < n_experts; i += WARP_SIZE) {
|
||||
const uint expert = i + lane;
|
||||
selection_probs[i / WARP_SIZE] = (n_experts % WARP_SIZE == 0 || expert < n_experts) ? probs[i / WARP_SIZE] + bias[bias_offset + expert] : -INFINITY;
|
||||
}
|
||||
} else {
|
||||
[[unroll]]
|
||||
for (int i = 0; i < experts_per_thread; i++) {
|
||||
selection_probs[i] = probs[i];
|
||||
}
|
||||
}
|
||||
|
||||
// at this point, each thread holds a portion of softmax,
|
||||
|
|
@ -117,14 +145,16 @@ void main() {
|
|||
}
|
||||
|
||||
for (int k = 0; k < n_expert_used; k++) {
|
||||
float max_val = wt[0];
|
||||
float max_val = probs[0];
|
||||
float max_val_s = selection_probs[0];
|
||||
uint max_expert = lane;
|
||||
|
||||
[[unroll]]
|
||||
for (int i = 1; i < experts_per_thread; i++) {
|
||||
const uint expert = lane + i * WARP_SIZE;
|
||||
if ((n_experts % WARP_SIZE == 0 || expert < n_experts) && wt[i] > max_val) {
|
||||
max_val = wt[i];
|
||||
if ((n_experts % WARP_SIZE == 0 || expert < n_experts) && selection_probs[i] > max_val_s) {
|
||||
max_val = probs[i];
|
||||
max_val_s = selection_probs[i];
|
||||
max_expert = expert;
|
||||
}
|
||||
}
|
||||
|
|
@ -132,9 +162,11 @@ void main() {
|
|||
[[unroll]]
|
||||
for (uint mask = WARP_SIZE / 2; mask > 0; mask /= 2) {
|
||||
const float val = subgroupShuffleXor(max_val, mask);
|
||||
const float val_s = subgroupShuffleXor(max_val_s, mask);
|
||||
const uint expert = subgroupShuffleXor(max_expert, mask);
|
||||
if (val > max_val || (val == max_val && expert < max_expert)) {
|
||||
if (val_s > max_val_s || (val_s == max_val_s && expert < max_expert)) {
|
||||
max_val = val;
|
||||
max_val_s = val_s;
|
||||
max_expert = expert;
|
||||
}
|
||||
}
|
||||
|
|
@ -144,16 +176,14 @@ void main() {
|
|||
}
|
||||
|
||||
if ((max_expert & (WARP_SIZE - 1)) == lane) {
|
||||
wt[max_expert / WARP_SIZE] = -INFINITY;
|
||||
selection_probs[max_expert / WARP_SIZE] = -INFINITY;
|
||||
|
||||
ids[ids_offset + k] = max_expert;
|
||||
if (with_norm) {
|
||||
wt_sum += max_val;
|
||||
}
|
||||
wt_sum += max_val;
|
||||
}
|
||||
}
|
||||
|
||||
if (with_norm) {
|
||||
if (with_norm != 0) {
|
||||
wt_sum = subgroupAdd(wt_sum);
|
||||
wt_sum = clamp(wt_sum, clamp_min, clamp_max);
|
||||
const float inv_sum = 1.0f / wt_sum;
|
||||
|
|
@ -164,7 +194,7 @@ void main() {
|
|||
}
|
||||
}
|
||||
|
||||
if (late_softmax) {
|
||||
if (gating_func == GATING_FUNC_SOFTMAX_WEIGHT) {
|
||||
softmax_warp_inplace(output_weights, n_expert_used, lane, true);
|
||||
}
|
||||
|
||||
|
|
@ -172,7 +202,7 @@ void main() {
|
|||
for (uint i = 0; i < experts_per_thread; ++i) {
|
||||
uint idx = i * WARP_SIZE + lane;
|
||||
if (idx < n_expert_used) {
|
||||
weights[weights_offset + idx] = output_weights[i];
|
||||
weights[weights_offset + idx] = output_scale * output_weights[i] + output_bias;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3492,6 +3492,7 @@ class VisionProjectorType:
|
|||
COGVLM = "cogvlm"
|
||||
JANUS_PRO = "janus_pro"
|
||||
LFM2A = "lfm2a" # audio
|
||||
MUSIC_FLAMINGO = "musicflamingo" # audio
|
||||
GLM4V = "glm4v"
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -150,6 +150,9 @@ You can use GBNF grammars:
|
|||
- in CLI, with [examples/json_schema_to_grammar.py](../examples/json_schema_to_grammar.py)
|
||||
- in JavaScript with [json-schema-to-grammar.mjs](../tools/server/public_legacy/json-schema-to-grammar.mjs) (this is used by the [server](../tools/server)'s Web UI)
|
||||
|
||||
> [!NOTE]
|
||||
> The JSON schema is only used to constrain the model output and is not injected into the prompt. The model has no visibility into the schema, so if you want it to understand the expected structure, describe it explicitly in your prompt. This does not apply to tool calling, where schemas are injected into the prompt.
|
||||
|
||||
Take a look at [tests](../tests/test-json-schema-to-grammar.cpp) to see which features are likely supported (you'll also find usage examples in https://github.com/ggml-org/llama.cpp/pull/5978, https://github.com/ggml-org/llama.cpp/pull/6659 & https://github.com/ggml-org/llama.cpp/pull/6555).
|
||||
|
||||
```bash
|
||||
|
|
|
|||
|
|
@ -618,6 +618,8 @@ extern "C" {
|
|||
//
|
||||
|
||||
// Load a LoRA adapter from file
|
||||
// The adapter is valid as long as the associated model is not freed
|
||||
// All adapters must be loaded before context creation
|
||||
LLAMA_API struct llama_adapter_lora * llama_adapter_lora_init(
|
||||
struct llama_model * model,
|
||||
const char * path_lora);
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ Example function tool call syntax:
|
|||
{%- if message['role'] == 'user' -%}
|
||||
{{- '<|User|>' + message['content'] + '<|end▁of▁sentence|>' -}}
|
||||
{%- endif -%}
|
||||
{%- if message['role'] == 'assistant' and message['content'] is none -%}
|
||||
{%- if message['role'] == 'assistant' and not message['content'] -%}
|
||||
{{- '<|Assistant|><|tool▁calls▁begin|>' -}}
|
||||
{%- set ns.is_first = true -%}
|
||||
{%- for tc in message['tool_calls'] -%}
|
||||
|
|
@ -53,7 +53,7 @@ Example function tool call syntax:
|
|||
{%- endfor -%}
|
||||
{{- '<|tool▁calls▁end|><|end▁of▁sentence|>' -}}
|
||||
{%- endif -%}
|
||||
{%- if message['role'] == 'assistant' and message['content'] is not none -%}
|
||||
{%- if message['role'] == 'assistant' and message['content'] -%}
|
||||
{{- flush_tool_outputs() -}}
|
||||
{%- set content = message['content'] -%}
|
||||
{%- if '</think>' in content -%}
|
||||
|
|
@ -73,4 +73,4 @@ Example function tool call syntax:
|
|||
{{- flush_tool_outputs() -}}
|
||||
{%- if add_generation_prompt and not ns.is_tool_outputs -%}
|
||||
{{- '<|Assistant|><think>\n' -}}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
|
|
|
|||
|
|
@ -1 +1 @@
|
|||
130bc125a88bb57664b88932c48c38a1cb316fac
|
||||
ebc3a0f4a56be1c9424a89fbec09962ac34fde85
|
||||
|
|
|
|||
|
|
@ -146,9 +146,11 @@ llama_adapter_lora_weight * llama_adapter_lora::get_weight(ggml_tensor * w) {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
static void llama_adapter_lora_init_impl(llama_model & model, const char * path_lora, llama_adapter_lora & adapter) {
|
||||
static void llama_adapter_lora_init_impl(const char * path_lora, llama_adapter_lora & adapter) {
|
||||
LLAMA_LOG_INFO("%s: loading lora adapter from '%s' ...\n", __func__, path_lora);
|
||||
|
||||
llama_model & model = adapter.model;
|
||||
|
||||
ggml_context * ctx_init;
|
||||
gguf_init_params meta_gguf_params = {
|
||||
/* .no_alloc = */ true,
|
||||
|
|
@ -411,14 +413,17 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
|
|||
}
|
||||
}
|
||||
|
||||
// update number of nodes used
|
||||
model.n_lora_nodes += adapter.get_n_nodes();
|
||||
|
||||
LLAMA_LOG_INFO("%s: loaded %zu tensors from lora file\n", __func__, adapter.ab_map.size()*2);
|
||||
}
|
||||
|
||||
llama_adapter_lora * llama_adapter_lora_init(llama_model * model, const char * path_lora) {
|
||||
llama_adapter_lora * adapter = new llama_adapter_lora();
|
||||
llama_adapter_lora * adapter = new llama_adapter_lora(*model);
|
||||
|
||||
try {
|
||||
llama_adapter_lora_init_impl(*model, path_lora, *adapter);
|
||||
llama_adapter_lora_init_impl(path_lora, *adapter);
|
||||
return adapter;
|
||||
} catch (const std::exception & err) {
|
||||
LLAMA_LOG_ERROR("%s: failed to apply lora adapter: %s\n", __func__, err.what());
|
||||
|
|
@ -469,6 +474,10 @@ int32_t llama_adapter_meta_val_str_by_index(const llama_adapter_lora * adapter,
|
|||
}
|
||||
|
||||
// Free a LoRA adapter.
//
// Before the adapter is destroyed, the number of graph nodes it accounts for
// (adapter->get_n_nodes()) is subtracted from the n_lora_nodes counter of the model
// the adapter references, undoing the increment performed when the adapter was loaded.
//
// adapter: adapter to free; a null pointer is accepted and ignored.
void llama_adapter_lora_free(llama_adapter_lora * adapter) {
    // the bookkeeping below dereferences the adapter, so keep the `delete nullptr`
    // no-op semantics explicit instead of crashing on a null adapter
    if (adapter == nullptr) {
        return;
    }

    // update number of nodes used
    const uint32_t n_nodes = adapter->get_n_nodes();

    // the counter must never underflow: every adapter adds its nodes before it can be freed
    GGML_ASSERT(adapter->model.n_lora_nodes >= n_nodes);
    adapter->model.n_lora_nodes -= n_nodes;

    delete adapter;
}
|
||||
|
||||
|
|
|
|||
|
|
@ -59,6 +59,8 @@ struct llama_adapter_lora_weight {
|
|||
};
|
||||
|
||||
struct llama_adapter_lora {
|
||||
llama_model & model;
|
||||
|
||||
// map tensor name to lora_a_b
|
||||
std::unordered_map<std::string, llama_adapter_lora_weight> ab_map;
|
||||
|
||||
|
|
@ -73,10 +75,14 @@ struct llama_adapter_lora {
|
|||
// activated lora (aLoRA)
|
||||
std::vector<llama_token> alora_invocation_tokens;
|
||||
|
||||
llama_adapter_lora() = default;
|
||||
llama_adapter_lora(llama_model & model) : model(model) {}
|
||||
~llama_adapter_lora() = default;
|
||||
|
||||
llama_adapter_lora_weight * get_weight(ggml_tensor * w);
|
||||
|
||||
uint32_t get_n_nodes() const {
|
||||
return ab_map.size() * 6u; // a, b, scale, add, 2 x mul_mat
|
||||
}
|
||||
};
|
||||
|
||||
using llama_adapter_loras = std::unordered_map<llama_adapter_lora *, float>;
|
||||
|
|
|
|||
|
|
@ -74,6 +74,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
|
|||
{ "seed_oss", LLM_CHAT_TEMPLATE_SEED_OSS },
|
||||
{ "grok-2", LLM_CHAT_TEMPLATE_GROK_2 },
|
||||
{ "pangu-embedded", LLM_CHAT_TEMPLATE_PANGU_EMBED },
|
||||
{ "solar-open", LLM_CHAT_TEMPLATE_SOLAR_OPEN },
|
||||
};
|
||||
|
||||
llm_chat_template llm_chat_template_from_str(const std::string & name) {
|
||||
|
|
@ -216,6 +217,8 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
|
|||
return LLM_CHAT_TEMPLATE_GROK_2;
|
||||
} else if (tmpl_contains(LU8("[unused9]系统:[unused10]"))) {
|
||||
return LLM_CHAT_TEMPLATE_PANGU_EMBED;
|
||||
} else if (tmpl_contains("<|begin|>") && tmpl_contains("<|end|>") && tmpl_contains("<|content|>")) {
|
||||
return LLM_CHAT_TEMPLATE_SOLAR_OPEN;
|
||||
}
|
||||
return LLM_CHAT_TEMPLATE_UNKNOWN;
|
||||
}
|
||||
|
|
@ -845,6 +848,14 @@ int32_t llm_chat_apply_template(
|
|||
if (add_ass) {
|
||||
ss << "[unused9]助手:";
|
||||
}
|
||||
} else if (tmpl == LLM_CHAT_TEMPLATE_SOLAR_OPEN) {
|
||||
for (auto message : chat) {
|
||||
std::string role(message->role);
|
||||
ss << "<|begin|>" << role << "<|content|>" << message->content << "<|end|>";
|
||||
}
|
||||
if (add_ass) {
|
||||
ss << "<|begin|>assistant";
|
||||
}
|
||||
} else {
|
||||
// template not supported
|
||||
return -1;
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@ enum llm_chat_template {
|
|||
LLM_CHAT_TEMPLATE_SEED_OSS,
|
||||
LLM_CHAT_TEMPLATE_GROK_2,
|
||||
LLM_CHAT_TEMPLATE_PANGU_EMBED,
|
||||
LLM_CHAT_TEMPLATE_SOLAR_OPEN,
|
||||
LLM_CHAT_TEMPLATE_UNKNOWN,
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -1959,7 +1959,9 @@ uint32_t llama_context::graph_max_nodes(uint32_t n_tokens) const {
|
|||
if (model.arch == LLM_ARCH_QWEN3NEXT) {
|
||||
return std::max<uint32_t>(n_tokens * 40, 32u * model.n_tensors());
|
||||
}
|
||||
return std::max<uint32_t>(1024u, 8u*model.n_tensors());
|
||||
uint32_t res = std::max<uint32_t>(1024u, 8u*model.n_tensors());
|
||||
res += model.n_lora_nodes;
|
||||
return res;
|
||||
}
|
||||
|
||||
llm_graph_result * llama_context::get_gf_res_reserve() const {
|
||||
|
|
|
|||
|
|
@ -305,7 +305,7 @@ public:
|
|||
bool do_shift,
|
||||
stream_copy_info sc_info);
|
||||
|
||||
// used to create a batch procesing context from a batch
|
||||
// used to create a batch processing context from a batch
|
||||
llama_kv_cache_context(
|
||||
llama_kv_cache * kv,
|
||||
slot_info_vec_t sinfos,
|
||||
|
|
|
|||
|
|
@ -240,9 +240,10 @@ struct llama_file::impl {
|
|||
throw std::runtime_error("unexpectedly reached end of file");
|
||||
}
|
||||
} else {
|
||||
bool successful = false;
|
||||
while (!successful) {
|
||||
off_t ret = read(fd, ptr, len);
|
||||
size_t bytes_read = 0;
|
||||
while (bytes_read < len) {
|
||||
const size_t to_read = len - bytes_read;
|
||||
ssize_t ret = ::read(fd, reinterpret_cast<char *>(ptr) + bytes_read, to_read);
|
||||
|
||||
if (ret == -1) {
|
||||
if (errno == EINTR) {
|
||||
|
|
@ -251,10 +252,16 @@ struct llama_file::impl {
|
|||
throw std::runtime_error(format("read error: %s", strerror(errno)));
|
||||
}
|
||||
if (ret == 0) {
|
||||
// EOF: allow if this read was only pulling alignment padding past file end
|
||||
off_t pos = lseek(fd, 0, SEEK_CUR);
|
||||
if (pos != -1 && (size_t) pos == size) {
|
||||
std::memset(reinterpret_cast<char *>(ptr) + bytes_read, 0, len - bytes_read);
|
||||
return;
|
||||
}
|
||||
throw std::runtime_error("unexpectedly reached end of file");
|
||||
}
|
||||
|
||||
successful = true;
|
||||
bytes_read += (size_t) ret;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -126,6 +126,7 @@ const char * llm_type_name(llm_type type) {
|
|||
case LLM_TYPE_31B_A3_5B: return "31B.A3.5B";
|
||||
case LLM_TYPE_80B_A3B: return "80B.A3B";
|
||||
case LLM_TYPE_100B_A6B: return "100B.A6B";
|
||||
case LLM_TYPE_102B_A12B: return "102B.A12B";
|
||||
case LLM_TYPE_106B_A12B: return "106B.A12B";
|
||||
case LLM_TYPE_230B_A10B: return "230B.A10B";
|
||||
case LLM_TYPE_235B_A22B: return "235B.A22B";
|
||||
|
|
@ -1778,6 +1779,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
|
|||
|
||||
switch (hparams.n_layer) {
|
||||
case 47: type = LLM_TYPE_106B_A12B; break; // GLM-4.5-Air (46 layers + 1 NextN layer)
|
||||
case 48: type = LLM_TYPE_102B_A12B; break; // Solar Open
|
||||
case 93: type = LLM_TYPE_355B_A32B; break; // GLM-4.5 (92 layers + 1 NextN layer)
|
||||
default: type = LLM_TYPE_UNKNOWN;
|
||||
}
|
||||
|
|
@ -3320,7 +3322,14 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
|
|||
layer.attn_norm_2_b = create_tensor(tn(LLM_TENSOR_ATTN_NORM_2, "bias", i), {n_embd}, TENSOR_NOT_REQUIRED);
|
||||
|
||||
layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, TENSOR_NOT_REQUIRED);
|
||||
layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, layer.ffn_gate ? n_ff : n_ff * 2}, 0);
|
||||
|
||||
const auto tn_ffn_up_weight = tn(LLM_TENSOR_FFN_UP, "weight", i);
|
||||
ggml_tensor * t_ffn_up = ml.get_tensor_meta(tn_ffn_up_weight.str().c_str());
|
||||
const int64_t n_ffn_up = t_ffn_up ? t_ffn_up->ne[1] : n_ff;
|
||||
|
||||
GGML_ASSERT(n_ffn_up == n_ff || n_ffn_up == n_ff * 2);
|
||||
layer.ffn_up = create_tensor(tn_ffn_up_weight, {n_embd, n_ffn_up}, 0);
|
||||
layer.ffn_up_b = create_tensor(tn(LLM_TENSOR_FFN_UP, "bias", i), {n_ffn_up}, TENSOR_NOT_REQUIRED);
|
||||
|
||||
layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd}, 0);
|
||||
layer.ffn_down_b = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, 0);
|
||||
|
|
@ -5206,9 +5215,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
|
|||
layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), { n_embd, n_embd_head_k * n_head }, flags);
|
||||
layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), { n_embd, n_embd_k_gqa }, flags);
|
||||
layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), { n_embd, n_embd_v_gqa }, flags);
|
||||
layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), { n_embd_head_k * n_head }, flags);
|
||||
layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), { n_embd_k_gqa }, flags);
|
||||
layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), { n_embd_v_gqa }, flags);
|
||||
layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), { n_embd_head_k * n_head }, TENSOR_NOT_REQUIRED | flags);
|
||||
layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), { n_embd_k_gqa }, TENSOR_NOT_REQUIRED | flags);
|
||||
layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), { n_embd_v_gqa }, TENSOR_NOT_REQUIRED | flags);
|
||||
|
||||
layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), { n_embd_head_k * n_head, n_embd }, flags);
|
||||
|
||||
|
|
|
|||
|
|
@ -119,6 +119,7 @@ enum llm_type {
|
|||
LLM_TYPE_31B_A3_5B,
|
||||
LLM_TYPE_80B_A3B, // Qwen3 Next
|
||||
LLM_TYPE_100B_A6B,
|
||||
LLM_TYPE_102B_A12B, // Solar-Open
|
||||
LLM_TYPE_106B_A12B, // GLM-4.5-Air
|
||||
LLM_TYPE_230B_A10B, // Minimax M2
|
||||
LLM_TYPE_235B_A22B,
|
||||
|
|
@ -475,6 +476,9 @@ struct llama_model {
|
|||
// for quantize-stats only
|
||||
std::vector<std::pair<std::string, struct ggml_tensor *>> tensors_by_name;
|
||||
|
||||
// for keeping track of extra nodes used by lora adapters
|
||||
uint32_t n_lora_nodes = 0;
|
||||
|
||||
int64_t t_load_us = 0;
|
||||
int64_t t_start_us = 0;
|
||||
|
||||
|
|
|
|||
|
|
@ -355,6 +355,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
|
|||
case LLAMA_VOCAB_PRE_TYPE_STABLELM2:
|
||||
case LLAMA_VOCAB_PRE_TYPE_QWEN2:
|
||||
case LLAMA_VOCAB_PRE_TYPE_HUNYUAN:
|
||||
case LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN:
|
||||
regex_exprs = {
|
||||
// original regex from tokenizer.json
|
||||
// "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
|
||||
|
|
@ -2015,6 +2016,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|||
tokenizer_pre == "minimax-m2") {
|
||||
pre_type = LLAMA_VOCAB_PRE_TYPE_MINIMAX_M2;
|
||||
clean_spaces = false;
|
||||
} else if (
|
||||
tokenizer_pre == "solar-open") {
|
||||
pre_type = LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN;
|
||||
clean_spaces = false;
|
||||
} else {
|
||||
throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
|
||||
}
|
||||
|
|
@ -2358,6 +2363,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|||
|| t.first == "<|end|>"
|
||||
|| t.first == "<|return|>" // o200k_harmony
|
||||
|| t.first == "<|call|>" // o200k_harmony
|
||||
|| t.first == "<|flush|>" // solar-open
|
||||
|| t.first == "<|calls|>" // solar-open
|
||||
|| t.first == "<end_of_turn>"
|
||||
|| t.first == "<|endoftext|>"
|
||||
|| t.first == "<|eom_id|>"
|
||||
|
|
@ -2404,13 +2411,14 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|||
LLAMA_LOG_WARN("%s: special_eom_id is not in special_eog_ids - the tokenizer config may be incorrect\n", __func__);
|
||||
}
|
||||
|
||||
// TODO: workaround for o200k_harmony tokenizer: the "<|end|>" token should not be EOG
|
||||
// we don't have a good way to detect this, so for now, if we have "<|return|>" and "<|call|>" tokens,
|
||||
// TODO: workaround for o200k_harmony and solar-open tokenizer: the "<|end|>" token should not be EOG
|
||||
// we don't have a good way to detect this, so for now, if we have "<|return|>" and "<|call|>" tokens ("<|calls|>" and "<|flush|>" for solar-open),
|
||||
// we remove the "<|end|>" token from the EOG list
|
||||
{
|
||||
bool has_return = false;
|
||||
bool has_call = false;
|
||||
bool has_end = false;
|
||||
bool has_flush = false;
|
||||
|
||||
llama_token end_id = LLAMA_TOKEN_NULL;
|
||||
|
||||
|
|
@ -2420,18 +2428,20 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|||
|
||||
if (id_to_token[tid].text == "<|return|>") {
|
||||
has_return = true;
|
||||
} else if (id_to_token[tid].text == "<|call|>") {
|
||||
} else if (id_to_token[tid].text == "<|call|>" || id_to_token[tid].text == "<|calls|>") {
|
||||
has_call = true;
|
||||
} else if (id_to_token[tid].text == "<|flush|>") {
|
||||
has_flush = true;
|
||||
} else if (id_to_token[tid].text == "<|end|>") {
|
||||
has_end = true;
|
||||
end_id = tid;
|
||||
}
|
||||
}
|
||||
|
||||
if (has_return && has_call && has_end) {
|
||||
if ((has_return && has_call && has_end) || (has_call && has_flush && has_end)) {
|
||||
special_eog_ids.erase(end_id);
|
||||
id_to_token[end_id].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
|
||||
LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
|
||||
LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>', or '<|calls|>' and '<|flush|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@ enum llama_vocab_pre_type {
|
|||
LLAMA_VOCAB_PRE_TYPE_GRANITE_DOCLING = 40,
|
||||
LLAMA_VOCAB_PRE_TYPE_MINIMAX_M2 = 41,
|
||||
LLAMA_VOCAB_PRE_TYPE_AFMOE = 42,
|
||||
LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN = 43,
|
||||
};
|
||||
|
||||
struct LLM_KV;
|
||||
|
|
|
|||
|
|
@ -142,11 +142,13 @@ llm_build_bert::llm_build_bert(const llama_model & model, const llm_graph_params
|
|||
LLM_FFN_GELU, LLM_FFN_SEQ, il);
|
||||
cb(cur, "ffn_out", il);
|
||||
} else if (model.arch == LLM_ARCH_JINA_BERT_V2) {
|
||||
const bool up_contains_gate = !model.layers[il].ffn_gate && model.layers[il].ffn_up->ne[1] != hparams.n_ff();
|
||||
auto type_op = up_contains_gate ? LLM_FFN_GEGLU : LLM_FFN_GELU;
|
||||
cur = build_ffn(cur,
|
||||
model.layers[il].ffn_up, NULL, NULL,
|
||||
model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL,
|
||||
model.layers[il].ffn_gate, NULL, NULL,
|
||||
model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL, NULL,
|
||||
model.layers[il].ffn_gate ? LLM_FFN_GELU : LLM_FFN_GEGLU, LLM_FFN_PAR, il);
|
||||
type_op, LLM_FFN_PAR, il);
|
||||
cb(cur, "ffn_out", il);
|
||||
} else {
|
||||
cur = build_ffn(cur,
|
||||
|
|
|
|||
|
|
@ -1158,6 +1158,7 @@ struct test_case {
|
|||
}
|
||||
|
||||
// Controls whether an explicit list of nodes to verify is handed to
// ggml_backend_compare_graph_backend: when this returns false (the default),
// nullptr is passed as the test-node list instead.
// NOTE(review): presumably "true" means the graph is evaluated as a whole (fusion tests) - confirm.
virtual bool run_whole_graph() { return false; }
|
||||
// Nodes to verify in the backend comparison. The default is an empty list; in that case,
// when run_whole_graph() is true, the comparison falls back to `out` as the only node to verify.
virtual std::vector<ggml_tensor *> fusion_test_nodes() { return {}; }
|
||||
|
||||
ggml_cgraph * gf = nullptr;
|
||||
ggml_cgraph * gb = nullptr;
|
||||
|
|
@ -1391,7 +1392,13 @@ struct test_case {
|
|||
GGML_UNUSED(index);
|
||||
};
|
||||
|
||||
const bool cmp_ok = ggml_backend_compare_graph_backend(backend1, backend2, gf, callback, &ud, run_whole_graph() ? out : nullptr);
|
||||
std::vector<ggml_tensor *> fused_nodes_to_verify = fusion_test_nodes();
|
||||
if (fused_nodes_to_verify.size() == 0 && run_whole_graph()) {
|
||||
fused_nodes_to_verify.push_back(out);
|
||||
}
|
||||
const bool cmp_ok = ggml_backend_compare_graph_backend(backend1, backend2, gf, callback, &ud,
|
||||
run_whole_graph() ? fused_nodes_to_verify.data() : nullptr,
|
||||
fused_nodes_to_verify.size());
|
||||
|
||||
ggml_backend_buffer_free(buf);
|
||||
|
||||
|
|
@ -5180,6 +5187,8 @@ struct test_topk_moe : public test_case {
|
|||
const bool bias_probs;
|
||||
const MoeGatingFunc gating_func;
|
||||
const float scale_w;
|
||||
ggml_tensor * weights {};
|
||||
ggml_tensor * selected_experts {};
|
||||
|
||||
test_topk_moe(std::array<int64_t, 4> ne = { 10, 5, 1, 1 },
|
||||
int n_expert_used = 1,
|
||||
|
|
@ -5217,16 +5226,16 @@ struct test_topk_moe : public test_case {
|
|||
|
||||
ggml_tensor * selection_probs = probs;
|
||||
if (bias_probs) {
|
||||
ggml_tensor * exp_probs_b = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne.data());
|
||||
ggml_tensor * exp_probs_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ne[0]);
|
||||
ggml_set_name(exp_probs_b, "exp_probs_b");
|
||||
selection_probs = ggml_add(ctx, probs, exp_probs_b);
|
||||
ggml_set_name(selection_probs, "selection_probs");
|
||||
}
|
||||
|
||||
ggml_tensor * selected_experts = ggml_argsort_top_k(ctx, selection_probs, n_expert_used); // [n_expert_used, n_tokens]
|
||||
selected_experts = ggml_argsort_top_k(ctx, selection_probs, n_expert_used); // [n_expert_used, n_tokens]
|
||||
ggml_set_name(selected_experts, "selected_experts");
|
||||
|
||||
ggml_tensor * weights = ggml_get_rows(ctx, ggml_reshape_3d(ctx, probs, 1, n_expert, n_tokens), selected_experts); // [1, n_expert_used, n_tokens]
|
||||
weights = ggml_get_rows(ctx, ggml_reshape_3d(ctx, probs, 1, n_expert, n_tokens), selected_experts); // [1, n_expert_used, n_tokens]
|
||||
ggml_set_name(weights, "weights");
|
||||
|
||||
if (gating_func == GATING_FUNC_SOFTMAX_WEIGHT) {
|
||||
|
|
@ -5252,6 +5261,21 @@ struct test_topk_moe : public test_case {
|
|||
ggml_set_name(weights, "weights");
|
||||
return weights;
|
||||
}
|
||||
// Verify two outputs
|
||||
std::vector<ggml_tensor *> fusion_test_nodes() override { return { selected_experts, weights }; }
|
||||
|
||||
// allow output in arbitrary order
|
||||
double err(const float * a, const float * b, size_t n) override {
|
||||
std::vector<float> a2(n);
|
||||
std::vector<float> b2(n);
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
a2[i] = a[i];
|
||||
b2[i] = b[i];
|
||||
}
|
||||
std::sort(a2.begin(), a2.end());
|
||||
std::sort(b2.begin(), b2.end());
|
||||
return nmse(a2.data(), b2.data(), n);
|
||||
}
|
||||
};
|
||||
|
||||
struct test_mul_mat_vec_fusion : public test_case {
|
||||
|
|
|
|||
|
|
@ -650,7 +650,7 @@ static void test_msgs_oaicompat_json_conversion() {
|
|||
"[\n"
|
||||
" {\n"
|
||||
" \"role\": \"assistant\",\n"
|
||||
" \"content\": null,\n"
|
||||
" \"content\": \"\",\n"
|
||||
" \"tool_calls\": [\n"
|
||||
" {\n"
|
||||
" \"type\": \"function\",\n"
|
||||
|
|
@ -724,6 +724,30 @@ static void test_tools_oaicompat_json_conversion() {
|
|||
"]"
|
||||
),
|
||||
common_chat_tools_to_json_oaicompat<json>({special_function_tool}).dump(2));
|
||||
|
||||
{
|
||||
auto tools_no_params = common_chat_tools_parse_oaicompat(json::parse(
|
||||
R"([{"type": "function", "function": {"name": "test_func", "description": "A test"}}])"));
|
||||
assert_equals((size_t) 1, tools_no_params.size());
|
||||
assert_equals(std::string("test_func"), tools_no_params[0].name);
|
||||
assert_equals(std::string("A test"), tools_no_params[0].description);
|
||||
assert_equals(std::string("{}"), tools_no_params[0].parameters);
|
||||
}
|
||||
{
|
||||
auto tools_no_desc = common_chat_tools_parse_oaicompat(json::parse(
|
||||
R"([{"type": "function", "function": {"name": "test_func", "parameters": {"type": "object"}}}])"));
|
||||
assert_equals((size_t) 1, tools_no_desc.size());
|
||||
assert_equals(std::string("test_func"), tools_no_desc[0].name);
|
||||
assert_equals(std::string(""), tools_no_desc[0].description);
|
||||
}
|
||||
{
|
||||
auto tools_minimal = common_chat_tools_parse_oaicompat(json::parse(
|
||||
R"([{"type": "function", "function": {"name": "test_func"}}])"));
|
||||
assert_equals((size_t) 1, tools_minimal.size());
|
||||
assert_equals(std::string("test_func"), tools_minimal[0].name);
|
||||
assert_equals(std::string(""), tools_minimal[0].description);
|
||||
assert_equals(std::string("{}"), tools_minimal[0].parameters);
|
||||
}
|
||||
}
|
||||
|
||||
static void test_template_output_parsers() {
|
||||
|
|
@ -906,7 +930,8 @@ static void test_template_output_parsers() {
|
|||
" },\n"
|
||||
" \"id\": \"123456789\"\n"
|
||||
" }\n"
|
||||
" ]\n"
|
||||
" ],\n"
|
||||
" \"content\": \"\"\n"
|
||||
"}");
|
||||
}
|
||||
{
|
||||
|
|
@ -1713,7 +1738,8 @@ static void test_template_output_parsers() {
|
|||
" },\n"
|
||||
" \"id\": \"123456789\"\n"
|
||||
" }\n"
|
||||
" ]\n"
|
||||
" ],\n"
|
||||
" \"content\": \"\"\n"
|
||||
"}",
|
||||
/* expect_grammar_triggered= */ false
|
||||
);
|
||||
|
|
|
|||
|
|
@ -180,6 +180,7 @@ enum projector_type {
|
|||
PROJECTOR_TYPE_GLMA,
|
||||
PROJECTOR_TYPE_QWEN25O, // will be replaced by QWEN2A or QWEN25VL depending on clip_ctx
|
||||
PROJECTOR_TYPE_VOXTRAL,
|
||||
PROJECTOR_TYPE_MUSIC_FLAMINGO,
|
||||
PROJECTOR_TYPE_LFM2,
|
||||
PROJECTOR_TYPE_KIMIVL,
|
||||
PROJECTOR_TYPE_LIGHTONOCR,
|
||||
|
|
@ -209,6 +210,7 @@ static std::map<projector_type, std::string> PROJECTOR_TYPE_NAMES = {
|
|||
{ PROJECTOR_TYPE_GLMA, "glma"},
|
||||
{ PROJECTOR_TYPE_QWEN25O, "qwen2.5o"},
|
||||
{ PROJECTOR_TYPE_VOXTRAL, "voxtral"},
|
||||
{ PROJECTOR_TYPE_MUSIC_FLAMINGO, "musicflamingo"},
|
||||
{ PROJECTOR_TYPE_LFM2, "lfm2"},
|
||||
{ PROJECTOR_TYPE_KIMIVL, "kimivl"},
|
||||
{ PROJECTOR_TYPE_LIGHTONOCR,"lightonocr"},
|
||||
|
|
|
|||
|
|
@ -319,7 +319,8 @@ struct clip_model {
|
|||
|
||||
bool audio_has_avgpool() const {
|
||||
return proj_type == PROJECTOR_TYPE_QWEN2A
|
||||
|| proj_type == PROJECTOR_TYPE_VOXTRAL;
|
||||
|| proj_type == PROJECTOR_TYPE_VOXTRAL
|
||||
|| proj_type == PROJECTOR_TYPE_MUSIC_FLAMINGO;
|
||||
}
|
||||
|
||||
bool audio_has_stack_frames() const {
|
||||
|
|
|
|||
|
|
@ -818,6 +818,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
|
|||
case PROJECTOR_TYPE_VOXTRAL:
|
||||
case PROJECTOR_TYPE_QWEN2A:
|
||||
case PROJECTOR_TYPE_GLMA:
|
||||
case PROJECTOR_TYPE_MUSIC_FLAMINGO:
|
||||
{
|
||||
builder = std::make_unique<clip_graph_whisper_enc>(ctx, img);
|
||||
} break;
|
||||
|
|
@ -1176,6 +1177,7 @@ struct clip_model_loader {
|
|||
case PROJECTOR_TYPE_QWEN2A:
|
||||
case PROJECTOR_TYPE_GLMA:
|
||||
case PROJECTOR_TYPE_VOXTRAL:
|
||||
case PROJECTOR_TYPE_MUSIC_FLAMINGO:
|
||||
{
|
||||
bool require_stack = model.proj_type == PROJECTOR_TYPE_ULTRAVOX ||
|
||||
model.proj_type == PROJECTOR_TYPE_VOXTRAL ||
|
||||
|
|
@ -1576,6 +1578,17 @@ struct clip_model_loader {
|
|||
model.mm_1_w = get_tensor(string_format(TN_MM_AUDIO_MLP, 1, "weight"));
|
||||
model.mm_2_w = get_tensor(string_format(TN_MM_AUDIO_MLP, 2, "weight"));
|
||||
} break;
|
||||
case PROJECTOR_TYPE_MUSIC_FLAMINGO:
|
||||
{
|
||||
model.conv1d_1_w = get_tensor(string_format(TN_CONV1D, 1, "weight"));
|
||||
model.conv1d_1_b = get_tensor(string_format(TN_CONV1D, 1, "bias"));
|
||||
model.conv1d_2_w = get_tensor(string_format(TN_CONV1D, 2, "weight"));
|
||||
model.conv1d_2_b = get_tensor(string_format(TN_CONV1D, 2, "bias"));
|
||||
model.mm_1_w = get_tensor(string_format(TN_MM_AUDIO_MLP, 1, "weight"));
|
||||
model.mm_1_b = get_tensor(string_format(TN_MM_AUDIO_MLP, 1, "bias"));
|
||||
model.mm_2_w = get_tensor(string_format(TN_MM_AUDIO_MLP, 2, "weight"));
|
||||
model.mm_2_b = get_tensor(string_format(TN_MM_AUDIO_MLP, 2, "bias"));
|
||||
} break;
|
||||
case PROJECTOR_TYPE_INTERNVL:
|
||||
{
|
||||
model.mm_0_w = get_tensor(string_format(TN_MVLM_PROJ_MLP, 0, "weight"));
|
||||
|
|
@ -3031,6 +3044,7 @@ int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * im
|
|||
case PROJECTOR_TYPE_VOXTRAL:
|
||||
case PROJECTOR_TYPE_ULTRAVOX:
|
||||
case PROJECTOR_TYPE_QWEN2A:
|
||||
case PROJECTOR_TYPE_MUSIC_FLAMINGO:
|
||||
{
|
||||
n_patches = img->nx;
|
||||
|
||||
|
|
@ -3403,6 +3417,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
|||
case PROJECTOR_TYPE_ULTRAVOX:
|
||||
case PROJECTOR_TYPE_LFM2:
|
||||
case PROJECTOR_TYPE_VOXTRAL:
|
||||
case PROJECTOR_TYPE_MUSIC_FLAMINGO:
|
||||
case PROJECTOR_TYPE_JANUS_PRO:
|
||||
case PROJECTOR_TYPE_COGVLM:
|
||||
{
|
||||
|
|
@ -3526,6 +3541,7 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) {
|
|||
return ctx->model.projection->ne[1];
|
||||
case PROJECTOR_TYPE_ULTRAVOX:
|
||||
case PROJECTOR_TYPE_VOXTRAL:
|
||||
case PROJECTOR_TYPE_MUSIC_FLAMINGO:
|
||||
return ctx->model.mm_2_w->ne[1];
|
||||
case PROJECTOR_TYPE_INTERNVL:
|
||||
return ctx->model.mm_3_w->ne[1];
|
||||
|
|
@ -3587,7 +3603,8 @@ bool clip_has_whisper_encoder(const struct clip_ctx * ctx) {
|
|||
return ctx->proj_type() == PROJECTOR_TYPE_ULTRAVOX
|
||||
|| ctx->proj_type() == PROJECTOR_TYPE_QWEN2A
|
||||
|| ctx->proj_type() == PROJECTOR_TYPE_GLMA
|
||||
|| ctx->proj_type() == PROJECTOR_TYPE_VOXTRAL;
|
||||
|| ctx->proj_type() == PROJECTOR_TYPE_VOXTRAL
|
||||
|| ctx->proj_type() == PROJECTOR_TYPE_MUSIC_FLAMINGO;
|
||||
}
|
||||
|
||||
bool clip_encode_float_image (struct clip_ctx * ctx, int n_threads, float * img, int h, int w, float * vec) {
|
||||
|
|
|
|||
|
|
@ -86,6 +86,15 @@ ggml_cgraph * clip_graph_whisper_enc::build() {
|
|||
FFN_GELU_ERF,
|
||||
-1);
|
||||
|
||||
} else if (proj_type == PROJECTOR_TYPE_MUSIC_FLAMINGO) {
|
||||
// projector
|
||||
cur = build_ffn(cur,
|
||||
model.mm_1_w, model.mm_1_b,
|
||||
nullptr, nullptr,
|
||||
model.mm_2_w, model.mm_2_b,
|
||||
FFN_GELU_ERF,
|
||||
-1);
|
||||
|
||||
} else if (proj_type == PROJECTOR_TYPE_GLMA) {
|
||||
cur = ggml_norm(ctx0, cur, hparams.eps);
|
||||
cur = ggml_mul(ctx0, cur, model.mm_norm_pre_w);
|
||||
|
|
|
|||
|
|
@ -330,6 +330,7 @@ struct mtmd_context {
|
|||
case PROJECTOR_TYPE_ULTRAVOX:
|
||||
case PROJECTOR_TYPE_VOXTRAL:
|
||||
case PROJECTOR_TYPE_GLMA:
|
||||
case PROJECTOR_TYPE_MUSIC_FLAMINGO:
|
||||
audio_preproc = std::make_unique<mtmd_audio_preprocessor_whisper>(ctx_a);
|
||||
break;
|
||||
case PROJECTOR_TYPE_LFM2A:
|
||||
|
|
@ -352,6 +353,9 @@ struct mtmd_context {
|
|||
// [BEGIN_AUDIO] ... (embeddings) ...
|
||||
aud_beg = "[BEGIN_AUDIO]";
|
||||
|
||||
} else if (proj == PROJECTOR_TYPE_MUSIC_FLAMINGO) {
|
||||
// <sound> ... (embeddings) ...
|
||||
aud_beg = "<sound>";
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@
|
|||
#include <cmath>
|
||||
#include <cctype>
|
||||
#include <algorithm>
|
||||
#include <filesystem>
|
||||
|
||||
struct quant_option {
|
||||
std::string name;
|
||||
|
|
@ -643,6 +644,11 @@ int main(int argc, char ** argv) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
if (std::error_code ec; std::filesystem::equivalent(fname_inp, fname_out, ec)) {
|
||||
fprintf(stderr, "%s: error: input and output files are the same: '%s'\n", __func__, fname_inp.c_str());
|
||||
return 1;
|
||||
}
|
||||
|
||||
print_build_info();
|
||||
|
||||
fprintf(stderr, "%s: quantizing '%s' to '%s' as %s", __func__, fname_inp.c_str(), fname_out.c_str(), ftype_str.c_str());
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -65,10 +65,7 @@ export async function copyCodeToClipboard(
|
|||
successMessage = 'Code copied to clipboard',
|
||||
errorMessage = 'Failed to copy code'
|
||||
): Promise<boolean> {
|
||||
const doc = new DOMParser().parseFromString(rawCode, 'text/html');
|
||||
const decodedCode = doc.body.textContent ?? rawCode;
|
||||
|
||||
return copyToClipboard(decodedCode, successMessage, errorMessage);
|
||||
return copyToClipboard(rawCode, successMessage, errorMessage);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Reference in New Issue