Compare commits

21 Commits

| SHA1 |
|---|
| e86f3c2221 |
| 169ee68ffb |
| ced765be44 |
| 3ccccc83f7 |
| d0a6a31470 |
| 2b2afade9f |
| f4f5019254 |
| d5574c919c |
| 26831bded9 |
| be47fb9285 |
| 9e10bd2eaf |
| 4cd162a123 |
| 13814eb370 |
| 54f67b9b66 |
| 33ded988ba |
| 0db8109849 |
| 9b8329de7a |
| 9a6369bb60 |
| ecc343de63 |
| 01ade96e71 |
| 7bcaf815c2 |
@@ -1395,6 +1395,14 @@ static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
     builder.consume_reasoning_with_xml_tool_calls(form, "<seed:think>", "</seed:think>");
 }

+static void common_chat_parse_solar_open(common_chat_msg_parser & builder) {
+    builder.try_parse_reasoning("<|think|>", "<|end|><|begin|>assistant<|content|>");
+
+    // TODO: Tool calling
+
+    builder.add_content(builder.consume_rest());
+}
+
 static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
     builder.try_parse_reasoning("<think>", "</think>");
     builder.add_content(builder.consume_rest());
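For reference, a minimal Python sketch (illustrative only, not the llama.cpp parser) of the message shape the new `common_chat_parse_solar_open` above expects: the `<|think|>` opener and the `<|end|><|begin|>assistant<|content|>` separator delimit the reasoning block from the visible content. The sample text is made up.

```python
# Hypothetical raw Solar Open completion; markers taken from the hunk above.
raw = "<|think|>planning the answer...<|end|><|begin|>assistant<|content|>Hello!"

reasoning, _, content = raw.removeprefix("<|think|>").partition(
    "<|end|><|begin|>assistant<|content|>")

print(reasoning)  # planning the answer...
print(content)    # Hello!
```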
@@ -1479,6 +1487,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
         case COMMON_CHAT_FORMAT_XIAOMI_MIMO:
             common_chat_parse_xiaomi_mimo(builder);
             break;
+        case COMMON_CHAT_FORMAT_SOLAR_OPEN:
+            common_chat_parse_solar_open(builder);
+            break;
         default:
             throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
     }
@@ -380,8 +380,8 @@ std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const json & too
         const auto & function = tool.at("function");
         result.push_back({
             /* .name = */ function.at("name"),
-            /* .description = */ function.at("description"),
-            /* .parameters = */ function.at("parameters").dump(),
+            /* .description = */ function.value("description", ""),
+            /* .parameters = */ function.value("parameters", json::object()).dump(),
         });
     }
 }
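A minimal sketch of the kind of payload this change is aimed at (hypothetical tool, illustrative only): an OpenAI-compatible `tools` entry that omits the optional `description` and `parameters` fields. With the `value()` fallbacks above, `common_chat_tools_parse_oaicompat` substitutes `""` and `{}` instead of throwing on the missing keys.

```python
import json

# Hypothetical tool definition with no "description" or "parameters".
tools = [{
    "type": "function",
    "function": {
        "name": "get_current_time",  # made-up tool name
        # "description" and "parameters" intentionally omitted
    },
}]
print(json.dumps(tools, indent=2))
```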
@@ -669,6 +669,7 @@ const char * common_chat_format_name(common_chat_format format) {
         case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder";
         case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
         case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
+        case COMMON_CHAT_FORMAT_SOLAR_OPEN: return "Solar Open";
         case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple";
         case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native";
         case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed";
@@ -2517,6 +2518,27 @@ static common_chat_params common_chat_params_init_granite(const common_chat_temp
     return data;
 }

+static common_chat_params common_chat_params_init_solar_open(const common_chat_template & tmpl, const struct templates_params & inputs) {
+    common_chat_params data;
+
+    // TODO: Reasoning effort
+    json additional_context = {};
+
+    data.prompt = apply(tmpl, inputs, std::nullopt, std::nullopt, additional_context);
+    data.format = COMMON_CHAT_FORMAT_SOLAR_OPEN;
+
+    data.preserved_tokens = {
+        "<|think|>",
+        "<|content|>",
+        "<|begin|>",
+        "<|end|>",
+    };
+
+    // TODO: Tool calling
+
+    return data;
+}
+
 static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
     common_chat_params data;
     data.prompt = apply(tmpl, inputs);
@@ -2780,6 +2802,13 @@ static common_chat_params common_chat_templates_apply_jinja(
         return common_chat_params_init_magistral(tmpl, params);
     }

+    // Solar Open
+    if (src.find("<|tool_response:begin|>") != std::string::npos &&
+        src.find("<|tool_response:name|>") != std::string::npos &&
+        src.find("<|tool_response:result|>") != std::string::npos) {
+        return common_chat_params_init_solar_open(tmpl, params);
+    }
+
     // Plain handler (no tools)
     if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
         return common_chat_params_init_without_tools(tmpl, params);
@@ -124,6 +124,7 @@ enum common_chat_format {
     COMMON_CHAT_FORMAT_QWEN3_CODER_XML,
     COMMON_CHAT_FORMAT_APRIEL_1_5,
     COMMON_CHAT_FORMAT_XIAOMI_MIMO,
+    COMMON_CHAT_FORMAT_SOLAR_OPEN,

     // These are intended to be parsed by the PEG parser
     COMMON_CHAT_FORMAT_PEG_SIMPLE,
@@ -1062,6 +1062,9 @@ class TextModel(ModelBase):
         if chkhsh == "66b8d4e19ab16c3bfd89bce5d785fb7e0155e8648708a1f42077cb9fe002c273":
             # ref: https://huggingface.co/alvarobartt/grok-2-tokenizer
             res = "grok-2"
+        if chkhsh == "b3d1dd861f1d4c5c0d2569ce36baf3f90fe8a102db3de50dd71ff860d91be3df":
+            # ref: https://huggingface.co/aari1995/German_Semantic_V3
+            res = "jina-v2-de"
         if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5":
             # ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
             res = "llama-bpe"
@@ -1230,6 +1233,12 @@ class TextModel(ModelBase):
         if chkhsh == "4a2e2abae11ca2b86d570fc5b44be4d5eb5e72cc8f22dd136a94b37da83ab665":
             # ref: https://huggingface.co/KORMo-Team/KORMo-tokenizer
             res = "kormo"
+        if chkhsh == "9d70134b369a70e5735009b6de918f7581b5211f7c074d1f89f753aea8248af1":
+            # ref: https://huggingface.co/tencent/Youtu-LLM-2B
+            res = "youtu"
+        if chkhsh == "16389f0a1f51ee53e562ffd51c371dc508639ab0e4261502071836e50e223e91":
+            # ref: https://huggingface.co/upstage/Solar-Open-100B
+            res = "solar-open"

         if res is None:
             logger.warning("\n")
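For orientation, a rough sketch of where these `chkhsh` strings come from, paraphrasing the script's `get_vocab_base_pre()` (not a verbatim excerpt): the tokenizer encodes a fixed probe string and the SHA-256 of the resulting token-id list identifies the pre-tokenizer. The probe text lives in the script and is elided here; the repo name is one of the entries above.

```python
from hashlib import sha256
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("aari1995/German_Semantic_V3")  # example repo from the hunk
chktxt = "..."  # placeholder for the fixed probe string defined in the script
chkhsh = sha256(str(tokenizer.encode(chktxt)).encode()).hexdigest()
print(chkhsh)   # matched against the hard-coded hashes above, e.g. res = "jina-v2-de"
```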
@@ -2486,6 +2495,7 @@ class StableLMModel(TextModel):
     "VLlama3ForCausalLM",
     "LlavaForConditionalGeneration",
     "VoxtralForConditionalGeneration",
+    "IQuestCoderForCausalLM",
     "LlamaModel")
 class LlamaModel(TextModel):
     model_arch = gguf.MODEL_ARCH.LLAMA
@@ -3503,7 +3513,7 @@ class QwenModel(TextModel):
         self._set_vocab_qwen()


-@ModelBase.register("Qwen2Model", "Qwen2ForCausalLM", "Qwen2AudioForConditionalGeneration", "KORMoForCausalLM")
+@ModelBase.register("Qwen2Model", "Qwen2ForCausalLM", "Qwen2AudioForConditionalGeneration", "KORMoForCausalLM", "AudioFlamingo3ForConditionalGeneration")
 class Qwen2Model(TextModel):
     model_arch = gguf.MODEL_ARCH.QWEN2

@@ -5284,13 +5294,14 @@ class BertModel(TextModel):
         self.gguf_writer.add_token_type_count(self.hparams.get("type_vocab_size", 1))

         # convert to phantom space vocab
-        def phantom(tok):
-            if tok.startswith("[") and tok.endswith("]"):
+        def phantom(tok, toktype):
+            if toktype == gguf.TokenType.CONTROL:
                 return tok
             if tok.startswith("##"):
                 return tok[2:]
             return "\u2581" + tok
-        tokens = list(map(phantom, tokens))
+        assert len(tokens) == len(toktypes)
+        tokens = list(map(phantom, tokens, toktypes))

         # add vocab to gguf
         self.gguf_writer.add_tokenizer_model("bert")
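A minimal sketch of the phantom-space conversion above, run on made-up tokens (assumes the `gguf` Python package is installed): control tokens pass through unchanged, `##`-continuations lose the prefix, and everything else gains a leading U+2581 "phantom space".

```python
import gguf

def phantom(tok, toktype):
    # Same logic as the hunk above: decide by token type, not by "[...]" spelling.
    if toktype == gguf.TokenType.CONTROL:
        return tok
    if tok.startswith("##"):
        return tok[2:]
    return "\u2581" + tok

tokens   = ["[CLS]", "hello", "##ing"]                      # made-up sample vocab
toktypes = [gguf.TokenType.CONTROL, gguf.TokenType.NORMAL, gguf.TokenType.NORMAL]
print(list(map(phantom, tokens, toktypes)))                  # ['[CLS]', '▁hello', 'ing']
```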
@@ -7181,6 +7192,7 @@ class DeepseekModel(TextModel):
     "DeepseekV2ForCausalLM",
     "DeepseekV3ForCausalLM",
     "KimiVLForConditionalGeneration",
+    "YoutuForCausalLM",
 )
 class DeepseekV2Model(TextModel):
     model_arch = gguf.MODEL_ARCH.DEEPSEEK2
@@ -7247,7 +7259,15 @@
         super().set_gguf_parameters()
         hparams = self.hparams

-        self.gguf_writer.add_leading_dense_block_count(hparams["first_k_dense_replace"])
+        # first_k_dense_replace: number of leading layers using dense FFN instead of MoE
+        # For non-MoE models (like Youtu), set to n_layer to use dense FFN for all layers
+        # For MoE models (like DeepSeek-V2), this is the number of leading non-MoE layers
+        has_moe = hparams.get("n_routed_experts") is not None
+        first_k_dense_replace = hparams.get("first_k_dense_replace")
+        if first_k_dense_replace is None:
+            # Default: if no MoE, all layers are dense; if MoE, none are dense
+            first_k_dense_replace = hparams["num_hidden_layers"] if not has_moe else 0
+        self.gguf_writer.add_leading_dense_block_count(first_k_dense_replace)
         self.gguf_writer.add_vocab_size(hparams["vocab_size"])
         if "q_lora_rank" in hparams and hparams["q_lora_rank"] is not None:
             self.gguf_writer.add_q_lora_rank(hparams["q_lora_rank"])
@@ -7259,11 +7279,24 @@
         self.gguf_writer.add_key_length_mla(hparams["qk_nope_head_dim"] + hparams["qk_rope_head_dim"])
         self.gguf_writer.add_value_length_mla(hparams["v_head_dim"])

-        self.gguf_writer.add_expert_feed_forward_length(hparams["moe_intermediate_size"])
-        self.gguf_writer.add_expert_count(hparams["n_routed_experts"])
-        self.gguf_writer.add_expert_shared_count(hparams["n_shared_experts"])
-        self.gguf_writer.add_expert_weights_scale(hparams["routed_scaling_factor"])
-        self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"])
+        # MoE parameters (required by C++ code for DEEPSEEK2 arch)
+        # For non-MoE models like Youtu, use intermediate_size as expert_feed_forward_length
+        moe_intermediate_size = self.find_hparam(["moe_intermediate_size", "intermediate_size"], optional=False)
+        self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size)
+
+        if (n_routed_experts := hparams.get("n_routed_experts")) is not None:
+            self.gguf_writer.add_expert_count(n_routed_experts)
+
+        # expert_shared_count is required by C++ code, default to 0 for non-MoE models
+        n_shared_experts = hparams.get("n_shared_experts", 0)
+        self.gguf_writer.add_expert_shared_count(n_shared_experts)
+
+        # When not set, C++ code will use scale_w = false to skip the no-op scaling
+        if (routed_scaling_factor := hparams.get("routed_scaling_factor")) is not None:
+            self.gguf_writer.add_expert_weights_scale(routed_scaling_factor)
+
+        if (norm_topk_prob := hparams.get("norm_topk_prob")) is not None and norm_topk_prob:
+            self.gguf_writer.add_expert_weights_norm(norm_topk_prob)
+
         self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])

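A minimal sketch (hypothetical hparams, made-up values) of how the fallbacks in the two hunks above behave for a dense, non-MoE checkpoint: with no `n_routed_experts`, every layer is treated as dense and the shared-expert count defaults to 0.

```python
hparams = {"num_hidden_layers": 24, "intermediate_size": 8192}  # illustrative only

has_moe = hparams.get("n_routed_experts") is not None            # False for this config
first_k_dense_replace = hparams.get("first_k_dense_replace")
if first_k_dense_replace is None:
    first_k_dense_replace = hparams["num_hidden_layers"] if not has_moe else 0

print(first_k_dense_replace)               # 24 -> all layers use dense FFN
print(hparams.get("n_shared_experts", 0))  # 0  -> no shared experts written
```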
@@ -7279,10 +7312,17 @@
         # skip vision tensors and remove "language_model." for Kimi-VL
         if "vision_tower" in name or "multi_modal_projector" in name:
             return []
+        if name.startswith("siglip2.") or name.startswith("merger."):
+            return []
         if name.startswith("language_model."):
             name = name.replace("language_model.", "")

+        # skip lm_head.weight if tie_word_embeddings is True
+        if self.hparams.get("tie_word_embeddings", False):
+            if name == "lm_head.weight" or name == "model.lm_head.weight":
+                logger.info("Skipping tied output layer 'lm_head.weight' (will use token_embd.weight)")
+                return []
+
         # rename e_score_correction_bias tensors
         if name.endswith("e_score_correction_bias"):
             name = name.replace("e_score_correction_bias", "e_score_correction.bias")

@@ -9292,6 +9332,19 @@ class VoxtralWhisperEncoderModel(WhisperEncoderModel):
         self.gguf_writer.add_audio_stack_factor(4) # == intermediate_size // hidden_size


+@ModelBase.register("AudioFlamingo3ForConditionalGeneration")
+class AudioFlamingo3WhisperEncoderModel(WhisperEncoderModel):
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.MUSIC_FLAMINGO)
+
+    def tensor_force_quant(self, name, new_name, bid, n_dims):
+        if ".conv" in name and ".weight" in name:
+            # Was trained in BF16, being safe, avoiding quantizing to FP16
+            return gguf.GGMLQuantizationType.F32
+        return super().tensor_force_quant(name, new_name, bid, n_dims)
+
+
 @ModelBase.register("FalconH1ForCausalLM")
 class FalconH1Model(Mamba2Model):
     model_arch = gguf.MODEL_ARCH.FALCON_H1
@@ -10604,6 +10657,79 @@ class JanusProVisionModel(MmprojModel):
         return []


+@ModelBase.register("YOUTUVLForConditionalGeneration", "YOUTUVLForCausalLM")
+class YOUTUVLVisionModel(MmprojModel):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        assert self.hparams_vision is not None
+        self.hparams_vision["image_size"] = self.hparams_vision.get("image_size", 560)
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+
+        self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.YOUTUVL)
+        self.gguf_writer.add_vision_attention_layernorm_eps(self.hparams.get("layer_norm_eps", 1e-6))
+
+        # Handle activation function
+        hidden_act = str(self.hparams.get("hidden_act", "gelu_pytorch_tanh")).lower()
+        if hidden_act in ("gelu", "gelu_pytorch_tanh", "gelu_fast", "gelu_new", "gelu_accurate"):
+            self.gguf_writer.add_vision_use_gelu(True)
+        elif hidden_act == "silu":
+            self.gguf_writer.add_vision_use_silu(True)
+        else:
+            raise ValueError(f"Unsupported activation function for YOUTUVL: {hidden_act}")
+
+        self.gguf_writer.add_vision_spatial_merge_size(self.hparams.get("spatial_merge_size", 2))
+
+        window_size = self.hparams.get("window_size")
+        if window_size is not None:
+            self.gguf_writer.add_vision_window_size(window_size)
+            # fullatt_block_indexes contains explicit layer indices that use full attention
+            # e.g., [2, 5, 8, 11] means layers 2, 5, 8, 11 use full attention
+            # All other layers use window attention
+            fullatt_block_indexes = self.hparams.get("fullatt_block_indexes")
+            assert fullatt_block_indexes is not None, "fullatt_block_indexes is required for youtuvl"
+            # Store the explicit layer indices for YoutuVL (irregular pattern approach)
+            self.gguf_writer.add_vision_wa_layer_indexes(layers=fullatt_block_indexes)
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid  # unused
+
+        # Skip language model tensors
+        skip_prefixes = ('lm_head.', 'model.layers.', 'model.embed_tokens.', 'model.norm.')
+        if name.startswith(skip_prefixes):
+            return []
+
+        # Try to map the tensor using TensorNameMap (handles vision encoder and projector)
+        try:
+            new_name = self.map_tensor_name(name)
+            return [(new_name, data_torch)]
+        except ValueError:
+            # If mapping fails, log warning and skip
+            logger.warning(f"Cannot map tensor: {name}")
+            return []
+
+
+@ModelBase.register("SolarOpenForCausalLM")
+class SolarOpenModel(Glm4MoeModel):
+    model_arch = gguf.MODEL_ARCH.GLM4_MOE
+
+    def set_vocab(self):
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
+        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
+        tokens, toktypes, tokpre = self.get_vocab_base()
+        self.gguf_writer.add_tokenizer_model("gpt2")
+        self.gguf_writer.add_tokenizer_pre(tokpre)
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_types(toktypes)
+        special_vocab._set_special_token("eos", tokenizer.get_added_vocab()["<|endoftext|>"])
+        special_vocab._set_special_token("eot", tokenizer.get_added_vocab()["<|endoftext|>"])
+        special_vocab._set_special_token("unk", tokenizer.get_added_vocab()["<unk>"])
+        special_vocab._set_special_token("bos", tokenizer.get_added_vocab()["<|startoftext|>"])
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+
 ###### CONVERSION LOGIC ######

@@ -145,6 +145,8 @@ models = [
    {"name": "granite-docling", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ibm-granite/granite-docling-258M", },
    {"name": "minimax-m2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/MiniMaxAI/MiniMax-M2", },
    {"name": "kormo", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/KORMo-Team/KORMo-tokenizer", },
+   {"name": "youtu", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tencent/Youtu-LLM-2B", },
+   {"name": "solar-open", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/upstage/Solar-Open-100B", },
 ]

 # some models are known to be broken upstream, so we will skip them as exceptions
@@ -165,6 +167,8 @@ pre_computed_hashes = [
    {"name": "kimi-k2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/moonshotai/Kimi-K2-Base", "chkhsh": "81212dc7cdb7e0c1074ca62c5aeab0d43c9f52b8a737be7b12a777c953027890"},
    {"name": "qwen2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen3-Embedding-0.6B", "chkhsh": "d4540891389ea895b53b399da6ac824becc30f2fba0e9ddbb98f92e55ca0e97c"},
    {"name": "grok-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/alvarobartt/grok-2-tokenizer", "chkhsh": "66b8d4e19ab16c3bfd89bce5d785fb7e0155e8648708a1f42077cb9fe002c273"},
+   # jina-v2-de variants
+   {"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/aari1995/German_Semantic_V3", "chkhsh": "b3d1dd861f1d4c5c0d2569ce36baf3f90fe8a102db3de50dd71ff860d91be3df"},
 ]

@@ -32,7 +32,7 @@ Legend:
 | CONV_TRANSPOSE_1D | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ |
 | CONV_TRANSPOSE_2D | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ |
 | COS | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | 🟡 | ❌ | ❌ | ❌ |
-| COUNT_EQUAL | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ |
+| COUNT_EQUAL | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ |
 | CPY | ❌ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ | ❌ |
 | CROSS_ENTROPY_LOSS | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
 | CROSS_ENTROPY_LOSS_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |

@@ -965,6 +965,7 @@
 "Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,2560],ne_kernel=[3,3,1,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","yes","Metal"
 "Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,2560],ne_kernel=[3,3,2,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","yes","Metal"
 "Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[5,5,1,32],ne_kernel=[3,4,1,32],s0=1,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","1","yes","Metal"
+"Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[2,2,1536,729],ne_kernel=[2,2,1536,4096],s0=1,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","1","yes","Metal"
 "Metal","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","0","no","Metal"
 "Metal","IM2COL_3D","type_input=f32,type_kernel=f16,dst_type=f32,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","0","no","Metal"
 "Metal","IM2COL_3D","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","0","no","Metal"
@@ -4964,8 +4965,9 @@
 "Metal","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","yes","Metal"
 "Metal","CONV_TRANSPOSE_2D","ne_input=[3,2,3,1],ne_kernel=[2,2,1,3],stride=1","support","1","yes","Metal"
 "Metal","CONV_TRANSPOSE_2D","ne_input=[10,10,9,1],ne_kernel=[3,3,1,9],stride=2","support","1","yes","Metal"
-"Metal","COUNT_EQUAL","type=f32,ne=[4,500,1,1]","support","0","no","Metal"
-"Metal","COUNT_EQUAL","type=f32,ne=[4,5000,1,1]","support","0","no","Metal"
+"Metal","CONV_TRANSPOSE_2D","ne_input=[129,63,35,1],ne_kernel=[3,3,48,35],stride=1","support","1","yes","Metal"
+"Metal","COUNT_EQUAL","type=f32,ne=[4,500,1,1]","support","1","yes","Metal"
+"Metal","COUNT_EQUAL","type=f32,ne=[4,5000,1,1]","support","1","yes","Metal"
 "Metal","ARGMAX","type=f32,ne=[32,1,1,1]","support","1","yes","Metal"
 "Metal","ARGMAX","type=f32,ne=[32,513,1,1]","support","1","yes","Metal"
 "Metal","ARGMAX","type=f32,ne=[100,10,1,1]","support","1","yes","Metal"
@@ -5715,15 +5717,15 @@
 "Metal","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","Metal"
 "Metal","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001,inplace=1","support","1","yes","Metal"
 "Metal","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","Metal"
-"Metal","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","Metal"
+"Metal","SSM_CONV","type=f32,ne_a=[3,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","Metal"
-"Metal","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","Metal"
+"Metal","SSM_CONV","type=f32,ne_a=[6,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","Metal"
-"Metal","SSM_CONV","type=f32,ne_a=[4,1024,4,1],ne_b=[3,1024,1,1]","support","1","yes","Metal"
+"Metal","SSM_CONV","type=f32,ne_a=[3,1024,4,1],ne_b=[3,1024,1,1]","support","1","yes","Metal"
-"Metal","SSM_CONV","type=f32,ne_a=[4,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","Metal"
+"Metal","SSM_CONV","type=f32,ne_a=[3,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","Metal"
-"Metal","SSM_CONV","type=f32,ne_a=[8,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","Metal"
+"Metal","SSM_CONV","type=f32,ne_a=[6,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","Metal"
-"Metal","SSM_CONV","type=f32,ne_a=[4,1536,4,1],ne_b=[3,1536,1,1]","support","1","yes","Metal"
+"Metal","SSM_CONV","type=f32,ne_a=[3,1536,4,1],ne_b=[3,1536,1,1]","support","1","yes","Metal"
-"Metal","SSM_CONV","type=f32,ne_a=[4,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","Metal"
+"Metal","SSM_CONV","type=f32,ne_a=[3,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","Metal"
-"Metal","SSM_CONV","type=f32,ne_a=[8,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","Metal"
+"Metal","SSM_CONV","type=f32,ne_a=[6,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","Metal"
-"Metal","SSM_CONV","type=f32,ne_a=[4,2048,4,1],ne_b=[3,2048,1,1]","support","1","yes","Metal"
+"Metal","SSM_CONV","type=f32,ne_a=[3,2048,4,1],ne_b=[3,2048,1,1]","support","1","yes","Metal"
 "Metal","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[4,1024,1,1]","support","1","yes","Metal"
 "Metal","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[4,1024,1,1]","support","1","yes","Metal"
 "Metal","SSM_CONV","type=f32,ne_a=[4,1024,4,1],ne_b=[4,1024,1,1]","support","1","yes","Metal"
@@ -5733,6 +5735,15 @@
 "Metal","SSM_CONV","type=f32,ne_a=[4,2048,1,1],ne_b=[4,2048,1,1]","support","1","yes","Metal"
 "Metal","SSM_CONV","type=f32,ne_a=[8,2048,1,1],ne_b=[4,2048,1,1]","support","1","yes","Metal"
 "Metal","SSM_CONV","type=f32,ne_a=[4,2048,4,1],ne_b=[4,2048,1,1]","support","1","yes","Metal"
+"Metal","SSM_CONV","type=f32,ne_a=[9,1024,1,1],ne_b=[9,1024,1,1]","support","1","yes","Metal"
+"Metal","SSM_CONV","type=f32,ne_a=[18,1024,1,1],ne_b=[9,1024,1,1]","support","1","yes","Metal"
+"Metal","SSM_CONV","type=f32,ne_a=[9,1024,4,1],ne_b=[9,1024,1,1]","support","1","yes","Metal"
+"Metal","SSM_CONV","type=f32,ne_a=[9,1536,1,1],ne_b=[9,1536,1,1]","support","1","yes","Metal"
+"Metal","SSM_CONV","type=f32,ne_a=[18,1536,1,1],ne_b=[9,1536,1,1]","support","1","yes","Metal"
+"Metal","SSM_CONV","type=f32,ne_a=[9,1536,4,1],ne_b=[9,1536,1,1]","support","1","yes","Metal"
+"Metal","SSM_CONV","type=f32,ne_a=[9,2048,1,1],ne_b=[9,2048,1,1]","support","1","yes","Metal"
+"Metal","SSM_CONV","type=f32,ne_a=[18,2048,1,1],ne_b=[9,2048,1,1]","support","1","yes","Metal"
+"Metal","SSM_CONV","type=f32,ne_a=[9,2048,4,1],ne_b=[9,2048,1,1]","support","1","yes","Metal"
 "Metal","SSM_SCAN","type=f32,d_state=16,head_dim=1,n_head=1024,n_group=1,n_seq_tokens=32,n_seqs=4","support","1","yes","Metal"
 "Metal","SSM_SCAN","type=f32,d_state=128,head_dim=64,n_head=16,n_group=2,n_seq_tokens=32,n_seqs=4","support","1","yes","Metal"
 "Metal","SSM_SCAN","type=f32,d_state=256,head_dim=64,n_head=8,n_group=2,n_seq_tokens=32,n_seqs=4","support","1","yes","Metal"
@@ -8916,6 +8927,8 @@
 "Metal","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","Metal"
 "Metal","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","Metal"
 "Metal","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","Metal"
+"Metal","SOFT_MAX","type=f32,ne=[200001,2,3,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","Metal"
+"Metal","SOFT_MAX","type=f32,ne=[200001,2,3,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","Metal"
 "Metal","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=0.000000","support","0","no","Metal"
 "Metal","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=0.000000","support","0","no","Metal"
 "Metal","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=1.000000,max_bias=0.000000","support","0","no","Metal"
@@ -9542,311 +9555,311 @@
 "Metal","ARGSORT","type=f32,ne=[2048,2,1,3],order=1","support","1","yes","Metal"
 "Metal","ARGSORT","type=f32,ne=[2049,2,1,3],order=1","support","1","yes","Metal"
 "Metal","ARGSORT","type=f32,ne=[2,8,8192,1],order=1","support","1","yes","Metal"
-"Metal","TOP_K","type=f32,ne=[1,1,1,1],k=1","support","1","yes","Metal"
+"Metal","TOP_K","type=f32,ne=[1,1,1,1],k=1,ties=0","support","1","yes","Metal"
-"Metal","TOP_K","type=f32,ne=[12,1,2,1],k=1","support","1","yes","Metal"
+"Metal","TOP_K","type=f32,ne=[12,1,2,1],k=1,ties=0","support","1","yes","Metal"
-"Metal","TOP_K","type=f32,ne=[2,1,1,1],k=1","support","1","yes","Metal"
+"Metal","TOP_K","type=f32,ne=[2,1,1,1],k=1,ties=0","support","1","yes","Metal"
-"Metal","TOP_K","type=f32,ne=[13,1,2,1],k=1","support","1","yes","Metal"
+"Metal","TOP_K","type=f32,ne=[13,1,2,1],k=1,ties=0","support","1","yes","Metal"
-"Metal","TOP_K","type=f32,ne=[2,1,1,1],k=2","support","1","yes","Metal"
+"Metal","TOP_K","type=f32,ne=[2,1,1,1],k=2,ties=0","support","1","yes","Metal"
-"Metal","TOP_K","type=f32,ne=[13,1,2,1],k=2","support","1","yes","Metal"
+"Metal","TOP_K","type=f32,ne=[13,1,2,1],k=2,ties=0","support","1","yes","Metal"
-"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=1","support","1","yes","Metal"
+"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=1,ties=0","support","1","yes","Metal"
-"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=1","support","1","yes","Metal"
+"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=1,ties=0","support","1","yes","Metal"
-"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=2","support","1","yes","Metal"
+"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=2,ties=0","support","1","yes","Metal"
-"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=2","support","1","yes","Metal"
+"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=2,ties=0","support","1","yes","Metal"
-"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=3","support","1","yes","Metal"
+"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=3,ties=0","support","1","yes","Metal"
-"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=3","support","1","yes","Metal"
+"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=3,ties=0","support","1","yes","Metal"
"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=500","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=500","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=500","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=500","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=1023","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=1023","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=500","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=500","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=1023","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=1023","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=500","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=500","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=1023","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=1023","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=500","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=500","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=1023","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=1023","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=500","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=500","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1023","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=1023","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=9999","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=9999,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=9999","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=9999,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=500","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=500","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=1023","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=1023","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=9999","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=9999,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=9999","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=9999,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=500","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=500","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=1023","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=1023","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=9999","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=9999,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=9999","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=9999,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=500","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=500","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=1023","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=1023","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=9999","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=9999,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=9999","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=9999,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=500","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=500","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=1023","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=1023","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=9999","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=9999,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=9999","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=9999,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=100","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=100,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=500","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=500,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=500","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=500,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=1023","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=1023,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=1023","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=1023,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=9999","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=9999,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=9999","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=9999,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=1","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=1,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=2","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=2,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=3","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=3,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=7","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=7,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=15","support","1","yes","Metal"
|
"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=15,ties=0","support","1","yes","Metal"
|
||||||
"Metal","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=0","support","1","yes","Metal"
|
"Metal","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=0","support","1","yes","Metal"
|
||||||
"Metal","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=1","support","1","yes","Metal"
|
"Metal","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=1","support","1","yes","Metal"
|
||||||
"Metal","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=nearest,flags=none","support","1","yes","Metal"
|
"Metal","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=nearest,flags=none","support","1","yes","Metal"
|
||||||
|
|
@@ -9891,8 +9904,9 @@
 "Metal","GROUP_NORM","type=f32,ne=[64,64,320,1],num_groups=32,eps=0.000001","support","1","yes","Metal"
 "Metal","GROUP_NORM","type=f32,ne=[9,9,1280,1],num_groups=32,eps=0.000001","support","1","yes","Metal"
 "Metal","ACC","type=f32,ne_a=[256,17,1,1],ne_b=[256,16,1,1]","support","1","yes","Metal"
-"Metal","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1","support","1","yes","Metal"
+"Metal","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1,circular=0","support","1","yes","Metal"
-"Metal","PAD","type=f32,ne_a=[512,512,3,1],lp0=1,rp0=1,lp1=1,rp1=1,lp2=1,rp2=1,lp3=1,rp3=1,v=0","support","0","no","Metal"
+"Metal","PAD","type=f32,ne_a=[33,17,2,1],pad_0=4,pad_1=3,circular=1","support","0","no","Metal"
+"Metal","PAD","type=f32,ne_a=[512,512,3,1],lp0=1,rp0=1,lp1=1,rp1=1,lp2=1,rp2=1,lp3=1,rp3=1,v=0,circular=0","support","0","no","Metal"
 "Metal","PAD_REFLECT_1D","type=f32,ne_a=[512,34,2,1],pad_0=10,pad_1=9","support","1","yes","Metal"
 "Metal","PAD_REFLECT_1D","type=f32,ne_a=[3000,384,4,1],pad_0=10,pad_1=9","support","1","yes","Metal"
 "Metal","ROLL","shift0=3,shift1=-2,shift3=1,shift4=-1","support","0","no","Metal"

@@ -9923,17 +9937,41 @@
 "Metal","FILL","type=f32,ne=[303,207,11,3],c=2.000000","support","1","yes","Metal"
 "Metal","FILL","type=f32,ne=[800,600,4,4],c=-152.000000","support","1","yes","Metal"
 "Metal","FILL","type=f32,ne=[2048,512,2,2],c=3.500000","support","1","yes","Metal"
+"Metal","DIAG","type=f32,ne=[10,1,4,3]","support","0","no","Metal"
+"Metal","DIAG","type=f32,ne=[79,1,19,13]","support","0","no","Metal"
+"Metal","DIAG","type=f32,ne=[256,1,8,16]","support","0","no","Metal"
 "Metal","SOLVE_TRI","type=f32,ne_lhs=[10,10,4,3],ne_rhs=[3,10,4,3]","support","0","no","Metal"
 "Metal","SOLVE_TRI","type=f32,ne_lhs=[11,11,1,1],ne_rhs=[5,11,1,1]","support","0","no","Metal"
 "Metal","SOLVE_TRI","type=f32,ne_lhs=[17,17,2,4],ne_rhs=[9,17,2,4]","support","0","no","Metal"
 "Metal","SOLVE_TRI","type=f32,ne_lhs=[30,30,7,1],ne_rhs=[8,30,7,1]","support","0","no","Metal"
 "Metal","SOLVE_TRI","type=f32,ne_lhs=[42,42,5,2],ne_rhs=[10,42,5,2]","support","0","no","Metal"
 "Metal","SOLVE_TRI","type=f32,ne_lhs=[64,64,2,2],ne_rhs=[10,64,2,2]","support","0","no","Metal"
+"Metal","SOLVE_TRI","type=f32,ne_lhs=[64,64,2,2],ne_rhs=[64,64,2,2]","support","0","no","Metal"
+"Metal","SOLVE_TRI","type=f32,ne_lhs=[79,79,5,3],ne_rhs=[417,79,5,3]","support","0","no","Metal"
+"Metal","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,2],ne_rhs=[32,128,4,2]","support","0","no","Metal"
+"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,2,8],ne_rhs=[80,80,2,8]","support","0","no","Metal"
+"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,2,8],ne_rhs=[79,80,2,8]","support","0","no","Metal"
+"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,2,8],ne_rhs=[81,80,2,8]","support","0","no","Metal"
+"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,8,8],ne_rhs=[80,80,8,8]","support","0","no","Metal"
+"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,8,8],ne_rhs=[79,80,8,8]","support","0","no","Metal"
+"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,8,8],ne_rhs=[81,80,8,8]","support","0","no","Metal"
+"Metal","SOLVE_TRI","type=f32,ne_lhs=[84,84,4,4],ne_rhs=[32,84,4,4]","support","0","no","Metal"
+"Metal","SOLVE_TRI","type=f32,ne_lhs=[95,95,8,8],ne_rhs=[40,95,8,8]","support","0","no","Metal"
 "Metal","SOLVE_TRI","type=f32,ne_lhs=[100,100,4,4],ne_rhs=[41,100,4,4]","support","0","no","Metal"
-"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0","support","1","yes","Metal"
-"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0","support","0","no","Metal"
-"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1","support","1","yes","Metal"
-"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1","support","0","no","Metal"
+"Metal","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,4],ne_rhs=[31,128,4,4]","support","0","no","Metal"
+"Metal","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,4],ne_rhs=[32,128,4,4]","support","0","no","Metal"
+"Metal","SOLVE_TRI","type=f32,ne_lhs=[128,128,3,4],ne_rhs=[32,128,3,4]","support","0","no","Metal"
+"Metal","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,1],ne_rhs=[32,128,4,1]","support","0","no","Metal"
+"Metal","SOLVE_TRI","type=f32,ne_lhs=[64,64,4,4],ne_rhs=[200,64,4,4]","support","0","no","Metal"
+"Metal","SOLVE_TRI","type=f32,ne_lhs=[64,64,4,4],ne_rhs=[384,64,4,4]","support","0","no","Metal"
+"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0,circular=0","support","1","yes","Metal"
+"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0,circular=0","support","0","no","Metal"
+"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0,circular=1","support","0","no","Metal"
+"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0,circular=1","support","0","no","Metal"
+"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1,circular=0","support","1","yes","Metal"
+"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1,circular=0","support","0","no","Metal"
+"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1,circular=1","support","0","no","Metal"
+"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1,circular=1","support","0","no","Metal"
 "Metal","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","Metal"
 "Metal","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","Metal"
 "Metal","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","Metal"

Can't render this file because it is too large.
@@ -4,7 +4,7 @@ project("ggml" C CXX ASM)
 ### GGML Version
 set(GGML_VERSION_MAJOR 0)
 set(GGML_VERSION_MINOR 9)
-set(GGML_VERSION_PATCH 4)
+set(GGML_VERSION_PATCH 5)
 set(GGML_VERSION_BASE "${GGML_VERSION_MAJOR}.${GGML_VERSION_MINOR}.${GGML_VERSION_PATCH}")

 find_program(GIT_EXE NAMES git git.exe NO_CMAKE_FIND_ROOT_PATH)

@@ -358,7 +358,7 @@ extern "C" {
 typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);

 // Compare the output of two backends
-GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data, struct ggml_tensor * test_node);
+GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data, struct ggml_tensor const * const * test_nodes, size_t num_test_nodes);

 // Tensor initialization
 GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
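
Note (added for context, not part of the diff): a minimal sketch of how a caller might use the updated declaration. The names my_eval_cb, backend1, backend2, graph, node_a and node_b are hypothetical; my_eval_cb is any function matching the ggml_backend_eval_callback typedef shown above.

    // nodes of `graph` whose outputs should be compared across the two backends
    struct ggml_tensor const * const test_nodes[] = { node_a, node_b };
    bool ok = ggml_backend_compare_graph_backend(backend1, backend2, graph,
                                                 my_eval_cb, /*user_data=*/NULL,
                                                 test_nodes, /*num_test_nodes=*/2);
    // In the implementation below, false is returned only when the graph cannot be
    // copied to backend2; per-node comparison results are delivered through the callback.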
@@ -2053,7 +2053,7 @@ void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy) {
     ggml_free(copy.ctx_unallocated);
 }

-bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data, struct ggml_tensor * test_node) {
+bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data, struct ggml_tensor const * const * test_nodes, size_t num_test_nodes) {
     struct ggml_backend_graph_copy copy = ggml_backend_graph_copy(backend2, graph);
     if (copy.buffer == NULL) {
         return false;
@@ -2064,22 +2064,22 @@ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t
     assert(g1->n_nodes == g2->n_nodes);

-    if (test_node != nullptr) {
-        // Compute the whole graph and only test the output for a specific tensor
+    if (num_test_nodes != 0) {
+        GGML_ASSERT(test_nodes);
+        // Compute the whole graph and only test the output for specific tensors
         ggml_backend_graph_compute(backend1, g1);
         ggml_backend_graph_compute(backend2, g2);

-        int test_node_idx = -1;
+        bool verified = false;
         for (int i = 0; i < g1->n_nodes; i++) {
-            struct ggml_tensor * t1 = g1->nodes[i];
-            if (t1 == test_node) {
-                test_node_idx = i;
-                break;
+            for (size_t j = 0; j < num_test_nodes; ++j) {
+                if (g1->nodes[i] == test_nodes[j]) {
+                    callback(i, g1->nodes[i], g2->nodes[i], user_data);
+                    verified = true;
+                }
             }
         }
-        GGML_ASSERT(test_node_idx != -1);
-
-        callback(test_node_idx, g1->nodes[test_node_idx], g2->nodes[test_node_idx], user_data);
+        GGML_ASSERT(verified);

     } else {
         for (int i = 0; i < g1->n_nodes; i++) {
             struct ggml_tensor * t1 = g1->nodes[i];
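
Note (added for context, not part of the diff): in the rewritten loop above, the callback fires once for every graph node that matches an entry of test_nodes, and its return value is not consulted on this path, so a caller that wants an overall pass/fail verdict has to accumulate it through user_data. A minimal sketch with hypothetical names (compare_state, count_and_check):

    struct compare_state { int n_checked; bool all_ok; };

    static bool count_and_check(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data) {
        struct compare_state * st = (struct compare_state *) user_data;
        st->n_checked++;
        // read back and compare the contents of t1 (backend1) and t2 (backend2) here,
        // setting st->all_ok = false on a mismatch
        (void) node_index; (void) t1; (void) t2;
        return st->all_ok;
    }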
@@ -12,11 +12,11 @@ const int CUDA_CPY_BLOCK_NM = 8; // block size of 3rd dimension if available
 const int CUDA_CPY_BLOCK_ROWS = 8; // block dimension for marching through rows

 template <cpy_kernel_t cpy_1>
-static __global__ void cpy_scalar(const char * cx, char * cdst, const int ne,
-    const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
-    const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11,
-    const int nb12, const int nb13) {
-    const int64_t i = blockDim.x*blockIdx.x + threadIdx.x;
+static __global__ void cpy_scalar(const char * cx, char * cdst, const int64_t ne,
+    const int64_t ne00, const int64_t ne01, const int64_t ne02, const int64_t nb00, const int64_t nb01, const int64_t nb02,
+    const int64_t nb03, const int64_t ne10, const int64_t ne11, const int64_t ne12, const int64_t nb10, const int64_t nb11,
+    const int64_t nb12, const int64_t nb13) {
+    const int64_t i = (int64_t)blockDim.x*blockIdx.x + threadIdx.x;

     if (i >= ne) {
         return;
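
Note (added for context, not part of the diff): the (int64_t) cast above matters because blockDim.x, blockIdx.x and threadIdx.x are 32-bit unsigned values, so without it the product is computed in 32-bit arithmetic and wraps once the flat element index exceeds 2^32, even though the result is then stored in an int64_t:

    const int64_t i_wrap = blockDim.x*blockIdx.x + threadIdx.x;            // product computed in 32 bits, can wrap
    const int64_t i_ok   = (int64_t)blockDim.x*blockIdx.x + threadIdx.x;   // promoted to 64 bits before multiplying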
@@ -40,10 +40,10 @@ static __global__ void cpy_scalar(const char * cx, char * cdst, const int ne,
 }

 template <typename T>
-static __global__ void cpy_scalar_transpose(const char * cx, char * cdst, const int ne,
-    const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
-    const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11,
-    const int nb12, const int nb13) {
+static __global__ void cpy_scalar_transpose(const char * cx, char * cdst, const int64_t ne,
+    const int64_t ne00, const int64_t ne01, const int64_t ne02, const int64_t nb00, const int64_t nb01, const int64_t nb02,
+    const int64_t nb03, const int64_t ne10, const int64_t ne11, const int64_t ne12, const int64_t nb10, const int64_t nb11,
+    const int64_t nb12, const int64_t nb13) {

     const T* src = reinterpret_cast<const T*>(cx);
     T* dst = reinterpret_cast<T*>(cdst);

@@ -117,60 +117,60 @@ static __device__ void cpy_blck_q_f32(const char * cxi, char * cdsti) {
 }

 template <cpy_kernel_t cpy_blck, int qk>
-static __global__ void cpy_f32_q(const char * cx, char * cdst, const int ne,
-    const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
-    const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11,
-    const int nb12, const int nb13) {
-    const int i = (blockDim.x*blockIdx.x + threadIdx.x)*qk;
+static __global__ void cpy_f32_q(const char * cx, char * cdst, const int64_t ne,
+    const int64_t ne00, const int64_t ne01, const int64_t ne02, const int64_t nb00, const int64_t nb01, const int64_t nb02,
+    const int64_t nb03, const int64_t ne10, const int64_t ne11, const int64_t ne12, const int64_t nb10, const int64_t nb11,
+    const int64_t nb12, const int64_t nb13) {
+    const int64_t i = ((int64_t)blockDim.x*blockIdx.x + threadIdx.x)*qk;

     if (i >= ne) {
         return;
     }

-    const int i03 = i/(ne00 * ne01 * ne02);
-    const int i02 = (i - i03*ne00*ne01*ne02 )/ (ne00*ne01);
-    const int i01 = (i - i03*ne00*ne01*ne02 - i02*ne01*ne00) / ne00;
-    const int i00 = i - i03*ne00*ne01*ne02 - i02*ne01*ne00 - i01*ne00;
-    const int x_offset = i00*nb00 + i01*nb01 + i02*nb02 + i03 * nb03;
+    const int64_t i03 = i/(ne00 * ne01 * ne02);
+    const int64_t i02 = (i - i03*ne00*ne01*ne02 )/ (ne00*ne01);
+    const int64_t i01 = (i - i03*ne00*ne01*ne02 - i02*ne01*ne00) / ne00;
+    const int64_t i00 = i - i03*ne00*ne01*ne02 - i02*ne01*ne00 - i01*ne00;
+    const int64_t x_offset = i00*nb00 + i01*nb01 + i02*nb02 + i03 * nb03;

-    const int i13 = i/(ne10 * ne11 * ne12);
-    const int i12 = (i - i13*ne10*ne11*ne12) / (ne10*ne11);
-    const int i11 = (i - i13*ne10*ne11*ne12 - i12*ne10*ne11) / ne10;
-    const int i10 = i - i13*ne10*ne11*ne12 - i12*ne10*ne11 - i11*ne10;
-    const int dst_offset = (i10/qk)*nb10 + i11*nb11 + i12*nb12 + i13*nb13;
+    const int64_t i13 = i/(ne10 * ne11 * ne12);
+    const int64_t i12 = (i - i13*ne10*ne11*ne12) / (ne10*ne11);
+    const int64_t i11 = (i - i13*ne10*ne11*ne12 - i12*ne10*ne11) / ne10;
+    const int64_t i10 = i - i13*ne10*ne11*ne12 - i12*ne10*ne11 - i11*ne10;
+    const int64_t dst_offset = (i10/qk)*nb10 + i11*nb11 + i12*nb12 + i13*nb13;

     cpy_blck(cx + x_offset, cdst + dst_offset);
 }

 template <cpy_kernel_t cpy_blck, int qk>
-static __global__ void cpy_q_f32(const char * cx, char * cdst, const int ne,
-    const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
-    const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11,
-    const int nb12, const int nb13) {
-    const int i = (blockDim.x*blockIdx.x + threadIdx.x)*qk;
+static __global__ void cpy_q_f32(const char * cx, char * cdst, const int64_t ne,
+    const int64_t ne00, const int64_t ne01, const int64_t ne02, const int64_t nb00, const int64_t nb01, const int64_t nb02,
+    const int64_t nb03, const int64_t ne10, const int64_t ne11, const int64_t ne12, const int64_t nb10, const int64_t nb11,
+    const int64_t nb12, const int64_t nb13) {
+    const int64_t i = ((int64_t)blockDim.x*blockIdx.x + threadIdx.x)*qk;

     if (i >= ne) {
         return;
     }

-    const int i03 = i/(ne00 * ne01 * ne02);
-    const int i02 = (i - i03*ne00*ne01*ne02 )/ (ne00*ne01);
-    const int i01 = (i - i03*ne00*ne01*ne02 - i02*ne01*ne00) / ne00;
-    const int i00 = i - i03*ne00*ne01*ne02 - i02*ne01*ne00 - i01*ne00;
-    const int x_offset = (i00/qk)*nb00 + i01*nb01 + i02*nb02 + i03 * nb03;
+    const int64_t i03 = i/(ne00 * ne01 * ne02);
+    const int64_t i02 = (i - i03*ne00*ne01*ne02 )/ (ne00*ne01);
+    const int64_t i01 = (i - i03*ne00*ne01*ne02 - i02*ne01*ne00) / ne00;
+    const int64_t i00 = i - i03*ne00*ne01*ne02 - i02*ne01*ne00 - i01*ne00;
+    const int64_t x_offset = (i00/qk)*nb00 + i01*nb01 + i02*nb02 + i03 * nb03;

-    const int i13 = i/(ne10 * ne11 * ne12);
-    const int i12 = (i - i13*ne10*ne11*ne12) / (ne10*ne11);
-    const int i11 = (i - i13*ne10*ne11*ne12 - i12*ne10*ne11) / ne10;
-    const int i10 = i - i13*ne10*ne11*ne12 - i12*ne10*ne11 - i11*ne10;
-    const int dst_offset = i10*nb10 + i11*nb11 + i12*nb12 + i13*nb13;
+    const int64_t i13 = i/(ne10 * ne11 * ne12);
+    const int64_t i12 = (i - i13*ne10*ne11*ne12) / (ne10*ne11);
+    const int64_t i11 = (i - i13*ne10*ne11*ne12 - i12*ne10*ne11) / ne10;
+    const int64_t i10 = i - i13*ne10*ne11*ne12 - i12*ne10*ne11 - i11*ne10;
+    const int64_t dst_offset = i10*nb10 + i11*nb11 + i12*nb12 + i13*nb13;

     cpy_blck(cx + x_offset, cdst + dst_offset);
 }

 template<typename src_t, typename dst_t>
 static __global__ void cpy_scalar_contiguous(const char * cx, char * cdst, const int64_t ne) {
-    const int64_t i = blockDim.x*blockIdx.x + threadIdx.x;
+    const int64_t i = (int64_t)blockDim.x*blockIdx.x + threadIdx.x;

     if (i >= ne) {
         return;

@@ -188,19 +188,20 @@ static void ggml_cpy_scalar_contiguous_cuda(
     cudaStream_t stream) {

     const int64_t num_blocks = (ne + CUDA_CPY_BLOCK_SIZE - 1) / CUDA_CPY_BLOCK_SIZE;
+    GGML_ASSERT(num_blocks < UINT_MAX);
     cpy_scalar_contiguous<src_t, dst_t><<<num_blocks, CUDA_CPY_BLOCK_SIZE, 0, stream>>>
         (cx, cdst, ne);
 }

 template<typename src_t, typename dst_t, bool transposed = false>
 static void ggml_cpy_scalar_cuda(
-    const char * cx, char * cdst, const int ne,
-    const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
-    const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11, const int nb12, const int nb13, cudaStream_t stream) {
+    const char * cx, char * cdst, const int64_t ne,
+    const int64_t ne00, const int64_t ne01, const int64_t ne02, const int64_t nb00, const int64_t nb01, const int64_t nb02,
+    const int64_t nb03, const int64_t ne10, const int64_t ne11, const int64_t ne12, const int64_t nb10, const int64_t nb11, const int64_t nb12, const int64_t nb13, cudaStream_t stream) {

     if (transposed) {
         GGML_ASSERT(ne == ne00*ne01*ne02); // ne[3] is 1 assumed
-        int ne00n, ne01n, ne02n;
+        int64_t ne00n, ne01n, ne02n;
         if (nb00 <= nb02) { // most likely safe to handle nb00 = nb02 case here
             ne00n = ne00;
             ne01n = ne01;

@@ -211,143 +212,159 @@ static void ggml_cpy_scalar_cuda(
             ne02n = 1;
         }

-        dim3 dimGrid( (ne01n + CUDA_CPY_TILE_DIM_2D - 1) / CUDA_CPY_TILE_DIM_2D,
-                      (ne00n + CUDA_CPY_TILE_DIM_2D - 1) / CUDA_CPY_TILE_DIM_2D,
-                      (ne/(ne01n*ne00n) + CUDA_CPY_BLOCK_NM - 1) / CUDA_CPY_BLOCK_NM);
+        int64_t grid_x = (ne01n + CUDA_CPY_TILE_DIM_2D - 1) / CUDA_CPY_TILE_DIM_2D;
+        int64_t grid_y = (ne00n + CUDA_CPY_TILE_DIM_2D - 1) / CUDA_CPY_TILE_DIM_2D;
+        int64_t grid_z = (ne/(ne01n*ne00n) + CUDA_CPY_BLOCK_NM - 1) / CUDA_CPY_BLOCK_NM;
+        GGML_ASSERT(grid_x < UINT_MAX);
+        GGML_ASSERT(grid_y < USHRT_MAX);
+        GGML_ASSERT(grid_z < USHRT_MAX);
+        dim3 dimGrid(grid_x, grid_y, grid_z);
         dim3 dimBlock(CUDA_CPY_TILE_DIM_2D, CUDA_CPY_BLOCK_ROWS, 1);
         cpy_scalar_transpose<dst_t><<<dimGrid, dimBlock, 0, stream>>>
             (cx, cdst, ne, ne00n, ne01n, ne02n, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13);
     } else {
-        const int num_blocks = (ne + CUDA_CPY_BLOCK_SIZE - 1) / CUDA_CPY_BLOCK_SIZE;
+        const int64_t num_blocks = (ne + CUDA_CPY_BLOCK_SIZE - 1) / CUDA_CPY_BLOCK_SIZE;
+        GGML_ASSERT(num_blocks < UINT_MAX);
         cpy_scalar<cpy_1_scalar<src_t, dst_t>><<<num_blocks, CUDA_CPY_BLOCK_SIZE, 0, stream>>>
             (cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13);
     }
 }

 static void ggml_cpy_f32_q8_0_cuda(
-    const char * cx, char * cdst, const int ne,
-    const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
-    const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11, const int nb12, const int nb13, cudaStream_t stream) {
+    const char * cx, char * cdst, const int64_t ne,
+    const int64_t ne00, const int64_t ne01, const int64_t ne02, const int64_t nb00, const int64_t nb01, const int64_t nb02,
+    const int64_t nb03, const int64_t ne10, const int64_t ne11, const int64_t ne12, const int64_t nb10, const int64_t nb11, const int64_t nb12, const int64_t nb13, cudaStream_t stream) {

     GGML_ASSERT(ne % QK8_0 == 0);
-    const int num_blocks = ne / QK8_0;
+    const int64_t num_blocks = ne / QK8_0;
+    GGML_ASSERT(num_blocks < UINT_MAX);
     cpy_f32_q<cpy_blck_f32_q8_0, QK8_0><<<num_blocks, 1, 0, stream>>>
         (cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13);
 }

 static void ggml_cpy_q8_0_f32_cuda(
-    const char * cx, char * cdst, const int ne,
-    const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
-    const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11, const int nb12, const int nb13, cudaStream_t stream) {
+    const char * cx, char * cdst, const int64_t ne,
+    const int64_t ne00, const int64_t ne01, const int64_t ne02, const int64_t nb00, const int64_t nb01, const int64_t nb02,
+    const int64_t nb03, const int64_t ne10, const int64_t ne11, const int64_t ne12, const int64_t nb10, const int64_t nb11, const int64_t nb12, const int64_t nb13, cudaStream_t stream) {

-    const int num_blocks = ne;
+    const int64_t num_blocks = ne;
+    GGML_ASSERT(num_blocks < UINT_MAX);
     cpy_q_f32<cpy_blck_q8_0_f32, QK8_0><<<num_blocks, 1, 0, stream>>>
         (cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13);
 }

 static void ggml_cpy_f32_q4_0_cuda(
-    const char * cx, char * cdst, const int ne,
-    const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
-    const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11, const int nb12, const int nb13, cudaStream_t stream) {
+    const char * cx, char * cdst, const int64_t ne,
+    const int64_t ne00, const int64_t ne01, const int64_t ne02, const int64_t nb00, const int64_t nb01, const int64_t nb02,
+    const int64_t nb03, const int64_t ne10, const int64_t ne11, const int64_t ne12, const int64_t nb10, const int64_t nb11, const int64_t nb12, const int64_t nb13, cudaStream_t stream) {

     GGML_ASSERT(ne % QK4_0 == 0);
-    const int num_blocks = ne / QK4_0;
+    const int64_t num_blocks = ne / QK4_0;
+    GGML_ASSERT(num_blocks < UINT_MAX);
     cpy_f32_q<cpy_blck_f32_q4_0, QK4_0><<<num_blocks, 1, 0, stream>>>
         (cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13);
 }

 static void ggml_cpy_q4_0_f32_cuda(
-    const char * cx, char * cdst, const int ne,
-    const int ne00, const int ne01, const int ne02,
-    const int nb00, const int nb01, const int nb02,
-    const int nb03, const int ne10, const int ne11, const int ne12,
-    const int nb10, const int nb11, const int nb12, const int nb13,
+    const char * cx, char * cdst, const int64_t ne,
+    const int64_t ne00, const int64_t ne01, const int64_t ne02,
+    const int64_t nb00, const int64_t nb01, const int64_t nb02,
+    const int64_t nb03, const int64_t ne10, const int64_t ne11, const int64_t ne12,
+    const int64_t nb10, const int64_t nb11, const int64_t nb12, const int64_t nb13,
     cudaStream_t stream) {
-    const int num_blocks = ne;
+    const int64_t num_blocks = ne;
+    GGML_ASSERT(num_blocks < UINT_MAX);
     cpy_q_f32<cpy_blck_q_f32<dequantize_q4_0, QK4_0>, QK4_0><<<num_blocks, 1, 0, stream>>>(
         cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03,
         ne10, ne11, ne12, nb10, nb11, nb12, nb13);
 }

 static void ggml_cpy_f32_q4_1_cuda(
-    const char * cx, char * cdst, const int ne,
-    const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
-    const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11, const int nb12, const int nb13, cudaStream_t stream) {
+    const char * cx, char * cdst, const int64_t ne,
+    const int64_t ne00, const int64_t ne01, const int64_t ne02, const int64_t nb00, const int64_t nb01, const int64_t nb02,
+    const int64_t nb03, const int64_t ne10, const int64_t ne11, const int64_t ne12, const int64_t nb10, const int64_t nb11, const int64_t nb12, const int64_t nb13, cudaStream_t stream) {

     GGML_ASSERT(ne % QK4_1 == 0);
-    const int num_blocks = ne / QK4_1;
+    const int64_t num_blocks = ne / QK4_1;
+    GGML_ASSERT(num_blocks < UINT_MAX);
     cpy_f32_q<cpy_blck_f32_q4_1, QK4_1><<<num_blocks, 1, 0, stream>>>
         (cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13);
 }

 static void ggml_cpy_q4_1_f32_cuda(
-    const char * cx, char * cdst, const int ne,
-    const int ne00, const int ne01, const int ne02,
-    const int nb00, const int nb01, const int nb02,
-    const int nb03, const int ne10, const int ne11, const int ne12,
-    const int nb10, const int nb11, const int nb12, const int nb13,
+    const char * cx, char * cdst, const int64_t ne,
+    const int64_t ne00, const int64_t ne01, const int64_t ne02,
+    const int64_t nb00, const int64_t nb01, const int64_t nb02,
+    const int64_t nb03, const int64_t ne10, const int64_t ne11, const int64_t ne12,
+    const int64_t nb10, const int64_t nb11, const int64_t nb12, const int64_t nb13,
     cudaStream_t stream) {
-    const int num_blocks = ne;
+    const int64_t num_blocks = ne;
+    GGML_ASSERT(num_blocks < UINT_MAX);
     cpy_q_f32<cpy_blck_q_f32<dequantize_q4_1, QK4_1>, QK4_1><<<num_blocks, 1, 0, stream>>>(
         cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03,
         ne10, ne11, ne12, nb10, nb11, nb12, nb13);
 }

 static void ggml_cpy_f32_q5_0_cuda(
-    const char * cx, char * cdst, const int ne,
-    const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
-    const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11, const int nb12, const int nb13, cudaStream_t stream) {
+    const char * cx, char * cdst, const int64_t ne,
+    const int64_t ne00, const int64_t ne01, const int64_t ne02, const int64_t nb00, const int64_t nb01, const int64_t nb02,
+    const int64_t nb03, const int64_t ne10, const int64_t ne11, const int64_t ne12, const int64_t nb10, const int64_t nb11, const int64_t nb12, const int64_t nb13, cudaStream_t stream) {

     GGML_ASSERT(ne % QK5_0 == 0);
-    const int num_blocks = ne / QK5_0;
+    const int64_t num_blocks = ne / QK5_0;
+    GGML_ASSERT(num_blocks < UINT_MAX);
     cpy_f32_q<cpy_blck_f32_q5_0, QK5_0><<<num_blocks, 1, 0, stream>>>
         (cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13);
 }

 static void ggml_cpy_q5_0_f32_cuda(
-    const char * cx, char * cdst, const int ne,
-    const int ne00, const int ne01, const int ne02,
-    const int nb00, const int nb01, const int nb02,
-    const int nb03, const int ne10, const int ne11, const int ne12,
-    const int nb10, const int nb11, const int nb12, const int nb13,
+    const char * cx, char * cdst, const int64_t ne,
+    const int64_t ne00, const int64_t ne01, const int64_t ne02,
+    const int64_t nb00, const int64_t nb01, const int64_t nb02,
+    const int64_t nb03, const int64_t ne10, const int64_t ne11, const int64_t ne12,
+    const int64_t nb10, const int64_t nb11, const int64_t nb12, const int64_t nb13,
     cudaStream_t stream) {
-    const int num_blocks = ne;
+    const int64_t num_blocks = ne;
+    GGML_ASSERT(num_blocks < UINT_MAX);
     cpy_q_f32<cpy_blck_q_f32<dequantize_q5_0, QK5_0>, QK5_0><<<num_blocks, 1, 0, stream>>>(
         cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03,
         ne10, ne11, ne12, nb10, nb11, nb12, nb13);
 }

 static void ggml_cpy_f32_q5_1_cuda(
-    const char * cx, char * cdst, const int ne,
-    const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
-    const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11, const int nb12, const int nb13, cudaStream_t stream) {
+    const char * cx, char * cdst, const int64_t ne,
+    const int64_t ne00, const int64_t ne01, const int64_t ne02, const int64_t nb00, const int64_t nb01, const int64_t nb02,
+    const int64_t nb03, const int64_t ne10, const int64_t ne11, const int64_t ne12, const int64_t nb10, const int64_t nb11, const int64_t nb12, const int64_t nb13, cudaStream_t stream) {

     GGML_ASSERT(ne % QK5_1 == 0);
-    const int num_blocks = ne / QK5_1;
+    const int64_t num_blocks = ne / QK5_1;
+    GGML_ASSERT(num_blocks < UINT_MAX);
     cpy_f32_q<cpy_blck_f32_q5_1, QK5_1><<<num_blocks, 1, 0, stream>>>
         (cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13);
 }

 static void ggml_cpy_q5_1_f32_cuda(
-    const char * cx, char * cdst, const int ne,
-    const int ne00, const int ne01, const int ne02,
-    const int nb00, const int nb01, const int nb02,
-    const int nb03, const int ne10, const int ne11, const int ne12,
-    const int nb10, const int nb11, const int nb12, const int nb13,
+    const char * cx, char * cdst, const int64_t ne,
+    const int64_t ne00, const int64_t ne01, const int64_t ne02,
+    const int64_t nb00, const int64_t nb01, const int64_t nb02,
+    const int64_t nb03, const int64_t ne10, const int64_t ne11, const int64_t ne12,
+    const int64_t nb10, const int64_t nb11, const int64_t nb12, const int64_t nb13,
     cudaStream_t stream) {
-    const int num_blocks = ne;
+    const int64_t num_blocks = ne;
+    GGML_ASSERT(num_blocks < UINT_MAX);
     cpy_q_f32<cpy_blck_q_f32<dequantize_q5_1, QK5_1>, QK5_1><<<num_blocks, 1, 0, stream>>>(
         cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03,
         ne10, ne11, ne12, nb10, nb11, nb12, nb13);
 }

 static void ggml_cpy_f32_iq4_nl_cuda(
-    const char * cx, char * cdst, const int ne,
-    const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
-    const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11, const int nb12, const int nb13, cudaStream_t stream) {
+    const char * cx, char * cdst, const int64_t ne,
+    const int64_t ne00, const int64_t ne01, const int64_t ne02, const int64_t nb00, const int64_t nb01, const int64_t nb02,
+    const int64_t nb03, const int64_t ne10, const int64_t ne11, const int64_t ne12, const int64_t nb10, const int64_t nb11, const int64_t nb12, const int64_t nb13, cudaStream_t stream) {

     GGML_ASSERT(ne % QK4_NL == 0);
-    const int num_blocks = ne / QK4_NL;
+    const int64_t num_blocks = ne / QK4_NL;
+    GGML_ASSERT(num_blocks < UINT_MAX);
     cpy_f32_q<cpy_blck_f32_iq4_nl, QK4_NL><<<num_blocks, 1, 0, stream>>>
         (cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13);
 }
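Note (editor, not part of the diff): the pattern repeated in every copy helper above is the same one — keep the element count in int64_t, derive the block count in 64 bits, and assert it still fits a 32-bit grid dimension before the <<<...>>> launch silently narrows it. A minimal stand-alone sketch of that guard, with a hypothetical helper name:

    #include <cassert>
    #include <climits>
    #include <cstdint>

    // Hypothetical helper: derive a 1D launch size from a 64-bit element count
    // and fail loudly instead of letting grid.x wrap around.
    static unsigned checked_num_blocks(int64_t ne, int block_size) {
        const int64_t num_blocks = (ne + block_size - 1) / block_size;
        assert(num_blocks < UINT_MAX && "grid.x would overflow for this tensor");
        return (unsigned) num_blocks;
    }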
@@ -356,9 +373,6 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
     const int64_t ne = ggml_nelements(src0);
     GGML_ASSERT(ne == ggml_nelements(src1));

-    GGML_ASSERT(ggml_nbytes(src0) <= INT_MAX);
-    GGML_ASSERT(ggml_nbytes(src1) <= INT_MAX);
-
     const int64_t ne00 = src0->ne[0];
     const int64_t ne01 = src0->ne[1];
     const int64_t ne02 = src0->ne[2];
@@ -531,7 +531,7 @@ static __device__ __forceinline__ void flash_attn_ext_f16_iter(
         for (int k0 = 0; k0 < nbatch_fa; k0 += np*T_C_KQ::I) {
 #pragma unroll
             for (int l = 0; l < T_C_KQ::ne; ++l) {
-                if (!oob_check || k0 + T_C_KQ::get_i(l) < k_VKQ_sup) {
+                if (!oob_check || k0 + (threadIdx.y % np)*T_C_KQ::I + T_C_KQ::get_i(l) < k_VKQ_sup) {
                     KQ_max_new[l % 2] = fmaxf(KQ_max_new[l % 2], KQ_C[k0/(np*T_C_KQ::I)].x[l] + FATTN_KQ_MAX_OFFSET);
                 }
             }

@@ -583,7 +583,7 @@ static __device__ __forceinline__ void flash_attn_ext_f16_iter(
         for (int k0 = 0; k0 < nbatch_fa; k0 += np*T_C_KQ::J) {
 #pragma unroll
             for (int l = 0; l < T_C_KQ::ne; ++l) {
-                if (!oob_check || k0 + T_C_KQ::get_j(l) < k_VKQ_sup) {
+                if (!oob_check || k0 + (threadIdx.y % np)*T_C_KQ::J + T_C_KQ::get_j(l) < k_VKQ_sup) {
                     // Turing + Volta:
                     KQ_max_new[(l/2) % 2] = fmaxf(KQ_max_new[(l/2) % 2], KQ_C[(k0/(np*T_C_KQ::J))].x[l] + FATTN_KQ_MAX_OFFSET);
                 }
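Editor's note, not from the patch: the two hunks above tighten the out-of-bounds check so that it accounts for the per-warp offset within the K batch, not only the loop offset k0. A trivial sketch of the index the new condition bounds, with illustrative parameter names:

    // Illustrative only: with np warps splitting one nbatch_fa tile, the absolute
    // K index touched by a fragment element is the sum of three offsets.
    static __device__ int fattn_k_index(int k0, int warp_in_group /* threadIdx.y % np */,
                                        int frag_stride /* T_C_KQ::I or ::J */, int frag_offset /* get_i(l)/get_j(l) */) {
        return k0 + warp_in_group*frag_stride + frag_offset;
    }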
@@ -201,16 +201,6 @@ static ggml_cuda_device_info ggml_cuda_init() {
     GGML_ASSERT(info.device_count <= GGML_CUDA_MAX_DEVICES);

     int64_t total_vram = 0;
-#ifdef GGML_CUDA_FORCE_MMQ
-    GGML_LOG_INFO("%s: GGML_CUDA_FORCE_MMQ: yes\n", __func__);
-#else
-    GGML_LOG_INFO("%s: GGML_CUDA_FORCE_MMQ: no\n", __func__);
-#endif // GGML_CUDA_FORCE_MMQ
-#ifdef GGML_CUDA_FORCE_CUBLAS
-    GGML_LOG_INFO("%s: GGML_CUDA_FORCE_CUBLAS: yes\n", __func__);
-#else
-    GGML_LOG_INFO("%s: GGML_CUDA_FORCE_CUBLAS: no\n", __func__);
-#endif // GGML_CUDA_FORCE_CUBLAS
     GGML_LOG_INFO("%s: found %d " GGML_CUDA_NAME " devices:\n", __func__, info.device_count);

     std::vector<std::pair<int, std::string>> turing_devices_without_mma;
@@ -1684,3 +1684,60 @@ ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_opt_step_sgd(ggm
     return res;
 }

+ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_memset(ggml_metal_library_t lib, const ggml_tensor * op) {
+    GGML_ASSERT(op->type == GGML_TYPE_I64);
+
+    char base[256];
+    char name[256];
+
+    snprintf(base, 256, "kernel_memset_%s", ggml_type_name(op->type));
+    snprintf(name, 256, "%s", base);
+
+    ggml_metal_pipeline_with_params res = ggml_metal_library_get_pipeline(lib, name);
+    if (!res.pipeline) {
+        res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
+    }
+
+    return res;
+}
+
+ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_count_equal(ggml_metal_library_t lib, const ggml_tensor * op) {
+    assert(op->op == GGML_OP_COUNT_EQUAL);
+
+    GGML_TENSOR_LOCALS(int64_t, ne0, op->src[0], ne);
+
+    GGML_ASSERT(op->src[0]->type == op->src[1]->type);
+    GGML_ASSERT(op->src[0]->type == GGML_TYPE_I32);
+    GGML_ASSERT(op->type == GGML_TYPE_I64);
+
+    // note: the kernel only supports i32 output due to metal atomic add only supporting atomic_int
+    GGML_ASSERT(ggml_nelements(op->src[0]) < (1LL << 31));
+
+    char base[256];
+    char name[256];
+
+    int nsg = 1;
+    while (32*nsg < ne00 && nsg < 32) {
+        nsg *= 2;
+    }
+
+    snprintf(base, 256, "kernel_count_equal_%s", ggml_type_name(op->src[0]->type));
+    snprintf(name, 256, "%s_nsg=%d", base, nsg);
+
+    ggml_metal_pipeline_with_params res = ggml_metal_library_get_pipeline(lib, name);
+    if (!res.pipeline) {
+        ggml_metal_cv_t cv = ggml_metal_cv_init();
+
+        ggml_metal_cv_set_int16(cv, nsg, FC_COUNT_EQUAL + 0);
+
+        res = ggml_metal_library_compile_pipeline(lib, base, name, cv);
+
+        ggml_metal_cv_free(cv);
+    }
+
+    res.smem = 32 * sizeof(int32_t);
+    res.nsg  = nsg;
+
+    return res;
+}
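For orientation only (editor's note, not part of the patch): the count_equal pipeline above sizes its simdgroup count as the smallest power of two whose 32-wide lanes cover the row length ne00, capped at 32. A small host-side sketch of that rule, assuming 32-thread simdgroups:

    #include <cstdint>

    // Smallest power-of-two simdgroup count such that 32*nsg covers ne00 (capped at 32),
    // mirroring the sizing loop in ggml_metal_library_get_pipeline_count_equal above.
    static int count_equal_nsg(int64_t ne00) {
        int nsg = 1;
        while (32*nsg < ne00 && nsg < 32) {
            nsg *= 2;
        }
        return nsg; // e.g. ne00 = 1000 -> nsg = 32 -> 1024 threads per threadgroup
    }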
@@ -147,6 +147,8 @@ struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_arange
 struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_timestep_embedding(ggml_metal_library_t lib, const struct ggml_tensor * op);
 struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_opt_step_adamw    (ggml_metal_library_t lib, const struct ggml_tensor * op);
 struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_opt_step_sgd      (ggml_metal_library_t lib, const struct ggml_tensor * op);
+struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_memset            (ggml_metal_library_t lib, const struct ggml_tensor * op);
+struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_count_equal       (ggml_metal_library_t lib, const struct ggml_tensor * op);

 struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_flash_attn_ext_pad(
         ggml_metal_library_t lib,
@@ -1023,6 +1023,11 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
             return has_simdgroup_reduction && ggml_is_contiguous_rows(op->src[0]);
         case GGML_OP_L2_NORM:
             return has_simdgroup_reduction && (op->ne[0] % 4 == 0 && ggml_is_contiguous_1(op->src[0]));
+        case GGML_OP_COUNT_EQUAL:
+            return has_simdgroup_reduction &&
+                   op->src[0]->type == GGML_TYPE_I32 &&
+                   op->src[1]->type == GGML_TYPE_I32 &&
+                   op->type == GGML_TYPE_I64;
         case GGML_OP_ARGMAX:
             return has_simdgroup_reduction;
         case GGML_OP_NORM:
@@ -78,6 +78,7 @@
 #define FC_MUL_MM     700
 #define FC_ROPE       800
 #define FC_SSM_CONV   900
+#define FC_COUNT_EQUAL 1000

 // op-specific constants
 #define OP_FLASH_ATTN_EXT_NQPTG 8
@@ -894,6 +895,25 @@ typedef struct {
     float step;
 } ggml_metal_kargs_arange;

+typedef struct {
+    int64_t val;
+} ggml_metal_kargs_memset;
+
+typedef struct {
+    int32_t  ne00;
+    int32_t  ne01;
+    int32_t  ne02;
+    int32_t  ne03;
+    uint64_t nb00;
+    uint64_t nb01;
+    uint64_t nb02;
+    uint64_t nb03;
+    uint64_t nb10;
+    uint64_t nb11;
+    uint64_t nb12;
+    uint64_t nb13;
+} ggml_metal_kargs_count_equal;
+
 typedef struct {
     int32_t k0;
     int32_t k1;
@@ -448,7 +448,11 @@ static int ggml_metal_op_encode_impl(ggml_metal_op_t ctx, int idx) {
             {
                 n_fuse = ggml_metal_op_opt_step_sgd(ctx, idx);
             } break;
-        default:
+        case GGML_OP_COUNT_EQUAL:
+            {
+                n_fuse = ggml_metal_op_count_equal(ctx, idx);
+            } break;
+        default:
             {
                 GGML_LOG_ERROR("%s: error: node %3d, op = %8s not implemented\n", __func__, idx, ggml_op_name(node->op));
                 GGML_ABORT("fatal error");
@@ -4090,3 +4094,64 @@ int ggml_metal_op_opt_step_sgd(ggml_metal_op_t ctx, int idx) {

     return 1;
 }

+int ggml_metal_op_count_equal(ggml_metal_op_t ctx, int idx) {
+    ggml_tensor * op = ctx->node(idx);
+
+    ggml_metal_library_t lib = ctx->lib;
+    ggml_metal_encoder_t enc = ctx->enc;
+
+    GGML_TENSOR_LOCALS(int32_t,  ne0, op->src[0], ne);
+    GGML_TENSOR_LOCALS(uint64_t, nb0, op->src[0], nb);
+    GGML_TENSOR_LOCALS(uint64_t, nb1, op->src[1], nb);
+
+    {
+        ggml_metal_kargs_memset args = { /*.val =*/ 0 };
+
+        auto pipeline = ggml_metal_library_get_pipeline_memset(lib, op);
+
+        ggml_metal_encoder_set_pipeline(enc, pipeline);
+        ggml_metal_encoder_set_bytes(enc, &args, sizeof(args), 0);
+        ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op), 1);
+
+        ggml_metal_encoder_dispatch_threadgroups(enc, 1, 1, 1, 1, 1, 1);
+    }
+
+    ggml_metal_op_concurrency_reset(ctx);
+
+    {
+        ggml_metal_kargs_count_equal args = {
+            /*.ne00 =*/ ne00,
+            /*.ne01 =*/ ne01,
+            /*.ne02 =*/ ne02,
+            /*.ne03 =*/ ne03,
+            /*.nb00 =*/ nb00,
+            /*.nb01 =*/ nb01,
+            /*.nb02 =*/ nb02,
+            /*.nb03 =*/ nb03,
+            /*.nb10 =*/ nb10,
+            /*.nb11 =*/ nb11,
+            /*.nb12 =*/ nb12,
+            /*.nb13 =*/ nb13,
+        };
+
+        auto pipeline = ggml_metal_library_get_pipeline_count_equal(lib, op);
+
+        const size_t smem = pipeline.smem;
+
+        const int nth = 32*pipeline.nsg;
+
+        GGML_ASSERT(nth <= ggml_metal_pipeline_max_theads_per_threadgroup(pipeline));
+
+        ggml_metal_encoder_set_pipeline(enc, pipeline);
+        ggml_metal_encoder_set_bytes(enc, &args, sizeof(args), 0);
+        ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op->src[0]), 1);
+        ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op->src[1]), 2);
+        ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op), 3);
+
+        ggml_metal_encoder_set_threadgroup_memory_size(enc, smem, 0);
+        ggml_metal_encoder_dispatch_threadgroups(enc, ne01, ne02, ne03, nth, 1, 1);
+    }
+
+    return 1;
+}
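As a reading aid (editor's note, not from the PR): the two encoder passes above compute on the GPU what a trivial CPU loop would produce for GGML_OP_COUNT_EQUAL — zero the single i64 output, then add one for every position where the two i32 inputs match. A hedged reference sketch:

    #include <cstdint>
    #include <vector>

    // CPU reference for count_equal on two equally-sized i32 buffers:
    // the Metal path first memsets the i64 destination to 0, then each
    // threadgroup atomically adds its partial count of equal elements.
    static int64_t count_equal_ref(const std::vector<int32_t> & a,
                                   const std::vector<int32_t> & b) {
        int64_t count = 0; // corresponds to the memset-to-zero dispatch
        for (size_t i = 0; i < a.size() && i < b.size(); ++i) {
            count += (a[i] == b[i]); // corresponds to the per-row atomic adds
        }
        return count;
    }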
@@ -87,6 +87,7 @@ int ggml_metal_op_leaky_relu      (ggml_metal_op_t ctx, int idx);
 int ggml_metal_op_tri             (ggml_metal_op_t ctx, int idx);
 int ggml_metal_op_opt_step_adamw  (ggml_metal_op_t ctx, int idx);
 int ggml_metal_op_opt_step_sgd    (ggml_metal_op_t ctx, int idx);
+int ggml_metal_op_count_equal     (ggml_metal_op_t ctx, int idx);

 #ifdef __cplusplus
 }
@@ -1790,6 +1790,7 @@ kernel void kernel_op_sum_f32(
         return;
     }

+    // TODO: become function constant
     const uint nsg = (ntg.x + 31) / 32;

     float sumf = 0;
@@ -9557,9 +9558,6 @@ template [[host_name("kernel_mul_mm_iq4_xs_f32")]] kernel mul_mm_t kernel_mul_m

 template [[host_name("kernel_mul_mm_f32_f16")]] kernel mul_mm_t kernel_mul_mm<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, float4x4, 1, dequantize_f32, float, float4x4, half, half2x4>;
 template [[host_name("kernel_mul_mm_f16_f16")]] kernel mul_mm_t kernel_mul_mm<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, half4x4, 1, dequantize_f16, half, half4x4, half, half2x4>;
-#if defined(GGML_METAL_HAS_BF16)
-template [[host_name("kernel_mul_mm_bf16_f16")]] kernel mul_mm_t kernel_mul_mm<bfloat, bfloat4x4, simdgroup_bfloat8x8, half, half2x4, simdgroup_half8x8, bfloat4x4, 1, dequantize_bf16, bfloat, bfloat4x4, half, half2x4>;
-#endif
 template [[host_name("kernel_mul_mm_q4_0_f16")]] kernel mul_mm_t kernel_mul_mm<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, block_q4_0, 2, dequantize_q4_0, float, float4x4, half, half2x4>;
 template [[host_name("kernel_mul_mm_q4_1_f16")]] kernel mul_mm_t kernel_mul_mm<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, block_q4_1, 2, dequantize_q4_1, float, float4x4, half, half2x4>;
 template [[host_name("kernel_mul_mm_q5_0_f16")]] kernel mul_mm_t kernel_mul_mm<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, block_q5_0, 2, dequantize_q5_0, float, float4x4, half, half2x4>;

@@ -9615,9 +9613,6 @@ template [[host_name("kernel_mul_mm_id_iq4_xs_f32")]] kernel mul_mm_id kernel_m

 template [[host_name("kernel_mul_mm_id_f32_f16")]] kernel mul_mm_id kernel_mul_mm_id<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, float4x4, 1, dequantize_f32, float, float4x4, half, half2x4>;
 template [[host_name("kernel_mul_mm_id_f16_f16")]] kernel mul_mm_id kernel_mul_mm_id<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, half4x4, 1, dequantize_f16, half, half4x4, half, half2x4>;
-#if defined(GGML_METAL_HAS_BF16)
-template [[host_name("kernel_mul_mm_id_bf16_f16")]] kernel mul_mm_id kernel_mul_mm_id<bfloat, bfloat4x4, simdgroup_bfloat8x8, half, half2x4, simdgroup_half8x8, bfloat4x4, 1, dequantize_bf16, bfloat, bfloat4x4, half, half2x4>;
-#endif
 template [[host_name("kernel_mul_mm_id_q4_0_f16")]] kernel mul_mm_id kernel_mul_mm_id<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, block_q4_0, 2, dequantize_q4_0, float, float4x4, half, half2x4>;
 template [[host_name("kernel_mul_mm_id_q4_1_f16")]] kernel mul_mm_id kernel_mul_mm_id<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, block_q4_1, 2, dequantize_q4_1, float, float4x4, half, half2x4>;
 template [[host_name("kernel_mul_mm_id_q5_0_f16")]] kernel mul_mm_id kernel_mul_mm_id<half, half4x4, simdgroup_half8x8, half, half2x4, simdgroup_half8x8, block_q5_0, 2, dequantize_q5_0, float, float4x4, half, half2x4>;
@@ -9920,3 +9915,75 @@ kernel void kernel_opt_step_sgd_f32(

     x[gid] = x[gid] * (1.0f - pars[0] * pars[1]) - pars[0] * g[gid];
 }

+template<typename T>
+kernel void kernel_memset(
+        constant ggml_metal_kargs_fill & args,
+        device   T                     * dst,
+        uint tpig[[thread_position_in_grid]]) {
+    dst[tpig] = args.val;
+}
+
+typedef decltype(kernel_memset<int64_t>) kernel_memset_t;
+
+template [[host_name("kernel_memset_i64")]] kernel kernel_memset_t kernel_memset<int64_t>;
+
+constant short FC_count_equal_nsg [[function_constant(FC_COUNT_EQUAL + 0)]];
+
+template<typename T>
+kernel void kernel_count_equal(
+        constant ggml_metal_kargs_count_equal & args,
+        device const char * src0,
+        device const char * src1,
+        device atomic_int * dst,
+        threadgroup int32_t * shmem_i32 [[threadgroup(0)]],
+        uint3   tgpig[[threadgroup_position_in_grid]],
+        ushort3 tpitg[[thread_position_in_threadgroup]],
+        ushort  sgitg[[simdgroup_index_in_threadgroup]],
+        ushort  tiisg[[thread_index_in_simdgroup]],
+        ushort3 ntg[[threads_per_threadgroup]]) {
+    const short NSG = FC_count_equal_nsg;
+
+    const int i3 = tgpig.z;
+    const int i2 = tgpig.y;
+    const int i1 = tgpig.x;
+
+    if (i3 >= args.ne03 || i2 >= args.ne02 || i1 >= args.ne01) {
+        return;
+    }
+
+    int sum = 0;
+
+    device const char * base0 = src0 + i1*args.nb01 + i2*args.nb02 + i3*args.nb03;
+    device const char * base1 = src1 + i1*args.nb11 + i2*args.nb12 + i3*args.nb13;
+
+    for (int64_t i0 = tpitg.x; i0 < args.ne00; i0 += ntg.x) {
+        const T v0 = *(device const T *)(base0 + i0*args.nb00);
+        const T v1 = *(device const T *)(base1 + i0*args.nb10);
+        sum += (v0 == v1);
+    }
+
+    sum = simd_sum(sum);
+
+    if (tiisg == 0) {
+        shmem_i32[sgitg] = sum;
+    }
+
+    threadgroup_barrier(mem_flags::mem_threadgroup);
+
+    if (sgitg == 0) {
+        float v = 0.0f;
+        if (tpitg.x < NSG) {
+            v = shmem_i32[tpitg.x];
+        }
+
+        float total = simd_sum(v);
+        if (tpitg.x == 0) {
+            atomic_fetch_add_explicit(dst, (int32_t) total, memory_order_relaxed);
+        }
+    }
+}
+
+typedef decltype(kernel_count_equal<int32_t>) kernel_count_equal_t;
+
+template [[host_name("kernel_count_equal_i32")]] kernel kernel_count_equal_t kernel_count_equal<int32_t>;
@@ -231,3 +231,4 @@ if (GGML_SYCL_DEVICE_ARCH)
     target_compile_options(ggml-sycl PRIVATE -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH})
+    target_link_options(ggml-sycl PRIVATE -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH})
 endif()
@@ -434,8 +434,15 @@ static constexpr std::initializer_list<ggml_op> topk_moe_early_softmax_norm{ GGM
                                                                              GGML_OP_VIEW, GGML_OP_GET_ROWS, GGML_OP_RESHAPE,
                                                                              GGML_OP_SUM_ROWS, GGML_OP_CLAMP, GGML_OP_DIV,
                                                                              GGML_OP_RESHAPE };

+static constexpr std::initializer_list<ggml_op> topk_moe_sigmoid_norm_bias{ GGML_OP_UNARY, GGML_OP_RESHAPE, GGML_OP_ADD,
+                                                                            GGML_OP_ARGSORT, GGML_OP_VIEW, GGML_OP_GET_ROWS,
+                                                                            GGML_OP_RESHAPE, GGML_OP_SUM_ROWS, GGML_OP_CLAMP,
+                                                                            GGML_OP_DIV, GGML_OP_RESHAPE };
+
 static constexpr std::initializer_list<ggml_op> topk_moe_early_softmax     { GGML_OP_SOFT_MAX, GGML_OP_RESHAPE, GGML_OP_ARGSORT,
                                                                              GGML_OP_VIEW, GGML_OP_GET_ROWS };

 static constexpr std::initializer_list<ggml_op> topk_moe_late_softmax      { GGML_OP_ARGSORT, GGML_OP_VIEW,
                                                                              GGML_OP_GET_ROWS, GGML_OP_RESHAPE,
                                                                              GGML_OP_SOFT_MAX, GGML_OP_RESHAPE };
@@ -464,6 +471,32 @@ static constexpr std::initializer_list<std::array<int, 3>> topk_moe_early_softma
     { 9, 0, 8 }, // reshape->src[0] == div
 };

+//node #436 (   UNARY): ffn_moe_probs-10      ( 256K) [Vulka ] use=2: ffn_moe_logits-10     ( 256K) [Vulka ]
+//node #437 ( RESHAPE): ffn_moe_probs-10 (re  ( 256K) [Vulka ] use=1: ffn_moe_probs-10      ( 256K) [Vulka ]
+//node #438 (     ADD): ffn_moe_probs_biased  ( 256K) [Vulka ] use=1: ffn_moe_probs-10      ( 256K) [Vulka ] blk.10.exp_probs_b.b  (   0K) [Vulka ]
+//node #439 ( ARGSORT): ffn_moe_argsort-10    ( 256K) [Vulka ] use=1: ffn_moe_probs_biased  ( 256K) [Vulka ]
+//node #440 (    VIEW): ffn_moe_topk-10       ( 255K) [Vulka ] use=3: ffn_moe_argsort-10    ( 256K) [Vulka ]
+//node #441 (GET_ROWS): ffn_moe_weights-10    (  12K) [Vulka ] use=1: ffn_moe_probs-10 (re  ( 256K) [Vulka ] ffn_moe_topk-10       ( 255K) [Vulka ]
+//node #442 ( RESHAPE): ffn_moe_weights-10 (  (  12K) [Vulka ] use=2: ffn_moe_weights-10    (  12K) [Vulka ]
+//node #443 (SUM_ROWS): ffn_moe_weights_sum-  (   2K) [Vulka ] use=1: ffn_moe_weights-10 (  (  12K) [Vulka ]
+//node #444 (   CLAMP): ffn_moe_weights_sum_  (   2K) [Vulka ] use=1: ffn_moe_weights_sum-  (   2K) [Vulka ]
+//node #445 (     DIV): ffn_moe_weights_norm  (  12K) [Vulka ] use=1: ffn_moe_weights-10 (  (  12K) [Vulka ] ffn_moe_weights_sum_  (   2K) [Vulka ]
+//node #446 ( RESHAPE): ffn_moe_weights_norm  (  12K) [Vulka ] use=1: ffn_moe_weights_norm  (  12K) [Vulka ]
+static constexpr std::initializer_list<std::array<int, 3>> topk_moe_sigmoid_norm_bias_edges {
+    { 1, 0, 0 }, // reshape->src[0] == sigmoid
+    { 2, 0, 0 }, // add->src[0] == sigmoid
+    { 3, 0, 2 }, // argsort->src[0] == add
+    { 4, 0, 3 }, // view->src[0] == argsort
+    { 5, 0, 1 }, // get_rows->src[0] == reshape
+    { 5, 1, 4 }, // get_rows->src[1] == view
+    { 6, 0, 5 }, // reshape->src[0] == get_rows
+    { 7, 0, 6 }, // sum_rows->src[0] == reshape
+    { 8, 0, 7 }, // clamp->src[0] == sum_rows
+    { 9, 0, 6 }, // div->src[0] == reshape
+    { 9, 1, 8 }, // div->src[1] == clamp
+    {10, 0, 9 }, // reshape->src[0] == div
+};
+
 // same as early_softmax_norm but ending after the get_rows
 static constexpr std::initializer_list<std::array<int, 3>> topk_moe_early_softmax_edges {
     { 1, 0, 0 }, // reshape->src[0] == softmax
@@ -491,16 +524,10 @@ enum topk_moe_mode {
     TOPK_MOE_EARLY_SOFTMAX,
     TOPK_MOE_EARLY_SOFTMAX_NORM,
     TOPK_MOE_LATE_SOFTMAX,
+    TOPK_MOE_SIGMOID_NORM_BIAS,
     TOPK_MOE_COUNT,
 };

-static topk_moe_mode ggml_vk_num_additional_ops_to_topk_moe_mode(uint32_t num) {
-    topk_moe_mode mode = num == topk_moe_early_softmax_norm.size() - 1 ? TOPK_MOE_EARLY_SOFTMAX_NORM :
-                         num == topk_moe_early_softmax.size() - 1      ? TOPK_MOE_EARLY_SOFTMAX :
-                                                                         TOPK_MOE_LATE_SOFTMAX;
-    return mode;
-}
-
 static constexpr std::initializer_list<std::array<int, 3>> rope_view_set_rows_edges {
     { 1, 0, 0 }, // view->src[0] == rope
     { 2, 0, 1 }, // set_rows->src[0] == view
@@ -766,7 +793,7 @@ struct vk_device_struct {
     vk_pipeline pipeline_count_experts;

     // [2] is for whether to take n_experts from spec constant (0) or push constant (1)
-    vk_pipeline pipeline_topk_moe[num_topk_moe_pipelines][TOPK_MOE_COUNT][2];
+    vk_pipeline pipeline_topk_moe[num_topk_moe_pipelines][2];

     std::vector<vk_pipeline_ref> all_pipelines;
@@ -1181,6 +1208,11 @@ struct vk_op_topk_moe_push_constants {
     uint32_t n_expert_used;
     float    clamp_min;
     float    clamp_max;
+    uint32_t gating_func;
+    uint32_t has_bias;
+    uint32_t with_norm;
+    float    output_scale;
+    float    output_bias;
 };

 struct vk_op_add_id_push_constants {
@@ -1771,6 +1803,8 @@ struct ggml_backend_vk_context {
     // Bit 'i' means nodes[start_of_fusion + i] writes to memory.
     // If there's no fusion, bit 0 is still set.
     int fused_ops_write_mask {};
+    topk_moe_mode fused_topk_moe_mode {};
+    bool fused_topk_moe_scale {};

     // for GGML_VK_PERF_LOGGER
     std::unique_ptr<vk_perf_logger> perf_logger;
@@ -4291,9 +4325,7 @@ static void ggml_vk_load_shaders(vk_device& device) {

     for (uint32_t use_push = 0; use_push < 2; ++use_push) {
         for (uint32_t i = 0; i < num_topk_moe_pipelines; ++i) {
-            ggml_vk_create_pipeline2(device, device->pipeline_topk_moe[i][TOPK_MOE_EARLY_SOFTMAX][use_push], "topk_moe_f32_early_softmax_"+std::to_string(i), topk_moe_f32_len, topk_moe_f32_data, "main", 3, sizeof(vk_op_topk_moe_push_constants), {1, 1, 1}, {device->subgroup_size, 1u<<i, 0, 0, use_push}, 1, true, true, device->subgroup_size);
-            ggml_vk_create_pipeline2(device, device->pipeline_topk_moe[i][TOPK_MOE_EARLY_SOFTMAX_NORM][use_push], "topk_moe_f32_early_softmax_norm"+std::to_string(i), topk_moe_f32_len, topk_moe_f32_data, "main", 3, sizeof(vk_op_topk_moe_push_constants), {1, 1, 1}, {device->subgroup_size, 1u<<i, 1, 0, use_push}, 1, true, true, device->subgroup_size);
-            ggml_vk_create_pipeline2(device, device->pipeline_topk_moe[i][TOPK_MOE_LATE_SOFTMAX][use_push], "topk_moe_f32_late_softmax"+std::to_string(i), topk_moe_f32_len, topk_moe_f32_data, "main", 3, sizeof(vk_op_topk_moe_push_constants), {1, 1, 1}, {device->subgroup_size, 1u<<i, 0, 1, use_push}, 1, true, true, device->subgroup_size);
+            ggml_vk_create_pipeline2(device, device->pipeline_topk_moe[i][use_push], "topk_moe_f32_"+std::to_string(i), topk_moe_f32_len, topk_moe_f32_data, "main", 4, sizeof(vk_op_topk_moe_push_constants), {1, 1, 1}, {device->subgroup_size, 1u<<i, use_push}, 1, true, true, device->subgroup_size);
         }
     }
@@ -8684,10 +8716,9 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
         if (ctx->num_additional_fused_ops) {
             uint32_t idx = (uint32_t)ceilf(log2f(float(dst->ne[0])));
             GGML_ASSERT(idx < num_topk_moe_pipelines);
-            topk_moe_mode mode = ggml_vk_num_additional_ops_to_topk_moe_mode(ctx->num_additional_fused_ops);
             // use n_experts from push constant if it's not equal to the power of two spec constant
             bool use_push = dst->ne[0] != (1u << idx);
-            return ctx->device->pipeline_topk_moe[idx][mode][use_push];
+            return ctx->device->pipeline_topk_moe[idx][use_push];
         }

         if (src0->type == GGML_TYPE_F32 && (src1 == nullptr || src1->type == GGML_TYPE_F32) && dst->type == GGML_TYPE_F32) {
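Side note (editor, not part of the change): the lookup above buckets the expert count by rounding it up to a power of two and only falls back to the push-constant variant when the actual count is not itself a power of two. A small sketch of that selection with hypothetical helper names:

    #include <cmath>
    #include <cstdint>
    #include <utility>

    // Hypothetical helper mirroring the selection in ggml_vk_op_get_pipeline:
    // returns {power-of-two bucket index, whether n_experts must come from a push constant}.
    static std::pair<uint32_t, bool> topk_moe_pipeline_key(uint32_t n_experts) {
        const uint32_t idx = (uint32_t) ceilf(log2f((float) n_experts));
        const bool use_push = n_experts != (1u << idx); // e.g. 60 experts -> bucket 64 + push constant
        return {idx, use_push};
    }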
@@ -10346,14 +10377,16 @@ static void ggml_vk_soft_max_back(ggml_backend_vk_context * ctx, vk_context& sub
 }

 static void ggml_vk_topk_moe(ggml_backend_vk_context * ctx, vk_context& subctx, ggml_cgraph * cgraph, int node_idx) {
-    topk_moe_mode mode = ggml_vk_num_additional_ops_to_topk_moe_mode(ctx->num_additional_fused_ops);
+    topk_moe_mode mode = ctx->fused_topk_moe_mode;
     ggml_tensor * logits = cgraph->nodes[node_idx + 0]->src[0];
-    ggml_tensor * weights = (mode == TOPK_MOE_EARLY_SOFTMAX_NORM) ? cgraph->nodes[node_idx + 9] :
-                            (mode == TOPK_MOE_EARLY_SOFTMAX) ? cgraph->nodes[node_idx + 4] :
-                                                               cgraph->nodes[node_idx + 5];
-    ggml_tensor * ids = (mode == TOPK_MOE_LATE_SOFTMAX) ? cgraph->nodes[node_idx + 1] : cgraph->nodes[node_idx + 3];
+    ggml_tensor * bias = (mode == TOPK_MOE_SIGMOID_NORM_BIAS) ? cgraph->nodes[node_idx + 2]->src[1] : logits;
+    ggml_tensor * weights = cgraph->nodes[node_idx + ctx->num_additional_fused_ops];
+    ggml_tensor * ids = (mode == TOPK_MOE_SIGMOID_NORM_BIAS) ? cgraph->nodes[node_idx + 4] :
+                        (mode == TOPK_MOE_LATE_SOFTMAX)      ? cgraph->nodes[node_idx + 1] :
+                                                               cgraph->nodes[node_idx + 3];

     GGML_ASSERT(logits->type == GGML_TYPE_F32);
+    GGML_ASSERT(bias->type == GGML_TYPE_F32);
     GGML_ASSERT(weights->type == GGML_TYPE_F32);
     GGML_ASSERT(ids->type == GGML_TYPE_I32);
@@ -10368,6 +10401,7 @@ static void ggml_vk_topk_moe(ggml_backend_vk_context * ctx, vk_context& subctx,
     ggml_pipeline_request_descriptor_sets(ctx, pipeline, 1);

     vk_subbuffer logits_buf = ggml_vk_tensor_subbuffer(ctx, logits);
+    vk_subbuffer bias_buf = ggml_vk_tensor_subbuffer(ctx, bias);
     vk_subbuffer weights_buf = ggml_vk_tensor_subbuffer(ctx, weights);
     vk_subbuffer ids_buf = ggml_vk_tensor_subbuffer(ctx, ids);
@@ -10375,18 +10409,45 @@ static void ggml_vk_topk_moe(ggml_backend_vk_context * ctx, vk_context& subctx,
     pc.n_rows = n_rows;
     pc.n_experts_push = n_experts;
     pc.n_expert_used = n_expert_used;
+    pc.clamp_min = -std::numeric_limits<float>::infinity();
+    pc.clamp_max =  std::numeric_limits<float>::infinity();
     if (mode == TOPK_MOE_EARLY_SOFTMAX_NORM) {
         ggml_tensor * clamp = cgraph->nodes[node_idx + 7];
+        GGML_ASSERT(clamp->op == GGML_OP_CLAMP);
         pc.clamp_min = ggml_get_op_params_f32(clamp, 0);
         pc.clamp_max = ggml_get_op_params_f32(clamp, 1);
     }
+    if (mode == TOPK_MOE_SIGMOID_NORM_BIAS) {
+        ggml_tensor * clamp = cgraph->nodes[node_idx + 8];
+        GGML_ASSERT(clamp->op == GGML_OP_CLAMP);
+        pc.clamp_min = ggml_get_op_params_f32(clamp, 0);
+        pc.clamp_max = ggml_get_op_params_f32(clamp, 1);
+    }
+
+#define GATING_FUNC_SOFTMAX 0
+#define GATING_FUNC_SIGMOID 1
+#define GATING_FUNC_SOFTMAX_WEIGHT 2
+
+    pc.gating_func = mode == TOPK_MOE_SIGMOID_NORM_BIAS ? GATING_FUNC_SIGMOID :
+                     mode == TOPK_MOE_LATE_SOFTMAX      ? GATING_FUNC_SOFTMAX_WEIGHT :
+                                                          GATING_FUNC_SOFTMAX;
+    pc.has_bias = mode == TOPK_MOE_SIGMOID_NORM_BIAS;
+    pc.with_norm = mode == TOPK_MOE_EARLY_SOFTMAX_NORM || mode == TOPK_MOE_SIGMOID_NORM_BIAS;
+    if (ctx->fused_topk_moe_scale) {
+        GGML_ASSERT(weights->op == GGML_OP_SCALE);
+        pc.output_scale = ggml_get_op_params_f32(weights, 0);
+        pc.output_bias = ggml_get_op_params_f32(weights, 1);
+    } else {
+        pc.output_scale = 1.0f;
+        pc.output_bias = 0.0f;
+    }

     GGML_ASSERT(n_expert_used <= n_experts);

     const uint32_t rows_per_block = 4;
     std::array<uint32_t, 3> elements = { CEIL_DIV(n_rows, rows_per_block), 1, 1 };

-    ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, {logits_buf, weights_buf, ids_buf}, pc, elements);
+    ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, {logits_buf, bias_buf, weights_buf, ids_buf}, pc, elements);
 }

 static void ggml_vk_rope(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_cgraph * cgraph, int node_idx, bool backprop) {
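For context only (editor's note, not in the diff): when a trailing GGML_OP_SCALE on the routing weights is folded into the fused shader, its two op params become output_scale and output_bias, so the shader can emit w*scale + bias directly instead of running a separate node. A hedged host-side sketch of that folding with made-up names:

    // Hypothetical illustration: fold a trailing scale op's params into the fused output,
    // otherwise leave the output untouched (scale 1, bias 0).
    struct fused_output_params {
        float output_scale = 1.0f;
        float output_bias  = 0.0f;
    };

    static fused_output_params fold_scale(bool fused_scale, float scale_param, float bias_param) {
        fused_output_params p;
        if (fused_scale) {
            p.output_scale = scale_param; // ggml_get_op_params_f32(scale_node, 0)
            p.output_bias  = bias_param;  // ggml_get_op_params_f32(scale_node, 1)
        }
        return p;
    }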
@@ -12128,6 +12189,11 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_cgraph * cgr

         break;
     case GGML_OP_UNARY:
+        if (ctx->fused_topk_moe_mode != TOPK_MOE_COUNT) {
+            ggml_vk_topk_moe(ctx, compute_ctx, cgraph, node_idx);
+            break;
+        }
+
         switch (ggml_get_unary_op(node)) {
         case GGML_UNARY_OP_EXP:
         case GGML_UNARY_OP_SILU:
@@ -12175,7 +12241,7 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_cgraph * cgr

         break;
     case GGML_OP_SOFT_MAX:
-        if (ctx->num_additional_fused_ops) {
+        if (ctx->fused_topk_moe_mode != TOPK_MOE_COUNT) {
             ggml_vk_topk_moe(ctx, compute_ctx, cgraph, node_idx);
         } else {
             ggml_vk_soft_max(ctx, compute_ctx, src0, src1, src2, node);
@@ -12195,7 +12261,7 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_cgraph * cgr

         break;
     case GGML_OP_ARGSORT:
-        if (ctx->num_additional_fused_ops) {
+        if (ctx->fused_topk_moe_mode != TOPK_MOE_COUNT) {
             ggml_vk_topk_moe(ctx, compute_ctx, cgraph, node_idx);
         } else {
             ggml_vk_argsort(ctx, compute_ctx, src0, node);
@@ -13048,6 +13114,24 @@ static bool ggml_vk_can_fuse_topk_moe(ggml_backend_vk_context * ctx, const struc
         get_rows = cgraph->nodes[node_idx + 4];
         argsort  = cgraph->nodes[node_idx + 2];
         break;
+    case TOPK_MOE_SIGMOID_NORM_BIAS:
+        softmax  = cgraph->nodes[node_idx + 0]; // really sigmoid
+        weights  = cgraph->nodes[node_idx + 10];
+        get_rows = cgraph->nodes[node_idx + 5];
+        argsort  = cgraph->nodes[node_idx + 3];
+        if (ggml_get_unary_op(softmax) != GGML_UNARY_OP_SIGMOID) {
+            return false;
+        }
+        // bias is expected to be 1D
+        if (ggml_nrows(cgraph->nodes[node_idx + 2]->src[1]) != 1 ||
+            !ggml_is_contiguous(cgraph->nodes[node_idx + 2]->src[1])) {
+            return false;
+        }
+        // sigmoid fusion seems to generate infinities on moltenvk
+        if (ctx->device->driver_id == vk::DriverId::eMoltenvk) {
+            return false;
+        }
+        break;
     case TOPK_MOE_EARLY_SOFTMAX:
         softmax  = cgraph->nodes[node_idx + 0];
         weights  = cgraph->nodes[node_idx + 4];
@@ -13071,26 +13155,28 @@ static bool ggml_vk_can_fuse_topk_moe(ggml_backend_vk_context * ctx, const struc
     probs = probs->src[0];
     ggml_tensor * selection_probs = argsort->src[0];

-    if (probs != selection_probs) {
+    if (probs != selection_probs && mode != TOPK_MOE_SIGMOID_NORM_BIAS) {
         return false;
     }

-    const float * op_params = (const float *)softmax->op_params;
-
-    float scale = op_params[0];
-    float max_bias = op_params[1];
-
     if (!ggml_is_contiguous(softmax->src[0]) || !ggml_is_contiguous(weights)) {
         return false;
     }

-    if (scale != 1.0f || max_bias != 0.0f) {
-        return false;
-    }
-
-    // don't fuse when masks or sinks are present
-    if (softmax->src[1] || softmax->src[2]) {
-        return false;
+    if (softmax->op == GGML_OP_SOFT_MAX) {
+        const float * op_params = (const float *)softmax->op_params;
+
+        float scale = op_params[0];
+        float max_bias = op_params[1];
+
+        if (scale != 1.0f || max_bias != 0.0f) {
+            return false;
+        }
+
+        // don't fuse when masks or sinks are present
+        if (softmax->src[1] || softmax->src[2]) {
+            return false;
+        }
     }

     const int n_expert = softmax->ne[0];
@ -13363,6 +13449,8 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
|
||||||
total_mul_mat_bytes += bytes;
|
total_mul_mat_bytes += bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ctx->fused_topk_moe_mode = TOPK_MOE_COUNT;
|
||||||
|
ctx->fused_topk_moe_scale = false;
|
||||||
const char *fusion_string {};
|
const char *fusion_string {};
|
||||||
if (!ctx->device->disable_fusion) {
|
if (!ctx->device->disable_fusion) {
|
||||||
uint32_t num_adds = ggml_vk_fuse_multi_add(ctx, cgraph, i);
|
uint32_t num_adds = ggml_vk_fuse_multi_add(ctx, cgraph, i);
|
||||||
|
|
@ -13408,13 +13496,23 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
|
||||||
ctx->num_additional_fused_ops = topk_moe_early_softmax_norm.size() - 1;
|
ctx->num_additional_fused_ops = topk_moe_early_softmax_norm.size() - 1;
|
||||||
// view of argsort writes to memory
|
// view of argsort writes to memory
|
||||||
ctx->fused_ops_write_mask |= 1 << 3;
|
ctx->fused_ops_write_mask |= 1 << 3;
|
||||||
|
ctx->fused_topk_moe_mode = TOPK_MOE_EARLY_SOFTMAX_NORM;
|
||||||
fusion_string = "TOPK_MOE_EARLY_SOFTMAX_NORM";
|
fusion_string = "TOPK_MOE_EARLY_SOFTMAX_NORM";
|
||||||
|
} else if (ggml_can_fuse_subgraph(cgraph, i, topk_moe_sigmoid_norm_bias, { i + 4, i + 10 }) &&
|
||||||
|
ggml_check_edges(cgraph, i, topk_moe_sigmoid_norm_bias_edges) &&
|
||||||
|
ggml_vk_can_fuse_topk_moe(ctx, cgraph, i, TOPK_MOE_SIGMOID_NORM_BIAS)) {
|
||||||
|
ctx->num_additional_fused_ops = topk_moe_sigmoid_norm_bias.size() - 1;
|
||||||
|
// view of argsort writes to memory
|
||||||
|
ctx->fused_ops_write_mask |= 1 << 4;
|
||||||
|
ctx->fused_topk_moe_mode = TOPK_MOE_SIGMOID_NORM_BIAS;
|
||||||
|
fusion_string = "TOPK_MOE_SIGMOID_NORM_BIAS";
|
||||||
} else if (ggml_can_fuse_subgraph(cgraph, i, topk_moe_early_softmax, { i + 3, i + 4 }) &&
|
} else if (ggml_can_fuse_subgraph(cgraph, i, topk_moe_early_softmax, { i + 3, i + 4 }) &&
|
||||||
ggml_check_edges(cgraph, i, topk_moe_early_softmax_edges) &&
|
ggml_check_edges(cgraph, i, topk_moe_early_softmax_edges) &&
|
||||||
ggml_vk_can_fuse_topk_moe(ctx, cgraph, i, TOPK_MOE_EARLY_SOFTMAX)) {
|
ggml_vk_can_fuse_topk_moe(ctx, cgraph, i, TOPK_MOE_EARLY_SOFTMAX)) {
|
||||||
ctx->num_additional_fused_ops = topk_moe_early_softmax.size() - 1;
|
ctx->num_additional_fused_ops = topk_moe_early_softmax.size() - 1;
|
||||||
// view of argsort writes to memory
|
// view of argsort writes to memory
|
||||||
ctx->fused_ops_write_mask |= 1 << 3;
|
ctx->fused_ops_write_mask |= 1 << 3;
|
||||||
|
ctx->fused_topk_moe_mode = TOPK_MOE_EARLY_SOFTMAX;
|
||||||
fusion_string = "TOPK_MOE_EARLY_SOFTMAX";
|
fusion_string = "TOPK_MOE_EARLY_SOFTMAX";
|
||||||
} else if (ggml_can_fuse_subgraph(cgraph, i, topk_moe_late_softmax, { i + 1, i + 5 }) &&
|
} else if (ggml_can_fuse_subgraph(cgraph, i, topk_moe_late_softmax, { i + 1, i + 5 }) &&
|
||||||
ggml_check_edges(cgraph, i, topk_moe_late_softmax_edges) &&
|
ggml_check_edges(cgraph, i, topk_moe_late_softmax_edges) &&
|
||||||
|
|
@ -13422,8 +13520,17 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
|
||||||
ctx->num_additional_fused_ops = topk_moe_late_softmax.size() - 1;
|
ctx->num_additional_fused_ops = topk_moe_late_softmax.size() - 1;
|
||||||
// view of argsort writes to memory
|
// view of argsort writes to memory
|
||||||
ctx->fused_ops_write_mask |= 1 << 1;
|
ctx->fused_ops_write_mask |= 1 << 1;
|
||||||
|
ctx->fused_topk_moe_mode = TOPK_MOE_LATE_SOFTMAX;
|
||||||
fusion_string = "TOPK_MOE_LATE_SOFTMAX";
|
fusion_string = "TOPK_MOE_LATE_SOFTMAX";
|
||||||
}
|
}
|
||||||
|
if (ctx->fused_topk_moe_mode != TOPK_MOE_COUNT) {
|
||||||
|
// Look for an additional scale op to fuse - occurs in deepseek2 and nemotron3 nano.
|
||||||
|
if (ggml_can_fuse_subgraph(cgraph, i + ctx->num_additional_fused_ops - 1, { GGML_OP_DIV, GGML_OP_RESHAPE, GGML_OP_SCALE }, { i + ctx->num_additional_fused_ops + 1 }) ||
|
||||||
|
ggml_can_fuse_subgraph(cgraph, i + ctx->num_additional_fused_ops, { GGML_OP_GET_ROWS, GGML_OP_SCALE }, { i + ctx->num_additional_fused_ops + 1 })) {
|
||||||
|
ctx->fused_topk_moe_scale = true;
|
||||||
|
ctx->num_additional_fused_ops++;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
ctx->fused_ops_write_mask |= 1 << ctx->num_additional_fused_ops;
|
ctx->fused_ops_write_mask |= 1 << ctx->num_additional_fused_ops;
|
||||||
|
|
||||||
|
|
@ -13602,6 +13709,9 @@ static void ggml_vk_graph_optimize(ggml_backend_t backend, struct ggml_cgraph *
|
||||||
if (keep_pattern(topk_moe_early_softmax_norm)) {
|
if (keep_pattern(topk_moe_early_softmax_norm)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (keep_pattern(topk_moe_sigmoid_norm_bias)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if (keep_pattern(topk_moe_early_softmax)) {
|
if (keep_pattern(topk_moe_early_softmax)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
@ -13628,6 +13738,7 @@ static void ggml_vk_graph_optimize(ggml_backend_t backend, struct ggml_cgraph *
|
||||||
}
|
}
|
||||||
// Don't pull forward nodes from fusion patterns
|
// Don't pull forward nodes from fusion patterns
|
||||||
if (match_pattern(topk_moe_early_softmax_norm, j) ||
|
if (match_pattern(topk_moe_early_softmax_norm, j) ||
|
||||||
|
match_pattern(topk_moe_sigmoid_norm_bias, j) ||
|
||||||
match_pattern(topk_moe_early_softmax, j) ||
|
match_pattern(topk_moe_early_softmax, j) ||
|
||||||
match_pattern(topk_moe_late_softmax, j)) {
|
match_pattern(topk_moe_late_softmax, j)) {
|
||||||
continue;
|
continue;
|
||||||
|
|
|
||||||
|
|
@@ -7,6 +7,10 @@

 #include "types.glsl"

+#define GATING_FUNC_SOFTMAX        0
+#define GATING_FUNC_SIGMOID        1
+#define GATING_FUNC_SOFTMAX_WEIGHT 2
+
 layout (push_constant) uniform parameter
 {
     uint n_rows;

@@ -14,15 +18,18 @@ layout (push_constant) uniform parameter
     uint n_expert_used;
     float clamp_min;
     float clamp_max;
+    uint gating_func;
+    uint has_bias;
+    uint with_norm;
+    float output_scale;
+    float output_bias;
 };

 layout(local_size_x_id = 0, local_size_y = 4, local_size_z = 1) in;

 layout(constant_id = 0) const uint WARP_SIZE = 32;
 layout(constant_id = 1) const uint n_experts_spec = 512;
-layout(constant_id = 2) const bool with_norm = true;
-layout(constant_id = 3) const bool late_softmax = false;
-layout(constant_id = 4) const bool nexperts_use_push = false;
+layout(constant_id = 2) const bool nexperts_use_push = false;

 uint n_experts = nexperts_use_push ? n_experts_push : n_experts_spec;

@@ -31,8 +38,9 @@ uint n_experts = nexperts_use_push ? n_experts_push : n_experts_spec;
 const uint experts_per_thread = CEIL_DIV(n_experts_spec, WARP_SIZE);

 layout (binding = 0, std430) readonly buffer Logits {float logits[];};
-layout (binding = 1, std430) writeonly buffer Weights {float weights[];};
-layout (binding = 2, std430) writeonly buffer Ids {uint ids[];};
+layout (binding = 1, std430) readonly buffer BiasProbs {float bias[];};
+layout (binding = 2, std430) writeonly buffer Weights {float weights[];};
+layout (binding = 3, std430) writeonly buffer Ids {uint ids[];};

 const float INFINITY = 1.0 / 0.0;

@@ -87,20 +95,40 @@ void main() {
     }

     const uint logits_offset  = n_experts * row;
+    const uint bias_offset    = 0; // 1D
     const uint weights_offset = n_expert_used * row;
     const uint ids_offset     = n_experts * row;
     const uint lane = gl_SubgroupInvocationID;

-    float wt[experts_per_thread];
+    float probs[experts_per_thread];

     [[unroll]]
     for (uint i = 0; i < n_experts; i += WARP_SIZE) {
         const uint expert = i + lane;
-        wt[i / WARP_SIZE] = (n_experts % WARP_SIZE == 0 || expert < n_experts) ? logits[logits_offset + expert] : -INFINITY;
+        probs[i / WARP_SIZE] = (n_experts % WARP_SIZE == 0 || expert < n_experts) ? logits[logits_offset + expert] : -INFINITY;
     }

-    if (!late_softmax) {
-        softmax_warp_inplace(wt, n_experts, lane, nexperts_use_push);
+    if (gating_func == GATING_FUNC_SOFTMAX) {
+        softmax_warp_inplace(probs, n_experts, lane, nexperts_use_push);
+    } else if (gating_func == GATING_FUNC_SIGMOID) {
+        [[unroll]]
+        for (int i = 0; i < experts_per_thread; i++) {
+            probs[i] = 1.f / (1.f + exp(-probs[i]));
+        }
+    }
+
+    float selection_probs[experts_per_thread];
+    if (has_bias != 0) {
+        [[unroll]]
+        for (uint i = 0; i < n_experts; i += WARP_SIZE) {
+            const uint expert = i + lane;
+            selection_probs[i / WARP_SIZE] = (n_experts % WARP_SIZE == 0 || expert < n_experts) ? probs[i / WARP_SIZE] + bias[bias_offset + expert] : -INFINITY;
+        }
+    } else {
+        [[unroll]]
+        for (int i = 0; i < experts_per_thread; i++) {
+            selection_probs[i] = probs[i];
+        }
     }

     // at this point, each thread holds a portion of softmax,

@@ -117,14 +145,16 @@ void main() {
     }

     for (int k = 0; k < n_expert_used; k++) {
-        float max_val    = wt[0];
+        float max_val    = probs[0];
+        float max_val_s  = selection_probs[0];
         uint  max_expert = lane;

         [[unroll]]
         for (int i = 1; i < experts_per_thread; i++) {
             const uint expert = lane + i * WARP_SIZE;
-            if ((n_experts % WARP_SIZE == 0 || expert < n_experts) && wt[i] > max_val) {
-                max_val    = wt[i];
+            if ((n_experts % WARP_SIZE == 0 || expert < n_experts) && selection_probs[i] > max_val_s) {
+                max_val    = probs[i];
+                max_val_s  = selection_probs[i];
                 max_expert = expert;
             }
         }

@@ -132,9 +162,11 @@ void main() {
         [[unroll]]
         for (uint mask = WARP_SIZE / 2; mask > 0; mask /= 2) {
             const float val    = subgroupShuffleXor(max_val, mask);
+            const float val_s  = subgroupShuffleXor(max_val_s, mask);
             const uint  expert = subgroupShuffleXor(max_expert, mask);
-            if (val > max_val || (val == max_val && expert < max_expert)) {
+            if (val_s > max_val_s || (val_s == max_val_s && expert < max_expert)) {
                 max_val    = val;
+                max_val_s  = val_s;
                 max_expert = expert;
             }
         }

@@ -144,16 +176,14 @@ void main() {
         }

         if ((max_expert & (WARP_SIZE - 1)) == lane) {
-            wt[max_expert / WARP_SIZE] = -INFINITY;
+            selection_probs[max_expert / WARP_SIZE] = -INFINITY;

             ids[ids_offset + k] = max_expert;
-            if (with_norm) {
-                wt_sum += max_val;
-            }
+            wt_sum += max_val;
         }
     }

-    if (with_norm) {
+    if (with_norm != 0) {
         wt_sum = subgroupAdd(wt_sum);
         wt_sum = clamp(wt_sum, clamp_min, clamp_max);
         const float inv_sum = 1.0f / wt_sum;

@@ -164,7 +194,7 @@ void main() {
         }
     }

-    if (late_softmax) {
+    if (gating_func == GATING_FUNC_SOFTMAX_WEIGHT) {
         softmax_warp_inplace(output_weights, n_expert_used, lane, true);
     }

@@ -172,7 +202,7 @@ void main() {
     for (uint i = 0; i < experts_per_thread; ++i) {
         uint idx = i * WARP_SIZE + lane;
         if (idx < n_expert_used) {
-            weights[weights_offset + idx] = output_weights[i];
+            weights[weights_offset + idx] = output_scale * output_weights[i] + output_bias;
         }
     }
 }
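A scalar reference for the gating path above can help when reading the shader: experts are ranked by the optionally biased selection probabilities while the emitted weights stay unbiased, then optionally normalized and affinely scaled. The sketch below is illustrative only; the function and enum names are made up, and the clamp of the normalization sum and the lowest-index tie-breaking are omitted.

// Minimal CPU sketch of the fused top-k MoE gating (assumptions noted above).
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

enum moe_gating : uint32_t { GATE_SOFTMAX = 0, GATE_SIGMOID = 1, GATE_SOFTMAX_WEIGHT = 2 };

static void topk_moe_reference(const std::vector<float> & logits,
                               const std::vector<float> & bias, // empty means has_bias == 0
                               moe_gating gate, int n_expert_used, bool with_norm,
                               float output_scale, float output_bias,
                               std::vector<uint32_t> & ids, std::vector<float> & weights) {
    const size_t n_experts = logits.size();
    std::vector<float> probs(logits);
    if (gate == GATE_SOFTMAX) {
        const float mx = *std::max_element(probs.begin(), probs.end());
        float sum = 0.0f;
        for (float & p : probs) { p = std::exp(p - mx); sum += p; }
        for (float & p : probs) { p /= sum; }
    } else if (gate == GATE_SIGMOID) {
        for (float & p : probs) { p = 1.0f / (1.0f + std::exp(-p)); }
    }
    // selection ranks by probs + bias, but the output weight is the unbiased prob
    std::vector<float> selection(probs);
    if (!bias.empty()) {
        for (size_t i = 0; i < n_experts; ++i) { selection[i] += bias[i]; }
    }
    ids.resize(n_expert_used);
    weights.resize(n_expert_used);
    float wt_sum = 0.0f;
    for (int k = 0; k < n_expert_used; ++k) {
        const size_t best = std::max_element(selection.begin(), selection.end()) - selection.begin();
        selection[best] = -INFINITY; // exclude from the next round
        ids[k]     = (uint32_t) best;
        weights[k] = probs[best];
        wt_sum    += probs[best];
    }
    if (with_norm) {
        for (float & w : weights) { w /= wt_sum; }
    }
    if (gate == GATE_SOFTMAX_WEIGHT) { // late softmax over the selected weights only
        const float mx = *std::max_element(weights.begin(), weights.end());
        float sum = 0.0f;
        for (float & w : weights) { w = std::exp(w - mx); sum += w; }
        for (float & w : weights) { w /= sum; }
    }
    for (float & w : weights) { w = output_scale * w + output_bias; }
}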
@@ -294,7 +294,9 @@ class Keys:
        USE_GELU             = "clip.use_gelu"
        USE_SILU             = "clip.use_silu"
        N_WA_PATTERN         = "clip.vision.n_wa_pattern" # used by qwen2.5vl
+       WA_LAYER_INDEXES     = "clip.vision.wa_layer_indexes" # used by youtuvl
        IS_DEEPSTACK_LAYERS  = "clip.vision.is_deepstack_layers"
+       WINDOW_SIZE          = "clip.vision.window_size"

        class Attention:
            HEAD_COUNT = "clip.vision.attention.head_count"

@@ -3492,7 +3494,9 @@ class VisionProjectorType:
    COGVLM = "cogvlm"
    JANUS_PRO = "janus_pro"
    LFM2A = "lfm2a" # audio
+   MUSIC_FLAMINGO = "musicflamingo" # audio
    GLM4V = "glm4v"
+   YOUTUVL = "youtuvl"


 # Items here are (block size, type size)

@@ -1129,11 +1129,40 @@ class GGUFWriter:
        self.add_uint32(Keys.ClipVision.Projector.SCALE_FACTOR, value)

    def add_vision_n_wa_pattern(self, value: int) -> None:
+       """Add window attention pattern interval for vision models.
+
+       This defines the pattern interval for window attention vs full attention layers.
+       For example, if n_wa_pattern=4, then layers 3, 7, 11, ... use full attention,
+       while other layers use window attention.
+
+       Used by models like Qwen2.5-VL where full attention layers follow a regular pattern.
+       """
        self.add_uint32(Keys.ClipVision.N_WA_PATTERN, value)

+   def add_vision_wa_layer_indexes(self, layers: Sequence[int]) -> None:
+       """Add explicit layer indexes that use full attention in vision models.
+
+       This specifies the exact layer indices (0-based) that should use full attention
+       instead of window attention. All other layers will use window attention.
+
+       Args:
+           layers: List of layer indices that use full attention (e.g., [3, 7, 11, 15])
+
+       Used by models like YoutuVL where full attention layers are explicitly specified
+       rather than following a regular pattern.
+
+       Difference from add_vision_n_wa_pattern:
+       - n_wa_pattern: Defines a regular interval pattern (every Nth layer uses full attention)
+       - wa_layer_indexes: Explicitly lists which layers use full attention (irregular pattern)
+       """
+       self.add_array(Keys.ClipVision.WA_LAYER_INDEXES, layers)
+
    def add_vision_is_deepstack_layers(self, layers: Sequence[bool]) -> None:
        self.add_array(Keys.ClipVision.IS_DEEPSTACK_LAYERS, layers)

+   def add_vision_window_size(self, value: int) -> None:
+       self.add_uint32(Keys.ClipVision.WINDOW_SIZE, value)
+
    # audio models

    def add_audio_projection_dim(self, value: int) -> None:
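How a consumer of the two window-attention keys documented above might fold them into one per-layer decision is sketched below; the function and variable names are invented for illustration and do not appear in the patch.

// Combine an explicit index list (wa_layer_indexes) or a regular interval
// (n_wa_pattern) into a per-layer "uses full attention" flag.
#include <cstdint>
#include <set>
#include <vector>

static std::vector<bool> full_attn_layers(uint32_t n_layer,
                                          uint32_t n_wa_pattern,                  // 0 means unused
                                          const std::set<uint32_t> & wa_indexes)  // empty means unused
{
    std::vector<bool> is_full(n_layer, false);
    for (uint32_t il = 0; il < n_layer; ++il) {
        if (!wa_indexes.empty()) {
            is_full[il] = wa_indexes.count(il) > 0;      // explicit, irregular pattern
        } else if (n_wa_pattern > 0) {
            is_full[il] = (il + 1) % n_wa_pattern == 0;  // every Nth layer, e.g. 3, 7, 11, ...
        } else {
            is_full[il] = true;                          // no window attention configured
        }
    }
    return is_full;
}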
@@ -1221,6 +1221,7 @@ class TensorNameMap:
        MODEL_TENSOR.V_MMPROJ: (
            "multi_modal_projector.linear_{bid}",
            "visual.merger.mlp.{bid}", # qwen2vl
+           "merger.mlp.{bid}",
        ),

        MODEL_TENSOR.V_MMPROJ_FC: (

@@ -1258,6 +1259,7 @@ class TensorNameMap:
            "visual.patch_embed.proj", # qwen2vl
            "vision_tower.patch_embed.proj", # kimi-vl
            "model.vision.patch_embedding.proj", # cogvlm
+           "siglip2.vision_model.embeddings.patch_embedding",
        ),

        MODEL_TENSOR.V_ENC_EMBD_NORM: (

@@ -1291,6 +1293,7 @@ class TensorNameMap:
            "vision_encoder.transformer.layers.{bid}.attention.wq", # pixtral
            "visual.blocks.{bid}.attn.q", # qwen2vl, generated
            "vision_tower.encoder.blocks.{bid}.wq", # kimi-vl, generated
+           "siglip2.vision_model.encoder.layers.{bid}.self_attn.q_proj", # youtuvl
        ),

        MODEL_TENSOR.V_ENC_ATTN_Q_NORM: (

@@ -1308,6 +1311,7 @@ class TensorNameMap:
            "vision_encoder.transformer.layers.{bid}.attention.wk", # pixtral
            "visual.blocks.{bid}.attn.k", # qwen2vl, generated
            "vision_tower.encoder.blocks.{bid}.wk", # kimi-vl, generated
+           "siglip2.vision_model.encoder.layers.{bid}.self_attn.k_proj",
        ),

        MODEL_TENSOR.V_ENC_ATTN_K_NORM: (

@@ -1325,6 +1329,7 @@ class TensorNameMap:
            "vision_encoder.transformer.layers.{bid}.attention.wv", # pixtral
            "visual.blocks.{bid}.attn.v", # qwen2vl, generated
            "vision_tower.encoder.blocks.{bid}.wv", # kimi-vl, generated
+           "siglip2.vision_model.encoder.layers.{bid}.self_attn.v_proj",
        ),

        MODEL_TENSOR.V_ENC_INPUT_NORM: (

@@ -1339,6 +1344,7 @@ class TensorNameMap:
            "visual.blocks.{bid}.norm1", # qwen2vl
            "vision_tower.encoder.blocks.{bid}.norm0", # kimi-vl (norm0/norm1)
            "model.vision.transformer.layers.{bid}.input_layernorm", # cogvlm
+           "siglip2.vision_model.encoder.layers.{bid}.layer_norm1",
        ),

        MODEL_TENSOR.V_ENC_ATTN_O: (

@@ -1354,6 +1360,7 @@ class TensorNameMap:
            "visual.blocks.{bid}.attn.proj", # qwen2vl
            "vision_tower.encoder.blocks.{bid}.wo", # kimi-vl
            "model.vision.transformer.layers.{bid}.attention.dense", # cogvlm
+           "siglip2.vision_model.encoder.layers.{bid}.self_attn.out_proj", # youtuvl
        ),

        MODEL_TENSOR.V_ENC_POST_ATTN_NORM: (

@@ -1368,6 +1375,7 @@ class TensorNameMap:
            "visual.blocks.{bid}.norm2", # qwen2vl
            "vision_tower.encoder.blocks.{bid}.norm1", # kimi-vl (norm0/norm1)
            "model.vision.transformer.layers.{bid}.post_attention_layernorm", # cogvlm
+           "siglip2.vision_model.encoder.layers.{bid}.layer_norm2",
        ),

        MODEL_TENSOR.V_ENC_FFN_UP: (

@@ -1383,6 +1391,7 @@ class TensorNameMap:
            "visual.blocks.{bid}.mlp.linear_fc1", # qwen3vl
            "vision_tower.encoder.blocks.{bid}.mlp.fc0", # kimi-vl (fc0/fc1)
            "model.vision.transformer.layers.{bid}.mlp.fc1", # cogvlm
+           "siglip2.vision_model.encoder.layers.{bid}.mlp.fc1",
        ),

        MODEL_TENSOR.V_ENC_FFN_GATE: (

@@ -1404,6 +1413,7 @@ class TensorNameMap:
            "visual.blocks.{bid}.mlp.linear_fc2", # qwen3vl
            "vision_tower.encoder.blocks.{bid}.mlp.fc1", # kimi-vl (fc0/fc1)
            "model.vision.transformer.layers.{bid}.mlp.fc2", # cogvlm
+           "siglip2.vision_model.encoder.layers.{bid}.mlp.fc2",
        ),

        MODEL_TENSOR.V_LAYER_SCALE_1: (

@@ -1430,6 +1440,7 @@ class TensorNameMap:
            "visual.merger.ln_q", # qwen2vl
            "vision_tower.encoder.final_layernorm", # kimi-vl
            "visual.post_layernorm", # glm4v
+           "siglip2.vision_model.post_layernorm",
        ),

        MODEL_TENSOR.V_MM_POST_NORM: (

@@ -1446,6 +1457,7 @@ class TensorNameMap:
            "multi_modal_projector.pre_norm",
            "pre_mm_projector_norm",
            "model.vision.linear_proj.norm1", # cogvlm
+           "merger.ln_q",
        ),

        MODEL_TENSOR.V_MM_SOFT_EMB_NORM: (

@@ -1 +1 @@
-130bc125a88bb57664b88932c48c38a1cb316fac
+ebc3a0f4a56be1c9424a89fbec09962ac34fde85
@@ -74,6 +74,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
    { "seed_oss",          LLM_CHAT_TEMPLATE_SEED_OSS          },
    { "grok-2",            LLM_CHAT_TEMPLATE_GROK_2            },
    { "pangu-embedded",    LLM_CHAT_TEMPLATE_PANGU_EMBED       },
+   { "solar-open",        LLM_CHAT_TEMPLATE_SOLAR_OPEN        },
 };

 llm_chat_template llm_chat_template_from_str(const std::string & name) {

@@ -216,6 +217,8 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
        return LLM_CHAT_TEMPLATE_GROK_2;
    } else if (tmpl_contains(LU8("[unused9]系统:[unused10]"))) {
        return LLM_CHAT_TEMPLATE_PANGU_EMBED;
+   } else if (tmpl_contains("<|begin|>") && tmpl_contains("<|end|>") && tmpl_contains("<|content|>")) {
+       return LLM_CHAT_TEMPLATE_SOLAR_OPEN;
    }
    return LLM_CHAT_TEMPLATE_UNKNOWN;
 }

@@ -845,6 +848,14 @@ int32_t llm_chat_apply_template(
        if (add_ass) {
            ss << "[unused9]助手:";
        }
+   } else if (tmpl == LLM_CHAT_TEMPLATE_SOLAR_OPEN) {
+       for (auto message : chat) {
+           std::string role(message->role);
+           ss << "<|begin|>" << role << "<|content|>" << message->content << "<|end|>";
+       }
+       if (add_ass) {
+           ss << "<|begin|>assistant";
+       }
    } else {
        // template not supported
        return -1;
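For reference, with add_ass enabled the new branch renders a system plus user exchange as one flat sequence of begin/content/end blocks, with the generation prompt stopping right after the assistant role. The message text below is invented; the token layout simply follows the loop above.

// Expected rendering for a two-message chat under LLM_CHAT_TEMPLATE_SOLAR_OPEN (illustrative).
const char * example_solar_open_prompt =
    "<|begin|>system<|content|>You are a helpful assistant.<|end|>"
    "<|begin|>user<|content|>Hello!<|end|>"
    "<|begin|>assistant";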
@@ -54,6 +54,7 @@ enum llm_chat_template {
    LLM_CHAT_TEMPLATE_SEED_OSS,
    LLM_CHAT_TEMPLATE_GROK_2,
    LLM_CHAT_TEMPLATE_PANGU_EMBED,
+   LLM_CHAT_TEMPLATE_SOLAR_OPEN,
    LLM_CHAT_TEMPLATE_UNKNOWN,
 };
@@ -240,9 +240,10 @@ struct llama_file::impl {
                throw std::runtime_error("unexpectedly reached end of file");
            }
        } else {
-           bool successful = false;
-           while (!successful) {
-               off_t ret = read(fd, ptr, len);
+           size_t bytes_read = 0;
+           while (bytes_read < len) {
+               const size_t to_read = len - bytes_read;
+               ssize_t ret = ::read(fd, reinterpret_cast<char *>(ptr) + bytes_read, to_read);

                if (ret == -1) {
                    if (errno == EINTR) {

@@ -251,10 +252,16 @@ struct llama_file::impl {
                    throw std::runtime_error(format("read error: %s", strerror(errno)));
                }
                if (ret == 0) {
+                   // EOF: allow if this read was only pulling alignment padding past file end
+                   off_t pos = lseek(fd, 0, SEEK_CUR);
+                   if (pos != -1 && (size_t) pos == size) {
+                       std::memset(reinterpret_cast<char *>(ptr) + bytes_read, 0, len - bytes_read);
+                       return;
+                   }
                    throw std::runtime_error("unexpectedly reached end of file");
                }

-               successful = true;
+               bytes_read += (size_t) ret;
            }
        }
    }
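The rewritten loop accounts for read() legitimately returning fewer bytes than requested. Stripped of the alignment-padding special case the patch adds, the same pattern in isolation looks like the helper below; this is a hypothetical standalone sketch, not code from the patch.

// Retry POSIX read() until the buffer is full, treating EINTR as a retry
// and ret == 0 as an unexpected end of file.
#include <cerrno>
#include <cstddef>
#include <stdexcept>
#include <unistd.h>

static void read_exact(int fd, void * buf, size_t len) {
    size_t done = 0;
    while (done < len) {
        ssize_t ret = ::read(fd, static_cast<char *>(buf) + done, len - done);
        if (ret == -1) {
            if (errno == EINTR) {
                continue; // interrupted by a signal, retry
            }
            throw std::runtime_error("read error");
        }
        if (ret == 0) {
            throw std::runtime_error("unexpectedly reached end of file");
        }
        done += static_cast<size_t>(ret);
    }
}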
@@ -126,6 +126,7 @@ const char * llm_type_name(llm_type type) {
        case LLM_TYPE_31B_A3_5B:  return "31B.A3.5B";
        case LLM_TYPE_80B_A3B:    return "80B.A3B";
        case LLM_TYPE_100B_A6B:   return "100B.A6B";
+       case LLM_TYPE_102B_A12B:  return "102B.A12B";
        case LLM_TYPE_106B_A12B:  return "106B.A12B";
        case LLM_TYPE_230B_A10B:  return "230B.A10B";
        case LLM_TYPE_235B_A22B:  return "235B.A22B";

@@ -1682,7 +1683,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                ml.get_key(LLM_KV_ATTENTION_VALUE_LENGTH_MLA, hparams.n_embd_head_v_mla, false);
                ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
                ml.get_key(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared);
-               ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale);
+               ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale, false);
                ml.get_key(LLM_KV_EXPERT_WEIGHTS_NORM, hparams.expert_weights_norm, false);
                ml.get_key(LLM_KV_EXPERT_GATING_FUNC, hparams.expert_gating_func, false);
                if (hparams.expert_gating_func == LLAMA_EXPERT_GATING_FUNC_TYPE_NONE) {

@@ -1778,6 +1779,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {

                switch (hparams.n_layer) {
                    case 47: type = LLM_TYPE_106B_A12B; break; // GLM-4.5-Air (46 layers + 1 NextN layer)
+                   case 48: type = LLM_TYPE_102B_A12B; break; // Solar Open
                    case 93: type = LLM_TYPE_355B_A32B; break; // GLM-4.5 (92 layers + 1 NextN layer)
                    default: type = LLM_TYPE_UNKNOWN;
                }

@@ -3320,7 +3322,14 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                    layer.attn_norm_2_b = create_tensor(tn(LLM_TENSOR_ATTN_NORM_2, "bias", i), {n_embd}, TENSOR_NOT_REQUIRED);

                    layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, TENSOR_NOT_REQUIRED);
-                   layer.ffn_up   = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, layer.ffn_gate ? n_ff : n_ff * 2}, 0);
+
+                   const auto tn_ffn_up_weight = tn(LLM_TENSOR_FFN_UP, "weight", i);
+                   ggml_tensor * t_ffn_up = ml.get_tensor_meta(tn_ffn_up_weight.str().c_str());
+                   const int64_t n_ffn_up = t_ffn_up ? t_ffn_up->ne[1] : n_ff;
+
+                   GGML_ASSERT(n_ffn_up == n_ff || n_ffn_up == n_ff * 2);
+                   layer.ffn_up   = create_tensor(tn_ffn_up_weight, {n_embd, n_ffn_up}, 0);
+                   layer.ffn_up_b = create_tensor(tn(LLM_TENSOR_FFN_UP, "bias", i), {n_ffn_up}, TENSOR_NOT_REQUIRED);

                    layer.ffn_down   = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd}, 0);
                    layer.ffn_down_b = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, 0);

@@ -4776,7 +4785,11 @@ bool llama_model::load_tensors(llama_model_loader & ml) {

                    // output
                    output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
-                   output      = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, 0);
+                   // try to load output.weight, if not found, use token_embd (tied embeddings)
+                   output      = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
+                   if (!output) {
+                       output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
+                   }

                    for (int i = 0; i < n_layer; ++i) {
                        auto & layer = layers[i];

@@ -4839,7 +4852,11 @@ bool llama_model::load_tensors(llama_model_loader & ml) {

                    // output
                    output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
-                   output      = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, 0);
+                   // try to load output.weight, if not found, use token_embd (tied embeddings)
+                   output      = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
+                   if (!output) {
+                       output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
+                   }

                    for (int i = 0; i < n_layer; ++i) {
                        auto & layer = layers[i];

@@ -5206,9 +5223,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                    layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), { n_embd, n_embd_head_k * n_head }, flags);
                    layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), { n_embd, n_embd_k_gqa }, flags);
                    layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), { n_embd, n_embd_v_gqa }, flags);
-                   layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), { n_embd_head_k * n_head }, flags);
-                   layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), { n_embd_k_gqa }, flags);
-                   layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), { n_embd_v_gqa }, flags);
+                   layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), { n_embd_head_k * n_head }, TENSOR_NOT_REQUIRED | flags);
+                   layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), { n_embd_k_gqa }, TENSOR_NOT_REQUIRED | flags);
+                   layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), { n_embd_v_gqa }, TENSOR_NOT_REQUIRED | flags);

                    layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), { n_embd_head_k * n_head, n_embd }, flags);

@@ -7440,7 +7457,7 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
            } break;
        case LLM_ARCH_MODERN_BERT:
            {
-               llm = std::make_unique<llm_build_modern_bert<true>>(*this, params);
+               llm = std::make_unique<llm_build_modern_bert>(*this, params);
            } break;
        case LLM_ARCH_NEO_BERT:
            {

@@ -119,6 +119,7 @@ enum llm_type {
    LLM_TYPE_31B_A3_5B,
    LLM_TYPE_80B_A3B,   // Qwen3 Next
    LLM_TYPE_100B_A6B,
+   LLM_TYPE_102B_A12B, // Solar-Open
    LLM_TYPE_106B_A12B, // GLM-4.5-Air
    LLM_TYPE_230B_A10B, // Minimax M2
    LLM_TYPE_235B_A22B,
@@ -314,6 +314,12 @@ struct llm_tokenizer_bpe : llm_tokenizer {
                "[!\"#$%&'()*+,\\-./:;<=>?@\\[\\\\\\]^_`{|}~][A-Za-z]+|[^\r\n\\p{L}\\p{P}\\p{S}]?[\\p{L}\\p{M}]+| ?[\\p{P}\\p{S}]+[\r\n]*|\\s*[\r\n]+|\\s+(?!\\S)|\\s+",
            };
            break;
+       case LLAMA_VOCAB_PRE_TYPE_YOUTU:
+           regex_exprs = {
+               "[가-힣ㄱ-ㆎ]+|[!…“”‘’—:;,、-〿︰-﹏]+|[ㄅ-ㄯ]+|[一-龥-ゟ゠-ヿ]+",
+               "[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]*[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]+(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]+[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]*(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+           };
+           break;
        case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER:
            regex_exprs = {
                "[\r\n]",

@@ -355,6 +361,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
        case LLAMA_VOCAB_PRE_TYPE_STABLELM2:
        case LLAMA_VOCAB_PRE_TYPE_QWEN2:
        case LLAMA_VOCAB_PRE_TYPE_HUNYUAN:
+       case LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN:
            regex_exprs = {
                // original regex from tokenizer.json
                // "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"

@@ -1860,6 +1867,11 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                tokenizer_pre == "deepseek-v3") {
                pre_type = LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM;
                clean_spaces = false;
+           } else if (
+               tokenizer_pre == "youtu") {
+               pre_type = LLAMA_VOCAB_PRE_TYPE_YOUTU;
+               clean_spaces = false;
+               ignore_merges = true;
            } else if (
                tokenizer_pre == "falcon") {
                pre_type = LLAMA_VOCAB_PRE_TYPE_FALCON;

@@ -2015,6 +2027,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                tokenizer_pre == "minimax-m2") {
                pre_type = LLAMA_VOCAB_PRE_TYPE_MINIMAX_M2;
                clean_spaces = false;
+           } else if (
+               tokenizer_pre == "solar-open") {
+               pre_type = LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN;
+               clean_spaces = false;
            } else {
                throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
            }

@@ -2358,6 +2374,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                        || t.first == "<|end|>"
                        || t.first == "<|return|>" // o200k_harmony
                        || t.first == "<|call|>"   // o200k_harmony
+                       || t.first == "<|flush|>"  // solar-open
+                       || t.first == "<|calls|>"  // solar-open
                        || t.first == "<end_of_turn>"
                        || t.first == "<|endoftext|>"
                        || t.first == "<|eom_id|>"

@@ -2404,13 +2422,14 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
            LLAMA_LOG_WARN("%s: special_eom_id is not in special_eog_ids - the tokenizer config may be incorrect\n", __func__);
        }

-       // TODO: workaround for o200k_harmony tokenizer: the "<|end|>" token should not be EOG
-       // we don't have a good way to detect this, so for now, if we have "<|return|>" and "<|call|>" tokens,
+       // TODO: workaround for o200k_harmony and solar-open tokenizer: the "<|end|>" token should not be EOG
+       // we don't have a good way to detect this, so for now, if we have "<|return|>" and "<|call|>" tokens ("<|calls|>" and "<|flush|>" for solar-open),
        // we remove the "<|end|>" token from the EOG list
        {
            bool has_return = false;
            bool has_call   = false;
            bool has_end    = false;
+           bool has_flush  = false;

            llama_token end_id = LLAMA_TOKEN_NULL;

@@ -2420,18 +2439,20 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {

                if (id_to_token[tid].text == "<|return|>") {
                    has_return = true;
-               } else if (id_to_token[tid].text == "<|call|>") {
+               } else if (id_to_token[tid].text == "<|call|>" || id_to_token[tid].text == "<|calls|>") {
                    has_call = true;
+               } else if (id_to_token[tid].text == "<|flush|>") {
+                   has_flush = true;
                } else if (id_to_token[tid].text == "<|end|>") {
                    has_end = true;
                    end_id = tid;
                }
            }

-           if (has_return && has_call && has_end) {
+           if ((has_return && has_call && has_end) || (has_call && has_flush && has_end)) {
                special_eog_ids.erase(end_id);
                id_to_token[end_id].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
-               LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
+               LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>', or '<|calls|>' and '<|flush|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
            }
        }
    }

@@ -51,6 +51,8 @@ enum llama_vocab_pre_type {
    LLAMA_VOCAB_PRE_TYPE_GRANITE_DOCLING = 40,
    LLAMA_VOCAB_PRE_TYPE_MINIMAX_M2      = 41,
    LLAMA_VOCAB_PRE_TYPE_AFMOE           = 42,
+   LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN      = 43,
+   LLAMA_VOCAB_PRE_TYPE_YOUTU           = 44,
 };

 struct LLM_KV;
@@ -142,11 +142,13 @@ llm_build_bert::llm_build_bert(const llama_model & model, const llm_graph_params
                    LLM_FFN_GELU, LLM_FFN_SEQ, il);
            cb(cur, "ffn_out", il);
        } else if (model.arch == LLM_ARCH_JINA_BERT_V2) {
+           const bool up_contains_gate = !model.layers[il].ffn_gate && model.layers[il].ffn_up->ne[1] != hparams.n_ff();
+           auto type_op = up_contains_gate ? LLM_FFN_GEGLU : LLM_FFN_GELU;
            cur = build_ffn(cur,
-                   model.layers[il].ffn_up,   NULL,                        NULL,
+                   model.layers[il].ffn_up,   model.layers[il].ffn_up_b,   NULL,
                    model.layers[il].ffn_gate, NULL,                        NULL,
                    model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL, NULL,
-                   model.layers[il].ffn_gate ? LLM_FFN_GELU : LLM_FFN_GEGLU, LLM_FFN_PAR, il);
+                   type_op, LLM_FFN_PAR, il);
            cb(cur, "ffn_out", il);
        } else {
            cur = build_ffn(cur,
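The up_contains_gate check above relies on a packed layout: when there is no separate ffn_gate tensor and ffn_up is 2*n_ff wide, the single up projection carries both halves and GEGLU splits them again. A rough scalar illustration follows; which half carries the gate is an assumption of this sketch, not something the patch states.

// Illustrative GEGLU over a packed up-projection output of width 2*n_ff.
#include <cmath>
#include <cstddef>
#include <vector>

static std::vector<float> geglu_packed(const std::vector<float> & up_out) {
    const size_t n_ff = up_out.size() / 2;
    std::vector<float> out(n_ff);
    for (size_t i = 0; i < n_ff; ++i) {
        const float a = up_out[i];        // first half (assumed activation input)
        const float b = up_out[n_ff + i]; // second half (assumed linear gate)
        const float gelu_a = 0.5f * a * (1.0f + std::tanh(0.7978845608f * (a + 0.044715f * a * a * a)));
        out[i] = gelu_a * b;              // one half is GELU-activated and gates the other
    }
    return out;
}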
@@ -215,7 +215,7 @@ llm_build_deepseek2::llm_build_deepseek2(const llama_model & model, const llm_gr
                    model.layers[il].ffn_exp_probs_b,
                    n_expert, n_expert_used,
                    LLM_FFN_SILU, hparams.expert_weights_norm,
-                   true, hparams.expert_weights_scale,
+                   hparams.expert_weights_scale, hparams.expert_weights_scale,
                    (llama_expert_gating_func_type) hparams.expert_gating_func,
                    il);
            cb(moe_out, "ffn_moe_out", il);

@@ -332,7 +332,6 @@ struct llm_build_mistral3 : public llm_graph_context {
    llm_build_mistral3(const llama_model & model, const llm_graph_params & params);
 };

-template <bool iswa>
 struct llm_build_modern_bert : public llm_graph_context {
    llm_build_modern_bert(const llama_model & model, const llm_graph_params & params);
 };

@@ -1,7 +1,6 @@
 #include "models.h"

-template <bool iswa>
-llm_build_modern_bert<iswa>::llm_build_modern_bert(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
+llm_build_modern_bert::llm_build_modern_bert(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
    const int64_t n_embd_head = hparams.n_embd_head_v;
    const int64_t n_embd_gqa  = hparams.n_embd_v_gqa();

@@ -24,13 +23,7 @@ llm_build_modern_bert<iswa>::llm_build_modern_bert(const llama_model & model, co
    auto * inp_attn = build_attn_inp_no_cache();

    for (int il = 0; il < n_layer; ++il) {
-       float freq_base_l = 0.0f;
-
-       if constexpr (iswa) {
-           freq_base_l = model.get_rope_freq_base(cparams, il);
-       } else {
-           freq_base_l = freq_base;
-       }
+       float freq_base_l = model.get_rope_freq_base(cparams, il);

        cur = inpL;

@@ -120,7 +113,3 @@ llm_build_modern_bert<iswa>::llm_build_modern_bert(const llama_model & model, co
    res->t_embd = cur;
    ggml_build_forward_expand(gf, cur);
 }
-
-// Explicit template instantiations
-template struct llm_build_modern_bert<false>;
-template struct llm_build_modern_bert<true>;
@@ -964,6 +964,11 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
        { "\\p{P}", unicode_cpt_flags::PUNCTUATION },
        { "\\p{M}", unicode_cpt_flags::ACCENT_MARK },
        { "\\p{S}", unicode_cpt_flags::SYMBOL },
+       { "\\p{Lu}", unicode_cpt_flags::LETTER }, // Uppercase letter
+       { "\\p{Ll}", unicode_cpt_flags::LETTER }, // Lowercase letter
+       { "\\p{Lt}", unicode_cpt_flags::LETTER }, // Titlecase letter
+       { "\\p{Lm}", unicode_cpt_flags::LETTER }, // Modifier letter
+       { "\\p{Lo}", unicode_cpt_flags::LETTER }, // Other letter
    };

    static const std::map<int, int> k_ucat_cpt = {

@@ -1074,22 +1079,26 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
                continue;
            }

-           if (regex_expr[i + 0] == '\\' && i + 4 < regex_expr.size() &&
-               regex_expr[i + 1] == 'p' &&
-               regex_expr[i + 2] == '{' &&
-               regex_expr[i + 4] == '}') {
-               const std::string pat = regex_expr.substr(i, 5);
-               if (k_ucat_enum.find(pat) != k_ucat_enum.end()) {
-                   if (!inside) {
-                       regex_expr_collapsed += '[';
-                   }
-                   regex_expr_collapsed += k_ucat_cpt.at(k_ucat_enum.at(pat));
-                   regex_expr_collapsed += k_ucat_map.at(k_ucat_enum.at(pat));
-                   if (!inside) {
-                       regex_expr_collapsed += ']';
-                   }
-                   i += 4;
-                   continue;
-               }
-           }
+           // Match \p{...} Unicode properties of varying lengths
+           if (regex_expr[i + 0] == '\\' && i + 3 < regex_expr.size() &&
+               regex_expr[i + 1] == 'p' &&
+               regex_expr[i + 2] == '{') {
+               // Find the closing brace
+               size_t closing_brace = regex_expr.find('}', i + 3);
+               if (closing_brace != std::string::npos && closing_brace <= i + 10) { // reasonable limit
+                   const std::string pat = regex_expr.substr(i, closing_brace - i + 1);
+                   if (k_ucat_enum.find(pat) != k_ucat_enum.end()) {
+                       if (!inside) {
+                           regex_expr_collapsed += '[';
+                       }
+                       regex_expr_collapsed += k_ucat_cpt.at(k_ucat_enum.at(pat));
+                       regex_expr_collapsed += k_ucat_map.at(k_ucat_enum.at(pat));
+                       if (!inside) {
+                           regex_expr_collapsed += ']';
+                       }
+                       i = closing_brace;
+                       continue;
+                   }
+               }
+           }
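The brace arithmetic above now covers both one- and two-letter categories; a quick standalone check of the offsets (illustrative only, not part of the patch) is:

// For "\p{Lu}" the closing brace sits one position further out than for "\p{L}",
// and substr(i, closing_brace - i + 1) still captures the whole token.
#include <cassert>
#include <string>

int main() {
    std::string re = "\\p{Lu}|\\p{L}";
    size_t i = 0;                        // start of "\p{Lu}"
    size_t close = re.find('}', i + 3);  // index 5
    assert(re.substr(i, close - i + 1) == "\\p{Lu}");

    i = 7;                               // start of "\p{L}" after the '|'
    close = re.find('}', i + 3);         // index 11
    assert(re.substr(i, close - i + 1) == "\\p{L}");
    return 0;
}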
@@ -1158,6 +1158,7 @@ struct test_case {
    }

    virtual bool run_whole_graph() { return false; }
+   virtual std::vector<ggml_tensor *> fusion_test_nodes() { return {}; }

    ggml_cgraph * gf = nullptr;
    ggml_cgraph * gb = nullptr;

@@ -1391,7 +1392,13 @@ struct test_case {
            GGML_UNUSED(index);
        };

-       const bool cmp_ok = ggml_backend_compare_graph_backend(backend1, backend2, gf, callback, &ud, run_whole_graph() ? out : nullptr);
+       std::vector<ggml_tensor *> fused_nodes_to_verify = fusion_test_nodes();
+       if (fused_nodes_to_verify.size() == 0 && run_whole_graph()) {
+           fused_nodes_to_verify.push_back(out);
+       }
+       const bool cmp_ok = ggml_backend_compare_graph_backend(backend1, backend2, gf, callback, &ud,
+                                                              run_whole_graph() ? fused_nodes_to_verify.data() : nullptr,
+                                                              fused_nodes_to_verify.size());

        ggml_backend_buffer_free(buf);

@@ -5180,6 +5187,8 @@ struct test_topk_moe : public test_case {
    const bool bias_probs;
    const MoeGatingFunc gating_func;
    const float scale_w;
+   ggml_tensor * weights {};
+   ggml_tensor * selected_experts {};

    test_topk_moe(std::array<int64_t, 4> ne = { 10, 5, 1, 1 },
                  int n_expert_used = 1,

@@ -5217,16 +5226,16 @@ struct test_topk_moe : public test_case {

        ggml_tensor * selection_probs = probs;
        if (bias_probs) {
-           ggml_tensor * exp_probs_b = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne.data());
+           ggml_tensor * exp_probs_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ne[0]);
            ggml_set_name(exp_probs_b, "exp_probs_b");
            selection_probs = ggml_add(ctx, probs, exp_probs_b);
            ggml_set_name(selection_probs, "selection_probs");
        }

-       ggml_tensor * selected_experts = ggml_argsort_top_k(ctx, selection_probs, n_expert_used); // [n_expert_used, n_tokens]
+       selected_experts = ggml_argsort_top_k(ctx, selection_probs, n_expert_used); // [n_expert_used, n_tokens]
        ggml_set_name(selected_experts, "selected_experts");

-       ggml_tensor * weights = ggml_get_rows(ctx, ggml_reshape_3d(ctx, probs, 1, n_expert, n_tokens), selected_experts); // [1, n_expert_used, n_tokens]
+       weights = ggml_get_rows(ctx, ggml_reshape_3d(ctx, probs, 1, n_expert, n_tokens), selected_experts); // [1, n_expert_used, n_tokens]
        ggml_set_name(weights, "weights");

        if (gating_func == GATING_FUNC_SOFTMAX_WEIGHT) {

@@ -5252,6 +5261,21 @@ struct test_topk_moe : public test_case {
        ggml_set_name(weights, "weights");
        return weights;
    }
+   // Verify two outputs
+   std::vector<ggml_tensor *> fusion_test_nodes() override { return { selected_experts, weights }; }
+
+   // allow output in arbitrary order
+   double err(const float * a, const float * b, size_t n) override {
+       std::vector<float> a2(n);
+       std::vector<float> b2(n);
+       for (size_t i = 0; i < n; ++i) {
+           a2[i] = a[i];
+           b2[i] = b[i];
+       }
+       std::sort(a2.begin(), a2.end());
+       std::sort(b2.begin(), b2.end());
+       return nmse(a2.data(), b2.data(), n);
+   }
 };

 struct test_mul_mat_vec_fusion : public test_case {
@ -724,6 +724,30 @@ static void test_tools_oaicompat_json_conversion() {
            "]"
        ),
        common_chat_tools_to_json_oaicompat<json>({special_function_tool}).dump(2));

    {
        auto tools_no_params = common_chat_tools_parse_oaicompat(json::parse(
            R"([{"type": "function", "function": {"name": "test_func", "description": "A test"}}])"));
        assert_equals((size_t) 1, tools_no_params.size());
        assert_equals(std::string("test_func"), tools_no_params[0].name);
        assert_equals(std::string("A test"), tools_no_params[0].description);
        assert_equals(std::string("{}"), tools_no_params[0].parameters);
    }
    {
        auto tools_no_desc = common_chat_tools_parse_oaicompat(json::parse(
            R"([{"type": "function", "function": {"name": "test_func", "parameters": {"type": "object"}}}])"));
        assert_equals((size_t) 1, tools_no_desc.size());
        assert_equals(std::string("test_func"), tools_no_desc[0].name);
        assert_equals(std::string(""), tools_no_desc[0].description);
    }
    {
        auto tools_minimal = common_chat_tools_parse_oaicompat(json::parse(
            R"([{"type": "function", "function": {"name": "test_func"}}])"));
        assert_equals((size_t) 1, tools_minimal.size());
        assert_equals(std::string("test_func"), tools_minimal[0].name);
        assert_equals(std::string(""), tools_minimal[0].description);
        assert_equals(std::string("{}"), tools_minimal[0].parameters);
    }
}

static void test_template_output_parsers() {
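The three new blocks cover tool definitions that omit "description", "parameters", or both: parsing now falls back to an empty description and an empty "{}" schema instead of throwing. A small standalone illustration of that defaulting pattern with nlohmann::json's value() accessor (the include path and names here are illustrative, not taken from the test file):

    #include <nlohmann/json.hpp>
    #include <cassert>
    #include <string>

    using json = nlohmann::json;

    int main() {
        // a tool function with no "description" and no "parameters"
        json function = json::parse(R"({"name": "test_func"})");

        // value() returns the stored value when the key exists,
        // otherwise the supplied default -- no exception is thrown
        std::string description = function.value("description", "");
        std::string parameters  = function.value("parameters", json::object()).dump();

        assert(description.empty());
        assert(parameters == "{}");
        return 0;
    }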
@ -27,6 +27,7 @@ add_library(mtmd
    models/qwen3vl.cpp
    models/siglip.cpp
    models/whisper-enc.cpp
    models/youtuvl.cpp
    )

set_target_properties(mtmd PROPERTIES
@ -45,13 +45,14 @@
#define KEY_SPATIAL_MERGE_SIZE "clip.vision.spatial_merge_size"
#define KEY_IS_DEEPSTACK_LAYERS "clip.vision.is_deepstack_layers"

#define KEY_MM_PATCH_MERGE_TYPE "clip.vision.mm_patch_merge_type"
#define KEY_IMAGE_GRID_PINPOINTS "clip.vision.image_grid_pinpoints"
#define KEY_IMAGE_CROP_RESOLUTION "clip.vision.image_crop_resolution"
#define KEY_WIN_ATTN_PATTERN "clip.vision.n_wa_pattern"
#define KEY_WIN_ATTN_LAYER_INDEXES "clip.vision.wa_layer_indexes"
#define KEY_ATTN_WINDOW_SIZE "clip.vision.window_size"
#define KEY_MINICPMV_VERSION "clip.minicpmv_version"
#define KEY_MINICPMV_QUERY_NUM "clip.minicpmv_query_num"

// audio-specific
#define KEY_AUDIO_PROJ_TYPE "clip.audio.projector_type" // for models with mixed modalities
@ -180,6 +181,7 @@ enum projector_type {
    PROJECTOR_TYPE_GLMA,
    PROJECTOR_TYPE_QWEN25O, // will be replaced by QWEN2A or QWEN25VL depending on clip_ctx
    PROJECTOR_TYPE_VOXTRAL,
    PROJECTOR_TYPE_MUSIC_FLAMINGO,
    PROJECTOR_TYPE_LFM2,
    PROJECTOR_TYPE_KIMIVL,
    PROJECTOR_TYPE_LIGHTONOCR,
@ -187,6 +189,7 @@ enum projector_type {
    PROJECTOR_TYPE_JANUS_PRO,
    PROJECTOR_TYPE_LFM2A,
    PROJECTOR_TYPE_GLM4V,
    PROJECTOR_TYPE_YOUTUVL,
    PROJECTOR_TYPE_UNKNOWN,
};
@ -209,6 +212,7 @@ static std::map<projector_type, std::string> PROJECTOR_TYPE_NAMES = {
    { PROJECTOR_TYPE_GLMA, "glma"},
    { PROJECTOR_TYPE_QWEN25O, "qwen2.5o"},
    { PROJECTOR_TYPE_VOXTRAL, "voxtral"},
    { PROJECTOR_TYPE_MUSIC_FLAMINGO, "musicflamingo"},
    { PROJECTOR_TYPE_LFM2, "lfm2"},
    { PROJECTOR_TYPE_KIMIVL, "kimivl"},
    { PROJECTOR_TYPE_LIGHTONOCR,"lightonocr"},
@ -216,6 +220,7 @@ static std::map<projector_type, std::string> PROJECTOR_TYPE_NAMES = {
    { PROJECTOR_TYPE_JANUS_PRO, "janus_pro"},
    { PROJECTOR_TYPE_LFM2A, "lfm2a"},
    { PROJECTOR_TYPE_GLM4V, "glm4v"},
    { PROJECTOR_TYPE_YOUTUVL, "youtuvl"},
};

static projector_type clip_projector_type_from_string(const std::string & str) {
@ -61,6 +61,7 @@ struct clip_hparams {
    std::unordered_set<int32_t> vision_feature_layer;
    int32_t attn_window_size = 0;
    int32_t n_wa_pattern = 0;
    std::unordered_set<int32_t> wa_layer_indexes; // explicit layer indexes that use full attention (for irregular patterns like YoutuVL)

    // audio
    int32_t n_mel_bins = 0; // whisper preprocessor
@ -319,7 +320,8 @@ struct clip_model {

    bool audio_has_avgpool() const {
        return proj_type == PROJECTOR_TYPE_QWEN2A
            || proj_type == PROJECTOR_TYPE_VOXTRAL
            || proj_type == PROJECTOR_TYPE_MUSIC_FLAMINGO;
    }

    bool audio_has_stack_frames() const {
@ -818,6 +818,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
        case PROJECTOR_TYPE_VOXTRAL:
        case PROJECTOR_TYPE_QWEN2A:
        case PROJECTOR_TYPE_GLMA:
        case PROJECTOR_TYPE_MUSIC_FLAMINGO:
            {
                builder = std::make_unique<clip_graph_whisper_enc>(ctx, img);
            } break;
@ -845,6 +846,10 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
            {
                builder = std::make_unique<clip_graph_glm4v>(ctx, img);
            } break;
        case PROJECTOR_TYPE_YOUTUVL:
            {
                builder = std::make_unique<clip_graph_youtuvl>(ctx, img);
            } break;
        default:
            GGML_ABORT("missing cgraph builder");
    }
@ -1158,6 +1163,20 @@ struct clip_model_loader {
                        LOG_WRN("%s: more info: https://github.com/ggml-org/llama.cpp/issues/16842\n\n", __func__);
                    }
                } break;
            case PROJECTOR_TYPE_YOUTUVL:
                {
                    hparams.n_merge = 2;
                    get_u32(KEY_SPATIAL_MERGE_SIZE, hparams.n_merge, false);
                    get_u32(KEY_ATTN_WINDOW_SIZE, hparams.attn_window_size, true);
                    std::vector<int> wa_layer_indexes_vec;
                    get_arr_int(KEY_WIN_ATTN_LAYER_INDEXES, wa_layer_indexes_vec, true);
                    for (auto & layer : wa_layer_indexes_vec) {
                        hparams.wa_layer_indexes.insert(layer);
                    }
                    // support max_height * max_width = 8000 * 8000. 8000/16/2 = 250 image tokens
                    hparams.set_limit_image_tokens(1, 62500);
                    hparams.set_warmup_n_tokens(16*16); // avoid OOM on warmup
                } break;
            case PROJECTOR_TYPE_GLM4V:
                {
                    hparams.rope_theta = 10000.0f;
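As a quick check of the cap chosen in the YoutuVL loader case above: with 16-pixel patches and a 2x2 spatial merge, an 8000 x 8000 input gives 8000 / 16 / 2 = 250 merged tokens per side, and 250 * 250 = 62500, which is exactly the upper bound passed to set_limit_image_tokens.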
@ -1176,6 +1195,7 @@ struct clip_model_loader {
            case PROJECTOR_TYPE_QWEN2A:
            case PROJECTOR_TYPE_GLMA:
            case PROJECTOR_TYPE_VOXTRAL:
            case PROJECTOR_TYPE_MUSIC_FLAMINGO:
                {
                    bool require_stack = model.proj_type == PROJECTOR_TYPE_ULTRAVOX ||
                                         model.proj_type == PROJECTOR_TYPE_VOXTRAL ||
@ -1225,7 +1245,14 @@
        LOG_INF("%s: has_llava_proj: %d\n", __func__, hparams.has_llava_projector);
        LOG_INF("%s: minicpmv_version: %d\n", __func__, hparams.minicpmv_version);
        LOG_INF("%s: n_merge: %d\n", __func__, hparams.n_merge);
        LOG_INF("%s: n_wa_pattern: %d\n", __func__, hparams.n_wa_pattern);
        if (!hparams.wa_layer_indexes.empty()) {
            LOG_INF("%s: wa_layer_indexes: ", __func__);
            for (auto & layer : hparams.wa_layer_indexes) {
                LOG_INF("%d ", layer);
            }
            LOG_INF("\n");
        }
        if (hparams.image_min_pixels > 0) {
            LOG_INF("%s: image_min_pixels: %d%s\n", __func__, hparams.image_min_pixels, hparams.custom_image_min_tokens > 0 ? " (custom value)" : "");
        }
@ -1493,6 +1520,14 @@ struct clip_model_loader {
                    model.mm_1_w = get_tensor(string_format(TN_LLAVA_PROJ, 2, "weight"));
                    model.mm_1_b = get_tensor(string_format(TN_LLAVA_PROJ, 2, "bias"));
                } break;
            case PROJECTOR_TYPE_YOUTUVL:
                {
                    model.mm_input_norm_w = get_tensor(TN_MM_INP_NORM); // merger.ln_q (RMS norm)
                    model.mm_0_w = get_tensor(string_format(TN_LLAVA_PROJ, 0, "weight")); // merger.mlp.0
                    model.mm_0_b = get_tensor(string_format(TN_LLAVA_PROJ, 0, "bias"));
                    model.mm_1_w = get_tensor(string_format(TN_LLAVA_PROJ, 2, "weight")); // merger.mlp.2
                    model.mm_1_b = get_tensor(string_format(TN_LLAVA_PROJ, 2, "bias"));
                } break;
            case PROJECTOR_TYPE_GLM4V:
                {
                    model.projection = get_tensor(TN_MM_PROJECTOR);
@ -1576,6 +1611,17 @@ struct clip_model_loader {
                    model.mm_1_w = get_tensor(string_format(TN_MM_AUDIO_MLP, 1, "weight"));
                    model.mm_2_w = get_tensor(string_format(TN_MM_AUDIO_MLP, 2, "weight"));
                } break;
            case PROJECTOR_TYPE_MUSIC_FLAMINGO:
                {
                    model.conv1d_1_w = get_tensor(string_format(TN_CONV1D, 1, "weight"));
                    model.conv1d_1_b = get_tensor(string_format(TN_CONV1D, 1, "bias"));
                    model.conv1d_2_w = get_tensor(string_format(TN_CONV1D, 2, "weight"));
                    model.conv1d_2_b = get_tensor(string_format(TN_CONV1D, 2, "bias"));
                    model.mm_1_w = get_tensor(string_format(TN_MM_AUDIO_MLP, 1, "weight"));
                    model.mm_1_b = get_tensor(string_format(TN_MM_AUDIO_MLP, 1, "bias"));
                    model.mm_2_w = get_tensor(string_format(TN_MM_AUDIO_MLP, 2, "weight"));
                    model.mm_2_b = get_tensor(string_format(TN_MM_AUDIO_MLP, 2, "bias"));
                } break;
            case PROJECTOR_TYPE_INTERNVL:
                {
                    model.mm_0_w = get_tensor(string_format(TN_MVLM_PROJ_MLP, 0, "weight"));
@ -2684,6 +2730,57 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, str
                // res_imgs->data[0] = *res;
                res_imgs->entries.push_back(std::move(img_f32));
            } break;
        case PROJECTOR_TYPE_YOUTUVL:
            {
                const int patch_size = params.patch_size; // typically 16
                const int merge_size = params.n_merge; // typically 2
                const int align_size = patch_size * merge_size; // 32

                const int max_num_patches = params.image_max_pixels > 0 ?
                    params.image_max_pixels / (patch_size * patch_size) : 256;

                // Linear search for optimal scale to fit within max_num_patches
                float scale = 1.0f;
                int target_height = original_size.height;
                int target_width = original_size.width;

                auto get_scaled_image_size = [align_size](float scale, int size) -> int {
                    float scaled_size = size * scale;
                    // Round up to nearest multiple of align_size
                    int aligned = static_cast<int>(std::ceil(scaled_size / align_size)) * align_size;
                    // Ensure at least one patch
                    return std::max(align_size, aligned);
                };

                // Linear search with 0.02 step size
                while (scale > 0.0f) {
                    target_height = get_scaled_image_size(scale, original_size.height);
                    target_width = get_scaled_image_size(scale, original_size.width);

                    int num_patches_h = target_height / patch_size;
                    int num_patches_w = target_width / patch_size;
                    int num_patches = num_patches_h * num_patches_w;

                    if (num_patches > max_num_patches) {
                        scale -= 0.02f;
                    } else {
                        break;
                    }
                }

                clip_image_size new_size = {target_width, target_height};

                // Resize the image
                clip_image_u8 resized;
                img_tool::resize(*img, resized, new_size, img_tool::RESIZE_ALGO_BILINEAR, false);

                // Normalize to float32
                clip_image_f32_ptr img_f32(clip_image_f32_init());
                normalize_image_u8_to_f32(resized, *img_f32, params.image_mean, params.image_std);

                // Add to results
                res_imgs->entries.push_back(std::move(img_f32));
            } break;

        case PROJECTOR_TYPE_IDEFICS3:
            {
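A rough worked example of the scale search above (numbers chosen for illustration, not taken from the patch): with patch_size 16 and merge 2, align_size is 32; with the default cap of 256 patches, a 1000 x 1500 input is first rounded up to 1024 x 1504 at scale 1.0, i.e. 64 x 94 = 6016 patches, so the loop keeps lowering the scale in 0.02 steps until the aligned size yields at most 256 patches.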
@ -2916,6 +3013,7 @@ int clip_n_output_tokens_x(const struct clip_ctx * ctx, struct clip_image_f32 *
        case PROJECTOR_TYPE_QWEN25VL:
        case PROJECTOR_TYPE_QWEN3VL:
        case PROJECTOR_TYPE_GLM4V:
        case PROJECTOR_TYPE_YOUTUVL:
            return (img->nx / params.patch_size) / 2;
        default:
            break;
@ -2931,6 +3029,7 @@ int clip_n_output_tokens_y(const struct clip_ctx * ctx, struct clip_image_f32 *
        case PROJECTOR_TYPE_QWEN25VL:
        case PROJECTOR_TYPE_QWEN3VL:
        case PROJECTOR_TYPE_GLM4V:
        case PROJECTOR_TYPE_YOUTUVL:
            return (img->ny / params.patch_size) / 2;
        default:
            break;
@ -2991,6 +3090,7 @@ int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * im
        case PROJECTOR_TYPE_QWEN25VL:
        case PROJECTOR_TYPE_QWEN3VL:
        case PROJECTOR_TYPE_GLM4V:
        case PROJECTOR_TYPE_YOUTUVL:
            {
                // dynamic size (2 conv, so double patch size)
                int x_patch = img->nx / (params.patch_size * 2);
@ -3031,6 +3131,7 @@ int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * im
        case PROJECTOR_TYPE_VOXTRAL:
        case PROJECTOR_TYPE_ULTRAVOX:
        case PROJECTOR_TYPE_QWEN2A:
        case PROJECTOR_TYPE_MUSIC_FLAMINGO:
            {
                n_patches = img->nx;
@ -3117,7 +3218,6 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
    const int pos_w = image_size_width / patch_size;
    const int pos_h = image_size_height / patch_size;

    const bool use_window_attn = hparams.n_wa_pattern > 0; // for qwen2.5vl

    auto get_inp_tensor = [&gf](const char * name) {
        ggml_tensor * inp = ggml_graph_get_tensor(gf, name);
@ -3266,9 +3366,11 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
                set_input_i32("positions", positions);
            } break;
        case PROJECTOR_TYPE_QWEN25VL:
        case PROJECTOR_TYPE_YOUTUVL:
            {
                // pw * ph = number of tokens output by ViT after apply patch merger
                // ipw * ipw = number of vision token been processed inside ViT
                const bool use_window_attn = ctx->model.proj_type == PROJECTOR_TYPE_QWEN25VL ? hparams.n_wa_pattern > 0 : !hparams.wa_layer_indexes.empty();
                const int merge_ratio = 2;
                const int pw = image_size_width / patch_size / merge_ratio;
                const int ph = image_size_height / patch_size / merge_ratio;
@ -3279,7 +3381,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
                std::vector<int> inv_idx(ph * pw);

                if (use_window_attn) {
                    const int attn_window_size = 112;
                    const int attn_window_size = hparams.attn_window_size > 0 ? hparams.attn_window_size : 112;
                    const int grid_window = attn_window_size / patch_size / merge_ratio;
                    int dst = 0;
                    // [num_vision_tokens, num_vision_tokens] attention mask tensor
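For intuition about the window size made configurable above (illustrative numbers only): with attn_window_size = 128, patch_size = 16 and merge_ratio = 2, grid_window = 128 / 16 / 2 = 4, so each attention window covers a 4 x 4 block of merged patches; the fallback of 112 preserves the previous hard-coded behaviour when the GGUF carries no window size.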
@ -3403,6 +3505,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
        case PROJECTOR_TYPE_ULTRAVOX:
        case PROJECTOR_TYPE_LFM2:
        case PROJECTOR_TYPE_VOXTRAL:
        case PROJECTOR_TYPE_MUSIC_FLAMINGO:
        case PROJECTOR_TYPE_JANUS_PRO:
        case PROJECTOR_TYPE_COGVLM:
            {
@ -3516,6 +3619,7 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) {
        case PROJECTOR_TYPE_QWEN2VL:
        case PROJECTOR_TYPE_QWEN25VL:
        case PROJECTOR_TYPE_JANUS_PRO:
        case PROJECTOR_TYPE_YOUTUVL:
            return ctx->model.mm_1_b->ne[0];
        case PROJECTOR_TYPE_QWEN3VL:
            // main path + deepstack paths
@ -3526,6 +3630,7 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) {
            return ctx->model.projection->ne[1];
        case PROJECTOR_TYPE_ULTRAVOX:
        case PROJECTOR_TYPE_VOXTRAL:
        case PROJECTOR_TYPE_MUSIC_FLAMINGO:
            return ctx->model.mm_2_w->ne[1];
        case PROJECTOR_TYPE_INTERNVL:
            return ctx->model.mm_3_w->ne[1];
@ -3587,7 +3692,8 @@ bool clip_has_whisper_encoder(const struct clip_ctx * ctx) {
    return ctx->proj_type() == PROJECTOR_TYPE_ULTRAVOX
        || ctx->proj_type() == PROJECTOR_TYPE_QWEN2A
        || ctx->proj_type() == PROJECTOR_TYPE_GLMA
        || ctx->proj_type() == PROJECTOR_TYPE_VOXTRAL
        || ctx->proj_type() == PROJECTOR_TYPE_MUSIC_FLAMINGO;
}

bool clip_encode_float_image (struct clip_ctx * ctx, int n_threads, float * img, int h, int w, float * vec) {
@ -27,6 +27,11 @@ struct clip_graph_qwen3vl : clip_graph {
    ggml_cgraph * build() override;
};

struct clip_graph_youtuvl : clip_graph {
    clip_graph_youtuvl(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
    ggml_cgraph * build() override;
};

struct clip_graph_minicpmv : clip_graph {
    clip_graph_minicpmv(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
    ggml_cgraph * build() override;

@ -86,6 +86,15 @@ ggml_cgraph * clip_graph_whisper_enc::build() {
                FFN_GELU_ERF,
                -1);

    } else if (proj_type == PROJECTOR_TYPE_MUSIC_FLAMINGO) {
        // projector
        cur = build_ffn(cur,
            model.mm_1_w, model.mm_1_b,
            nullptr, nullptr,
            model.mm_2_w, model.mm_2_b,
            FFN_GELU_ERF,
            -1);

    } else if (proj_type == PROJECTOR_TYPE_GLMA) {
        cur = ggml_norm(ctx0, cur, hparams.eps);
        cur = ggml_mul(ctx0, cur, model.mm_norm_pre_w);
@ -0,0 +1,179 @@
#include "models.h"

ggml_cgraph * clip_graph_youtuvl::build() {
    GGML_ASSERT(model.class_embedding == nullptr);
    const int batch_size = 1;
    const bool use_window_attn = !hparams.wa_layer_indexes.empty();
    const int n_pos = n_patches;
    const int num_position_ids = n_pos * 4;
    const int m = 2;
    const int Wp = n_patches_x;
    const int Hp = n_patches_y;
    const int Hm = Hp / m;
    const int Wm = Wp / m;
    norm_type norm_t = NORM_TYPE_NORMAL;

    int mrope_sections[4] = {d_head/4, d_head/4, d_head/4, d_head/4};

    ggml_tensor * inp = build_inp_raw();

    // change conv3d to linear
    // reshape and permute to get patches, permute from (patch_size, m, Wm, patch_size, m, Hm, C) to (C, patch_size, patch_size, m, m, Wm, Hm)
    {
        inp = ggml_reshape_4d(
            ctx0, inp,
            Wm * m * patch_size, m * patch_size, Hm, 3);
        inp = ggml_permute(ctx0, inp, 1, 2, 3, 0);
        inp = ggml_cont_4d(
            ctx0, inp,
            m * patch_size * 3, Wm, m * patch_size, Hm);

        inp = ggml_permute(ctx0, inp, 0, 2, 1, 3);
        inp = ggml_cont_4d(
            ctx0, inp,
            m * patch_size * 3, patch_size, m, Hm * Wm);

        inp = ggml_permute(ctx0, inp, 1, 0, 2, 3);
        inp = ggml_cont_4d(
            ctx0, inp,
            patch_size, 3, patch_size, Hm * Wm * m * m);

        inp = ggml_permute(ctx0, inp, 2, 0, 1, 3);
        inp = ggml_cont_3d(
            ctx0, inp,
            3*patch_size* patch_size, Hm * Wm * m * m, 1);
    }
    inp = ggml_mul_mat(ctx0, model.patch_embeddings_0, inp);

    if (model.patch_bias) {
        inp = ggml_add(ctx0, inp, model.patch_bias);
    }

    inp = ggml_reshape_2d(ctx0, inp, n_embd, n_patches);

    ggml_tensor * inpL = inp;
    ggml_tensor * window_mask = nullptr;
    ggml_tensor * window_idx = nullptr;
    ggml_tensor * inv_window_idx = nullptr;

    ggml_tensor * positions = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, num_position_ids);
    ggml_set_name(positions, "positions");
    ggml_set_input(positions);

    // pre-layernorm
    if (model.pre_ln_w) {
        inpL = build_norm(inpL, model.pre_ln_w, model.pre_ln_b, norm_t, eps, -1);
    }
    if (use_window_attn) {
        inv_window_idx = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_pos / 4);
        ggml_set_name(inv_window_idx, "inv_window_idx");
        ggml_set_input(inv_window_idx);
        // mask for window attention
        window_mask = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_pos, n_pos);
        ggml_set_name(window_mask, "window_mask");
        ggml_set_input(window_mask);

        // if flash attn is used, we need to pad the mask and cast to f16
        if (flash_attn_type == CLIP_FLASH_ATTN_TYPE_ENABLED) {
            window_mask = ggml_cast(ctx0, window_mask, GGML_TYPE_F16);
        }

        // inpL shape: [n_embd, n_patches_x * n_patches_y, batch_size]
        GGML_ASSERT(batch_size == 1);
        inpL = ggml_reshape_2d(ctx0, inpL, n_embd * 4, n_patches_x * n_patches_y * batch_size / 4);
        inpL = ggml_get_rows(ctx0, inpL, inv_window_idx);
        inpL = ggml_reshape_3d(ctx0, inpL, n_embd, n_patches_x * n_patches_y, batch_size);
    }

    // loop over layers
    for (int il = 0; il < n_layer; il++) {
        const auto & layer = model.layers[il];
        const bool full_attn = use_window_attn ? hparams.wa_layer_indexes.count(il) > 0 : true;

        ggml_tensor * cur = inpL; // inpL = residual, cur = hidden_states

        // layernorm1
        cur = build_norm(cur, layer.ln_1_w, layer.ln_1_b, norm_t, eps, il);
        // self-attention
        {
            ggml_tensor * Qcur = ggml_add(ctx0,
                ggml_mul_mat(ctx0, layer.q_w, cur), layer.q_b);
            ggml_tensor * Kcur = ggml_add(ctx0,
                ggml_mul_mat(ctx0, layer.k_w, cur), layer.k_b);
            ggml_tensor * Vcur = ggml_add(ctx0,
                ggml_mul_mat(ctx0, layer.v_w, cur), layer.v_b);

            Qcur = ggml_reshape_3d(ctx0, Qcur, d_head, n_head, n_patches);
            Kcur = ggml_reshape_3d(ctx0, Kcur, d_head, n_head, n_patches);
            Vcur = ggml_reshape_3d(ctx0, Vcur, d_head, n_head, n_patches);

            Qcur = ggml_rope_multi(
                ctx0, Qcur, positions, nullptr,
                d_head/2, mrope_sections, GGML_ROPE_TYPE_VISION, 32768, 10000, 1, 0, 1, 32, 1);
            Kcur = ggml_rope_multi(
                ctx0, Kcur, positions, nullptr,
                d_head/2, mrope_sections, GGML_ROPE_TYPE_VISION, 32768, 10000, 1, 0, 1, 32, 1);

            ggml_tensor * attn_mask = full_attn ? nullptr : window_mask;

            cur = build_attn(layer.o_w, layer.o_b,
                Qcur, Kcur, Vcur, attn_mask, kq_scale, il);
        }
        // re-add the layer input, e.g., residual
        cur = ggml_add(ctx0, cur, inpL);

        inpL = cur; // inpL = residual, cur = hidden_states

        // layernorm2
        cur = build_norm(cur, layer.ln_2_w, layer.ln_2_b, norm_t, eps, il);

        // ffn
        cur = build_ffn(cur,
            layer.ff_up_w, layer.ff_up_b,
            nullptr, nullptr,
            layer.ff_down_w, layer.ff_down_b,
            hparams.ffn_op, il);

        // residual 2
        cur = ggml_add(ctx0, inpL, cur);

        inpL = cur;
    }

    ggml_tensor * embeddings = inpL;
    if (use_window_attn) {
        const int spatial_merge_unit = 4;
        window_idx = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_pos / spatial_merge_unit);
        ggml_set_name(window_idx, "window_idx");
        ggml_set_input(window_idx);
        GGML_ASSERT(batch_size == 1);
        embeddings = ggml_reshape_2d(ctx0, embeddings, n_embd * spatial_merge_unit, n_patches / spatial_merge_unit);
        embeddings = ggml_get_rows(ctx0, embeddings, window_idx);
        embeddings = ggml_reshape_3d(ctx0, embeddings, n_embd, n_patches, batch_size);
        cb(embeddings, "window_order_restored", -1);
    }

    // post-layernorm (part of Siglip2VisionTransformer, applied after encoder)
    if (model.post_ln_w) {
        embeddings = build_norm(embeddings, model.post_ln_w, model.post_ln_b, norm_t, eps, n_layer);
    }

    // Now apply merger (VLPatchMerger):
    // 1. Apply RMS norm (ln_q in VLPatchMerger)
    embeddings = build_norm(embeddings, model.mm_input_norm_w, nullptr, NORM_TYPE_RMS, 1e-6, -1);
    cb(embeddings, "merger_normed", -1);

    // 2. First reshape for spatial merge (merge 2x2 patches)
    embeddings = ggml_reshape_3d(ctx0, embeddings, n_embd * 4, n_pos / 4, batch_size);
    cb(embeddings, "merger_reshaped", -1);

    embeddings = build_ffn(embeddings,
        model.mm_0_w, model.mm_0_b,
        nullptr, nullptr,
        model.mm_1_w, model.mm_1_b,
        FFN_GELU,
        -1);
    ggml_build_forward_expand(gf, embeddings);

    return gf;
}
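A shape sketch of the window reordering in the new graph (hypothetical sizes, not from any specific model): with n_embd = 1152 and n_pos = 1024 patches, inv_window_idx holds 1024 / 4 = 256 indices; the input is viewed as a [1152 * 4, 256] matrix so that ggml_get_rows moves whole 2x2 merged groups into window order before the encoder, and window_idx applies the inverse permutation after the last layer to restore the original patch order before the VLPatchMerger.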
@ -283,7 +283,7 @@ struct mtmd_context {
        // https://github.com/huggingface/transformers/blob/1cd110c6cb6a6237614130c470e9a902dbc1a4bd/docs/source/en/model_doc/pixtral.md
        img_end = "[IMG_END]";

    } else if (proj == PROJECTOR_TYPE_QWEN2VL || proj == PROJECTOR_TYPE_QWEN25VL || proj == PROJECTOR_TYPE_QWEN3VL) {
    } else if (proj == PROJECTOR_TYPE_QWEN2VL || proj == PROJECTOR_TYPE_QWEN25VL || proj == PROJECTOR_TYPE_QWEN3VL || proj == PROJECTOR_TYPE_YOUTUVL) {
        // <|vision_start|> ... (image embeddings) ... <|vision_end|>
        img_beg = "<|vision_start|>";
        img_end = "<|vision_end|>";
@ -330,6 +330,7 @@ struct mtmd_context {
            case PROJECTOR_TYPE_ULTRAVOX:
            case PROJECTOR_TYPE_VOXTRAL:
            case PROJECTOR_TYPE_GLMA:
            case PROJECTOR_TYPE_MUSIC_FLAMINGO:
                audio_preproc = std::make_unique<mtmd_audio_preprocessor_whisper>(ctx_a);
                break;
            case PROJECTOR_TYPE_LFM2A:
@ -352,6 +353,9 @@ struct mtmd_context {
            // [BEGIN_AUDIO] ... (embeddings) ...
            aud_beg = "[BEGIN_AUDIO]";

        } else if (proj == PROJECTOR_TYPE_MUSIC_FLAMINGO) {
            // <sound> ... (embeddings) ...
            aud_beg = "<sound>";
        }
    }
@ -12,6 +12,7 @@
#include <cmath>
#include <cctype>
#include <algorithm>
#include <filesystem>

struct quant_option {
    std::string name;
@ -643,6 +644,11 @@ int main(int argc, char ** argv) {
        return 1;
    }

    if (std::error_code ec; std::filesystem::equivalent(fname_inp, fname_out, ec)) {
        fprintf(stderr, "%s: error: input and output files are the same: '%s'\n", __func__, fname_inp.c_str());
        return 1;
    }

    print_build_info();

    fprintf(stderr, "%s: quantizing '%s' to '%s' as %s", __func__, fname_inp.c_str(), fname_out.c_str(), ftype_str.c_str());
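For context on the new same-file guard in the quantize tool, a minimal standalone sketch (file names and the helper are made up): std::filesystem::equivalent resolves both paths and reports whether they name the same underlying file, and with the error_code overload a non-existent output simply sets the error code and the check returns false instead of throwing.

    #include <cstdio>
    #include <filesystem>
    #include <string>

    // Returns true only if both paths exist and resolve to the same file
    // (symlinks are followed); on error the error_code is set and the
    // result is false rather than an exception.
    static bool same_file(const std::string & in, const std::string & out) {
        std::error_code ec;
        return std::filesystem::equivalent(in, out, ec);
    }

    int main() {
        std::printf("same: %d\n", same_file("model.gguf", "model.gguf") ? 1 : 0);
        std::printf("same: %d\n", same_file("model.gguf", "out.gguf") ? 1 : 0);
        return 0;
    }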
Binary file not shown.
@ -65,10 +65,7 @@ export async function copyCodeToClipboard(
	successMessage = 'Code copied to clipboard',
	errorMessage = 'Failed to copy code'
): Promise<boolean> {
	const doc = new DOMParser().parseFromString(rawCode, 'text/html');
	return copyToClipboard(rawCode, successMessage, errorMessage);
	const decodedCode = doc.body.textContent ?? rawCode;

	return copyToClipboard(decodedCode, successMessage, errorMessage);
}

/**