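Rename hunyuan_moe to hunyuan_v1_moe (model arch, tokenizer pre-type, chat template and graph builder identifiers); also drop the <|startoftext|> prefix from assistant turns in the Hunyuan V1 MoE chat template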
Signed-off-by: stevenkuang <stevenkuang@tencent.com>
This commit is contained in:
parent
5d2c042513
commit
aa973ca219
|
|
@ -7387,7 +7387,7 @@ class FalconH1Model(Mamba2Model):
|
||||||
|
|
||||||
@ModelBase.register("HunYuanMoEV1ForCausalLM")
|
@ModelBase.register("HunYuanMoEV1ForCausalLM")
|
||||||
class HunYuanMoEModel(TextModel):
|
class HunYuanMoEModel(TextModel):
|
||||||
model_arch = gguf.MODEL_ARCH.HUNYUAN_MOE
|
model_arch = gguf.MODEL_ARCH.HUNYUAN_V1_MOE
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
|
|
|
||||||
|
|
@ -139,7 +139,7 @@ pre_computed_hashes = [
|
||||||
{"name": "chatglm-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-chat", "chkhsh": "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516"},
|
{"name": "chatglm-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-chat", "chkhsh": "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516"},
|
||||||
{"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-hf", "chkhsh": "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2"},
|
{"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-hf", "chkhsh": "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2"},
|
||||||
{"name": "minerva-7b", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", "chkhsh": "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35"},
|
{"name": "minerva-7b", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", "chkhsh": "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35"},
|
||||||
{"name": "hunyuan", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tencent/Hunyuan-A13B-Instruct", "chkhsh": "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664"},
|
{"name": "hunyuan-v1-moe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tencent/Hunyuan-A13B-Instruct", "chkhsh": "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664"},
|
||||||
{"name": "hunyuan-v1-dense", "tokt": TOKENIZER_TYPE.BPE, "repo": "", "chkhsh": ""}, # TODO: update hunyuan-v1-dense repo
|
{"name": "hunyuan-v1-dense", "tokt": TOKENIZER_TYPE.BPE, "repo": "", "chkhsh": ""}, # TODO: update hunyuan-v1-dense repo
|
||||||
# falcon-h1 series uses 4 different tokenizers across model sizes (0.5b - 34b), hence we need to define 4 different hashes
|
# falcon-h1 series uses 4 different tokenizers across model sizes (0.5b - 34b), hence we need to define 4 different hashes
|
||||||
{"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-0.5B-Base", "chkhsh": "a6b57017d60e6edb4d88ecc2845188e0eb333a70357e45dcc9b53964a73bbae6"},
|
{"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-0.5B-Base", "chkhsh": "a6b57017d60e6edb4d88ecc2845188e0eb333a70357e45dcc9b53964a73bbae6"},
|
||||||
|
|
|
||||||
|
|
@ -372,7 +372,7 @@ class MODEL_ARCH(IntEnum):
|
||||||
ARCEE = auto()
|
ARCEE = auto()
|
||||||
ERNIE4_5 = auto()
|
ERNIE4_5 = auto()
|
||||||
ERNIE4_5_MOE = auto()
|
ERNIE4_5_MOE = auto()
|
||||||
HUNYUAN_MOE = auto()
|
HUNYUAN_V1_MOE = auto()
|
||||||
HUNYUAN_V1_DENSE = auto()
|
HUNYUAN_V1_DENSE = auto()
|
||||||
SMOLLM3 = auto()
|
SMOLLM3 = auto()
|
||||||
LFM2 = auto()
|
LFM2 = auto()
|
||||||
|
|
@ -692,7 +692,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
||||||
MODEL_ARCH.ERNIE4_5: "ernie4_5",
|
MODEL_ARCH.ERNIE4_5: "ernie4_5",
|
||||||
MODEL_ARCH.ERNIE4_5_MOE: "ernie4_5-moe",
|
MODEL_ARCH.ERNIE4_5_MOE: "ernie4_5-moe",
|
||||||
MODEL_ARCH.FALCON_H1: "falcon-h1",
|
MODEL_ARCH.FALCON_H1: "falcon-h1",
|
||||||
MODEL_ARCH.HUNYUAN_MOE: "hunyuan-moe",
|
MODEL_ARCH.HUNYUAN_V1_MOE: "hunyuan-v1-moe",
|
||||||
MODEL_ARCH.HUNYUAN_V1_DENSE: "hunyuan-v1-dense",
|
MODEL_ARCH.HUNYUAN_V1_DENSE: "hunyuan-v1-dense",
|
||||||
MODEL_ARCH.SMOLLM3: "smollm3",
|
MODEL_ARCH.SMOLLM3: "smollm3",
|
||||||
MODEL_ARCH.LFM2: "lfm2",
|
MODEL_ARCH.LFM2: "lfm2",
|
||||||
|
|
@ -2430,7 +2430,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||||
MODEL_TENSOR.OUTPUT_NORM, # Final layer norm
|
MODEL_TENSOR.OUTPUT_NORM, # Final layer norm
|
||||||
MODEL_TENSOR.OUTPUT, # Output projection (lm_head)
|
MODEL_TENSOR.OUTPUT, # Output projection (lm_head)
|
||||||
],
|
],
|
||||||
MODEL_ARCH.HUNYUAN_MOE: [
|
MODEL_ARCH.HUNYUAN_V1_MOE: [
|
||||||
MODEL_TENSOR.TOKEN_EMBD,
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
MODEL_TENSOR.OUTPUT_NORM,
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
MODEL_TENSOR.OUTPUT,
|
MODEL_TENSOR.OUTPUT,
|
||||||
|
|
|
||||||
|
|
@ -84,7 +84,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
|
||||||
{ LLM_ARCH_ARCEE, "arcee" },
|
{ LLM_ARCH_ARCEE, "arcee" },
|
||||||
{ LLM_ARCH_ERNIE4_5, "ernie4_5" },
|
{ LLM_ARCH_ERNIE4_5, "ernie4_5" },
|
||||||
{ LLM_ARCH_ERNIE4_5_MOE, "ernie4_5-moe" },
|
{ LLM_ARCH_ERNIE4_5_MOE, "ernie4_5-moe" },
|
||||||
{ LLM_ARCH_HUNYUAN_MOE, "hunyuan-moe" },
|
{ LLM_ARCH_HUNYUAN_V1_MOE, "hunyuan-v1-moe" },
|
||||||
{ LLM_ARCH_HUNYUAN_V1_DENSE, "hunyuan-v1-dense" },
|
{ LLM_ARCH_HUNYUAN_V1_DENSE, "hunyuan-v1-dense" },
|
||||||
{ LLM_ARCH_SMOLLM3, "smollm3" },
|
{ LLM_ARCH_SMOLLM3, "smollm3" },
|
||||||
{ LLM_ARCH_LFM2, "lfm2" },
|
{ LLM_ARCH_LFM2, "lfm2" },
|
||||||
|
|
@ -1874,7 +1874,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
LLM_ARCH_HUNYUAN_MOE,
|
LLM_ARCH_HUNYUAN_V1_MOE,
|
||||||
{
|
{
|
||||||
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
||||||
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
||||||
|
|
|
||||||
|
|
@ -88,7 +88,7 @@ enum llm_arch {
|
||||||
LLM_ARCH_ARCEE,
|
LLM_ARCH_ARCEE,
|
||||||
LLM_ARCH_ERNIE4_5,
|
LLM_ARCH_ERNIE4_5,
|
||||||
LLM_ARCH_ERNIE4_5_MOE,
|
LLM_ARCH_ERNIE4_5_MOE,
|
||||||
LLM_ARCH_HUNYUAN_MOE,
|
LLM_ARCH_HUNYUAN_V1_MOE,
|
||||||
LLM_ARCH_HUNYUAN_V1_DENSE,
|
LLM_ARCH_HUNYUAN_V1_DENSE,
|
||||||
LLM_ARCH_SMOLLM3,
|
LLM_ARCH_SMOLLM3,
|
||||||
LLM_ARCH_LFM2,
|
LLM_ARCH_LFM2,
|
||||||
|
|
|
||||||
|
|
@ -65,7 +65,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
|
||||||
{ "bailing", LLM_CHAT_TEMPLATE_BAILING },
|
{ "bailing", LLM_CHAT_TEMPLATE_BAILING },
|
||||||
{ "llama4", LLM_CHAT_TEMPLATE_LLAMA4 },
|
{ "llama4", LLM_CHAT_TEMPLATE_LLAMA4 },
|
||||||
{ "smolvlm", LLM_CHAT_TEMPLATE_SMOLVLM },
|
{ "smolvlm", LLM_CHAT_TEMPLATE_SMOLVLM },
|
||||||
{ "hunyuan-moe", LLM_CHAT_TEMPLATE_HUNYUAN_MOE },
|
{ "hunyuan-v1-moe", LLM_CHAT_TEMPLATE_HUNYUAN_V1_MOE },
|
||||||
{ "hunyuan-v1-dense", LLM_CHAT_TEMPLATE_HUNYUAN_V1_DENSE },
|
{ "hunyuan-v1-dense", LLM_CHAT_TEMPLATE_HUNYUAN_V1_DENSE },
|
||||||
{ "kimi-k2", LLM_CHAT_TEMPLATE_KIMI_K2 },
|
{ "kimi-k2", LLM_CHAT_TEMPLATE_KIMI_K2 },
|
||||||
};
|
};
|
||||||
|
|
@ -193,7 +193,7 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
|
||||||
} else if (tmpl_contains("<|endofuserprompt|>")) {
|
} else if (tmpl_contains("<|endofuserprompt|>")) {
|
||||||
return LLM_CHAT_TEMPLATE_DOTS1;
|
return LLM_CHAT_TEMPLATE_DOTS1;
|
||||||
} else if (tmpl_contains("<|startoftext|>") && tmpl_contains("<|extra_4|>")) {
|
} else if (tmpl_contains("<|startoftext|>") && tmpl_contains("<|extra_4|>")) {
|
||||||
return LLM_CHAT_TEMPLATE_HUNYUAN_MOE;
|
return LLM_CHAT_TEMPLATE_HUNYUAN_V1_MOE;
|
||||||
} else if (tmpl_contains("<|hy_place▁holder▁no▁2|>") && tmpl_contains("<|hy_place▁holder▁no▁3|>")) {
|
} else if (tmpl_contains("<|hy_place▁holder▁no▁2|>") && tmpl_contains("<|hy_place▁holder▁no▁3|>")) {
|
||||||
return LLM_CHAT_TEMPLATE_HUNYUAN_V1_DENSE;
|
return LLM_CHAT_TEMPLATE_HUNYUAN_V1_DENSE;
|
||||||
} else if (tmpl_contains("<|im_assistant|>assistant<|im_middle|>")) {
|
} else if (tmpl_contains("<|im_assistant|>assistant<|im_middle|>")) {
|
||||||
|
|
@ -694,14 +694,14 @@ int32_t llm_chat_apply_template(
|
||||||
if (add_ass) {
|
if (add_ass) {
|
||||||
ss << "<|response|>";
|
ss << "<|response|>";
|
||||||
}
|
}
|
||||||
} else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_MOE) {
|
} else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_V1_MOE) {
|
||||||
// tencent/Hunyuan-A13B-Instruct
|
// tencent/Hunyuan-A13B-Instruct
|
||||||
for (auto message : chat) {
|
for (auto message : chat) {
|
||||||
std::string role(message->role);
|
std::string role(message->role);
|
||||||
if (role == "system") {
|
if (role == "system") {
|
||||||
ss << "<|startoftext|>" << message->content << "<|extra_4|>";
|
ss << "<|startoftext|>" << message->content << "<|extra_4|>";
|
||||||
} else if (role == "assistant") {
|
} else if (role == "assistant") {
|
||||||
ss << "<|startoftext|>" << message->content << "<|eos|>";
|
ss << message->content << "<|eos|>";
|
||||||
} else {
|
} else {
|
||||||
ss << "<|startoftext|>" << message->content << "<|extra_0|>";
|
ss << "<|startoftext|>" << message->content << "<|extra_0|>";
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -45,7 +45,7 @@ enum llm_chat_template {
|
||||||
LLM_CHAT_TEMPLATE_LLAMA4,
|
LLM_CHAT_TEMPLATE_LLAMA4,
|
||||||
LLM_CHAT_TEMPLATE_SMOLVLM,
|
LLM_CHAT_TEMPLATE_SMOLVLM,
|
||||||
LLM_CHAT_TEMPLATE_DOTS1,
|
LLM_CHAT_TEMPLATE_DOTS1,
|
||||||
LLM_CHAT_TEMPLATE_HUNYUAN_MOE,
|
LLM_CHAT_TEMPLATE_HUNYUAN_V1_MOE,
|
||||||
LLM_CHAT_TEMPLATE_HUNYUAN_V1_DENSE,
|
LLM_CHAT_TEMPLATE_HUNYUAN_V1_DENSE,
|
||||||
LLM_CHAT_TEMPLATE_KIMI_K2,
|
LLM_CHAT_TEMPLATE_KIMI_K2,
|
||||||
LLM_CHAT_TEMPLATE_UNKNOWN,
|
LLM_CHAT_TEMPLATE_UNKNOWN,
|
||||||
|
|
|
||||||
|
|
@ -1733,7 +1733,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
|
||||||
type = LLM_TYPE_UNKNOWN;
|
type = LLM_TYPE_UNKNOWN;
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
case LLM_ARCH_HUNYUAN_MOE:
|
case LLM_ARCH_HUNYUAN_V1_MOE:
|
||||||
{
|
{
|
||||||
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
|
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
|
||||||
ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
|
ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
|
||||||
|
|
@ -5078,7 +5078,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
|
||||||
layer.ffn_up_b = create_tensor(tn(LLM_TENSOR_FFN_UP, "bias", i), {ffn_intermediate_size}, TENSOR_NOT_REQUIRED);
|
layer.ffn_up_b = create_tensor(tn(LLM_TENSOR_FFN_UP, "bias", i), {ffn_intermediate_size}, TENSOR_NOT_REQUIRED);
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
case LLM_ARCH_HUNYUAN_MOE:
|
case LLM_ARCH_HUNYUAN_V1_MOE:
|
||||||
{
|
{
|
||||||
tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
|
tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
|
||||||
|
|
||||||
|
|
@ -16580,8 +16580,8 @@ struct llm_build_arcee : public llm_graph_context {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct llm_build_hunyuan_moe : public llm_graph_context {
|
struct llm_build_hunyuan_v1_moe : public llm_graph_context {
|
||||||
llm_build_hunyuan_moe(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
|
llm_build_hunyuan_v1_moe(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
|
||||||
const int64_t n_embd_head = hparams.n_embd_head_v;
|
const int64_t n_embd_head = hparams.n_embd_head_v;
|
||||||
|
|
||||||
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
|
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
|
||||||
|
|
@ -17615,9 +17615,9 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
|
||||||
{
|
{
|
||||||
llm = std::make_unique<llm_build_ernie4_5_moe>(*this, params);
|
llm = std::make_unique<llm_build_ernie4_5_moe>(*this, params);
|
||||||
} break;
|
} break;
|
||||||
case LLM_ARCH_HUNYUAN_MOE:
|
case LLM_ARCH_HUNYUAN_V1_MOE:
|
||||||
{
|
{
|
||||||
llm = std::make_unique<llm_build_hunyuan_moe>(*this, params);
|
llm = std::make_unique<llm_build_hunyuan_v1_moe>(*this, params);
|
||||||
} break;
|
} break;
|
||||||
case LLM_ARCH_HUNYUAN_V1_DENSE:
|
case LLM_ARCH_HUNYUAN_V1_DENSE:
|
||||||
{
|
{
|
||||||
|
|
@ -17831,7 +17831,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
|
||||||
case LLM_ARCH_EXAONE4:
|
case LLM_ARCH_EXAONE4:
|
||||||
case LLM_ARCH_MINICPM3:
|
case LLM_ARCH_MINICPM3:
|
||||||
case LLM_ARCH_DOTS1:
|
case LLM_ARCH_DOTS1:
|
||||||
case LLM_ARCH_HUNYUAN_MOE:
|
case LLM_ARCH_HUNYUAN_V1_MOE:
|
||||||
case LLM_ARCH_HUNYUAN_V1_DENSE:
|
case LLM_ARCH_HUNYUAN_V1_DENSE:
|
||||||
case LLM_ARCH_LFM2:
|
case LLM_ARCH_LFM2:
|
||||||
return LLAMA_ROPE_TYPE_NEOX;
|
return LLAMA_ROPE_TYPE_NEOX;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue