keep indexer tensors
This commit is contained in:
parent
0451c849ee
commit
9e4e556cc0
|
|
@ -8706,7 +8706,7 @@ class GlmMoeDsaModel(DeepseekV2Model):
|
||||||
super().set_gguf_parameters()
|
super().set_gguf_parameters()
|
||||||
|
|
||||||
rope_dim = self.hparams["qk_rope_head_dim"]
|
rope_dim = self.hparams["qk_rope_head_dim"]
|
||||||
partial_rotary_factor = self.hparams["partial_rotary_factor"]
|
partial_rotary_factor = self.hparams.get("partial_rotary_factor", 1.0)
|
||||||
self.gguf_writer.add_rope_dimension_count(int(rope_dim * partial_rotary_factor))
|
self.gguf_writer.add_rope_dimension_count(int(rope_dim * partial_rotary_factor))
|
||||||
|
|
||||||
# Expert gating function (sigmoid for GLM4_MOE)
|
# Expert gating function (sigmoid for GLM4_MOE)
|
||||||
|
|
|
||||||
|
|
@ -667,6 +667,10 @@ class MODEL_TENSOR(IntEnum):
|
||||||
VISEXP_GATE = auto()
|
VISEXP_GATE = auto()
|
||||||
VISEXP_DOWN = auto()
|
VISEXP_DOWN = auto()
|
||||||
VISEXP_UP = auto()
|
VISEXP_UP = auto()
|
||||||
|
INDEXER_K_NORM = auto()
|
||||||
|
INDEXER_PROJ = auto()
|
||||||
|
INDEXER_ATTN_K = auto()
|
||||||
|
INDEXER_ATTN_Q_B = auto()
|
||||||
# vision
|
# vision
|
||||||
V_MMPROJ = auto()
|
V_MMPROJ = auto()
|
||||||
V_MMPROJ_FC = auto()
|
V_MMPROJ_FC = auto()
|
||||||
|
|
@ -1096,6 +1100,10 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
||||||
MODEL_TENSOR.VISEXP_GATE: "blk.{bid}.vis_gate",
|
MODEL_TENSOR.VISEXP_GATE: "blk.{bid}.vis_gate",
|
||||||
MODEL_TENSOR.VISEXP_DOWN: "blk.{bid}.vis_down",
|
MODEL_TENSOR.VISEXP_DOWN: "blk.{bid}.vis_down",
|
||||||
MODEL_TENSOR.VISEXP_UP: "blk.{bid}.vis_up",
|
MODEL_TENSOR.VISEXP_UP: "blk.{bid}.vis_up",
|
||||||
|
MODEL_TENSOR.INDEXER_K_NORM: "blk.{bid}.indexer.k_norm",
|
||||||
|
MODEL_TENSOR.INDEXER_PROJ: "blk.{bid}.indexer.proj",
|
||||||
|
MODEL_TENSOR.INDEXER_ATTN_K: "blk.{bid}.indexer.attn_k",
|
||||||
|
MODEL_TENSOR.INDEXER_ATTN_Q_B: "blk.{bid}.indexer.attn_q_b",
|
||||||
# vision
|
# vision
|
||||||
MODEL_TENSOR.V_MMPROJ: "mm.{bid}",
|
MODEL_TENSOR.V_MMPROJ: "mm.{bid}",
|
||||||
MODEL_TENSOR.V_MMPROJ_FC: "mm.model.fc",
|
MODEL_TENSOR.V_MMPROJ_FC: "mm.model.fc",
|
||||||
|
|
@ -2646,6 +2654,10 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||||
MODEL_TENSOR.FFN_DOWN_SHEXP,
|
MODEL_TENSOR.FFN_DOWN_SHEXP,
|
||||||
MODEL_TENSOR.FFN_UP_SHEXP,
|
MODEL_TENSOR.FFN_UP_SHEXP,
|
||||||
MODEL_TENSOR.FFN_EXP_PROBS_B,
|
MODEL_TENSOR.FFN_EXP_PROBS_B,
|
||||||
|
MODEL_TENSOR.INDEXER_K_NORM,
|
||||||
|
MODEL_TENSOR.INDEXER_PROJ,
|
||||||
|
MODEL_TENSOR.INDEXER_ATTN_K,
|
||||||
|
MODEL_TENSOR.INDEXER_ATTN_Q_B,
|
||||||
# NextN/MTP tensors - preserved but unused
|
# NextN/MTP tensors - preserved but unused
|
||||||
MODEL_TENSOR.NEXTN_EH_PROJ,
|
MODEL_TENSOR.NEXTN_EH_PROJ,
|
||||||
MODEL_TENSOR.NEXTN_EMBED_TOKENS,
|
MODEL_TENSOR.NEXTN_EMBED_TOKENS,
|
||||||
|
|
|
||||||
|
|
@ -1199,6 +1199,22 @@ class TensorNameMap:
|
||||||
"model.layers.{bid}.self_attn.vision_expert_query_key_value", # cogvlm
|
"model.layers.{bid}.self_attn.vision_expert_query_key_value", # cogvlm
|
||||||
),
|
),
|
||||||
|
|
||||||
|
MODEL_TENSOR.INDEXER_K_NORM: (
|
||||||
|
"model.layers.{bid}.self_attn.indexer.k_norm", # DSA
|
||||||
|
),
|
||||||
|
|
||||||
|
MODEL_TENSOR.INDEXER_PROJ: (
|
||||||
|
"model.layers.{bid}.self_attn.indexer.weights_proj", # DSA
|
||||||
|
),
|
||||||
|
|
||||||
|
MODEL_TENSOR.INDEXER_ATTN_K: (
|
||||||
|
"model.layers.{bid}.self_attn.indexer.wk", # DSA
|
||||||
|
),
|
||||||
|
|
||||||
|
MODEL_TENSOR.INDEXER_ATTN_Q_B: (
|
||||||
|
"model.layers.{bid}.self_attn.indexer.wq_b", # DSA
|
||||||
|
),
|
||||||
|
|
||||||
############################################################################
|
############################################################################
|
||||||
# TODO: these do not belong to block_mappings_cfg - move them to mappings_cfg
|
# TODO: these do not belong to block_mappings_cfg - move them to mappings_cfg
|
||||||
MODEL_TENSOR.ENC_OUTPUT_NORM: (
|
MODEL_TENSOR.ENC_OUTPUT_NORM: (
|
||||||
|
|
|
||||||
|
|
@ -513,6 +513,10 @@ static const std::map<llm_tensor, const char *> LLM_TENSOR_NAMES = {
|
||||||
{ LLM_TENSOR_VISEXP_FFN_GATE, "blk.%d.vis_gate" },
|
{ LLM_TENSOR_VISEXP_FFN_GATE, "blk.%d.vis_gate" },
|
||||||
{ LLM_TENSOR_VISEXP_FFN_DOWN, "blk.%d.vis_down" },
|
{ LLM_TENSOR_VISEXP_FFN_DOWN, "blk.%d.vis_down" },
|
||||||
{ LLM_TENSOR_VISEXP_FFN_UP, "blk.%d.vis_up" },
|
{ LLM_TENSOR_VISEXP_FFN_UP, "blk.%d.vis_up" },
|
||||||
|
{ LLM_TENSOR_INDEXER_K_NORM, "blk.%d.indexer.k_norm" },
|
||||||
|
{ LLM_TENSOR_INDEXER_PROJ, "blk.%d.indexer.proj" },
|
||||||
|
{ LLM_TENSOR_INDEXER_ATTN_K, "blk.%d.indexer.attn_k" },
|
||||||
|
{ LLM_TENSOR_INDEXER_ATTN_Q_B, "blk.%d.indexer.attn_q_b" },
|
||||||
};
|
};
|
||||||
|
|
||||||
static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
|
static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
|
||||||
|
|
@ -1627,6 +1631,10 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
|
||||||
LLM_TENSOR_FFN_DOWN_SHEXP,
|
LLM_TENSOR_FFN_DOWN_SHEXP,
|
||||||
LLM_TENSOR_FFN_UP_SHEXP,
|
LLM_TENSOR_FFN_UP_SHEXP,
|
||||||
LLM_TENSOR_FFN_EXP_PROBS_B,
|
LLM_TENSOR_FFN_EXP_PROBS_B,
|
||||||
|
LLM_TENSOR_INDEXER_K_NORM,
|
||||||
|
LLM_TENSOR_INDEXER_PROJ,
|
||||||
|
LLM_TENSOR_INDEXER_ATTN_K,
|
||||||
|
LLM_TENSOR_INDEXER_ATTN_Q_B,
|
||||||
LLM_TENSOR_NEXTN_EH_PROJ,
|
LLM_TENSOR_NEXTN_EH_PROJ,
|
||||||
LLM_TENSOR_NEXTN_EMBED_TOKENS,
|
LLM_TENSOR_NEXTN_EMBED_TOKENS,
|
||||||
LLM_TENSOR_NEXTN_ENORM,
|
LLM_TENSOR_NEXTN_ENORM,
|
||||||
|
|
|
||||||
|
|
@ -514,6 +514,10 @@ enum llm_tensor {
|
||||||
LLM_TENSOR_VISEXP_FFN_GATE,
|
LLM_TENSOR_VISEXP_FFN_GATE,
|
||||||
LLM_TENSOR_VISEXP_FFN_DOWN,
|
LLM_TENSOR_VISEXP_FFN_DOWN,
|
||||||
LLM_TENSOR_VISEXP_FFN_UP,
|
LLM_TENSOR_VISEXP_FFN_UP,
|
||||||
|
LLM_TENSOR_INDEXER_K_NORM,
|
||||||
|
LLM_TENSOR_INDEXER_PROJ,
|
||||||
|
LLM_TENSOR_INDEXER_ATTN_K,
|
||||||
|
LLM_TENSOR_INDEXER_ATTN_Q_B,
|
||||||
LLM_TENSOR_NEXTN_EH_PROJ,
|
LLM_TENSOR_NEXTN_EH_PROJ,
|
||||||
LLM_TENSOR_NEXTN_EMBED_TOKENS,
|
LLM_TENSOR_NEXTN_EMBED_TOKENS,
|
||||||
LLM_TENSOR_NEXTN_ENORM,
|
LLM_TENSOR_NEXTN_ENORM,
|
||||||
|
|
|
||||||
|
|
@ -5525,6 +5525,13 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
|
||||||
|
|
||||||
layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, flags);
|
layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, flags);
|
||||||
|
|
||||||
|
// DSA indexer
|
||||||
|
layer.indexer_k_norm = create_tensor(tn(LLM_TENSOR_INDEXER_K_NORM, "weight", i), {n_embd_head_k_mla}, flags);
|
||||||
|
layer.indexer_k_norm_b = create_tensor(tn(LLM_TENSOR_INDEXER_K_NORM, "bias", i), {n_embd_head_k_mla}, flags);
|
||||||
|
layer.indexer_proj = create_tensor(tn(LLM_TENSOR_INDEXER_PROJ, "weight", i), {n_embd, n_head}, flags);
|
||||||
|
layer.indexer_attn_k = create_tensor(tn(LLM_TENSOR_INDEXER_ATTN_K, "weight", i), {n_embd, n_embd_head_k_mla}, flags);
|
||||||
|
layer.indexer_attn_q_b = create_tensor(tn(LLM_TENSOR_INDEXER_ATTN_Q_B, "weight", i), {q_lora_rank, n_head * n_embd_head_k_mla}, flags);
|
||||||
|
|
||||||
if (i < (int) hparams.n_layer_dense_lead) {
|
if (i < (int) hparams.n_layer_dense_lead) {
|
||||||
layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, flags);
|
layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, flags);
|
||||||
layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}, flags);
|
layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}, flags);
|
||||||
|
|
|
||||||
|
|
@ -425,6 +425,13 @@ struct llama_layer {
|
||||||
struct ggml_tensor * ssm_g_b = nullptr;
|
struct ggml_tensor * ssm_g_b = nullptr;
|
||||||
struct ggml_tensor * ssm_o_norm = nullptr;
|
struct ggml_tensor * ssm_o_norm = nullptr;
|
||||||
|
|
||||||
|
// DSA (deepseek sparse attention)
|
||||||
|
struct ggml_tensor * indexer_k_norm = nullptr;
|
||||||
|
struct ggml_tensor * indexer_k_norm_b = nullptr;
|
||||||
|
struct ggml_tensor * indexer_proj = nullptr;
|
||||||
|
struct ggml_tensor * indexer_attn_k = nullptr;
|
||||||
|
struct ggml_tensor * indexer_attn_q_b = nullptr; // note: for lora a/b, not bias
|
||||||
|
|
||||||
struct llama_layer_posnet posnet;
|
struct llama_layer_posnet posnet;
|
||||||
|
|
||||||
struct llama_layer_convnext convnext;
|
struct llama_layer_convnext convnext;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue