minor changes
This commit is contained in:
parent
000fded1ea
commit
8ec5b08409
|
|
@@ -5189,7 +5189,7 @@ class KimiLinearModel(TextModel):
|
|||
|
||||
# process the experts separately
|
||||
if name.find("block_sparse_moe.experts") != -1:
|
||||
n_experts = self.find_hparam(["num_experts"], optional=False)
|
||||
n_experts = self.find_hparam(["num_local_experts", "num_experts"], optional=False)
|
||||
assert bid is not None
|
||||
|
||||
if self._experts is None:
|
||||
|
|
@@ -5218,7 +5218,7 @@ class KimiLinearModel(TextModel):
|
|||
name_kb = name.replace("kv_b_proj", "k_b_proj")
|
||||
name_vb = name.replace("kv_b_proj", "v_b_proj")
|
||||
n_head_kv = self.hparams["num_key_value_heads"]
|
||||
v_head_dim = self.hparams["v_head_dim"]
|
||||
v_head_dim = self.find_hparam(["n_embd_head_v_mla", "v_head_dim"], optional=False)
|
||||
qk_nope_head_dim = self.hparams["qk_nope_head_dim"]
|
||||
logger.info("Split kv_b n_head_kv %d\n" % n_head_kv)
|
||||
assert data_torch.shape[0] == n_head_kv * (v_head_dim + qk_nope_head_dim)
|
||||
|
|
|
|||
Loading…
Reference in New Issue