From 8ec5b0840964b5993b1bb399c16e971998a9fe61 Mon Sep 17 00:00:00 2001 From: Yee Man Chan Date: Tue, 3 Feb 2026 19:25:33 +0800 Subject: [PATCH] convert : accept alternate hparam keys for expert count and v_head_dim in KimiLinearModel --- convert_hf_to_gguf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 2a9c93dfbb..c167de8a46 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -5189,7 +5189,7 @@ class KimiLinearModel(TextModel): # process the experts separately if name.find("block_sparse_moe.experts") != -1: - n_experts = self.find_hparam(["num_experts"], optional=False) + n_experts = self.find_hparam(["num_local_experts", "num_experts"], optional=False) assert bid is not None if self._experts is None: @@ -5218,7 +5218,7 @@ class KimiLinearModel(TextModel): name_kb = name.replace("kv_b_proj", "k_b_proj") name_vb = name.replace("kv_b_proj", "v_b_proj") n_head_kv = self.hparams["num_key_value_heads"] - v_head_dim = self.hparams["v_head_dim"] + v_head_dim = self.find_hparam(["n_embd_head_v_mla", "v_head_dim"], optional=False) qk_nope_head_dim = self.hparams["qk_nope_head_dim"] logger.info("Split kv_b n_head_kv %d\n" % n_head_kv) assert data_torch.shape[0] == n_head_kv * (v_head_dim + qk_nope_head_dim)