diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index a6d2810359..cc808aa0d0 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -5088,7 +5088,6 @@ class KimiLinearModel(TextModel):
             merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
             if len(merged) == 2:
                 merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged)))
-
         # Build token list
         vocab_size = self.hparams["vocab_size"]
         special_tokens = tokenizer.special_tokens
@@ -5316,14 +5315,11 @@ class KimiLinearModel(TextModel):
         if name.endswith("kv_b_proj.weight"):
             name_kb = name.replace("kv_b_proj", "k_b_proj")
             name_vb = name.replace("kv_b_proj", "v_b_proj")
-
             n_head_kv = self.hparams["num_key_value_heads"]
             v_head_dim = self.hparams["v_head_dim"]
             qk_nope_head_dim = self.hparams["qk_nope_head_dim"]
             logger.info("Split kv_b n_head_kv %d\n" % n_head_kv)
-
             assert data_torch.shape[0] == n_head_kv * (v_head_dim + qk_nope_head_dim)
-
             kv_b = data_torch.view(n_head_kv, v_head_dim + qk_nope_head_dim, data_torch.shape[-1])
             k_b, v_b = torch.split(kv_b, [qk_nope_head_dim, v_head_dim], dim=1)
             k_b = k_b.transpose(1, 2)