removed trailing whitespace in empty lines + make sure indentation is a multiple of 4

Yee Man Chan 2026-01-11 22:19:29 +08:00
parent 59182f5e06
commit 58d1ee5227
1 changed file with 30 additions and 31 deletions


@@ -5138,7 +5138,7 @@ class KimiLinearModel(TextModel):
# Default to 4096 if not found
logger.warning("No context length found in config, defaulting to 4096")
self.gguf_writer.add_context_length(4096)
# KDA & MLA params
# Get ssm_d_conv from linear_attn_config.short_conv_kernel_size or ssm_d_conv
linear_attn_config = self.hparams.get("linear_attn_config", {})
@@ -5156,23 +5156,23 @@ class KimiLinearModel(TextModel):
ssm_d_conv = self.hparams.get("ssm_d_conv") or linear_attn_config.get("short_conv_kernel_size")
if ssm_d_conv is not None:
-self.gguf_writer.add_ssm_conv_kernel(ssm_d_conv)
+self.gguf_writer.add_ssm_conv_kernel(ssm_d_conv)
kda_head_dim = self.hparams.get("kda_head_dim") or linear_attn_config.get("head_dim")
if kda_head_dim is not None:
-self.gguf_writer.add_kda_head_dim(kda_head_dim)
+self.gguf_writer.add_kda_head_dim(kda_head_dim)
# MLA params - use add_* methods that handle arch substitution
# Support both HuggingFace naming (q_lora_rank, kv_lora_rank) and internal naming (n_lora_q, n_lora_kv)
q_lora_rank = self.hparams.get("q_lora_rank", self.hparams.get("n_lora_q"))
kv_lora_rank = self.hparams.get("kv_lora_rank", self.hparams.get("n_lora_kv"))
if q_lora_rank is not None:
-self.gguf_writer.add_q_lora_rank(q_lora_rank)
+self.gguf_writer.add_q_lora_rank(q_lora_rank)
if kv_lora_rank is not None:
-self.gguf_writer.add_kv_lora_rank(kv_lora_rank)
+self.gguf_writer.add_kv_lora_rank(kv_lora_rank)
# MLA head dimensions
# Support HuggingFace naming: qk_nope_head_dim, qk_rope_head_dim, v_head_dim
qk_nope_head_dim = self.hparams.get("qk_nope_head_dim")
@@ -5182,28 +5182,27 @@ class KimiLinearModel(TextModel):
self.gguf_writer.add_key_length(self.hparams["kv_lora_rank"] + self.hparams["qk_rope_head_dim"])
self.gguf_writer.add_value_length(self.hparams["kv_lora_rank"])
# Calculate n_embd_head_k_mla = qk_nope_head_dim + qk_rope_head_dim
if "n_embd_head_k_mla" in self.hparams:
-self.gguf_writer.add_key_length_mla(self.hparams["n_embd_head_k_mla"])
+self.gguf_writer.add_key_length_mla(self.hparams["n_embd_head_k_mla"])
elif qk_nope_head_dim is not None and qk_rope_head_dim is not None:
-n_embd_head_k_mla = qk_nope_head_dim + qk_rope_head_dim
-self.gguf_writer.add_key_length_mla(n_embd_head_k_mla)
+n_embd_head_k_mla = qk_nope_head_dim + qk_rope_head_dim
+self.gguf_writer.add_key_length_mla(n_embd_head_k_mla)
# n_embd_head_v_mla = v_head_dim
if "n_embd_head_v_mla" in self.hparams:
-self.gguf_writer.add_value_length_mla(self.hparams["n_embd_head_v_mla"])
+self.gguf_writer.add_value_length_mla(self.hparams["n_embd_head_v_mla"])
elif v_head_dim is not None:
-self.gguf_writer.add_value_length_mla(v_head_dim)
+self.gguf_writer.add_value_length_mla(v_head_dim)
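To get a feel for what these calls record, here is a worked example with hypothetical, DeepSeek-V2-style MLA dimensions (none of these numbers are taken from the actual Kimi config):

# Hypothetical MLA dimensions, for illustration only
kv_lora_rank, qk_rope_head_dim = 512, 64
qk_nope_head_dim, v_head_dim = 128, 128

key_length = kv_lora_rank + qk_rope_head_dim              # 576: compressed KV latent + rope part
value_length = kv_lora_rank                                # 512
n_embd_head_k_mla = qk_nope_head_dim + qk_rope_head_dim    # 192: per-head K size after decompression
n_embd_head_v_mla = v_head_dim                             # 128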
# Rotation - use qk_rope_head_dim for Kimi
rope_dim = self.hparams.get("qk_rope_head_dim") or self.hparams.get("n_rot")
if rope_dim is not None:
-self.gguf_writer.add_rope_dimension_count(rope_dim)
+self.gguf_writer.add_rope_dimension_count(rope_dim)
else:
-# Default to head_dim
-head_dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
-self.gguf_writer.add_rope_dimension_count(head_dim)
+# Default to head_dim
+head_dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
+self.gguf_writer.add_rope_dimension_count(head_dim)
# Copied from Qwen2Moe as this model inherits parts of it
# YaRN is not enabled by default
@@ -5227,17 +5226,17 @@ class KimiLinearModel(TextModel):
moe_intermediate_size = self.hparams.get("moe_intermediate_size")
if moe_intermediate_size is not None:
self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size)
# num_shared_experts (1 for Kimi)
num_shared_experts = self.hparams.get("num_shared_experts")
if num_shared_experts is not None:
self.gguf_writer.add_expert_shared_count(num_shared_experts)
# first_k_dense_replace (1 for Kimi - first layer uses dense MLP)
first_k_dense_replace = self.hparams.get("first_k_dense_replace")
if first_k_dense_replace is not None:
self.gguf_writer.add_leading_dense_block_count(first_k_dense_replace)
# Routed scaling factor (expert_weights_scale = 2.446 for Kimi)
routed_scaling_factor = self.hparams.get("routed_scaling_factor")
if routed_scaling_factor is not None:
@@ -5246,13 +5245,13 @@ class KimiLinearModel(TextModel):
def prepare_tensors(self):
super().prepare_tensors()
if self._experts is not None:
-experts = [k for d in self._experts for k in d.keys()]
-if len(experts) > 0:
-raise ValueError(f"Unprocessed experts: {experts}")
+experts = [k for d in self._experts for k in d.keys()]
+if len(experts) > 0:
+raise ValueError(f"Unprocessed experts: {experts}")
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
logger.info(f"Processing {name}: shape before = {tuple(data_torch.shape)}")
# Handle KDA conv1d weights
# HuggingFace/vLLM stores as [d_inner, d_conv] (2D), memory layout: conv_step changes fastest
# llama.cpp expects ggml ne = [d_conv, 1, d_inner, 1], memory layout: ne[0]=d_conv changes fastest
@@ -5271,7 +5270,7 @@ class KimiLinearModel(TextModel):
d_inner, _, d_conv = data_torch.shape
data_torch = data_torch.reshape(1, d_inner, 1, d_conv)
logger.info(f"Reshaped conv1d weight {name}: [d_inner={d_inner}, 1, d_conv={d_conv}] -> numpy {tuple(data_torch.shape)} -> ggml ne=[{d_conv}, 1, {d_inner}, 1]")
# Handle A_log: HF stores as [1, 1, num_heads, 1]
# llama.cpp expects ggml ne = [1, num_heads, 1, 1]
# GGUF reverses numpy shape: numpy (1, 1, num_heads, 1) -> ggml ne = [1, num_heads, 1, 1]
@@ -5279,11 +5278,11 @@ class KimiLinearModel(TextModel):
if name.endswith(".A_log"):
if data_torch.ndim == 4:
logger.info(f"A_log {name}: numpy {tuple(data_torch.shape)} -> ggml ne={list(reversed(data_torch.shape))}")
# Kimi specific bias
if name.endswith("e_score_correction_bias"):
-new_name = self.format_tensor_name(gguf.MODEL_TENSOR.FFN_EXP_PROBS_B, bid)
-return [(new_name, data_torch)]
+new_name = self.format_tensor_name(gguf.MODEL_TENSOR.FFN_EXP_PROBS_B, bid)
+return [(new_name, data_torch)]
# process the experts separately
if name.find("block_sparse_moe.experts") != -1: