Kimi-K2.5: remove v/o permutes, unnecessary

This commit is contained in:
Aes Sedai 2026-02-08 12:18:49 -08:00
parent d0d1062e7f
commit c8953657c4
2 changed files with 3 additions and 21 deletions

View File

@ -11122,14 +11122,6 @@ class KimiK25Model(MmprojModel):
w = w.permute(0, 2, 1, 3, 4)
return w.reshape(out_dim, in_dim)
@staticmethod
def _permute_output_proj(weights: Tensor, n_head: int) -> Tensor:
    """Permute output-projection weights from interleaved to split RoPE order.

    The reordering acts on the input (column) dimension, which packs
    ``n_head`` heads of ``head_dim`` elements each: every head's slice is
    viewed as ``(head_dim // 4, 2, 2)`` and the two middle axes are swapped.

    Args:
        weights: 2-D projection weight of shape ``(out_dim, in_dim)``;
            ``in_dim`` is assumed divisible by ``n_head``, and the resulting
            ``head_dim`` divisible by 4.
        n_head: number of attention heads packed into ``in_dim``.

    Returns:
        Tensor of the same shape with each head's columns regrouped.
    """
    out_dim, in_dim = weights.shape
    head_dim = in_dim // n_head
    # View in_dim as (n_head, head_dim // 4, 2, 2) and swap the middle axes:
    # this regroups pair-interleaved elements into split halves, mirroring
    # the per-head permutation applied to Q/K/V by _permute_kqv.
    w = weights.reshape(out_dim, n_head, head_dim // 4, 2, 2)
    w = w.permute(0, 1, 3, 2, 4)
    return w.reshape(out_dim, in_dim)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
# Only process vision and projector tensors
is_vision = any(x in name for x in ["vision_tower", "mm_projector"])
@ -11140,10 +11132,8 @@ class KimiK25Model(MmprojModel):
assert self.hparams_vision is not None
n_head = self.hparams_vision.get("num_attention_heads", 16)
# Permute Q/K/V weights/biases from interleaved to split RoPE format
# Permute Q/K weights/biases from interleaved to split RoPE format
# This allows using build_rope_2d at runtime without post-permutation.
# V is also permuted so the attention output is in split format,
# which is then handled by the permuted output projection.
if "wqkv" in name:
out_dim = data_torch.shape[0]
qkv_dim = out_dim // 3
@ -11153,18 +11143,13 @@ class KimiK25Model(MmprojModel):
wq, wk, wv = data_torch[:qkv_dim, :], data_torch[qkv_dim:2*qkv_dim, :], data_torch[2*qkv_dim:, :]
wq = self._permute_kqv(wq, n_head)
wk = self._permute_kqv(wk, n_head)
wv = self._permute_kqv(wv, n_head)
data_torch = torch.cat([wq, wk, wv], dim=0)
elif "bias" in name:
bq, bk, bv = data_torch[:qkv_dim], data_torch[qkv_dim:2*qkv_dim], data_torch[2*qkv_dim:]
bq = bq.reshape(n_head, head_dim // 4, 2, 2).permute(0, 2, 1, 3).reshape(-1)
bk = bk.reshape(n_head, head_dim // 4, 2, 2).permute(0, 2, 1, 3).reshape(-1)
bv = bv.reshape(n_head, head_dim // 4, 2, 2).permute(0, 2, 1, 3).reshape(-1)
data_torch = torch.cat([bq, bk, bv], dim=0)
# Permute output projection from interleaved to split RoPE format
if "wo.weight" in name:
data_torch = self._permute_output_proj(data_torch, n_head)
# Temporal embeddings: (T, 1, C) → (T, C)
if "pos_emb.time_weight" in name:

View File

@ -42,11 +42,8 @@ ggml_cgraph * clip_graph_kimik25::build() {
ggml_tensor * learned_pos_embd = resize_position_embeddings_3d(GGML_SCALE_MODE_BICUBIC);
// Kimi-K2.5 uses interleaved 2D RoPE pattern natively, but all attention weights
// (Q, K, V, O) are permuted during conversion to use split format throughout.
// This allows using build_rope_2d without any runtime format conversion.
// The dot product in attention is order-independent, so keeping everything in
// split format produces mathematically equivalent results.
// Kimi-K2.5 uses interleaved 2D RoPE pattern natively, but
// Q / K are permuted during conversion to use split format.
auto add_pos = [&](ggml_tensor * cur, const clip_layer &) {
cur = build_rope_2d(ctx0, cur, pos_w, pos_h, hparams.rope_theta, false);
return cur;