From be1b0c35546cafdfc91296b1f95329834409fe4b Mon Sep 17 00:00:00 2001 From: Aes Sedai <7980540+AesSedai@users.noreply.github.com> Date: Sat, 7 Feb 2026 21:14:17 -0800 Subject: [PATCH] Kimi-K2.5: Use merged QKV for vision --- convert_hf_to_gguf.py | 9 --------- tools/mtmd/clip.cpp | 5 +++++ 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 826fb707ab..7d8fb6bb12 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -11113,15 +11113,6 @@ class KimiK25Model(MmprojModel): if not is_vision: return - # Split fused QKV tensors in vision encoder - if "wqkv" in name: - split_dim = 0 if "weight" in name else -1 - wq, wk, wv = data_torch.chunk(3, dim=split_dim) - yield from super().modify_tensors(wq, name.replace("wqkv", "wq"), bid) - yield from super().modify_tensors(wk, name.replace("wqkv", "wk"), bid) - yield from super().modify_tensors(wv, name.replace("wqkv", "wv"), bid) - return - # Temporal embeddings: (T, 1, C) → (T, C) if "pos_emb.time_weight" in name: T, _, C = data_torch.shape diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp index daa7a01379..0fd07f5ca7 100644 --- a/tools/mtmd/clip.cpp +++ b/tools/mtmd/clip.cpp @@ -726,6 +726,11 @@ ggml_tensor * clip_graph::build_rope_2d_interleaved( GGML_ASSERT(n_dim % 4 == 0); // Must be divisible by 4 for interleaved x,y pairs + // Ensure input is contiguous (needed when using merged QKV with ggml_view) + if (!ggml_is_contiguous(cur)) { + cur = ggml_cont(ctx0, cur); + } + // Step 1: Reshape to expose interleaved structure // cur: [n_dim, n_head, n_pos] -> [4, n_dim/4, n_head, n_pos] ggml_tensor * reshaped = ggml_reshape_4d(ctx0, cur, 4, n_dim/4, n_head, n_pos);