Kimi-K2.5: Use merged QKV for vision

2026-02-07 21:14:17 -08:00 · 2026-02-07 21:14:17 -08:00 · be1b0c3554
parent f13b383843
commit be1b0c3554
2 changed files with 5 additions and 9 deletions
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@ -11113,15 +11113,6 @@ class KimiK25Model(MmprojModel):
        if not is_vision:
            return
        # Split fused QKV tensors in vision encoder
        if "wqkv" in name:
            split_dim = 0 if "weight" in name else -1
            wq, wk, wv = data_torch.chunk(3, dim=split_dim)
            yield from super().modify_tensors(wq, name.replace("wqkv", "wq"), bid)
            yield from super().modify_tensors(wk, name.replace("wqkv", "wk"), bid)
            yield from super().modify_tensors(wv, name.replace("wqkv", "wv"), bid)
            return
        # Temporal embeddings: (T, 1, C) → (T, C)
        if "pos_emb.time_weight" in name:
            T, _, C = data_torch.shape
--- a/tools/mtmd/clip.cpp
+++ b/tools/mtmd/clip.cpp
@ -726,6 +726,11 @@ ggml_tensor * clip_graph::build_rope_2d_interleaved(
    GGML_ASSERT(n_dim % 4 == 0);  // Must be divisible by 4 for interleaved x,y pairs
    // Ensure input is contiguous (needed when using merged QKV with ggml_view)
    if (!ggml_is_contiguous(cur)) {
        cur = ggml_cont(ctx0, cur);
    }
    // Step 1: Reshape to expose interleaved structure
    // cur: [n_dim, n_head, n_pos] -> [4, n_dim/4, n_head, n_pos]
    ggml_tensor * reshaped = ggml_reshape_4d(ctx0, cur, 4, n_dim/4, n_head, n_pos);