From bb7a5561cea397fa37e82a66d366c1b0418fe3b6 Mon Sep 17 00:00:00 2001
From: liyang <liyang2@uniontech.com>
Date: Fri, 14 Nov 2025 21:46:26 +0800
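Subject: [PATCH] remove unused fused QKV mapping

Drop the fused attn.qkv weight/bias splitting from the JinaCLIPVisionModel
tensor mapping in convert_hf_to_gguf.py; the attn.q_bias / attn.v_bias and
attn.q_proj.* mappings are left in place.

Also stop setting verbosity in run_mmproj_only() and mtmd_mmproj_init(), and
initialize clip_context_params in mtmd_mmproj_init() with use_gpu,
CLIP_FLASH_ATTN_TYPE_AUTO, image_min_tokens and image_max_tokens.

Cosmetic: remove a blank line in clip_graph::build_norm() and shorten the
comment above mtmd_mmproj_is_supported() in mtmd.h.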

---
 convert_hf_to_gguf.py   | 27 ---------------------------
 tools/mtmd/clip.cpp     |  1 -
 tools/mtmd/mtmd-cli.cpp |  1 -
 tools/mtmd/mtmd.cpp     | 11 +++++++----
 tools/mtmd/mtmd.h       |  2 +-
 5 files changed, 8 insertions(+), 34 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index b861d1c3b8..fbf30b81b3 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -7144,38 +7144,11 @@ class JinaCLIPVisionModel(MmprojModel):
             suffix = parts[-1]
             return [(f'v.blk.{layer}.attn_ln.{suffix}', data_torch)]
 
-        # fused qkv
-        if rest == 'attn.qkv.weight':
-            w = data_torch
-            wdim = w.shape[0]
-            if wdim % 3 != 0:
-                logger.warning('mmproj(jinaclip): unexpected qkv weight shape %s for %s', tuple(w.shape), name)
-            d = wdim // 3
-            q, k, v = w[0:d, :], w[d:2 * d, :], w[2 * d:, :]
-            return [
-                (f'v.blk.{layer}.attn_q.weight', q),
-                (f'v.blk.{layer}.attn_k.weight', k),
-                (f'v.blk.{layer}.attn_v.weight', v),
-            ]
-        if rest == 'attn.qkv.bias':
-            b = data_torch
-            bdim = b.shape[0]
-            if bdim % 3 != 0:
-                logger.warning('mmproj(jinaclip): unexpected qkv bias shape %s for %s', tuple(b.shape), name)
-            d = bdim // 3
-            qb, kb, vb = b[0:d], b[d:2 * d], b[2 * d:]
-            return [
-                (f'v.blk.{layer}.attn_q.bias', qb),
-                (f'v.blk.{layer}.attn_k.bias', kb),
-                (f'v.blk.{layer}.attn_v.bias', vb),
-            ]
-        # separate q/v bias (some checkpoints)
         if rest == 'attn.q_bias':
             return [(f'v.blk.{layer}.attn_q.bias', data_torch)]
         if rest == 'attn.v_bias':
             return [(f'v.blk.{layer}.attn_v.bias', data_torch)]
 
-        # separate projections
         if rest.startswith('attn.q_proj.'):
             suffix = parts[-1]
             return [(f'v.blk.{layer}.attn_q.{suffix}', data_torch)]
diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp
index 01b5dd35de..25a93c050c 100644
--- a/tools/mtmd/clip.cpp
+++ b/tools/mtmd/clip.cpp
@@ -489,7 +489,6 @@ ggml_tensor * clip_graph::build_norm(
         cur = ggml_add(ctx0, cur, mb);
         cb(cur, "norm_b", il);
     }
-
     return cur;
 }
 
diff --git a/tools/mtmd/mtmd-cli.cpp b/tools/mtmd/mtmd-cli.cpp
index 7790dce99b..e0b0eb67e9 100644
--- a/tools/mtmd/mtmd-cli.cpp
+++ b/tools/mtmd/mtmd-cli.cpp
@@ -183,7 +183,6 @@ static int run_mmproj_only(common_params & params) {
     if (params.mmproj.path.empty() || params.image.empty()) return -1;
     mtmd_context_params ctx_params = mtmd_context_params_default();
     ctx_params.use_gpu   = params.mmproj_use_gpu;
-    ctx_params.verbosity = (params.verbosity > 0) ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_INFO;
     mtmd_mmproj_context * mctx = mtmd_mmproj_init(params.mmproj.path.c_str(), ctx_params);
     if (!mctx) {
         LOG_ERR("[ERROR] Failed to load vision mmproj: %s\n", params.mmproj.path.c_str());
diff --git a/tools/mtmd/mtmd.cpp b/tools/mtmd/mtmd.cpp
index b707e3e6d8..77cda27494 100644
--- a/tools/mtmd/mtmd.cpp
+++ b/tools/mtmd/mtmd.cpp
@@ -435,10 +435,13 @@ struct mtmd_mmproj_context {
 };
 
 mtmd_mmproj_context * mtmd_mmproj_init(const char * mmproj_fname,
-                                        const struct mtmd_context_params ctx_params) {
-    clip_context_params clip_params;
-    clip_params.use_gpu   = ctx_params.use_gpu;
-    clip_params.verbosity = ctx_params.verbosity;
+                                       const struct mtmd_context_params ctx_params) {
+    clip_context_params clip_params {
+        /* use_gpu           */ ctx_params.use_gpu,
+        /* flash_attn_type   */ CLIP_FLASH_ATTN_TYPE_AUTO,
+        /* image_min_tokens  */ ctx_params.image_min_tokens,
+        /* image_max_tokens  */ ctx_params.image_max_tokens,
+    };
     auto res = clip_init(mmproj_fname, clip_params);
     if (!res.ctx_v) {
         return nullptr;
diff --git a/tools/mtmd/mtmd.h b/tools/mtmd/mtmd.h
index 3bd80da626..a1c0884401 100644
--- a/tools/mtmd/mtmd.h
+++ b/tools/mtmd/mtmd.h
@@ -246,7 +246,7 @@ MTMD_API int  mtmd_mmproj_get_image_size (struct mtmd_mmproj_context * ctx);
 MTMD_API int  mtmd_mmproj_get_patch_size (struct mtmd_mmproj_context * ctx);
 MTMD_API int  mtmd_mmproj_get_hidden_size(struct mtmd_mmproj_context * ctx);
 MTMD_API bool mtmd_mmproj_is_jinaclip    (struct mtmd_mmproj_context * ctx);
-// generic support check for projector-only encode path
+// generic support check for projector-only encode
 MTMD_API bool mtmd_mmproj_is_supported   (struct mtmd_mmproj_context * ctx);
 
 // encode a bitmap (RGB) to projector embeddings