remove unused fused QKV mapping

2025-11-14 21:46:26 +08:00 · 2025-11-14 21:46:26 +08:00 · bb7a5561ce
parent 06f50ded46
commit bb7a5561ce
5 changed files with 8 additions and 34 deletions
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -7144,38 +7144,11 @@ class JinaCLIPVisionModel(MmprojModel):
            suffix = parts[-1]
            return [(f'v.blk.{layer}.attn_ln.{suffix}', data_torch)]

-        # fused qkv
-        if rest == 'attn.qkv.weight':
-            w = data_torch
-            wdim = w.shape[0]
-            if wdim % 3 != 0:
-                logger.warning('mmproj(jinaclip): unexpected qkv weight shape %s for %s', tuple(w.shape), name)
-            d = wdim // 3
-            q, k, v = w[0:d, :], w[d:2 * d, :], w[2 * d:, :]
-            return [
-                (f'v.blk.{layer}.attn_q.weight', q),
-                (f'v.blk.{layer}.attn_k.weight', k),
-                (f'v.blk.{layer}.attn_v.weight', v),
-            ]
-        if rest == 'attn.qkv.bias':
-            b = data_torch
-            bdim = b.shape[0]
-            if bdim % 3 != 0:
-                logger.warning('mmproj(jinaclip): unexpected qkv bias shape %s for %s', tuple(b.shape), name)
-            d = bdim // 3
-            qb, kb, vb = b[0:d], b[d:2 * d], b[2 * d:]
-            return [
-                (f'v.blk.{layer}.attn_q.bias', qb),
-                (f'v.blk.{layer}.attn_k.bias', kb),
-                (f'v.blk.{layer}.attn_v.bias', vb),
-            ]
-        # separate q/v bias (some checkpoints)
        if rest == 'attn.q_bias':
            return [(f'v.blk.{layer}.attn_q.bias', data_torch)]
        if rest == 'attn.v_bias':
            return [(f'v.blk.{layer}.attn_v.bias', data_torch)]

-        # separate projections
        if rest.startswith('attn.q_proj.'):
            suffix = parts[-1]
            return [(f'v.blk.{layer}.attn_q.{suffix}', data_torch)]
--- a/tools/mtmd/clip.cpp
+++ b/tools/mtmd/clip.cpp
@@ -489,7 +489,6 @@ ggml_tensor * clip_graph::build_norm(
        cur = ggml_add(ctx0, cur, mb);
        cb(cur, "norm_b", il);
    }
-
    return cur;
 }

--- a/tools/mtmd/mtmd-cli.cpp
+++ b/tools/mtmd/mtmd-cli.cpp
@@ -183,7 +183,6 @@ static int run_mmproj_only(common_params & params) {
    if (params.mmproj.path.empty() || params.image.empty()) return -1;
    mtmd_context_params ctx_params = mtmd_context_params_default();
    ctx_params.use_gpu   = params.mmproj_use_gpu;
-    ctx_params.verbosity = (params.verbosity > 0) ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_INFO;
    mtmd_mmproj_context * mctx = mtmd_mmproj_init(params.mmproj.path.c_str(), ctx_params);
    if (!mctx) {
        LOG_ERR("[ERROR] Failed to load vision mmproj: %s\n", params.mmproj.path.c_str());
--- a/tools/mtmd/mtmd.cpp
+++ b/tools/mtmd/mtmd.cpp
@@ -435,10 +435,13 @@ struct mtmd_mmproj_context {
 };

 mtmd_mmproj_context * mtmd_mmproj_init(const char * mmproj_fname,
-                                        const struct mtmd_context_params ctx_params) {
-    clip_context_params clip_params;
-    clip_params.use_gpu   = ctx_params.use_gpu;
-    clip_params.verbosity = ctx_params.verbosity;
+                                       const struct mtmd_context_params ctx_params) {
+    clip_context_params clip_params {
+        /* use_gpu           */ ctx_params.use_gpu,
+        /* flash_attn_type   */ CLIP_FLASH_ATTN_TYPE_AUTO,
+        /* image_min_tokens  */ ctx_params.image_min_tokens,
+        /* image_max_tokens  */ ctx_params.image_max_tokens,
+    };
    auto res = clip_init(mmproj_fname, clip_params);
    if (!res.ctx_v) {
        return nullptr;
--- a/tools/mtmd/mtmd.h
+++ b/tools/mtmd/mtmd.h
@@ -246,7 +246,7 @@ MTMD_API int  mtmd_mmproj_get_image_size (struct mtmd_mmproj_context * ctx);
 MTMD_API int  mtmd_mmproj_get_patch_size (struct mtmd_mmproj_context * ctx);
 MTMD_API int  mtmd_mmproj_get_hidden_size(struct mtmd_mmproj_context * ctx);
 MTMD_API bool mtmd_mmproj_is_jinaclip    (struct mtmd_mmproj_context * ctx);
-// generic support check for projector-only encode path
+// generic support check for projector-only encode
 MTMD_API bool mtmd_mmproj_is_supported   (struct mtmd_mmproj_context * ctx);

 // encode a bitmap (RGB) to projector embeddings