From bb7a5561cea397fa37e82a66d366c1b0418fe3b6 Mon Sep 17 00:00:00 2001 From: liyang Date: Fri, 14 Nov 2025 21:46:26 +0800 Subject: [PATCH] remove unused fused QKV mapping --- convert_hf_to_gguf.py | 27 --------------------------- tools/mtmd/clip.cpp | 1 - tools/mtmd/mtmd-cli.cpp | 1 - tools/mtmd/mtmd.cpp | 11 +++++++---- tools/mtmd/mtmd.h | 2 +- 5 files changed, 8 insertions(+), 34 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index b861d1c3b8..fbf30b81b3 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -7144,38 +7144,11 @@ class JinaCLIPVisionModel(MmprojModel): suffix = parts[-1] return [(f'v.blk.{layer}.attn_ln.{suffix}', data_torch)] - # fused qkv - if rest == 'attn.qkv.weight': - w = data_torch - wdim = w.shape[0] - if wdim % 3 != 0: - logger.warning('mmproj(jinaclip): unexpected qkv weight shape %s for %s', tuple(w.shape), name) - d = wdim // 3 - q, k, v = w[0:d, :], w[d:2 * d, :], w[2 * d:, :] - return [ - (f'v.blk.{layer}.attn_q.weight', q), - (f'v.blk.{layer}.attn_k.weight', k), - (f'v.blk.{layer}.attn_v.weight', v), - ] - if rest == 'attn.qkv.bias': - b = data_torch - bdim = b.shape[0] - if bdim % 3 != 0: - logger.warning('mmproj(jinaclip): unexpected qkv bias shape %s for %s', tuple(b.shape), name) - d = bdim // 3 - qb, kb, vb = b[0:d], b[d:2 * d], b[2 * d:] - return [ - (f'v.blk.{layer}.attn_q.bias', qb), - (f'v.blk.{layer}.attn_k.bias', kb), - (f'v.blk.{layer}.attn_v.bias', vb), - ] - # separate q/v bias (some checkpoints) if rest == 'attn.q_bias': return [(f'v.blk.{layer}.attn_q.bias', data_torch)] if rest == 'attn.v_bias': return [(f'v.blk.{layer}.attn_v.bias', data_torch)] - # separate projections if rest.startswith('attn.q_proj.'): suffix = parts[-1] return [(f'v.blk.{layer}.attn_q.{suffix}', data_torch)] diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp index 01b5dd35de..25a93c050c 100644 --- a/tools/mtmd/clip.cpp +++ b/tools/mtmd/clip.cpp @@ -489,7 +489,6 @@ ggml_tensor * clip_graph::build_norm( cur = ggml_add(ctx0, cur, mb); cb(cur, "norm_b", il); } - return cur; } diff --git a/tools/mtmd/mtmd-cli.cpp b/tools/mtmd/mtmd-cli.cpp index 7790dce99b..e0b0eb67e9 100644 --- a/tools/mtmd/mtmd-cli.cpp +++ b/tools/mtmd/mtmd-cli.cpp @@ -183,7 +183,6 @@ static int run_mmproj_only(common_params & params) { if (params.mmproj.path.empty() || params.image.empty()) return -1; mtmd_context_params ctx_params = mtmd_context_params_default(); ctx_params.use_gpu = params.mmproj_use_gpu; - ctx_params.verbosity = (params.verbosity > 0) ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_INFO; mtmd_mmproj_context * mctx = mtmd_mmproj_init(params.mmproj.path.c_str(), ctx_params); if (!mctx) { LOG_ERR("[ERROR] Failed to load vision mmproj: %s\n", params.mmproj.path.c_str()); diff --git a/tools/mtmd/mtmd.cpp b/tools/mtmd/mtmd.cpp index b707e3e6d8..77cda27494 100644 --- a/tools/mtmd/mtmd.cpp +++ b/tools/mtmd/mtmd.cpp @@ -435,10 +435,13 @@ struct mtmd_mmproj_context { }; mtmd_mmproj_context * mtmd_mmproj_init(const char * mmproj_fname, - const struct mtmd_context_params ctx_params) { - clip_context_params clip_params; - clip_params.use_gpu = ctx_params.use_gpu; - clip_params.verbosity = ctx_params.verbosity; + const struct mtmd_context_params ctx_params) { + clip_context_params clip_params { + /* use_gpu */ ctx_params.use_gpu, + /* flash_attn_type */ CLIP_FLASH_ATTN_TYPE_AUTO, + /* image_min_tokens */ ctx_params.image_min_tokens, + /* image_max_tokens */ ctx_params.image_max_tokens, + }; auto res = clip_init(mmproj_fname, clip_params); if (!res.ctx_v) { return nullptr; diff --git a/tools/mtmd/mtmd.h b/tools/mtmd/mtmd.h index 3bd80da626..a1c0884401 100644 --- a/tools/mtmd/mtmd.h +++ b/tools/mtmd/mtmd.h @@ -246,7 +246,7 @@ MTMD_API int mtmd_mmproj_get_image_size (struct mtmd_mmproj_context * ctx); MTMD_API int mtmd_mmproj_get_patch_size (struct mtmd_mmproj_context * ctx); MTMD_API int mtmd_mmproj_get_hidden_size(struct mtmd_mmproj_context * ctx); MTMD_API bool mtmd_mmproj_is_jinaclip (struct mtmd_mmproj_context * ctx); -// generic support check for projector-only encode path +// generic support check for projector-only encode MTMD_API bool mtmd_mmproj_is_supported (struct mtmd_mmproj_context * ctx); // encode a bitmap (RGB) to projector embeddings