From 0995fbbc93a7d261ca008883a2c26ccaef7467f0 Mon Sep 17 00:00:00 2001
From: megemini <megemini@outlook.com>
Date: Tue, 13 Jan 2026 13:40:11 +0800
Subject: [PATCH] [update] restore minicpmv changes (TN_MINICPMV_* tensor names)

---
 tools/mtmd/clip-impl.h | 14 ++++++--------
 tools/mtmd/clip.cpp    | 40 ++++++++++++++++++++--------------------
 2 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/tools/mtmd/clip-impl.h b/tools/mtmd/clip-impl.h
index bde7e3d999..a977fe5e9e 100644
--- a/tools/mtmd/clip-impl.h
+++ b/tools/mtmd/clip-impl.h
@@ -110,14 +110,12 @@
 #define TN_DEEPSTACK_FC2   "v.deepstack.%d.fc2.%s"      // qwen3vl deepstack
 
 // mimicpmv
-#define TN_RESAMPL_POS_EMBD_K "resampler.pos_embed_k"
-#define TN_RESAMPL_QUERY      "resampler.query"
-#define TN_RESAMPL_PROJ       "resampler.proj.weight"
-#define TN_RESAMPL_KV_PROJ    "resampler.kv.weight"
-#define TN_RESAMPL_ATTN       "resampler.attn.%s.%s"
-#define TN_RESAMPL_LN         "resampler.ln_%s.%s"
-#define TN_RESAMPL_FFN_UP     "resampler.ffn_up.%s"
-#define TN_RESAMPL_FFN_DOWN   "resampler.ffn_down.%s"
+#define TN_MINICPMV_POS_EMBD_K "resampler.pos_embed_k"
+#define TN_MINICPMV_QUERY      "resampler.query"
+#define TN_MINICPMV_PROJ       "resampler.proj.weight"
+#define TN_MINICPMV_KV_PROJ    "resampler.kv.weight"
+#define TN_MINICPMV_ATTN       "resampler.attn.%s.%s"
+#define TN_MINICPMV_LN         "resampler.ln_%s.%s"
 
 #define TN_GLM_ADAPER_CONV      "adapter.conv.%s"
 #define TN_GLM_ADAPTER_LINEAR   "adapter.linear.linear.%s"
diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp
index 7d13b8a44d..3a32d819a3 100644
--- a/tools/mtmd/clip.cpp
+++ b/tools/mtmd/clip.cpp
@@ -1233,6 +1233,7 @@ struct clip_model_loader {
                         // TODO(megemini): paddleocr vl not specified?
                         hparams.set_limit_image_tokens(8, 4096);
                         hparams.set_warmup_n_tokens(28*28); // avoid OOM on warmup
+                    } break;
                 case PROJECTOR_TYPE_LFM2A:
                     {
                         // audio preprocessing params
@@ -1499,26 +1500,25 @@ struct clip_model_loader {
                 } break;
             case PROJECTOR_TYPE_MINICPMV:
                 {
-                    // model.mm_model_pos_embed = get_tensor(new_clip->ctx_data, TN_RESAMPL_POS_EMBD);
-                    model.mm_model_pos_embed_k = get_tensor(TN_RESAMPL_POS_EMBD_K);
-                    model.mm_model_query = get_tensor(TN_RESAMPL_QUERY);
-                    model.mm_model_proj = get_tensor(TN_RESAMPL_PROJ);
-                    model.mm_model_kv_proj = get_tensor(TN_RESAMPL_KV_PROJ);
-                    model.mm_model_attn_q_w = get_tensor(string_format(TN_RESAMPL_ATTN, "q", "weight"));
-                    model.mm_model_attn_k_w = get_tensor(string_format(TN_RESAMPL_ATTN, "k", "weight"));
-                    model.mm_model_attn_v_w = get_tensor(string_format(TN_RESAMPL_ATTN, "v", "weight"));
-                    model.mm_model_attn_q_b = get_tensor(string_format(TN_RESAMPL_ATTN, "q", "bias"));
-                    model.mm_model_attn_k_b = get_tensor(string_format(TN_RESAMPL_ATTN, "k", "bias"));
-                    model.mm_model_attn_v_b = get_tensor(string_format(TN_RESAMPL_ATTN, "v", "bias"));
-                    model.mm_model_attn_o_w = get_tensor(string_format(TN_RESAMPL_ATTN, "out", "weight"));
-                    model.mm_model_attn_o_b = get_tensor(string_format(TN_RESAMPL_ATTN, "out", "bias"));
-                    model.mm_model_ln_q_w   = get_tensor(string_format(TN_RESAMPL_LN, "q", "weight"));
-                    model.mm_model_ln_q_b   = get_tensor(string_format(TN_RESAMPL_LN, "q", "bias"));
-                    model.mm_model_ln_kv_w  = get_tensor(string_format(TN_RESAMPL_LN, "kv", "weight"));
-                    model.mm_model_ln_kv_b  = get_tensor(string_format(TN_RESAMPL_LN, "kv", "bias"));
-                    model.mm_model_ln_post_w = get_tensor(string_format(TN_RESAMPL_LN, "post", "weight"));
-                    model.mm_model_ln_post_b = get_tensor(string_format(TN_RESAMPL_LN, "post", "bias"));
-                } break;
+                    // model.mm_model_pos_embed = get_tensor(new_clip->ctx_data, TN_MINICPMV_POS_EMBD);
+                    model.mm_model_pos_embed_k = get_tensor(TN_MINICPMV_POS_EMBD_K);
+                    model.mm_model_query = get_tensor(TN_MINICPMV_QUERY);
+                    model.mm_model_proj = get_tensor(TN_MINICPMV_PROJ);
+                    model.mm_model_kv_proj = get_tensor(TN_MINICPMV_KV_PROJ);
+                    model.mm_model_attn_q_w = get_tensor(string_format(TN_MINICPMV_ATTN, "q", "weight"));
+                    model.mm_model_attn_k_w = get_tensor(string_format(TN_MINICPMV_ATTN, "k", "weight"));
+                    model.mm_model_attn_v_w = get_tensor(string_format(TN_MINICPMV_ATTN, "v", "weight"));
+                    model.mm_model_attn_q_b = get_tensor(string_format(TN_MINICPMV_ATTN, "q", "bias"));
+                    model.mm_model_attn_k_b = get_tensor(string_format(TN_MINICPMV_ATTN, "k", "bias"));
+                    model.mm_model_attn_v_b = get_tensor(string_format(TN_MINICPMV_ATTN, "v", "bias"));
+                    model.mm_model_attn_o_w = get_tensor(string_format(TN_MINICPMV_ATTN, "out", "weight"));
+                    model.mm_model_attn_o_b = get_tensor(string_format(TN_MINICPMV_ATTN, "out", "bias"));
+                    model.mm_model_ln_q_w = get_tensor(string_format(TN_MINICPMV_LN, "q", "weight"));
+                    model.mm_model_ln_q_b = get_tensor(string_format(TN_MINICPMV_LN, "q", "bias"));
+                    model.mm_model_ln_kv_w = get_tensor(string_format(TN_MINICPMV_LN, "kv", "weight"));
+                    model.mm_model_ln_kv_b = get_tensor(string_format(TN_MINICPMV_LN, "kv", "bias"));
+                    model.mm_model_ln_post_w = get_tensor(string_format(TN_MINICPMV_LN, "post", "weight"));
+                    model.mm_model_ln_post_b = get_tensor(string_format(TN_MINICPMV_LN, "post", "bias"));                } break;
             case PROJECTOR_TYPE_GLM_EDGE:
                 {
                     model.mm_model_adapter_conv_w = get_tensor(string_format(TN_GLM_ADAPER_CONV, "weight"));