From 0995fbbc93a7d261ca008883a2c26ccaef7467f0 Mon Sep 17 00:00:00 2001 From: megemini Date: Tue, 13 Jan 2026 13:40:11 +0800 Subject: [PATCH] [update] restore change of minicpmv --- tools/mtmd/clip-impl.h | 14 ++++++-------- tools/mtmd/clip.cpp | 40 ++++++++++++++++++++-------------------- 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/tools/mtmd/clip-impl.h b/tools/mtmd/clip-impl.h index bde7e3d999..a977fe5e9e 100644 --- a/tools/mtmd/clip-impl.h +++ b/tools/mtmd/clip-impl.h @@ -110,14 +110,12 @@ #define TN_DEEPSTACK_FC2 "v.deepstack.%d.fc2.%s" // qwen3vl deepstack // mimicpmv -#define TN_RESAMPL_POS_EMBD_K "resampler.pos_embed_k" -#define TN_RESAMPL_QUERY "resampler.query" -#define TN_RESAMPL_PROJ "resampler.proj.weight" -#define TN_RESAMPL_KV_PROJ "resampler.kv.weight" -#define TN_RESAMPL_ATTN "resampler.attn.%s.%s" -#define TN_RESAMPL_LN "resampler.ln_%s.%s" -#define TN_RESAMPL_FFN_UP "resampler.ffn_up.%s" -#define TN_RESAMPL_FFN_DOWN "resampler.ffn_down.%s" +#define TN_MINICPMV_POS_EMBD_K "resampler.pos_embed_k" +#define TN_MINICPMV_QUERY "resampler.query" +#define TN_MINICPMV_PROJ "resampler.proj.weight" +#define TN_MINICPMV_KV_PROJ "resampler.kv.weight" +#define TN_MINICPMV_ATTN "resampler.attn.%s.%s" +#define TN_MINICPMV_LN "resampler.ln_%s.%s" #define TN_GLM_ADAPER_CONV "adapter.conv.%s" #define TN_GLM_ADAPTER_LINEAR "adapter.linear.linear.%s" diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp index 7d13b8a44d..3a32d819a3 100644 --- a/tools/mtmd/clip.cpp +++ b/tools/mtmd/clip.cpp @@ -1233,6 +1233,7 @@ struct clip_model_loader { // TODO(megemini): paddleocr vl not specified? hparams.set_limit_image_tokens(8, 4096); hparams.set_warmup_n_tokens(28*28); // avoid OOM on warmup + } break; case PROJECTOR_TYPE_LFM2A: { // audio preprocessing params @@ -1499,26 +1500,25 @@ struct clip_model_loader { } break; case PROJECTOR_TYPE_MINICPMV: { - // model.mm_model_pos_embed = get_tensor(new_clip->ctx_data, TN_RESAMPL_POS_EMBD); - model.mm_model_pos_embed_k = get_tensor(TN_RESAMPL_POS_EMBD_K); - model.mm_model_query = get_tensor(TN_RESAMPL_QUERY); - model.mm_model_proj = get_tensor(TN_RESAMPL_PROJ); - model.mm_model_kv_proj = get_tensor(TN_RESAMPL_KV_PROJ); - model.mm_model_attn_q_w = get_tensor(string_format(TN_RESAMPL_ATTN, "q", "weight")); - model.mm_model_attn_k_w = get_tensor(string_format(TN_RESAMPL_ATTN, "k", "weight")); - model.mm_model_attn_v_w = get_tensor(string_format(TN_RESAMPL_ATTN, "v", "weight")); - model.mm_model_attn_q_b = get_tensor(string_format(TN_RESAMPL_ATTN, "q", "bias")); - model.mm_model_attn_k_b = get_tensor(string_format(TN_RESAMPL_ATTN, "k", "bias")); - model.mm_model_attn_v_b = get_tensor(string_format(TN_RESAMPL_ATTN, "v", "bias")); - model.mm_model_attn_o_w = get_tensor(string_format(TN_RESAMPL_ATTN, "out", "weight")); - model.mm_model_attn_o_b = get_tensor(string_format(TN_RESAMPL_ATTN, "out", "bias")); - model.mm_model_ln_q_w = get_tensor(string_format(TN_RESAMPL_LN, "q", "weight")); - model.mm_model_ln_q_b = get_tensor(string_format(TN_RESAMPL_LN, "q", "bias")); - model.mm_model_ln_kv_w = get_tensor(string_format(TN_RESAMPL_LN, "kv", "weight")); - model.mm_model_ln_kv_b = get_tensor(string_format(TN_RESAMPL_LN, "kv", "bias")); - model.mm_model_ln_post_w = get_tensor(string_format(TN_RESAMPL_LN, "post", "weight")); - model.mm_model_ln_post_b = get_tensor(string_format(TN_RESAMPL_LN, "post", "bias")); - } break; + // model.mm_model_pos_embed = get_tensor(new_clip->ctx_data, TN_MINICPMV_POS_EMBD); + model.mm_model_pos_embed_k = get_tensor(TN_MINICPMV_POS_EMBD_K); + model.mm_model_query = get_tensor(TN_MINICPMV_QUERY); + model.mm_model_proj = get_tensor(TN_MINICPMV_PROJ); + model.mm_model_kv_proj = get_tensor(TN_MINICPMV_KV_PROJ); + model.mm_model_attn_q_w = get_tensor(string_format(TN_MINICPMV_ATTN, "q", "weight")); + model.mm_model_attn_k_w = get_tensor(string_format(TN_MINICPMV_ATTN, "k", "weight")); + model.mm_model_attn_v_w = get_tensor(string_format(TN_MINICPMV_ATTN, "v", "weight")); + model.mm_model_attn_q_b = get_tensor(string_format(TN_MINICPMV_ATTN, "q", "bias")); + model.mm_model_attn_k_b = get_tensor(string_format(TN_MINICPMV_ATTN, "k", "bias")); + model.mm_model_attn_v_b = get_tensor(string_format(TN_MINICPMV_ATTN, "v", "bias")); + model.mm_model_attn_o_w = get_tensor(string_format(TN_MINICPMV_ATTN, "out", "weight")); + model.mm_model_attn_o_b = get_tensor(string_format(TN_MINICPMV_ATTN, "out", "bias")); + model.mm_model_ln_q_w = get_tensor(string_format(TN_MINICPMV_LN, "q", "weight")); + model.mm_model_ln_q_b = get_tensor(string_format(TN_MINICPMV_LN, "q", "bias")); + model.mm_model_ln_kv_w = get_tensor(string_format(TN_MINICPMV_LN, "kv", "weight")); + model.mm_model_ln_kv_b = get_tensor(string_format(TN_MINICPMV_LN, "kv", "bias")); + model.mm_model_ln_post_w = get_tensor(string_format(TN_MINICPMV_LN, "post", "weight")); + model.mm_model_ln_post_b = get_tensor(string_format(TN_MINICPMV_LN, "post", "bias")); } break; case PROJECTOR_TYPE_GLM_EDGE: { model.mm_model_adapter_conv_w = get_tensor(string_format(TN_GLM_ADAPER_CONV, "weight"));