From 5d08f3e87b2c5f0af5fc6007a2018e45a3da8a20 Mon Sep 17 00:00:00 2001 From: suhyun-hwang Date: Sat, 10 Jan 2026 20:49:01 +0900 Subject: [PATCH] feat: VAETKI dynamic image size support --- tools/mtmd/clip.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp index 308a384ed9..bc277e82c9 100644 --- a/tools/mtmd/clip.cpp +++ b/tools/mtmd/clip.cpp @@ -1201,6 +1201,7 @@ struct clip_model_loader { hparams.rope_theta = 10000.0f; hparams.n_merge = 2; get_u32(KEY_SPATIAL_MERGE_SIZE, hparams.n_merge, false); + hparams.set_limit_image_tokens(4, 3265); hparams.set_warmup_n_tokens(40*40); } break; case PROJECTOR_TYPE_LLAMA4: @@ -2853,6 +2854,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, str case PROJECTOR_TYPE_QWEN25VL: case PROJECTOR_TYPE_QWEN3VL: case PROJECTOR_TYPE_GLM4V: + case PROJECTOR_TYPE_VAETKI: { GGML_ASSERT(params.image_min_pixels > 0 && params.image_max_pixels > 0); clip_image_u8 resized; @@ -2973,7 +2975,6 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, str case PROJECTOR_TYPE_GLM_EDGE: case PROJECTOR_TYPE_GEMMA3: case PROJECTOR_TYPE_INTERNVL: // TODO @ngxson : support dynamic resolution - case PROJECTOR_TYPE_VAETKI: { clip_image_u8 resized_image; int sz = params.image_size;