From c5e9eac8c5677d93b4e4c639d87eefd9802f182b Mon Sep 17 00:00:00 2001 From: suhyun-hwang Date: Sat, 10 Jan 2026 20:35:39 +0900 Subject: [PATCH] refactor: merge VAETKI positions case with QWEN2VL --- tools/mtmd/clip.cpp | 30 ++++-------------------------- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp index 8c95ae9f92..33461a6d56 100644 --- a/tools/mtmd/clip.cpp +++ b/tools/mtmd/clip.cpp @@ -3499,11 +3499,14 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima case PROJECTOR_TYPE_QWEN2VL: case PROJECTOR_TYPE_QWEN3VL: case PROJECTOR_TYPE_GLM4V: + case PROJECTOR_TYPE_VAETKI: { const int merge_ratio = hparams.n_merge; const int pw = image_size_width / patch_size; const int ph = image_size_height / patch_size; - std::vector<int> positions(n_pos * 4); + + const int pos_size = (model.proj_type == PROJECTOR_TYPE_VAETKI) ? num_patches : n_pos; + std::vector<int> positions(pos_size * 4); int ptr = 0; for (int y = 0; y < ph; y += merge_ratio) { for (int x = 0; x < pw; x += merge_ratio) { @@ -3519,31 +3522,6 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima } } - set_input_i32("positions", positions); - } break; - case PROJECTOR_TYPE_VAETKI: - { - const int merge_ratio = 2; - const int ipw = image_size_width / patch_size; - const int iph = image_size_height / patch_size; - - std::vector<int> positions(num_patches * 4); - - int ptr = 0; - for (int y = 0; y < iph; y += merge_ratio) { - for (int x = 0; x < ipw; x += merge_ratio) { - for (int dy = 0; dy < 2; dy++) { - for (int dx = 0; dx < 2; dx++) { - positions[ ptr] = y + dy; - positions[ num_patches + ptr] = x + dx; - positions[2 * num_patches + ptr] = y + dy; - positions[3 * num_patches + ptr] = x + dx; - ptr++; - } - } - } - } - set_input_i32("positions", positions); } break; case PROJECTOR_TYPE_QWEN25VL: