refactor: use standard tensor naming for VAETKI projector

This commit is contained in:
suhyun-hwang 2026-01-11 14:59:12 +09:00
parent 8657eceda5
commit 566128ffb7
3 changed files with 18 additions and 21 deletions

View File

@@ -7948,21 +7948,18 @@ class VaetkiVisionModel(MmprojModel):
# Handle merger tensors with special index mapping
# clip.cpp PROJECTOR_TYPE_VAETKI expects:
# mm.model.mlp.0.* -> ln_q (pre-norm)
# mm.model.mlp.1.* -> mlp.0 (up projection)
# mm.model.mlp.3.* -> mlp.2 (down projection)
# mm.input_norm.* -> ln_q (pre-norm)
# mm.up.* -> mlp.0 (up projection)
# mm.down.* -> mlp.2 (down projection)
if "merger.ln_q" in name:
# ln_q -> mm.model.mlp.0 (used as norm in vaetki.cpp)
suffix = "weight" if name.endswith(".weight") else "bias"
return [(f"mm.model.mlp.0.{suffix}", data_torch)]
suffix = ".weight" if name.endswith(".weight") else ".bias"
return [(self.format_tensor_name(gguf.MODEL_TENSOR.V_MM_INP_NORM, suffix=suffix), data_torch)]
elif "merger.mlp.0" in name:
# mlp.0 -> mm.model.mlp.1 (up projection)
suffix = "weight" if name.endswith(".weight") else "bias"
return [(f"mm.model.mlp.1.{suffix}", data_torch)]
suffix = ".weight" if name.endswith(".weight") else ".bias"
return [(self.format_tensor_name(gguf.MODEL_TENSOR.V_MM_UP, suffix=suffix), data_torch)]
elif "merger.mlp.2" in name:
# mlp.2 -> mm.model.mlp.3 (down projection)
suffix = "weight" if name.endswith(".weight") else "bias"
return [(f"mm.model.mlp.3.{suffix}", data_torch)]
suffix = ".weight" if name.endswith(".weight") else ".bias"
return [(self.format_tensor_name(gguf.MODEL_TENSOR.V_MM_DOWN, suffix=suffix), data_torch)]
# Handle class_embedding and class_pos_emb (keep model.visual. prefix for mapping)
if "class_embedding" in name or "class_pos_emb" in name:

View File

@@ -1555,12 +1555,12 @@ struct clip_model_loader {
case PROJECTOR_TYPE_VAETKI:
{
model.class_pos_emb = get_tensor(TN_CLASS_POS_EMBD);
model.mm_0_w = get_tensor(string_format(TN_MVLM_PROJ_MLP, 0, "weight"));
model.mm_0_b = get_tensor(string_format(TN_MVLM_PROJ_MLP, 0, "bias"));
model.mm_1_w = get_tensor(string_format(TN_MVLM_PROJ_MLP, 1, "weight"));
model.mm_1_b = get_tensor(string_format(TN_MVLM_PROJ_MLP, 1, "bias"));
model.mm_3_w = get_tensor(string_format(TN_MVLM_PROJ_MLP, 3, "weight"));
model.mm_3_b = get_tensor(string_format(TN_MVLM_PROJ_MLP, 3, "bias"));
model.mm_input_norm_w = get_tensor(TN_MM_INP_NORM);
model.mm_input_norm_b = get_tensor(TN_MM_INP_NORM_B);
model.mm_ffn_up_w = get_tensor(string_format(TN_MM_UP, "weight"));
model.mm_ffn_up_b = get_tensor(string_format(TN_MM_UP, "bias"));
model.mm_ffn_down_w = get_tensor(string_format(TN_MM_DOWN, "weight"));
model.mm_ffn_down_b = get_tensor(string_format(TN_MM_DOWN, "bias"));
} break;
case PROJECTOR_TYPE_GLM4V:
{

View File

@@ -81,7 +81,7 @@ ggml_cgraph * clip_graph_vaetki::build() {
cb(embeddings, "patches_only", -1);
// merger
embeddings = build_norm(embeddings, model.mm_0_w, model.mm_0_b, NORM_TYPE_NORMAL, 1e-5, -1);
embeddings = build_norm(embeddings, model.mm_input_norm_w, model.mm_input_norm_b, NORM_TYPE_NORMAL, 1e-5, -1);
cb(embeddings, "merger_normed", -1);
// pixel shuffle
@@ -90,9 +90,9 @@ ggml_cgraph * clip_graph_vaetki::build() {
cb(embeddings, "merger_reshaped", -1);
embeddings = build_ffn(embeddings,
model.mm_1_w, model.mm_1_b,
model.mm_ffn_up_w, model.mm_ffn_up_b,
nullptr, nullptr,
model.mm_3_w, model.mm_3_b,
model.mm_ffn_down_w, model.mm_ffn_down_b,
FFN_GELU,
-1);
cb(embeddings, "merger_out", -1);