refactor: use standard tensor naming for VAETKI projector
parent 8657eceda5
commit 566128ffb7
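In short, the VAETKI projector tensors move from the generic indexed MLP names to the standard ones. The full mapping, collected from the hunks below:

    mm.model.mlp.0.{weight,bias} -> mm.input_norm.{weight,bias}  (pre-norm, ln_q)
    mm.model.mlp.1.{weight,bias} -> mm.up.{weight,bias}          (up projection)
    mm.model.mlp.3.{weight,bias} -> mm.down.{weight,bias}        (down projection)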
@@ -7948,21 +7948,18 @@ class VaetkiVisionModel(MmprojModel):
 
         # Handle merger tensors with special index mapping
         # clip.cpp PROJECTOR_TYPE_VAETKI expects:
-        # mm.model.mlp.0.* -> ln_q (pre-norm)
-        # mm.model.mlp.1.* -> mlp.0 (up projection)
-        # mm.model.mlp.3.* -> mlp.2 (down projection)
+        # mm.input_norm.* -> ln_q (pre-norm)
+        # mm.up.* -> mlp.0 (up projection)
+        # mm.down.* -> mlp.2 (down projection)
         if "merger.ln_q" in name:
-            # ln_q -> mm.model.mlp.0 (used as norm in vaetki.cpp)
-            suffix = "weight" if name.endswith(".weight") else "bias"
-            return [(f"mm.model.mlp.0.{suffix}", data_torch)]
+            suffix = ".weight" if name.endswith(".weight") else ".bias"
+            return [(self.format_tensor_name(gguf.MODEL_TENSOR.V_MM_INP_NORM, suffix=suffix), data_torch)]
         elif "merger.mlp.0" in name:
-            # mlp.0 -> mm.model.mlp.1 (up projection)
-            suffix = "weight" if name.endswith(".weight") else "bias"
-            return [(f"mm.model.mlp.1.{suffix}", data_torch)]
+            suffix = ".weight" if name.endswith(".weight") else ".bias"
+            return [(self.format_tensor_name(gguf.MODEL_TENSOR.V_MM_UP, suffix=suffix), data_torch)]
         elif "merger.mlp.2" in name:
-            # mlp.2 -> mm.model.mlp.3 (down projection)
-            suffix = "weight" if name.endswith(".weight") else "bias"
-            return [(f"mm.model.mlp.3.{suffix}", data_torch)]
+            suffix = ".weight" if name.endswith(".weight") else ".bias"
+            return [(self.format_tensor_name(gguf.MODEL_TENSOR.V_MM_DOWN, suffix=suffix), data_torch)]
 
         # Handle class_embedding and class_pos_emb (keep model.visual. prefix for mapping)
         if "class_embedding" in name or "class_pos_emb" in name:
@@ -1555,12 +1555,12 @@ struct clip_model_loader {
             case PROJECTOR_TYPE_VAETKI:
                 {
                     model.class_pos_emb = get_tensor(TN_CLASS_POS_EMBD);
-                    model.mm_0_w = get_tensor(string_format(TN_MVLM_PROJ_MLP, 0, "weight"));
-                    model.mm_0_b = get_tensor(string_format(TN_MVLM_PROJ_MLP, 0, "bias"));
-                    model.mm_1_w = get_tensor(string_format(TN_MVLM_PROJ_MLP, 1, "weight"));
-                    model.mm_1_b = get_tensor(string_format(TN_MVLM_PROJ_MLP, 1, "bias"));
-                    model.mm_3_w = get_tensor(string_format(TN_MVLM_PROJ_MLP, 3, "weight"));
-                    model.mm_3_b = get_tensor(string_format(TN_MVLM_PROJ_MLP, 3, "bias"));
+                    model.mm_input_norm_w = get_tensor(TN_MM_INP_NORM);
+                    model.mm_input_norm_b = get_tensor(TN_MM_INP_NORM_B);
+                    model.mm_ffn_up_w = get_tensor(string_format(TN_MM_UP, "weight"));
+                    model.mm_ffn_up_b = get_tensor(string_format(TN_MM_UP, "bias"));
+                    model.mm_ffn_down_w = get_tensor(string_format(TN_MM_DOWN, "weight"));
+                    model.mm_ffn_down_b = get_tensor(string_format(TN_MM_DOWN, "bias"));
                 } break;
             case PROJECTOR_TYPE_GLM4V:
                 {
@@ -81,7 +81,7 @@ ggml_cgraph * clip_graph_vaetki::build() {
     cb(embeddings, "patches_only", -1);
 
     // merger
-    embeddings = build_norm(embeddings, model.mm_0_w, model.mm_0_b, NORM_TYPE_NORMAL, 1e-5, -1);
+    embeddings = build_norm(embeddings, model.mm_input_norm_w, model.mm_input_norm_b, NORM_TYPE_NORMAL, 1e-5, -1);
     cb(embeddings, "merger_normed", -1);
 
     // pixel shuffle
@@ -90,9 +90,9 @@ ggml_cgraph * clip_graph_vaetki::build() {
     cb(embeddings, "merger_reshaped", -1);
 
     embeddings = build_ffn(embeddings,
-            model.mm_1_w, model.mm_1_b,
+            model.mm_ffn_up_w, model.mm_ffn_up_b,
             nullptr, nullptr,
-            model.mm_3_w, model.mm_3_b,
+            model.mm_ffn_down_w, model.mm_ffn_down_b,
             FFN_GELU,
             -1);
     cb(embeddings, "merger_out", -1);
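Note: with the gate pair passed as nullptr, this build_ffn call with FFN_GELU reduces to a plain two-layer MLP. A schematic sketch of the computation in raw ggml calls (illustrative only: ctx0 is assumed to be the builder's graph context, and the real helper also handles the callback plumbing):

    // up-project, apply GELU, down-project; no gate branch
    ggml_tensor * cur = ggml_mul_mat(ctx0, model.mm_ffn_up_w, embeddings);
    cur = ggml_add(ctx0, cur, model.mm_ffn_up_b);       // add up-projection bias
    cur = ggml_gelu(ctx0, cur);                         // FFN_GELU activation
    cur = ggml_mul_mat(ctx0, model.mm_ffn_down_w, cur);
    cur = ggml_add(ctx0, cur, model.mm_ffn_down_b);     // add down-projection bias
    embeddings = cur;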