added gelu op in rank pooling for ModernBERT

ryan-mangeno 2025-12-27 11:28:46 -05:00
parent 98e24a5953
commit 856c609742
3 changed files with 14 additions and 8 deletions


```diff
@@ -10028,6 +10028,14 @@ class ModernBertModel(BertModel):
         if name.startswith("model."):
             name = name[6:]
+        if self.cls_out_labels:
+            # For BertForSequenceClassification (direct projection layer)
+            if name == "classifier.weight":
+                name = "classifier.out_proj.weight"
+            if name == "classifier.bias":
+                name = "classifier.out_proj.bias"
         return super().modify_tensors(data_torch, name, bid)
```
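For illustration, a minimal standalone sketch of the renaming this converter change performs; `remap_classifier_name` and its `cls_out_labels` flag are hypothetical stand-ins for the `ModernBertModel.modify_tensors` logic above:

```python
# Sketch of the tensor-name remapping, outside the converter.
def remap_classifier_name(name: str, cls_out_labels: bool) -> str:
    # strip the "model." prefix used by the HF checkpoint
    if name.startswith("model."):
        name = name[6:]
    if cls_out_labels:
        # BertForSequenceClassification stores a direct projection layer;
        # the GGUF side expects it under classifier.out_proj.*
        if name == "classifier.weight":
            name = "classifier.out_proj.weight"
        if name == "classifier.bias":
            name = "classifier.out_proj.bias"
    return name

assert remap_classifier_name("model.classifier.weight", True) == "classifier.out_proj.weight"
assert remap_classifier_name("classifier.bias", False) == "classifier.bias"
```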


```diff
@@ -2056,7 +2056,12 @@ void llm_graph_context::build_pooling(
                 if (cls_b) {
                     cur = ggml_add(ctx0, cur, cls_b);
                 }
-                cur = ggml_tanh(ctx0, cur);
+                // modernbert uses gelu
+                if (arch == LLM_ARCH_MODERN_BERT) {
+                    cur = ggml_gelu(ctx0, cur);
+                } else {
+                    cur = ggml_tanh(ctx0, cur);
+                }
                 if (cls_norm) {
                     // head norm
                     cur = build_norm(cur, cls_norm, NULL, LLM_NORM, -1);
```
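GELU and tanh diverge noticeably away from zero, which is why the head activation has to match what the original ModernBERT checkpoint was trained with. A minimal sketch of the branch above; `gelu_tanh_approx` assumes the common tanh approximation of GELU (ggml also has an exact-erf variant), so treat it as illustrative rather than a copy of the `ggml_gelu` kernel:

```python
import math

def gelu_tanh_approx(x: float) -> float:
    # tanh approximation of GELU; close to, but not identical with, erf-based GELU
    return 0.5 * x * (1.0 + math.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * x ** 3)))

def pool_head_activation(x: float, is_modern_bert: bool) -> float:
    # mirrors the branch in build_pooling: GELU for ModernBERT, tanh otherwise
    return gelu_tanh_approx(x) if is_modern_bert else math.tanh(x)

for v in (-2.0, -0.5, 0.0, 0.5, 2.0):
    print(f"{v:+.1f}  tanh={math.tanh(v):+.4f}  gelu={gelu_tanh_approx(v):+.4f}")
```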


```diff
@@ -110,13 +110,6 @@ llm_build_modern_bert<iswa>::llm_build_modern_bert(const llama_model & model, co
                      LLM_NORM, -1);
     cb(cur, "final_norm_out", -1);
-    if (hparams.pooling_type == LLAMA_POOLING_TYPE_CLS) {
-        // extracting cls token
-        cur = ggml_view_1d(ctx0, cur, hparams.n_embd, 0);
-        cb(cur, "cls_pooled_embd", -1);
-    }
     cb(cur, "res_embd", -1);
     res->t_embd = cur;
     ggml_build_forward_expand(gf, cur);
 }
```
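The removed block was a manual CLS pool: ggml lays the hidden state out with n_embd contiguous values per token, so a 1-D view of the first n_embd values at offset 0 is exactly token 0's (CLS) embedding. With the generic build_pooling path handling CLS pooling, the per-model slice is redundant. A NumPy sketch of the same slice, with made-up sizes:

```python
import numpy as np

n_tokens, n_embd = 5, 8
hidden = np.random.rand(n_tokens, n_embd).astype(np.float32)

# what ggml_view_1d(ctx0, cur, n_embd, 0) amounted to: a view of the first
# n_embd values of the flat buffer, i.e. the embedding of token 0 (CLS)
cls_embd = hidden.reshape(-1)[:n_embd]
assert np.array_equal(cls_embd, hidden[0])
```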