Added GELU op in rank pooling for ModernBERT
This commit is contained in:
parent
98e24a5953
commit
856c609742
|
|
@ -10028,6 +10028,14 @@ class ModernBertModel(BertModel):
|
||||||
|
|
||||||
if name.startswith("model."):
|
if name.startswith("model."):
|
||||||
name = name[6:]
|
name = name[6:]
|
||||||
|
|
||||||
|
if self.cls_out_labels:
|
||||||
|
# For BertForSequenceClassification (direct projection layer)
|
||||||
|
if name == "classifier.weight":
|
||||||
|
name = "classifier.out_proj.weight"
|
||||||
|
|
||||||
|
if name == "classifier.bias":
|
||||||
|
name = "classifier.out_proj.bias"
|
||||||
|
|
||||||
return super().modify_tensors(data_torch, name, bid)
|
return super().modify_tensors(data_torch, name, bid)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2056,7 +2056,12 @@ void llm_graph_context::build_pooling(
|
||||||
if (cls_b) {
|
if (cls_b) {
|
||||||
cur = ggml_add(ctx0, cur, cls_b);
|
cur = ggml_add(ctx0, cur, cls_b);
|
||||||
}
|
}
|
||||||
cur = ggml_tanh(ctx0, cur);
|
// modernbert uses gelu
|
||||||
|
if (arch == LLM_ARCH_MODERN_BERT) {
|
||||||
|
cur = ggml_gelu(ctx0, cur);
|
||||||
|
} else {
|
||||||
|
cur = ggml_tanh(ctx0, cur);
|
||||||
|
}
|
||||||
if (cls_norm) {
|
if (cls_norm) {
|
||||||
// head norm
|
// head norm
|
||||||
cur = build_norm(cur, cls_norm, NULL, LLM_NORM, -1);
|
cur = build_norm(cur, cls_norm, NULL, LLM_NORM, -1);
|
||||||
|
|
|
||||||
|
|
@ -110,13 +110,6 @@ llm_build_modern_bert<iswa>::llm_build_modern_bert(const llama_model & model, co
|
||||||
LLM_NORM, -1);
|
LLM_NORM, -1);
|
||||||
cb(cur, "final_norm_out", -1);
|
cb(cur, "final_norm_out", -1);
|
||||||
|
|
||||||
if (hparams.pooling_type == LLAMA_POOLING_TYPE_CLS) {
|
|
||||||
// extracting cls token
|
|
||||||
cur = ggml_view_1d(ctx0, cur, hparams.n_embd, 0);
|
|
||||||
cb(cur, "cls_pooled_embd", -1);
|
|
||||||
}
|
|
||||||
|
|
||||||
cb(cur, "res_embd", -1);
|
|
||||||
res->t_embd = cur;
|
res->t_embd = cur;
|
||||||
ggml_build_forward_expand(gf, cur);
|
ggml_build_forward_expand(gf, cur);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue