full ModernBERT support

ryan-mangeno 2025-12-23 16:39:49 -05:00
parent 5ee4e43f26
commit 98e24a5953
10 changed files with 27 additions and 6 deletions

View File

@@ -614,6 +614,7 @@ class MODEL_TENSOR(IntEnum):
ENC_OUTPUT_NORM = auto()
CLS = auto() # classifier
CLS_OUT = auto() # classifier output projection
CLS_NORM = auto()
CONV1D = auto()
CONVNEXT_DW = auto()
CONVNEXT_NORM = auto()
@@ -1007,6 +1008,7 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
MODEL_TENSOR.ENC_OUTPUT_NORM: "enc.output_norm",
MODEL_TENSOR.CLS: "cls",
MODEL_TENSOR.CLS_OUT: "cls.output",
MODEL_TENSOR.CLS_NORM: "cls.norm",
MODEL_TENSOR.CONV1D: "conv1d",
MODEL_TENSOR.CONVNEXT_DW: "convnext.{bid}.dw",
MODEL_TENSOR.CONVNEXT_NORM: "convnext.{bid}.norm",
@@ -1382,6 +1384,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
MODEL_TENSOR.FFN_NORM,
MODEL_TENSOR.CLS,
MODEL_TENSOR.CLS_OUT,
MODEL_TENSOR.CLS_NORM,
],
MODEL_ARCH.NOMIC_BERT: [
MODEL_TENSOR.TOKEN_EMBD,
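The new CLS_NORM entry follows the same convention as the other classifier tensors: its base name carries no "{bid}" placeholder, so it names a single global tensor rather than a per-block one. A minimal Python sketch of that naming convention (a simplification for illustration, not gguf-py's actual formatting code):

TENSOR_NAMES = {
    "CLS":         "cls",
    "CLS_OUT":     "cls.output",
    "CLS_NORM":    "cls.norm",           # added by this commit
    "CONVNEXT_DW": "convnext.{bid}.dw",  # per-block name, for contrast
}

def gguf_name(key: str, suffix: str = "weight", bid: int | None = None) -> str:
    base = TENSOR_NAMES[key]
    if bid is not None:
        base = base.format(bid=bid)
    return f"{base}.{suffix}"

print(gguf_name("CLS_NORM"))             # -> cls.norm.weight
print(gguf_name("CONVNEXT_DW", bid=3))   # -> convnext.3.dw.weight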

View File

@@ -1138,6 +1138,10 @@ class TensorNameMap:
MODEL_TENSOR.CLS_OUT: (
"classifier.out_proj", # roberta
),
MODEL_TENSOR.CLS_NORM: (
"head.norm", # modern-bert
),
#############################################################################
MODEL_TENSOR.CONVNEXT_DW: (

View File

@@ -348,6 +348,7 @@ static const std::map<llm_tensor, const char *> LLM_TENSOR_NAMES = {
{ LLM_TENSOR_TOKEN_TYPES, "token_types" },
{ LLM_TENSOR_CLS, "cls" },
{ LLM_TENSOR_CLS_OUT, "cls.output" },
{ LLM_TENSOR_CLS_NORM, "cls.norm" },
{ LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
{ LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
{ LLM_TENSOR_SSM_A_NOSCAN, "blk.%d.ssm_a" },
@@ -794,6 +795,7 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
LLM_TENSOR_FFN_NORM,
LLM_TENSOR_CLS,
LLM_TENSOR_CLS_OUT,
LLM_TENSOR_CLS_NORM,
};
case LLM_ARCH_JINA_BERT_V2:
return {
@@ -2216,6 +2218,7 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
{LLM_TENSOR_OUTPUT, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
{LLM_TENSOR_CLS, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
{LLM_TENSOR_CLS_OUT, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
{LLM_TENSOR_CLS_NORM, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
{LLM_TENSOR_DENSE_2_OUT, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}}, // Dense layer output
{LLM_TENSOR_DENSE_3_OUT, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}}, // Dense layer output
{LLM_TENSOR_OUTPUT_NORM, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},

View File

@@ -468,6 +468,7 @@ enum llm_tensor {
LLM_TENSOR_ENC_OUTPUT_NORM,
LLM_TENSOR_CLS,
LLM_TENSOR_CLS_OUT,
LLM_TENSOR_CLS_NORM,
LLM_TENSOR_CONV1D,
LLM_TENSOR_CONVNEXT_DW,
LLM_TENSOR_CONVNEXT_NORM,

View File

@@ -2191,6 +2191,7 @@ void llama_context::opt_init(struct llama_model * model, struct llama_opt_params
llama_set_param(model->cls_b, param_filter, param_filter_ud);
llama_set_param(model->cls_out, param_filter, param_filter_ud);
llama_set_param(model->cls_out_b, param_filter, param_filter_ud);
llama_set_param(model->cls_norm, param_filter, param_filter_ud);
for (struct llama_layer & layer : model->layers) {
for (size_t i = 0; i < sizeof(layer)/sizeof(struct ggml_tensor *); ++i) {

View File

@@ -2006,7 +2006,8 @@ void llm_graph_context::build_pooling(
ggml_tensor * cls,
ggml_tensor * cls_b,
ggml_tensor * cls_out,
ggml_tensor * cls_out_b) const {
ggml_tensor * cls_out_b,
ggml_tensor * cls_norm) const {
if (!cparams.embeddings) {
return;
}
@@ -2056,6 +2057,10 @@ void llm_graph_context::build_pooling(
cur = ggml_add(ctx0, cur, cls_b);
}
cur = ggml_tanh(ctx0, cur);
if (cls_norm) {
// head norm
cur = build_norm(cur, cls_norm, NULL, LLM_NORM, -1);
}
}
// some models don't have `cls_out`, for example: https://huggingface.co/jinaai/jina-reranker-v1-tiny-en
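With this change the rank-pooling head runs dense -> tanh -> optional head norm -> output projection. A NumPy sketch of the same sequence for reference (tensor layouts, the eps value, and the reading of LLM_NORM as a bias-free LayerNorm are assumptions; this is not the ggml code above):

import numpy as np

def layer_norm(x, gamma, eps=1e-5):
    # bias-free LayerNorm, mirroring build_norm(cur, cls_norm, NULL, LLM_NORM, -1)
    mu  = x.mean(axis=-1, keepdims=True)
    var = x.var(axis=-1, keepdims=True)
    return (x - mu) / np.sqrt(var + eps) * gamma

def rank_head(x, cls_w, cls_b, cls_norm_w, cls_out_w, cls_out_b):
    cur = np.tanh(x @ cls_w.T + cls_b)       # cls projection + tanh
    if cls_norm_w is not None:               # head norm, new in this commit
        cur = layer_norm(cur, cls_norm_w)
    return cur @ cls_out_w.T + cls_out_b     # cls.output projection

rng = np.random.default_rng(0)
n_embd, n_cls_out = 8, 1
score = rank_head(
    rng.standard_normal(n_embd),
    rng.standard_normal((n_embd, n_embd)), rng.standard_normal(n_embd),
    np.ones(n_embd),
    rng.standard_normal((n_cls_out, n_embd)), rng.standard_normal(n_cls_out),
)
print(score.shape)  # (1,)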

View File

@@ -830,7 +830,8 @@ struct llm_graph_context {
ggml_tensor * cls,
ggml_tensor * cls_b,
ggml_tensor * cls_out,
ggml_tensor * cls_out_b) const;
ggml_tensor * cls_out_b,
ggml_tensor * cls_norm) const;
//
// dense (out)

View File

@@ -268,6 +268,7 @@ void llama_model_saver::add_tensors_from_model() {
add_tensor(model.cls_b);
add_tensor(model.cls_out);
add_tensor(model.cls_out_b);
add_tensor(model.cls_norm);
for (const struct llama_layer & layer : model.layers) {
for (size_t i = 0; i < sizeof(layer)/sizeof(struct ggml_tensor *); ++i) {

View File

@@ -3212,9 +3212,10 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0);
}
cls = create_tensor(tn(LLM_TENSOR_CLS, "weight"), {n_embd, n_embd}, TENSOR_NOT_REQUIRED);
cls_out = create_tensor(tn(LLM_TENSOR_CLS_OUT, "weight"), {n_embd, hparams.n_cls_out}, TENSOR_NOT_REQUIRED);
cls_out_b = create_tensor(tn(LLM_TENSOR_CLS_OUT, "bias"), {hparams.n_cls_out}, TENSOR_NOT_REQUIRED);
cls = create_tensor(tn(LLM_TENSOR_CLS, "weight"), {n_embd, n_embd}, TENSOR_NOT_REQUIRED);
cls_out = create_tensor(tn(LLM_TENSOR_CLS_OUT, "weight"), {n_embd, hparams.n_cls_out}, TENSOR_NOT_REQUIRED);
cls_out_b = create_tensor(tn(LLM_TENSOR_CLS_OUT, "bias"), {hparams.n_cls_out}, TENSOR_NOT_REQUIRED);
cls_norm = create_tensor(tn(LLM_TENSOR_CLS_NORM, "weight"), {n_embd}, TENSOR_NOT_REQUIRED);
} break;
case LLM_ARCH_NEO_BERT:
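Since cls_norm is created with TENSOR_NOT_REQUIRED, GGUF files converted before this change (with no cls.norm.weight tensor) still load, and build_pooling simply skips the head norm. A toy sketch of that optional-tensor pattern (illustrative only, not llama.cpp's loader):

def create_tensor(tensors, name, required=True):
    t = tensors.get(name)
    if t is None and required:
        raise ValueError(f"missing required tensor: {name}")
    return t  # None for an absent optional tensor

old_gguf = {"cls.weight": "...", "cls.output.weight": "..."}
cls_norm = create_tensor(old_gguf, "cls.norm.weight", required=False)
assert cls_norm is None  # older BERT models: the head norm stays disabled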
@@ -7709,7 +7710,7 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
}
// add on pooling layer
llm->build_pooling(cls, cls_b, cls_out, cls_out_b);
llm->build_pooling(cls, cls_b, cls_out, cls_out_b, cls_norm);
// if the gguf model was converted with --sentence-transformers-dense-modules
// there will be two additional dense projection layers

View File

@@ -446,6 +446,7 @@ struct llama_model {
struct ggml_tensor * cls_b = nullptr;
struct ggml_tensor * cls_out = nullptr;
struct ggml_tensor * cls_out_b = nullptr;
struct ggml_tensor * cls_norm = nullptr;
struct ggml_tensor * conv1d = nullptr;
struct ggml_tensor * conv1d_b = nullptr;