Merge 856c609742 into d84a6a98be

commit bb014ab2d7
@@ -10218,6 +10218,14 @@ class ModernBertModel(BertModel):
         if name.startswith("model."):
             name = name[6:]
+
+        if self.cls_out_labels:
+            # For BertForSequenceClassification (direct projection layer)
+            if name == "classifier.weight":
+                name = "classifier.out_proj.weight"
+
+            if name == "classifier.bias":
+                name = "classifier.out_proj.bias"
 
         return super().modify_tensors(data_torch, name, bid)
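For context, a minimal standalone sketch of the rename rule this hunk adds, runnable outside the conversion script. The helper name and the sample tensor names are illustrative only, not part of the commit:

    # Standalone sketch of the rename rule above; sample names are illustrative.
    def remap_classifier_name(name: str, has_cls_out: bool) -> str:
        if name.startswith("model."):
            name = name[len("model."):]
        if has_cls_out:
            # BertForSequenceClassification stores a direct projection layer;
            # GGUF expects it under classifier.out_proj.*
            if name == "classifier.weight":
                name = "classifier.out_proj.weight"
            if name == "classifier.bias":
                name = "classifier.out_proj.bias"
        return name

    assert remap_classifier_name("model.classifier.weight", True) == "classifier.out_proj.weight"
    assert remap_classifier_name("classifier.bias", True) == "classifier.out_proj.bias"
    assert remap_classifier_name("model.head.norm.weight", True) == "head.norm.weight"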
@@ -619,6 +619,7 @@ class MODEL_TENSOR(IntEnum):
     ENC_OUTPUT_NORM = auto()
     CLS             = auto() # classifier
     CLS_OUT         = auto() # classifier output projection
+    CLS_NORM        = auto()
     CONV1D          = auto()
     CONVNEXT_DW     = auto()
     CONVNEXT_NORM   = auto()
@@ -1015,6 +1016,7 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.ENC_OUTPUT_NORM: "enc.output_norm",
     MODEL_TENSOR.CLS:             "cls",
     MODEL_TENSOR.CLS_OUT:         "cls.output",
+    MODEL_TENSOR.CLS_NORM:        "cls.norm",
     MODEL_TENSOR.CONV1D:          "conv1d",
     MODEL_TENSOR.CONVNEXT_DW:     "convnext.{bid}.dw",
     MODEL_TENSOR.CONVNEXT_NORM:   "convnext.{bid}.norm",
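As a rough illustration of how these base names surface in a GGUF file, a sketch assuming the usual ".weight"/".bias" suffixing that gguf-py applies; the helper below is hypothetical, not the library API:

    # Hypothetical helper: compose a full GGUF tensor name from a base name.
    TENSOR_NAMES = {
        "CLS":      "cls",
        "CLS_OUT":  "cls.output",
        "CLS_NORM": "cls.norm",
    }

    def gguf_name(tensor: str, suffix: str = "weight") -> str:
        return f"{TENSOR_NAMES[tensor]}.{suffix}"

    print(gguf_name("CLS_NORM"))         # cls.norm.weight
    print(gguf_name("CLS_OUT", "bias"))  # cls.output.bias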
@@ -1390,6 +1392,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_NORM,
         MODEL_TENSOR.CLS,
         MODEL_TENSOR.CLS_OUT,
+        MODEL_TENSOR.CLS_NORM,
     ],
     MODEL_ARCH.NOMIC_BERT: [
         MODEL_TENSOR.TOKEN_EMBD,
@@ -1141,6 +1141,10 @@ class TensorNameMap:
         MODEL_TENSOR.CLS_OUT: (
             "classifier.out_proj", # roberta
         ),
+
+        MODEL_TENSOR.CLS_NORM: (
+            "head.norm", # modern-bert
+        ),
         #############################################################################
 
         MODEL_TENSOR.CONVNEXT_DW: (
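A simplified sketch of the reverse lookup this mapping enables: resolving a Hugging Face checkpoint name such as `head.norm.weight` to its GGUF name. The real `TensorNameMap` also handles per-block `{bid}` patterns, omitted here:

    # Simplified reverse mapping: GGUF base name <- registered HF aliases.
    MAPPING = {
        "cls.output": ("classifier.out_proj",),  # roberta
        "cls.norm":   ("head.norm",),            # modern-bert
    }
    HF_TO_GGUF = {hf: gguf for gguf, aliases in MAPPING.items() for hf in aliases}

    def resolve(hf_name: str) -> str | None:
        base, _, suffix = hf_name.rpartition(".")  # split off .weight / .bias
        gguf = HF_TO_GGUF.get(base)
        return f"{gguf}.{suffix}" if gguf else None

    print(resolve("head.norm.weight"))  # cls.norm.weight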
@@ -351,6 +351,7 @@ static const std::map<llm_tensor, const char *> LLM_TENSOR_NAMES = {
     { LLM_TENSOR_TOKEN_TYPES,            "token_types" },
     { LLM_TENSOR_CLS,                    "cls" },
     { LLM_TENSOR_CLS_OUT,                "cls.output" },
+    { LLM_TENSOR_CLS_NORM,               "cls.norm" },
     { LLM_TENSOR_ENC_OUTPUT_NORM,        "enc.output_norm" },
     { LLM_TENSOR_FFN_GATE_INP_SHEXP,     "blk.%d.ffn_gate_inp_shexp" },
     { LLM_TENSOR_SSM_A_NOSCAN,           "blk.%d.ssm_a" },
@@ -798,6 +799,7 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
                 LLM_TENSOR_FFN_NORM,
                 LLM_TENSOR_CLS,
                 LLM_TENSOR_CLS_OUT,
+                LLM_TENSOR_CLS_NORM,
             };
         case LLM_ARCH_JINA_BERT_V2:
             return {
@@ -2257,6 +2259,7 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_OUTPUT,      {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_CLS,         {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_CLS_OUT,     {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_CLS_NORM,    {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
     {LLM_TENSOR_DENSE_2_OUT, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}}, // Dense layer output
     {LLM_TENSOR_DENSE_3_OUT, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}}, // Dense layer output
     {LLM_TENSOR_OUTPUT_NORM, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
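The new entry is tagged `GGML_OP_MUL` rather than `GGML_OP_MUL_MAT` because a norm weight is a per-channel scale, not a projection matrix. A rough numpy analogy; shapes and data are made up, and ggml's actual matmul layout differs:

    import numpy as np

    n_embd = 8
    x      = np.random.randn(n_embd).astype(np.float32)
    w_cls  = np.random.randn(n_embd, n_embd).astype(np.float32)  # projection matrix
    w_norm = np.random.randn(n_embd).astype(np.float32)          # per-channel scale

    h = w_cls @ x   # GGML_OP_MUL_MAT: matrix product
    h = h * w_norm  # GGML_OP_MUL: broadcast elementwise multiply
    print(h.shape)  # (8,)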
@@ -471,6 +471,7 @@ enum llm_tensor {
     LLM_TENSOR_ENC_OUTPUT_NORM,
     LLM_TENSOR_CLS,
     LLM_TENSOR_CLS_OUT,
+    LLM_TENSOR_CLS_NORM,
     LLM_TENSOR_CONV1D,
     LLM_TENSOR_CONVNEXT_DW,
     LLM_TENSOR_CONVNEXT_NORM,
@@ -2193,6 +2193,7 @@ void llama_context::opt_init(struct llama_model * model, struct llama_opt_params
     llama_set_param(model->cls_b,     param_filter, param_filter_ud);
     llama_set_param(model->cls_out,   param_filter, param_filter_ud);
     llama_set_param(model->cls_out_b, param_filter, param_filter_ud);
+    llama_set_param(model->cls_norm,  param_filter, param_filter_ud);
 
     for (struct llama_layer & layer : model->layers) {
         for (size_t i = 0; i < sizeof(layer)/sizeof(struct ggml_tensor *); ++i) {
@@ -2006,7 +2006,8 @@ void llm_graph_context::build_pooling(
         ggml_tensor * cls,
         ggml_tensor * cls_b,
         ggml_tensor * cls_out,
-        ggml_tensor * cls_out_b) const {
+        ggml_tensor * cls_out_b,
+        ggml_tensor * cls_norm) const {
     if (!cparams.embeddings) {
         return;
     }
@@ -2055,7 +2056,16 @@ void llm_graph_context::build_pooling(
                 if (cls_b) {
                     cur = ggml_add(ctx0, cur, cls_b);
                 }
-                cur = ggml_tanh(ctx0, cur);
+                // modernbert uses gelu
+                if (arch == LLM_ARCH_MODERN_BERT) {
+                    cur = ggml_gelu(ctx0, cur);
+                } else {
+                    cur = ggml_tanh(ctx0, cur);
+                }
+                if (cls_norm) {
+                    // head norm
+                    cur = build_norm(cur, cls_norm, NULL, LLM_NORM, -1);
+                }
             }
 
             // some models don't have `cls_out`, for example: https://huggingface.co/jinaai/jina-reranker-v1-tiny-en
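Putting the branch in context, a hedged numpy sketch of the classification head this function now builds: dense projection, then GELU for ModernBERT (tanh otherwise), then the optional head norm. Function and variable names are illustrative; `gelu` below uses the tanh approximation, close to `ggml_gelu`:

    import numpy as np

    def gelu(x):
        # tanh approximation of GELU, close to ggml_gelu
        return 0.5 * x * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * x**3)))

    def classification_head(x, w_cls, b_cls, w_norm=None, modern_bert=False, eps=1e-5):
        cur = w_cls @ x + b_cls                           # dense projection (cls / cls_b)
        cur = gelu(cur) if modern_bert else np.tanh(cur)  # activation
        if w_norm is not None:                            # head norm: LLM_NORM, weight only
            cur = (cur - cur.mean()) / np.sqrt(cur.var() + eps) * w_norm
        return cur

    n_embd = 8
    x = np.random.randn(n_embd)
    out = classification_head(x, np.eye(n_embd), np.zeros(n_embd),
                              w_norm=np.ones(n_embd), modern_bert=True)
    print(out.shape)  # (8,)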
@@ -830,7 +830,8 @@ struct llm_graph_context {
             ggml_tensor * cls,
             ggml_tensor * cls_b,
             ggml_tensor * cls_out,
-            ggml_tensor * cls_out_b) const;
+            ggml_tensor * cls_out_b,
+            ggml_tensor * cls_norm) const;
 
     //
     // dense (out)
@@ -268,6 +268,7 @@ void llama_model_saver::add_tensors_from_model() {
     add_tensor(model.cls_b);
     add_tensor(model.cls_out);
     add_tensor(model.cls_out_b);
+    add_tensor(model.cls_norm);
 
     for (const struct llama_layer & layer : model.layers) {
         for (size_t i = 0; i < sizeof(layer)/sizeof(struct ggml_tensor *); ++i) {
@@ -3253,9 +3253,10 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0);
                 }
 
-                cls       = create_tensor(tn(LLM_TENSOR_CLS,      "weight"), {n_embd, n_embd},            TENSOR_NOT_REQUIRED);
-                cls_out   = create_tensor(tn(LLM_TENSOR_CLS_OUT,  "weight"), {n_embd, hparams.n_cls_out}, TENSOR_NOT_REQUIRED);
-                cls_out_b = create_tensor(tn(LLM_TENSOR_CLS_OUT,  "bias"),   {hparams.n_cls_out},         TENSOR_NOT_REQUIRED);
+                cls       = create_tensor(tn(LLM_TENSOR_CLS,      "weight"), {n_embd, n_embd},            TENSOR_NOT_REQUIRED);
+                cls_out   = create_tensor(tn(LLM_TENSOR_CLS_OUT,  "weight"), {n_embd, hparams.n_cls_out}, TENSOR_NOT_REQUIRED);
+                cls_out_b = create_tensor(tn(LLM_TENSOR_CLS_OUT,  "bias"),   {hparams.n_cls_out},         TENSOR_NOT_REQUIRED);
+                cls_norm  = create_tensor(tn(LLM_TENSOR_CLS_NORM, "weight"), {n_embd},                    TENSOR_NOT_REQUIRED);
 
             } break;
         case LLM_ARCH_NEO_BERT:
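`TENSOR_NOT_REQUIRED` makes `cls.norm` optional: `create_tensor` yields a null tensor when the GGUF lacks it, so existing classifier GGUFs keep loading and `build_pooling` guards with `if (cls_norm)`. A toy sketch of that contract, with illustrative names:

    # Toy version of the optional-tensor contract.
    weights = {"cls.weight": ..., "cls.output.weight": ...}  # no cls.norm.weight

    def create_tensor(name, required=True):
        if name not in weights:
            if required:
                raise KeyError(name)
            return None
        return weights[name]

    cls_norm = create_tensor("cls.norm.weight", required=False)
    if cls_norm is not None:
        pass  # apply the head norm, as build_pooling does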
@@ -7865,7 +7866,7 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
     }
 
     // add on pooling layer
-    llm->build_pooling(cls, cls_b, cls_out, cls_out_b);
+    llm->build_pooling(cls, cls_b, cls_out, cls_out_b, cls_norm);
 
     // if the gguf model was converted with --sentence-transformers-dense-modules
     // there will be two additional dense projection layers
@@ -448,6 +448,7 @@ struct llama_model {
     struct ggml_tensor * cls_b     = nullptr;
     struct ggml_tensor * cls_out   = nullptr;
     struct ggml_tensor * cls_out_b = nullptr;
+    struct ggml_tensor * cls_norm  = nullptr;
 
     struct ggml_tensor * conv1d   = nullptr;
     struct ggml_tensor * conv1d_b = nullptr;
@@ -103,13 +103,6 @@ llm_build_modern_bert::llm_build_modern_bert(const llama_model & model, const ll
             LLM_NORM, -1);
     cb(cur, "final_norm_out", -1);
-
-    if (hparams.pooling_type == LLAMA_POOLING_TYPE_CLS) {
-        // extracting cls token
-        cur = ggml_view_1d(ctx0, cur, hparams.n_embd, 0);
-        cb(cur, "cls_pooled_embd", -1);
-    }
-
     cb(cur, "res_embd", -1);
     res->t_embd = cur;
     ggml_build_forward_expand(gf, cur);
 }
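The deleted block special-cased CLS pooling inside the ModernBERT graph body; with the head handled by the generic `build_pooling` path above, that duplication goes away. What the removed `ggml_view_1d` computed, in a toy numpy form (shapes are made up):

    import numpy as np

    n_tokens, n_embd = 5, 8
    embd = np.random.randn(n_tokens, n_embd).astype(np.float32)

    # The removed view took the first n_embd values, i.e. the CLS (first) token:
    cls_pooled = embd[0]
    print(cls_pooled.shape)  # (8,)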