constants and tensor mappings for ModernBERT support; the model is not supported yet, but working on getting conversion to work for the encoder-only variant

ryan-mangeno 2025-08-21 12:38:04 -04:00
parent 79c1160b07
commit 6151592ea7
4 changed files with 61 additions and 1 deletion
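A rough sketch (not from this commit) of how the new registration is expected to be exercised by the converter; it assumes the ModernBertModel class and its @ModelBase.register decorator added below, and would run inside the convert script's namespace:

# Hypothetical check, assuming the registration added below;
# "ModernBertModel" is the architecture string carried by HF ModernBERT checkpoints.
model_class = ModelBase.from_model_architecture("ModernBertModel")
assert model_class is ModernBertModel

Once conversion works end to end, the usual convert_hf_to_gguf.py invocation (a model directory plus --outfile/--outtype) should apply unchanged.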

View File

@@ -133,6 +133,7 @@ class ModelBase:
self.ftype = gguf.LlamaFileType.MOSTLY_BF16
# Configure GGUF Writer
print(f"arch: {gguf.MODEL_ARCH_NAMES[self.model_arch]}")
self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file,
split_max_tensors=split_max_tensors, split_max_size=split_max_size, dry_run=dry_run, small_first_shard=small_first_shard)
@@ -465,6 +466,7 @@ class ModelBase:
@classmethod
def from_model_architecture(cls, arch: str, model_type = ModelType.TEXT) -> type[ModelBase]:
try:
print(f"model_type: {model_type}, arch: {arch}")
return cls._model_classes[model_type][arch]
except KeyError:
raise NotImplementedError(f'Architecture {arch!r} not supported!') from None
@@ -8303,6 +8305,40 @@ class SmallThinkerModel(TextModel):
experts = [k for d in self._experts for k in d.keys()]
if len(experts) > 0:
raise ValueError(f"Unprocessed experts: {experts}")

@ModelBase.register("ModernBertModel")
class ModernBertModel(TextModel):
    model_arch = gguf.MODEL_ARCH.MODERN_BERT

    def set_gguf_parameters(self) -> None:
        # Determine block count (number of hidden layers)
        block_count = self.hparams.get("num_hidden_layers") or self.hparams.get("num_hidden_layers_alt")
        if block_count is None:
            raise ValueError("Could not determine number of hidden layers from hparams")

        # Attention heads and dimensions
        n_head = self.hparams.get("num_attention_heads")
        if n_head is None:
            raise ValueError("Missing 'num_attention_heads' in hparams")

        hidden_size = self.hparams["hidden_size"]
        head_dim = hidden_size // n_head  # not used yet
        ffn_dim = self.hparams.get("intermediate_size", 4 * hidden_size)

        # GGUF parameter assignment
        self.gguf_writer.add_context_length(self.hparams.get("max_position_embeddings", 512))
        self.gguf_writer.add_embedding_length(hidden_size)
        self.gguf_writer.add_feed_forward_length(ffn_dim)
        self.gguf_writer.add_block_count(block_count)
        self.gguf_writer.add_head_count(n_head)
        # ModernBERT configs call this "norm_eps"; fall back to the generic key, then a default
        self.gguf_writer.add_layer_norm_eps(self.hparams.get("norm_eps", self.hparams.get("layer_norm_eps", 1e-12)))
        self.gguf_writer.add_file_type(self.ftype)

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        # Directly map tensor names without QKV splitting or reordering
        return [(self.map_tensor_name(name), data_torch)]
###### CONVERSION LOGIC ######
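modify_tensors above defers entirely to map_tensor_name, which resolves Hugging Face tensor names through the gguf-py TensorNameMap entries added further down in this commit. A minimal sketch of the expected resolution, assuming those constants and mappings land as shown (the block count is illustrative):

import gguf

# Build the name map for the new architecture; 22 blocks is just an example.
tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.MODERN_BERT, 22)
print(tmap.get_name("layers.0.attn.Wqkv.weight", try_suffixes=(".weight", ".bias")))         # blk.0.attn_qkv.weight
print(tmap.get_name("embeddings.tok_embeddings.weight", try_suffixes=(".weight", ".bias")))  # token_embd.weight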

View File

@@ -311,6 +311,7 @@ class MODEL_ARCH(IntEnum):
STARCODER = auto()
REFACT = auto()
BERT = auto()
MODERN_BERT = auto()
NOMIC_BERT = auto()
NOMIC_BERT_MOE = auto()
NEO_BERT = auto()
@@ -642,6 +643,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
MODEL_ARCH.STARCODER: "starcoder",
MODEL_ARCH.REFACT: "refact",
MODEL_ARCH.BERT: "bert",
MODEL_ARCH.MODERN_BERT: "modern-bert",
MODEL_ARCH.NOMIC_BERT: "nomic-bert",
MODEL_ARCH.NOMIC_BERT_MOE: "nomic-bert-moe",
MODEL_ARCH.NEO_BERT: "neo-bert",
@@ -1172,6 +1174,18 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
MODEL_TENSOR.CLS,
MODEL_TENSOR.CLS_OUT,
],
MODEL_ARCH.MODERN_BERT: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.TOKEN_EMBD_NORM,
MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.ATTN_NORM,
MODEL_TENSOR.ATTN_OUT,
MODEL_TENSOR.ATTN_QKV,
MODEL_TENSOR.POS_EMBD,
MODEL_TENSOR.FFN_UP,
MODEL_TENSOR.FFN_DOWN,
MODEL_TENSOR.FFN_NORM,
],
MODEL_ARCH.NOMIC_BERT: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.TOKEN_EMBD_NORM,

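The MODERN_BERT entry in MODEL_TENSORS determines which tensor mappings get instantiated for the architecture: map_tensor_name can only resolve tensors listed there. A small sketch, assuming the constants above, that prints the GGUF-side name templates the new entry declares:

import gguf

# List the GGUF tensor name templates declared for MODERN_BERT above.
for t in gguf.MODEL_TENSORS[gguf.MODEL_ARCH.MODERN_BERT]:
    print(gguf.TENSOR_NAMES[t])  # e.g. token_embd, blk.{bid}.attn_qkv, blk.{bid}.ffn_up, ...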
View File

@@ -16,6 +16,7 @@ class TensorNameMap:
"model.embed_tokens", # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414 plamo2 granite-hybrid
"tok_embeddings", # llama-pth
"embeddings.word_embeddings", # bert nomic-bert
"embeddings.tok_embeddings", # modern bert
"language_model.embedding.word_embeddings", # persimmon
"wte", # gpt2
"transformer.embd.wte", # phi2
@@ -45,6 +46,7 @@ class TensorNameMap:
MODEL_TENSOR.TOKEN_EMBD_NORM: (
"word_embeddings_layernorm", # bloom
"embeddings.LayerNorm", # bert
"embeddings.norm", # modern bert
"emb_ln", # nomic-bert
"transformer.norm", # openelm
"rwkv.blocks.0.pre_ln", # rwkv
@@ -98,6 +100,7 @@ class TensorNameMap:
"backbone.final_layer_norm", # wavtokenizer
"model.norm", # llama4
"model.transformer.ln_f", # llada
"final_norm", # modern bert
),
# Rope frequencies
@@ -142,9 +145,10 @@
"model.layers.{bid}.ln1", # rwkv7
"model.layers.{bid}.input_layernorm", # llama4
"transformer_encoder.{bid}.attention_norm", # neobert
"layers.{bid}.attn_norm", # bert
"model.layers.{bid}.operator_norm", # lfm2
"model.transformer.blocks.{bid}.attn_norm", # llada
"layers.{bid}.input_layernorm", # qwen3-embedding
"layers.{bid}.input_layernorm", # qwen3-embedding,
),
# Attention norm 2
@@ -174,6 +178,7 @@
"encoder.layers.{bid}.self_attention.query_key_value", # chatglm
"transformer.layers.{bid}.attn.qkv_proj", # openelm
"transformer_encoder.{bid}.qkv", # neobert
"layers.{bid}.attn.Wqkv", # modern bert
),
# Attention query
@@ -240,6 +245,7 @@
"model.layers.{bid}.self_attn.linear_attn", # deci
"layers.{bid}.attention.wo", # llama-pth
"encoder.layer.{bid}.attention.output.dense", # bert
"layers.{bid}.attn.Wo", # modern bert
"transformer.layer.{bid}.attention.out_lin", # distillbert
"transformer.h.{bid}.attn.out_proj", # gpt-j
"language_model.encoder.layers.{bid}.self_attention.dense", # persimmon
@@ -311,6 +317,7 @@
"model.layers.layers.{bid}.pre_mlp_norm", # plamo2
"model.transformer.blocks.{bid}.ff_norm", # llada
"layers.{bid}.post_attention_layernorm", # qwen3-embedding
"layers.{bid}.mlp_norm" # modern bert
),
# Post feed-forward norm
@@ -360,6 +367,7 @@
"model.layers.{bid}.mlp.up_proj", # llama-hf refact nemotron olmo2
"layers.{bid}.feed_forward.w3", # llama-pth
"encoder.layer.{bid}.intermediate.dense", # bert
"layers.{bid}.mlp.Wo", # modern bert
"transformer.layer.{bid}.ffn.lin1", # distillbert
"transformer.h.{bid}.mlp.fc_in", # gpt-j
"transformer.h.{bid}.mlp.linear_3", # refact
@@ -459,6 +467,7 @@
"model.layers.{bid}.mlp.down_proj", # llama-hf nemotron olmo2
"layers.{bid}.feed_forward.w2", # llama-pth
"encoder.layer.{bid}.output.dense", # bert
"layers.{bid}.mlp.Wi", # modern bert
"transformer.layer.{bid}.ffn.lin2", # distillbert
"transformer.h.{bid}.mlp.fc_out", # gpt-j
"language_model.encoder.layers.{bid}.mlp.dense_4h_to_h", # persimmon

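Taken together with the constants above, the new entries give the following correspondence for one ModernBERT encoder block plus the embedding and output norms (a summary sketch, layer index 0 shown; in ModernBERT, mlp.Wi is the FFN input/up projection and mlp.Wo the output/down projection):

# HF ModernBERT name            -> GGUF base name (per the mappings above)
modern_bert_name_map = {
    "embeddings.tok_embeddings": "token_embd",
    "embeddings.norm":           "token_embd_norm",
    "layers.0.attn_norm":        "blk.0.attn_norm",
    "layers.0.attn.Wqkv":        "blk.0.attn_qkv",
    "layers.0.attn.Wo":          "blk.0.attn_output",
    "layers.0.mlp_norm":         "blk.0.ffn_norm",
    "layers.0.mlp.Wi":           "blk.0.ffn_up",
    "layers.0.mlp.Wo":           "blk.0.ffn_down",
    "final_norm":                "output_norm",
}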
View File

@@ -22,6 +22,7 @@ enum llm_arch {
LLM_ARCH_STARCODER,
LLM_ARCH_REFACT,
LLM_ARCH_BERT,
LLM_ARCH_MODERN_BERT,
LLM_ARCH_NOMIC_BERT,
LLM_ARCH_NOMIC_BERT_MOE,
LLM_ARCH_NEO_BERT,