diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 8a1d37c5ad..ef372f7205 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -9329,7 +9329,7 @@ class SmallThinkerModel(TextModel):
         experts = [k for d in self._experts for k in d.keys()]
         if len(experts) > 0:
             raise ValueError(f"Unprocessed experts: {experts}")
-    
+

 @ModelBase.register("ModernBertModel", "ModernBertForMaskedLM", "ModernBertForSequenceClassification")
 class ModernBertModel(BertModel):
@@ -9368,7 +9368,6 @@ class ModernBertModel(BertModel):
             name = name[6:]
 
         return super().modify_tensors(data_torch, name, bid)
-
 
 
 @ModelBase.register("ApertusForCausalLM")
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index baca6da504..7b4e29f980 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -855,7 +855,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                         type = LLM_TYPE_149M; break; // modern-bert-base
                     case 28:
                         type = LLM_TYPE_395M; break; // modern-bert-large
-                    default: type = LLM_TYPE_UNKNOWN; 
+                    default: type = LLM_TYPE_UNKNOWN;
                 }
             } break;
         case LLM_ARCH_JINA_BERT_V2:
@@ -2993,11 +2993,11 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     layer.layer_out_norm_b = create_tensor(tn(LLM_TENSOR_LAYER_OUT_NORM, "bias", i), {n_embd}, 0);
                 }
             } break;
-        case LLM_ARCH_MODERN_BERT: 
+        case LLM_ARCH_MODERN_BERT:
             {
                 tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
                 tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}, 0);
-                
+
                 output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
 
                 for(int i = 0; i < n_layer; ++i) {
@@ -3006,7 +3006,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     if ( i != 0 ) {
                         layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0);
                     } else{
-                        // layer 0 uses identity 
+                        // layer 0 uses identity
                        layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, TENSOR_NOT_REQUIRED);
                     }
 
@@ -3014,7 +3014,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     layer.wqkv = create_tensor(tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, 3 * n_embd }, 0);
                     layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, 0);
 
-                    layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, 2 * n_ff}, 0); 
+                    layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, 2 * n_ff}, 0);
                     layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd}, 0);
                     layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0);
                 }
@@ -8209,7 +8209,7 @@ struct llm_build_modern_bert : public llm_graph_context {
         ggml_tensor * cur = nullptr;
         ggml_tensor * inpL = nullptr;
 
-        ggml_tensor * inp_pos = build_inp_pos(); 
+        ggml_tensor * inp_pos = build_inp_pos();
 
         // construct input embeddings (token, type, position)
         inpL = build_inp_embd(model.tok_embd);
@@ -8221,7 +8221,7 @@ struct llm_build_modern_bert : public llm_graph_context {
 
         ggml_tensor * inp_out_ids = build_inp_out_ids();
 
-        auto * inp_attn = build_attn_inp_no_cache(); 
+        auto * inp_attn = build_attn_inp_no_cache();
 
         for (int il = 0; il < n_layer; ++il) {
             ggml_tensor * cur = inpL;
@@ -19831,7 +19831,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
         case LLM_ARCH_NOMIC_BERT_MOE:
         case LLM_ARCH_NEO_BERT:
         case LLM_ARCH_WAVTOKENIZER_DEC:
-        case LLM_ARCH_MODERN_BERT: 
+        case LLM_ARCH_MODERN_BERT:
         case LLM_ARCH_GEMMA_EMBEDDING:
         case LLM_ARCH_DREAM:
         case LLM_ARCH_LLADA: