diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 3dc6588976..194462fb08 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -775,27 +775,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                 ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
                 ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type, false);
 
-                switch (hparams.n_layer) {
-                    case 12:
-                        type = LLM_TYPE_47M; break; // granite-embeddings-small
-                    default: type = LLM_TYPE_UNKNOWN;
-                }
-            } break;
-        case LLM_ARCH_MODERN_BERT:
-            {
-
-                hparams.swa_type = LLAMA_SWA_TYPE_LOCAL;
-
-                hparams.set_swa_pattern(3, 0);
-                hparams.rope_freq_base_train_swa = 10000.f;
-                hparams.rope_freq_base_train = 160000.f;
-                hparams.n_swa = 128;
-
-                ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa);
-                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
-                ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
-                ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type, false);
-
                 switch (hparams.n_layer) {
                     case 12:
                         type = LLM_TYPE_47M; break; // granite-embeddings-small
@@ -7792,7 +7771,7 @@ struct llm_build_modern_bert : public llm_graph_context {
         inpL = build_norm(inpL, model.tok_norm, nullptr, LLM_NORM, -1);
         cb(inpL, "inp_norm", -1);
 
-        auto * inp_attn = build_attn_inp_kv_unified_iswa();
+        auto * inp_attn = build_attn_inp_kv_iswa();
 
         // iterate layers
         for (int il = 0; il < n_layer; ++il) {
@@ -7842,8 +7821,8 @@ struct llm_build_modern_bert : public llm_graph_context {
             cb(Vcur, "Vcur", il);
 
             cur = build_attn(inp_attn,
-                model.layers[il].wo, nullptr,
-                Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+                    model.layers[il].wo, nullptr,
+                    Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
             cb(cur, "kqv_out", il);
 
             if (il == n_layer - 1 && pooling_type == LLAMA_POOLING_TYPE_NONE) {
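
For context, a hedged reading of the updated build_attn call in the last hunk: the extra nullptr brings the ModernBERT graph builder in line with the current build_attn parameter list, which takes three optional tensors between Vcur and the KQ scale. The annotated sketch below only restates the call as it appears in this diff; the parameter roles named in the comments (KQ bias, attention sinks, MLA value projection) are assumptions, not copied from llm_graph_context.

    // Sketch of the new call shape (assumed parameter roles; check
    // llm_graph_context::build_attn in llama-graph.h for the authoritative signature):
    cur = build_attn(inp_attn,
            model.layers[il].wo, nullptr,       // output projection weight, no output bias
            Qcur, Kcur, Vcur,                   // per-layer query/key/value tensors
            nullptr,                            // optional KQ bias          (assumed role)
            nullptr,                            // optional attention sinks  (assumed role)
            nullptr,                            // optional MLA value proj   (assumed role)
            1.0f/sqrtf(float(n_embd_head)),     // KQ scale
            il);                                // layer index, used for callbacks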