diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 9c36c84189..45538fcabb 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -5021,6 +5021,13 @@ class KimiLinearModel(TextModel):
         ssm_d_conv = self.hparams.get("ssm_d_conv") or linear_attn_config.get("short_conv_kernel_size")
         if ssm_d_conv is not None:
             self.gguf_writer.add_ssm_conv_kernel(ssm_d_conv)
+
+        kda_head_dim = self.hparams.get("kda_head_dim") or linear_attn_config.get("head_dim")
+
+        if kda_head_dim is not None:
+            self.gguf_writer.add_kda_head_dim(kda_head_dim)
+
+        # MLA params - use add_* methods that handle arch substitution
 
         # MLA params - use add_* methods that handle arch substitution
         # Support both HuggingFace naming (q_lora_rank, kv_lora_rank) and internal naming (n_lora_q, n_lora_kv)
@@ -5035,8 +5042,9 @@ class KimiLinearModel(TextModel):
         # MLA head dimensions
         # Support HuggingFace naming: qk_nope_head_dim, qk_rope_head_dim, v_head_dim
         qk_nope_head_dim = self.hparams.get("qk_nope_head_dim")
-        qk_rope_head_dim = self.hparams.get("qk_rope_head_dim", self.hparams.get("n_rot"))
+        qk_rope_head_dim = self.hparams.get("qk_rope_head_dim")
         v_head_dim = self.hparams.get("v_head_dim")
+        self.gguf_writer.add_rope_dimension_count(self.hparams["qk_rope_head_dim"])
 
         # Calculate n_embd_head_k_mla = qk_nope_head_dim + qk_rope_head_dim
         if "n_embd_head_k_mla" in self.hparams:
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 485c41abfb..fe9785918b 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -205,6 +205,9 @@ class Keys:
         GROUP_COUNT    = "{arch}.ssm.group_count"
         DT_B_C_RMS     = "{arch}.ssm.dt_b_c_rms"
 
+    class KDA:
+        HEAD_DIM = "{arch}.kda.head_dim"
+
     class WKV:
         HEAD_SIZE = "{arch}.wkv.head_size"
 
@@ -3475,6 +3478,9 @@ KEY_SSM_TIME_STEP_RANK = Keys.SSM.TIME_STEP_RANK
 KEY_SSM_GROUP_COUNT    = Keys.SSM.GROUP_COUNT
 KEY_SSM_DT_B_C_RMS     = Keys.SSM.DT_B_C_RMS
 
+# KDA
+KEY_KDA_HEAD_DIM = Keys.KDA.HEAD_DIM
+
 # tokenization
 KEY_TOKENIZER_MODEL = Keys.Tokenizer.MODEL
 KEY_TOKENIZER_PRE   = Keys.Tokenizer.PRE
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index 9e6ff3ac77..3b2dfef479 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -970,6 +970,9 @@ class GGUFWriter:
     def add_ssm_dt_b_c_rms(self, value: bool) -> None:
         self.add_bool(Keys.SSM.DT_B_C_RMS.format(arch=self.arch), value)
 
+    def add_kda_head_dim(self, value: int) -> None:
+        self.add_uint32(Keys.KDA.HEAD_DIM.format(arch=self.arch), value)
+
     def add_tokenizer_model(self, model: str) -> None:
         self.add_string(Keys.Tokenizer.MODEL, model)
 
diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp
index ab09bb7eb7..6aabdb7f7d 100644
--- a/src/llama-arch.cpp
+++ b/src/llama-arch.cpp
@@ -236,6 +236,8 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_SSM_GROUP_COUNT,           "%s.ssm.group_count" },
     { LLM_KV_SSM_DT_B_C_RMS,            "%s.ssm.dt_b_c_rms" },
 
+    { LLM_KV_KDA_HEAD_DIM,              "%s.kda.head_dim" },
+
     { LLM_KV_WKV_HEAD_SIZE,             "%s.wkv.head_size" },
 
     { LLM_KV_POSNET_EMBEDDING_LENGTH,   "%s.posnet.embedding_length" },
diff --git a/src/llama-arch.h b/src/llama-arch.h
index 2b965850c5..d68af214a7 100644
--- a/src/llama-arch.h
+++ b/src/llama-arch.h
@@ -240,6 +240,8 @@ enum llm_kv {
     LLM_KV_SSM_GROUP_COUNT,
     LLM_KV_SSM_DT_B_C_RMS,
 
+    LLM_KV_KDA_HEAD_DIM,
+
     LLM_KV_WKV_HEAD_SIZE,
 
     LLM_KV_TOKENIZER_MODEL,
diff --git a/src/llama-hparams.cpp b/src/llama-hparams.cpp
index 88d266b8da..75ddeeba09 100644
--- a/src/llama-hparams.cpp
+++ b/src/llama-hparams.cpp
@@ -137,7 +137,7 @@ uint32_t llama_hparams::n_embd_r() const {
         // for Kimi KDA layers
         // Conv state for Q, K, V: 3 * (d_conv - 1) * n_head * head_dim
         const uint32_t d_inner = n_head() * kda_head_dim; // 32 * 128 = 4096
-        return 3 * (kda_d_conv > 0 ? kda_d_conv - 1 : 3) * d_inner;
+        return 3 * (ssm_d_conv > 0 ? ssm_d_conv - 1 : 3) * d_inner;
     }
 
     // TODO: maybe support other convolution strides than 1
diff --git a/src/llama-hparams.h b/src/llama-hparams.h
index 80170650eb..c90ed12b90 100644
--- a/src/llama-hparams.h
+++ b/src/llama-hparams.h
@@ -133,9 +133,8 @@ struct llama_hparams {
     uint32_t ssm_dt_rank = 0;
    uint32_t ssm_n_group = 0;
 
-    // for Kimi Delta Attention (KDA)
-    uint32_t kda_head_dim = 0; // head_dim for KDA layers (128 for Kimi)
-    uint32_t kda_d_conv = 0;   // conv kernel size for KDA (4 for Kimi)
+    // for Kimi Linear KDA
+    uint32_t kda_head_dim = 0;
 
     // for hybrid state space models
     std::array<bool, LLAMA_MAX_LAYERS> recurrent_layer_arr;
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 0f162cdd7a..2e3cb9d78c 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -2291,10 +2291,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                 ml.get_key(LLM_KV_ATTENTION_VALUE_LENGTH_MLA, hparams.n_embd_head_v_mla, false);
                 ml.get_key(LLM_KV_ATTENTION_KV_LORA_RANK, hparams.n_lora_kv, false);
                 ml.get_key(LLM_KV_ROPE_DIMENSION_COUNT, hparams.n_rot, false);
-
-                // KDA (Delta Attention) parameters
-                hparams.kda_head_dim = 128; // linear_attn_config.head_dim
-                hparams.kda_d_conv = 4; // linear_attn_config.short_conv_kernel_size
+                ml.get_key(LLM_KV_SSM_CONV_KERNEL, hparams.ssm_d_conv, false);
+                ml.get_key(LLM_KV_KDA_HEAD_DIM, hparams.kda_head_dim, false);
 
                 // MLA qk_rope_head_dim (for reference)
                 // qk_rope_head_dim = 64, qk_nope_head_dim = 128, qk_head_dim = 192
@@ -6447,9 +6445,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
 
                     // Assuming KDA layer if KDA tensors are present
                     // KDA uses head_dim = 128 (from linear_attn_config.head_dim)
-                    const int64_t n_embd_head_k_kda = 128;
-                    const int64_t n_embd_head_v_kda = 128;
-                    const int64_t ssm_d_conv = hparams.ssm_d_conv > 0 ? hparams.ssm_d_conv : 4;
+                    const int64_t n_embd_head_k_kda = hparams.kda_head_dim;
+                    const int64_t n_embd_head_v_kda = hparams.kda_head_dim;
+                    const int64_t ssm_d_conv = hparams.ssm_d_conv;
 
                     // Try loading KDA specific tensors (using SSM_ prefix)
                     // Conv1d weights: try 4D first, then 3D (quantization may remove trailing 1)
@@ -6513,8 +6511,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     // MLA Layer - use MLA-specific head dimensions
                     const int64_t q_lora_rank = hparams.n_lora_q;
                     const int64_t kv_lora_rank = hparams.n_lora_kv;
-                    const int64_t n_embd_head_k_mla = hparams.n_embd_head_k_mla > 0 ? hparams.n_embd_head_k_mla : 192;
-                    const int64_t n_embd_head_v_mla = hparams.n_embd_head_v_mla > 0 ? hparams.n_embd_head_v_mla : 128;
+                    const int64_t n_embd_head_k_mla = hparams.n_embd_head_k_mla;
+                    const int64_t n_embd_head_v_mla = hparams.n_embd_head_v_mla;
 
                     layer.attn_q_a_norm = create_tensor(tn(LLM_TENSOR_ATTN_Q_A_NORM, "weight", i), {q_lora_rank}, TENSOR_NOT_REQUIRED);
                     layer.attn_kv_a_norm = create_tensor(tn(LLM_TENSOR_ATTN_KV_A_NORM, "weight", i), {kv_lora_rank}, 0);
@@ -6529,7 +6527,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
 
                     // Kimi: qk_rope_head_dim = 64 (actual RoPE dimension for MLA)
                     // Note: hparams.n_rot may be 72 (from conversion) but actual is 64
-                    const int64_t qk_rope_head_dim = 64; // From config: qk_rope_head_dim
+                    const int64_t qk_rope_head_dim = hparams.n_rot; // From config: qk_rope_head_dim
                     layer.wkv_a_mqa = create_tensor(tn(LLM_TENSOR_ATTN_KV_A_MQA, "weight", i), {n_embd, kv_lora_rank + qk_rope_head_dim}, 0);
                     layer.wkv_b = create_tensor(tn(LLM_TENSOR_ATTN_KV_B, "weight", i), {kv_lora_rank, n_head * (n_embd_head_k_mla - qk_rope_head_dim + n_embd_head_v_mla)}, 0);
 
@@ -6539,7 +6537,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0);
 
                     // MoE intermediate size (different from dense FFN)
-                    const int64_t n_ff_exp = hparams.n_ff_exp > 0 ? hparams.n_ff_exp : 1024;
+                    const int64_t n_ff_exp = hparams.n_ff_exp;
 
                     // Kimi uses n_layer_dense_lead to determine which layers use dense FFN vs MoE
                     // first_k_dense_replace = 1 means layer 0 uses dense FFN, layers 1+ use MoE
diff --git a/src/llama-model.h b/src/llama-model.h
index b067b686d2..7081423588 100644
--- a/src/llama-model.h
+++ b/src/llama-model.h
@@ -84,7 +84,6 @@ enum llm_type {
     LLM_TYPE_35B,
     LLM_TYPE_36B,
     LLM_TYPE_40B,
-    LLM_TYPE_48B,
     LLM_TYPE_65B,
     LLM_TYPE_70B,
     LLM_TYPE_120B,
@@ -114,6 +113,7 @@ enum llm_type {
     LLM_TYPE_16B_A1B,
     LLM_TYPE_21B_A3B, // Ernie MoE small
     LLM_TYPE_30B_A3B,
+    LLM_TYPE_48B_A3B, // Kimi Linear
     LLM_TYPE_80B_A3B, // Qwen3 Next
     LLM_TYPE_100B_A6B,
     LLM_TYPE_106B_A12B, // GLM-4.5-Air
diff --git a/src/models/kimi-linear.cpp b/src/models/kimi-linear.cpp
index 40fbe469b3..d025eab5f3 100644
--- a/src/models/kimi-linear.cpp
+++ b/src/models/kimi-linear.cpp
@@ -21,8 +21,8 @@ llm_build_kimi_linear::llm_build_kimi_linear(const llama_model & model, const ll
 
     // Kimi dimension constants
     const int64_t n_head = hparams.n_head();
-    const int64_t head_dim = hparams.kda_head_dim > 0 ? hparams.kda_head_dim : 128;
-    const int64_t d_conv = hparams.kda_d_conv > 0 ? hparams.kda_d_conv : 4;
+    const int64_t head_dim = hparams.kda_head_dim;
+    const int64_t d_conv = hparams.ssm_d_conv;
     const int64_t d_inner = n_head * head_dim; // 32 * 128 = 4096
     const int64_t n_seqs = ubatch.n_seqs;
     const int64_t n_seq_tokens = ubatch.n_seq_tokens;
@@ -33,12 +33,12 @@ llm_build_kimi_linear::llm_build_kimi_linear(const llama_model & model, const ll
     GGML_ASSERT(ubatch.n_tokens == n_seq_tokens * n_seqs);
 
     // MLA params
-    const int64_t n_embd_head_k_mla = hparams.n_embd_head_k_mla > 0 ? hparams.n_embd_head_k_mla : 192;
-    const int64_t n_embd_head_v_mla = hparams.n_embd_head_v_mla > 0 ? hparams.n_embd_head_v_mla : 128;
-    const int64_t kv_lora_rank = hparams.n_lora_kv > 0 ? hparams.n_lora_kv : 512;
-    // qk_rope_head_dim = 64 (from Kimi config), NOT hparams.n_rot (which is 72)
+    const int64_t n_embd_head_k_mla = hparams.n_embd_head_k_mla;
+    const int64_t n_embd_head_v_mla = hparams.n_embd_head_v_mla;
+    const int64_t kv_lora_rank = hparams.n_lora_kv;
+    // qk_rope_head_dim = 64 (from Kimi config) which is hparams.n_rot
     // Confirmed from tensor shape: wkv_a_mqa [2304, 576] = [n_embd, kv_lora_rank + qk_rope_head_dim]
-    const int64_t n_embd_head_qk_rope = 64; // config.qk_rope_head_dim
+    const int64_t n_embd_head_qk_rope = hparams.n_rot; // config.qk_rope_head_dim
     const int64_t n_embd_head_qk_nope = n_embd_head_k_mla - n_embd_head_qk_rope; // 192 - 64 = 128
 
     // Attention scale for KDA (1/sqrt(head_dim))