diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 17959dde2f..9e659b98b5 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -7758,15 +7758,14 @@ class DeepseekV2Model(TextModel):
             # Default: if no MoE, all layers are dense; if MoE, none are dense
             first_k_dense_replace = hparams["num_hidden_layers"] if not has_moe else 0
         self.gguf_writer.add_leading_dense_block_count(first_k_dense_replace)
-        kv_lora_rank = hparams["kv_lora_rank"] if hparams.get("kv_lora_rank") is not None else 512
+        kv_lora_rank = 512 if hparams.get("kv_lora_rank") is None else hparams["kv_lora_rank"]  # .get(key, 512) would pass an explicit None through
         self.gguf_writer.add_vocab_size(hparams["vocab_size"])
         if "q_lora_rank" in hparams and hparams["q_lora_rank"] is not None:
             self.gguf_writer.add_q_lora_rank(hparams["q_lora_rank"])
-        if "kv_lora_rank" in hparams and hparams["kv_lora_rank"] is not None:
-            self.gguf_writer.add_kv_lora_rank(kv_lora_rank)
 
         # note: deepseek2 using MLA converts into MQA with larger heads, then decompresses to MHA
         if not is_ocr:
+            self.gguf_writer.add_kv_lora_rank(kv_lora_rank)
             self.gguf_writer.add_key_length(kv_lora_rank + hparams["qk_rope_head_dim"])
             self.gguf_writer.add_value_length(kv_lora_rank)
             self.gguf_writer.add_key_length_mla(hparams["qk_nope_head_dim"] + hparams["qk_rope_head_dim"])
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 3cc4967611..c3c1d40ff7 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -300,7 +300,6 @@ class Keys:
         IMAGE_MEAN          = "clip.vision.image_mean"
         IMAGE_STD           = "clip.vision.image_std"
         SPATIAL_MERGE_SIZE  = "clip.vision.spatial_merge_size"
-        WINDOW_SIZE         = "clip.vision.window_size"
         USE_GELU            = "clip.use_gelu"
         USE_SILU            = "clip.use_silu"
         N_WA_PATTERN        = "clip.vision.n_wa_pattern" # used by qwen2.5vl