diff --git a/src/models/kimi-linear.cpp b/src/models/kimi-linear.cpp
index 3fb40471a1..013926e544 100644
--- a/src/models/kimi-linear.cpp
+++ b/src/models/kimi-linear.cpp
@@ -263,7 +263,6 @@ llm_build_kimi_linear::llm_build_kimi_linear(const llama_model & model, const ll
             ggml_tensor * state = build_rs(inp_rs, ssm_states_all, hparams.n_embd_s(), n_seqs);
             state = ggml_reshape_4d(ctx0, state, head_dim, head_dim, n_head, n_seqs);
             // Choose between build_kda_chunking and build_kda_recurrent based on n_tokens
-            // TODO: Currently only build_kda_recurrent is implemented
             ggml_tensor * attn_out = n_seq_tokens > CHUNK_SIZE ?
                 build_kda_chunking(Qcur, Kcur, Vcur, g1, beta, state, causal_mask, identity, il) :
                 build_kda_recurrent(Qcur, Kcur, Vcur, g1, beta, state, causal_mask, identity, il);
@@ -315,7 +314,6 @@ llm_build_kimi_linear::llm_build_kimi_linear(const llama_model & model, const ll
         } else if (is_mla) {
             // === MLA Layer (Multi-head Latent Attention) without KV Cache ===
             // Reference: vLLM mla.py
-            // TODO: Implement proper KV caching for MLA (requires custom cache format)
 
             // Step 1: Q projection and reshape
             // vLLM Kimi: q = q_proj(hidden_states), then view as [n_tokens, n_head, qk_head_dim]
@@ -454,7 +452,8 @@ llm_build_kimi_linear::llm_build_kimi_linear(const llama_model & model, const ll
 }
 
 /*
-    IMPORTANT: Currently build_kda_chunking is not implemented nor called
+    This is a ggml implementation of the naive_chunk_kda function of
+    https://github.com/fla-org/flash-linear-attention/blob/main/fla/ops/kda/naive.py
 */
 ggml_tensor * llm_build_kimi_linear::build_kda_chunking(
     ggml_tensor * q,