From f85e5c73b91370d605f4835c2f6112fb729006cd Mon Sep 17 00:00:00 2001
From: Yee Man Chan <ymchan@gmail.com>
Date: Fri, 2 Jan 2026 21:20:34 +0800
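Subject: [PATCH] Move KIMI_LINEAR to llm_arch_is_hybrid to enable KV cache

Drop LLM_ARCH_KIMI_LINEAR from llm_arch_is_recurrent() and list it in
llm_arch_is_hybrid() instead, replacing the commented-out case and its
TODO about enabling hybrid mode once MLA KV caching is implemented.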

---
 src/llama-arch.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp
index 6aabdb7f7d..cf5ea1177f 100644
--- a/src/llama-arch.cpp
+++ b/src/llama-arch.cpp
@@ -2863,7 +2863,6 @@ bool llm_arch_is_recurrent(const llm_arch & arch) {
         case LLM_ARCH_RWKV6QWEN2:
         case LLM_ARCH_RWKV7:
         case LLM_ARCH_ARWKV7:
-        case LLM_ARCH_KIMI_LINEAR:  // KDA layers use delta attention with recurrent state
             return true;
         default:
             return false;
@@ -2880,9 +2879,7 @@ bool llm_arch_is_hybrid(const llm_arch & arch) {
         case LLM_ARCH_LFM2MOE:
         case LLM_ARCH_NEMOTRON_H:
         case LLM_ARCH_QWEN3NEXT:
-        // Kimi: Currently using recurrent-only mode since MLA doesn't use KV cache
-        // TODO: Enable hybrid when MLA KV caching is implemented
-        // case LLM_ARCH_KIMI_LINEAR:
+        case LLM_ARCH_KIMI_LINEAR:
             return true;
         default:
             return false;