From a4020d867f55bf4721ddd1c5df713f4ade14ad49 Mon Sep 17 00:00:00 2001 From: Yee Man Chan Date: Sat, 3 Jan 2026 08:27:29 +0800 Subject: [PATCH] don't quantize conv1d of Kimi Linear --- src/llama-quant.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index 7b8bf6e524..bae907f92c 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -869,9 +869,9 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: quantize &= name != LLM_TN(model.arch)(LLM_TENSOR_POS_EMBD, "weight"); quantize &= name != LLM_TN(model.arch)(LLM_TENSOR_TOKEN_TYPES, "weight"); - // do not quantize Mamba's small yet 2D weights + // do not quantize Mamba/Kimi's small conv1d weights // NOTE: can't use LLM_TN here because the layer number is not known - quantize &= name.find("ssm_conv1d.weight") == std::string::npos; + quantize &= name.find("ssm_conv1d") == std::string::npos; quantize &= name.find("shortconv.conv.weight") == std::string::npos; // do not quantize RWKV's small yet 2D weights