don't quantize conv1d of Kimi Linear

2026-01-03 08:27:29 +08:00 · 2026-01-03 08:27:29 +08:00 · a4020d867f
parent 8bd617eb1c
commit a4020d867f
1 changed files with 2 additions and 2 deletions
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -869,9 +869,9 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
        quantize &= name != LLM_TN(model.arch)(LLM_TENSOR_POS_EMBD,    "weight");
        quantize &= name != LLM_TN(model.arch)(LLM_TENSOR_TOKEN_TYPES, "weight");

-        // do not quantize Mamba's small yet 2D weights
+        // do not quantize Mamba/Kimi's small conv1d weights
        // NOTE: can't use LLM_TN here because the layer number is not known
-        quantize &= name.find("ssm_conv1d.weight") == std::string::npos;
+        quantize &= name.find("ssm_conv1d") == std::string::npos;
        quantize &= name.find("shortconv.conv.weight") == std::string::npos;

        // do not quantize RWKV's small yet 2D weights