From a82103eb48685a2d942c44bf7d3a4f097fede9b9 Mon Sep 17 00:00:00 2001
From: Yee Man Chan
Date: Thu, 5 Feb 2026 07:44:10 +0800
Subject: [PATCH] f16 gguf cannot run without context length

---
 convert_hf_to_gguf.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index c167de8a46..4dee3f6a10 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -5086,6 +5086,15 @@ class KimiLinearModel(TextModel):
         super().set_gguf_parameters()
         self.gguf_writer.add_vocab_size(self.hparams["vocab_size"])
 
+        # Use find_hparam for context length
+        # Kimi uses model_max_length
+        if (n_ctx := self.find_hparam(["max_position_embeddings", "model_max_length", "n_ctx", "n_positions"], optional=True)) is not None:
+            self.gguf_writer.add_context_length(n_ctx)
+        else:
+            # Default to 4096 if not found
+            logger.warning("No context length found in config, defaulting to 4096")
+            self.gguf_writer.add_context_length(4096)
+
         # KDA & MLA params
         # Get ssm_d_conv from linear_attn_config.short_conv_kernel_size or ssm_d_conv
         linear_attn_config = self.hparams["linear_attn_config"]