From 44bc40fdd9af155f519b3caf4cca111ff6e52bd9 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Thu, 29 Jan 2026 16:06:06 +0100
Subject: [PATCH] wip

---
 convert_hf_to_gguf.py     | 48 +++++++++++++++++++++++++++++++++++++++
 gguf-py/gguf/constants.py | 32 ++++++++++++++++++++++++++
 2 files changed, 80 insertions(+)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index a391717e32..764c458be7 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -7513,6 +7513,7 @@ class DeepseekV2Model(TextModel):
             self._experts[bid][name] = data_torch
 
             if len(self._experts[bid]) >= n_experts * 3:
+                logger.debug("merging experts for block %d:\n%s", bid, '\n'.join(self._experts[bid].keys()))
                 # merge the experts into a single 3d tensor
                 for w_name in ["down_proj", "gate_proj", "up_proj"]:
                     datas: list[Tensor] = []
@@ -10914,6 +10915,53 @@ class SolarOpenModel(Glm4MoeModel):
         special_vocab.add_to_gguf(self.gguf_writer)
 
 
+@ModelBase.register("LongcatFlashForCausalLM")
+class LongcatFlashModel(DeepseekV2Model):
+    model_arch = gguf.MODEL_ARCH.LONGCAT_FLASH
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # the model uses paired (double) blocks, so we need to adjust the block count
+        self.block_count = self.hparams["num_layers"] * 2
+        self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
+        # compat with deepseek2 base class hparam
+        self.hparams["num_hidden_layers"] = self.block_count
+        self.hparams["num_key_value_heads"] = self.hparams["num_attention_heads"]
+        self.hparams["intermediate_size"] = self.hparams["ffn_hidden_size"]
+        self.hparams["moe_intermediate_size"] = self.hparams["expert_ffn_hidden_size"]
+
+    def modify_tensors(self, data_torch, name, bid):
+        if bid is not None:
+            bid = bid * 2  # double block id
+
+        # Rename rules examples:
+        # model.layers.1.input_layernorm.0.weight --> model.layers.1.input_layernorm.weight
+        # model.layers.1.input_layernorm.1.weight --> model.layers.2.input_layernorm.weight
+        # model.layers.1.mlp.experts.0 --> model.layers.2.mlp.experts.0 (special case for experts)
+
+        name = name.replace('.mlps.', '.mlp.')
+        name = name.replace('.router.classifier.', '.gate.')
+        name = name.replace('.router.e_score_correction_bias', '.e_score_correction_bias')
+
+        # handle sub-block remapping
+        match = re.match(r'.*\.(\d+)\.([a-z_\.]+)\.(\d+)\..*', name)
+        if match and ".mlp.experts." not in name:
+            # convert block id from N.(name).M to (N+M).(name)
+            N = int(match.group(1))
+            middle = match.group(2)
+            M = int(match.group(3))
+            assert N * 2 == bid
+            new_bid = N * 2 + M
+            new_name = re.sub(r'\.(\d+)\.([a-z_\.]+)\.(\d+)\.', f'.{new_bid}.{middle}.', name)
+            logger.debug("renaming tensor from %s to %s", name, new_name)
+            yield from super().modify_tensors(data_torch, new_name, new_bid)
+        else:
+            # correct block inside name
+            if bid is not None:
+                name = name.replace(f'.{bid // 2}.', f'.{bid}.', 1)
+            yield from super().modify_tensors(data_torch, name, bid)
+
+
 ###### CONVERSION LOGIC ######
 
 
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 31273b2b5a..caa3dc2af4 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -459,6 +459,7 @@ class MODEL_ARCH(IntEnum):
     MIMO2            = auto()
     LLAMA_EMBED      = auto()
     MAINCODER        = auto()
+    LONGCAT_FLASH    = auto()
 
 
 class VISION_PROJECTOR_TYPE(IntEnum):
@@ -880,6 +881,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.MIMO2:            "mimo2",
     MODEL_ARCH.LLAMA_EMBED:      "llama-embed",
     MODEL_ARCH.MAINCODER:        "maincoder",
+    MODEL_ARCH.LONGCAT_FLASH:    "longcat-flash",
 }
 
 VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {
@@ -3377,6 +3379,36 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.LONGCAT_FLASH: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_Q_A,
+        MODEL_TENSOR.ATTN_Q_B,
+        MODEL_TENSOR.ATTN_KV_A_MQA,
+        MODEL_TENSOR.ATTN_KV_B,
+        MODEL_TENSOR.ATTN_K_B,
+        MODEL_TENSOR.ATTN_V_B,
+        MODEL_TENSOR.ATTN_Q_A_NORM,
+        MODEL_TENSOR.ATTN_KV_A_NORM,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+        MODEL_TENSOR.FFN_GATE_SHEXP,
+        MODEL_TENSOR.FFN_DOWN_SHEXP,
+        MODEL_TENSOR.FFN_UP_SHEXP,
+        MODEL_TENSOR.FFN_EXP_PROBS_B,
+    ],
     # TODO
 }
 