From ca5a877ec34f58d4eaa5dde6624cf7817b2d5b28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Sat, 21 Mar 2026 15:44:31 +0100 Subject: [PATCH] new_name not needed --- convert_hf_to_gguf.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index b6215e845b..f6771eed38 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -605,7 +605,7 @@ class ModelBase: def _nvfp4_scale2_is_trivial(scale2: Tensor) -> bool: return scale2.numel() <= 1 and abs(float(scale2.float().sum()) - 1.0) < 1e-6 - def _repack_nvfp4(self, name: str, weight: Tensor, scale: Tensor, scale2: Tensor) -> str: + def _repack_nvfp4(self, name: str, weight: Tensor, scale: Tensor, scale2: Tensor): if "language_model." in name: name = name.replace("language_model.", "") @@ -622,8 +622,6 @@ class ModelBase: logger.info(f" + {scale_name} (per-tensor NVFP4 scale2, shape [{scale2_f32.size}])") self.gguf_writer.add_tensor(scale_name, scale2_f32) - return new_name - def _generate_nvfp4_tensors(self): # Per-layer expert merging to avoid holding all experts in memory expert_blocks: dict[tuple[int, str], list[tuple[int, np.ndarray]]] = {} @@ -5073,9 +5071,9 @@ class _LinearAttentionVReorderBase(Qwen3NextModel): return weight, scale - def _repack_nvfp4(self, name: str, weight: Tensor, scale: Tensor, scale2: Tensor) -> str: + def _repack_nvfp4(self, name: str, weight: Tensor, scale: Tensor, scale2: Tensor): weight, scale = self._transform_nvfp4_weight(name, weight, scale) - return super()._repack_nvfp4(name, weight, scale, scale2) + super()._repack_nvfp4(name, weight, scale, scale2) def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: num_k_heads = self.hparams.get("linear_num_key_heads", 0)