diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index d8bc95fa6a..b56a99c5fa 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -3799,7 +3799,7 @@ class Ernie4_5MoeModel(Ernie4_5Model): merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight" yield from super().modify_tensors(data_torch, merged_name, bid) else: - yield from super().modify_tensors(data_torch, name, bid) + yield from ModelBase.modify_tensors(self, data_torch, name, bid) def prepare_tensors(self): super().prepare_tensors() @@ -6153,7 +6153,7 @@ class Gemma3nVisionAudioModel(ConformerAudioModel): if new_name.endswith("conv_stem.conv.bias") or new_name.endswith("layer_scale.gamma"): data_torch = data_torch.unsqueeze(0).unsqueeze(-1).unsqueeze(-1) # [1, C, 1, 1] - yield from super().modify_tensors(data_torch, new_name, bid) + yield from ModelBase.modify_tensors(self, data_torch, new_name, bid) @ModelBase.register("Gemma3nForCausalLM", "Gemma3nForConditionalGeneration") @@ -6253,7 +6253,7 @@ class Gemma3NModel(Gemma3Model): # Continue with normal processing name = name.replace("language_model.", "") - yield from super().modify_tensors(data_torch, name, bid) + yield from ModelBase.modify_tensors(self, data_torch, name, bid) return if "altup_unembed_projections" in name: @@ -6270,7 +6270,7 @@ class Gemma3NModel(Gemma3Model): raise ValueError(f"Unknown name: {name}") out = self._stack_matrices(self._altup_unembd) if out is not None: - yield from super().modify_tensors(out, "model.altup_unembed_projections.weight", bid) + yield from ModelBase.modify_tensors(self, out, "model.altup_unembed_projections.weight", bid) return else: return @@ -6287,7 +6287,7 @@ class Gemma3NModel(Gemma3Model): raise ValueError(f"Unknown name: {name}") out = self._stack_matrices(self._altup_proj) if out is not None: - yield from super().modify_tensors(out, "model.altup_projections.weight", bid) + yield from ModelBase.modify_tensors(self, out, "model.altup_projections.weight", bid) return else: return @@ -8803,8 +8803,8 @@ class GraniteMoeModel(GraniteModel): ffn_dim = self.hparams["intermediate_size"] assert data_torch.shape[-2] == 2 * ffn_dim, "Merged FFN tensor size must be 2 * intermediate_size" gate, up = data_torch.split(ffn_dim, dim=-2) - yield from super().modify_tensors(gate, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE_EXP, bid), bid) - yield from super().modify_tensors(up, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP_EXP, bid), bid) + yield from ModelBase.modify_tensors(self, gate, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE_EXP, bid), bid) + yield from ModelBase.modify_tensors(self, up, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP_EXP, bid), bid) has_experts = bool(self.hparams.get('num_local_experts')) @@ -8813,15 +8813,15 @@ class GraniteMoeModel(GraniteModel): assert data_torch.shape[-2] == 2 * ffn_dim, "Merged FFN tensor size must be 2 * shared_intermediate_size" gate, up = data_torch.split(ffn_dim, dim=-2) if has_experts: - yield from super().modify_tensors(gate,self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE_SHEXP, bid), bid) - yield from super().modify_tensors(up, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP_SHEXP, bid), bid) + yield from ModelBase.modify_tensors(self, gate,self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE_SHEXP, bid), bid) + yield from ModelBase.modify_tensors(self, up, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP_SHEXP, bid), bid) return - yield from super().modify_tensors(gate, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE, bid), bid) - yield from super().modify_tensors(up, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP, bid), bid) + yield from ModelBase.modify_tensors(self, gate, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE, bid), bid) + yield from ModelBase.modify_tensors(self, up, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP, bid), bid) return if not has_experts and name.endswith("shared_mlp.output_linear.weight"): - yield from super().modify_tensors(data_torch, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_DOWN, bid), bid) + yield from ModelBase.modify_tensors(self, data_torch, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_DOWN, bid), bid) return yield from super().modify_tensors(data_torch, name, bid)