diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 8cc4963fb2..d8bc95fa6a 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -2736,7 +2736,7 @@ class AfmoeModel(LlamaModel):
                     data_torch = torch.stack(datas, dim=0)
 
                     merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
 
-                    yield from super().modify_tensors(data_torch, merged_name, bid)
+                    yield from ModelBase.modify_tensors(self, data_torch, merged_name, bid)
                 return
             else:
@@ -2745,7 +2745,7 @@ class AfmoeModel(LlamaModel):
         if name.endswith(".expert_bias"):
             name = name.replace(".expert_bias", ".expert_bias.bias")
 
-        yield from super().modify_tensors(data_torch, name, bid)
+        yield from ModelBase.modify_tensors(self, data_torch, name, bid)
 
 
 @ModelBase.register(
@@ -8918,7 +8918,7 @@ class GraniteHybridModel(Mamba2Model, GraniteMoeModel):
             return Mamba2Model.modify_tensors(self, data_torch, name, bid)
         elif bid in self._attn_layers:
             return GraniteMoeModel.modify_tensors(self, data_torch, name, bid)
-        yield from super().modify_tensors(data_torch, name, bid)
+        yield from ModelBase.modify_tensors(self, data_torch, name, bid)
 
     def set_gguf_parameters(self):
         """This method merges params from both parents and some that are
@@ -9050,33 +9050,33 @@ class NemotronHModel(GraniteHybridModel):
         if self.is_moe and bid is not None:
             if name.endswith("mixer.gate.e_score_correction_bias"):
                 new_name = name.replace("e_score_correction_bias", "e_score_correction.bias")
-                yield from super().modify_tensors(data_torch, new_name, bid)
+                yield from ModelBase.modify_tensors(self, data_torch, new_name, bid)
                 return
 
             if name.endswith("mixer.dt_bias"):
                 new_name = name.replace("dt_bias", "dt.bias")
-                yield from super().modify_tensors(data_torch, new_name, bid)
+                yield from ModelBase.modify_tensors(self, data_torch, new_name, bid)
                 return
 
             if name.endswith("mixer.conv1d.weight"):
                 squeezed_data = data_torch.squeeze()
-                yield from super().modify_tensors(squeezed_data, name, bid)
+                yield from ModelBase.modify_tensors(self, squeezed_data, name, bid)
                 return
 
             if name.endswith("mixer.A_log"):
                 transformed_data = -torch.exp(data_torch)
                 reshaped_data = transformed_data.squeeze().reshape(-1, 1)
-                yield from super().modify_tensors(reshaped_data, name, bid)
+                yield from ModelBase.modify_tensors(self, reshaped_data, name, bid)
                 return
 
             if name.endswith("mixer.D"):
                 reshaped_data = data_torch.squeeze().reshape(-1, 1)
-                yield from super().modify_tensors(reshaped_data, name, bid)
+                yield from ModelBase.modify_tensors(self, reshaped_data, name, bid)
                 return
 
             if name.endswith("mixer.norm.weight"):
                 reshaped_data = data_torch.reshape(self.n_group, -1)
-                yield from super().modify_tensors(reshaped_data, name, bid)
+                yield from ModelBase.modify_tensors(self, reshaped_data, name, bid)
                 return
 
             if name.find("mixer.experts") != -1:
@@ -9101,7 +9101,7 @@ class NemotronHModel(GraniteHybridModel):
                     data_torch = torch.stack(datas, dim=0)
                     merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
 
-                    yield from super().modify_tensors(data_torch, merged_name, bid)
+                    yield from ModelBase.modify_tensors(self, data_torch, merged_name, bid)
                 return
             else:
                 return
@@ -10731,7 +10731,7 @@ class CogVLMModel(LlamaModel):
         if name.startswith("model.vision."):
             return
 
-        yield from super().modify_tensors(data_torch, name, bid)
+        yield from ModelBase.modify_tensors(self, data_torch, name, bid)
 
 
 @ModelBase.register("JanusForConditionalGeneration")