diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index be83e3108e..464ecbaab9 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -8772,11 +8772,7 @@ class ExaoneMoEModel(Exaone4Model):
         self.gguf_writer.add_expert_weights_norm(self.hparams["norm_topk_prob"])
         n_dense_layer = self.hparams.get("first_k_dense_replace", self.hparams.get("first_last_k_dense_replace", 0))
         self.gguf_writer.add_leading_dense_block_count(n_dense_layer)
-        # For here, we hard-code the number of NextN/MTP layers to 1 for K-EXAONE,
-        # so that we can convert MTP weights to GGUF format for speculative decoding.
-        # This is because HF config of K-EXAONE does not have `num_nextn_predict_layers` at now.
-        # Will be updated when HF config is updated.
-        self.gguf_writer.add_nextn_predict_layers(self.hparams.get("num_nextn_predict_layers", 1))
+        self.gguf_writer.add_nextn_predict_layers(self.hparams.get("num_nextn_predict_layers", 0))

         self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 75f9691807..eaedc66b63 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -1942,16 +1942,12 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                 hparams.rope_freq_scale_train_swa = hparams.rope_freq_scale_train;
                 ml.get_key(LLM_KV_ROPE_FREQ_BASE_SWA, hparams.rope_freq_base_train_swa, false);
-                ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa, true);
+                ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa);

                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
-                ml.get_key(LLM_KV_EXPERT_COUNT, hparams.n_expert);
-                ml.get_key(LLM_KV_EXPERT_USED_COUNT, hparams.n_expert_used);
                 ml.get_key(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared, false);
                 ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
                 ml.get_key(LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, hparams.n_ff_shexp, false);
-                ml.get_key(LLM_KV_EXPERT_GROUP_COUNT, hparams.n_expert_groups, false);
-                ml.get_key(LLM_KV_EXPERT_GROUP_USED_COUNT, hparams.n_group_used, false);
-                ml.get_key(LLM_KV_EXPERT_GATING_FUNC, hparams.expert_gating_func, false);
+                ml.get_key(LLM_KV_EXPERT_GATING_FUNC, hparams.expert_gating_func);
                 ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale, false);
                 ml.get_key(LLM_KV_EXPERT_WEIGHTS_NORM, hparams.expert_weights_norm, false);
                 ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead);
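
Note on what these hunks change behaviorally: the converter hunk drops the hard-coded fallback of 1 for `num_nextn_predict_layers`, so NextN/MTP metadata is written only when the HF config actually declares it. The loader hunk relies on `llama_model_loader::get_key` taking an optional `required` flag that defaults to true in llama.cpp, so removing a trailing `true` is cosmetic, while removing the trailing `false` from the `LLM_KV_EXPERT_GATING_FUNC` read makes that key mandatory. Below is a minimal sketch of that required-key pattern, using a hypothetical stand-in for the loader (simplified signature and string keys, not the actual GGUF metadata API):

    #include <iostream>
    #include <map>
    #include <stdexcept>
    #include <string>

    // Hypothetical stand-in for llama_model_loader::get_key: returns whether
    // the key was found; throws only when the key is missing AND required.
    // `required` defaulting to true is why omitting the third argument is
    // equivalent to passing true explicitly.
    static bool get_key(const std::map<std::string, int> & kv,
                        const std::string & key, int & out, bool required = true) {
        auto it = kv.find(key);
        if (it == kv.end()) {
            if (required) {
                throw std::runtime_error("key not found: " + key);
            }
            return false; // optional key: leave `out` untouched
        }
        out = it->second;
        return true;
    }

    int main() {
        std::map<std::string, int> kv = { { "expert_gating_func", 2 } };
        int gating = 0, n_shexp = 0;
        get_key(kv, "expert_gating_func", gating);          // required, as after this diff
        get_key(kv, "expert_shared_count", n_shexp, false); // optional: silently skipped
        std::cout << gating << ' ' << n_shexp << '\n';      // prints: 2 0
        return 0;
    }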