Merge branch 'add-fh1-rebased' of https://github.com/tiiuae/llama.cpp-public into add-fh1-rebased
Commit f028a43a91
@@ -6665,7 +6665,6 @@ class FalconH1Model(Mamba2Model):
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
         self.gguf_writer.add_key_length(self.hparams["head_dim"])
         self.gguf_writer.add_value_length(self.hparams["head_dim"])
-        self.gguf_writer.add_float64("falcon_h1.key_multiplier", self.hparams["key_multiplier"])
 
         ## Validation ##
         assert self.hparams.get("hidden_act") in [None, "silu"], "Only SILU activation supported"
@@ -6673,17 +6672,16 @@ class FalconH1Model(Mamba2Model):
 
 
         # Add Falcon Mamba2 specific configuration
-        self.gguf_writer.add_uint32("falcon_h1.attention.head_dim", self.hparams["head_dim"])
-        self.gguf_writer.add_uint32("falcon_h1.ssm.mamba_d_inner", self.hparams["mamba_d_ssm"])
+        self.gguf_writer.add_ssm_head_dim(self.hparams["mamba_d_head"])
         self.gguf_writer.add_ssm_inner_size(self.hparams["mamba_d_ssm"])
-        self.gguf_writer.add_uint32("falcon_h1.num_attention_heads", self.find_hparam(["num_attention_heads"]))
-        self.gguf_writer.add_uint32("falcon_h1.num_key_value_heads",
-                                    self.find_hparam(["num_key_value_heads"], optional=True) or
-                                    self.find_hparam(["num_attention_heads"]))
+        self.gguf_writer.add_head_count(self.find_hparam(["num_attention_heads"]))
+        self.gguf_writer.add_key_length(self.hparams["head_dim"])
+        self.gguf_writer.add_value_length(self.hparams["head_dim"])
+        self.gguf_writer.add_head_count_kv(self.find_hparam(["num_key_value_heads"], optional=True) or
+                                           self.find_hparam(["num_attention_heads"]))
 
 
         # Add any other Falcon Mamba2 specific configuration
-        self.gguf_writer.add_bool("falcon_h1.mamba_rms_norm", self.find_hparam(["mamba_rms_norm"], optional=True))
         self.gguf_writer.add_rope_freq_base(self.find_hparam(["rope_theta"]))
 
 ###### CONVERSION LOGIC ######
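The two hunks above come from the Falcon-H1 converter (class FalconH1Model in convert_hf_to_gguf.py): hand-rolled "falcon_h1.*" keys written via add_uint32()/add_float64()/add_bool() give way to the standard GGUFWriter helpers. As a minimal sketch, assuming llama.cpp's bundled gguf-py package and an architecture prefix of "falcon-h1" (the prefix is an assumption for illustration, not taken from the commit), the generic metadata keys those helpers emit can be printed directly from the key constants:

# Hedged sketch, not part of the commit: maps the GGUFWriter helpers used above
# to the metadata key names they write, assuming gguf-py from llama.cpp is installed.
import gguf

arch = "falcon-h1"  # assumed GGUF architecture prefix, for illustration only

for key in (
    gguf.Keys.Attention.HEAD_COUNT,     # written by add_head_count()
    gguf.Keys.Attention.HEAD_COUNT_KV,  # written by add_head_count_kv()
    gguf.Keys.Attention.KEY_LENGTH,     # written by add_key_length()
    gguf.Keys.Attention.VALUE_LENGTH,   # written by add_value_length()
    gguf.Keys.SSM.INNER_SIZE,           # written by add_ssm_inner_size()
    gguf.Keys.Rope.FREQ_BASE,           # written by add_rope_freq_base()
):
    print(key.format(arch=arch))

If those assumptions hold, this prints names such as falcon-h1.attention.head_count, which is the generic naming the loader side reads in place of the removed custom keys.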
@@ -128,7 +128,6 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_EMBEDDING_SCALE,            "%s.embedding_scale"           },
     { LLM_KV_TOKEN_SHIFT_COUNT,          "%s.token_shift_count"         },
     { LLM_KV_INTERLEAVE_MOE_LAYER_STEP,  "%s.interleave_moe_layer_step" },
-    { LLM_KV_ATTN_HEAD_DIM,              "%s.attention.head_dim"        },
 
     { LLM_KV_ATTENTION_HEAD_COUNT,       "%s.attention.head_count"      },
     { LLM_KV_ATTENTION_HEAD_COUNT_KV,    "%s.attention.head_count_kv"   },
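The hunk above, from the LLM_KV_NAMES table in src/llama-arch.cpp, drops the bespoke LLM_KV_ATTN_HEAD_DIM entry. A short sketch, using only relationships visible in the converter hunks and made-up numbers, of why a separate head_dim key is redundant once the generic key_length/value_length entries are written:

# Hedged sketch with illustrative values only: in the converter hunk, add_key_length()
# and add_value_length() are both fed self.hparams["head_dim"], so the per-head
# dimension is still recoverable without a dedicated "%s.attention.head_dim" entry.
hparams = {"head_dim": 128}  # made-up value, not from any real checkpoint

key_length   = hparams["head_dim"]  # stored as {arch}.attention.key_length
value_length = hparams["head_dim"]  # stored as {arch}.attention.value_length

head_dim = key_length               # recovers what the removed key used to carry
assert head_dim == value_length == 128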
@@ -1560,7 +1560,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                 ml.get_key(LLM_KV_SSM_STATE_SIZE,     hparams.ssm_d_state);
                 ml.get_key(LLM_KV_SSM_TIME_STEP_RANK, hparams.ssm_dt_rank);
                 ml.get_key(LLM_KV_SSM_GROUP_COUNT,    hparams.ssm_n_group);
-                ml.get_key(LLM_KV_SSM_HEAD_DIM,       hparams.ssm_head_dim);
 
                 std::fill(hparams.recurrent_layer_arr.begin(), hparams.recurrent_layer_arr.end(), true);
 
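The last hunk, from llama_model::load_hparams in src/llama-model.cpp, likewise stops reading LLM_KV_SSM_HEAD_DIM. To check which attention.* and ssm.* keys a converted file actually carries, and hence what the remaining ml.get_key() calls can find, a hedged inspection sketch with gguf-py; the file name is hypothetical:

# Hedged sketch, not part of the commit: list the attention/ssm metadata keys present
# in a converted GGUF file, assuming the gguf-py package bundled with llama.cpp.
from gguf import GGUFReader

reader = GGUFReader("falcon-h1.gguf")  # hypothetical output of convert_hf_to_gguf.py

# reader.fields maps each metadata key name to its field; filtering shows the
# generic key names the loader reads after this change.
for name in reader.fields:
    if ".attention." in name or ".ssm." in name:
        print(name)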