model : nit, DeepSeek V1 MoE is 16B and GigaChat is 20B (#12652)
* nit: DeepSeek V1 MoE is 16B (not 20B)
* determine the model type from n_ff_exp instead of n_layer
This commit is contained in:
parent
4e842d5120
commit
42b12b5608
|
|
@ -1606,8 +1606,9 @@ void llama_model::load_hparams(llama_model_loader & ml) {
|
||||||
ml.get_key(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared);
|
ml.get_key(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared);
|
||||||
ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale);
|
ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale);
|
||||||
|
|
||||||
switch (hparams.n_layer) {
|
switch (hparams.n_ff_exp) {
|
||||||
case 28: type = LLM_TYPE_20B; break;
|
case 1408: type = LLM_TYPE_16B; break;
|
||||||
|
case 1792: type = LLM_TYPE_20B; break;
|
||||||
default: type = LLM_TYPE_UNKNOWN;
|
default: type = LLM_TYPE_UNKNOWN;
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue