model : nit, DeepSeek V1 MoE is 16B and GigaChat is 20B (#12652)

* nit, DeepSeek V1 MoE is 16B

* base type on n_ff_exp instead
This commit is contained in:
Sigbjørn Skjæret 2025-12-09 12:15:06 +01:00 committed by GitHub
parent 4e842d5120
commit 42b12b5608
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 3 additions and 2 deletions

View File

@@ -1606,8 +1606,9 @@ void llama_model::load_hparams(llama_model_loader & ml) {
 ml.get_key(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared);
 ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale);
-switch (hparams.n_layer) {
-case 28: type = LLM_TYPE_20B; break;
+switch (hparams.n_ff_exp) {
+case 1408: type = LLM_TYPE_16B; break;
+case 1792: type = LLM_TYPE_20B; break;
 default: type = LLM_TYPE_UNKNOWN;
 }
 } break;