From 42b12b560886dc2093b17af11c97ef6d276a3b97 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Tue, 9 Dec 2025 12:15:06 +0100
Subject: [PATCH] model : nit, DeepSeek V1 MoE is 16B and GigaChat is 20B
 (#12652)

* nit, DeepSeek V1 MoE is 16B

* base type on n_ff_exp instead
---
 src/llama-model.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index e09d59e2c1..04fccc9793 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -1606,8 +1606,9 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                 ml.get_key(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared);
                 ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale);
 
-                switch (hparams.n_layer) {
-                    case 28: type = LLM_TYPE_20B; break;
+                switch (hparams.n_ff_exp) {
+                    case 1408: type = LLM_TYPE_16B; break;
+                    case 1792: type = LLM_TYPE_20B; break;
                     default: type = LLM_TYPE_UNKNOWN;
                 }
             } break;
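
Rationale for the change, illustrated: switching on hparams.n_layer could not tell the two models apart (the old code mapped 28 layers to 20B, which mislabeled the 28-layer DeepSeek V1 MoE 16B), so the patch keys the type on the per-expert feed-forward width n_ff_exp instead. Below is a minimal standalone C++ sketch of that mapping, not the llama.cpp code itself; the function name classify_deepseek_moe and the enum llm_type_sketch are hypothetical, and only the 1408 -> 16B and 1792 -> 20B pairs are taken from the diff.

    // Sketch: distinguish DeepSeek V1 MoE 16B from GigaChat 20B by the
    // expert feed-forward width, mirroring the switch added in the patch.
    #include <cstdint>
    #include <cstdio>

    enum llm_type_sketch { TYPE_16B, TYPE_20B, TYPE_UNKNOWN };

    static llm_type_sketch classify_deepseek_moe(uint32_t n_ff_exp) {
        switch (n_ff_exp) {
            case 1408: return TYPE_16B; // DeepSeek V1 MoE 16B
            case 1792: return TYPE_20B; // GigaChat 20B
            default:   return TYPE_UNKNOWN;
        }
    }

    int main() {
        std::printf("%d\n", classify_deepseek_moe(1408)); // prints 0 (TYPE_16B)
        std::printf("%d\n", classify_deepseek_moe(1792)); // prints 1 (TYPE_20B)
        return 0;
    }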