diff --git a/src/llama-model.cpp b/src/llama-model.cpp index c93e29555b..52408c5257 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -123,6 +123,7 @@ const char * llm_type_name(llm_type type) { case LLM_TYPE_8B_A1B: return "8B.A1B"; case LLM_TYPE_16B_A1B: return "16B.A1B"; case LLM_TYPE_21B_A3B: return "21B.A3B"; + case LLM_TYPE_24B_A2B: return "24B.A2B"; case LLM_TYPE_30B_A3B: return "30B.A3B"; case LLM_TYPE_31B_A3_5B: return "31B.A3.5B"; case LLM_TYPE_35B_A3B: return "35B.A3B"; @@ -2381,7 +2382,11 @@ void llama_model::load_hparams(llama_model_loader & ml) { hparams.recurrent_layer_arr[il] = hparams.n_head_kv(il) == 0; } - type = LLM_TYPE_8B_A1B; + switch (hparams.n_layer) { + case 24: type = LLM_TYPE_8B_A1B; break; + case 40: type = LLM_TYPE_24B_A2B; break; + default: type = LLM_TYPE_UNKNOWN; + } } break; case LLM_ARCH_SMALLTHINKER: { diff --git a/src/llama-model.h b/src/llama-model.h index 422ed45699..96e407a0b3 100644 --- a/src/llama-model.h +++ b/src/llama-model.h @@ -116,6 +116,7 @@ enum llm_type { LLM_TYPE_8B_A1B, // lfm2moe LLM_TYPE_16B_A1B, LLM_TYPE_21B_A3B, // Ernie MoE small + LLM_TYPE_24B_A2B, // lfm2moe LLM_TYPE_30B_A3B, LLM_TYPE_31B_A3_5B, LLM_TYPE_35B_A3B, // Qwen3.5