model : detect GigaChat3-10B-A1.8B as deepseek lite (#17420)
* Detect GigaChat3-10B-A1.8B as deepseek lite. Hardcodes a check on the number of layers to detect whether the model is a lite version of deepseek. * Add comment identifying deepseek lite variants. Deepseek lite variants include DeepSeek-V2-Lite and GigaChat3-10B-A1.8B.
This commit is contained in:
parent
28175f857d
commit
23bc779a6e
|
|
@ -1593,7 +1593,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
|
||||||
} break;
|
} break;
|
||||||
case LLM_ARCH_DEEPSEEK2:
|
case LLM_ARCH_DEEPSEEK2:
|
||||||
{
|
{
|
||||||
bool is_lite = (hparams.n_layer == 27);
|
// lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
|
||||||
|
bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
|
||||||
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
|
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
|
||||||
ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead);
|
ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead);
|
||||||
if (!is_lite) {
|
if (!is_lite) {
|
||||||
|
|
@ -4581,7 +4582,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
|
||||||
} break;
|
} break;
|
||||||
case LLM_ARCH_DEEPSEEK2:
|
case LLM_ARCH_DEEPSEEK2:
|
||||||
{
|
{
|
||||||
const bool is_lite = (hparams.n_layer == 27);
|
// lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
|
||||||
|
const bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
|
||||||
|
|
||||||
const bool is_mla = (hparams.n_embd_head_k_mla != 0 && hparams.n_embd_head_v_mla != 0);
|
const bool is_mla = (hparams.n_embd_head_k_mla != 0 && hparams.n_embd_head_v_mla != 0);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,8 @@
|
||||||
|
|
||||||
llm_build_deepseek2::llm_build_deepseek2(const llama_model & model, const llm_graph_params & params) :
|
llm_build_deepseek2::llm_build_deepseek2(const llama_model & model, const llm_graph_params & params) :
|
||||||
llm_graph_context(params) {
|
llm_graph_context(params) {
|
||||||
bool is_lite = (hparams.n_layer == 27);
|
// lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
|
||||||
|
bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
|
||||||
|
|
||||||
const bool is_mla = (hparams.n_embd_head_k_mla != 0 && hparams.n_embd_head_v_mla != 0);
|
const bool is_mla = (hparams.n_embd_head_k_mla != 0 && hparams.n_embd_head_v_mla != 0);
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue