model : make minicpm embedding_scale, residual_scale and logit_scale optional with legacy defaults (#16273)
* minicpm: make GGUF scaling keys optional with legacy defaults
Older MiniCPM GGUFs do not include the scaling metadata keys (minicpm.embedding_scale, minicpm.residual_scale, minicpm.logit_scale). The loader currently treats these as required, so quantization fails with:
key not found in model: minicpm.embedding_scale
This change restores backward compatibility by treating these keys as optional in the loader and using the older MiniCPM scaling values:
embedding_scale = 12.0f
residual_scale = 1.4f / sqrt(n_layer)
logit_scale = 256.0f / n_embd (falls back to 1.0f when n_embd is 0)
When the GGUF provides the keys, their values override the defaults; otherwise the legacy defaults are used. Newer GGUFs that already include these keys are unaffected.
Fixes: #16192
Signed-off-by: Vinkal Chudgar <vinkal.chudgar@gmail.com>
* Update src/llama-model.cpp
Committed as suggested. Thanks!
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
---------
Signed-off-by: Vinkal Chudgar <vinkal.chudgar@gmail.com>
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
This commit is contained in:
parent
624207e676
commit
72b24d96c6
|
|
@ -675,10 +675,17 @@ void llama_model::load_hparams(llama_model_loader & ml) {
|
|||
} break;
|
||||
case LLM_ARCH_MINICPM:
|
||||
{
|
||||
// Backward-compatible defaults for older MiniCPM GGUFs
|
||||
hparams.f_embedding_scale = 12.0f;
|
||||
hparams.f_residual_scale = 1.4f / sqrtf(float(hparams.n_layer));
|
||||
hparams.f_logit_scale = hparams.n_embd ? (256.0f / float(hparams.n_embd)) : 1.0f;
|
||||
|
||||
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
|
||||
ml.get_key(LLM_KV_EMBEDDING_SCALE, hparams.f_embedding_scale);
|
||||
ml.get_key(LLM_KV_RESIDUAL_SCALE, hparams.f_residual_scale);
|
||||
ml.get_key(LLM_KV_LOGIT_SCALE, hparams.f_logit_scale);
|
||||
|
||||
// Optional KV reads, override defaults if present in newer GGUF exports
|
||||
ml.get_key(LLM_KV_EMBEDDING_SCALE, hparams.f_embedding_scale, /*required=*/false);
|
||||
ml.get_key(LLM_KV_RESIDUAL_SCALE, hparams.f_residual_scale, /*required=*/false);
|
||||
ml.get_key(LLM_KV_LOGIT_SCALE, hparams.f_logit_scale, /*required=*/false);
|
||||
|
||||
// MiniCPM uses rope by default, unlike Granite which uses it as a switch
|
||||
hparams.rope_finetuned = true;
|
||||
|
|
|
|||
Loading…
Reference in New Issue