From 72b24d96c6888c609d562779a23787304ae4609c Mon Sep 17 00:00:00 2001 From: Vinkal Date: Sat, 27 Sep 2025 02:58:29 +0530 Subject: [PATCH] model : make minicpm embedding_scale, residual_scale and logit_scale optional with legacy defaults (#16273) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * minicpm: make GGUF scaling keys optional with legacy defaults Older MiniCPM GGUFs do not include the scaling metadata keys (minicpm.embedding_scale, minicpm.residual_scale, minicpm.logit_scale). The loader currently treats these as required, so quantization fails with: key not found in model: minicpm.embedding_scale This change restores backward compatibility by treating these keys as optional in the loader and using the older MiniCPM scaling values: embedding_scale = 12.0f residual_scale = 1.4f / sqrt(n_layer) logit_scale = 256.0f / n_embd When the GGUF provides the keys, their values override the defaults; otherwise the legacy defaults are used. Newer GGUFs that already include these keys are unaffected. Fixes: #16192 Signed-off-by: Vinkal Chudgar * Update src/llama-model.cpp Committed as suggested. Thanks! Co-authored-by: Sigbjørn Skjæret --------- Signed-off-by: Vinkal Chudgar Co-authored-by: Sigbjørn Skjæret --- src/llama-model.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/llama-model.cpp b/src/llama-model.cpp index 2ae9abb446..ffd9286ef8 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -675,10 +675,17 @@ void llama_model::load_hparams(llama_model_loader & ml) { } break; case LLM_ARCH_MINICPM: { + // Backward-compatible defaults for older MiniCPM GGUFs + hparams.f_embedding_scale = 12.0f; + hparams.f_residual_scale = 1.4f / sqrtf(float(hparams.n_layer)); + hparams.f_logit_scale = hparams.n_embd ? (256.0f / float(hparams.n_embd)) : 1.0f; + ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); - ml.get_key(LLM_KV_EMBEDDING_SCALE, hparams.f_embedding_scale); - ml.get_key(LLM_KV_RESIDUAL_SCALE, hparams.f_residual_scale); - ml.get_key(LLM_KV_LOGIT_SCALE, hparams.f_logit_scale); + + // Optional KV reads, override defaults if present in newer GGUF exports + ml.get_key(LLM_KV_EMBEDDING_SCALE, hparams.f_embedding_scale, /*required=*/false); + ml.get_key(LLM_KV_RESIDUAL_SCALE, hparams.f_residual_scale, /*required=*/false); + ml.get_key(LLM_KV_LOGIT_SCALE, hparams.f_logit_scale, /*required=*/false); // MiniCPM uses rope by default, unlike Granite which uses it as a switch hparams.rope_finetuned = true;