From 5ef493ea1a01385c02ef4c56d38dfe5e116c47c6 Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Thu, 21 Aug 2025 09:48:29 +0100 Subject: [PATCH] Exclude embeddings and output tensor --- src/llama-quant.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index f5fa309c44..32013e47ba 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -697,8 +697,10 @@ static std::unordered_map target_bpw_type( q &= name.find("time_mix_decay_w2.weight") == std::string::npos; q &= name.find("time_mix_lerp_fused.weight") == std::string::npos; q &= name.find("attn_rel_b.weight") == std::string::npos; - q &= params->quantize_output_tensor || name != "output.weight"; q &= !params->only_copy; + // TODO: Exclude embeddings and output tensors? + q &= params->quantize_output_tensor || name != "output.weight"; + q &= name != name_tn(LLM_TENSOR_TOKEN_EMBD, "weight"); return q; };