From 52da4a4f8c28d063378d54dd806da03614251e76 Mon Sep 17 00:00:00 2001
From: Ed Addario
Date: Wed, 20 Aug 2025 17:26:05 +0100
Subject: [PATCH] Skip if output.weight or type is COPY

---
 src/llama-quant.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index c412191c8f..786adfe547 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -697,6 +697,9 @@ static std::unordered_map target_bpw_type(
         q &= name.find("time_mix_decay_w2.weight") == std::string::npos;
         q &= name.find("time_mix_lerp_fused.weight") == std::string::npos;
         q &= name.find("attn_rel_b.weight") == std::string::npos;
+        q &= params->quantize_output_tensor || name != "output.weight";
+        q &= !params->only_copy;
+
 
         return q;
     };