diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index c412191c8f..786adfe547 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -697,6 +697,9 @@ static std::unordered_map target_bpw_type( q &= name.find("time_mix_decay_w2.weight") == std::string::npos; q &= name.find("time_mix_lerp_fused.weight") == std::string::npos; q &= name.find("attn_rel_b.weight") == std::string::npos; + q &= params->quantize_output_tensor || name != "output.weight"; + q &= !params->only_copy; + return q; };