diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index f5fa309c44..32013e47ba 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -697,8 +697,10 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
         q &= name.find("time_mix_decay_w2.weight") == std::string::npos;
         q &= name.find("time_mix_lerp_fused.weight") == std::string::npos;
         q &= name.find("attn_rel_b.weight") == std::string::npos;
-        q &= params->quantize_output_tensor || name != "output.weight";
         q &= !params->only_copy;
+        // TODO: Exclude embeddings and output tensors?
+        q &= params->quantize_output_tensor || name != "output.weight";
+        q &= name != name_tn(LLM_TENSOR_TOKEN_EMBD, "weight");
 
         return q;
     };