From 37cf51ebd032e63c7901835cdd85a0e7e9109e25 Mon Sep 17 00:00:00 2001
From: Ed Addario
Date: Sun, 30 Nov 2025 00:29:35 +0000
Subject: [PATCH] Process bpw targets up to B/F16

---
 src/llama-quant.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index 44f84ec949..6c6926dee8 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -2089,7 +2089,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
             new_type = default_type;
 
             // get more optimal quantization type based on the tensor shape, layer, etc.
-            if (!params->pure && ggml_is_quantized(default_type)) {
+            if (!params->pure && (ggml_is_quantized(default_type) || params->target_bpw != -1.0f)) {
                 int fallback = qs.n_fallback;
                 new_type = llama_tensor_get_type(qs, new_type, tensor, ftype);
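
Note (editorial, not part of the patch): the one-line change widens the guard so that
per-tensor type selection also runs when the default type is a non-quantized B/F16
format, provided a bits-per-weight target was requested. Below is a minimal,
self-contained C++ sketch of that condition; quantize_params, is_quantized_stub, and
wants_type_override are hypothetical stand-ins for the real llama.cpp structures, and
it assumes target_bpw holds -1.0f when no bpw target is set, as the diff implies.

    #include <cstdio>

    // Hypothetical stand-in for the relevant fields of the real params struct.
    struct quantize_params {
        bool  pure;        // if true, keep the default type for every tensor
        float target_bpw;  // requested bits-per-weight target, or -1.0f if unset
    };

    // Stand-in for ggml_is_quantized(): returns false for B/F16-style types.
    static bool is_quantized_stub(bool default_is_bf16_or_f16) {
        return !default_is_bf16_or_f16;
    }

    // Mirrors the patched condition: type selection now also runs when the
    // default type is B/F16 but a bpw target was requested.
    static bool wants_type_override(const quantize_params & p, bool default_is_f16) {
        return !p.pure && (is_quantized_stub(default_is_f16) || p.target_bpw != -1.0f);
    }

    int main() {
        const quantize_params with_target = { /*pure=*/false, /*target_bpw=*/6.5f };
        const quantize_params no_target   = { /*pure=*/false, /*target_bpw=*/-1.0f };

        // Before the patch the F16 case was always skipped; after it, the
        // override path is taken whenever a bpw target is set.
        std::printf("F16 + bpw target : %d\n", wants_type_override(with_target, true)); // 1
        std::printf("F16, no target   : %d\n", wants_type_override(no_target,  true));  // 0
        return 0;
    }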