From 128118fdbed9f07bf85849edd810daf12f70e92a Mon Sep 17 00:00:00 2001 From: Francis Couture-Harpin Date: Thu, 6 Nov 2025 21:59:32 -0500 Subject: [PATCH] convert : use F32 for dequant of pack-quantized tensors --- convert_hf_to_gguf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index bc68be066c..b155d112b1 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -364,7 +364,7 @@ class ModelBase: unpacked = unpacked.reshape(shape[0], (unpacked.shape[-1] + group_size - 1) // group_size, group_size) unpacked = unpacked - offset - return (unpacked * scale.unsqueeze(-1)).reshape(shape) + return (unpacked * scale.unsqueeze(-1).float()).reshape(shape) if quant_method == "bitnet": for name in self.model_tensors.keys():