convert : use F32 for dequant of pack-quantized tensors

This commit is contained in:
Francis Couture-Harpin 2025-11-06 21:59:32 -05:00
parent 3770d9410d
commit 128118fdbe
1 changed file with 1 addition and 1 deletion

View File

@ -364,7 +364,7 @@ class ModelBase:
                unpacked = unpacked.reshape(shape[0], (unpacked.shape[-1] + group_size - 1) // group_size, group_size)
                unpacked = unpacked - offset
-               return (unpacked * scale.unsqueeze(-1)).reshape(shape)
+               return (unpacked * scale.unsqueeze(-1).float()).reshape(shape)
            if quant_method == "bitnet":
                for name in self.model_tensors.keys():