convert : use F32 for dequant of pack-quantized tensors

This commit is contained in:
Francis Couture-Harpin 2025-11-06 21:59:32 -05:00
parent 3770d9410d
commit 128118fdbe
1 changed file with 1 addition and 1 deletion

View File

@ -364,7 +364,7 @@ class ModelBase:
                unpacked = unpacked.reshape(shape[0], (unpacked.shape[-1] + group_size - 1) // group_size, group_size)
                unpacked = unpacked - offset
-               return (unpacked * scale.unsqueeze(-1)).reshape(shape)
+               return (unpacked * scale.unsqueeze(-1).float()).reshape(shape)
            if quant_method == "bitnet":
                for name in self.model_tensors.keys():