convert : use F32 for dequant of pack-quantized tensors
This commit is contained in:
parent
3770d9410d
commit
128118fdbe
|
|
@@ -364,7 +364,7 @@ class ModelBase:
|
||||||
unpacked = unpacked.reshape(shape[0], (unpacked.shape[-1] + group_size - 1) // group_size, group_size)
|
unpacked = unpacked.reshape(shape[0], (unpacked.shape[-1] + group_size - 1) // group_size, group_size)
|
||||||
unpacked = unpacked - offset
|
unpacked = unpacked - offset
|
||||||
|
|
||||||
return (unpacked * scale.unsqueeze(-1)).reshape(shape)
|
return (unpacked * scale.unsqueeze(-1).float()).reshape(shape)
|
||||||
|
|
||||||
if quant_method == "bitnet":
|
if quant_method == "bitnet":
|
||||||
for name in self.model_tensors.keys():
|
for name in self.model_tensors.keys():
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue