From 1e9d771e2c2f1113a5ebdd0dc15bafe57dce64be Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Sun, 12 Apr 2026 19:22:29 +0200
Subject: [PATCH] convert : force f16 or f32 on step3-vl conv weights (#21646)

Step3VLVisionModel.tensor_force_quant() now returns an explicit type for
tensors whose converted name contains "mm.0." or "mm.1." and ends in
".weight": F16 when the output file type is MOSTLY_F16, F32 for every
other file type. Previously these tensors fell through to the parent
class's tensor_force_quant().

The existing F32 override for ".position_embd." tensors is unchanged and
is still checked first.
---
 convert_hf_to_gguf.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index c96afc78b6..374a55fb17 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -4992,6 +4992,8 @@ class Step3VLVisionModel(MmprojModel):
     def tensor_force_quant(self, name, new_name, bid, n_dims):
         if ".position_embd." in new_name:
             return gguf.GGMLQuantizationType.F32
+        if ("mm.0." in new_name or "mm.1." in new_name) and new_name.endswith(".weight"):
+            return gguf.GGMLQuantizationType.F16 if self.ftype == gguf.LlamaFileType.MOSTLY_F16 else gguf.GGMLQuantizationType.F32
         return super().tensor_force_quant(name, new_name, bid, n_dims)
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: