convert : force f16 or f32 on step3-vl conv weights (#21646)

This commit is contained in:
Sigbjørn Skjæret 2026-04-12 19:22:29 +02:00 committed by GitHub
parent aa4695c5e5
commit 1e9d771e2c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 2 additions and 0 deletions

View File

@@ -4992,6 +4992,8 @@ class Step3VLVisionModel(MmprojModel):
def tensor_force_quant(self, name, new_name, bid, n_dims):
    """Pick a forced quantization type for selected vision tensors.

    The decision is based only on the mapped tensor name and the
    converter's target file type:

    * names containing ``.position_embd.`` are always kept as F32;
    * names containing ``mm.0.`` or ``mm.1.`` and ending in ``.weight``
      become F16 when ``self.ftype`` is ``MOSTLY_F16`` and F32 for every
      other file type;
    * anything else is delegated to the parent implementation.

    Args:
        name: Original tensor name (only forwarded to the parent class).
        new_name: Mapped tensor name that the rules above are matched on.
        bid: Block index (only forwarded to the parent class).
        n_dims: Tensor dimension count (only forwarded to the parent class).

    Returns:
        A ``gguf.GGMLQuantizationType`` member for the cases handled here,
        otherwise whatever the parent's ``tensor_force_quant`` returns.
    """
    quant_type = gguf.GGMLQuantizationType

    if ".position_embd." in new_name:
        return quant_type.F32

    # Only F16 or F32 is ever chosen for the mm.0 / mm.1 weights, so a
    # quantized target file type cannot apply another type to them here.
    is_weight = new_name.endswith(".weight")
    if is_weight and any(tag in new_name for tag in ("mm.0.", "mm.1.")):
        if self.ftype == gguf.LlamaFileType.MOSTLY_F16:
            return quant_type.F16
        return quant_type.F32

    return super().tensor_force_quant(name, new_name, bid, n_dims)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: