From b61de2b2df4ff07e6d6de96320fb311d96908b7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Mon, 24 Nov 2025 15:50:55 +0100 Subject: [PATCH] convert : allow quantizing lora again (#17453) --- convert_hf_to_gguf.py | 2 +- convert_lora_to_gguf.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 6cbaee03df..d24a4682f3 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -565,7 +565,7 @@ class ModelBase: gguf.MODEL_TENSOR.ALTUP_PREDICT_COEF, ) ) - or not new_name.endswith(".weight") + or new_name[-7:] not in (".weight", ".lora_a", ".lora_b") ): data_qtype = gguf.GGMLQuantizationType.F32 diff --git a/convert_lora_to_gguf.py b/convert_lora_to_gguf.py index 57c6cd0df1..b0adde8a8b 100755 --- a/convert_lora_to_gguf.py +++ b/convert_lora_to_gguf.py @@ -242,7 +242,7 @@ def parse_args() -> argparse.Namespace: help="path to write to; default: based on input. {ftype} will be replaced by the outtype.", ) parser.add_argument( - "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "auto"], default="f16", + "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "auto"], default="f32", help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type", ) parser.add_argument(