convert : allow quantizing lora again (#17453)
parent b8372eecd9
commit b61de2b2df
convert_hf_to_gguf.py
@@ -565,7 +565,7 @@ class ModelBase:
                             gguf.MODEL_TENSOR.ALTUP_PREDICT_COEF,
                         )
                     )
-                    or not new_name.endswith(".weight")
+                    or new_name[-7:] not in (".weight", ".lora_a", ".lora_b")
                 ):
                     data_qtype = gguf.GGMLQuantizationType.F32
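The key change: the old condition forced any tensor whose name does not end in ".weight" down to F32, which caught LoRA adapter tensors (named with ".lora_a"/".lora_b" suffixes) and silently disabled their quantization. The new condition accepts all three suffixes, each of which is conveniently exactly seven characters long. Below is a minimal sketch of the check in isolation; can_quantize and the sample tensor names are hypothetical, and the real code in ModelBase.prepare_tensors also consults the per-tensor overrides shown above before falling back to F32.

    # Minimal sketch of the suffix check; can_quantize and the sample
    # names are hypothetical, not part of the convert script itself.
    QUANTIZABLE_SUFFIXES = (".weight", ".lora_a", ".lora_b")  # each exactly 7 chars

    def can_quantize(new_name: str) -> bool:
        # Old check was new_name.endswith(".weight"), which rejected LoRA tensors.
        return new_name[-7:] in QUANTIZABLE_SUFFIXES

    assert can_quantize("blk.0.attn_q.weight")
    assert can_quantize("blk.0.attn_q.weight.lora_a")  # quantizable again
    assert not can_quantize("output_norm.bias")        # still forced to F32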
convert_lora_to_gguf.py
@@ -242,7 +242,7 @@ def parse_args() -> argparse.Namespace:
         help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
     )
     parser.add_argument(
-        "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "auto"], default="f16",
+        "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "auto"], default="f32",
         help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
     )
     parser.add_argument(
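The second hunk flips the default --outtype for LoRA conversion from f16 to f32, so adapters stay lossless unless quantization is requested explicitly; with the suffix fix above, passing --outtype q8_0 actually takes effect again. A standalone sketch of the updated flag, assuming it lives in convert_lora_to_gguf.py's parse_args() (only this one argument is reproduced):

    import argparse

    # Standalone reproduction of the flag with its new default; this parser
    # is a sketch, not the script's full argument set.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "auto"], default="f32",
        help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, "
             "q8_0 for Q8_0, auto for the highest-fidelity 16-bit float type "
             "depending on the first loaded tensor type",
    )

    assert parser.parse_args([]).outtype == "f32"                      # new default: lossless
    assert parser.parse_args(["--outtype", "q8_0"]).outtype == "q8_0"  # opt-in quantization

A plausible rationale, not stated in the commit itself: adapter files are small relative to base models, so defaulting to lossless f32 costs little disk space, while users who want smaller adapters can opt into q8_0 explicitly.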