convert: rework ftype heuristics (#18214)
* convert: rework ftype heuristics

  Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

  convert: fix type-check

  Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

  convert: bring back heuristics comment

  Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* convert: revert to using first tensor

  Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* convert: rework heuristics logic

  Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* convert: rm redundant float32 check

  Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

---------

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
This commit is contained in:
parent
86af848153
commit
a28310488c
|
|
@ -141,16 +141,24 @@ class ModelBase:
|
||||||
self.model_name = model_name
|
self.model_name = model_name
|
||||||
self.dir_model_card = dir_model # overridden in convert_lora_to_gguf.py
|
self.dir_model_card = dir_model # overridden in convert_lora_to_gguf.py
|
||||||
|
|
||||||
# Apply heuristics to figure out typical tensor encoding based on first layer tensor encoding type
|
# Apply heuristics to figure out typical tensor encoding based on first tensor's dtype
|
||||||
|
# NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie.
|
||||||
if self.ftype == gguf.LlamaFileType.GUESSED:
|
if self.ftype == gguf.LlamaFileType.GUESSED:
|
||||||
# NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie.
|
for _, tensor in self.get_tensors():
|
||||||
_, first_tensor = next(self.get_tensors())
|
if tensor.dim() < 2:
|
||||||
if first_tensor.dtype == torch.float16:
|
continue
|
||||||
logger.info(f"choosing --outtype f16 from first tensor type ({first_tensor.dtype})")
|
|
||||||
self.ftype = gguf.LlamaFileType.MOSTLY_F16
|
if tensor.dtype == torch.bfloat16:
|
||||||
|
self.ftype = gguf.LlamaFileType.MOSTLY_BF16
|
||||||
|
logger.info("heuristics detected bfloat16 tensor dtype, setting --outtype bf16")
|
||||||
|
break
|
||||||
|
elif tensor.dtype == torch.float16:
|
||||||
|
self.ftype = gguf.LlamaFileType.MOSTLY_F16
|
||||||
|
logger.info("heuristics detected float16 tensor dtype, setting --outtype f16")
|
||||||
|
break
|
||||||
else:
|
else:
|
||||||
logger.info(f"choosing --outtype bf16 from first tensor type ({first_tensor.dtype})")
|
self.ftype = gguf.LlamaFileType.MOSTLY_F16
|
||||||
self.ftype = gguf.LlamaFileType.MOSTLY_BF16
|
logger.info("heuristics unable to detect tensor dtype, defaulting to --outtype f16")
|
||||||
|
|
||||||
self.dequant_model()
|
self.dequant_model()
|
||||||
|
|
||||||
|
|
@ -10557,8 +10565,8 @@ def parse_args() -> argparse.Namespace:
|
||||||
help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
|
help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "tq1_0", "tq2_0", "auto"], default="f16",
|
"--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "tq1_0", "tq2_0", "auto"], default="auto",
|
||||||
help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, tq1_0 or tq2_0 for ternary, and auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
|
help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, tq1_0 or tq2_0 for ternary, and auto for the highest-fidelity 16-bit float type",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--bigendian", action="store_true",
|
"--bigendian", action="store_true",
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue