convert: rework ftype heuristics (#18214)
* convert: rework ftype heuristics Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> convert: fix type-check Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> convert: bring back heuristics comment Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * convert: revert to using first tensor Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * convert: rework heuristics logic Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * convert: rm redundant float32 check Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com> --------- Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
This commit is contained in:
parent
86af848153
commit
a28310488c
|
|
@ -141,16 +141,24 @@ class ModelBase:
|
|||
self.model_name = model_name
|
||||
self.dir_model_card = dir_model # overridden in convert_lora_to_gguf.py
|
||||
|
||||
# Apply heuristics to figure out typical tensor encoding based on first layer tensor encoding type
|
||||
# Apply heuristics to figure out typical tensor encoding based on first tensor's dtype
|
||||
# NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie.
|
||||
if self.ftype == gguf.LlamaFileType.GUESSED:
|
||||
# NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie.
|
||||
_, first_tensor = next(self.get_tensors())
|
||||
if first_tensor.dtype == torch.float16:
|
||||
logger.info(f"choosing --outtype f16 from first tensor type ({first_tensor.dtype})")
|
||||
self.ftype = gguf.LlamaFileType.MOSTLY_F16
|
||||
for _, tensor in self.get_tensors():
|
||||
if tensor.dim() < 2:
|
||||
continue
|
||||
|
||||
if tensor.dtype == torch.bfloat16:
|
||||
self.ftype = gguf.LlamaFileType.MOSTLY_BF16
|
||||
logger.info("heuristics detected bfloat16 tensor dtype, setting --outtype bf16")
|
||||
break
|
||||
elif tensor.dtype == torch.float16:
|
||||
self.ftype = gguf.LlamaFileType.MOSTLY_F16
|
||||
logger.info("heuristics detected float16 tensor dtype, setting --outtype f16")
|
||||
break
|
||||
else:
|
||||
logger.info(f"choosing --outtype bf16 from first tensor type ({first_tensor.dtype})")
|
||||
self.ftype = gguf.LlamaFileType.MOSTLY_BF16
|
||||
self.ftype = gguf.LlamaFileType.MOSTLY_F16
|
||||
logger.info("heuristics unable to detect tensor dtype, defaulting to --outtype f16")
|
||||
|
||||
self.dequant_model()
|
||||
|
||||
|
|
@ -10557,8 +10565,8 @@ def parse_args() -> argparse.Namespace:
|
|||
help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "tq1_0", "tq2_0", "auto"], default="f16",
|
||||
help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, tq1_0 or tq2_0 for ternary, and auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
|
||||
"--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "tq1_0", "tq2_0", "auto"], default="auto",
|
||||
help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, tq1_0 or tq2_0 for ternary, and auto for the highest-fidelity 16-bit float type",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--bigendian", action="store_true",
|
||||
|
|
|
|||
Loading…
Reference in New Issue