convert : handle int-quantized models

Francis Couture-Harpin 2025-11-06 21:11:52 -05:00
parent 33dba6ce02
commit d23bdd57b0
1 changed file with 6 additions and 2 deletions


@@ -411,9 +411,11 @@ class ModelBase:
                 if len(groups) > 1:
                     raise NotImplementedError("Can't handle multiple config groups for compressed-tensors yet")
                 weight_config = tuple(groups.values())[0]["weights"]
-                if quant_format == "float-quantized":
+                if quant_format == "float-quantized" or quant_format == "int-quantized":
                     block_size = weight_config.get("block_structure", None)
-                    assert weight_config.get("strategy") == "channel"
+                    strategy = weight_config.get("strategy")
+                    assert strategy == "channel" or strategy == "block"
                     assert weight_config.get("group_size") == None  # didn't find a model using this yet
                     for name in self.model_tensors.keys():
                         if name.endswith(".weight_scale"):
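For context, the branch above is driven by the model's quantization_config. Below is a minimal sketch of the structure this code path consumes, with illustrative values assembled from the keys the diff actually reads (format, config_groups, weights, strategy, block_structure, group_size) rather than copied from any specific checkpoint:

# Hypothetical example of hparams["quantization_config"] for an
# int-quantized compressed-tensors model; values are illustrative only.
quantization_config = {
    "quant_method": "compressed-tensors",
    "format": "int-quantized",  # previously only "float-quantized" was accepted
    "config_groups": {
        "group_0": {
            "weights": {
                "strategy": "channel",    # "block" is now accepted as well
                "block_structure": None,  # read into block_size above
                "group_size": None,       # asserted unused so far
            },
        },
    },
}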
@@ -444,6 +446,8 @@ class ModelBase:
                             tensors_to_remove += [base_name + n for n in ("_packed", "_shape", "_scale")]
                             if (base_name + "_zero_point") in self.model_tensors:
                                 tensors_to_remove.append(base_name + "_zero_point")
+                else:
+                    raise NotImplementedError(f"Quant format {quant_format!r} for method {quant_method!r} is not yet supported")
             else:
                 raise NotImplementedError(f"Quant method is not yet supported: {quant_method!r}")