From 4fcd87cf7cbb131b3e28e121b29cc588e460eb40 Mon Sep 17 00:00:00 2001 From: Aleksei Nikiforov <103434461+AlekseiNikiforovIBM@users.noreply.github.com> Date: Thu, 27 Nov 2025 11:35:38 +0100 Subject: [PATCH] gguf-py : skip endian-conversion of MXFP4 data (#17523) * gguf_convert_endian.py: skip MXFP4 data * Use gguf.constants.GGML_QUANT_SIZES to determine block sizes --- gguf-py/gguf/scripts/gguf_convert_endian.py | 30 +++++++++------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/gguf-py/gguf/scripts/gguf_convert_endian.py b/gguf-py/gguf/scripts/gguf_convert_endian.py index 0bda490a20..86bf87846c 100755 --- a/gguf-py/gguf/scripts/gguf_convert_endian.py +++ b/gguf-py/gguf/scripts/gguf_convert_endian.py @@ -19,6 +19,11 @@ import gguf logger = logging.getLogger("gguf-convert-endian") +def byteswap_noop(tensor, block_offs): + # this function is used when byteswapping is not needed + pass + + def byteswap_q4_0(tensor, block_offs): # Each block_q4_0 consists of an f16 delta (scaling factor) followed by 16 int8 quantizations. 
@@ -55,22 +60,11 @@ def byteswap_q6_k(tensor, block_offs): byteswap_tensors = { - gguf.GGMLQuantizationType.Q4_0: { - "block_size": 18, # 18 bytes = <f16 delta scaling factor> + 16 * <int8 quant> - "byteswap_func": byteswap_q4_0, - }, - gguf.GGMLQuantizationType.Q8_0: { - "block_size": 34, # 34 bytes = <f16 delta scaling factor> + 32 * <int8 quant> - "byteswap_func": byteswap_q8_0, - }, - gguf.GGMLQuantizationType.Q4_K: { - "block_size": 144, # 144 bytes = 2 * <f16 delta scaling factor> + 140 * <int8 quant> - "byteswap_func": byteswap_q4_k, - }, - gguf.GGMLQuantizationType.Q6_K: { - "block_size": 210, # 210 bytes = <f16 delta scaling factor> + 208 * <int8 quant> - "byteswap_func": byteswap_q6_k, - }, + gguf.GGMLQuantizationType.Q4_0: byteswap_q4_0, + gguf.GGMLQuantizationType.Q8_0: byteswap_q8_0, + gguf.GGMLQuantizationType.Q4_K: byteswap_q4_k, + gguf.GGMLQuantizationType.Q6_K: byteswap_q6_k, + gguf.GGMLQuantizationType.MXFP4: byteswap_noop, } @@ -135,8 +129,8 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None tensor.data.resize(newshape) - block_size = byteswap_tensors[tensor.tensor_type]["block_size"] - byteswap_func = byteswap_tensors[tensor.tensor_type]["byteswap_func"] + block_size = gguf.constants.GGML_QUANT_SIZES[tensor.tensor_type][1] + byteswap_func = byteswap_tensors[tensor.tensor_type] n_blocks = len(tensor.data) // block_size for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):