From a1ce428004783bc60511c2048b519138e8ad0698 Mon Sep 17 00:00:00 2001 From: "Yu, Zijun" Date: Fri, 8 Aug 2025 11:07:10 +0800 Subject: [PATCH] Fix Q4_1 --- ggml/src/ggml-openvino/ggml-quants.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ggml/src/ggml-openvino/ggml-quants.cpp b/ggml/src/ggml-openvino/ggml-quants.cpp index 8d4fb14189..e969b0b54a 100644 --- a/ggml/src/ggml-openvino/ggml-quants.cpp +++ b/ggml/src/ggml-openvino/ggml-quants.cpp @@ -15,8 +15,8 @@ void unpack_32_4(const uint8_t* data, uint8_t* dst) { std::fill_n(dst, 16, 0); for (int j = 0; j < 16; ++j) { - uint8_t x = (data[j + 2] & 0x0F); // j+2 to skip scale bytes. - uint8_t y = (data[j + 2] >> 4); + uint8_t x = (data[j] & 0x0F); + uint8_t y = (data[j] >> 4); if (j % 2 != 0) { x <<= 4; y <<= 4; @@ -41,7 +41,7 @@ void extract_q4_0_data(const ggml_tensor* tensor, ov::parallel_for(scales_arr.get_size(), [&](size_t i) { scales[i] = ov::float16::from_bits(*((uint16_t*)(data + i * bytes_per_block))); biases[i] = ov::float16(-8.f * static_cast<float>(scales[i])); - unpack_32_4(data + i * bytes_per_block, weights + i * 16); + unpack_32_4(data + i * bytes_per_block + 2, weights + i * 16); }); } @@ -58,8 +58,8 @@ void extract_q4_1_data(const ggml_tensor* tensor, auto biases = biases_arr.data<ov::element_type_traits<ov::element::f16>::value_type>(); ov::parallel_for(scales_arr.get_size(), [&](size_t i) { scales[i] = ov::float16::from_bits(*((uint16_t*)(data + i * bytes_per_block))); - biases[i] = ov::float16::from_bits(*((uint16_t*)(data + i * bytes_per_block + 1))); - unpack_32_4(data + i * bytes_per_block, weights + i * 16); + biases[i] = ov::float16::from_bits(*((uint16_t*)(data + i * bytes_per_block + 2))); + unpack_32_4(data + i * bytes_per_block + 4, weights + i * 16); }); }