From 3d75b14c0f2fc605fb39a3cb425c4c2482b8d8f5 Mon Sep 17 00:00:00 2001
From: Ed Addario
Date: Sat, 27 Sep 2025 17:27:58 +0100
Subject: [PATCH] Simplify dequantisation

---
 src/llama-quant.cpp | 29 ++++++++++-------------------
 1 file changed, 10 insertions(+), 19 deletions(-)

diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index 601b9ada42..316dd35fa8 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -819,25 +819,16 @@ static std::unordered_map target_bpw_type(
         // Dequantize into dequantized_buffer
         {
             const ggml_type_traits * traits = ggml_get_type_traits(quant_type);
-            if (traits && traits->to_float && quant_type != GGML_TYPE_F16 && quant_type != GGML_TYPE_BF16) {
-                traits->to_float(quantized_buffer.data(), dequantized_buffer.data(), (int)(sample_rows * (size_t)n_per_row));
-            } else {
-                for (size_t r = 0; r < sample_rows; ++r) {
-                    const uint8_t * src = quantized_buffer.data() + r * row_sz;
-                    float * dst = dequantized_buffer.data() + r * (size_t)n_per_row;
-                    if (quant_type == GGML_TYPE_F16) {
-                        ggml_fp16_to_fp32_row((const ggml_fp16_t *)src, dst, (int)n_per_row);
-                    } else if (quant_type == GGML_TYPE_BF16) {
-                        ggml_bf16_to_fp32_row((const ggml_bf16_t *)src, dst, (int)n_per_row);
-                    } else {
-                        if (!traits || !traits->to_float) {
-                            if (out_mse) { *out_mse = infinity; }
-                            if (out_proj) { *out_proj = 0.0; }
-                            return infinity;
-                        }
-                        traits->to_float(src, dst, (int)n_per_row);
-                    }
-                }
+            if (!traits || !traits->to_float) {
+                if (out_mse) { *out_mse = infinity; }
+                if (out_proj) { *out_proj = 0.0; }
+                return infinity;
+            }
+
+            for (size_t r = 0; r < sample_rows; ++r) {
+                const uint8_t * src = quantized_buffer.data() + r * row_sz;
+                float * dst = dequantized_buffer.data() + r * (size_t)n_per_row;
+                traits->to_float(src, dst, (int)n_per_row);
             }
         }
 