From f05c8483d8b138c58a41ecdf32f95947bb130be5 Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Fri, 22 Aug 2025 09:17:58 +0100 Subject: [PATCH] Improve dequantized_buffer fill --- src/llama-quant.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index c5c19f3c5f..db4a0e1a20 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -843,12 +843,9 @@ static std::unordered_map target_bpw_type( LLAMA_LOG_WARN("%s: unsupported quantization type %s\n", __func__, ggml_type_name(quant_type)); return 1e35; } - - size_t done = 0; - while (done < sample_element_count) { - const size_t chunk = std::min((size_t)n_per_row, sample_element_count - done); - traits->to_float(quantized_buffer.data() + done / n_per_row * row_size, dequantized_buffer.data() + done, (int)chunk); - done += chunk; + const size_t row_size = ggml_row_size(quant_type, n_per_row); + for (size_t r = 0; r < sample_row_count; ++r) { + traits->to_float(quantized_buffer.data() + r * row_size, dequantized_buffer.data() + r * n_per_row, (int)n_per_row); } } }