From f05c8483d8b138c58a41ecdf32f95947bb130be5 Mon Sep 17 00:00:00 2001
From: Ed Addario <eaddario@hotmail.com>
Date: Fri, 22 Aug 2025 09:17:58 +0100
Subject: [PATCH] Simplify dequantized_buffer fill to a per-row to_float loop

---
 src/llama-quant.cpp | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index c5c19f3c5f..db4a0e1a20 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -843,12 +843,9 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
                     LLAMA_LOG_WARN("%s: unsupported quantization type %s\n", __func__, ggml_type_name(quant_type));
                     return 1e35;
                 }
-
-                size_t done = 0;
-                while (done < sample_element_count) {
-                    const size_t chunk = std::min((size_t)n_per_row, sample_element_count - done);
-                    traits->to_float(quantized_buffer.data() + done / n_per_row * row_size, dequantized_buffer.data() + done, (int)chunk);
-                    done += chunk;
+                const size_t row_size = ggml_row_size(quant_type, n_per_row);
+                for (size_t r = 0; r < sample_row_count; ++r) {
+                    traits->to_float(quantized_buffer.data() + r * row_size, dequantized_buffer.data() + r * n_per_row, (int)n_per_row);
                 }
             }
         }