Improve dequantized_buffer fill

2025-08-22 09:17:58 +01:00 · 2025-08-22 09:17:58 +01:00 · f05c8483d8
parent 897decbe8a
commit f05c8483d8
1 changed files with 3 additions and 6 deletions
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -843,12 +843,9 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
                    LLAMA_LOG_WARN("%s: unsupported quantization type %s\n", __func__, ggml_type_name(quant_type));
                    return 1e35;
                }
-
-                size_t done = 0;
-                while (done < sample_element_count) {
-                    const size_t chunk = std::min((size_t)n_per_row, sample_element_count - done);
-                    traits->to_float(quantized_buffer.data() + done / n_per_row * row_size, dequantized_buffer.data() + done, (int)chunk);
-                    done += chunk;
+                const size_t row_size = ggml_row_size(quant_type, n_per_row);
+                for (size_t r = 0; r < sample_row_count; ++r) {
+                    traits->to_float(quantized_buffer.data() + r * row_size, dequantized_buffer.data() + r * n_per_row, (int)n_per_row);
                }
            }
        }