From decafae27060ed923c69ce3b89db505538a9b230 Mon Sep 17 00:00:00 2001
From: Ed Addario
Date: Sat, 23 Aug 2025 11:30:11 +0100
Subject: [PATCH] Adjust bias_lambda

---
 src/llama-quant.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index 392a23b5ca..4ce651723f 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -888,7 +888,9 @@ static std::unordered_map target_bpw_type(
         }
     }
 
-    constexpr float bias_lambda = 1.75f;
+    // bias_lambda adjusts the trade-off between systematic bias (introduced by block-wise scaling) and MSE:
+    // a larger value favours quantisation types that produce a smaller bias even if the MSE is slightly larger
+    constexpr float bias_lambda = 1.5f;
     constexpr double epsilon = 1e-12;
     double err_num = weighted_mse;
     if (activations && bias_lambda != 0.0f) {
@@ -1024,7 +1026,7 @@ static std::unordered_map target_bpw_type(
 
     // Build list of candidate types first (compatible ones)
     const ggml_type * base_arr = is_iq(params->ftype) ? iq_quants : k_quants;
-    const size_t base_sz = is_iq(params->ftype) ? sizeof(iq_quants) / sizeof(iq_quants[0]) : sizeof(k_quants) / sizeof(k_quants[0]);
+    const size_t base_sz = is_iq(params->ftype) ? std::size(iq_quants) : std::size(k_quants);
     size_t max_row_sz = 0;
     const bool has_valid_imatrix = !values_sample.empty() && values_sample.size() == (size_t)ne2 * (size_t)n_per_row;
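
For illustration, below is a minimal standalone sketch of the trade-off the new comment describes. It assumes each candidate quantisation type is scored as weighted MSE plus a bias_lambda-scaled squared-bias penalty; the struct, the field names, and the exact combination formula are assumptions made for this sketch, not the llama.cpp implementation.

// Standalone sketch (hypothetical, not the llama.cpp implementation):
// score a candidate quantisation type by its activation-weighted MSE
// plus a bias_lambda-scaled penalty on its systematic bias.
#include <cstdio>

struct candidate_error {
    double weighted_mse; // activation-weighted mean squared error
    double bias;         // mean signed error from block-wise scaling
};

// A larger bias_lambda favours types with a smaller systematic bias,
// even when their MSE is slightly larger.
static double score(const candidate_error & e, float bias_lambda) {
    return e.weighted_mse + bias_lambda * e.bias * e.bias;
}

int main() {
    constexpr float bias_lambda = 1.5f;
    const candidate_error type_a = { 0.010, 0.060 }; // lower MSE, larger bias
    const candidate_error type_b = { 0.012, 0.010 }; // higher MSE, smaller bias
    std::printf("type A: %.5f\n", score(type_a, bias_lambda)); // 0.01540
    std::printf("type B: %.5f\n", score(type_b, bias_lambda)); // 0.01215 (preferred)
    return 0;
}

Under this (assumed) scoring, lowering bias_lambda from 1.75 to 1.5 only weakens the penalty: in the example above type A would score 0.01630 and type B 0.01218 at 1.75, so the bias must be proportionally larger before it overrides an MSE advantage. The second hunk is an unrelated cleanup: C++17 std::size replaces the sizeof(a)/sizeof(a[0]) idiom and, unlike that idiom, fails to compile if the array argument has decayed to a pointer.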