From 551463e2e8c8b4c8f81331976f2a5f53368e9e11 Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Fri, 20 Feb 2026 15:03:56 +0000 Subject: [PATCH] Minor refactoring --- src/llama-quant.cpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index 1c0e495983..ea6a59e381 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -651,6 +651,7 @@ static std::unordered_map target_bpw_type( constexpr uint32_t MSE_MAGIC = 0x4d534531; // MSE1 constexpr uint32_t WCE_MAGIC = 0x57434531; // WCE1 constexpr uint64_t HASH_MAGIC = 0xeabada55cafed00d; + constexpr float penalty = 5.0f; const char * func = __func__; const bool wce = params->use_wce; const bool valid_wce = wce && activations_data && statistics_data != nullptr; @@ -1013,7 +1014,7 @@ static std::unordered_map target_bpw_type( return std::accumulate(v.begin() + k, v.end() - k, 0.0) / std::max(1.0, (double)(n - 2 * k)); }; - // Compute Error Metrics: Entropy-Modulated Weighted Cosine Error (WCE) - Experimental + // Weighted Cosine Error (WCE) - Experimental if (do_wce) { double total_cos_error = 0.0; size_t off = 0; @@ -1074,7 +1075,7 @@ static std::unordered_map target_bpw_type( } const double nrows = t->ne[1]; - total_cos_error += slice_sum / (double) rs * (double) nrows; + total_cos_error += slice_sum / (double)rs * (double)nrows; } const double penalty = 2.0 - std::clamp((double) h_norm, 0.0, 1.0); @@ -1083,7 +1084,7 @@ static std::unordered_map target_bpw_type( return qe; } - // Compute Error Metrics: Weighted MSE Optimization - Default + // Weighted Mean Squared Error (MSE) - Default size_t off = 0; size_t row_idx = 0; double total_wmse = 0.0; @@ -1112,7 +1113,7 @@ static std::unordered_map target_bpw_type( if (val && act) { for (int64_t j = 0; j < n_per_row; ++j) { const double w = std::max(0.0f, val[j]); - const double e = y[j] - x[j]; + const double e = (double)y[j] - (double)x[j]; const double we = w * e; w_err += we * e; bias_num += we * act[j]; @@ -1120,18 +1121,18 @@ static std::unordered_map target_bpw_type( } else if (val) { for (int64_t j = 0; j < n_per_row; ++j) { const double w = std::max(0.0f, val[j]); - const double e = y[j] - x[j]; + const double e = (double)y[j] - (double)x[j]; w_err += w * e * e; } } else if (act) { for (int64_t j = 0; j < n_per_row; ++j) { - const double e = y[j] - x[j]; + const double e = (double)y[j] - (double)x[j]; w_err += e * e; bias_num += e * act[j]; } } else { for (int64_t j = 0; j < n_per_row; ++j) { - const double e = y[j] - x[j]; + const double e = (double)y[j] - (double)x[j]; w_err += e * e; } } @@ -1197,7 +1198,7 @@ static std::unordered_map target_bpw_type( }; std::unordered_map bpw_data; - if (params->state_file && !checkpoint_file.empty()) { bpw_data = load_state(); } + if (params->state_file && !checkpoint_file.empty()) { bpw_data = load_state(); } // ToDo: rethink this condition // Parallelize tensor processing (courtesy of https://github.com/ddh0) auto process_tensor = [&]( @@ -2056,9 +2057,9 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: LLAMA_LOG_INFO("%s: assigning more budget to important tensors\n", __func__); } if (params->use_wce) { - LLAMA_LOG_INFO("%s: using experimental Entropy-Modulated Weighted Cosine Error (WCE) approximation optimization\n", __func__); + LLAMA_LOG_INFO("%s: using experimental Weighted Cosine Error (WCE) optimization\n", __func__); } else { - LLAMA_LOG_INFO("%s: using weighted Mean Squared Error (MSE) optimization\n", __func__); + LLAMA_LOG_INFO("%s: using default Weighted Mean Squared Error (MSE) optimization\n", __func__); } if (params->target_size >= 0) { LLAMA_LOG_INFO("%s: computing tensor quantization mix to achieve file size %.2f MiB\n",