Minor refactoring

This commit is contained in:
Ed Addario 2026-02-20 15:03:56 +00:00
parent fb2af3353d
commit 551463e2e8
No known key found for this signature in database
GPG Key ID: E7875815A3230993
1 changed files with 11 additions and 10 deletions

View File

@ -651,6 +651,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
constexpr uint32_t MSE_MAGIC = 0x4d534531; // MSE1
constexpr uint32_t WCE_MAGIC = 0x57434531; // WCE1
constexpr uint64_t HASH_MAGIC = 0xeabada55cafed00d;
constexpr float penalty = 5.0f;
const char * func = __func__;
const bool wce = params->use_wce;
const bool valid_wce = wce && activations_data && statistics_data != nullptr;
@ -1013,7 +1014,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
return std::accumulate(v.begin() + k, v.end() - k, 0.0) / std::max(1.0, (double)(n - 2 * k));
};
// Compute Error Metrics: Entropy-Modulated Weighted Cosine Error (WCE) - Experimental
// Weighted Cosine Error (WCE) - Experimental
if (do_wce) {
double total_cos_error = 0.0;
size_t off = 0;
@ -1074,7 +1075,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
}
const double nrows = t->ne[1];
total_cos_error += slice_sum / (double) rs * (double) nrows;
total_cos_error += slice_sum / (double)rs * (double)nrows;
}
const double penalty = 2.0 - std::clamp((double) h_norm, 0.0, 1.0);
@ -1083,7 +1084,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
return qe;
}
// Compute Error Metrics: Weighted MSE Optimization - Default
// Weighted Mean Squared Error (MSE) - Default
size_t off = 0;
size_t row_idx = 0;
double total_wmse = 0.0;
@ -1112,7 +1113,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
if (val && act) {
for (int64_t j = 0; j < n_per_row; ++j) {
const double w = std::max(0.0f, val[j]);
const double e = y[j] - x[j];
const double e = (double)y[j] - (double)x[j];
const double we = w * e;
w_err += we * e;
bias_num += we * act[j];
@ -1120,18 +1121,18 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
} else if (val) {
for (int64_t j = 0; j < n_per_row; ++j) {
const double w = std::max(0.0f, val[j]);
const double e = y[j] - x[j];
const double e = (double)y[j] - (double)x[j];
w_err += w * e * e;
}
} else if (act) {
for (int64_t j = 0; j < n_per_row; ++j) {
const double e = y[j] - x[j];
const double e = (double)y[j] - (double)x[j];
w_err += e * e;
bias_num += e * act[j];
}
} else {
for (int64_t j = 0; j < n_per_row; ++j) {
const double e = y[j] - x[j];
const double e = (double)y[j] - (double)x[j];
w_err += e * e;
}
}
@ -1197,7 +1198,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
};
std::unordered_map<std::string, type_choice> bpw_data;
if (params->state_file && !checkpoint_file.empty()) { bpw_data = load_state(); }
if (params->state_file && !checkpoint_file.empty()) { bpw_data = load_state(); } // ToDo: rethink this condition
// Parallelize tensor processing (courtesy of https://github.com/ddh0)
auto process_tensor = [&](
@ -2056,9 +2057,9 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
LLAMA_LOG_INFO("%s: assigning more budget to important tensors\n", __func__);
}
if (params->use_wce) {
LLAMA_LOG_INFO("%s: using experimental Entropy-Modulated Weighted Cosine Error (WCE) approximation optimization\n", __func__);
LLAMA_LOG_INFO("%s: using experimental Weighted Cosine Error (WCE) optimization\n", __func__);
} else {
LLAMA_LOG_INFO("%s: using weighted Mean Squared Error (MSE) optimization\n", __func__);
LLAMA_LOG_INFO("%s: using default Weighted Mean Squared Error (MSE) optimization\n", __func__);
}
if (params->target_size >= 0) {
LLAMA_LOG_INFO("%s: computing tensor quantization mix to achieve file size %.2f MiB\n",