Add directional scaling

Ed Addario 2025-08-28 16:04:28 +01:00
parent 04946114c9
commit 8df1d00ae4
1 changed file with 21 additions and 0 deletions


@@ -900,6 +900,27 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
        return std::isfinite(total_err) ? total_err : 1e35;
    };
    auto directional_scale = [&](const float * values, const float * activations, int64_t n_per_row) {
        if (!activations) { return 1.0f; }
        // Compute dominance = ||sqrt(v).*a||_2 / (RMS(a)*sqrt(sum(v)))
        // If no values, use v=1
        double sum_v = 0.0;
        double sum_aw2 = 0.0;
        double sum_a2 = 0.0;
        for (int64_t j = 0; j < n_per_row; ++j) {
            const double v = values ? std::max(0.0f, values[j]) : 1.0;
            const double a = activations[j];
            sum_v += v;
            sum_aw2 += v * a * a;
            sum_a2 += a * a;
        }
        const double rms_a = std::sqrt(sum_a2 / std::max(1.0, (double)n_per_row));
        const double denom = std::sqrt(std::max(epsilon, sum_v)) * std::max(epsilon, rms_a);
        const double scale = denom > 0.0 ? std::sqrt(sum_aw2) / denom : 1.0;
        // Clamp to a reasonable range
        return (float)std::clamp(scale, 0.5, 2.0);
    };
    std::vector<tensor_info> all;
    all.reserve(tensors.size());
    for (const auto * tw : tensors) {
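
The dominance ratio above equals 1 when the importance weights v are uniform (since ||a||_2 / (sqrt(n) * RMS(a)) = 1) and rises above 1 when the weights concentrate on dimensions where the activations are large. A minimal standalone sketch of that behavior, assuming `epsilon` is a small positive constant (1e-9 here) and using illustrative data and helper names not taken from the commit:

// Standalone sketch of the dominance/scale computation (hypothetical helper name).
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

static float directional_scale_sketch(const float * values, const float * activations, int64_t n_per_row) {
    constexpr double epsilon = 1e-9; // assumed small positive constant
    if (!activations) { return 1.0f; }
    double sum_v = 0.0, sum_aw2 = 0.0, sum_a2 = 0.0;
    for (int64_t j = 0; j < n_per_row; ++j) {
        const double v = values ? std::max(0.0f, values[j]) : 1.0; // fall back to v = 1 without importance values
        const double a = activations[j];
        sum_v   += v;
        sum_aw2 += v * a * a;
        sum_a2  += a * a;
    }
    const double rms_a = std::sqrt(sum_a2 / std::max(1.0, (double)n_per_row));
    const double denom = std::sqrt(std::max(epsilon, sum_v)) * std::max(epsilon, rms_a);
    const double scale = denom > 0.0 ? std::sqrt(sum_aw2) / denom : 1.0;
    return (float)std::clamp(scale, 0.5, 2.0); // clamp to [0.5, 2.0]
}

int main() {
    const std::vector<float> a       = {1.0f, 2.0f, 3.0f, 4.0f};
    const std::vector<float> uniform = {1.0f, 1.0f, 1.0f, 1.0f};
    const std::vector<float> skewed  = {0.0f, 0.0f, 0.0f, 4.0f}; // importance concentrated on the largest activation
    // Uniform importance reproduces the baseline: prints 1.000
    std::printf("uniform v -> %.3f\n", directional_scale_sketch(uniform.data(), a.data(), (int64_t)a.size()));
    // Importance concentrated where |a| is large pushes the scale above 1: prints ~1.461
    std::printf("skewed  v -> %.3f\n", directional_scale_sketch(skewed.data(), a.data(), (int64_t)a.size()));
    return 0;
}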