Minor refactoring

This commit is contained in:
Ed Addario 2025-10-28 23:10:12 +00:00
parent 92a42bac3d
commit ab015065b8
No known key found for this signature in database
GPG Key ID: E7875815A3230993
1 changed file with 51 additions and 58 deletions

View File

@@ -166,44 +166,33 @@ static std::vector<float> compute_tensor_averages(const Stats & tstats) {
static bool compute_vector_statistics(std::vector<tensor_statistics> & tstats, const std::string & name, const Stats & e) { static bool compute_vector_statistics(std::vector<tensor_statistics> & tstats, const std::string & name, const Stats & e) {
const size_t n_mat = e.counts.size(); const size_t n_mat = e.counts.size();
const size_t len = e.activations.empty() ? e.values.size() : e.activations.size(); const size_t len = e.activations.empty() ? e.values.size() : e.activations.size();
if (n_mat == 0) { if (n_mat == 0) {
LOG_ERR("%s: there are no activations for tensor %s. The imatrix may be suboptimal\n", __func__, name.c_str()); LOG_ERR("%s: there are no activations for tensor %s. The imatrix may be suboptimal\n", __func__, name.c_str());
return false; return false;
} }
if (len == 0 || (len % n_mat) != 0) { if (len == 0 || (len % n_mat) != 0) {
LOG_ERR("%s: activation size mismatch for tensor %s (len=%zu, counts=%zu)\n", __func__, name.c_str(), len, n_mat); LOG_ERR("%s: activation size mismatch for tensor %s (len=%zu, counts=%zu)\n", __func__, name.c_str(), len, n_mat);
return false; return false;
} }
const int row_size = (int)(len / n_mat); const size_t row_size = len / n_mat;
std::vector<float> activations; std::vector<float> activations;
activations.reserve(len); activations.reserve(len);
for (size_t i = 0; i < n_mat; ++i) {
const auto c = (float)e.counts[i];
const size_t off = i * row_size;
if (c <= 0.0f) {
activations.insert(activations.end(), row_size, 0.0f);
continue;
}
if (e.activations.empty()) { if (e.activations.empty()) {
for (size_t i = 0; i < n_mat; ++i) { for (size_t j = 0; j < row_size; ++j) {
const auto c = (float)e.counts[i]; activations.push_back(e.values[off + j] / c); // mean-of-squares
const size_t off = i * row_size;
for (int j = 0; j < row_size; ++j) {
if (c <= 0.0f) {
activations.push_back(0.0f);
} else {
activations.push_back(e.values[off + j] / c);
}
}
} }
} else { } else {
for (size_t i = 0; i < n_mat; ++i) { for (size_t j = 0; j < row_size; ++j) {
const auto c = (float)e.counts[i]; activations.push_back(e.activations[off + j] / c); // mean
const size_t off = i * row_size;
for (int j = 0; j < row_size; ++j) {
if (c <= 0.0f) {
activations.push_back(0.0f);
} else {
activations.push_back(e.activations[off + j] / c);
}
} }
} }
} }
@@ -213,59 +202,63 @@ static bool compute_vector_statistics(std::vector<tensor_statistics> & tstats, c
return false; return false;
} }
const float sum = std::accumulate(activations.begin(), activations.end(), 0.0f); double sum = 0.0;
const float max = * std::max_element(activations.begin(), activations.end()); float vmax = activations[0];
const float min = * std::min_element(activations.begin(), activations.end()); float vmin = activations[0];
const float mean = sum / activations.size(); for (float v : activations) {
const float sqr_sum = std::inner_product(activations.begin(), activations.end(), activations.begin(), 0.0f); sum += v;
const float variance = sqr_sum / activations.size() - mean * mean; vmax = std::max(vmax, v);
const float std_deviation = std::sqrt(std::max(0.0f, variance)); vmin = std::min(vmin, v);
}
const auto mean = (float)(sum / (double)activations.size());
double sqr_sum = 0.0;
for (const float v : activations) { sqr_sum += (double)v * (double)v; }
double variance = sqr_sum / (double)activations.size() - (double)mean * (double)mean;
if (variance < 0.0) { variance = 0.0; }
const float std_deviation = std::sqrt((float)variance);
float entropy = 0.0f; float entropy = 0.0f;
if (e.activations.empty()) { if (e.activations.empty()) {
// classic entropy on normalized activations distribution double energy_sum = 0.0;
if (sum > 0.0f) { for (float v : activations) { energy_sum += (double)std::max(0.0f, v); }
for (const auto act : activations) { if (energy_sum > 0.0) {
const float p = act / sum; for (const float v : activations) {
if (p > 0.0f) { entropy -= p * std::log2(p); } const double p = std::max(0.0, (double)v) / energy_sum;
if (p > 0.0) { entropy -= (float)(p * std::log2(p)); }
} }
} }
} else { } else {
// entropy on normalized squared weights double energy_sum = 0.0;
float div = 0.0f; for (const float v : activations) { energy_sum += (double)v * (double)v; }
std::vector<float> weights(activations.size()); if (energy_sum > 0.0) {
for (size_t i = 0; i < activations.size(); ++i) { for (const float v : activations) {
const float w = activations[i] * activations[i]; const double p = (double)v * (double)v / energy_sum;
weights[i] = w; if (p > 0.0) { entropy -= (float)(p * std::log2(p)); }
div += w;
}
if (div > 0.0f) {
for (const float w : weights) {
const float p = w / div;
if (p > 0.0f) { entropy -= p * std::log2(p); }
} }
} }
} }
float zd_score = 0.0f; // ZD score: fraction with |z| > 1
double zd_count = 0.0;
if (std_deviation > 0.0f) { if (std_deviation > 0.0f) {
for (const auto act : activations) { for (const float v : activations) {
const float z = (act - mean) / std_deviation; const float z = (v - mean) / std_deviation;
if (std::fabs(z) > 1.0f) { zd_score++; } if (std::fabs(z) > 1.0f) { zd_count += 1.0; }
} }
} }
auto & ts = tstats.emplace_back(); auto & ts = tstats.emplace_back();
ts.tensor = name; ts.tensor = name;
ts.stats = e; ts.stats = e;
ts.sum_values = sum; ts.sum_values = (float)sum;
ts.mean_values = mean; ts.mean_values = mean;
ts.max_values = max; ts.max_values = vmax;
ts.min_values = min; ts.min_values = vmin;
ts.elements = static_cast<int>(activations.size()); ts.elements = (int)activations.size();
ts.std_deviation = std_deviation; ts.std_deviation = std_deviation;
ts.entropy = entropy; ts.entropy = entropy;
ts.zd_score = zd_score / ts.elements; ts.zd_score = ts.elements > 0 ? (float)(zd_count / (double)ts.elements) : 0.0f;
return e.activations.empty(); return e.activations.empty();
} }