Refactor compute_tensor_statistics()

This commit is contained in:
Ed Addario 2026-01-11 17:38:47 +00:00
parent 395367f210
commit 309dc1231a
No known key found for this signature in database
GPG Key ID: E7875815A3230993
1 changed files with 86 additions and 47 deletions

View File

@ -272,60 +272,99 @@ static bool compute_vector_statistics(std::vector<tensor_statistics> & tstats, c
}
static void compute_tensor_statistics(std::vector<tensor_statistics> & tstats) {
static const std::regex pattern(R"(blk\.(\d+)\.)");
std::unordered_map<std::string, size_t> tensor_map;
tensor_map.reserve(tstats.size());
for (size_t i = 0; i < tstats.size(); ++i) { tensor_map[tstats[i].tensor] = i; }
for (auto & ts : tstats) {
ts.cossim = 1.0f;
ts.l2_dist = 0.0f;
std::string layer_str;
std::string dummy_tensor;
process_tensor_name(ts.tensor, layer_str, dummy_tensor);
if (std::smatch match; std::regex_search(ts.tensor, match, pattern)) {
const int blk = std::stoi(match[1]);
if (blk <= 0) { continue; }
std::string tname(ts.tensor);
tname.replace(match.position(1), match.length(1), std::to_string(blk - 1));
auto prev_it = std::find_if(tstats.begin(), tstats.end(),
[tname](const tensor_statistics & t) { return t.tensor == tname; });
if (prev_it == tstats.end()) {
LOG_WRN("%s: missing previous-layer tensor '%s' (current: '%s'). Statistics may not be accurate\n",
__func__, tname.c_str(), ts.tensor.c_str());
continue;
}
// Robust block ID extraction
int blk = -1;
try { blk = std::stoi(layer_str); } catch (...) { continue; }
if (blk <= 0) { continue; }
const auto curr_avg = compute_tensor_averages(ts.stats);
const auto prev_avg = compute_tensor_averages(prev_it->stats);
if (curr_avg.empty() || curr_avg.size() != prev_avg.size()) {
LOG_WRN("%s: size mismatch between '%s' and its previous-layer tensor '%s' (%zu vs %zu). Statistics may not be accurate\n",
__func__, ts.tensor.c_str(), tname.c_str(), curr_avg.size(), prev_avg.size());
continue;
}
// Reconstruct previous layer name
const size_t blk_start_pos = ts.tensor.find("blk." + layer_str);
if (blk_start_pos == std::string::npos) { continue; }
float dot_prod = 0.0f;
float norm1_sq = 0.0f;
float norm2_sq = 0.0f;
float l2_dist_sq = 0.0f;
std::string tname = ts.tensor;
tname.replace(blk_start_pos, layer_str.length() + 4, "blk." + std::to_string(blk - 1));
for (size_t i = 0; i < curr_avg.size(); ++i) {
const float c_val = curr_avg[i];
const float p_val = prev_avg[i];
dot_prod += c_val * p_val;
norm1_sq += c_val * c_val;
norm2_sq += p_val * p_val;
const float diff = c_val - p_val;
l2_dist_sq += diff * diff;
}
auto it = tensor_map.find(tname);
if (it == tensor_map.end()) {
LOG_WRN("%s: missing previous-layer tensor '%s'\n", __func__, tname.c_str());
continue;
}
// Compute Cosine Similarity
float cs = 0.0f;
if (norm1_sq > 0.0f && norm2_sq > 0.0f) {
cs = dot_prod / (std::sqrt(norm1_sq) * std::sqrt(norm2_sq));
cs = std::min(cs, 1.0f);
cs = std::max(cs, -1.0f);
} else if (norm1_sq == 0.0f && norm2_sq == 0.0f) {
cs = 1.0f;
}
ts.cossim = cs;
const auto & prev_ts = tstats[it->second];
const auto curr_avg = compute_tensor_averages(ts.stats);
const auto prev_avg = compute_tensor_averages(prev_ts.stats);
// Compute L2 Norm (Euclidean Distance)
ts.l2_dist = std::sqrt(l2_dist_sq);
if (curr_avg.empty() || curr_avg.size() != prev_avg.size()) { continue; }
double dot_prod = 0.0;
double norm1_sq = 0.0;
double norm2_sq = 0.0;
double l2_dist_sq = 0.0;
// Aux variables for Pearson (Spatial Covariance)
double sum_c = 0.0;
double sum_p = 0.0;
const size_t n = curr_avg.size();
// Pass 1: Sums for Means
for (size_t i = 0; i < n; ++i) {
sum_c += curr_avg[i];
sum_p += prev_avg[i];
}
const double mean_c = sum_c / n;
const double mean_p = sum_p / n;
double cov_sum = 0.0;
double var_c_sum = 0.0;
double var_p_sum = 0.0;
// Pass 2: Metrics
for (size_t i = 0; i < n; ++i) {
const double c_val = curr_avg[i];
const double p_val = prev_avg[i];
// Cosine Similarity & L2 basics
dot_prod += c_val * p_val;
norm1_sq += c_val * c_val;
norm2_sq += p_val * p_val;
const double diff = c_val - p_val;
l2_dist_sq += diff * diff;
// Pearson (Centered stats)
const double dc = c_val - mean_c;
const double dp = p_val - mean_p;
cov_sum += dc * dp;
var_c_sum += dc * dc;
var_p_sum += dp * dp;
}
ts.dot_prod = dot_prod;
ts.norm1_sq = norm1_sq;
ts.norm2_sq = norm2_sq;
ts.l2_dist_sq = l2_dist_sq;
ts.l2_dist = (float)std::sqrt(l2_dist_sq);
if (norm1_sq > 0.0 && norm2_sq > 0.0) {
ts.cossim = (float)(dot_prod / (std::sqrt(norm1_sq) * std::sqrt(norm2_sq)));
ts.cossim = std::clamp(ts.cossim, -1.0f, 1.0f);
} else {
ts.cossim = (norm1_sq == 0.0 && norm2_sq == 0.0) ? 1.0f : 0.0f;
}
if (var_c_sum > 0.0 && var_p_sum > 0.0) {
ts.pearson = (float)(cov_sum / (std::sqrt(var_c_sum) * std::sqrt(var_p_sum)));
ts.pearson = std::clamp(ts.pearson, -1.0f, 1.0f);
} else {
ts.pearson = (var_c_sum == 0.0 && var_p_sum == 0.0) ? 1.0f : 0.0f;
}
}
}