Compute aggregated (per layer) l2 norm

This commit is contained in:
Ed Addario 2025-08-05 08:54:57 +01:00
parent 5e40cf4f1c
commit b37393423d
No known key found for this signature in database
GPG Key ID: E7875815A3230993
1 changed file with 24 additions and 9 deletions

View File

@ -255,7 +255,7 @@ static int compute_vector_statistics(std::vector<tensor_statistics> & tstats, co
static void compute_tensor_statistics(std::vector<tensor_statistics> & tstats) {
static const std::regex pattern(R"(blk\.(\d+)\.)");
// compute the cosine similarity between the same tensors in consecutive layers
// compute the Cosine Similarity between the same tensors in consecutive layers
for (auto & ts : tstats) {
ts.cossim = 0;
@ -281,7 +281,7 @@ static void compute_tensor_statistics(std::vector<tensor_statistics> & tstats) {
}
}
// compute the L2 norm between the same tensors in consecutive layers
// compute the L2 Norm (Euclidean Distance) between the same tensors in consecutive layers
for (auto & ts : tstats) {
ts.l2_norm = 0.0f;
if (ts.stats.in_sum.empty()) continue;
@ -310,6 +310,7 @@ static void compute_tensor_statistics(std::vector<tensor_statistics> & tstats) {
static void compute_layer_statistics(const std::vector<tensor_statistics> & tstats,
std::map<int, float> & layer_cossim,
std::map<int, float> & layer_l2_norm,
const std::unordered_map<std::string, Stats> & stats_map) {
struct layer_aggregation {
std::vector<float> curr_avg;
@ -337,22 +338,33 @@ static void compute_layer_statistics(const std::vector<tensor_statistics> & tsta
prev.insert(prev.end(), prev_avg.begin(), prev_avg.end());
}
// compute the cosine similarity between consecutive layers
// compute the aggregated Cosine Similarity between consecutive layers
for (auto & kv : taggr) {
const auto & curr = kv.second.curr_avg;
const auto & prev = kv.second.prev_avg;
if (curr.size() != prev.size() || curr.empty()) continue;
float dot_prod = 0.0, lyr1 = 0.0, lyr2 = 0.0;
for (size_t i = 0; i < curr.size(); ++i) {
float crr = curr[i], prv = prev[i];
dot_prod += crr * prv;
lyr1 += crr * crr;
lyr2 += prv * prv;
dot_prod += curr[i] * prev[i];
lyr1 += curr[i] * curr[i];
lyr2 += prev[i] * prev[i];
}
float cossim = 0.0f;
if (lyr1 > 0.0 && lyr2 > 0.0) cossim = dot_prod / (std::sqrt(lyr1) * std::sqrt(lyr2));
layer_cossim[kv.first] = cossim;
}
// compute the aggregated L2 Norm (Euclidean Distance) between consecutive layers
for (auto & kv : taggr) {
const auto & curr = kv.second.curr_avg;
const auto & prev = kv.second.prev_avg;
if (curr.size() != prev.size() || curr.empty()) continue;
float dist = 0.0f;
for (size_t i = 0; i < curr.size(); ++i) {
dist += (curr[i] - prev[i]) * (curr[i] - prev[i]);
}
layer_l2_norm[kv.first] = std::sqrt(dist);
}
}
bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data) {
@ -1346,7 +1358,8 @@ static bool show_statistics(const common_params & params) {
}
std::map<int, float> layer_cossim;
compute_layer_statistics(ts, layer_cossim, g_collector.get_mstats());
std::map<int, float> layer_l2_norm;
compute_layer_statistics(ts, layer_cossim, layer_l2_norm, g_collector.get_mstats());
const auto layers = std::count_if(ws.begin(), ws.end(), [](const auto & kv) { return kv.first >= 0; });
LOG_INF("\nComputing aggregated statistics per layer (%ld layers)\n", layers);
@ -1362,9 +1375,11 @@ static bool show_statistics(const common_params & params) {
const float w_zd = stats.w_zd / stats.n;
const auto lcs = layer_cossim.find(layer);
const float cossim = (lcs != layer_cossim.end()) ? lcs->second : 0.0f;
const auto ll2n = layer_l2_norm.find(layer);
const float l2_norm = (ll2n != layer_l2_norm.end()) ? ll2n->second : 0.0f;
LOG_INF("%5d\t%11.2f\t%6.2f%%\t%10.4f\n",
layer,
w_sum,
tensor_calc_mode == 1 ? l2_norm: w_sum,
100.0f * w_zd,
cossim);
}