From b37393423d426d962f1aecf70c6fdc26c0bcb52b Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Tue, 5 Aug 2025 08:54:57 +0100 Subject: [PATCH] Compute aggregated (per layer) l2 norm --- tools/imatrix/imatrix.cpp | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp index b772f2184d..f22b67a309 100644 --- a/tools/imatrix/imatrix.cpp +++ b/tools/imatrix/imatrix.cpp @@ -255,7 +255,7 @@ static int compute_vector_statistics(std::vector & tstats, co static void compute_tensor_statistics(std::vector & tstats) { static const std::regex pattern(R"(blk\.(\d+)\.)"); - // compute the cosine similarity between the same tensors in consecutive layers + // compute the Cosine Similarity between the same tensors in consecutive layers for (auto & ts : tstats) { ts.cossim = 0; @@ -281,7 +281,7 @@ static void compute_tensor_statistics(std::vector & tstats) { } } - // compute the L2 norm between the same tensors in consecutive layers + // compute the L2 Norm (Euclidian Distance) between the same tensors in consecutive layers for (auto & ts : tstats) { ts.l2_norm = 0.0f; if (ts.stats.in_sum.empty()) continue; @@ -310,6 +310,7 @@ static void compute_tensor_statistics(std::vector & tstats) { static void compute_layer_statistics(const std::vector & tstats, std::map & layer_cossim, + std::map & layer_l2_norm, const std::unordered_map & stats_map) { struct layer_aggregation { std::vector curr_avg; @@ -337,22 +338,33 @@ static void compute_layer_statistics(const std::vector & tsta prev.insert(prev.end(), prev_avg.begin(), prev_avg.end()); } - // compute the cosine similarity between consecutive layers + // compute the aggregated Cosine Similarity between consecutive layers for (auto & kv : taggr) { const auto & curr = kv.second.curr_avg; const auto & prev = kv.second.prev_avg; if (curr.size() != prev.size() || curr.empty()) continue; float dot_prod = 0.0, lyr1 = 0.0, lyr2 = 0.0; for (size_t i = 0; i < curr.size(); ++i) { - float crr = curr[i], prv = prev[i]; - dot_prod += crr * prv; - lyr1 += crr * crr; - lyr2 += prv * prv; + dot_prod += curr[i] * prev[i]; + lyr1 += curr[i] * curr[i]; + lyr2 += prev[i] * prev[i]; } float cossim = 0.0f; if (lyr1 > 0.0 && lyr2 > 0.0) cossim = dot_prod / (std::sqrt(lyr1) * std::sqrt(lyr2)); layer_cossim[kv.first] = cossim; } + + // compute the aggregated L2 Norm (Euclidian Distance) between consecutive layers + for (auto & kv : taggr) { + const auto & curr = kv.second.curr_avg; + const auto & prev = kv.second.prev_avg; + if (curr.size() != prev.size() || curr.empty()) continue; + float dist = 0.0f; + for (size_t i = 0; i < curr.size(); ++i) { + dist += (curr[i] - prev[i]) * (curr[i] - prev[i]); + } + layer_l2_norm[kv.first] = std::sqrt(dist); + } } bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data) { @@ -1346,7 +1358,8 @@ static bool show_statistics(const common_params & params) { } std::map layer_cossim; - compute_layer_statistics(ts, layer_cossim, g_collector.get_mstats()); + std::map layer_l2_norm; + compute_layer_statistics(ts, layer_cossim, layer_l2_norm, g_collector.get_mstats()); const auto layers = std::count_if(ws.begin(), ws.end(), [](const auto & kv) { return kv.first >= 0; }); LOG_INF("\nComputing aggregated statistics per layer (%ld layers)\n", layers); @@ -1362,9 +1375,11 @@ static bool show_statistics(const common_params & params) { const float w_zd = stats.w_zd / stats.n; const auto lcs = layer_cossim.find(layer); const float cossim = (lcs != layer_cossim.end()) ? lcs->second : 0.0f; + const auto ll2n = layer_l2_norm.find(layer); + const float l2_norm = (ll2n != layer_l2_norm.end()) ? ll2n->second : 0.0f; LOG_INF("%5d\t%11.2f\t%6.2f%%\t%10.4f\n", layer, - w_sum, + tensor_calc_mode == 1 ? l2_norm: w_sum, 100.0f * w_zd, cossim); }