Add Euclidean–Cosine Score (ECS)

This commit is contained in:
Ed Addario 2025-08-07 19:04:49 +01:00
parent 5bb2def02d
commit c5ecdaa1a1
No known key found for this signature in database
GPG Key ID: E7875815A3230993
1 changed file with 29 additions and 16 deletions

View File

@@ -18,6 +18,7 @@
#include <regex> #include <regex>
#include <thread> #include <thread>
#include <unordered_map> #include <unordered_map>
#include <valarray>
#include <vector> #include <vector>
#if defined(_MSC_VER) #if defined(_MSC_VER)
@@ -1301,7 +1302,7 @@ static bool show_statistics(const common_params & params) {
std::map<int, layer_stats> ls; std::map<int, layer_stats> ls;
LOG_INF("\nComputing tensor statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast<int>(ts.size())); LOG_INF("\nComputing tensor statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast<int>(ts.size()));
LOG_INF("\n%6s\t%18s\t%13s\t%8s\t%8s\t%7s\t%15s\t%13s\t%12s\t%s\t%5s\t%10s\n", LOG_INF("\n%6s\t%18s\t%13s\t%8s\t%8s\t%7s\t%15s\t%13s\t%11s\t%8s\t%5s\t%10s\n",
"Layer", "Layer",
"Tensor", "Tensor",
legacy_mode ? "Σ(Act²)" : "L₂ Norm", legacy_mode ? "Σ(Act²)" : "L₂ Norm",
@@ -1310,8 +1311,8 @@ static bool show_statistics(const common_params & params) {
"μ", "μ",
"σ", "σ",
"N", "N",
"Entropy", "H Norm",
"E (norm)", legacy_mode ? "H" : "ECS",
"ZD", "ZD",
"CosSim"); "CosSim");
LOG_INF( LOG_INF(
@@ -1328,17 +1329,17 @@ static bool show_statistics(const common_params & params) {
blk = -1; // not a block layer blk = -1; // not a block layer
} }
LOG_INF("%5s\t%-20s\t%11.2f\t%10.4f\t%10.4f\t%8.2f\t%8.2f\t%7d\t%12.4f\t%7.2f%%\t%6.2f%%\t%10.4f\n", LOG_INF("%5s\t%-20s\t%11.2f\t%10.4f\t%10.4f\t%8.2f\t%8.2f\t%7d\t%10.2f%%\t%10.4f\t%6.2f%%\t%10.4f\n",
layer.c_str(), layer.c_str(),
name.c_str(), name.c_str(),
legacy_mode == 1 ? tstat.sum_values : tstat.l2_norm, legacy_mode ? tstat.sum_values : tstat.l2_norm,
tstat.min_values, tstat.min_values,
tstat.max_values, tstat.max_values,
tstat.mean_values, tstat.mean_values,
tstat.std_deviation, tstat.std_deviation,
tstat.elements, tstat.elements,
tstat.entropy,
100.0f * (tstat.entropy / std::log2(tstat.elements)), 100.0f * (tstat.entropy / std::log2(tstat.elements)),
legacy_mode ? tstat.entropy : 100.0f * std::exp(-0.01f * tstat.l2_norm) * std::pow(fabs(tstat.cossim), 10.0f),
100.0f * tstat.zd_score, 100.0f * tstat.zd_score,
tstat.cossim); tstat.cossim);
@@ -1363,25 +1364,37 @@ static bool show_statistics(const common_params & params) {
const auto layers = std::count_if(ls.begin(), ls.end(), [](const auto & kv) { return kv.first >= 0; }); const auto layers = std::count_if(ls.begin(), ls.end(), [](const auto & kv) { return kv.first >= 0; });
LOG_INF("\nComputing layer statistics (%ld layers)\n", layers); LOG_INF("\nComputing layer statistics (%ld layers)\n", layers);
LOG_INF("\n%6s\t%13s\t%5s\t%10s\n", LOG_INF("\n%6s\t%13s\t%6s\t%11s\t%6s\n",
"Layer", "Layer",
legacy_mode ? "Σ(Act²)" : "L₂ Norm", legacy_mode ? "Σ(Act²)" : "L₂ Norm",
"ZD", "ZD",
"CosSim"); "CosSim",
LOG_INF("============================================\n"); legacy_mode ? "" : "ECS");
if (legacy_mode) {
LOG_INF("============================================\n");
} else {
LOG_INF("=========================================================\n");
}
for (const auto & [layer, stats] : ls) { for (const auto & [layer, stats] : ls) {
if (layer < 0 || stats.n == 0) continue; if (layer < 0 || stats.n == 0) continue;
const float lyr_sum = stats.lyr_sum;
const float lyr_zd = stats.lyr_zd / stats.n;
const auto lcs = lyr_cossim.find(layer); const auto lcs = lyr_cossim.find(layer);
const float lyr_cs = (lcs != lyr_cossim.end()) ? lcs->second : 0.0f; const float lyr_cs = lcs != lyr_cossim.end() ? lcs->second : 0.0f;
const auto ll2n = lyr_l2_norm.find(layer); const auto ll2n = lyr_l2_norm.find(layer);
const float l2_norm = (ll2n != lyr_l2_norm.end()) ? ll2n->second : 0.0f; const float lyr_l2n = ll2n != lyr_l2_norm.end() ? ll2n->second : 0.0f;
LOG_INF("%5d\t%11.2f\t%6.2f%%\t%10.4f\n", if (legacy_mode) {
LOG_INF("%5d\t%11.2f\t%6.2f%%\t%11.4f\n",
layer, layer,
legacy_mode ? lyr_sum : l2_norm, stats.lyr_sum,
100.0f * lyr_zd, 100.0f * stats.lyr_zd / stats.n,
lyr_cs); lyr_cs);
} else {
LOG_INF("%5d\t%11.2f\t%6.2f%%\t%11.4f\t%8.4f\n",
layer,
lyr_l2n,
100.0f * stats.lyr_zd / stats.n,
lyr_cs,
100.0f * std::exp(-0.01f * lyr_l2n) * std::pow(fabs(lyr_cs), 10.0f));
}
} }
LOG_INF("\n"); LOG_INF("\n");