Refactor show_statistics()

2026-01-11 17:41:04 +00:00 · 2026-01-11 17:41:04 +00:00 · fdc2def79c
parent e69058c6ec
commit fdc2def79c
1 changed files with 118 additions and 118 deletions
--- a/tools/imatrix/imatrix.cpp
+++ b/tools/imatrix/imatrix.cpp
@ -1320,179 +1320,179 @@ static bool compute_imatrix(llama_context * ctx, const common_params & params, c
 static bool show_statistics(const common_params & params) {
    g_collector.set_params(params);
    std::vector<tensor_statistics> ts;
-    if (params.in_files.empty() || params.in_files.size() > 1) {
-        LOG_ERR("\nError: a single imatrix file is required to compute tensor statistics\n\n");
-        return false;
-    }

-    bool has_activations = false;
-    bool no_activations = false;
+    if (params.in_files.empty()) { return false; }
+
+    // Load and process data
    if (g_collector.load_imatrix(params.in_files[0].c_str())) {
+        ts.reserve(g_collector.get_mstats().size());
        for (const auto & [name, stats] : g_collector.get_mstats()) {
            bool legacy_imatrix = true;
-            if (!compute_vector_statistics(ts, name, stats, legacy_imatrix)) {
-                LOG_WRN("%s: tensor %s has no data - skipping\n", __func__, name.c_str());
-                continue;
-            }
-            if (legacy_imatrix) { no_activations = true; }
-            else { has_activations = true; }
+            if (!compute_vector_statistics(ts, name, stats, legacy_imatrix)) { continue; }
        }
    } else {
-        LOG_ERR("\nError: %s is not a valid imatrix file\n\n", params.in_files[0].c_str());
-        return false;
-    }
-    if (ts.empty()) {
-        LOG_ERR("Error: cannot compute statistics for %s\n\n", params.in_files[0].c_str());
        return false;
    }

-    if (has_activations && no_activations) {
-        LOG_ERR("Error: %s has mixed tensors with and without activations\n\n", params.in_files[0].c_str());
-        return false;
-    }
+    if (ts.empty()) { return false; }

-    const bool legacy = !has_activations;
+    bool legacy = ts.empty() ? true : ts[0].stats.activations.empty();
    compute_tensor_statistics(ts);

+    // Sorting logic (Layer index -> Tensor Name)
    struct tensor_comparer {
-        bool legacy_mode;
-        explicit tensor_comparer(const bool legacy) : legacy_mode(legacy) {}
-
        bool operator()(const tensor_statistics & a, const tensor_statistics & b) const {
-            std::string layer;
+            std::string lay_a;
+            std::string lay_b;
            std::string name_a;
            std::string name_b;
-            process_tensor_name(a.tensor, layer, name_a);
-            process_tensor_name(b.tensor, layer, name_b);
-            return legacy_mode ? name_a < name_b || (name_a == name_b && a.sum_values > b.sum_values)
-                               : name_a < name_b || (name_a == name_b && a.cossim > b.cossim);
+            process_tensor_name(a.tensor, lay_a, name_a);
+            process_tensor_name(b.tensor, lay_b, name_b);
+
+            // Handle non-numeric layers (e.g., "output")
+            int blk_a = 9999;
+            int blk_b = 9999;
+            try {
+                blk_a = std::stoi(lay_a);
+            } catch(...) {
+                if (lay_a == "output") { blk_a = 10000; }
+            }
+            try {
+                blk_b = std::stoi(lay_b);
+            } catch(...) {
+                if (lay_b == "output") { blk_b = 10000; }
+            }
+
+            if (blk_a != blk_b) { return blk_a < blk_b; }
+            return name_a < name_b;
        }
    };
-    std::sort(ts.begin(), ts.end(), tensor_comparer(legacy));
+    std::sort(ts.begin(), ts.end(), tensor_comparer());

    struct layer_stats {
        float layer_sum = 0.0f;
        float layer_zd = 0.0f;
        int n = 0;
    };
-
    std::map<int, layer_stats> ls;
-    LOG_INF("\nComputing tensor statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast<int>(ts.size()));
-    LOG_INF("\n%6s\t%18s\t%13s\t%8s\t%8s\t%7s\t%15s\t%13s\t%11s\t%8s\t%5s\t%10s\n",
-        "Layer",
-        "Tensor",
-        legacy ? "Σ E[Act²]" : "L₂ Dist",
-        "Min",
-        "Max",
-        "μ",
-        "σ",
-        "N",
-        "H Norm",
-        legacy ? "H" : "ECS",
-        "ZD",
-        "CosSim");
-    LOG_INF(
-        "=============================================================================================================="
-        "=============================================================\n");

-    // Euclidean-Cosine score
-    auto ecs = [](const float l2_dist, const float cossim) {
-        return 100.0f - (100.0f * (1.0f / (1.0f + ((2.0f / 3.0f) * l2_dist * l2_dist))) * ((1 + cossim) * 0.5f));
+    // Helper to shorten names for table formatting "blk.10.attn_k.weight" -> "..10.attn_k.weight"
+    auto label_fmt = [](std::string s, size_t w) -> std::string {
+        if (s.length() <= w) { return s; }
+        return ".." + s.substr(s.length() - (w - 2));
    };

+    // Table Constants
+    constexpr int w_lay = 6;
+    constexpr int w_nam = 40; // Wide enough for most tensors
+    const auto * sep = " | ";
+
+    LOG_INF("\nComputing tensor statistics (%d tensors)\n", static_cast<int>(ts.size()));
+
+    // Header logic separated to handle different column counts
+    if (legacy) {
+        LOG_INF("\n%*s%s%-*s%s%10s %12s %10s %10s%s %8s  %8s%s%17s %8s %8s\n",
+            w_lay, "Layer", sep,
+            w_nam, "Tensor", sep,
+            "Min", "Max", "Mean", "StdDev", sep,
+            "H_Norm", "ZD", sep,
+            "∑ E[A²]", "CosSim", "PCC");
+        LOG_INF("%s\n", std::string(154, '-').c_str());
+    } else {
+        LOG_INF("\n%*s%s%-*s%s%10s %12s %10s %10s%s %8s  %8s%s%17s %12s %8s %8s\n",
+            w_lay, "Layer", sep,
+            w_nam, "Tensor", sep,
+            "Min", "Max", "Mean", "StdDev", sep,
+            "H_Norm", "ZD", sep,
+            "∑ E[A²]", "L2 Dist", "CosSim", "PCC");
+        LOG_INF("%s\n", std::string(167, '-').c_str());
+    }
+
    for (const auto & tstat : ts) {
        std::string layer;
        std::string name;
        process_tensor_name(tstat.tensor, layer, name);
+
+        // Calculate metrics
        const float h_norm = tstat.elements > 1 ? 100.0f * (tstat.entropy / std::log2((float) tstat.elements)) : 0.0f;

        int blk;
-        try {
-            blk = std::stoi(layer);
-        } catch (const std::exception &) {
-            blk = -1; // not a block layer
-        }
+        try { blk = std::stoi(layer); } catch (...) { blk = -1; }

-        LOG_INF("%5s\t%-20s\t%11.4f\t%10.4f\t%10.4f\t%8.4f\t%8.4f\t%7d\t%10.2f%%\t%10.4f\t%6.2f%%\t%10.4f\n",
-            layer.c_str(),
-            name.c_str(),
-            legacy ? tstat.sum_values : tstat.l2_dist,
-            tstat.min_values,
-            tstat.max_values,
-            tstat.mean_values,
-            tstat.std_deviation,
-            tstat.elements,
-            h_norm,
-            legacy ? tstat.entropy : ecs(tstat.l2_dist, tstat.cossim),
-            100.0f * tstat.zd_score,
-            tstat.cossim);
-
-        const float zd = (float)tstat.elements * tstat.zd_score;
-        if (ls.find(blk) != ls.end()) {
-            if (legacy) { ls[blk].layer_sum += tstat.sum_values; }
-            ls[blk].layer_zd += zd;
-            ls[blk].n += tstat.elements;
+        // Print Row
+        if (legacy) {
+            LOG_INF("%*s%s%-*s%s%10.4f %12.4f %10.4f %10.4f%s%8.2f%% %8.2f%%%s%14.4f %8.4f %8.4f\n",
+                w_lay, layer.c_str(), sep,
+                w_nam, label_fmt(tstat.tensor, w_nam).c_str(), sep,
+                tstat.min_values, tstat.max_values, tstat.mean_values, tstat.std_deviation, sep,
+                h_norm, 100.0f * tstat.zd_score, sep,
+                tstat.sum_values, tstat.cossim, tstat.pearson
+            );
        } else {
-            layer_stats temp_ls;
-            if (legacy) { temp_ls.layer_sum = tstat.sum_values; }
-            else { temp_ls.layer_sum = 0.0f; }
-            temp_ls.layer_zd = zd;
-            temp_ls.n = tstat.elements;
-            ls[blk] = temp_ls;
+            // Display L2 Dist AND Sum E[A^2]
+            LOG_INF("%*s%s%-*s%s%10.4f %12.4f %10.4f %10.4f%s%8.2f%% %8.2f%%%s%14.4f %12.4f %8.4f %8.4f\n",
+                w_lay, layer.c_str(), sep,
+                w_nam, label_fmt(tstat.tensor, w_nam).c_str(), sep,
+                tstat.min_values, tstat.max_values, tstat.mean_values, tstat.std_deviation, sep,
+                h_norm, 100.0f * tstat.zd_score, sep,
+                tstat.sum_values, tstat.l2_dist, tstat.cossim, tstat.pearson
+            );
        }
+
+        // Aggregate Layer Stats
+        const float zd = (float)tstat.elements * tstat.zd_score;
+        auto & l_entry = ls[blk];
+
+        // Accumulate sum values regardless of legacy status to allow display in both modes
+        l_entry.layer_sum += tstat.sum_values;
+        l_entry.layer_zd += zd;
+        l_entry.n += tstat.elements;
    }

+    // --- Computed Layer Statistics ---
+
    std::map<int, float> layer_cossim;
    std::map<int, float> layer_l2_dist;
-    compute_layer_statistics(ts, layer_cossim, layer_l2_dist, g_collector.get_mstats());
+    std::map<int, float> layer_pearson;
+    compute_layer_statistics(ts, layer_cossim, layer_l2_dist, layer_pearson);

-    const size_t layers = std::count_if(ls.begin(), ls.end(), [](const auto & kv) { return kv.first >= 0; });
-    LOG_INF("\nComputing layer statistics (%zu layers)\n", layers);
-    LOG_INF("\n%6s\t%13s\t%6s\t%11s\t%6s\n",
-        "Layer",
-        legacy ? "Σ E[Act²]" : "L₂ Dist",
-        "ZD",
-        "CosSim",
-        legacy ? "" : "ECS");
+    LOG_INF("\n\nComputing layer statistics (%zu layers)\n\n", ls.size() - 2);
+
+    // Layer Table Headers
    if (legacy) {
-        LOG_INF("============================================\n");
+        LOG_INF("%*s%s%9s%s%17s %10s %10s\n",
+            w_lay, "Layer", sep,
+            "ZD", sep,
+            "∑ E[A²]", "CosSim", "PCC");
+        LOG_INF("%s\n", std::string(57, '-').c_str());
    } else {
-        LOG_INF("=========================================================\n");
+        LOG_INF("%*s%s%9s%s%17s %14s %10s %10s\n",
+            w_lay, "Layer", sep,
+            "ZD", sep,
+            "∑ E[A²]", "L2 Dist", "CosSim", "PCC");
+        LOG_INF("%s\n", std::string(72, '-').c_str());
    }
+
    for (const auto & [layer, stats] : ls) {
        if (layer < 0 || stats.n == 0) { continue; }
-        const auto lcs = layer_cossim.find(layer);
-        const auto ll2n = layer_l2_dist.find(layer);
-        float layer_cs  = 0.0f;
-        float layer_l2n = 0.0f;

-        if (lcs != layer_cossim.end() && ll2n != layer_l2_dist.end()) {
-            layer_cs  = lcs->second;
-            layer_l2n = ll2n->second;
-        } else if (layer == 0) {
-            layer_cs  = 1.0f;
-            layer_l2n = 0.0f;
-        } else {
-            continue;
-        }
+        float lcs = layer == 0 ? 1.0f : layer_cossim[layer];
+        float ll2 = layer == 0 ? 0.0f : layer_l2_dist[layer];
+        float lpc = layer == 0 ? 1.0f : layer_pearson[layer];

        if (legacy) {
-            LOG_INF("%5d\t%11.4f\t%6.2f%%\t%11.4f\n",
-                layer,
-                stats.layer_sum,
-                100.0f * stats.layer_zd / stats.n,
-                layer_cs);
+            LOG_INF("%*d%s%8.2f%%%s%14.4f %10.4f %10.4f\n",
+                w_lay, layer, sep,
+                100.0f * stats.layer_zd / stats.n, sep,
+                stats.layer_sum, lcs, lpc);
        } else {
-            LOG_INF("%5d\t%11.4f\t%6.2f%%\t%11.4f\t%8.4f\n",
-                layer,
-                layer_l2n,
-                100.0f * stats.layer_zd / stats.n,
-                layer_cs,
-                ecs(layer_l2n, layer_cs));
+            LOG_INF("%*d%s%8.2f%%%s%14.4f %14.4f %10.4f %10.4f\n",
+                w_lay, layer, sep,
+                100.0f * stats.layer_zd / stats.n, sep,
+                stats.layer_sum, ll2, lcs, lpc);
        }
    }
    LOG_INF("\n");
-
    return true;
 }