Fix bug where legacy_mode was overwritten on each tensor

This commit is contained in:
Ed Addario 2025-10-28 18:27:19 +00:00
parent 8fd2aca8ec
commit af3b6aca22
No known key found for this signature in database
GPG Key ID: E7875815A3230993
1 changed file with 48 additions and 43 deletions

View File

@ -1260,7 +1260,7 @@ static bool compute_imatrix(llama_context * ctx, const common_params & params, c
static bool show_statistics(const common_params & params) { static bool show_statistics(const common_params & params) {
std::vector<tensor_statistics> ts; std::vector<tensor_statistics> ts;
bool legacy_mode = false; bool legacy_mode = true;
if (params.in_files.empty() || params.in_files.size() > 1) { if (params.in_files.empty() || params.in_files.size() > 1) {
LOG_ERR("\nError: a single imatrix file is required to compute tensor statistics\n\n"); LOG_ERR("\nError: a single imatrix file is required to compute tensor statistics\n\n");
@ -1268,7 +1268,8 @@ static bool show_statistics(const common_params & params) {
} }
if (g_collector.load_imatrix(params.in_files[0].c_str())) { if (g_collector.load_imatrix(params.in_files[0].c_str())) {
for (const auto & [name, stats] : g_collector.get_mstats()) { for (const auto & [name, stats] : g_collector.get_mstats()) {
legacy_mode = compute_vector_statistics(ts, name, stats); const bool is_legacy = compute_vector_statistics(ts, name, stats);
legacy_mode = legacy_mode && is_legacy;
} }
} else { } else {
LOG_ERR("\nError: %s is not a valid imatrix file\n\n", params.in_files[0].c_str()); LOG_ERR("\nError: %s is not a valid imatrix file\n\n", params.in_files[0].c_str());
@ -1286,11 +1287,13 @@ static bool show_statistics(const common_params & params) {
explicit tensor_comparer(const bool legacy) : legacy_mode(legacy) {} explicit tensor_comparer(const bool legacy) : legacy_mode(legacy) {}
bool operator()(const tensor_statistics & a, const tensor_statistics & b) const { bool operator()(const tensor_statistics & a, const tensor_statistics & b) const {
std::string layer, name_a, name_b; std::string layer;
std::string name_a;
std::string name_b;
process_tensor_name(a.tensor, layer, name_a); process_tensor_name(a.tensor, layer, name_a);
process_tensor_name(b.tensor, layer, name_b); process_tensor_name(b.tensor, layer, name_b);
return legacy_mode ? name_a < name_b || (name_a == name_b && a.sum_values > b.sum_values) : return legacy_mode ? name_a < name_b || (name_a == name_b && a.sum_values > b.sum_values)
name_a < name_b || (name_a == name_b && a.cossim > b.cossim); : name_a < name_b || (name_a == name_b && a.cossim > b.cossim);
} }
}; };
std::sort(ts.begin(), ts.end(), tensor_comparer(legacy_mode)); std::sort(ts.begin(), ts.end(), tensor_comparer(legacy_mode));
@ -1319,6 +1322,7 @@ static bool show_statistics(const common_params & params) {
LOG_INF( LOG_INF(
"==============================================================================================================" "=============================================================================================================="
"=============================================================\n"); "=============================================================\n");
for (const auto & tstat : ts) { for (const auto & tstat : ts) {
std::string layer, name; std::string layer, name;
process_tensor_name(tstat.tensor, layer, name); process_tensor_name(tstat.tensor, layer, name);
@ -1326,26 +1330,27 @@ static bool show_statistics(const common_params & params) {
int blk; int blk;
try { try {
blk = std::stoi(layer); blk = std::stoi(layer);
} catch (const std::exception & e) { } catch (const std::exception &) {
blk = -1; // not a block layer blk = -1; // not a block layer
} }
const float h_norm = tstat.elements > 1 ? 100.0f * (tstat.entropy / std::log2((float) tstat.elements)) : 0.0f;
const float ecs = 100.0f * std::exp(-0.01f * tstat.l2_norm) * std::pow(std::fabs(tstat.cossim), 10.0f);
LOG_INF("%5s\t%-20s\t%11.2f\t%10.4f\t%10.4f\t%8.2f\t%8.2f\t%7d\t%10.2f%%\t%10.4f\t%6.2f%%\t%10.4f\n", LOG_INF("%5s\t%-20s\t%11.2f\t%10.4f\t%10.4f\t%8.2f\t%8.2f\t%7d\t%10.2f%%\t%10.4f\t%6.2f%%\t%10.4f\n",
layer.c_str(), layer.c_str(), name.c_str(),
name.c_str(),
legacy_mode ? tstat.sum_values : tstat.l2_norm, legacy_mode ? tstat.sum_values : tstat.l2_norm,
tstat.min_values, tstat.min_values,
tstat.max_values, tstat.max_values,
tstat.mean_values, tstat.mean_values,
tstat.std_deviation, tstat.std_deviation,
tstat.elements, tstat.elements,
100.0f * (tstat.entropy / std::log2(tstat.elements)), h_norm,
legacy_mode ? tstat.entropy : 100.0f * std::exp(-0.01f * tstat.l2_norm) * std::pow(fabs(tstat.cossim), 10.0f), legacy_mode ? tstat.entropy : ecs,
100.0f * tstat.zd_score, 100.0f * tstat.zd_score,
tstat.cossim); tstat.cossim);
const float zd = tstat.elements * tstat.zd_score; const float zd = tstat.elements * tstat.zd_score;
if (ls.find(blk) != ls.end()) { if (ls.find(blk) != ls.end()) {
ls[blk].layer_sum += tstat.sum_values; ls[blk].layer_sum += tstat.sum_values;
ls[blk].layer_zd += zd; ls[blk].layer_zd += zd;
@ -1394,7 +1399,7 @@ static bool show_statistics(const common_params & params) {
layer_l2n, layer_l2n,
100.0f * stats.layer_zd / stats.n, 100.0f * stats.layer_zd / stats.n,
layer_cs, layer_cs,
100.0f * std::exp(-0.01f * layer_l2n) * std::pow(fabs(layer_cs), 10.0f)); 100.0f * std::exp(-0.01f * layer_l2n) * std::pow(std::fabs(layer_cs), 10.0f));
} }
} }
LOG_INF("\n"); LOG_INF("\n");