From f3323b6503bbc758c632edf0c4dff9e08c94aed5 Mon Sep 17 00:00:00 2001
From: Ed Addario
Date: Thu, 22 Jan 2026 15:05:15 +0000
Subject: [PATCH] Save tensor statistics to imatrix file

For every tensor written by IMatrixCollector::save_imatrix, also store a
10-element F32 tensor named "<name>.stats" with the following layout:

  [0] sum_sq    (sum_values)
  [1] h_norm    (100 * entropy / log2(elements); NaN when elements <= 1)
  [2] zd_score
  [3] l2_dist
  [4] cossim
  [5] pcc       (pearson)
  [6] min       (min_values)
  [7] max       (max_values)
  [8] mean      (mean_values)
  [9] stddev    (std_deviation)

All ten entries are 0 when no statistics entry exists for the tensor.

h_norm uses the same "elements > 1" guard in save_imatrix and in
show_statistics, so a single-element tensor never divides by log2(1) == 0.
show_statistics now reports NaN instead of 0 in that case.
---
 tools/imatrix/imatrix.cpp | 59 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 58 insertions(+), 1 deletion(-)

diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp
index f4b9b30003..5a68ad2d88 100644
--- a/tools/imatrix/imatrix.cpp
+++ b/tools/imatrix/imatrix.cpp
@@ -753,11 +753,26 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
         data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.activations.size(), GGML_MEM_ALIGN);
         data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.values.size(), GGML_MEM_ALIGN);
         data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.counts.size(), GGML_MEM_ALIGN);
+        data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * 10, GGML_MEM_ALIGN); // room for the 10-float "<name>.stats" tensor
     }
 
     // deterministic tensor name order
     std::sort(to_store.begin(), to_store.end());
 
+    // Compute per-tensor statistics
+    std::vector<tensor_statistics> tstats;
+    tstats.reserve(m_stats.size());
+    bool legacy = false; // NOTE(review): presumably an out-param of compute_vector_statistics; its value is not used here - confirm
+    for (const auto & kv : m_stats) {
+        compute_vector_statistics(tstats, kv.first, kv.second, legacy);
+    }
+    if (!tstats.empty()) { compute_tensor_statistics(tstats); }
+
+    // index by tensor name (pointers into tstats, which must not be resized after this point)
+    std::unordered_map<std::string, const tensor_statistics *> tstat_index;
+    tstat_index.reserve(tstats.size());
+    for (const auto & ts : tstats) { tstat_index[ts.tensor] = &ts; }
+
     struct ggml_init_params params = {
         /* .mem_size   = */ data_size,
         /* .mem_buffer = */ NULL,
@@ -814,6 +829,48 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
                 gguf_add_tensor(ctx_gguf, in_sum);
             }
         }
+
+        // Store per-tensor statistics as a small 1D tensor named "<name>.stats" (all zeros if the tensor has no stats entry)
+        {
+            const float nan = std::numeric_limits<float>::quiet_NaN();
+            float min      = 0.0f;
+            float max      = 0.0f;
+            float mean     = 0.0f;
+            float stddev   = 0.0f;
+            float h_norm   = 0.0f;
+            float zd_score = 0.0f;
+            float sum_sq   = 0.0f;
+            float l2_dist  = 0.0f;
+            float cossim   = 0.0f;
+            float pcc      = 0.0f;
+            auto it_ts = tstat_index.find(name);
+            if (it_ts != tstat_index.end() && it_ts->second != nullptr) {
+                sum_sq   = it_ts->second->sum_values;
+                h_norm   = it_ts->second->elements > 1 ? 100.0f * (it_ts->second->entropy / std::log2f((float)it_ts->second->elements)) : nan; // > 1: log2(1) == 0
+                zd_score = it_ts->second->zd_score;
+                l2_dist  = it_ts->second->l2_dist;
+                cossim   = it_ts->second->cossim;
+                pcc      = it_ts->second->pearson;
+                min      = it_ts->second->min_values;
+                max      = it_ts->second->max_values;
+                mean     = it_ts->second->mean_values;
+                stddev   = it_ts->second->std_deviation;
+            }
+
+            struct ggml_tensor * stats_t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 10);
+            ggml_format_name(stats_t, "%s.stats", name.c_str());
+            ((float *)stats_t->data)[0] = sum_sq;
+            ((float *)stats_t->data)[1] = h_norm;
+            ((float *)stats_t->data)[2] = zd_score;
+            ((float *)stats_t->data)[3] = l2_dist;
+            ((float *)stats_t->data)[4] = cossim;
+            ((float *)stats_t->data)[5] = pcc;
+            ((float *)stats_t->data)[6] = min;
+            ((float *)stats_t->data)[7] = max;
+            ((float *)stats_t->data)[8] = mean;
+            ((float *)stats_t->data)[9] = stddev;
+            gguf_add_tensor(ctx_gguf, stats_t);
+        }
     }
 
     gguf_write_to_file(ctx_gguf, fname.c_str(), false);
@@ -1404,7 +1461,7 @@ static bool show_statistics(const common_params & params) {
         std::string name;
         process_tensor_name(tstat.tensor, layer, name);
 
-        const float h_norm = tstat.elements > 1 ? 100.0f * (tstat.entropy / std::log2((float) tstat.elements)) : 0.0f;
+        const float h_norm = tstat.elements > 1 ? 100.0f * (tstat.entropy / std::log2f((float)tstat.elements)) : std::numeric_limits<float>::quiet_NaN();
 
         int blk;
         try { blk = std::stoi(layer); } catch (...) { blk = -1; }