From 5bb2def02dcf923743029f72b0c16b17e3609e28 Mon Sep 17 00:00:00 2001
From: Ed Addario
Date: Thu, 7 Aug 2025 17:41:21 +0100
Subject: [PATCH] Add --activation-statistics parameter

Register an --activation-statistics flag for the imatrix example, backed by
a new common_params::activation_statistics field (default: false), and list
the flag in the imatrix usage text.

Nothing in this patch reads the new field yet; collect_imatrix only gains a
ToDo about implementing it without doubling the imatrix size by default.

The help text reports the default of activation_statistics itself rather
than that of the neighbouring show_statistics flag.
---
 common/arg.cpp            | 7 +++++++
 common/common.h           | 9 +++++----
 tools/imatrix/imatrix.cpp | 3 ++-
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/common/arg.cpp b/common/arg.cpp
index 0f01bb3145..2cd0cc0119 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -2707,6 +2707,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.show_statistics = true;
         }
     ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
+    add_opt(common_arg(
+        {"--activation-statistics"},
+        string_format("generate data to compute activation-based statistics (default: %s)", params.activation_statistics ? "true" : "false"),
+        [](common_params & params) {
+            params.activation_statistics = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
     add_opt(common_arg(
         {"--parse-special"},
         string_format("prase special tokens (chat, tool, etc) (default: %s)", params.parse_special ? "true" : "false"),
diff --git a/common/common.h b/common/common.h
index 5eab199af5..d5dfdd49e0 100644
--- a/common/common.h
+++ b/common/common.h
@@ -443,10 +443,11 @@ struct common_params {
     int32_t i_chunk = 0; // start processing from this chunk
     int8_t imat_dat = 0; // whether the legacy imatrix.dat format should be output (gguf <= 0 < dat)
 
-    bool process_output = false; // collect data for the output tensor
-    bool compute_ppl = true; // whether to compute perplexity
-    bool show_statistics = false; // show imatrix statistics per tensor
-    bool parse_special = false; // whether to parse special tokens during imatrix tokenization
+    bool process_output = false; // collect data for the output tensor
+    bool compute_ppl = true; // whether to compute perplexity
+    bool show_statistics = false; // show imatrix statistics per tensor
+    bool activation_statistics = false; // generate data to calculate activation based statistics
+    bool parse_special = false; // whether to parse special tokens during imatrix tokenization
 
     // cvector-generator params
     int n_pca_batch = 100;
diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp
index a758a94096..902d6e7354 100644
--- a/tools/imatrix/imatrix.cpp
+++ b/tools/imatrix/imatrix.cpp
@@ -30,7 +30,7 @@ static void print_usage(int, char ** argv) {
             " -m model.gguf -f some-text.txt [-o imatrix.gguf] [--output-format {gguf,dat}] [--no-ppl] \\\n"
             " [--process-output] [--chunk 123] [--save-frequency 0] [--output-frequency 10] \\\n"
             " [--in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf ...] [--parse-special] \\\n"
-            " [--show-statistics] [...]\n" , argv[0]);
+            " [--activation-statistics] [--show-statistics] [...]\n" , argv[0]);
     LOG("\n");
 }
 
@@ -428,6 +428,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
             // broadcast, when loading an old imatrix
             e.counts.resize(n_as, e.counts[0]);
         }
+        // ToDo: find an efficient way to implement --activation-statistics to avoid doubling the imatrix size by default
         if (e.values.empty()) {
             e.activations.resize(src1->ne[0]*n_as, 0);
             e.values.resize(src1->ne[0]*n_as, 0);