From 844ad3e3268259b85456ebfd4d3417f9b3825c29 Mon Sep 17 00:00:00 2001
From: ddh0
Date: Wed, 11 Feb 2026 12:47:13 -0600
Subject: [PATCH] quantize: add --dry-run option to show final size without
 quantizing

---
 include/llama.h             | 1 +
 src/llama-quant.cpp         | 3 ++-
 tools/quantize/quantize.cpp | 8 ++++++--
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/include/llama.h b/include/llama.h
index 46c3672e98..8bcefda896 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -393,6 +393,7 @@ extern "C" {
         void * kv_overrides;                 // pointer to vector containing overrides
         void * tensor_types;                 // pointer to vector containing tensor types
         void * prune_layers;                 // pointer to vector containing layer indices to prune
+        bool dry_run;                        // calculate and show the final quantization size without performing quantization
     } llama_model_quantize_params;
 
     typedef struct llama_logit_bias {
diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index a7891647c3..730f13e29e 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -1048,7 +1048,8 @@ llama_model_quantize_params llama_model_quantize_default_params() {
         /*.imatrix      =*/ nullptr,
         /*.kv_overrides =*/ nullptr,
         /*.tensor_type  =*/ nullptr,
-        /*.prune_layers =*/ nullptr
+        /*.prune_layers =*/ nullptr,
+        /*.dry_run      =*/ false
     };
 
     return result;
diff --git a/tools/quantize/quantize.cpp b/tools/quantize/quantize.cpp
index c0f49279ee..3f99d9e6a7 100644
--- a/tools/quantize/quantize.cpp
+++ b/tools/quantize/quantize.cpp
@@ -120,7 +120,7 @@ static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftyp
 static void usage(const char * executable) {
     printf("usage: %s [--help] [--allow-requantize] [--leave-output-tensor] [--pure] [--imatrix] [--include-weights]\n", executable);
     printf("       [--exclude-weights] [--output-tensor-type] [--token-embedding-type] [--tensor-type] [--tensor-type-file]\n");
-    printf("       [--prune-layers] [--keep-split] [--override-kv]\n");
+    printf("       [--prune-layers] [--keep-split] [--override-kv] [--dry-run]\n");
     printf("       model-f32.gguf [model-quant.gguf] type [nthreads]\n\n");
     printf("  --allow-requantize\n");
     printf("      allow requantizing tensors that have already been quantized\n");
@@ -156,7 +156,9 @@ static void usage(const char * executable) {
     printf("      generate quantized model in the same shards as input\n");
     printf("  --override-kv KEY=TYPE:VALUE\n");
     printf("      override model metadata by key in the quantized model. may be specified multiple times.\n");
-    printf("      WARNING: this is an advanced option, use with care.\n\n");
+    printf("      WARNING: this is an advanced option, use with care.\n");
+    printf("  --dry-run\n");
+    printf("      calculate and show the final quantization size without performing quantization\n\n");
     printf("note: --include-weights and --exclude-weights cannot be used together\n\n");
     printf("-----------------------------------------------------------------------------\n");
     printf(" allowed quantization types\n");
@@ -532,6 +534,8 @@ int main(int argc, char ** argv) {
             if (arg_idx == argc-1 || !string_parse_kv_override(argv[++arg_idx], kv_overrides)) {
                 usage(argv[0]);
             }
+        } else if (strcmp(argv[arg_idx], "--dry-run") == 0) {
+            params.dry_run = true;
         } else if (strcmp(argv[arg_idx], "--allow-requantize") == 0) {
            params.allow_requantize = true;
        } else if (strcmp(argv[arg_idx], "--pure") == 0) {