From 844ad3e3268259b85456ebfd4d3417f9b3825c29 Mon Sep 17 00:00:00 2001
From: ddh0
Date: Wed, 11 Feb 2026 12:47:13 -0600
Subject: [PATCH] quantize: add --dry-run option to show final size without
 quantizing

---
 include/llama.h             | 1 +
 src/llama-quant.cpp         | 3 ++-
 tools/quantize/quantize.cpp | 8 ++++++--
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/include/llama.h b/include/llama.h
index 46c3672e98..8bcefda896 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -393,6 +393,7 @@ extern "C" {
         void * kv_overrides;                 // pointer to vector containing overrides
         void * tensor_types;                 // pointer to vector containing tensor types
         void * prune_layers;                 // pointer to vector containing layer indices to prune
+        bool dry_run;                        // calculate and show the final quantization size without performing quantization
     } llama_model_quantize_params;
 
     typedef struct llama_logit_bias {
diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index a7891647c3..730f13e29e 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -1048,7 +1048,8 @@ llama_model_quantize_params llama_model_quantize_default_params() {
         /*.imatrix      =*/ nullptr,
         /*.kv_overrides =*/ nullptr,
         /*.tensor_type  =*/ nullptr,
-        /*.prune_layers =*/ nullptr
+        /*.prune_layers =*/ nullptr,
+        /*.dry_run      =*/ false
     };
 
     return result;
diff --git a/tools/quantize/quantize.cpp b/tools/quantize/quantize.cpp
index c0f49279ee..3f99d9e6a7 100644
--- a/tools/quantize/quantize.cpp
+++ b/tools/quantize/quantize.cpp
@@ -120,7 +120,7 @@ static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftyp
 static void usage(const char * executable) {
     printf("usage: %s [--help] [--allow-requantize] [--leave-output-tensor] [--pure] [--imatrix] [--include-weights]\n", executable);
     printf("       [--exclude-weights] [--output-tensor-type] [--token-embedding-type] [--tensor-type] [--tensor-type-file]\n");
-    printf("       [--prune-layers] [--keep-split] [--override-kv]\n");
+    printf("       [--prune-layers] [--keep-split] [--override-kv] [--dry-run]\n");
     printf("       model-f32.gguf [model-quant.gguf] type [nthreads]\n\n");
     printf("  --allow-requantize\n");
     printf("      allow requantizing tensors that have already been quantized\n");
@@ -156,7 +156,9 @@ static void usage(const char * executable) {
     printf("      generate quantized model in the same shards as input\n");
     printf("  --override-kv KEY=TYPE:VALUE\n");
     printf("      override model metadata by key in the quantized model. may be specified multiple times.\n");
-    printf("      WARNING: this is an advanced option, use with care.\n\n");
+    printf("      WARNING: this is an advanced option, use with care.\n");
+    printf("  --dry-run\n");
+    printf("      calculate and show the final quantization size without performing quantization\n\n");
     printf("note: --include-weights and --exclude-weights cannot be used together\n\n");
     printf("-----------------------------------------------------------------------------\n");
     printf(" allowed quantization types\n");
@@ -532,6 +534,8 @@ int main(int argc, char ** argv) {
             if (arg_idx == argc-1 || !string_parse_kv_override(argv[++arg_idx], kv_overrides)) {
                 usage(argv[0]);
             }
+        } else if (strcmp(argv[arg_idx], "--dry-run") == 0) {
+            params.dry_run = true;
         } else if (strcmp(argv[arg_idx], "--allow-requantize") == 0) {
            params.allow_requantize = true;
        } else if (strcmp(argv[arg_idx], "--pure") == 0) {