From 26213bc805dcb88d29f809b9863524dcbc71d761 Mon Sep 17 00:00:00 2001
From: Ed Addario
Date: Wed, 7 Jan 2026 18:32:01 +0000
Subject: [PATCH] Update usage()

---
 tools/quantize/quantize.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/quantize/quantize.cpp b/tools/quantize/quantize.cpp
index 0e06a63d4e..1048fa6109 100644
--- a/tools/quantize/quantize.cpp
+++ b/tools/quantize/quantize.cpp
@@ -119,7 +119,7 @@ static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftyp
 [[noreturn]]
 static void usage(const char * executable) {
     printf("usage: %s [--help] [--allow-requantize] [--leave-output-tensor] [--pure] [--imatrix] [--include-weights] [--exclude-weights]\n", executable);
-    printf("       [--target-bpw n] [--target-size n] [--no-importance] [--keep-bpw-state] [--bpw-state filename] [--output-tensor-type] [--token-embedding-type]\n");
+    printf("       [--target-bpw n] [--target-size n] [--ignore-tensor-importance] [--save-state] [--state-file filename] [--output-tensor-type] [--token-embedding-type]\n");
     printf("       [--tensor-type] [--prune-layers] [--keep-split] [--override-kv] model-f32.gguf [model-quant.gguf] type [nthreads]\n\n");
     printf("  --allow-requantize: allows requantizing tensors that have already been quantized. Warning: This can severely reduce quality compared to quantizing from 16bit or 32bit\n");
     printf("  --leave-output-tensor: will leave output.weight un(re)quantized. Increases model size but may also increase quality, especially when requantizing\n");
@@ -133,14 +133,14 @@ static void usage(const char * executable) {
     printf("      Advanced option to selectively quantize tensors. May be specified multiple times.\n");
     printf("  --prune-layers L0,L1,L2...comma-separated list of layer numbers to prune from the model\n");
     printf("      Advanced option to remove all tensors from the given layers\n");
-    printf("  --target-bpw: target a total bits per weight (bpw). Must be a positive number between 0.0 and 16.0\n");
+    printf("  --target-bpw N: target a total bits per weight (bpw). N must be a positive number between 0.0 and 16.0\n");
     printf("      Advanced option to automatically select quantization types to achieve a total bits per weight (bpw) target\n");
-    printf("  --target-size: target a file size. Must be a positive number\n");
+    printf("  --target-size N[unit]: target a file size. N must be a positive number with an optional unit (b, kb, mb, gb, tb)\n");
     printf("      Advanced option to automatically select quantization types to achieve a target file size\n");
-    printf("  --no-importance: distribute bpw budget equitably across all tensors\n");
+    printf("  --ignore-tensor-importance: distribute bpw budget equitably across all tensors\n");
     printf("      Advanced option to disable assigning more bpw budget to important tensors. It may increase quality for some models\n");
-    printf("  --keep-bpw-state: save the bpw computations to -.bpw_state\n");
-    printf("  --bpw-state: file name to use instead of default\n");
+    printf("  --save-state: save the bpw / file size computations to --mse.bpw_state\n");
+    printf("  --state-file file_name: file name to use instead of default\n");
     printf("  --keep-split: will generate quantized model in the same shards as input\n");
     printf("  --override-kv KEY=TYPE:VALUE\n");
     printf("      Advanced option to override model metadata by key in the quantized model. May be specified multiple times.\n");