clean slate for branch

This commit is contained in:
ddh0 2026-02-11 12:47:13 -06:00
parent 914dde72ba
commit 844ad3e326
3 changed files with 9 additions and 3 deletions

View File

@ -393,6 +393,7 @@ extern "C" {
void * kv_overrides; // pointer to vector containing overrides
void * tensor_types; // pointer to vector containing tensor types
void * prune_layers; // pointer to vector containing layer indices to prune
bool dry_run; // calculate and show the final quantization size without performing quantization
} llama_model_quantize_params;
typedef struct llama_logit_bias {

View File

@ -1048,7 +1048,8 @@ llama_model_quantize_params llama_model_quantize_default_params() {
/*.imatrix =*/ nullptr,
/*.kv_overrides =*/ nullptr,
/*.tensor_type =*/ nullptr,
/*.prune_layers =*/ nullptr
/*.prune_layers =*/ nullptr,
/*.dry_run =*/ false
};
return result;

View File

@ -120,7 +120,7 @@ static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftyp
static void usage(const char * executable) {
printf("usage: %s [--help] [--allow-requantize] [--leave-output-tensor] [--pure] [--imatrix] [--include-weights]\n", executable);
printf(" [--exclude-weights] [--output-tensor-type] [--token-embedding-type] [--tensor-type] [--tensor-type-file]\n");
printf(" [--prune-layers] [--keep-split] [--override-kv]\n");
printf(" [--prune-layers] [--keep-split] [--override-kv] [--dry-run]\n");
printf(" model-f32.gguf [model-quant.gguf] type [nthreads]\n\n");
printf(" --allow-requantize\n");
printf(" allow requantizing tensors that have already been quantized\n");
@ -156,7 +156,9 @@ static void usage(const char * executable) {
printf(" generate quantized model in the same shards as input\n");
printf(" --override-kv KEY=TYPE:VALUE\n");
printf(" override model metadata by key in the quantized model. may be specified multiple times.\n");
printf(" WARNING: this is an advanced option, use with care.\n\n");
printf(" WARNING: this is an advanced option, use with care.\n");
printf(" --dry-run\n");
printf(" calculate and show the final quantization size without performing quantization\n\n");
printf("note: --include-weights and --exclude-weights cannot be used together\n\n");
printf("-----------------------------------------------------------------------------\n");
printf(" allowed quantization types\n");
@ -532,6 +534,8 @@ int main(int argc, char ** argv) {
if (arg_idx == argc-1 || !string_parse_kv_override(argv[++arg_idx], kv_overrides)) {
usage(argv[0]);
}
} else if (strcmp(argv[arg_idx], "--dry-run") == 0) {
params.dry_run = true;
} else if (strcmp(argv[arg_idx], "--allow-requantize") == 0) {
params.allow_requantize = true;
} else if (strcmp(argv[arg_idx], "--pure") == 0) {