diff --git a/tools/quantize/quantize.cpp b/tools/quantize/quantize.cpp index 2c45adab75..5331dec80c 100644 --- a/tools/quantize/quantize.cpp +++ b/tools/quantize/quantize.cpp @@ -512,6 +512,7 @@ int main(int argc, char ** argv) { std::vector kv_overrides; std::vector tensor_types; std::vector prune_layers; + float target_bpw = -1.0f; for (; arg_idx < argc && strncmp(argv[arg_idx], "--", 2) == 0; arg_idx++) { if (strcmp(argv[arg_idx], "--leave-output-tensor") == 0) { @@ -538,6 +539,10 @@ int main(int argc, char ** argv) { if (arg_idx == argc-1 || !parse_tensor_type(argv[++arg_idx], tensor_types)) { usage(argv[0]); } + } else if (strcmp(argv[arg_idx], "--target-bpw") == 0) { + if (arg_idx == argc-1 || !parse_target_bpw(argv[++arg_idx], target_bpw)) { + usage(argv[0]); + } } else if (strcmp(argv[arg_idx], "--prune-layers") == 0) { if (arg_idx == argc-1 || !parse_layer_prune(argv[++arg_idx], prune_layers)) { usage(argv[0]);