Add --target-size option

This commit is contained in:
Ed Addario 2026-01-07 18:10:27 +00:00
parent 0fdbe5495d
commit 097bdb34de
No known key found for this signature in database
GPG Key ID: E7875815A3230993
2 changed files with 12 additions and 3 deletions

View File

@@ -394,6 +394,7 @@ extern "C" {
void * tensor_types; // pointer to vector containing tensor types
void * prune_layers; // pointer to vector containing layer indices to prune
float target_bpw; // target bits per weight (bpw)
int64_t target_size; // target file size in bytes
bool keep_bpw_state; // keep bpw state file
void * bpw_state; // pointer to bpw state file
bool no_importance; // allocate target bpw budget equitably across all tensors

View File

@@ -574,6 +574,7 @@ int main(int argc, char ** argv) {
std::vector<tensor_quantization> tensor_types;
std::vector<int> prune_layers;
float target_bpw = -1.0f;
int64_t target_size = -1;
for (; arg_idx < argc && strncmp(argv[arg_idx], "--", 2) == 0; arg_idx++) {
if (strcmp(argv[arg_idx], "--leave-output-tensor") == 0) {
@@ -604,6 +605,10 @@ int main(int argc, char ** argv) {
if (arg_idx == argc-1 || !parse_target_bpw(argv[++arg_idx], target_bpw)) {
usage(argv[0]);
}
} else if (strcmp(argv[arg_idx], "--target-size") == 0) {
if (arg_idx == argc-1 || !parse_target_size(argv[++arg_idx], target_size)) {
usage(argv[0]);
}
} else if (strcmp(argv[arg_idx], "--no-importance") == 0) {
params.no_importance = true;
} else if (strcmp(argv[arg_idx], "--keep-bpw-state") == 0) {
@@ -716,6 +721,9 @@ int main(int argc, char ** argv) {
if (target_bpw != -1.0f) {
params.target_bpw = target_bpw;
}
if (target_size != -1) {
params.target_size = target_size;
}
llama_backend_init();
@@ -750,9 +758,9 @@ int main(int argc, char ** argv) {
}
arg_idx++;
// select quantization type if target_bpw is set unless user specifies type and threads
if (argc - arg_idx <= 1 && params.target_bpw != -1.0f) {
auto * ftype = const_cast<char *>(get_ftype(params.target_bpw));
// If --target-bpw or --target-size are set, select a quantization type unless user specifies type and threads
if (argc - arg_idx <= 1 && (params.target_bpw != -1.0f || params.target_size != -1)) {
auto * ftype = params.target_bpw != -1.0f ? const_cast<char *>(get_ftype(params.target_bpw)) : const_cast<char *>("F16");
if (argc == arg_idx) { tmp_argv.push_back(ftype); }
else { tmp_argv.insert(tmp_argv.end() - 1, ftype); }
tmp_argv.push_back(nullptr);