Add --target-size option
This commit is contained in:
parent
0fdbe5495d
commit
097bdb34de
|
|
@ -394,6 +394,7 @@ extern "C" {
|
|||
void * tensor_types; // pointer to vector containing tensor types
|
||||
void * prune_layers; // pointer to vector containing layer indices to prune
|
||||
float target_bpw; // target bits per weight (bpw)
|
||||
int64_t target_size; // target file size in bytes
|
||||
bool keep_bpw_state; // keep bpw state file
|
||||
void * bpw_state; // pointer to bpw state file
|
||||
bool no_importance; // allocate target bpw budget equitably across all tensors
|
||||
|
|
|
|||
|
|
@ -574,6 +574,7 @@ int main(int argc, char ** argv) {
|
|||
std::vector<tensor_quantization> tensor_types;
|
||||
std::vector<int> prune_layers;
|
||||
float target_bpw = -1.0f;
|
||||
int64_t target_size = -1;
|
||||
|
||||
for (; arg_idx < argc && strncmp(argv[arg_idx], "--", 2) == 0; arg_idx++) {
|
||||
if (strcmp(argv[arg_idx], "--leave-output-tensor") == 0) {
|
||||
|
|
@ -604,6 +605,10 @@ int main(int argc, char ** argv) {
|
|||
if (arg_idx == argc-1 || !parse_target_bpw(argv[++arg_idx], target_bpw)) {
|
||||
usage(argv[0]);
|
||||
}
|
||||
} else if (strcmp(argv[arg_idx], "--target-size") == 0) {
|
||||
if (arg_idx == argc-1 || !parse_target_size(argv[++arg_idx], target_size)) {
|
||||
usage(argv[0]);
|
||||
}
|
||||
} else if (strcmp(argv[arg_idx], "--no-importance") == 0) {
|
||||
params.no_importance = true;
|
||||
} else if (strcmp(argv[arg_idx], "--keep-bpw-state") == 0) {
|
||||
|
|
@ -716,6 +721,9 @@ int main(int argc, char ** argv) {
|
|||
if (target_bpw != -1.0f) {
|
||||
params.target_bpw = target_bpw;
|
||||
}
|
||||
if (target_size != -1) {
|
||||
params.target_size = target_size;
|
||||
}
|
||||
|
||||
llama_backend_init();
|
||||
|
||||
|
|
@ -750,9 +758,9 @@ int main(int argc, char ** argv) {
|
|||
}
|
||||
arg_idx++;
|
||||
|
||||
// select quantization type if target_bpw is set unless user specifies type and threads
|
||||
if (argc - arg_idx <= 1 && params.target_bpw != -1.0f) {
|
||||
auto * ftype = const_cast<char *>(get_ftype(params.target_bpw));
|
||||
// If --target-bpw or --target-size is set, select a quantization type unless the user specifies both type and threads
|
||||
if (argc - arg_idx <= 1 && (params.target_bpw != -1.0f || params.target_size != -1)) {
|
||||
auto * ftype = params.target_bpw != -1.0f ? const_cast<char *>(get_ftype(params.target_bpw)) : const_cast<char *>("F16");
|
||||
if (argc == arg_idx) { tmp_argv.push_back(ftype); }
|
||||
else { tmp_argv.insert(tmp_argv.end() - 1, ftype); }
|
||||
tmp_argv.push_back(nullptr);
|
||||
|
|
|
|||
Loading…
Reference in New Issue