diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index e65c28723f..d7b90db01f 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -1051,8 +1051,8 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
         close_ofstream();
     }
 
-    LLAMA_LOG_INFO("%s: model size = %8.2f MiB\n", __func__, total_size_org/1024.0/1024.0);
-    LLAMA_LOG_INFO("%s: quant size = %8.2f MiB\n", __func__, total_size_new/1024.0/1024.0);
+    LLAMA_LOG_INFO("%s: model size = %8.2f MiB (%.2f BPW)\n", __func__, total_size_org/1024.0/1024.0, total_size_org*8.0/ml.n_elements);
+    LLAMA_LOG_INFO("%s: quant size = %8.2f MiB (%.2f BPW)\n", __func__, total_size_new/1024.0/1024.0, total_size_new*8.0/ml.n_elements);
 
     if (qs.n_fallback > 0) {
         LLAMA_LOG_WARN("%s: WARNING: %d of %d tensor(s) required fallback quantization\n",