From 966b21a981d2279358d6de76a03dc8de6b8617d4 Mon Sep 17 00:00:00 2001
From: ddh0
Date: Wed, 11 Feb 2026 15:30:12 -0600
Subject: [PATCH] show model and quant BPW when quant completes

---
 src/llama-quant.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index e65c28723f..d7b90db01f 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -1051,8 +1051,8 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
         close_ofstream();
     }
 
-    LLAMA_LOG_INFO("%s: model size = %8.2f MiB\n", __func__, total_size_org/1024.0/1024.0);
-    LLAMA_LOG_INFO("%s: quant size = %8.2f MiB\n", __func__, total_size_new/1024.0/1024.0);
+    LLAMA_LOG_INFO("%s: model size = %8.2f MiB (%.2f BPW)\n", __func__, total_size_org/1024.0/1024.0, total_size_org*8.0/ml.n_elements);
+    LLAMA_LOG_INFO("%s: quant size = %8.2f MiB (%.2f BPW)\n", __func__, total_size_new/1024.0/1024.0, total_size_new*8.0/ml.n_elements);
 
     if (qs.n_fallback > 0) {
         LLAMA_LOG_WARN("%s: WARNING: %d of %d tensor(s) required fallback quantization\n",