From 966b21a981d2279358d6de76a03dc8de6b8617d4 Mon Sep 17 00:00:00 2001
From: ddh0
Date: Wed, 11 Feb 2026 15:30:12 -0600
Subject: [PATCH] show model and quant BPW when quant completes

---
 src/llama-quant.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index e65c28723f..d7b90db01f 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -1051,8 +1051,8 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
         close_ofstream();
     }
 
-    LLAMA_LOG_INFO("%s: model size = %8.2f MiB\n", __func__, total_size_org/1024.0/1024.0);
-    LLAMA_LOG_INFO("%s: quant size = %8.2f MiB\n", __func__, total_size_new/1024.0/1024.0);
+    LLAMA_LOG_INFO("%s: model size = %8.2f MiB (%.2f BPW)\n", __func__, total_size_org/1024.0/1024.0, total_size_org*8.0/ml.n_elements);
+    LLAMA_LOG_INFO("%s: quant size = %8.2f MiB (%.2f BPW)\n", __func__, total_size_new/1024.0/1024.0, total_size_new*8.0/ml.n_elements);
 
     if (qs.n_fallback > 0) {
         LLAMA_LOG_WARN("%s: WARNING: %d of %d tensor(s) required fallback quantization\n",