show model and quant BPW when quant completes

2026-02-11 15:30:12 -06:00 · 2026-02-11 15:30:12 -06:00 · 966b21a981
parent 150e1db21d
commit 966b21a981
1 changed files with 2 additions and 2 deletions
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@ -1051,8 +1051,8 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
        close_ofstream();
    }

-    LLAMA_LOG_INFO("%s: model size  = %8.2f MiB\n", __func__, total_size_org/1024.0/1024.0);
-    LLAMA_LOG_INFO("%s: quant size  = %8.2f MiB\n", __func__, total_size_new/1024.0/1024.0);
+    LLAMA_LOG_INFO("%s: model size  = %8.2f MiB (%.2f BPW)\n", __func__, total_size_org/1024.0/1024.0, total_size_org*8.0/ml.n_elements);
+    LLAMA_LOG_INFO("%s: quant size  = %8.2f MiB (%.2f BPW)\n", __func__, total_size_new/1024.0/1024.0, total_size_new*8.0/ml.n_elements);

    if (qs.n_fallback > 0) {
        LLAMA_LOG_WARN("%s: WARNING: %d of %d tensor(s) required fallback quantization\n",