From f22b3097eb144a913d02fbb445cbdb9b97e91859 Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Tue, 19 Aug 2025 22:34:01 +0100 Subject: [PATCH] Avoid division by zero if truncation occurs --- src/llama-quant.cpp | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index 546f6b438c..3911eba43b 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -790,28 +790,24 @@ static std::unordered_map target_bpw_type( } // Scale for the rows we didn't sample in this expert: multiply by stride-ish factor - const float scale_rows = rows_per_expert / std::max(1, rs); + const float scale_rows = (float)rows_per_expert / std::max(1.0f, (float)rs); total_err *= scale_rows; } return total_err; }; - // Produce per-tensor candidate lists std::vector all; all.reserve(tensors.size()); for (const auto * tw : tensors) { - // Temporary workers for dequantization std::vector workers; workers.reserve(std::max(1, nthread)); ggml_tensor * t = tw->tensor; const std::string name = ggml_get_name(t); - if (!can_quantize(t)) { - continue; - } + if (!can_quantize(t)) { continue; } LLAMA_LOG_INFO("\t%s: - processing tensor %45s \t(%12d elements)\n", __func__, name.c_str(), (int)ggml_nelements(t)); if (!ml.use_mmap) { @@ -820,7 +816,6 @@ static std::unordered_map target_bpw_type( } ml.load_data_for(t); - // Prepare f32 weights for error estimates const int64_t nelem = ggml_nelements(t); std::vector> f32_conv_buf; float * f32_data = nullptr; @@ -955,13 +950,13 @@ static std::unordered_map target_bpw_type( if (ti.choice >= (int)ti.candidate.size() - 1) { continue; } int j = next_distinct_idx(ti); - if (j < 0) { continue; } // no larger-size candidate remains + if (j < 0) { continue; } const auto &cur = ti.candidate[ti.choice]; const auto &nxt = ti.candidate[j]; size_t delta_bytes = nxt.bytes - cur.bytes; - if (delta_bytes == 0) { continue; } // should not happen after dedup, but be safe + if (delta_bytes == 0) { continue; } double err = (double)cur.error - (double)nxt.error; err = std::max(err, 0.0); // do not penalize due to sampling noise