diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index 2652f5c86e..8ee052a8e5 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -1347,9 +1347,12 @@ static std::unordered_map target_bpw_type( // increase mu until we get under budget or hit a safety cap { int expand = 0; + size_t prev_bytes_hi = std::numeric_limits<size_t>::max(); while (true) { lagrange_penalty(mu_hi, choice_hi, bytes_hi, err_hi); if (bytes_hi <= budget_bytes) { break; } + if (bytes_hi >= prev_bytes_hi) { break; } + prev_bytes_hi = bytes_hi; mu_hi *= 2.0; if (++expand > 60) { break; } // safety cap