From 46706cec28ad83b8ab10781493b84343b5b0f048 Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Sun, 5 Oct 2025 20:20:28 +0100 Subject: [PATCH] Persist progress --- src/llama-quant.cpp | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index 9212c88563..640672aec7 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -1100,12 +1100,28 @@ static std::unordered_map target_bpw_type( return lambdas; }; + install_signal_handlers(); + auto bpw_data = load_bpw_state(); std::vector all; all.reserve(tensors.size()); for (const auto * tw : tensors) { ggml_tensor * tensor = tw->tensor; const std::string name = ggml_get_name(tensor); if (!can_quantize(tensor)) { continue; } + check_signal_handler(all); + + // If we already have fully evaluated this tensor then reuse it + if (auto it_saved = bpw_data.find(name); it_saved != bpw_data.end()) { + tensor_info info; + info.w = tw; + info.candidate = it_saved->second.candidate; + info.choice = it_saved->second.choice; + info.min_bpw = it_saved->second.min_bpw; + info.max_bpw = it_saved->second.max_bpw; + info.n_elements = it_saved->second.n_elements ? 
it_saved->second.n_elements : (size_t)ggml_nelements(tensor); + all.push_back(std::move(info)); + continue; + } LLAMA_LOG_INFO("\t%s: - processing tensor %45s \t(%12" PRId64 " elements)\n", __func__, name.c_str(), ggml_nelements(tensor)); if (!ml.use_mmap) { @@ -1296,6 +1312,7 @@ static std::unordered_map target_bpw_type( std::vector tl_quantized_buffer(quantized_buffer.size()); std::vector tl_dequantized_buffer(dequantized_buffer.size()); for (;;) { + if (bpw_stop.load(std::memory_order_relaxed)) { break; } // stop if a signal arrived const size_t i = cidx.fetch_add(1, std::memory_order_acq_rel); if (i >= compatible_candidates.size()) { break; } @@ -1311,6 +1328,11 @@ static std::unordered_map target_bpw_type( for (auto &th : eval_workers) { th.join(); } + // If interruption happened mid-evaluation, exit without adding a half-baked tensor entry + if (bpw_stop.load(std::memory_order_relaxed) && cidx.load(std::memory_order_relaxed) < compatible_candidates.size()) { + check_signal_handler(all); + } + for (auto &c : eval_candidates) { if (c.bytes > 0) { info.candidate.push_back(c); } } @@ -1384,6 +1406,7 @@ static std::unordered_map target_bpw_type( info.min_bpw = info.candidate.front().bpw; info.max_bpw = info.candidate.back().bpw; all.push_back(std::move(info)); + check_signal_handler(all); // save after each tensor } if (all.empty()) { return {}; } @@ -1441,7 +1464,7 @@ static std::unordered_map target_bpw_type( return emit_overrides(); } if (budget_bytes >= max_bytes) { - for (auto & ti : all) { ti.choice = (int) ti.candidate.size() - 1; } + for (auto & ti : all) { ti.choice = (int)ti.candidate.size() - 1; } return emit_overrides(); }