diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index 4d0dc6a36e..9212c88563 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -848,6 +848,26 @@ static std::unordered_map target_bpw_type(
         return out;
     };
 
+    // Log and remove the checkpoint file. The result of std::remove() is not
+    // checked, so a missing file or a failed delete goes unreported.
+    auto delete_bpw_state = [&] {
+        LLAMA_LOG_INFO("%s: deleting %s\n", func, checkpoint_file.c_str());
+        std::remove(checkpoint_file.c_str());
+    };
+
+    // If bpw_stop is set, save the progress held in all_vec to the checkpoint
+    // file and abort by throwing std::runtime_error; otherwise do nothing.
+    // The parameter is generic: only size() is used here and the container is
+    // forwarded unchanged to save_bpw_state().
+    auto check_signal_handler = [&](const auto & all_vec) {
+        if (bpw_stop.load(std::memory_order_relaxed)) {
+            // %zu is the portable conversion for size_t; %lu misreads the argument where long is 32-bit
+            LLAMA_LOG_INFO("\n%s: saving bpw progress for %zu tensors to %s\n", func, all_vec.size(), checkpoint_file.c_str());
+            save_bpw_state(all_vec);
+            throw std::runtime_error("user interrupted the process");
+        }
+    };
+
     // Estimate error for a given type using a sampled subset of rows
     auto estimate_error = [&](const ggml_tensor * t,
                               const ggml_type quant_type,