diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index 422c929f0c..50c8dbf423 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -8,6 +8,8 @@
 #include <cmath>
 #include <cstring>
 #include <cinttypes>
+#include <atomic>
+#include <csignal>
 #include <fstream>
 #include <mutex>
 #include <thread>
@@ -613,6 +615,12 @@ static size_t llama_tensor_quantize_impl(enum ggml_type new_type, const float *
     return new_size;
 }
 
+static std::atomic<bool> bpw_stop{ false };
+
+static void signal_handler(int) {
+    bpw_stop.store(true, std::memory_order_relaxed);
+}
+
 // Returns tensor type overrides to meet a global bpw target
 static std::unordered_map<std::string, ggml_type> target_bpw_type(
     llama_model_loader & ml,
@@ -711,6 +719,22 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
         return is_quantizable(ggml_get_name(t), model.arch, params);
     };
 
+    auto install_signal_handlers = [] {
+        static std::once_flag once;
+        std::call_once(once, [] {
+            std::signal(SIGINT, signal_handler);
+            std::signal(SIGTERM, signal_handler);
+        });
+    };
+
+    auto uninstall_signal_handlers = [] {
+        static std::once_flag once;
+        std::call_once(once, [] {
+            std::signal(SIGINT, SIG_DFL);
+            std::signal(SIGTERM, SIG_DFL);
+        });
+    };
+
     // Estimate error for a given type using a sampled subset of rows
     auto estimate_error = [&](const ggml_tensor * t, const ggml_type quant_type,