diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index 5b3fec3dc5..31694e2834 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -480,14 +480,12 @@ static size_t llama_tensor_quantize_impl(enum ggml_type new_type, const float *
 }
 
 static bool tensor_type_requires_imatrix(const ggml_type dst_type) {
-    if (dst_type == GGML_TYPE_IQ2_XXS || dst_type == GGML_TYPE_IQ2_XS ||
+    return (
+        dst_type == GGML_TYPE_IQ2_XXS || dst_type == GGML_TYPE_IQ2_XS ||
         dst_type == GGML_TYPE_IQ3_XXS || dst_type == GGML_TYPE_IQ1_S ||
         dst_type == GGML_TYPE_IQ2_S || dst_type == GGML_TYPE_IQ1_M ||
-        dst_type == GGML_TYPE_TQ1_0 || dst_type == GGML_TYPE_TQ2_0) {
-        return true;
-    } else {
-        return false;
-    }
+        dst_type == GGML_TYPE_TQ1_0 || dst_type == GGML_TYPE_TQ2_0
+    );
 }
 
 static void llama_model_quantize_impl(const std::string & fname_inp, const std::string & fname_out, const llama_model_quantize_params * params) {
@@ -1066,6 +1064,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
     LLAMA_LOG_INFO("%s: model size = %8.2f MiB (%.2f BPW)\n", __func__, total_size_org/1024.0/1024.0, total_size_org*8.0/ml.n_elements);
     LLAMA_LOG_INFO("%s: quant size = %8.2f MiB (%.2f BPW)\n", __func__, total_size_new/1024.0/1024.0, total_size_new*8.0/ml.n_elements);
 
+    if (!params->imatrix && params->dry_run && will_require_imatrix) {
+        LLAMA_LOG_WARN("%s: WARNING: dry run completed successfully, but actually completing this quantization will require an imatrix!\n", __func__