From 8503d59ee44bc30b0d030cceb5e17590b334730d Mon Sep 17 00:00:00 2001
From: Ed Addario
Date: Sat, 13 Sep 2025 11:49:18 +0100
Subject: [PATCH] Increase IQ options

---
 src/llama-quant.cpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index 955e6c12fe..41fd819f86 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -641,12 +641,21 @@ static std::unordered_map target_bpw_type(
 
     constexpr ggml_type iq_quants[] = {
         GGML_TYPE_IQ1_S,
+        GGML_TYPE_IQ2_XXS,
+        GGML_TYPE_IQ2_XS,
         GGML_TYPE_IQ2_S,
         GGML_TYPE_IQ3_S,
         GGML_TYPE_IQ4_XS,
+        GGML_TYPE_IQ4_NL,
         GGML_TYPE_Q5_K,
         GGML_TYPE_Q6_K,
-        GGML_TYPE_Q8_0
+        GGML_TYPE_Q8_0,
+        // TODO: find better way to handle F16/BF16
+#ifdef GGML_USE_METAL
+        GGML_TYPE_F16
+#else
+        GGML_TYPE_BF16
+#endif
     };
 
     constexpr double epsilon = 1e-12;