From 228d573424db145ec66babf2f8e9493c4b3c97ff Mon Sep 17 00:00:00 2001 From: ddh0 Date: Fri, 20 Mar 2026 18:26:46 -0500 Subject: [PATCH] llama-quant : default ftype param `Q5_1` --> `Q8_0` Change the default `ftype` in `llama_model_quantize_params` from `LLAMA_FTYPE_MOSTLY_Q5_1` to `LLAMA_FTYPE_MOSTLY_Q8_0`. In case some external program naively uses the default quantization params, we should probably default to a known-good type like Q8_0 rather than Q5_1, which is rather old. --- src/llama-quant.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index 8e8ce23124..7d94f76c05 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -1271,7 +1271,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: llama_model_quantize_params llama_model_quantize_default_params() { llama_model_quantize_params result = { /*.nthread =*/ 0, - /*.ftype =*/ LLAMA_FTYPE_MOSTLY_Q5_1, + /*.ftype =*/ LLAMA_FTYPE_MOSTLY_Q8_0, /*.output_tensor_type =*/ GGML_TYPE_COUNT, /*.token_embedding_type =*/ GGML_TYPE_COUNT, /*.allow_requantize =*/ false,