From 5cd69a6809c56922e1b973ce900f3680c28a5117 Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Wed, 20 Aug 2025 09:41:39 +0100 Subject: [PATCH] Add F16/BF16 type --- src/llama-quant.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index a4a10da062..5522fe39d2 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -630,7 +630,13 @@ static std::unordered_map target_bpw_type( GGML_TYPE_Q5_1, GGML_TYPE_Q5_K, GGML_TYPE_Q6_K, - GGML_TYPE_Q8_0 + GGML_TYPE_Q8_0, +// TODO: find better way to handle F16/BF16 +#ifdef GGML_USE_METAL + GGML_TYPE_F16 +#else + GGML_TYPE_BF16 +#endif }; auto can_quantize = [&](const ggml_tensor * t) -> bool {