From b97cda628960d66a9fcc301062a1dc3925feae9f Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Sat, 29 Nov 2025 23:52:51 +0000 Subject: [PATCH] Add B/F16 to get_ftype() --- tools/quantize/quantize.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/quantize/quantize.cpp b/tools/quantize/quantize.cpp index ebeea65336..a1426ea4a3 100644 --- a/tools/quantize/quantize.cpp +++ b/tools/quantize/quantize.cpp @@ -512,7 +512,12 @@ static const char * get_ftype(const float bpw) { {4.5000, "Q4_K"}, {5.5000, "Q5_K"}, {6.5625, "Q6_K"}, - {8.5000, "Q8_0"} + {8.5000, "Q8_0"}, +#ifdef GGML_USE_METAL + {16.0000, "F16"} +#else + {16.0000, "BF16"} +#endif }; return quant_bpw.lower_bound(bpw)->second;