diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index d1fa429553..7543ec6961 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -681,7 +681,8 @@ static std::unordered_map target_bpw_type( GGML_TYPE_Q4_K, GGML_TYPE_Q5_K, GGML_TYPE_Q6_K, - GGML_TYPE_Q8_0 + GGML_TYPE_Q8_0, + GGML_TYPE_F16 }; const char * important_tensors[] = { diff --git a/tools/quantize/quantize.cpp b/tools/quantize/quantize.cpp index ad2563a48d..e67649beb9 100644 --- a/tools/quantize/quantize.cpp +++ b/tools/quantize/quantize.cpp @@ -501,6 +501,8 @@ static const char * get_ftype(const float bpw) { {1.5625, "IQ1_S"}, {1.7500, "IQ1_M"}, {2.0625, "IQ2_XXS"}, + {2.3125, "IQ2_XS"}, + {2.5625, "IQ2_S"}, {2.6250, "Q2_K"}, {3.0625, "IQ3_XXS"}, {3.4375, "Q3_K"},