diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index d1fa429553..7543ec6961 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -681,7 +681,8 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
         GGML_TYPE_Q4_K,
         GGML_TYPE_Q5_K,
         GGML_TYPE_Q6_K,
-        GGML_TYPE_Q8_0
+        GGML_TYPE_Q8_0,
+        GGML_TYPE_F16
     };
 
     const char * important_tensors[] = {
diff --git a/tools/quantize/quantize.cpp b/tools/quantize/quantize.cpp
index ad2563a48d..e67649beb9 100644
--- a/tools/quantize/quantize.cpp
+++ b/tools/quantize/quantize.cpp
@@ -501,6 +501,8 @@ static const char * get_ftype(const float bpw) {
         {1.5625, "IQ1_S"},
         {1.7500, "IQ1_M"},
         {2.0625, "IQ2_XXS"},
+        {2.3125, "IQ2_XS"},
+        {2.5625, "IQ2_S"},
         {2.6250, "Q2_K"},
         {3.0625, "IQ3_XXS"},
         {3.4375, "Q3_K"},