diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index 3e56b2b86c..e108a82f37 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -865,8 +865,6 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
         is_clip_model |= name.rfind("mm.", 0) == 0; // check the "mm." prefix
     }
 
-    GGML_ASSERT(qs.n_ffn_down_exp != 0);
-
     qs.n_ffn_down = qs.n_ffn_gate = qs.n_ffn_up = (int)model.hparams.n_layer;
 
     // sanity checks for models that have attention layers