diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index 3e56b2b86c..e108a82f37 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -865,8 +865,6 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
         is_clip_model |= name.rfind("mm.", 0) == 0; // check the "mm." prefix
     }
 
-    GGML_ASSERT(qs.n_ffn_down_exp != 0);
-
     qs.n_ffn_down = qs.n_ffn_gate = qs.n_ffn_up = (int)model.hparams.n_layer;
 
     // sanity checks for models that have attention layers