diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index 2bce5c3485..e505ef40c2 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -320,7 +320,7 @@ llama_context::llama_context(
 
     reserve();
 
-    if (cparams.flash_attn) {
+    if (!cparams.flash_attn) {
         if (ggml_is_quantized(params.type_v)) {
             throw std::runtime_error("quantized V cache was requested, but this requires Flash Attention");
         }
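The fix inverts the guard: a quantized V cache is only supported together with Flash Attention, so the error has to fire when `cparams.flash_attn` is disabled, not when it is enabled. Below is a minimal self-contained sketch of the corrected check; the `cache_type` enum, `is_quantized` helper, and `validate_v_cache` wrapper are hypothetical stand-ins for `ggml_type`, `ggml_is_quantized()`, and the constructor code shown in the diff, not the actual llama.cpp API.

```cpp
#include <stdexcept>
#include <iostream>

// Hypothetical stand-ins so the sketch compiles on its own. In llama.cpp
// these roles are played by ggml_type and ggml_is_quantized().
enum class cache_type { f16, f32, q4_0, q8_0 };

static bool is_quantized(cache_type t) {
    return t == cache_type::q4_0 || t == cache_type::q8_0;
}

// Corrected guard from the diff: a quantized V cache is only valid when
// Flash Attention is enabled, so reject the combination when it is off.
static void validate_v_cache(bool flash_attn, cache_type type_v) {
    if (!flash_attn) {
        if (is_quantized(type_v)) {
            throw std::runtime_error("quantized V cache was requested, but this requires Flash Attention");
        }
    }
}

int main() {
    validate_v_cache(/*flash_attn=*/true, cache_type::q8_0);      // OK: FA on
    try {
        validate_v_cache(/*flash_attn=*/false, cache_type::q8_0); // throws
    } catch (const std::runtime_error & e) {
        std::cerr << e.what() << "\n";
    }
    return 0;
}
```

With the original (pre-fix) condition the error was raised in exactly the wrong case: a quantized V cache *with* Flash Attention was rejected, while the unsupported combination without it passed through silently.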