cont : fix
parent 69a30dfa57
commit b8a223daec
@@ -320,7 +320,7 @@ llama_context::llama_context(
     reserve();
 
-    if (cparams.flash_attn) {
+    if (!cparams.flash_attn) {
         if (ggml_is_quantized(params.type_v)) {
             throw std::runtime_error("quantized V cache was requested, but this requires Flash Attention");
         }
 
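The one-character fix inverts the guard: a quantized V cache is only supported together with Flash Attention, so the constructor must throw when flash_attn is disabled, whereas the original condition threw in the opposite case. Below is a minimal standalone sketch of the corrected check; the struct definitions and the stand-in ggml_is_quantized are illustrative assumptions for demonstration, not the real llama.cpp types.

#include <cstdio>
#include <stdexcept>

// Hypothetical stand-ins mirroring the names in the hunk above.
struct cparams_t { bool flash_attn; };
struct params_t  { int  type_v; };

// Stand-in for ggml_is_quantized(): here any non-zero type id counts as quantized.
static bool ggml_is_quantized(int type) { return type != 0; }

// Corrected validation: a quantized V cache requires Flash Attention,
// so the combination is rejected only when flash_attn is disabled.
static void validate(const cparams_t & cparams, const params_t & params) {
    if (!cparams.flash_attn) {
        if (ggml_is_quantized(params.type_v)) {
            throw std::runtime_error("quantized V cache was requested, but this requires Flash Attention");
        }
    }
}

int main() {
    try {
        validate({ /*flash_attn=*/false }, { /*type_v=*/1 });
    } catch (const std::runtime_error & e) {
        std::printf("rejected: %s\n", e.what()); // fires, as intended
    }
    validate({ /*flash_attn=*/true }, { /*type_v=*/1 }); // accepted: FA enabled
    return 0;
}

Run as-is, the first call is rejected (quantized V cache without Flash Attention) and the second passes; with the pre-fix condition the behavior would be exactly reversed.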