cont : fix
This commit is contained in:
parent
d96d0417d1
commit
bd5de6bab9
|
|
@ -320,7 +320,7 @@ llama_context::llama_context(
|
|||
|
||||
reserve();
|
||||
|
||||
if (cparams.flash_attn) {
|
||||
if (!cparams.flash_attn) {
|
||||
if (ggml_is_quantized(params.type_v)) {
|
||||
throw std::runtime_error("quantized V cache was requested, but this requires Flash Attention");
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue