cont : fix
This commit is contained in:
parent
e115c63747
commit
7b5264204f
|
|
@ -342,7 +342,7 @@ llama_context::llama_context(
|
||||||
|
|
||||||
reserve();
|
reserve();
|
||||||
|
|
||||||
if (cparams.flash_attn) {
|
if (!cparams.flash_attn) {
|
||||||
if (ggml_is_quantized(params.type_v)) {
|
if (ggml_is_quantized(params.type_v)) {
|
||||||
throw std::runtime_error("quantized V cache was requested, but this requires Flash Attention");
|
throw std::runtime_error("quantized V cache was requested, but this requires Flash Attention");
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue