fix: enable flash attention for JAIS2 (fixed by #19115)
parent d9a442f602
commit cbe37e3b67
@@ -1695,8 +1695,7 @@ ggml_tensor * llm_graph_context::build_attn_mha(
     ggml_tensor * cur;

-    // JAIS2 disabled: non-power-of-2 head count (26/56) causes numerical instability in flash attention
-    const bool use_flash_attn = cparams.flash_attn && kq_b == nullptr && arch != LLM_ARCH_JAIS2;
+    const bool use_flash_attn = cparams.flash_attn && kq_b == nullptr;

     if (use_flash_attn) {
         GGML_ASSERT(kq_b == nullptr && "Flash attention does not support KQ bias yet");
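For context, a minimal standalone sketch of the gating change in this hunk: before the patch, an explicit architecture check kept JAIS2 off the flash-attention path; after it, every architecture takes the path whenever flash attention is enabled and no KQ bias is present. The Arch enum, Params struct, and the two helper functions below are hypothetical stand-ins for illustration only, not the actual llama.cpp types.

#include <cstdio>

// Hypothetical stand-ins for the real cparams / arch values (illustration only).
enum Arch { ARCH_JAIS2, ARCH_OTHER };

struct Params {
    bool flash_attn;   // user enabled flash attention
    bool has_kq_bias;  // corresponds to kq_b != nullptr in the real code
};

// Old gating (before this commit): JAIS2 was excluded explicitly.
static bool use_flash_attn_old(const Params & p, Arch arch) {
    return p.flash_attn && !p.has_kq_bias && arch != ARCH_JAIS2;
}

// New gating (this commit): the architecture check is dropped,
// so JAIS2 follows the same rule as every other architecture.
static bool use_flash_attn_new(const Params & p, Arch /*arch*/) {
    return p.flash_attn && !p.has_kq_bias;
}

int main() {
    const Params p = { /*flash_attn=*/true, /*has_kq_bias=*/false };
    std::printf("JAIS2 old: %d, new: %d\n",
                use_flash_attn_old(p, ARCH_JAIS2),
                use_flash_attn_new(p, ARCH_JAIS2)); // prints "JAIS2 old: 0, new: 1"
    return 0;
}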