fix: enable flash attention for JAIS2 (fixed by #19115)
parent d9a442f602
commit cbe37e3b67
@@ -1695,8 +1695,7 @@ ggml_tensor * llm_graph_context::build_attn_mha(
     ggml_tensor * cur;

-    // JAIS2 disabled: non-power-of-2 head count (26/56) causes numerical instability in flash attention
-    const bool use_flash_attn = cparams.flash_attn && kq_b == nullptr && arch != LLM_ARCH_JAIS2;
+    const bool use_flash_attn = cparams.flash_attn && kq_b == nullptr;

     if (use_flash_attn) {
         GGML_ASSERT(kq_b == nullptr && "Flash attention does not support KQ bias yet");
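For context, a minimal standalone sketch of the gating change in this hunk: before the patch, an explicit architecture check kept JAIS2 off the flash-attention path; after it, every architecture takes the path whenever flash attention is enabled and no KQ bias is present. The Arch enum, Params struct, and the two helper functions below are hypothetical stand-ins for illustration only, not the actual llama.cpp types.

#include <cstdio>

// Hypothetical stand-ins for the real cparams / arch values (illustration only).
enum Arch { ARCH_JAIS2, ARCH_OTHER };

struct Params {
    bool flash_attn;   // user enabled flash attention
    bool has_kq_bias;  // corresponds to kq_b != nullptr in the real code
};

// Old gating (before this commit): JAIS2 was excluded explicitly.
static bool use_flash_attn_old(const Params & p, Arch arch) {
    return p.flash_attn && !p.has_kq_bias && arch != ARCH_JAIS2;
}

// New gating (this commit): the architecture check is dropped,
// so JAIS2 follows the same rule as every other architecture.
static bool use_flash_attn_new(const Params & p, Arch /*arch*/) {
    return p.flash_attn && !p.has_kq_bias;
}

int main() {
    const Params p = { /*flash_attn=*/true, /*has_kq_bias=*/false };
    std::printf("JAIS2 old: %d, new: %d\n",
                use_flash_attn_old(p, ARCH_JAIS2),
                use_flash_attn_new(p, ARCH_JAIS2)); // prints "JAIS2 old: 0, new: 1"
    return 0;
}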