From cbe37e3b67947548b58dd4ecc4ba55826aa2f796 Mon Sep 17 00:00:00 2001
From: alielfilali01
Date: Thu, 12 Feb 2026 08:23:14 +0000
Subject: [PATCH] fix: enable flash attention for JAIS2 (fixed by #19115)

---
 src/llama-graph.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp
index 9c5e50990f..ad0c354ba9 100644
--- a/src/llama-graph.cpp
+++ b/src/llama-graph.cpp
@@ -1695,8 +1695,7 @@ ggml_tensor * llm_graph_context::build_attn_mha(
 
     ggml_tensor * cur;
 
-    // JAIS2 disabled: non-power-of-2 head count (26/56) causes numerical instability in flash attention
-    const bool use_flash_attn = cparams.flash_attn && kq_b == nullptr && arch != LLM_ARCH_JAIS2;
+    const bool use_flash_attn = cparams.flash_attn && kq_b == nullptr;
 
     if (use_flash_attn) {
         GGML_ASSERT(kq_b == nullptr && "Flash attention does not support KQ bias yet");
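
Note (not part of the patch): a minimal, self-contained sketch of the gating condition after this change, using hypothetical stand-in types for the cparams struct and the KQ-bias tensor rather than the real llama.cpp types. It only illustrates the behavior the one-line edit produces: the flash attention path is taken whenever it is enabled and no KQ bias is supplied, with no architecture-specific exclusion for JAIS2.

#include <cstdio>

// Hypothetical stand-ins for illustration only; the real llama.cpp
// structures (llama_cparams, ggml_tensor) carry many more fields.
struct cparams_t { bool flash_attn; };
struct tensor_t  { int dummy; };

// Mirrors the condition after the patch: enabled flash attention plus no
// KQ bias tensor, with the `arch != LLM_ARCH_JAIS2` term removed.
static bool use_flash_attn(const cparams_t & cparams, const tensor_t * kq_b) {
    return cparams.flash_attn && kq_b == nullptr;
}

int main() {
    const cparams_t cparams{ /*flash_attn=*/true };
    const tensor_t  kq_bias{0};

    std::printf("no KQ bias   -> flash attn: %d\n", use_flash_attn(cparams, nullptr));   // prints 1
    std::printf("with KQ bias -> flash attn: %d\n", use_flash_attn(cparams, &kq_bias));  // prints 0
    return 0;
}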