From cbe37e3b67947548b58dd4ecc4ba55826aa2f796 Mon Sep 17 00:00:00 2001
From: alielfilali01
Date: Thu, 12 Feb 2026 08:23:14 +0000
Subject: [PATCH] fix: enable flash attention for JAIS2 (fixed by #19115)

---
 src/llama-graph.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp
index 9c5e50990f..ad0c354ba9 100644
--- a/src/llama-graph.cpp
+++ b/src/llama-graph.cpp
@@ -1695,8 +1695,7 @@ ggml_tensor * llm_graph_context::build_attn_mha(
 
     ggml_tensor * cur;
 
-    // JAIS2 disabled: non-power-of-2 head count (26/56) causes numerical instability in flash attention
-    const bool use_flash_attn = cparams.flash_attn && kq_b == nullptr && arch != LLM_ARCH_JAIS2;
+    const bool use_flash_attn = cparams.flash_attn && kq_b == nullptr;
 
     if (use_flash_attn) {
         GGML_ASSERT(kq_b == nullptr && "Flash attention does not support KQ bias yet");
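
Note (not part of the patch): a minimal, self-contained sketch of the gating condition after this change, using hypothetical stand-in types for the cparams struct and the KQ-bias tensor rather than the real llama.cpp types. It only illustrates the behavior the one-line edit produces: the flash attention path is taken whenever it is enabled and no KQ bias is supplied, with no architecture-specific exclusion for JAIS2.

#include <cstdio>

// Hypothetical stand-ins for illustration only; the real llama.cpp
// structures (llama_cparams, ggml_tensor) carry many more fields.
struct cparams_t { bool flash_attn; };
struct tensor_t  { int dummy; };

// Mirrors the condition after the patch: enabled flash attention plus no
// KQ bias tensor, with the `arch != LLM_ARCH_JAIS2` term removed.
static bool use_flash_attn(const cparams_t & cparams, const tensor_t * kq_b) {
    return cparams.flash_attn && kq_b == nullptr;
}

int main() {
    const cparams_t cparams{ /*flash_attn=*/true };
    const tensor_t  kq_bias{0};

    std::printf("no KQ bias   -> flash attn: %d\n", use_flash_attn(cparams, nullptr));   // prints 1
    std::printf("with KQ bias -> flash attn: %d\n", use_flash_attn(cparams, &kq_bias));  // prints 0
    return 0;
}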