fix: enable flash attention for JAIS2 (fixed by #19115)

alielfilali01 2026-02-12 08:23:14 +00:00
parent d9a442f602
commit cbe37e3b67
1 changed file with 1 addition and 2 deletions


@@ -1695,8 +1695,7 @@ ggml_tensor * llm_graph_context::build_attn_mha(
     ggml_tensor * cur;
-    // JAIS2 disabled: non-power-of-2 head count (26/56) causes numerical instability in flash attention
-    const bool use_flash_attn = cparams.flash_attn && kq_b == nullptr && arch != LLM_ARCH_JAIS2;
+    const bool use_flash_attn = cparams.flash_attn && kq_b == nullptr;
     if (use_flash_attn) {
         GGML_ASSERT(kq_b == nullptr && "Flash attention does not support KQ bias yet");
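
After this change, the flash-attention path in build_attn_mha is gated only on the runtime flag and on the KQ bias tensor being absent; the JAIS2 architecture exclusion is removed. The following is a minimal, self-contained C++ sketch of the before/after condition. It reuses the identifier names from the hunk (cparams.flash_attn, kq_b, LLM_ARCH_JAIS2), but the surrounding types and helper functions are hypothetical stand-ins for illustration, not the llama.cpp implementation.

    // Hypothetical sketch of the gate this commit relaxes; not llama.cpp code.
    struct ggml_tensor;                      // opaque stand-in for the real ggml tensor type
    enum llm_arch { LLM_ARCH_JAIS2, LLM_ARCH_OTHER };
    struct cparams_t { bool flash_attn; };   // hypothetical slice of the context params

    // Before this commit: JAIS2 was excluded from the flash-attention path.
    static bool use_flash_attn_before(const cparams_t & cparams, const ggml_tensor * kq_b, llm_arch arch) {
        return cparams.flash_attn && kq_b == nullptr && arch != LLM_ARCH_JAIS2;
    }

    // After this commit: the gate depends only on the flag and on the absence of a KQ bias tensor.
    static bool use_flash_attn_after(const cparams_t & cparams, const ggml_tensor * kq_b) {
        return cparams.flash_attn && kq_b == nullptr;
    }

In other words, with flash attention requested and no KQ bias passed in, JAIS2 graphs now take the same flash-attention path as every other architecture.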