From 2ba9adc093127016a48cd0c5d6bf1420dafe17a6 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 16 Feb 2026 07:46:08 -0600 Subject: [PATCH] Adjust workaround for ROCWMMA_FATTN/GFX9 to only newer ROCm veresions (#19591) Avoids issues with ROCm 6.4.4. Closes: https://github.com/ggml-org/llama.cpp/issues/19580 Fixes: 6845f7f87 ("Add a workaround for compilation with ROCWMMA_FATTN and gfx9 (#19461)") Signed-off-by: Mario Limonciello (AMD) --- ggml/src/ggml-cuda/fattn-wmma-f16.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-cuda/fattn-wmma-f16.cu b/ggml/src/ggml-cuda/fattn-wmma-f16.cu index 35735d48b2..f19defbff9 100644 --- a/ggml/src/ggml-cuda/fattn-wmma-f16.cu +++ b/ggml/src/ggml-cuda/fattn-wmma-f16.cu @@ -63,7 +63,7 @@ static __global__ void flash_attn_ext_f16( constexpr int frag_m = ncols == 8 ? 32 : 16; constexpr int frag_n = ncols == 8 ? 8 : 16; static_assert(D % frag_m == 0, "If ncols == 8 then D % frag_m must be 0."); -#if defined(GGML_USE_HIP) +#if defined(GGML_USE_HIP) && HIP_VERSION >= 60500000 typedef wmma::fragment frag_a_K; typedef wmma::fragment frag_a_V; typedef wmma::fragment frag_b; @@ -135,7 +135,7 @@ static __global__ void flash_attn_ext_f16( __shared__ half VKQ[ncols*D_padded]; // Accumulator for final VKQ slice. half2 * VKQ2 = (half2 *) VKQ; -#if defined(GGML_USE_HIP) +#if defined(GGML_USE_HIP) && HIP_VERSION >= 60500000 const _Float16 * K_h_f16 = reinterpret_cast(K_h); const _Float16 * V_h_f16 = reinterpret_cast(V_h); _Float16 * KQ_f16 = reinterpret_cast<_Float16 *>(KQ);