CUDA: fix FA kernel selection logic (#21271)

This commit is contained in:
Johannes Gäßler 2026-04-01 21:28:19 +02:00 committed by GitHub
parent 6de97b9d3e
commit 86221cf6da
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 7 additions and 0 deletions

View File

@ -340,7 +340,14 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const
case 128:
case 112:
case 256:
if (V->ne[0] != K->ne[0]) {
return BEST_FATTN_KERNEL_NONE;
}
break;
case 512:
if (V->ne[0] != K->ne[0]) {
return BEST_FATTN_KERNEL_NONE;
}
if (!gqa_opt_applies) {
return BEST_FATTN_KERNEL_NONE;
}