tests: reduce number of FA test permutations

Only test non-F16 KV types (type_KV) for K head sizes (hsk) 64 and 72 (one is a multiple of QK, the other is not).
This commit is contained in:
Jeff Bolz 2026-02-05 18:23:25 -06:00
parent 449ec2ab07
commit b1d14aef15
1 changed files with 1 additions and 0 deletions

View File

@ -8231,6 +8231,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
for (ggml_prec prec : {GGML_PREC_F32, GGML_PREC_DEFAULT}) {
if (hsk != 128 && prec == GGML_PREC_DEFAULT) continue;
for (ggml_type type_KV : {GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_BF16, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0}) {
if (type_KV != GGML_TYPE_F16 && hsk != 64 && hsk != 72) continue;
test_cases.emplace_back(new test_flash_attn_ext(
hsk, hsv, nh, {nr2, nr3}, kv, nb, mask, sinks, max_bias, logit_softcap, prec, type_KV));
// run fewer test cases permuted