mmq.cu: move amd wmma mmq/wmma switching behind IS_RDNA3

2026-01-02 15:05:30 -08:00 · 2026-01-02 15:05:30 -08:00 · 3326fa2387
parent a435c7725b
commit 3326fa2387
1 changed file with 22 additions and 16 deletions
--- a/ggml/src/ggml-cuda/mmq.cu
+++ b/ggml/src/ggml-cuda/mmq.cu
@@ -333,6 +333,9 @@ bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11, int64_t
    }
    if (amd_wmma_available(cc)) {
        // RDNA 4 is consistently worse on rocblas
        // https://github.com/ggml-org/llama.cpp/pull/18537#issuecomment-3706422301
        if (GGML_CUDA_CC_IS_RDNA3(cc)) {
            // High expert counts almost always better on MMQ
            // due to a large amount of graph splits
            // https://github.com/ggml-org/llama.cpp/pull/18202
@@ -351,6 +354,9 @@ bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11, int64_t
                default:
                    return true;
            }
        } else {
            return true;
        }
    }
    return (!GGML_CUDA_CC_IS_CDNA(cc)) || ne11 < MMQ_DP4A_MAX_BATCH_SIZE;