This commit is contained in:
Beinsezii 2026-01-02 23:57:45 +00:00 committed by GitHub
commit 4ffc337480
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 22 additions and 0 deletions

View File

@ -333,6 +333,28 @@ bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11, int64_t
}
if (amd_wmma_available(cc)) {
// RDNA 4 is consistently worse on rocblas
// https://github.com/ggml-org/llama.cpp/pull/18537#issuecomment-3706422301
if (GGML_CUDA_CC_IS_RDNA3(cc)) {
// High expert counts almost always better on MMQ
// due to a large amount of graph splits
// https://github.com/ggml-org/llama.cpp/pull/18202
if (n_experts >= 64) {
return true;
}
switch (type) {
// These quants are really bad on MMQ
case GGML_TYPE_Q2_K:
case GGML_TYPE_Q6_K:
// These quants are usually worse but not always
case GGML_TYPE_IQ2_XS:
case GGML_TYPE_IQ2_S:
return ne11 <= 128;
default:
return true;
}
}
return true;
}