mmq.cu: move amd wmma mmq/wmma switching behind IS_RDNA3
This commit is contained in:
parent
a435c7725b
commit
3326fa2387
|
|
@ -333,6 +333,9 @@ bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11, int64_t
|
||||||
}
|
}
|
||||||
|
|
||||||
if (amd_wmma_available(cc)) {
|
if (amd_wmma_available(cc)) {
|
||||||
|
// RDNA 4 is consistently worse on rocblas
|
||||||
|
// https://github.com/ggml-org/llama.cpp/pull/18537#issuecomment-3706422301
|
||||||
|
if (GGML_CUDA_CC_IS_RDNA3(cc)) {
|
||||||
// High expert counts almost always better on MMQ
|
// High expert counts almost always better on MMQ
|
||||||
// due to a large amount of graph splits
|
// due to a large amount of graph splits
|
||||||
// https://github.com/ggml-org/llama.cpp/pull/18202
|
// https://github.com/ggml-org/llama.cpp/pull/18202
|
||||||
|
|
@ -351,6 +354,9 @@ bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11, int64_t
|
||||||
default:
|
default:
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return (!GGML_CUDA_CC_IS_CDNA(cc)) || ne11 < MMQ_DP4A_MAX_BATCH_SIZE;
|
return (!GGML_CUDA_CC_IS_CDNA(cc)) || ne11 < MMQ_DP4A_MAX_BATCH_SIZE;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue