From ec259f9ec0fce785655b34f01df16c2bc19db858 Mon Sep 17 00:00:00 2001 From: jiachengjason Date: Mon, 29 Dec 2025 11:26:15 -0500 Subject: [PATCH] add n_experts branch like the cdna path --- ggml/src/ggml-cuda/mmq.cu | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-cuda/mmq.cu b/ggml/src/ggml-cuda/mmq.cu index 1a29797148..0a0d440a18 100644 --- a/ggml/src/ggml-cuda/mmq.cu +++ b/ggml/src/ggml-cuda/mmq.cu @@ -333,7 +333,10 @@ bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11, int64_t } if (amd_wmma_available(cc)) { - if (ne11 <= 128 || type == GGML_TYPE_Q4_0 || type == GGML_TYPE_Q4_1 || type == GGML_TYPE_Q5_0 || type == GGML_TYPE_Q5_1) { + if (n_experts > 64 || ne11 <= 128) { + return true; + } + if (type == GGML_TYPE_Q4_0 || type == GGML_TYPE_Q4_1 || type == GGML_TYPE_Q5_0 || type == GGML_TYPE_Q5_1) { return true; } if (ne11 <= 256 && (type == GGML_TYPE_Q4_K || type == GGML_TYPE_Q5_K)) {