fine tuned gpt-oss configs

2026-01-26 17:51:47 -05:00 · 2026-01-26 17:51:47 -05:00 · 08d444578d
parent ec3fce1512
commit 08d444578d
1 changed file with 4 additions and 3 deletions
--- a/ggml/src/ggml-cuda/mmq.cu
+++ b/ggml/src/ggml-cuda/mmq.cu
@@ -357,9 +357,9 @@ bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11, int64_t
        }

        if (GGML_CUDA_CC_IS_RDNA4(cc)){
-            if (n_experts >= 64) {
-                return true;
-            }
+            // if (n_experts >= 64) {
+            //     return true;
+            // }
            switch (type) {
                case GGML_TYPE_IQ2_S:
                case GGML_TYPE_Q6_K:
@@ -368,6 +368,7 @@ bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11, int64_t
                case GGML_TYPE_Q4_1:
                case GGML_TYPE_Q5_0:
                case GGML_TYPE_Q5_1:
+                case GGML_TYPE_MXFP4:
                    return true;
                case GGML_TYPE_Q5_K:
                case GGML_TYPE_IQ3_XXS: