fine tuned gpt-oss configs

This commit is contained in:
jiachengjason 2026-01-26 17:51:47 -05:00
parent ec3fce1512
commit 08d444578d
1 changed file with 4 additions and 3 deletions

View File

@@ -357,9 +357,9 @@ bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11, int64_t
} }
if (GGML_CUDA_CC_IS_RDNA4(cc)){ if (GGML_CUDA_CC_IS_RDNA4(cc)){
if (n_experts >= 64) { // if (n_experts >= 64) {
return true; // return true;
} // }
switch (type) { switch (type) {
case GGML_TYPE_IQ2_S: case GGML_TYPE_IQ2_S:
case GGML_TYPE_Q6_K: case GGML_TYPE_Q6_K:
@@ -368,6 +368,7 @@ bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11, int64_t
case GGML_TYPE_Q4_1: case GGML_TYPE_Q4_1:
case GGML_TYPE_Q5_0: case GGML_TYPE_Q5_0:
case GGML_TYPE_Q5_1: case GGML_TYPE_Q5_1:
case GGML_TYPE_MXFP4:
return true; return true;
case GGML_TYPE_Q5_K: case GGML_TYPE_Q5_K:
case GGML_TYPE_IQ3_XXS: case GGML_TYPE_IQ3_XXS: