docs: update ops.md with MOE_SUM support via auto-generation

This commit is contained in:
shaobo.xie 2026-02-05 15:54:57 +08:00
parent 4367734ac3
commit 58aff30b0c
3 changed files with 1691 additions and 588 deletions

View File

@@ -65,6 +65,7 @@ Legend:
| LEAKY_RELU | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | 🟡 | ❌ | ❌ | ❌ |
| LOG | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ |
| MEAN | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ |
| MOE_SUM | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
| MUL | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ |
| MUL_MAT | 🟡 | 🟡 | 🟡 | 🟡 | ✅ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 |
| MUL_MAT_ID | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ❌ | ❌ | ❌ |
@@ -75,7 +76,7 @@ Legend:
| OUT_PROD | 🟡 | 🟡 | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ❌ | ❌ | ❌ | 🟡 |
| PAD | ❌ | 🟡 | ✅ | 🟡 | 🟡 | 🟡 | 🟡 | ✅ | ✅ | ❌ | ❌ |
| PAD_REFLECT_1D | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ |
| POOL_1D | ❌ | ❌ | | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
| POOL_1D | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
| POOL_2D | ❌ | 🟡 | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ |
| REGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ✅ | ❌ | ❌ |
| RELU | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ✅ | 🟡 | ✅ | ❌ | ❌ |

File diff suppressed because it is too large Load Diff

View File

@@ -18751,3 +18751,99 @@
"CUDA0","CROSS_ENTROPY_LOSS_BACK","type=f32,ne=[30000,1,1,1]","support","1","yes","CUDA"
"CUDA0","OPT_STEP_ADAMW","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA"
"CUDA0","OPT_STEP_SGD","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=64,n_expert_used=2,n_tokens=16","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=64,n_expert_used=2,n_tokens=32","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=64,n_expert_used=2,n_tokens=128","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=64,n_expert_used=2,n_tokens=256","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=128,n_expert_used=2,n_tokens=16","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=128,n_expert_used=2,n_tokens=32","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=128,n_expert_used=2,n_tokens=128","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=128,n_expert_used=2,n_tokens=256","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=256,n_expert_used=2,n_tokens=16","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=256,n_expert_used=2,n_tokens=32","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=256,n_expert_used=2,n_tokens=128","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=256,n_expert_used=2,n_tokens=256","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=4096,n_expert_used=2,n_tokens=16","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=4096,n_expert_used=2,n_tokens=32","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=4096,n_expert_used=2,n_tokens=128","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=4096,n_expert_used=2,n_tokens=256","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=64,n_expert_used=4,n_tokens=16","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=64,n_expert_used=4,n_tokens=32","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=64,n_expert_used=4,n_tokens=128","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=64,n_expert_used=4,n_tokens=256","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=128,n_expert_used=4,n_tokens=16","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=128,n_expert_used=4,n_tokens=32","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=128,n_expert_used=4,n_tokens=128","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=128,n_expert_used=4,n_tokens=256","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=256,n_expert_used=4,n_tokens=16","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=256,n_expert_used=4,n_tokens=32","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=256,n_expert_used=4,n_tokens=128","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=256,n_expert_used=4,n_tokens=256","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=4096,n_expert_used=4,n_tokens=16","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=4096,n_expert_used=4,n_tokens=32","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=4096,n_expert_used=4,n_tokens=128","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=4096,n_expert_used=4,n_tokens=256","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=64,n_expert_used=8,n_tokens=16","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=64,n_expert_used=8,n_tokens=32","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=64,n_expert_used=8,n_tokens=128","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=64,n_expert_used=8,n_tokens=256","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=128,n_expert_used=8,n_tokens=16","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=128,n_expert_used=8,n_tokens=32","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=128,n_expert_used=8,n_tokens=128","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=128,n_expert_used=8,n_tokens=256","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=256,n_expert_used=8,n_tokens=16","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=256,n_expert_used=8,n_tokens=32","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=256,n_expert_used=8,n_tokens=128","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=256,n_expert_used=8,n_tokens=256","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=4096,n_expert_used=8,n_tokens=16","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=4096,n_expert_used=8,n_tokens=32","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=4096,n_expert_used=8,n_tokens=128","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f32,hidden_dim=4096,n_expert_used=8,n_tokens=256","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=64,n_expert_used=2,n_tokens=16","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=64,n_expert_used=2,n_tokens=32","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=64,n_expert_used=2,n_tokens=128","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=64,n_expert_used=2,n_tokens=256","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=128,n_expert_used=2,n_tokens=16","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=128,n_expert_used=2,n_tokens=32","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=128,n_expert_used=2,n_tokens=128","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=128,n_expert_used=2,n_tokens=256","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=256,n_expert_used=2,n_tokens=16","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=256,n_expert_used=2,n_tokens=32","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=256,n_expert_used=2,n_tokens=128","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=256,n_expert_used=2,n_tokens=256","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=4096,n_expert_used=2,n_tokens=16","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=4096,n_expert_used=2,n_tokens=32","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=4096,n_expert_used=2,n_tokens=128","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=4096,n_expert_used=2,n_tokens=256","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=64,n_expert_used=4,n_tokens=16","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=64,n_expert_used=4,n_tokens=32","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=64,n_expert_used=4,n_tokens=128","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=64,n_expert_used=4,n_tokens=256","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=128,n_expert_used=4,n_tokens=16","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=128,n_expert_used=4,n_tokens=32","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=128,n_expert_used=4,n_tokens=128","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=128,n_expert_used=4,n_tokens=256","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=256,n_expert_used=4,n_tokens=16","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=256,n_expert_used=4,n_tokens=32","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=256,n_expert_used=4,n_tokens=128","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=256,n_expert_used=4,n_tokens=256","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=4096,n_expert_used=4,n_tokens=16","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=4096,n_expert_used=4,n_tokens=32","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=4096,n_expert_used=4,n_tokens=128","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=4096,n_expert_used=4,n_tokens=256","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=64,n_expert_used=8,n_tokens=16","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=64,n_expert_used=8,n_tokens=32","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=64,n_expert_used=8,n_tokens=128","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=64,n_expert_used=8,n_tokens=256","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=128,n_expert_used=8,n_tokens=16","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=128,n_expert_used=8,n_tokens=32","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=128,n_expert_used=8,n_tokens=128","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=128,n_expert_used=8,n_tokens=256","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=256,n_expert_used=8,n_tokens=16","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=256,n_expert_used=8,n_tokens=32","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=256,n_expert_used=8,n_tokens=128","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=256,n_expert_used=8,n_tokens=256","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=4096,n_expert_used=8,n_tokens=16","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=4096,n_expert_used=8,n_tokens=32","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=4096,n_expert_used=8,n_tokens=128","support","1","yes","CUDA"
"CUDA0","MOE_SUM","type=f16,hidden_dim=4096,n_expert_used=8,n_tokens=256","support","1","yes","CUDA"

Can't render this file because it is too large.