diff --git a/examples/llama-eval/llama-eval.py b/examples/llama-eval/llama-eval.py index 112f317bc9..4f8e0055b1 100755 --- a/examples/llama-eval/llama-eval.py +++ b/examples/llama-eval/llama-eval.py @@ -24,10 +24,6 @@ GRADER_PATTERNS = { "aime": r'\boxed{(\d+)}|\b(\d+)\b', "aime2025": r'\boxed{(\d+)}|\b(\d+)\b', "gsm8k": r'\b(\d+)\b', - "mmlu": r'[A-D]', - "hellaswag": r'[A-D]', - "arc": r'[A-D]', - "winogrande": r'[A-D]', } SAMPLE_ANSWERS = {