examples: add task summary table to llama-eval-new.py

This commit is contained in:
Georgi Gerganov 2026-01-31 18:58:27 +02:00
parent a939f4c47e
commit e79e8d02d5
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735
1 changed file with 12 additions and 0 deletions

View File

@@ -208,6 +208,18 @@ class Processor:
print(f"Max tokens: {self.n_predict}")
print()
# Print task summary table
print("Tasks:")
print(" Task ID Dataset Prompt (first 40 chars) Expected Status")
for i in range(min(n_cases, len(self.dataset.questions))):
question = self.dataset.get_question(i)
case_id = f"aime_{self.dataset.split}_{question['id']}"
prompt = question["problem"]
gold = self.dataset.get_answer(question)
truncated_prompt = prompt[:40] + "..." if len(prompt) > 40 else prompt
print(f" {case_id:<15} AIME2025 {truncated_prompt:<40} {gold:<10} pending")
print()
task_states: Dict[str, List[TaskState]] = {task: [] for task in self.eval_state.tasks}
total = 0
correct = 0