diff --git a/examples/llama-eval/llama-eval.py b/examples/llama-eval/llama-eval.py index 66e7319a68..cb6c36148c 100755 --- a/examples/llama-eval/llama-eval.py +++ b/examples/llama-eval/llama-eval.py @@ -105,7 +105,7 @@ class TaskState: case_id: str prompt: str gold: str - pred: Optional[str] = None + result: Optional[str] = None extracted: Optional[str] = None grader_log: Dict[str, Any] = field(default_factory=dict) correct: bool = False @@ -179,7 +179,7 @@ class EvalState: task_id: str, prompt: str, gold: str, - pred: Optional[str], + result: Optional[str], extracted: Optional[str], grader_log: Dict[str, Any], correct: bool, @@ -192,7 +192,7 @@ class EvalState: "case_id": task_id, "prompt": prompt, "gold": gold, - "pred": pred, + "result": result, "extracted": extracted, "grader_log": grader_log, "correct": correct, @@ -237,7 +237,7 @@ class EvalState: "case_id": task_id, "prompt": prompt, "gold": gold, - "pred": None, + "result": None, "extracted": None, "grader_log": {}, "correct": False, @@ -282,7 +282,7 @@ class EvalState: gold = case.get("gold", "") extracted = case.get("extracted", "") if status == "ok" else "" is_correct = case.get("correct", False) if status == "ok" else False - pred = case.get("pred", "") or "" + result = case.get("result", "") or "" prompt = case.get("prompt", "") or "" grader_log = case.get("grader_log", {}) @@ -296,7 +296,7 @@ class EvalState: status_class = "error" status_text = f"Error: {status}" - pred_escaped = self._escape_html(pred) + result_escaped = self._escape_html(result) prompt_escaped = self._escape_html(prompt) grader_log_str = self._escape_html(json.dumps(grader_log, indent=2)) @@ -311,8 +311,8 @@ class EvalState:
{prompt_escaped}
- {pred_escaped}
+ {result_escaped}
{grader_log_str}