From c6315655b765d05204f408875a58278fc2c27c9a Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Mon, 16 Feb 2026 10:56:58 +0200
Subject: [PATCH] cont : llama-eval - clarify grader answer-extraction prompt, vary sample answers

---
 examples/llama-eval/llama-eval.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/examples/llama-eval/llama-eval.py b/examples/llama-eval/llama-eval.py
index a45bddf222..ecf1ded244 100755
--- a/examples/llama-eval/llama-eval.py
+++ b/examples/llama-eval/llama-eval.py
@@ -32,17 +32,17 @@ GRADER_PATTERNS = {
 SAMPLE_ANSWERS = {
     "aime": [
         "42",
-        "123",
+        "-123",
         "999"
     ],
     "gsm8k": [
         "42",
-        "123",
+        "-123",
         "999"
     ],
     "gpqa": [
         "A",
-        "B",
+        "D",
         "C"
     ],
 }
@@ -331,9 +331,8 @@ class Grader:
             f"Example {i+1}: {ans}" for i, ans in enumerate(sample_answers)
         ])
 
-        prompt = f"""Extract the answer from this response:
+        prompt = f"""Extract the answer from the following response. Here are some extracted answers to demonstrate what you are supposed to output:
 
-Here are some example answers:
 {sample_examples}
 
 ===
@@ -342,7 +341,7 @@ Response: {pred}
 
 ===
 
-Please provide only the extracted answer, nothing else. If there is no clear answer in the response, reply with 'no answer'."""
+Please provide only the extracted answer, nothing else. If there is no clear answer that can be extracted from the response, reply with 'no answer'."""
         url = f"{self.judge_server_url}/v1/chat/completions"
         headers = {"Content-Type": "application/json"}
         data = {