From b0d50a5681706ca965044ba378a3a1c9bf9883b7 Mon Sep 17 00:00:00 2001
From: gatbontonpc
Date: Mon, 12 Jan 2026 13:53:39 -0500
Subject: [PATCH] Add readme

---
 examples/llama-eval/README.md     | 20 ++++++++++++++++++++
 examples/llama-eval/llama-eval.py |  2 +-
 2 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 examples/llama-eval/README.md

diff --git a/examples/llama-eval/README.md b/examples/llama-eval/README.md
new file mode 100644
index 0000000000..4dfaf09a22
--- /dev/null
+++ b/examples/llama-eval/README.md
@@ -0,0 +1,20 @@
+# llama.cpp/examples/llama-eval
+
+The purpose of this example is to run evaluation metrics against an OpenAI API-compatible LLM server over HTTP (llama-server).
+
+```bash
+./llama-server -m model.gguf --port 8033
+```
+
+```bash
+python examples/llama-eval/llama-eval.py --path_server http://localhost:8033 --n_prompts 100 --prompt_source arc
+```
+
+## Supported tasks (MVP)
+
+- **GSM8K** — grade-school math (final-answer only)
+- **AIME** — competition math (final-answer only)
+- **MMLU** — multi-domain knowledge (multiple choice)
+- **HellaSwag** — commonsense reasoning (multiple choice)
+- **ARC** — grade-school science reasoning (multiple choice)
+- **WinoGrande** — commonsense coreference resolution (multiple choice)
\ No newline at end of file
diff --git a/examples/llama-eval/llama-eval.py b/examples/llama-eval/llama-eval.py
index 411d0adbab..0ded50545c 100644
--- a/examples/llama-eval/llama-eval.py
+++ b/examples/llama-eval/llama-eval.py
@@ -576,7 +576,7 @@ if __name__ == "__main__":
         "--prompt_source",
         type=str,
         default="mmlu",
-        help=f"Eval types supported: all,{TASK_DICT.keys()}",
+        help=f"Eval types supported: all,{list(TASK_DICT.keys())}",
     )
     parser.add_argument(
         "--n_prompts", type=int, default=None, help="Number of prompts to evaluate"
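
For context on the README's two-step usage above: the eval script presumably drives llama-server through its OpenAI-compatible `/v1/chat/completions` endpoint. Below is a minimal sketch of that interaction, assuming the default `--path_server` value from the README; the `query_server` helper is hypothetical and not part of this patch.

```python
# Hypothetical sketch (not from this patch): one prompt round-trip against a
# running llama-server instance via its OpenAI-compatible chat endpoint.
import requests


def query_server(prompt: str, path_server: str = "http://localhost:8033") -> str:
    """Send a single prompt to llama-server and return the model's reply text."""
    response = requests.post(
        f"{path_server}/v1/chat/completions",
        json={
            # llama-server serves whatever model it was launched with; the
            # "model" field is part of the OpenAI request shape.
            "model": "model.gguf",
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0.0,  # deterministic decoding is typical for evals
        },
        timeout=300,
    )
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]


if __name__ == "__main__":
    print(query_server("What is 7 * 8? Answer with the number only."))
```

Because the server speaks the standard OpenAI request/response shape, a sketch like this works against any backend the README describes, not just llama-server.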