examples: refactor test-simulator.sh for better readability

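Extract the repeated question string into a TEST_QUESTION variable and add a make_request() helper function to reduce code duplication. Handle error responses: when a reply carries no answer content, the helper prints the reply's "error" field (or "No response") instead of failing on a missing key.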
2026-01-31 15:45:47 +02:00 · 2026-01-31 15:45:47 +02:00 · 23d4e21a81
parent 07d5e1e0ea
commit 23d4e21a81
1 changed file with 42 additions and 52 deletions
--- a/examples/llama-eval/test-simulator.sh
+++ b/examples/llama-eval/test-simulator.sh
@ -1,10 +1,13 @@
 #!/bin/bash

+set -e
+
 echo "=== llama-server-simulator Test Script ==="
 echo ""

 PORT=8033
 SUCCESS_RATE=0.8
+TEST_PORT=8034

 echo "Starting simulator on port $PORT with success rate $SUCCESS_RATE..."
 source venv/bin/activate
@ -14,74 +17,61 @@ SIMULATOR_PID=$!
 echo "Waiting for simulator to start..."
 sleep 5

-echo ""
-echo "=== Test 1: Basic Request with Known Question ==="
-echo "Sending request with AIME question..."
-curl -s -X POST http://localhost:$PORT/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -d '{
-    "model": "llama",
-    "messages": [
-      {"role": "user", "content": "Quadratic polynomials P(x) and Q(x) have leading coefficients 2 and -2, respectively. The graphs of both polynomials pass through the two points (16,54) and (20,53). Find P(0) + Q(0)."}
-    ],
-    "temperature": 0,
-    "max_tokens": 2048
-  }' | python3 -c "import sys, json; data = json.load(sys.stdin); print('Answer:', data['choices'][0]['message']['content'])"
+# make_request QUESTION: POST QUESTION to the simulator on $PORT and print the
+# reply's message content (falling back to its "error" field, then "No response").
+# The body is built with json.dumps so quotes/backslashes in QUESTION stay valid JSON.
+make_request() {
+  local question="$1" payload
+  payload=$(python3 -c 'import json, sys; print(json.dumps({
+    "model": "llama",
+    "messages": [{"role": "user", "content": sys.argv[1]}],
+    "temperature": 0,
+    "max_tokens": 2048}))' "$question") || return
+  curl -s -X POST "http://localhost:$PORT/v1/chat/completions" \
+    -H "Content-Type: application/json" \
+    -d "$payload" | python3 -c "import sys, json; data = json.load(sys.stdin); print(data.get('choices', [{}])[0].get('message', {}).get('content', data.get('error', 'No response')))"
+}
+
+# Test question (repeated in multiple tests)
+TEST_QUESTION="Quadratic polynomials P(x) and Q(x) have leading coefficients 2 and -2, respectively. The graphs of both polynomials pass through the two points (16,54) and (20,53). Find P(0) + Q(0)."

 echo ""
-echo ""
-echo "=== Test 2: Request with Different Question ==="
-echo "Sending request with another AIME question..."
-curl -s -X POST http://localhost:$PORT/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -d '{
-    "model": "llama",
-    "messages": [
-      {"role": "user", "content": "Compute the value of 2^10 + 3^10."}
-    ],
-    "temperature": 0,
-    "max_tokens": 2048
-  }' | python3 -c "import sys, json; data = json.load(sys.stdin); print('Answer:', data['choices'][0]['message']['content'])"
+echo "=== Test 1: Correct Answer ==="
+echo "Sending request with known question..."
+answer=$(make_request "$TEST_QUESTION")
+echo "Answer: $answer"
+echo "Expected: 116"
+echo "Correct: $([ "$answer" == "116" ] && echo "Yes" || echo "No")"

 echo ""
+echo "=== Test 2: Wrong Answer ==="
+echo "Sending request with known question (success rate 0.0)..."
+answer=$(make_request "$TEST_QUESTION")
+echo "Answer: $answer"
+echo "Expected: 116"
+echo "Correct: $([ "$answer" == "116" ] && echo "Yes" || echo "No")"
+
 echo ""
-echo "=== Test 3: Request with No Matching Question ==="
+echo "=== Test 3: No Matching Question ==="
 echo "Sending request with non-matching text..."
-curl -s -X POST http://localhost:$PORT/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -d '{
-    "model": "llama",
-    "messages": [
-      {"role": "user", "content": "What is the capital of France?"}
-    ],
-    "temperature": 0,
-    "max_tokens": 2048
-  }' | python3 -c "import sys, json; data = json.load(sys.stdin); print('Response:', data.get('error', 'No error'))"
+response=$(make_request "What is the capital of France?")
+echo "Response: $response"
+echo "Expected: No matching question found"
+echo "Correct: $([ "$response" == "No matching question found" ] && echo "Yes" || echo "No")"

 echo ""
-echo ""
-echo "=== Test 4: Multiple Requests to Test Success Rate ==="
+echo "=== Test 4: Success Rate Verification ==="
 echo "Sending 10 requests to test success rate..."
 correct_count=0
 for i in {1..10}; do
-  echo "Request $i:"
-  response=$(curl -s -X POST http://localhost:$PORT/v1/chat/completions \
-    -H "Content-Type: application/json" \
-    -d '{
-      "model": "llama",
-      "messages": [
-        {"role": "user", "content": "Quadratic polynomials P(x) and Q(x) have leading coefficients 2 and -2, respectively. The graphs of both polynomials pass through the two points (16,54) and (20,53). Find P(0) + Q(0)."}
-      ],
-      "temperature": 0,
-      "max_tokens": 2048
-    }')
-  answer=$(echo $response | python3 -c "import sys, json; data = json.load(sys.stdin); print(data['choices'][0]['message']['content'])")
+  answer=$(make_request "$TEST_QUESTION")
  if [ "$answer" == "116" ]; then
    correct_count=$((correct_count + 1))
  fi
-  echo "  Answer: $answer"
+  echo "  Request $i: Answer = $answer"
 done
 echo "Correct answers: $correct_count/10"
+echo "Expected: ~8/10 (80% success rate)"
 echo "Success rate: $(echo "scale=1; $correct_count * 10" | bc)%"

 echo ""