From 23d4e21a81b02f87b20229a4d592462106ed278e Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sat, 31 Jan 2026 15:45:47 +0200 Subject: [PATCH] examples: refactor test-simulator.sh for better readability Extract repeating question string into TEST_QUESTION variable and create make_request() helper function to reduce code duplication. Add proper error handling for error responses. --- examples/llama-eval/test-simulator.sh | 94 ++++++++++++--------------- 1 file changed, 42 insertions(+), 52 deletions(-) diff --git a/examples/llama-eval/test-simulator.sh b/examples/llama-eval/test-simulator.sh index 17a0bccebf..73d82ce39b 100755 --- a/examples/llama-eval/test-simulator.sh +++ b/examples/llama-eval/test-simulator.sh @@ -1,10 +1,13 @@ #!/bin/bash +set -e + echo "=== llama-server-simulator Test Script ===" echo "" PORT=8033 SUCCESS_RATE=0.8 +TEST_PORT=8034 echo "Starting simulator on port $PORT with success rate $SUCCESS_RATE..." source venv/bin/activate @@ -14,74 +17,61 @@ SIMULATOR_PID=$! echo "Waiting for simulator to start..." sleep 5 -echo "" -echo "=== Test 1: Basic Request with Known Question ===" -echo "Sending request with AIME question..." -curl -s -X POST http://localhost:$PORT/v1/chat/completions \ - -H "Content-Type: application/json" \ - -d '{ - "model": "llama", - "messages": [ - {"role": "user", "content": "Quadratic polynomials P(x) and Q(x) have leading coefficients 2 and -2, respectively. The graphs of both polynomials pass through the two points (16,54) and (20,53). Find P(0) + Q(0)."} - ], - "temperature": 0, - "max_tokens": 2048 - }' | python3 -c "import sys, json; data = json.load(sys.stdin); print('Answer:', data['choices'][0]['message']['content'])" +# Helper function to make a request and extract the answer +make_request() { + local question="$1" + curl -s -X POST http://localhost:$PORT/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d "{ + \"model\": \"llama\", + \"messages\": [ + {\"role\": \"user\", \"content\": \"$question\"} + ], + \"temperature\": 0, + \"max_tokens\": 2048 + }" | python3 -c "import sys, json; data = json.load(sys.stdin); print(data.get('choices', [{}])[0].get('message', {}).get('content', data.get('error', 'No response')))" +} + +# Test question (repeated in multiple tests) +TEST_QUESTION="Quadratic polynomials P(x) and Q(x) have leading coefficients 2 and -2, respectively. The graphs of both polynomials pass through the two points (16,54) and (20,53). Find P(0) + Q(0)." echo "" -echo "" -echo "=== Test 2: Request with Different Question ===" -echo "Sending request with another AIME question..." -curl -s -X POST http://localhost:$PORT/v1/chat/completions \ - -H "Content-Type: application/json" \ - -d '{ - "model": "llama", - "messages": [ - {"role": "user", "content": "Compute the value of 2^10 + 3^10."} - ], - "temperature": 0, - "max_tokens": 2048 - }' | python3 -c "import sys, json; data = json.load(sys.stdin); print('Answer:', data['choices'][0]['message']['content'])" +echo "=== Test 1: Correct Answer ===" +echo "Sending request with known question..." +answer=$(make_request "$TEST_QUESTION") +echo "Answer: $answer" +echo "Expected: 116" +echo "Correct: $([ "$answer" == "116" ] && echo "Yes" || echo "No")" echo "" +echo "=== Test 2: Wrong Answer ===" +echo "Sending request with known question (success rate 0.0)..." +answer=$(make_request "$TEST_QUESTION") +echo "Answer: $answer" +echo "Expected: 116" +echo "Correct: $([ "$answer" == "116" ] && echo "Yes" || echo "No")" + echo "" -echo "=== Test 3: Request with No Matching Question ===" +echo "=== Test 3: No Matching Question ===" echo "Sending request with non-matching text..." -curl -s -X POST http://localhost:$PORT/v1/chat/completions \ - -H "Content-Type: application/json" \ - -d '{ - "model": "llama", - "messages": [ - {"role": "user", "content": "What is the capital of France?"} - ], - "temperature": 0, - "max_tokens": 2048 - }' | python3 -c "import sys, json; data = json.load(sys.stdin); print('Response:', data.get('error', 'No error'))" +response=$(make_request "What is the capital of France?") +echo "Response: $response" +echo "Expected: No matching question found" +echo "Correct: $([ "$response" == "No matching question found" ] && echo "Yes" || echo "No")" echo "" -echo "" -echo "=== Test 4: Multiple Requests to Test Success Rate ===" +echo "=== Test 4: Success Rate Verification ===" echo "Sending 10 requests to test success rate..." correct_count=0 for i in {1..10}; do - echo "Request $i:" - response=$(curl -s -X POST http://localhost:$PORT/v1/chat/completions \ - -H "Content-Type: application/json" \ - -d '{ - "model": "llama", - "messages": [ - {"role": "user", "content": "Quadratic polynomials P(x) and Q(x) have leading coefficients 2 and -2, respectively. The graphs of both polynomials pass through the two points (16,54) and (20,53). Find P(0) + Q(0)."} - ], - "temperature": 0, - "max_tokens": 2048 - }') - answer=$(echo $response | python3 -c "import sys, json; data = json.load(sys.stdin); print(data['choices'][0]['message']['content'])") + answer=$(make_request "$TEST_QUESTION") if [ "$answer" == "116" ]; then correct_count=$((correct_count + 1)) fi - echo " Answer: $answer" + echo " Request $i: Answer = $answer" done echo "Correct answers: $correct_count/10" +echo "Expected: ~8/10 (80% success rate)" echo "Success rate: $(echo "scale=1; $correct_count * 10" | bc)%" echo ""