examples: refactor test-simulator.sh for better readability

Extract the repeated question string into a TEST_QUESTION variable and
add a make_request() helper function to reduce code duplication.
Handle error responses: when a reply has no answer content, print its
error field (or "No response") instead of failing on the missing key.
This commit is contained in:
Georgi Gerganov 2026-01-31 15:45:47 +02:00
parent 07d5e1e0ea
commit 23d4e21a81
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735
1 changed file with 42 additions and 52 deletions

View File

@ -1,10 +1,13 @@
#!/bin/bash #!/bin/bash
set -e
echo "=== llama-server-simulator Test Script ===" echo "=== llama-server-simulator Test Script ==="
echo "" echo ""
PORT=8033 PORT=8033
SUCCESS_RATE=0.8 SUCCESS_RATE=0.8
TEST_PORT=8034
echo "Starting simulator on port $PORT with success rate $SUCCESS_RATE..." echo "Starting simulator on port $PORT with success rate $SUCCESS_RATE..."
source venv/bin/activate source venv/bin/activate
@ -14,74 +17,61 @@ SIMULATOR_PID=$!
echo "Waiting for simulator to start..." echo "Waiting for simulator to start..."
sleep 5 sleep 5
echo "" # Helper function to make a request and extract the answer
echo "=== Test 1: Basic Request with Known Question ===" make_request() {
echo "Sending request with AIME question..." local question="$1"
curl -s -X POST http://localhost:$PORT/v1/chat/completions \ curl -s -X POST http://localhost:$PORT/v1/chat/completions \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-d '{ -d "{
"model": "llama", \"model\": \"llama\",
"messages": [ \"messages\": [
{"role": "user", "content": "Quadratic polynomials P(x) and Q(x) have leading coefficients 2 and -2, respectively. The graphs of both polynomials pass through the two points (16,54) and (20,53). Find P(0) + Q(0)."} {\"role\": \"user\", \"content\": \"$question\"}
], ],
"temperature": 0, \"temperature\": 0,
"max_tokens": 2048 \"max_tokens\": 2048
}' | python3 -c "import sys, json; data = json.load(sys.stdin); print('Answer:', data['choices'][0]['message']['content'])" }" | python3 -c "import sys, json; data = json.load(sys.stdin); print(data.get('choices', [{}])[0].get('message', {}).get('content', data.get('error', 'No response')))"
}
# Test question (repeated in multiple tests)
TEST_QUESTION="Quadratic polynomials P(x) and Q(x) have leading coefficients 2 and -2, respectively. The graphs of both polynomials pass through the two points (16,54) and (20,53). Find P(0) + Q(0)."
echo "" echo ""
echo "" echo "=== Test 1: Correct Answer ==="
echo "=== Test 2: Request with Different Question ===" echo "Sending request with known question..."
echo "Sending request with another AIME question..." answer=$(make_request "$TEST_QUESTION")
curl -s -X POST http://localhost:$PORT/v1/chat/completions \ echo "Answer: $answer"
-H "Content-Type: application/json" \ echo "Expected: 116"
-d '{ echo "Correct: $([ "$answer" == "116" ] && echo "Yes" || echo "No")"
"model": "llama",
"messages": [
{"role": "user", "content": "Compute the value of 2^10 + 3^10."}
],
"temperature": 0,
"max_tokens": 2048
}' | python3 -c "import sys, json; data = json.load(sys.stdin); print('Answer:', data['choices'][0]['message']['content'])"
echo "" echo ""
echo "=== Test 2: Wrong Answer ==="
echo "Sending request with known question (success rate 0.0)..."
answer=$(make_request "$TEST_QUESTION")
echo "Answer: $answer"
echo "Expected: 116"
echo "Correct: $([ "$answer" == "116" ] && echo "Yes" || echo "No")"
echo "" echo ""
echo "=== Test 3: Request with No Matching Question ===" echo "=== Test 3: No Matching Question ==="
echo "Sending request with non-matching text..." echo "Sending request with non-matching text..."
curl -s -X POST http://localhost:$PORT/v1/chat/completions \ response=$(make_request "What is the capital of France?")
-H "Content-Type: application/json" \ echo "Response: $response"
-d '{ echo "Expected: No matching question found"
"model": "llama", echo "Correct: $([ "$response" == "No matching question found" ] && echo "Yes" || echo "No")"
"messages": [
{"role": "user", "content": "What is the capital of France?"}
],
"temperature": 0,
"max_tokens": 2048
}' | python3 -c "import sys, json; data = json.load(sys.stdin); print('Response:', data.get('error', 'No error'))"
echo "" echo ""
echo "" echo "=== Test 4: Success Rate Verification ==="
echo "=== Test 4: Multiple Requests to Test Success Rate ==="
echo "Sending 10 requests to test success rate..." echo "Sending 10 requests to test success rate..."
correct_count=0 correct_count=0
for i in {1..10}; do for i in {1..10}; do
echo "Request $i:" answer=$(make_request "$TEST_QUESTION")
response=$(curl -s -X POST http://localhost:$PORT/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "llama",
"messages": [
{"role": "user", "content": "Quadratic polynomials P(x) and Q(x) have leading coefficients 2 and -2, respectively. The graphs of both polynomials pass through the two points (16,54) and (20,53). Find P(0) + Q(0)."}
],
"temperature": 0,
"max_tokens": 2048
}')
answer=$(echo $response | python3 -c "import sys, json; data = json.load(sys.stdin); print(data['choices'][0]['message']['content'])")
if [ "$answer" == "116" ]; then if [ "$answer" == "116" ]; then
correct_count=$((correct_count + 1)) correct_count=$((correct_count + 1))
fi fi
echo " Answer: $answer" echo " Request $i: Answer = $answer"
done done
echo "Correct answers: $correct_count/10" echo "Correct answers: $correct_count/10"
echo "Expected: ~8/10 (80% success rate)"
echo "Success rate: $(echo "scale=1; $correct_count * 10" | bc)%" echo "Success rate: $(echo "scale=1; $correct_count * 10" | bc)%"
echo "" echo ""