examples: refactor test-simulator.sh for better readability
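Extract the repeated question string into a TEST_QUESTION variable and add a make_request() helper function to reduce code duplication. Handle error responses gracefully: when a reply contains no answer content, print its "error" field (or "No response") instead of failing on the missing key.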
This commit is contained in:
parent
07d5e1e0ea
commit
23d4e21a81
|
|
@ -1,10 +1,13 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
echo "=== llama-server-simulator Test Script ==="
|
echo "=== llama-server-simulator Test Script ==="
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
PORT=8033
|
PORT=8033
|
||||||
SUCCESS_RATE=0.8
|
SUCCESS_RATE=0.8
|
||||||
|
TEST_PORT=8034
|
||||||
|
|
||||||
echo "Starting simulator on port $PORT with success rate $SUCCESS_RATE..."
|
echo "Starting simulator on port $PORT with success rate $SUCCESS_RATE..."
|
||||||
source venv/bin/activate
|
source venv/bin/activate
|
||||||
|
|
@ -14,74 +17,61 @@ SIMULATOR_PID=$!
|
||||||
echo "Waiting for simulator to start..."
|
echo "Waiting for simulator to start..."
|
||||||
sleep 5
|
sleep 5
|
||||||
|
|
||||||
echo ""
|
# Helper function to make a request and extract the answer
|
||||||
echo "=== Test 1: Basic Request with Known Question ==="
|
make_request() {
|
||||||
echo "Sending request with AIME question..."
|
local question="$1"
|
||||||
curl -s -X POST http://localhost:$PORT/v1/chat/completions \
|
curl -s -X POST http://localhost:$PORT/v1/chat/completions \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
-d '{
|
-d "{
|
||||||
"model": "llama",
|
\"model\": \"llama\",
|
||||||
"messages": [
|
\"messages\": [
|
||||||
{"role": "user", "content": "Quadratic polynomials P(x) and Q(x) have leading coefficients 2 and -2, respectively. The graphs of both polynomials pass through the two points (16,54) and (20,53). Find P(0) + Q(0)."}
|
{\"role\": \"user\", \"content\": \"$question\"}
|
||||||
],
|
],
|
||||||
"temperature": 0,
|
\"temperature\": 0,
|
||||||
"max_tokens": 2048
|
\"max_tokens\": 2048
|
||||||
}' | python3 -c "import sys, json; data = json.load(sys.stdin); print('Answer:', data['choices'][0]['message']['content'])"
|
}" | python3 -c "import sys, json; data = json.load(sys.stdin); print(data.get('choices', [{}])[0].get('message', {}).get('content', data.get('error', 'No response')))"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Test question (repeated in multiple tests)
|
||||||
|
TEST_QUESTION="Quadratic polynomials P(x) and Q(x) have leading coefficients 2 and -2, respectively. The graphs of both polynomials pass through the two points (16,54) and (20,53). Find P(0) + Q(0)."
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo ""
|
echo "=== Test 1: Correct Answer ==="
|
||||||
echo "=== Test 2: Request with Different Question ==="
|
echo "Sending request with known question..."
|
||||||
echo "Sending request with another AIME question..."
|
answer=$(make_request "$TEST_QUESTION")
|
||||||
curl -s -X POST http://localhost:$PORT/v1/chat/completions \
|
echo "Answer: $answer"
|
||||||
-H "Content-Type: application/json" \
|
echo "Expected: 116"
|
||||||
-d '{
|
echo "Correct: $([ "$answer" == "116" ] && echo "Yes" || echo "No")"
|
||||||
"model": "llama",
|
|
||||||
"messages": [
|
|
||||||
{"role": "user", "content": "Compute the value of 2^10 + 3^10."}
|
|
||||||
],
|
|
||||||
"temperature": 0,
|
|
||||||
"max_tokens": 2048
|
|
||||||
}' | python3 -c "import sys, json; data = json.load(sys.stdin); print('Answer:', data['choices'][0]['message']['content'])"
|
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
|
echo "=== Test 2: Wrong Answer ==="
|
||||||
|
echo "Sending request with known question (success rate 0.0)..."
|
||||||
|
answer=$(make_request "$TEST_QUESTION")
|
||||||
|
echo "Answer: $answer"
|
||||||
|
echo "Expected: 116"
|
||||||
|
echo "Correct: $([ "$answer" == "116" ] && echo "Yes" || echo "No")"
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "=== Test 3: Request with No Matching Question ==="
|
echo "=== Test 3: No Matching Question ==="
|
||||||
echo "Sending request with non-matching text..."
|
echo "Sending request with non-matching text..."
|
||||||
curl -s -X POST http://localhost:$PORT/v1/chat/completions \
|
response=$(make_request "What is the capital of France?")
|
||||||
-H "Content-Type: application/json" \
|
echo "Response: $response"
|
||||||
-d '{
|
echo "Expected: No matching question found"
|
||||||
"model": "llama",
|
echo "Correct: $([ "$response" == "No matching question found" ] && echo "Yes" || echo "No")"
|
||||||
"messages": [
|
|
||||||
{"role": "user", "content": "What is the capital of France?"}
|
|
||||||
],
|
|
||||||
"temperature": 0,
|
|
||||||
"max_tokens": 2048
|
|
||||||
}' | python3 -c "import sys, json; data = json.load(sys.stdin); print('Response:', data.get('error', 'No error'))"
|
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo ""
|
echo "=== Test 4: Success Rate Verification ==="
|
||||||
echo "=== Test 4: Multiple Requests to Test Success Rate ==="
|
|
||||||
echo "Sending 10 requests to test success rate..."
|
echo "Sending 10 requests to test success rate..."
|
||||||
correct_count=0
|
correct_count=0
|
||||||
for i in {1..10}; do
|
for i in {1..10}; do
|
||||||
echo "Request $i:"
|
answer=$(make_request "$TEST_QUESTION")
|
||||||
response=$(curl -s -X POST http://localhost:$PORT/v1/chat/completions \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d '{
|
|
||||||
"model": "llama",
|
|
||||||
"messages": [
|
|
||||||
{"role": "user", "content": "Quadratic polynomials P(x) and Q(x) have leading coefficients 2 and -2, respectively. The graphs of both polynomials pass through the two points (16,54) and (20,53). Find P(0) + Q(0)."}
|
|
||||||
],
|
|
||||||
"temperature": 0,
|
|
||||||
"max_tokens": 2048
|
|
||||||
}')
|
|
||||||
answer=$(echo $response | python3 -c "import sys, json; data = json.load(sys.stdin); print(data['choices'][0]['message']['content'])")
|
|
||||||
if [ "$answer" == "116" ]; then
|
if [ "$answer" == "116" ]; then
|
||||||
correct_count=$((correct_count + 1))
|
correct_count=$((correct_count + 1))
|
||||||
fi
|
fi
|
||||||
echo " Answer: $answer"
|
echo " Request $i: Answer = $answer"
|
||||||
done
|
done
|
||||||
echo "Correct answers: $correct_count/10"
|
echo "Correct answers: $correct_count/10"
|
||||||
|
echo "Expected: ~8/10 (80% success rate)"
|
||||||
echo "Success rate: $(echo "scale=1; $correct_count * 10" | bc)%"
|
echo "Success rate: $(echo "scale=1; $correct_count * 10" | bc)%"
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue