#!/bin/bash
#
# llama.cpp/examples/llama-eval/test-simulator.sh
#
# Manual smoke test for llama-server-simulator.py: launches the simulator in
# the background, then exercises its /v1/chat/completions endpoint with curl.
#
# All paths are relative, so run this from the directory that contains both
# venv/ and examples/ (NOTE(review): presumably the repository root - confirm).

echo "=== llama-server-simulator Test Script ==="
echo ""

PORT=8033
SUCCESS_RATE=0.8
SIMULATOR_LOG=/tmp/simulator-test.log   # simulator stdout+stderr end up here
STARTUP_TIMEOUT=30                      # max seconds to wait for the port to answer
SIMULATOR_PID=

# Stop the background simulator if one is still recorded as running.
# Registered on EXIT so that an early exit does not leave the server behind.
cleanup() {
  if [[ -n "$SIMULATOR_PID" ]]; then
    kill "$SIMULATOR_PID" 2>/dev/null
    wait "$SIMULATOR_PID" 2>/dev/null
    SIMULATOR_PID=
  fi
}
trap cleanup EXIT

echo "Starting simulator on port $PORT with success rate $SUCCESS_RATE..."
if [[ -f venv/bin/activate ]]; then
  # shellcheck disable=SC1091
  source venv/bin/activate
else
  # Best effort: fall through to whatever python3 is on PATH.
  echo "Warning: venv/bin/activate not found; using python3 from PATH" >&2
fi
python3 examples/llama-eval/llama-server-simulator.py \
  --port "$PORT" --success-rate "$SUCCESS_RATE" > "$SIMULATOR_LOG" 2>&1 &
SIMULATOR_PID=$!

echo "Waiting for simulator to start..."
# Poll instead of sleeping a fixed time: stop as soon as the server answers,
# and give up early if the process has already died.
ready=0
for (( attempt = 0; attempt < STARTUP_TIMEOUT; attempt++ )); do
  if ! kill -0 "$SIMULATOR_PID" 2>/dev/null; then
    break
  fi
  # Without -f, curl exits 0 on any HTTP response (even 404/501), which is
  # enough to know the port is being served.
  if curl -s -o /dev/null --max-time 2 "http://localhost:$PORT/"; then
    ready=1
    break
  fi
  sleep 1
done
if (( ! ready )); then
  echo "Error: simulator did not become reachable on port $PORT." >&2
  echo "Last lines of $SIMULATOR_LOG:" >&2
  tail -n 20 "$SIMULATOR_LOG" >&2
  exit 1
fi

# Test 1: POST one known AIME question and print the content of the first
# choice in the JSON reply.
printf '\n'
printf '%s\n' "=== Test 1: Basic Request with Known Question ==="
printf '%s\n' "Sending request with AIME question..."

test1_endpoint="http://localhost:${PORT}/v1/chat/completions"
test1_payload='{
    "model": "llama",
    "messages": [
      {"role": "user", "content": "Quadratic polynomials P(x) and Q(x) have leading coefficients 2 and -2, respectively. The graphs of both polynomials pass through the two points (16,54) and (20,53). Find P(0) + Q(0)."}
    ],
    "temperature": 0,
    "max_tokens": 2048
  }'
# Python one-liner that reads the response JSON from stdin.
test1_extract="import sys, json; data = json.load(sys.stdin); print('Answer:', data['choices'][0]['message']['content'])"

curl --silent --request POST \
  --header "Content-Type: application/json" \
  --data "$test1_payload" \
  "$test1_endpoint" \
  | python3 -c "$test1_extract"

# Test 2: same request shape as the first test, with a different question;
# prints the content of the first choice in the JSON reply.
printf '\n\n'
printf '%s\n' "=== Test 2: Request with Different Question ==="
printf '%s\n' "Sending request with another AIME question..."

test2_endpoint="http://localhost:${PORT}/v1/chat/completions"
test2_payload='{
    "model": "llama",
    "messages": [
      {"role": "user", "content": "Compute the value of 2^10 + 3^10."}
    ],
    "temperature": 0,
    "max_tokens": 2048
  }'
# Python one-liner that reads the response JSON from stdin.
test2_extract="import sys, json; data = json.load(sys.stdin); print('Answer:', data['choices'][0]['message']['content'])"

curl --silent --request POST \
  --header "Content-Type: application/json" \
  --data "$test2_payload" \
  "$test2_endpoint" \
  | python3 -c "$test2_extract"

# Test 3: send text that is not one of the AIME questions and print the
# "error" field of the JSON reply, or 'No error' when that field is absent.
printf '\n\n'
printf '%s\n' "=== Test 3: Request with No Matching Question ==="
printf '%s\n' "Sending request with non-matching text..."

test3_endpoint="http://localhost:${PORT}/v1/chat/completions"
test3_payload='{
    "model": "llama",
    "messages": [
      {"role": "user", "content": "What is the capital of France?"}
    ],
    "temperature": 0,
    "max_tokens": 2048
  }'
# Python one-liner that reads the response JSON from stdin.
test3_extract="import sys, json; data = json.load(sys.stdin); print('Response:', data.get('error', 'No error'))"

curl --silent --request POST \
  --header "Content-Type: application/json" \
  --data "$test3_payload" \
  "$test3_endpoint" \
  | python3 -c "$test3_extract"

# Test 4: send the same known question several times and count how many
# replies equal the expected answer, then report the observed success rate.
echo ""
echo ""
echo "=== Test 4: Multiple Requests to Test Success Rate ==="
total_requests=10
expected_answer="116"   # the reply that counts as correct for this question
echo "Sending $total_requests requests to test success rate..."

request_body='{
      "model": "llama",
      "messages": [
        {"role": "user", "content": "Quadratic polynomials P(x) and Q(x) have leading coefficients 2 and -2, respectively. The graphs of both polynomials pass through the two points (16,54) and (20,53). Find P(0) + Q(0)."}
      ],
      "temperature": 0,
      "max_tokens": 2048
    }'

correct_count=0
for (( i = 1; i <= total_requests; i++ )); do
  echo "Request $i:"
  response=$(curl -s -X POST "http://localhost:$PORT/v1/chat/completions" \
    -H "Content-Type: application/json" \
    -d "$request_body")
  # Pass the response through printf with quoting so the JSON reaches the
  # parser unmodified (an unquoted echo would word-split and glob-expand it).
  answer=$(printf '%s' "$response" \
    | python3 -c "import sys, json; data = json.load(sys.stdin); print(data['choices'][0]['message']['content'])")
  if [[ "$answer" == "$expected_answer" ]]; then
    correct_count=$((correct_count + 1))
  fi
  echo "  Answer: $answer"
done
echo "Correct answers: $correct_count/$total_requests"
# Integer percentage via shell arithmetic; no external bc needed.
echo "Success rate: $(( correct_count * 100 / total_requests ))%"

# Teardown: terminate the background simulator and reap it.
echo ""
echo "=== Test Complete ==="
echo "Stopping simulator..."
# Only signal when a PID was recorded: with an empty operand, kill would fail
# and a bare wait would block on every background job instead.
if [[ -n "${SIMULATOR_PID:-}" ]]; then
  kill "$SIMULATOR_PID" 2>/dev/null
  # wait reports the simulator's (signal) exit status; ignore it on purpose.
  wait "$SIMULATOR_PID" 2>/dev/null || true
  # Forget the PID so nothing later signals a recycled process ID.
  SIMULATOR_PID=
fi

echo "Simulator stopped."