# llama.cpp/tests/6500xt_benchmark.ps1

# Treat PowerShell errors as terminating so the script stops at the first one.
$ErrorActionPreference = 'Stop'

# Configuration
# Directory used for the CMake build and for locating the built tools.
$BuildDir = 'build'
# Model file handed to llama-cli via -m. Adjust as needed.
$ModelPath = 'models/7B/ggml-model-f16.gguf'
# Prompt handed to llama-cli via -p.
$Prompt = 'The quick brown fox jumps over the lazy dog'
# Number of llama-cli invocations averaged per benchmark configuration.
$NumRuns = 3
# File that receives one CSV row per benchmark configuration.
$CsvFile = 'benchmark_results.csv'

# Ensure build directory exists
if (!(Test-Path $BuildDir)) {
    New-Item -ItemType Directory -Path $BuildDir | Out-Null
}

# Build
# Configure and compile with the Vulkan backend enabled. cmake is a native
# executable, so a failure is only visible through $LASTEXITCODE; it is checked
# explicitly so that a broken build stops the script instead of falling through
# to benchmarking missing or stale binaries.
Write-Host "Building project..."
Push-Location $BuildDir
try {
    cmake .. -DGGML_VULKAN=ON -DCMAKE_BUILD_TYPE=Release
    if ($LASTEXITCODE -ne 0) {
        throw "CMake configure failed with exit code $LASTEXITCODE"
    }

    cmake --build . --config Release -j 8
    if ($LASTEXITCODE -ne 0) {
        throw "CMake build failed with exit code $LASTEXITCODE"
    }
}
finally {
    # Always return to the original directory, even when the build fails.
    Pop-Location
}

# Tools paths
# Each tool is looked up in the candidate output locations, in order. The first
# one that exists is used; when none exists the last candidate is kept as-is.
$LlamaCliCandidates = @(
    "$BuildDir/bin/Release/llama-cli.exe",
    "$BuildDir/bin/llama-cli.exe",
    "$BuildDir/Release/llama-cli.exe"
)
$LlamaCli = $LlamaCliCandidates | Where-Object { Test-Path $_ } | Select-Object -First 1
if (-not $LlamaCli) {
    $LlamaCli = $LlamaCliCandidates[-1]
}

$VkInfoCandidates = @(
    "$BuildDir/bin/Release/llama-vk-device-info.exe",
    "$BuildDir/bin/llama-vk-device-info.exe",
    "$BuildDir/Release/llama-vk-device-info.exe"
)
$VkInfoTool = $VkInfoCandidates | Where-Object { Test-Path $_ } | Select-Object -First 1
if (-not $VkInfoTool) {
    $VkInfoTool = $VkInfoCandidates[-1]
}

# System Info
# Diagnostic collection is best-effort: a missing helper tool produces a warning
# instead of aborting the benchmark run.
Write-Host "Collecting System Info..."

if (Get-Command vulkaninfo -ErrorAction SilentlyContinue) {
    vulkaninfo | Out-File "vulkaninfo.txt"
}
else {
    Write-Warning "vulkaninfo was not found on PATH; skipping vulkaninfo.txt"
}

if (Test-Path $VkInfoTool) {
    & $VkInfoTool | Out-File "vk_device_info.txt"
    # Echo the captured device info so it also appears in the script output.
    Get-Content "vk_device_info.txt"
}
else {
    Write-Warning "Device info tool not found at '$VkInfoTool'; skipping vk_device_info.txt"
}

# Initialize CSV
# Start the results file from scratch with the column header row.
$CsvHeader = "RunType,Layers,LoadTime_ms,EvalTime_ms,TokensPerSec,PeakMem_MB"
Out-File -FilePath $CsvFile -InputObject $CsvHeader -Encoding ascii

# Runs llama-cli $NumRuns times for one configuration and appends the averaged
# metrics as a single row to $CsvFile.
#
# Parameters:
#   Type   - "CPU" runs with zero GPU-offloaded layers; "Vulkan" offloads
#            $Layers layers. Any other value adds no offload flag.
#   Layers - layer count passed to -ngl for "Vulkan" runs; it is written to the
#            CSV row for every type.
#
# Reads the script-level $LlamaCli, $ModelPath, $Prompt, $NumRuns and $CsvFile.
# A metric that cannot be parsed from a run counts as 0 in the average; a
# warning is emitted in that case.
function Invoke-Benchmark {
    param (
        [string]$Type,
        [int]$Layers
    )

    # Fail early with a clear message rather than on the first invocation.
    if (!(Test-Path $LlamaCli)) {
        throw "llama-cli executable not found at '$LlamaCli'"
    }

    $TotalLoadTime = 0
    $TotalEvalTime = 0
    $TotalTokensPerSec = 0

    # Accepts both "123.45" and "123".
    $Number = "(\d+(?:\.\d+)?)"

    Write-Host "Running benchmark: $Type (Layers: $Layers)"

    for ($i = 1; $i -le $NumRuns; $i++) {
        $LlamaArgs = @("-m", $ModelPath, "-p", $Prompt, "-n", "128", "--no-mmap")
        if ($Type -eq "CPU") {
            # -ngl 0 keeps every layer on the CPU. (-ngld is the draft-model
            # option and expects a value, so it must not be used here.)
            $LlamaArgs += "-ngl", "0"
        }
        elseif ($Type -eq "Vulkan") {
            $LlamaArgs += "-ngl", "$Layers"
        }

        # Capture output. With $ErrorActionPreference = "Stop", Windows
        # PowerShell turns the first redirected stderr line of a native command
        # into a terminating error, so the preference is relaxed for the call
        # only and restored afterwards.
        $SavedPreference = $ErrorActionPreference
        $ErrorActionPreference = "Continue"
        try {
            $Output = & $LlamaCli $LlamaArgs 2>&1
            $ExitCode = $LASTEXITCODE
        }
        finally {
            $ErrorActionPreference = $SavedPreference
        }

        if ($ExitCode -ne 0) {
            Write-Warning "  Run $i : llama-cli exited with code $ExitCode"
        }

        # Parse metrics
        $LoadTime = 0
        $EvalTime = 0
        $Tps = 0

        foreach ($Line in $Output) {
            # Redirected stderr lines may arrive as ErrorRecord objects.
            $Text = "$Line"

            if ($Text -match "load time =\s*$Number ms") {
                $LoadTime = [double]$matches[1]
            }
            # The lookbehind skips the "prompt eval time" line so only the
            # generation timing line is used for eval time and throughput.
            if ($Text -match "(?<!prompt )eval time =\s*$Number ms") {
                $EvalTime = [double]$matches[1]
                if ($Text -match "$Number tokens per second") {
                    $Tps = [double]$matches[1]
                }
            }
        }

        if (($LoadTime -eq 0) -and ($EvalTime -eq 0) -and ($Tps -eq 0)) {
            Write-Warning "  Run $i : no timing metrics found in llama-cli output"
        }

        $TotalLoadTime += $LoadTime
        $TotalEvalTime += $EvalTime
        $TotalTokensPerSec += $Tps

        Write-Host "  Run $i : Load=$LoadTime ms, Eval=$EvalTime ms, TPS=$Tps"
    }

    $AvgLoad = $TotalLoadTime / $NumRuns
    $AvgEval = $TotalEvalTime / $NumRuns
    $AvgTps = $TotalTokensPerSec / $NumRuns

    # Peak memory is not measured; the last column is written as a constant 0.
    "$Type,$Layers,$AvgLoad,$AvgEval,$AvgTps,0" | Out-File $CsvFile -Append -Encoding ascii
}

# Run Benchmarks
# CPU baseline first.
Invoke-Benchmark -Type "CPU" -Layers 0

# Test various GPU layers
# Note: If heuristic works, -ngl -1 (default) should pick 1 layer for 6500 XT
# We explicitly test 1, 2, 3, 4 to show performance degradation
foreach ($LayerCount in 1..4) {
    Invoke-Benchmark -Type "Vulkan" -Layers $LayerCount
}

Write-Host "Benchmark complete. Results saved to $CsvFile"