#!/bin/bash
# Chunked GDN coopmat benchmark
#
# Probes the model and GPU with llama-bench, runs a throughput benchmark
# (PP-512/1024/2048 + TG-128) followed by a GATED_DELTA kernel profiling pass
# (PP-512), and writes the results as a Markdown report.
#
# Usage: ./scripts/bench-gdn-chunked.sh <model> [output_file]
#   <model>        model file handed to llama-bench via -m (required)
#   [output_file]  Markdown report path (default: gdn-chunked-results.md)
#
# When a llama-bench run fails, its full output is saved to a log file whose
# name is the report path with a trailing ".md" stripped and ".log" appended.
#
# Provenance: added as scripts/bench-gdn-chunked.sh by the patch
# "vulkan: improve bench script output and error handling".

set -euo pipefail

MODEL="${1:?Usage: $0 <model> [output_file]}"
OUT="${2:-gdn-chunked-results.md}"
LOG="${OUT%.md}.log"
BENCH="./build/bin/llama-bench"

# Print each argument as one line on stderr, then abort with status 1.
die() {
    printf '%s\n' "$@" >&2
    exit 1
}

# Handle a failed llama-bench run.
# Arguments: $1 - short description of what failed
#            $2 - captured stdout+stderr of the failed run
# Saves the full output to $LOG, appends its last 30 lines to the report under
# an "## ERROR" heading, prints the report, and exits with status 1.
report_failure() {
    local what=$1 output=$2
    # Write the log before announcing it so the message is always true.
    printf '%s\n' "$output" > "$LOG"
    echo "ERROR: $what. See $LOG for details." >&2
    {
        echo ""
        echo "## ERROR: $what"
        echo '```'
        tail -n 30 <<<"$output"
        echo '```'
    } >> "$OUT"
    cat "$OUT"
    exit 1
}

if [[ ! -f "$BENCH" ]]; then
    die "ERROR: llama-bench not found. Build first:" \
        "  cmake -B build -DGGML_VULKAN=ON -DCMAKE_BUILD_TYPE=Release" \
        "  cmake --build build --target llama-bench -j\$(nproc)"
fi

if [[ ! -f "$MODEL" ]]; then
    die "ERROR: Model not found: $MODEL"
fi

echo "Checking model + GPU..."
if ! PROBE=$("$BENCH" -m "$MODEL" -ngl 99 -fa 1 -n 0 -p 1 -v 2>&1); then
    printf '%s\n' "$PROBE" > "$LOG"
    die "ERROR: llama-bench failed to load model. Full output:" "$PROBE"
fi

# All parsing below reads the captured text through here-strings instead of
# `echo … | grep`: under `pipefail`, a grep that exits early (-q, or head
# closing the pipe) can make the writer die of SIGPIPE and turn a successful
# match into a failed pipeline.
GPU_LINE=$(grep -m 1 "ggml_vulkan: 0 =" <<<"$PROBE" || echo "unknown")
GPU_NAME=$(sed 's/.*0 = //' <<<"$GPU_LINE")
BUILD=$(grep "^build:" <<<"$PROBE" || echo "unknown")

COOPMAT="no"
if grep -q "KHR_coopmat" <<<"$GPU_LINE"; then
    COOPMAT="yes (KHR_coopmat)"
fi

# Classify which GDN paths the probe output reports as enabled.
has_chunked=0
has_autoreg=0
if grep -q "chunked) enabled" <<<"$PROBE"; then
    has_chunked=1
fi
if grep -q "autoregressive) enabled" <<<"$PROBE"; then
    has_autoreg=1
fi
case "${has_chunked}${has_autoreg}" in
    11) GDN_MODE="both (auto + chunked)" ;;
    10) GDN_MODE="chunked (coopmat)" ;;
    01) GDN_MODE="autoregressive" ;;
    *)  GDN_MODE="not detected" ;;
esac

# `free` is not available on every platform; fall back to "unknown" instead
# of leaking a "command not found" error and an empty value into the report.
RAM=""
if command -v free >/dev/null 2>&1; then
    RAM=$(free -h | awk '/Mem:/{print $2}') || RAM=""
fi
RAM="${RAM:-unknown}"

{
    echo "# Chunked GDN Coopmat Benchmark"
    echo ""
    echo "**GPU:** ${GPU_NAME}"
    echo "**Coopmat:** ${COOPMAT}"
    echo "**GDN mode:** ${GDN_MODE}"
    echo "**Model:** $(basename "$MODEL")"
    echo "**Date:** $(date -u +%Y-%m-%dT%H:%M:%SZ)"
    echo "**Build:** $BUILD"
    echo "**OS:** $(uname -srm)"
    echo "**RAM:** ${RAM} total"
    echo ""
} > "$OUT"

if [[ "$GDN_MODE" == "not detected" ]]; then
    echo "WARNING: GDN not detected for this model. Results may not show GDN profiling data." >&2
fi

echo "Running throughput benchmark (PP-512/1024/2048 + TG-128)..."
if ! RESULT=$("$BENCH" -m "$MODEL" -ngl 99 -fa 1 -n 128 -p 512,1024,2048 --output md 2>&1); then
    report_failure "Benchmark failed" "$RESULT"
fi

{
    echo "## Throughput"
    echo ""
    # Keep only the Markdown table rows. A run that printed no table must not
    # abort the script through errexit, so substitute an explicit note.
    grep -E "^\|" <<<"$RESULT" || echo "*No result table found in llama-bench output.*"
    echo ""
} >> "$OUT"

echo "Running GDN kernel profiling (PP-512)..."
# Capture the whole run first and filter afterwards: filtering inside the
# pipeline made "no GATED_DELTA lines" (grep status 1) indistinguishable from
# a crashed run and aborted the script before the fallback text was written.
if ! PROF_RAW=$(GGML_VK_PERF_LOGGER=1 GGML_VK_PERF_LOGGER_FREQUENCY=9999 \
        "$BENCH" -m "$MODEL" -ngl 99 -fa 1 -n 0 -p 512 2>&1); then
    report_failure "GDN kernel profiling failed" "$PROF_RAW"
fi
PROF=$(grep -m 5 "GATED_DELTA" <<<"$PROF_RAW" || true)

if [[ -n "$PROF" ]]; then
    {
        echo "## GDN Kernel Timing (PP-512)"
        echo ""
        echo '```'
        echo "$PROF"
        echo '```'
        echo ""
    } >> "$OUT"
else
    echo "*No GDN profiling data — model may not use GATED_DELTA_NET.*" >> "$OUT"
    echo "" >> "$OUT"
fi

echo ""
echo "Done. Results saved to: $OUT"
echo "---------------------------------------"
cat "$OUT"