scripts : improve get-wikitext-2.sh (#19952)

* scripts : improve get-wikitext-2.sh

Switch to sh, add curl fallback, and avoid redundant downloads

Signed-off-by: Adrien Gallouët <adrien@gallouet.fr>

* fix indent

Signed-off-by: Adrien Gallouët <angt@huggingface.co>

---------

Signed-off-by: Adrien Gallouët <adrien@gallouet.fr>
Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
Adrien Gallouët 2026-03-02 15:40:49 +01:00 committed by GitHub
parent 2afcdb9777
commit ec88c3ceea
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 40 additions and 8 deletions

View File

@ -1,11 +1,43 @@
#!/usr/bin/env bash
#!/bin/sh
# vim: set ts=4 sw=4 et:
wget https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip
unzip wikitext-2-raw-v1.zip
ZIP="wikitext-2-raw-v1.zip"
FILE="wikitext-2-raw/wiki.test.raw"
URL="https://huggingface.co/datasets/ggml-org/ci/resolve/main/$ZIP"
echo "Usage:"
echo ""
echo " ./llama-perplexity -m model.gguf -f wikitext-2-raw/wiki.test.raw [other params]"
echo ""
die() {
printf "%s\n" "$@" >&2
exit 1
}
exit 0
have_cmd() {
for cmd; do
command -v "$cmd" >/dev/null || return
done
}
dl() {
[ -f "$2" ] && return
if have_cmd wget; then
wget "$1" -O "$2"
elif have_cmd curl; then
curl -L "$1" -o "$2"
else
die "Please install wget or curl"
fi
}
have_cmd unzip || die "Please install unzip"
if [ ! -f "$FILE" ]; then
dl "$URL" "$ZIP" || exit
unzip -o "$ZIP" || exit
rm -f -- "$ZIP"
fi
cat <<EOF
Usage:
llama-perplexity -m model.gguf -f $FILE [other params]
EOF