examples: use cached dataset path in simulator to avoid HF Hub requests

This commit is contained in:
Georgi Gerganov 2026-01-31 16:39:51 +02:00
parent c2619c18bf
commit 04f6872116
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735
1 changed file with 7 additions and 2 deletions

View File

@@ -62,9 +62,14 @@ class AimeDataset:
def _load_dataset(self):
    """Load the AIME validation split into ``self.questions``.

    The dataset ``AI-MO/aimo-validation-aime`` is loaded exactly once. If a
    prepared copy is found under the datasets cache root, that root is
    passed explicitly as ``cache_dir``; otherwise the library defaults
    are used.

    Side effects:
        Sets ``self.questions`` to a list of the dataset rows and prints
        progress messages to stdout.
    """
    print(f"Loading AIME dataset (split: {self.split})...")
    print(f"Using cache: {os.environ.get('HF_DATASETS_CACHE', 'default')}")

    # Honor HF_DATASETS_CACHE (the value reported above) before falling back
    # to the conventional per-user location.
    env_root = os.environ.get("HF_DATASETS_CACHE")
    if env_root:
        cache_root = Path(env_root).expanduser()
    else:
        cache_root = Path.home() / ".cache" / "huggingface" / "datasets"
    cache_path = cache_root / "AI-MO___aimo-validation-aime" / "default" / "0.0.0"

    load_kwargs = {}
    if cache_path.exists():
        print(f"Using cached dataset from {cache_path}")
        # cache_dir must be the cache *root*: the library appends the
        # dataset-specific sub-directories itself. Passing cache_path here
        # would make it look for a nested copy that does not exist.
        load_kwargs["cache_dir"] = str(cache_root)

    # NOTE(review): load_dataset may still contact the Hub to resolve the
    # dataset unless offline mode (e.g. HF_DATASETS_OFFLINE=1) is enabled --
    # confirm whether that is needed for the simulator.
    ds = datasets.load_dataset(
        "AI-MO/aimo-validation-aime",
        split=self.split,
        **load_kwargs,
    )
    self.questions = list(ds)
    print(f"AIME dataset loaded: {len(self.questions)} questions")