116 lines
3.4 KiB
Python
116 lines
3.4 KiB
Python
"""
Run All NLLB Verification Tests

Executes the complete test suite to verify functional equivalence with HuggingFace.
"""
|
|
|
|
import subprocess
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
def run_test(test_file, test_name):
    """Run one test script in a child interpreter and report whether it passed.

    The script is launched with the interpreter that is running this file
    (``sys.executable``), with the working directory set to the directory
    containing this file. The child's output is not captured; it is written
    directly to the stdout/stderr inherited from this process.

    Args:
        test_file: Path of the test script to execute.
        test_name: Human-readable label used in the banner and result lines.

    Returns:
        True if the child exited with status 0; False if it exited with a
        non-zero status or could not be launched at all.
    """
    rule = "=" * 80
    print()
    print(rule)
    print(f"Running: {test_name}")
    print(rule)
    # The child writes straight to the inherited file descriptor, bypassing
    # this process's stdout buffer. Without a flush, the banner above shows
    # up *after* the child's output whenever stdout is a pipe or a file.
    sys.stdout.flush()

    try:
        # No pipes are requested, so text/encoding options would have no
        # effect and are omitted.
        result = subprocess.run(
            [sys.executable, test_file],
            cwd=Path(__file__).parent,
        )
    except Exception as e:
        # Runner boundary: a test that cannot even be launched is reported
        # as failed so the remaining tests still get to run.
        print(f"❌ {test_name} ERROR: {e}")
        return False

    print()
    if result.returncode == 0:
        print(f"✅ {test_name} PASSED")
        return True

    print(f"❌ {test_name} FAILED (exit code: {result.returncode})")
    return False
|
|
|
|
def _print_box(rows):
    """Print each string in *rows* centered inside an 80-column ruled box."""
    print("╔" + "=" * 78 + "╗")
    for row in rows:
        # An empty row centers to 78 spaces, i.e. a blank padding line.
        print("║" + row.center(78) + "║")
    print("╚" + "=" * 78 + "╝")


def main():
    """Run every verification test in sequence and print a summary.

    Returns early with an error message if ``results/tokenizer_reference.json``
    is missing next to this file. Otherwise each test script is run through
    ``run_test`` in the listed order (a failure does not stop later tests),
    followed by a per-test summary.

    Returns:
        0 if every test passed; 1 if the reference data is missing or at
        least one test failed. Intended to be used as the process exit code.
    """
    # Resolve the script directory once; both the reference data and the
    # test scripts are looked up relative to it.
    base_dir = Path(__file__).parent

    print()
    _print_box([
        "",
        " NLLB Functional Equivalence Test Suite",
        " Verifying llama.cpp vs HuggingFace",
        "",
    ])
    print()

    # Check if reference data exists
    results_dir = base_dir / "results"
    if not (results_dir / "tokenizer_reference.json").exists():
        print("❌ ERROR: Reference data not found!")
        print()
        print("Please run first:")
        print(" python generate_reference.py")
        print()
        return 1

    # Test suite: (script file name, display name), run in this order.
    tests = [
        ("test_1_tokenizer.py", "Test 1: Tokenizer Verification"),
        ("test_2_encoder.py", "Test 2: Encoder Verification"),
        ("test_3_decoder.py", "Test 3: Decoder Verification"),
        ("test_4_connection.py", "Test 4: Encoder-Decoder Connection"),
        ("test_5_translation.py", "Test 5: End-to-End Translation"),
    ]

    results = []
    for test_file, test_name in tests:
        success = run_test(base_dir / test_file, test_name)
        results.append((test_name, success))

    # Summary
    print()
    print("=" * 80)
    print("TEST SUITE SUMMARY")
    print("=" * 80)
    print()

    passed = sum(1 for _, success in results if success)
    total = len(results)

    for test_name, success in results:
        status = "✅ PASSED" if success else "❌ FAILED"
        print(f" {status} {test_name}")

    print()
    print("-" * 80)
    print(f" Results: {passed}/{total} tests passed")
    print("-" * 80)
    print()

    if passed != total:
        print("❌ SOME TESTS FAILED")
        print()
        print("Please review the failed tests above.")
        print()
        return 1

    _print_box([
        "",
        "🎉 ALL TESTS PASSED - FUNCTIONAL EQUIVALENCE VERIFIED! 🎉",
        "",
        "llama.cpp NLLB implementation is functionally equivalent",
        "to HuggingFace reference implementation.",
        "",
    ])
    print()
    return 0
|
|
|
|
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
|
|
|
|