abc123 / hack /verify_cached_tests.py
vimalk78's picture
feat(crossword): generated crosswords with clues
486eff6
raw
history blame
4.3 kB
#!/usr/bin/env python3
"""
Verification script to confirm test_integrated_system.py uses cached embeddings.
"""
import sys
import os
from pathlib import Path
# Put the directory containing this script at the front of sys.path.
# verify_cache_setup() imports test_integrated_system at call time;
# presumably that module lives next to this script, and this makes it
# importable regardless of the working directory the script is started from.
sys.path.insert(0, str(Path(__file__).parent))
def verify_cache_setup():
    """Check that the cached files exist and the test suite points at them.

    Prints a human-readable report while performing three checks:

    1. ``model_cache/`` exists next to this script.
    2. The vocabulary, frequency and embedding cache files for the 50K
       vocabulary are present in that directory.
    3. ``TestIntegratedCrosswordGenerator`` (imported from
       ``test_integrated_system``) can be set up, and the ``cache_dir`` of
       its generator refers to that same directory.

    Returns:
        bool: True only if every check passed. When the cache directory
        itself is missing, False is returned immediately, before the
        remaining checks and the summary are printed.
    """
    print("πŸ” Verifying Cached Test Setup")
    print("=" * 50)

    # Check cache directory
    cache_dir = Path(__file__).parent / 'model_cache'
    dir_exists = cache_dir.exists()
    print(f"πŸ“‚ Cache directory: {cache_dir}")
    print(f" Exists: {'βœ…' if dir_exists else '❌'}")
    if not dir_exists:
        print("❌ Cache directory not found")
        return False

    # Check required cached files for 50K vocabulary
    required_files = [
        "unified_vocabulary_50000.pkl",
        "unified_frequencies_50000.pkl",
        "unified_embeddings_all-mpnet-base-v2_50000.npy",
    ]
    print("\nπŸ“‹ Required Cache Files (50K vocabulary):")
    all_ok = True
    for filename in required_files:
        filepath = cache_dir / filename
        if filepath.exists():
            size_mb = filepath.stat().st_size / (1024 * 1024)
            status, size_str = "βœ…", f"({size_mb:.1f} MB)"
        else:
            status, size_str = "❌", "(missing)"
            all_ok = False
        print(f" {status} {filename} {size_str}")

    # Check test file configuration
    print("\nπŸ§ͺ Test Configuration:")
    try:
        from test_integrated_system import TestIntegratedCrosswordGenerator

        # Create test instance to check setup
        test_instance = TestIntegratedCrosswordGenerator()
        test_instance.setUpClass()
        test_instance.setUp()

        # Check generator configuration
        generator = test_instance.generator
        print(f" βœ… Vocabulary limit: {generator.vocab_size_limit:,} words")
        print(f" βœ… Cache directory: {generator.cache_dir}")

        # Verify cache directory matches. Compare resolved paths rather than
        # raw strings: the generator may hold a Path object, or an absolute
        # path while __file__ is relative, and a plain str comparison would
        # then report a mismatch for the very same directory.
        expected_cache = str(cache_dir)
        actual_cache = generator.cache_dir
        cache_match = Path(actual_cache).resolve() == cache_dir.resolve()
        print(f" {'βœ…' if cache_match else '❌'} Cache path match: {cache_match}")
        if not cache_match:
            print(f" Expected: {expected_cache}")
            print(f" Actual: {actual_cache}")
            # A test suite pointing at a different cache directory will not
            # load the files checked above, so verification must fail.
            all_ok = False
    except Exception as e:
        # Deliberately broad: importing and setting up the test class runs
        # project code that can fail in many ways; any failure is reported
        # and counted as a verification failure instead of crashing.
        print(f" ❌ Test setup error: {e}")
        all_ok = False

    # Summary
    print("\n" + "=" * 50)
    if all_ok:
        print("βœ… VERIFICATION SUCCESSFUL")
        print(" β€’ All cached files are present")
        print(" β€’ Test suite is configured to use 50K cached vocabulary")
        print(" β€’ Embeddings cache will be loaded instead of recomputed")
        print(" β€’ Tests should run much faster (~90s vs ~200s+ initialization)")
    else:
        print("❌ VERIFICATION FAILED")
        print(" β€’ Missing cached files or configuration issues")
        print(" β€’ Tests may run slower or fail")
    return all_ok
def show_cache_benefits():
    """Print a fixed timing comparison of initialization with and without cache.

    The figures are hard-coded estimates; nothing is measured. Writes to
    stdout and returns None.
    """
    # Each entry is one print() call of the report; an empty string stands
    # for a bare print() that emits a blank separator line.
    report = (
        "\nπŸ’‘ Cache Benefits:",
        "-" * 30,
        "πŸš€ Without Cache:",
        " β€’ Download WordFreq data: ~30s",
        " β€’ Filter 50K vocabulary: ~10s",
        " β€’ Load sentence transformer: ~90s",
        " β€’ Generate embeddings: ~120s",
        " β€’ Total: ~250s",
        "",
        "⚑ With Cache:",
        " β€’ Load cached vocabulary: ~1s",
        " β€’ Load cached embeddings: ~2s",
        " β€’ Load sentence transformer: ~90s",
        " β€’ Total: ~93s",
        "",
        "πŸ“Š Speed Improvement: ~2.7x faster initialization",
    )
    print("\n".join(report))
def main():
    """Run the cache verification, print the timing overview, then advise.

    Calls verify_cache_setup() and show_cache_benefits(), then prints a
    two-line closing message chosen by the verification result.

    Returns:
        The value returned by verify_cache_setup().
    """
    cache_ok = verify_cache_setup()
    show_cache_benefits()

    if not cache_ok:
        headline = "\n⚠️ Cache setup needs attention"
        advice = " Check that model_cache/ contains the required files"
    else:
        headline = "\nπŸŽ‰ Ready to run optimized tests!"
        advice = " Run: python test_integrated_system.py"

    print(headline)
    print(advice)
    return cache_ok
if __name__ == "__main__":
    # Turn the verification result into the process exit status so shells
    # and CI jobs can detect a failed check: 0 on success, 1 on failure.
    sys.exit(0 if main() else 1)