#!/usr/bin/env python3
"""
Verification script to confirm test_integrated_system.py uses cached embeddings.
"""

import sys
import os
from pathlib import Path

# Add hack directory to path
sys.path.insert(0, str(Path(__file__).parent))

# Cache artifacts expected in model_cache/ for the 50K vocabulary.
_REQUIRED_CACHE_FILES = (
    "unified_vocabulary_50000.pkl",
    "unified_frequencies_50000.pkl",
    "unified_embeddings_all-mpnet-base-v2_50000.npy",
)


def _check_required_files(cache_dir: Path) -> bool:
    """Print one status line per required cache file; return True if all exist."""
    print("\n📋 Required Cache Files (50K vocabulary):")
    all_present = True
    for filename in _REQUIRED_CACHE_FILES:
        filepath = cache_dir / filename
        exists = filepath.exists()
        size_mb = filepath.stat().st_size / (1024 * 1024) if exists else 0
        status = "✅" if exists else "❌"
        size_str = f"({size_mb:.1f} MB)" if exists else "(missing)"
        print(f" {status} {filename} {size_str}")
        if not exists:
            all_present = False
    return all_present


def _check_test_configuration(cache_dir: Path) -> bool:
    """Instantiate the test case and confirm its generator uses ``cache_dir``.

    Returns True only when the test fixture can be set up and the generator's
    cache directory resolves to the same location as ``cache_dir``.
    """
    print("\n🧪 Test Configuration:")
    try:
        # Imported lazily so a missing/broken test module is reported as a
        # verification failure instead of crashing the script at import time.
        from test_integrated_system import TestIntegratedCrosswordGenerator

        # Create test instance to check setup
        test_instance = TestIntegratedCrosswordGenerator()
        test_instance.setUpClass()
        test_instance.setUp()

        # Check generator configuration
        generator = test_instance.generator
        print(f" ✅ Vocabulary limit: {generator.vocab_size_limit:,} words")
        print(f" ✅ Cache directory: {generator.cache_dir}")

        # Verify cache directory matches. Compare resolved paths so that a
        # Path object, a relative path, or a differently-normalised string
        # pointing at the same directory is still recognised as a match.
        expected_cache = str(cache_dir)
        actual_cache = generator.cache_dir
        cache_match = Path(actual_cache).resolve() == cache_dir.resolve()
        print(f" {'✅' if cache_match else '❌'} Cache path match: {cache_match}")
        if not cache_match:
            print(f" Expected: {expected_cache}")
            print(f" Actual: {actual_cache}")
        return cache_match
    except Exception as e:
        # Diagnostic boundary: any failure while importing or setting up the
        # project's test fixture is reported and counted as a failed check.
        print(f" ❌ Test setup error: {e}")
        return False


def verify_cache_setup() -> bool:
    """Verify that cached files are available and test setup is correct.

    Checks that ``model_cache/`` exists next to this script, that every
    required 50K-vocabulary cache file is present, and that the test suite's
    generator points at that same cache directory. Progress and a summary are
    printed to stdout.

    Returns:
        True when all files are present and the test configuration matches;
        False otherwise (including when the cache directory is missing, the
        test fixture cannot be set up, or the cache paths differ).
    """
    print("🔍 Verifying Cached Test Setup")
    print("=" * 50)

    # Check cache directory
    cache_dir = Path(__file__).parent / 'model_cache'
    print(f"📂 Cache directory: {cache_dir}")
    print(f" Exists: {'✅' if cache_dir.exists() else '❌'}")

    if not cache_dir.exists():
        print("❌ Cache directory not found")
        return False

    # Run both checks unconditionally so the report is complete even when
    # the first one already failed.
    files_ok = _check_required_files(cache_dir)
    config_ok = _check_test_configuration(cache_dir)
    all_present = files_ok and config_ok

    # Summary
    print("\n" + "=" * 50)
    if all_present:
        print("✅ VERIFICATION SUCCESSFUL")
        print(" • All cached files are present")
        print(" • Test suite is configured to use 50K cached vocabulary")
        print(" • Embeddings cache will be loaded instead of recomputed")
        print(" • Tests should run much faster (~90s vs ~200s+ initialization)")
    else:
        print("❌ VERIFICATION FAILED")
        print(" • Missing cached files or configuration issues")
        print(" • Tests may run slower or fail")

    return all_present


def show_cache_benefits() -> None:
    """Show the benefits of using cached files."""
    print("\n💡 Cache Benefits:")
    print("-" * 30)
    print("🚀 Without Cache:")
    print(" • Download WordFreq data: ~30s")
    print(" • Filter 50K vocabulary: ~10s")
    print(" • Load sentence transformer: ~90s")
    print(" • Generate embeddings: ~120s")
    print(" • Total: ~250s")
    print()
    print("⚡ With Cache:")
    print(" • Load cached vocabulary: ~1s")
    print(" • Load cached embeddings: ~2s")
    print(" • Load sentence transformer: ~90s")
    print(" • Total: ~93s")
    print()
    print("📊 Speed Improvement: ~2.7x faster initialization")


def main() -> bool:
    """Run the verification, print the cache benefits, and report the outcome.

    Returns:
        The result of ``verify_cache_setup()``.
    """
    success = verify_cache_setup()
    show_cache_benefits()

    if success:
        print("\n🎉 Ready to run optimized tests!")
        print(" Run: python test_integrated_system.py")
    else:
        print("\n⚠️ Cache setup needs attention")
        print(" Check that model_cache/ contains the required files")

    return success


if __name__ == "__main__":
    # Propagate the verification result as the process exit status so shell
    # scripts and CI can detect a failed check.
    sys.exit(0 if main() else 1)