File size: 4,297 Bytes
486eff6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/env python3
"""
Verification script to confirm test_integrated_system.py uses cached embeddings.
"""

import sys
import os
from pathlib import Path

# Add hack directory to path
sys.path.insert(0, str(Path(__file__).parent))

def verify_cache_setup():
    """Check that the cached model files exist and the test suite uses them.

    The function prints a human-readable report while it:

    1. looks for a ``model_cache`` directory next to this script,
    2. checks that each required 50K-vocabulary cache file is present
       (reporting its size in MB),
    3. imports ``TestIntegratedCrosswordGenerator`` from
       ``test_integrated_system``, runs its ``setUpClass``/``setUp`` and
       compares the generator's ``cache_dir`` with the expected directory.

    Returns:
        bool: True only when every required file exists, the test fixture
        could be set up, and its cache directory matches the expected one.
        False otherwise, including the early exit taken when the
        ``model_cache`` directory does not exist.
    """
    print("πŸ” Verifying Cached Test Setup")
    print("=" * 50)

    # Check cache directory
    cache_dir = Path(__file__).parent / 'model_cache'
    cache_dir_exists = cache_dir.exists()
    print(f"πŸ“‚ Cache directory: {cache_dir}")
    print(f"   Exists: {'βœ…' if cache_dir_exists else '❌'}")

    if not cache_dir_exists:
        # Nothing else can be verified without the directory.
        print("❌ Cache directory not found")
        return False

    # Check required cached files for 50K vocabulary
    required_files = [
        "unified_vocabulary_50000.pkl",
        "unified_frequencies_50000.pkl",
        "unified_embeddings_all-mpnet-base-v2_50000.npy",
    ]

    print("\nπŸ“‹ Required Cache Files (50K vocabulary):")
    verified = True
    for filename in required_files:
        filepath = cache_dir / filename
        exists = filepath.exists()
        size_mb = filepath.stat().st_size / (1024 * 1024) if exists else 0

        status = "βœ…" if exists else "❌"
        size_str = f"({size_mb:.1f} MB)" if exists else "(missing)"
        print(f"   {status} {filename} {size_str}")

        if not exists:
            verified = False

    # Check test file configuration
    print("\nπŸ§ͺ Test Configuration:")
    try:
        from test_integrated_system import TestIntegratedCrosswordGenerator

        # Create test instance to check setup
        test_instance = TestIntegratedCrosswordGenerator()
        test_instance.setUpClass()
        test_instance.setUp()

        # Check generator configuration
        generator = test_instance.generator
        print(f"   βœ… Vocabulary limit: {generator.vocab_size_limit:,} words")
        print(f"   βœ… Cache directory: {generator.cache_dir}")

        # Verify cache directory matches. Compare resolved paths so that a
        # Path-vs-str difference or a relative/absolute spelling of the same
        # directory is not reported as a mismatch.
        expected_cache = str(cache_dir)
        actual_cache = generator.cache_dir
        cache_match = (
            Path(expected_cache).resolve() == Path(str(actual_cache)).resolve()
        )
        print(f"   {'βœ…' if cache_match else '❌'} Cache path match: {cache_match}")

        if not cache_match:
            print(f"      Expected: {expected_cache}")
            print(f"      Actual:   {actual_cache}")
            # A test suite pointing at another directory will not load the
            # files checked above, so this is a configuration failure.
            verified = False

    except Exception as e:
        # Diagnostic boundary: any import or fixture-setup failure is
        # reported to the user and counted as a failed verification.
        print(f"   ❌ Test setup error: {e}")
        verified = False

    # Summary
    print("\n" + "=" * 50)
    if verified:
        print("βœ… VERIFICATION SUCCESSFUL")
        print("   β€’ All cached files are present")
        print("   β€’ Test suite is configured to use 50K cached vocabulary")
        print("   β€’ Embeddings cache will be loaded instead of recomputed")
        print("   β€’ Tests should run much faster (~90s vs ~200s+ initialization)")
    else:
        print("❌ VERIFICATION FAILED")
        print("   β€’ Missing cached files or configuration issues")
        print("   β€’ Tests may run slower or fail")

    return verified


def show_cache_benefits():
    """Print a timing comparison of initialization with and without the cache."""
    # Each entry is printed on its own line; an empty string yields the same
    # blank line as a bare print().
    report_lines = (
        "\nπŸ’‘ Cache Benefits:",
        "-" * 30,
        "πŸš€ Without Cache:",
        "   β€’ Download WordFreq data: ~30s",
        "   β€’ Filter 50K vocabulary: ~10s",
        "   β€’ Load sentence transformer: ~90s",
        "   β€’ Generate embeddings: ~120s",
        "   β€’ Total: ~250s",
        "",
        "⚑ With Cache:",
        "   β€’ Load cached vocabulary: ~1s",
        "   β€’ Load cached embeddings: ~2s",
        "   β€’ Load sentence transformer: ~90s",
        "   β€’ Total: ~93s",
        "",
        "πŸ“Š Speed Improvement: ~2.7x faster initialization",
    )
    for text in report_lines:
        print(text)


def main():
    """Run the cache verification, print the cache benefits, and report.

    Returns the result of ``verify_cache_setup()`` so callers can tell
    whether the cache setup passed.
    """
    cache_ok = verify_cache_setup()
    show_cache_benefits()

    # Failure path first: tell the user what to check.
    if not cache_ok:
        print("\n⚠️  Cache setup needs attention")
        print("   Check that model_cache/ contains the required files")
        return cache_ok

    print("\nπŸŽ‰ Ready to run optimized tests!")
    print("   Run: python test_integrated_system.py")
    return cache_ok


if __name__ == "__main__":
    # Propagate the verification result as the process exit status so shells
    # and CI jobs can detect a failed check (0 = success, 1 = failure).
    sys.exit(0 if main() else 1)