"""
Verification script to confirm test_integrated_system.py uses cached embeddings.
"""
|
|
|
import os
import sys
from pathlib import Path

# Put this script's own directory first on sys.path so that the sibling module
# ``test_integrated_system`` (imported inside verify_cache_setup) can be found.
sys.path.insert(0, str(Path(__file__).parent))
|
|
|
def verify_cache_setup():
    """Check that the cached model files exist and the test suite points at them.

    Prints a human-readable report to stdout. The check has three stages:

    1. ``model_cache`` must exist next to this file (otherwise return early).
    2. Each of the three required 50000-entry cache files must exist in it.
    3. ``TestIntegratedCrosswordGenerator`` is instantiated, its
       ``setUpClass()`` and ``setUp()`` hooks are run, and the resulting
       ``generator.cache_dir`` must resolve to the same directory.

    Returns:
        bool: True only if every stage passed; False if the directory or any
        file is missing, the test fixture raised during setup, or the
        generator uses a different cache directory.
    """
    print("🔍 Verifying Cached Test Setup")
    print("=" * 50)

    cache_dir = Path(__file__).parent / 'model_cache'
    cache_dir_exists = cache_dir.exists()
    print(f"📁 Cache directory: {cache_dir}")
    print(f" Exists: {'✅' if cache_dir_exists else '❌'}")

    if not cache_dir_exists:
        print("❌ Cache directory not found")
        return False

    required_files = [
        "unified_vocabulary_50000.pkl",
        "unified_frequencies_50000.pkl",
        "unified_embeddings_all-mpnet-base-v2_50000.npy",
    ]

    print("\n📋 Required Cache Files (50K vocabulary):")
    all_present = _report_cache_files(cache_dir, required_files)

    print("\n🧪 Test Configuration:")
    try:
        config_ok = _check_test_configuration(cache_dir)
    except Exception as e:
        # Deliberately broad: importing and setting up the test fixture runs
        # project code that may fail in arbitrary ways, and this diagnostic
        # script should report the failure rather than crash.
        print(f" ❌ Test setup error: {e}")
        config_ok = False

    # A wrong cache path is a configuration failure, not just a warning.
    if not config_ok:
        all_present = False

    print("\n" + "=" * 50)
    if all_present:
        print("✅ VERIFICATION SUCCESSFUL")
        print(" • All cached files are present")
        print(" • Test suite is configured to use 50K cached vocabulary")
        print(" • Embeddings cache will be loaded instead of recomputed")
        print(" • Tests should run much faster (~90s vs ~200s+ initialization)")
    else:
        print("❌ VERIFICATION FAILED")
        print(" • Missing cached files or configuration issues")
        print(" • Tests may run slower or fail")

    return all_present


def _report_cache_files(cache_dir, required_files):
    """Print one status line per required cache file; return True if all exist."""
    all_present = True
    for filename in required_files:
        filepath = cache_dir / filename
        if filepath.exists():
            size_mb = filepath.stat().st_size / (1024 * 1024)
            print(f" ✅ {filename} ({size_mb:.1f} MB)")
        else:
            print(f" ❌ {filename} (missing)")
            all_present = False
    return all_present


def _check_test_configuration(cache_dir):
    """Set up the integrated test fixture; return True if it uses ``cache_dir``."""
    # Imported lazily so a missing or broken test module is reported by the
    # caller's error handler instead of preventing this script from starting.
    from test_integrated_system import TestIntegratedCrosswordGenerator

    test_instance = TestIntegratedCrosswordGenerator()
    test_instance.setUpClass()
    test_instance.setUp()

    generator = test_instance.generator
    print(f" ✅ Vocabulary limit: {generator.vocab_size_limit:,} words")
    print(f" ✅ Cache directory: {generator.cache_dir}")

    expected_cache = str(cache_dir)
    actual_cache = generator.cache_dir
    # Compare resolved Path objects so that a Path-vs-str difference or an
    # un-normalized spelling of the same directory does not count as a mismatch.
    cache_match = Path(actual_cache).resolve() == cache_dir.resolve()
    print(f" {'✅' if cache_match else '❌'} Cache path match: {cache_match}")

    if not cache_match:
        print(f" Expected: {expected_cache}")
        print(f" Actual: {actual_cache}")

    return cache_match
|
|
|
|
|
def show_cache_benefits():
    """Print a fixed, approximate timing comparison of start-up with and without the cache.

    The figures are hard-coded estimates, not measurements taken by this
    script. Writes to stdout and returns None.
    """
    without_cache = (
        "Download WordFreq data: ~30s",
        "Filter 50K vocabulary: ~10s",
        "Load sentence transformer: ~90s",
        "Generate embeddings: ~120s",
        "Total: ~250s",
    )
    with_cache = (
        "Load cached vocabulary: ~1s",
        "Load cached embeddings: ~2s",
        "Load sentence transformer: ~90s",
        "Total: ~93s",
    )

    print("\n💡 Cache Benefits:")
    print("-" * 30)

    print("🐌 Without Cache:")
    for step in without_cache:
        print(f" • {step}")
    print()

    print("⚡ With Cache:")
    for step in with_cache:
        print(f" • {step}")
    print()

    print("🚀 Speed Improvement: ~2.7x faster initialization")
|
|
|
|
|
def main():
    """Run the cache verification, print the cache-benefit summary, and suggest next steps.

    Returns:
        bool: The value returned by verify_cache_setup() (True when the
        verification passed).
    """
    success = verify_cache_setup()
    show_cache_benefits()

    if success:
        print("\n🎉 Ready to run optimized tests!")
        print(" Run: python test_integrated_system.py")
    else:
        print("\n⚠️ Cache setup needs attention")
        print(" Check that model_cache/ contains the required files")

    return success
|
|
|
|
|
if __name__ == "__main__":
    # Propagate the verification result as the process exit status
    # (0 = success, 1 = failure) so shells and CI jobs can react to it.
    raise SystemExit(0 if main() else 1)