#!/usr/bin/env python3
"""
Comprehensive Test Suite for Integrated Crossword Generator

Tests the complete integration between thematic word discovery and API clue
generation, ensuring the system works correctly and produces high-quality
results.

This test suite uses pre-cached embeddings and vocabulary files (50K words)
from the model_cache/ directory for faster test execution, avoiding
re-initialization of the sentence transformer model and vocabulary generation.

Performance: ~93s initialization with cache vs ~250s without cache (~2.7x faster)

To verify cache setup before running tests:
    python verify_cached_tests.py

To run the full test suite:
    export HF_TOKEN='your_token' && python test_integrated_system.py
"""

import sys
import os
import time
import unittest
from pathlib import Path
from unittest.mock import Mock, patch

# Add hack directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))

try:
    from integrated_crossword_generator import IntegratedCrosswordGenerator, CrosswordEntry
    INTEGRATED_AVAILABLE = True
except ImportError as e:
    print(f"❌ Integration import error: {e}")
    INTEGRATED_AVAILABLE = False

# Directory (next to this file) holding the cached vocabulary and embeddings.
MODEL_CACHE_DIR = Path(__file__).parent / 'model_cache'

# Vocabulary size of the cached files; it is also embedded in their file names.
CACHED_VOCAB_SIZE = 50000


def _build_generator():
    """Return a new, not yet initialized generator that points at the cache."""
    return IntegratedCrosswordGenerator(
        vocab_size_limit=CACHED_VOCAB_SIZE,  # Use cached 50K vocabulary
        cache_dir=str(MODEL_CACHE_DIR),
    )


class TestIntegratedCrosswordGenerator(unittest.TestCase):
    """Test cases for the integrated crossword generator."""

    @classmethod
    def setUpClass(cls):
        """Skip the whole class when the generator module could not be imported."""
        if not INTEGRATED_AVAILABLE:
            # Raising SkipTest is the supported way to skip from setUpClass;
            # TestCase.skipTest is an instance method.
            raise unittest.SkipTest("Integrated generator not available")

        # Use test token if available
        cls.test_token = os.getenv('HF_TOKEN')
        if not cls.test_token:
            print("⚠️ HF_TOKEN not set - some tests may be skipped")

    def setUp(self):
        """Create a fresh generator for each test (tests mutate its flags)."""
        self.generator = _build_generator()

    def test_initialization(self):
        """Test generator initialization."""
        self.assertFalse(self.generator.is_initialized)

        # Initialize; perf_counter is monotonic, so the measured interval is
        # not affected by wall-clock adjustments.
        start_time = time.perf_counter()
        self.generator.initialize()
        init_time = time.perf_counter() - start_time

        self.assertTrue(self.generator.is_initialized)

        # Check system info
        system_info = self.generator.get_system_info()
        self.assertIn('components', system_info)
        self.assertIn('stats', system_info)

        # Verify cached files are being used (should still be reasonable time
        # even with model loading).
        # Note: Model download/loading takes ~90s, but vocabulary/embeddings
        # load from cache.
        self.assertLess(init_time, 120.0,
                        "Initialization should complete within 2 minutes with cached files")

        # If thematic generator is ready, verify it used cached data
        if self.generator.thematic_ready:
            vocab_size = self.generator.thematic_generator.get_vocabulary_size()
            self.assertEqual(vocab_size, CACHED_VOCAB_SIZE,
                             "Should use full 50K cached vocabulary")

    def test_cached_files_usage(self):
        """Test that cached vocabulary and embeddings are being used."""
        cache_dir = Path(self.generator.cache_dir)

        # Verify expected cache files exist
        vocab_file = cache_dir / f"unified_vocabulary_{CACHED_VOCAB_SIZE}.pkl"
        freq_file = cache_dir / f"unified_frequencies_{CACHED_VOCAB_SIZE}.pkl"
        embeddings_file = (
            cache_dir / f"unified_embeddings_all-mpnet-base-v2_{CACHED_VOCAB_SIZE}.npy"
        )

        self.assertTrue(vocab_file.exists(),
                        f"Vocabulary cache file should exist: {vocab_file}")
        self.assertTrue(freq_file.exists(),
                        f"Frequency cache file should exist: {freq_file}")
        self.assertTrue(embeddings_file.exists(),
                        f"Embeddings cache file should exist: {embeddings_file}")

        # Initialize and verify vocabulary size
        self.generator.initialize()

        if self.generator.thematic_ready:
            thematic = self.generator.thematic_generator
            self.assertEqual(thematic.get_vocabulary_size(), CACHED_VOCAB_SIZE,
                             "Should use cached 50K vocabulary")

            # Verify embeddings are loaded
            self.assertIsNotNone(thematic.vocab_embeddings)
            embeddings_shape = thematic.vocab_embeddings.shape
            self.assertEqual(embeddings_shape[0], CACHED_VOCAB_SIZE,
                             "Embeddings should have 50K entries")
            self.assertEqual(embeddings_shape[1], 768,
                             "Should use all-mpnet-base-v2 embeddings (768 dims)")

    def test_component_availability(self):
        """Test availability of required components."""
        self.generator.initialize()

        # At least one component should be available
        has_thematic = self.generator.thematic_ready
        has_api = self.generator.api_ready

        self.assertTrue(has_thematic or has_api,
                        "At least one component should be available")

        if has_thematic:
            self.assertIsNotNone(self.generator.thematic_generator)
            vocab_size = self.generator.thematic_generator.get_vocabulary_size()
            self.assertGreater(vocab_size, 0)

        if has_api:
            self.assertIsNotNone(self.generator.api_clue_generator)

    def test_word_discovery_only(self):
        """Test word discovery when only thematic generator is available."""
        self.generator.initialize()

        if not self.generator.thematic_ready:
            self.skipTest("Thematic generator not available")

        # Mock API generator as unavailable
        self.generator.api_ready = False

        # Test word discovery
        words = self.generator._discover_words("animals", 5, "medium", 0.3)

        if words:  # Only test if words are found
            self.assertIsInstance(words, list)
            for word, similarity, tier in words:
                self.assertIsInstance(word, str)
                self.assertIsInstance(similarity, float)
                self.assertIsInstance(tier, str)
                self.assertGreater(len(word), 2)
                self.assertGreaterEqual(similarity, 0.0)

    def test_api_clue_generation_only(self):
        """Test API clue generation when only API generator is available."""
        if not self.test_token:
            self.skipTest("HF_TOKEN not available for API testing")

        self.generator.initialize()

        if not self.generator.api_ready:
            self.skipTest("API generator not available")

        # Mock thematic generator as unavailable
        self.generator.thematic_ready = False

        # Test with sample word data
        mock_words = [("CAT", 0.8, "tier_5_common"), ("DOG", 0.7, "tier_4_highly_common")]
        entries = self.generator._generate_clues_for_words(mock_words, "animals")

        self.assertIsInstance(entries, list)
        for entry in entries:
            self.assertIsInstance(entry, CrosswordEntry)
            self.assertIsInstance(entry.word, str)
            self.assertIsInstance(entry.clue, str)
            self.assertGreater(len(entry.clue), 5)  # Clues should be substantial

    def test_full_integration(self):
        """Test complete integration when both components are available."""
        self.generator.initialize()

        if not (self.generator.thematic_ready and self.generator.api_ready):
            self.skipTest("Full integration requires both components")

        # Test complete pipeline
        entries = self.generator.generate_crossword_entries(
            topic="animals",
            num_words=3,
            difficulty="medium"
        )

        self.assertIsInstance(entries, list)
        self.assertLessEqual(len(entries), 3)  # Should not exceed requested count

        for entry in entries:
            self.assertIsInstance(entry, CrosswordEntry)
            self.assertIsInstance(entry.word, str)
            self.assertIsInstance(entry.clue, str)
            self.assertEqual(entry.topic, "animals")
            self.assertGreater(entry.similarity_score, 0.0)
            self.assertIn("tier_", entry.frequency_tier)

    def test_difficulty_filtering(self):
        """Test difficulty-based word filtering."""
        self.generator.initialize()

        if not self.generator.thematic_ready:
            self.skipTest("Requires thematic generator for difficulty testing")

        # Test different difficulty levels
        difficulties = ["easy", "medium", "hard"]

        for difficulty in difficulties:
            with self.subTest(difficulty=difficulty):
                # Rebuilt per sub-test so every call gets its own input list.
                mock_results = [
                    ("CAT", 0.8, "tier_3_very_common"),      # Easy word
                    ("ALGORITHM", 0.7, "tier_8_uncommon"),   # Hard word
                    ("COMPUTER", 0.6, "tier_5_common")       # Medium word
                ]

                filtered = self.generator._filter_by_difficulty(mock_results, difficulty)
                self.assertIsInstance(filtered, list)

                # Filtering may drop entries but must never add any
                self.assertLessEqual(len(filtered), len(mock_results))

    def test_multiple_topics(self):
        """Test generation for multiple topics."""
        self.generator.initialize()

        if not self.generator.is_initialized:
            self.skipTest("Generator initialization failed")

        topics = ["animals", "technology"]
        results = self.generator.generate_by_multiple_topics(
            topics=topics,
            words_per_topic=2,
            difficulty="medium"
        )

        self.assertIsInstance(results, dict)
        self.assertEqual(len(results), len(topics))

        for topic in topics:
            self.assertIn(topic, results)
            self.assertIsInstance(results[topic], list)

    def test_stats_tracking(self):
        """Test performance statistics tracking."""
        self.generator.initialize()

        # Initial stats
        initial_stats = self.generator.get_stats()
        self.assertIsInstance(initial_stats, dict)
        self.assertIn('words_discovered', initial_stats)
        self.assertIn('clues_generated', initial_stats)

        # Without any component there is nothing to generate or compare.
        if not (self.generator.thematic_ready or self.generator.api_ready):
            return

        # Only the generation call is guarded: a failure there is secondary to
        # this test, but an assertion failure below must still fail the test
        # (AssertionError is an Exception and would otherwise be swallowed).
        try:
            self.generator.generate_crossword_entries("test", 1, "medium")
        except Exception as exc:
            self.skipTest(f"Generation failed, stats comparison skipped: {exc}")

        updated_stats = self.generator.get_stats()

        # Counters must never decrease after a generation run
        self.assertGreaterEqual(updated_stats['words_discovered'],
                                initial_stats['words_discovered'])
        self.assertGreaterEqual(updated_stats['clues_generated'],
                                initial_stats['clues_generated'])

    def test_fallback_behavior(self):
        """Test fallback behavior when components fail."""
        self.generator.initialize()

        # Test with unavailable topic that should trigger fallbacks
        entries = self.generator.generate_crossword_entries(
            topic="nonexistent_impossible_topic_xyz123",
            num_words=1,
            difficulty="medium"
        )

        # Should handle gracefully (empty list or basic entries)
        self.assertIsInstance(entries, list)

    def test_crossword_entry_structure(self):
        """Test CrosswordEntry dataclass structure."""
        # Create sample entry
        entry = CrosswordEntry(
            word="TEST",
            clue="Sample clue",
            topic="testing",
            similarity_score=0.75,
            frequency_tier="tier_5_common",
            tier_description="Common words",
            clue_quality="GOOD",
            clue_model="test_model"
        )

        # Verify all fields
        self.assertEqual(entry.word, "TEST")
        self.assertEqual(entry.clue, "Sample clue")
        self.assertEqual(entry.topic, "testing")
        self.assertEqual(entry.similarity_score, 0.75)
        self.assertEqual(entry.frequency_tier, "tier_5_common")
        self.assertEqual(entry.tier_description, "Common words")
        self.assertEqual(entry.clue_quality, "GOOD")
        self.assertEqual(entry.clue_model, "test_model")


class TestIntegrationScenarios(unittest.TestCase):
    """Test realistic integration scenarios."""

    @classmethod
    def setUpClass(cls):
        """Skip the whole class when the generator module could not be imported."""
        if not INTEGRATED_AVAILABLE:
            raise unittest.SkipTest("Integrated generator not available")

        cls.test_token = os.getenv('HF_TOKEN')

    def _initialized_generator(self):
        """Build and initialize a cached-vocabulary generator, or skip the test."""
        generator = _build_generator()
        generator.initialize()

        if not generator.is_initialized:
            self.skipTest("Generator initialization failed")
        return generator

    def test_education_crossword_scenario(self):
        """Test generating educational crossword content."""
        generator = self._initialized_generator()

        # Educational topics
        topics = ["science", "history", "mathematics"]

        for topic in topics:
            with self.subTest(topic=topic):
                entries = generator.generate_crossword_entries(
                    topic=topic,
                    num_words=3,
                    difficulty="medium"
                )

                # Should produce educational content
                self.assertIsInstance(entries, list)
                for entry in entries:
                    self.assertEqual(entry.topic, topic)
                    # Educational words should be substantial
                    self.assertGreaterEqual(len(entry.word), 3)

    def test_themed_puzzle_scenario(self):
        """Test generating themed puzzle content."""
        generator = self._initialized_generator()

        # Theme-based generation
        theme = "ocean life"
        entries = generator.generate_crossword_entries(
            topic=theme,
            num_words=5,
            difficulty="medium"
        )

        if entries:
            # All entries should be thematically related
            for entry in entries:
                self.assertEqual(entry.topic, theme)
                self.assertIsInstance(entry.similarity_score, float)
                self.assertGreater(entry.similarity_score, 0.0)

    def test_performance_benchmarking(self):
        """Test performance characteristics."""
        generator = self._initialized_generator()

        # Benchmark generation time. Only the generation call is guarded so
        # the timing assertions below can actually fail the test.
        start_time = time.perf_counter()
        try:
            entries = generator.generate_crossword_entries(
                topic="technology",
                num_words=5,
                difficulty="medium"
            )
        except Exception as exc:
            # Performance test is informational when generation itself fails
            self.skipTest(f"Performance test encountered: {exc}")
        generation_time = time.perf_counter() - start_time

        # Performance expectations
        self.assertLess(generation_time, 60.0)  # Should complete within 1 minute

        if entries:
            avg_time_per_entry = generation_time / len(entries)
            self.assertLess(avg_time_per_entry, 20.0)  # Max ~20s per entry


def run_comprehensive_tests():
    """Run all integration tests with detailed reporting.

    Returns:
        bool: True when every test passed; False when any test failed or the
        integrated system could not be imported.
    """
    print("🧪 Comprehensive Integration Tests")
    print("=" * 60)
    print("📂 Using cached 50K vocabulary and embeddings from model_cache/")
    print("⚡ This significantly speeds up testing by avoiding re-computation")

    # Check environment
    hf_token = os.getenv('HF_TOKEN')
    if not hf_token:
        print("⚠️ HF_TOKEN not set - API tests may be limited")

    if not INTEGRATED_AVAILABLE:
        print("❌ Integrated system not available - cannot run tests")
        # Explicit False keeps the return type consistent for callers.
        return False

    # Create test suite
    loader = unittest.TestLoader()
    suite = unittest.TestSuite()

    # Add test cases
    suite.addTests(loader.loadTestsFromTestCase(TestIntegratedCrosswordGenerator))
    suite.addTests(loader.loadTestsFromTestCase(TestIntegrationScenarios))

    # Run tests with detailed output
    runner = unittest.TextTestRunner(verbosity=2, stream=sys.stdout)
    result = runner.run(suite)

    # Summary
    print("\n" + "=" * 60)
    print("📊 TEST SUMMARY")
    print("=" * 60)
    print(f"Tests run: {result.testsRun}")
    print(f"Failures: {len(result.failures)}")
    print(f"Errors: {len(result.errors)}")
    print(f"Skipped: {len(result.skipped)}")

    if result.failures:
        print("\n❌ FAILURES:")
        for test, trace in result.failures:
            # The last traceback line carries the assertion message
            print(f" - {test}: {trace.splitlines()[-1]}")

    if result.errors:
        print("\n❌ ERRORS:")
        for test, trace in result.errors:
            print(f" - {test}: {trace.splitlines()[-1]}")

    if result.skipped:
        print("\n⏭️ SKIPPED:")
        for test, reason in result.skipped:
            print(f" - {test}: {reason}")

    # Skipped tests count as run but not as failed; guard against zero tests.
    if result.testsRun > 0:
        passed = result.testsRun - len(result.failures) - len(result.errors)
        success_rate = passed / result.testsRun * 100
    else:
        success_rate = 0
    print(f"\n✅ Success rate: {success_rate:.1f}%")

    if result.wasSuccessful():
        print("🎉 All tests passed! Integration system is working correctly.")
    else:
        print("⚠️ Some tests failed. Check the system configuration.")

    return result.wasSuccessful()


def main():
    """Run the comprehensive test suite and exit 0 on success, 1 otherwise."""
    success = run_comprehensive_tests()
    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()