File size: 18,816 Bytes
486eff6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
#!/usr/bin/env python3
"""
Comprehensive Test Suite for Integrated Crossword Generator

Tests the complete integration between thematic word discovery and API clue generation,
ensuring the system works correctly and produces high-quality results.

This test suite uses pre-cached embeddings and vocabulary files (50K words) from 
model_cache/ directory for faster test execution, avoiding re-initialization of
the sentence transformer model and vocabulary generation.

Performance: ~93s initialization with cache vs ~250s without cache (~2.7x faster)

To verify cache setup before running tests:
    python verify_cached_tests.py

To run the full test suite:
    export HF_TOKEN='your_token' && python test_integrated_system.py
"""

import sys
import os
import time
import unittest
from pathlib import Path
from unittest.mock import Mock, patch

# Add hack directory to path for imports
# (this is the directory that contains this test file, so the module imported
# below is looked up next to it regardless of the current working directory).
sys.path.insert(0, str(Path(__file__).parent))

# Import the system under test defensively: if the import fails, this module
# still loads and INTEGRATED_AVAILABLE is False, which the test classes and
# run_comprehensive_tests() check before touching the generator.
try:
    from integrated_crossword_generator import IntegratedCrosswordGenerator, CrosswordEntry
    INTEGRATED_AVAILABLE = True
except ImportError as e:
    print(f"❌ Integration import error: {e}")
    INTEGRATED_AVAILABLE = False


class TestIntegratedCrosswordGenerator(unittest.TestCase):
    """Test cases for the integrated crossword generator.

    Each test receives a fresh, not-yet-initialized generator (see ``setUp``)
    pointed at the ``model_cache`` directory next to this file. Tests that
    need a specific component (thematic discovery or API clue generation)
    skip themselves when that component reports it is not ready.
    """

    # Vocabulary size requested from the generator and expected in the cache.
    VOCAB_SIZE = 50000
    # Embedding width asserted for the cached all-mpnet-base-v2 embeddings.
    EMBEDDING_DIM = 768

    @classmethod
    def setUpClass(cls):
        """Skip the whole class if the generator import failed; read HF_TOKEN."""
        if not INTEGRATED_AVAILABLE:
            # Raising SkipTest is the documented way to skip from setUpClass.
            raise unittest.SkipTest("Integrated generator not available")

        # Use test token if available
        cls.test_token = os.getenv('HF_TOKEN')
        if not cls.test_token:
            print("⚠️ HF_TOKEN not set - some tests may be skipped")

    def setUp(self):
        """Create an un-initialized generator backed by the cached vocabulary."""
        cache_dir = str(Path(__file__).parent / 'model_cache')
        self.generator = IntegratedCrosswordGenerator(
            vocab_size_limit=self.VOCAB_SIZE,  # Use cached 50K vocabulary
            cache_dir=cache_dir
        )

    def test_initialization(self):
        """Test generator initialization."""
        self.assertFalse(self.generator.is_initialized)

        # perf_counter is monotonic, so the measurement cannot be skewed by
        # wall-clock adjustments during the (long) initialization.
        start_time = time.perf_counter()
        self.generator.initialize()
        init_time = time.perf_counter() - start_time

        self.assertTrue(self.generator.is_initialized)

        # Check system info
        system_info = self.generator.get_system_info()
        self.assertIn('components', system_info)
        self.assertIn('stats', system_info)

        # Verify cached files are being used (should still be reasonable time even with model loading)
        # Note: Model download/loading takes ~90s, but vocabulary/embeddings load from cache
        self.assertLess(init_time, 120.0, "Initialization should complete within 2 minutes with cached files")

        # If thematic generator is ready, verify it used cached data
        if self.generator.thematic_ready:
            vocab_size = self.generator.thematic_generator.get_vocabulary_size()
            self.assertEqual(vocab_size, self.VOCAB_SIZE, "Should use full 50K cached vocabulary")

    def test_cached_files_usage(self):
        """Test that cached vocabulary and embeddings are being used."""
        cache_dir = Path(self.generator.cache_dir)

        # Verify expected cache files exist
        vocab_file = cache_dir / f"unified_vocabulary_{self.VOCAB_SIZE}.pkl"
        freq_file = cache_dir / f"unified_frequencies_{self.VOCAB_SIZE}.pkl"
        embeddings_file = cache_dir / f"unified_embeddings_all-mpnet-base-v2_{self.VOCAB_SIZE}.npy"

        self.assertTrue(vocab_file.exists(), f"Vocabulary cache file should exist: {vocab_file}")
        self.assertTrue(freq_file.exists(), f"Frequency cache file should exist: {freq_file}")
        self.assertTrue(embeddings_file.exists(), f"Embeddings cache file should exist: {embeddings_file}")

        # Initialize and verify vocabulary size
        self.generator.initialize()

        if self.generator.thematic_ready:
            thematic = self.generator.thematic_generator
            vocab_size = thematic.get_vocabulary_size()
            self.assertEqual(vocab_size, self.VOCAB_SIZE, "Should use cached 50K vocabulary")

            # Verify embeddings are loaded
            self.assertIsNotNone(thematic.vocab_embeddings)
            embeddings_shape = thematic.vocab_embeddings.shape
            self.assertEqual(embeddings_shape[0], self.VOCAB_SIZE, "Embeddings should have 50K entries")
            self.assertEqual(embeddings_shape[1], self.EMBEDDING_DIM, "Should use all-mpnet-base-v2 embeddings (768 dims)")

    def test_component_availability(self):
        """Test availability of required components."""
        self.generator.initialize()

        # At least one component should be available
        has_thematic = self.generator.thematic_ready
        has_api = self.generator.api_ready

        self.assertTrue(has_thematic or has_api, "At least one component should be available")

        if has_thematic:
            self.assertIsNotNone(self.generator.thematic_generator)
            vocab_size = self.generator.thematic_generator.get_vocabulary_size()
            self.assertGreater(vocab_size, 0)

        if has_api:
            self.assertIsNotNone(self.generator.api_clue_generator)

    def test_word_discovery_only(self):
        """Test word discovery when only thematic generator is available."""
        self.generator.initialize()

        if not self.generator.thematic_ready:
            self.skipTest("Thematic generator not available")

        # Mark the API generator as unavailable on this instance only
        self.generator.api_ready = False

        # Test word discovery
        words = self.generator._discover_words("animals", 5, "medium", 0.3)

        if words:  # Only test if words are found
            self.assertIsInstance(words, list)
            for word, similarity, tier in words:
                self.assertIsInstance(word, str)
                self.assertIsInstance(similarity, float)
                self.assertIsInstance(tier, str)
                self.assertGreater(len(word), 2)
                self.assertGreaterEqual(similarity, 0.0)

    def test_api_clue_generation_only(self):
        """Test API clue generation when only API generator is available."""
        if not self.test_token:
            self.skipTest("HF_TOKEN not available for API testing")

        self.generator.initialize()

        if not self.generator.api_ready:
            self.skipTest("API generator not available")

        # Mark the thematic generator as unavailable on this instance only
        self.generator.thematic_ready = False

        # Test with sample word data: (word, similarity, frequency tier)
        mock_words = [("CAT", 0.8, "tier_5_common"), ("DOG", 0.7, "tier_4_highly_common")]

        entries = self.generator._generate_clues_for_words(mock_words, "animals")

        self.assertIsInstance(entries, list)
        for entry in entries:
            self.assertIsInstance(entry, CrosswordEntry)
            self.assertIsInstance(entry.word, str)
            self.assertIsInstance(entry.clue, str)
            self.assertGreater(len(entry.clue), 5)  # Clues should be substantial

    def test_full_integration(self):
        """Test complete integration when both components are available."""
        self.generator.initialize()

        if not (self.generator.thematic_ready and self.generator.api_ready):
            self.skipTest("Full integration requires both components")

        # Test complete pipeline
        entries = self.generator.generate_crossword_entries(
            topic="animals",
            num_words=3,
            difficulty="medium"
        )

        self.assertIsInstance(entries, list)
        self.assertLessEqual(len(entries), 3)  # Should not exceed requested count

        for entry in entries:
            self.assertIsInstance(entry, CrosswordEntry)
            self.assertIsInstance(entry.word, str)
            self.assertIsInstance(entry.clue, str)
            self.assertEqual(entry.topic, "animals")
            self.assertGreater(entry.similarity_score, 0.0)
            self.assertIn("tier_", entry.frequency_tier)

    def test_difficulty_filtering(self):
        """Test difficulty-based word filtering."""
        self.generator.initialize()

        if not self.generator.thematic_ready:
            self.skipTest("Requires thematic generator for difficulty testing")

        # Test different difficulty levels
        difficulties = ["easy", "medium", "hard"]

        for difficulty in difficulties:
            with self.subTest(difficulty=difficulty):
                # Rebuilt per iteration so each call gets its own list, in
                # case the filter modifies its input.
                mock_results = [
                    ("CAT", 0.8, "tier_3_very_common"),  # Easy word
                    ("ALGORITHM", 0.7, "tier_8_uncommon"),  # Hard word
                    ("COMPUTER", 0.6, "tier_5_common")   # Medium word
                ]

                filtered = self.generator._filter_by_difficulty(mock_results, difficulty)
                self.assertIsInstance(filtered, list)

                # Filtering may drop entries but must never add any
                self.assertLessEqual(len(filtered), len(mock_results))

    def test_multiple_topics(self):
        """Test generation for multiple topics."""
        self.generator.initialize()

        if not self.generator.is_initialized:
            self.skipTest("Generator initialization failed")

        topics = ["animals", "technology"]
        results = self.generator.generate_by_multiple_topics(
            topics=topics,
            words_per_topic=2,
            difficulty="medium"
        )

        self.assertIsInstance(results, dict)
        self.assertEqual(len(results), len(topics))

        for topic in topics:
            self.assertIn(topic, results)
            self.assertIsInstance(results[topic], list)

    def test_stats_tracking(self):
        """Test performance statistics tracking.

        Only the generation call itself is treated as best-effort; the
        counter assertions run outside the ``try`` so a regression in the
        statistics actually fails the test instead of being swallowed.
        """
        self.generator.initialize()

        # Initial stats
        initial_stats = self.generator.get_stats()
        self.assertIsInstance(initial_stats, dict)
        self.assertIn('words_discovered', initial_stats)
        self.assertIn('clues_generated', initial_stats)

        if not (self.generator.thematic_ready or self.generator.api_ready):
            return

        # Snapshot the counters: if get_stats() hands back a live dict, the
        # comparison below would otherwise compare the object with itself.
        baseline = dict(initial_stats)

        try:
            self.generator.generate_crossword_entries("test", 1, "medium")
        except Exception as exc:
            # Stats test is secondary if generation fails
            self.skipTest(f"Generation failed, stats comparison skipped: {exc}")

        updated_stats = self.generator.get_stats()

        # Counters must never move backwards
        self.assertGreaterEqual(updated_stats['words_discovered'], baseline['words_discovered'])
        self.assertGreaterEqual(updated_stats['clues_generated'], baseline['clues_generated'])

    def test_fallback_behavior(self):
        """Test fallback behavior when components fail."""
        self.generator.initialize()

        # Test with unavailable topic that should trigger fallbacks
        entries = self.generator.generate_crossword_entries(
            topic="nonexistent_impossible_topic_xyz123",
            num_words=1,
            difficulty="medium"
        )

        # Should handle gracefully (empty list or basic entries)
        self.assertIsInstance(entries, list)

    def test_crossword_entry_structure(self):
        """Test CrosswordEntry dataclass structure."""
        expected = {
            "word": "TEST",
            "clue": "Sample clue",
            "topic": "testing",
            "similarity_score": 0.75,
            "frequency_tier": "tier_5_common",
            "tier_description": "Common words",
            "clue_quality": "GOOD",
            "clue_model": "test_model",
        }

        # Create sample entry
        entry = CrosswordEntry(**expected)

        # Verify every field round-trips through the constructor
        for field_name, value in expected.items():
            with self.subTest(field=field_name):
                self.assertEqual(getattr(entry, field_name), value)


class TestIntegrationScenarios(unittest.TestCase):
    """Test realistic integration scenarios.

    Each scenario builds its own generator from the ``model_cache`` directory
    next to this file and skips itself when initialization does not succeed.
    """

    @classmethod
    def setUpClass(cls):
        """Skip the whole class if the generator import failed; read HF_TOKEN."""
        if not INTEGRATED_AVAILABLE:
            # Raising SkipTest is the documented way to skip from setUpClass.
            raise unittest.SkipTest("Integrated generator not available")

        cls.test_token = os.getenv('HF_TOKEN')

    def _make_initialized_generator(self):
        """Build and initialize a generator, skipping the test on failure.

        Returns:
            The initialized ``IntegratedCrosswordGenerator``.
        """
        # Use cached vocabulary and embeddings
        cache_dir = str(Path(__file__).parent / 'model_cache')
        generator = IntegratedCrosswordGenerator(
            vocab_size_limit=50000,
            cache_dir=cache_dir
        )
        generator.initialize()

        if not generator.is_initialized:
            self.skipTest("Generator initialization failed")
        return generator

    def test_education_crossword_scenario(self):
        """Test generating educational crossword content."""
        generator = self._make_initialized_generator()

        # Educational topics
        topics = ["science", "history", "mathematics"]

        for topic in topics:
            with self.subTest(topic=topic):
                entries = generator.generate_crossword_entries(
                    topic=topic,
                    num_words=3,
                    difficulty="medium"
                )

                # Should produce educational content
                self.assertIsInstance(entries, list)
                for entry in entries:
                    self.assertEqual(entry.topic, topic)
                    # Educational words should be substantial
                    self.assertGreaterEqual(len(entry.word), 3)

    def test_themed_puzzle_scenario(self):
        """Test generating themed puzzle content."""
        generator = self._make_initialized_generator()

        # Theme-based generation
        theme = "ocean life"
        entries = generator.generate_crossword_entries(
            topic=theme,
            num_words=5,
            difficulty="medium"
        )

        # An empty result is tolerated; any entry returned must match the theme
        for entry in entries or ():
            self.assertEqual(entry.topic, theme)
            self.assertIsInstance(entry.similarity_score, float)
            self.assertGreater(entry.similarity_score, 0.0)

    def test_performance_benchmarking(self):
        """Test performance characteristics.

        A failure of the generation call itself is reported as a skip, but
        the timing assertions run outside the ``try`` so that exceeding the
        budget fails the test instead of being printed and ignored.
        """
        generator = self._make_initialized_generator()

        # Benchmark generation time with a monotonic clock
        start_time = time.perf_counter()

        try:
            entries = generator.generate_crossword_entries(
                topic="technology",
                num_words=5,
                difficulty="medium"
            )
        except Exception as e:
            # Generation errors are covered by other tests; here they only
            # mean there is nothing to time.
            self.skipTest(f"Performance test encountered: {e}")

        generation_time = time.perf_counter() - start_time

        # Performance expectations
        self.assertLess(generation_time, 60.0)  # Should complete within 1 minute

        if entries:
            avg_time_per_entry = generation_time / len(entries)
            self.assertLess(avg_time_per_entry, 20.0)  # Max ~20s per entry


def run_comprehensive_tests():
    """Run all integration tests with detailed reporting.

    Loads both test classes into one suite, runs it with a verbose text
    runner writing to stdout, and prints a summary of failures, errors and
    skips.

    Returns:
        bool: True when the run was successful; False when any test failed
        or errored, or when the integrated generator could not be imported
        (in which case no tests are run).
    """
    print("🧪 Comprehensive Integration Tests")
    print("=" * 60)
    print("📂 Using cached 50K vocabulary and embeddings from model_cache/")
    print("⚡ This significantly speeds up testing by avoiding re-computation")

    # Check environment
    if not os.getenv('HF_TOKEN'):
        print("⚠️ HF_TOKEN not set - API tests may be limited")

    if not INTEGRATED_AVAILABLE:
        print("❌ Integrated system not available - cannot run tests")
        # Explicit False keeps the return type a bool on every path.
        return False

    # Create test suite
    loader = unittest.TestLoader()
    suite = unittest.TestSuite()

    # Add test cases
    for test_case in (TestIntegratedCrosswordGenerator, TestIntegrationScenarios):
        suite.addTests(loader.loadTestsFromTestCase(test_case))

    # Run tests with detailed output
    runner = unittest.TextTestRunner(verbosity=2, stream=sys.stdout)
    result = runner.run(suite)

    # Summary
    print("\n" + "=" * 60)
    print("📊 TEST SUMMARY")
    print("=" * 60)
    print(f"Tests run: {result.testsRun}")
    print(f"Failures: {len(result.failures)}")
    print(f"Errors: {len(result.errors)}")
    print(f"Skipped: {len(result.skipped)}")

    # Failures and errors carry a full traceback; only its last line (the
    # exception message) is shown in the summary.
    for heading, records in (("\n❌ FAILURES:", result.failures),
                             ("\n❌ ERRORS:", result.errors)):
        if records:
            print(heading)
            for test, trace in records:
                print(f"  - {test}: {trace.splitlines()[-1]}")

    if result.skipped:
        print("\n⏭️ SKIPPED:")
        for test, reason in result.skipped:
            print(f"  - {test}: {reason}")

    # Skipped tests are neither failures nor errors, so they count towards
    # the success rate here.
    if result.testsRun > 0:
        not_broken = result.testsRun - len(result.failures) - len(result.errors)
        success_rate = not_broken / result.testsRun * 100
    else:
        success_rate = 0
    print(f"\n✅ Success rate: {success_rate:.1f}%")

    if result.wasSuccessful():
        print("🎉 All tests passed! Integration system is working correctly.")
    else:
        print("⚠️ Some tests failed. Check the system configuration.")

    return result.wasSuccessful()


def main():
    """Execute the full suite and exit with 0 on success, 1 otherwise."""
    exit_code = 1
    if run_comprehensive_tests():
        exit_code = 0
    sys.exit(exit_code)


# Only run the suite when this file is executed as a script.
if __name__ == "__main__":
    main()