#!/usr/bin/env python3
"""
Comprehensive Test Suite for Integrated Crossword Generator

Tests the complete integration between thematic word discovery and API clue generation,
ensuring the system works correctly and produces high-quality results.

This test suite uses pre-cached embeddings and vocabulary files (50K words) from 
model_cache/ directory for faster test execution, avoiding re-initialization of
the sentence transformer model and vocabulary generation.

Performance: ~93s initialization with cache vs ~250s without cache (~2.7x faster)

To verify cache setup before running tests:
    python verify_cached_tests.py

To run the full test suite:
    export HF_TOKEN='your_token' && python test_integrated_system.py
"""

import sys
import os
import time
import unittest
from pathlib import Path
from unittest.mock import Mock, patch

# Put this file's own directory at the front of sys.path so the sibling
# module `integrated_crossword_generator` can be imported regardless of the
# current working directory.
sys.path.insert(0, str(Path(__file__).parent))

# Import the system under test defensively: if the import fails, report the
# error and record the outcome in INTEGRATED_AVAILABLE so the test classes and
# the runner below can skip instead of failing at import time.
try:
    from integrated_crossword_generator import IntegratedCrosswordGenerator, CrosswordEntry
    INTEGRATED_AVAILABLE = True
except ImportError as e:
    print(f"❌ Integration import error: {e}")
    INTEGRATED_AVAILABLE = False


class TestIntegratedCrosswordGenerator(unittest.TestCase):
    """Test cases for the integrated crossword generator.

    Every test receives a fresh ``IntegratedCrosswordGenerator`` configured in
    ``setUp`` with a 50,000-word vocabulary limit and the ``model_cache``
    directory located next to this file. Tests that depend on an optional
    component (thematic generator, API clue generator, ``HF_TOKEN``) skip
    themselves when that component is unavailable.
    """

    @classmethod
    def setUpClass(cls):
        """Skip the whole class if the generator could not be imported.

        Also records ``HF_TOKEN`` from the environment on ``cls.test_token``.

        Raises:
            unittest.SkipTest: when the integrated generator import failed.
        """
        if not INTEGRATED_AVAILABLE:
            # skipTest() is an instance method; raising SkipTest is the
            # supported way to skip from a class-level fixture.
            raise unittest.SkipTest("Integrated generator not available")

        # Use test token if available
        cls.test_token = os.getenv('HF_TOKEN')
        if not cls.test_token:
            print("⚠️ HF_TOKEN not set - some tests may be skipped")

    def setUp(self):
        """Create an uninitialized generator for each test."""
        # Use cached 50K vocabulary and embeddings from model_cache
        cache_dir = str(Path(__file__).parent / 'model_cache')
        self.generator = IntegratedCrosswordGenerator(
            vocab_size_limit=50000,  # Use cached 50K vocabulary
            cache_dir=cache_dir
        )

    def test_initialization(self):
        """Test generator initialization."""
        self.assertFalse(self.generator.is_initialized)

        # Time the initialization with a monotonic clock so system clock
        # adjustments cannot distort the measured duration.
        start_time = time.perf_counter()
        self.generator.initialize()
        init_time = time.perf_counter() - start_time

        self.assertTrue(self.generator.is_initialized)

        # Check system info
        system_info = self.generator.get_system_info()
        self.assertIn('components', system_info)
        self.assertIn('stats', system_info)

        # Verify cached files are being used (should still be reasonable time even with model loading)
        # Note: Model download/loading takes ~90s, but vocabulary/embeddings load from cache
        self.assertLess(init_time, 120.0, "Initialization should complete within 2 minutes with cached files")

        # If thematic generator is ready, verify it used cached data
        if self.generator.thematic_ready:
            vocab_size = self.generator.thematic_generator.get_vocabulary_size()
            self.assertEqual(vocab_size, 50000, "Should use full 50K cached vocabulary")

    def test_cached_files_usage(self):
        """Test that cached vocabulary and embeddings are being used."""
        cache_dir = Path(self.generator.cache_dir)

        # Verify expected cache files exist
        vocab_file = cache_dir / "unified_vocabulary_50000.pkl"
        freq_file = cache_dir / "unified_frequencies_50000.pkl"
        embeddings_file = cache_dir / "unified_embeddings_all-mpnet-base-v2_50000.npy"

        self.assertTrue(vocab_file.exists(), f"Vocabulary cache file should exist: {vocab_file}")
        self.assertTrue(freq_file.exists(), f"Frequency cache file should exist: {freq_file}")
        self.assertTrue(embeddings_file.exists(), f"Embeddings cache file should exist: {embeddings_file}")

        # Initialize and verify vocabulary size
        self.generator.initialize()

        if self.generator.thematic_ready:
            vocab_size = self.generator.thematic_generator.get_vocabulary_size()
            self.assertEqual(vocab_size, 50000, "Should use cached 50K vocabulary")

            # Verify embeddings are loaded
            self.assertIsNotNone(self.generator.thematic_generator.vocab_embeddings)
            embeddings_shape = self.generator.thematic_generator.vocab_embeddings.shape
            self.assertEqual(embeddings_shape[0], 50000, "Embeddings should have 50K entries")
            self.assertEqual(embeddings_shape[1], 768, "Should use all-mpnet-base-v2 embeddings (768 dims)")

    def test_component_availability(self):
        """Test availability of required components."""
        self.generator.initialize()

        # At least one component should be available
        has_thematic = self.generator.thematic_ready
        has_api = self.generator.api_ready

        self.assertTrue(has_thematic or has_api, "At least one component should be available")

        if has_thematic:
            self.assertIsNotNone(self.generator.thematic_generator)
            vocab_size = self.generator.thematic_generator.get_vocabulary_size()
            self.assertGreater(vocab_size, 0)

        if has_api:
            self.assertIsNotNone(self.generator.api_clue_generator)

    def test_word_discovery_only(self):
        """Test word discovery when only thematic generator is available."""
        self.generator.initialize()

        if not self.generator.thematic_ready:
            self.skipTest("Thematic generator not available")

        # Mark the API generator as unavailable for this test
        self.generator.api_ready = False

        # Test word discovery
        words = self.generator._discover_words("animals", 5, "medium", 0.3)

        if words:  # Only test if words are found
            self.assertIsInstance(words, list)
            for word, similarity, tier in words:
                self.assertIsInstance(word, str)
                self.assertIsInstance(similarity, float)
                self.assertIsInstance(tier, str)
                self.assertGreater(len(word), 2)
                self.assertGreaterEqual(similarity, 0.0)

    def test_api_clue_generation_only(self):
        """Test API clue generation when only API generator is available."""
        if not self.test_token:
            self.skipTest("HF_TOKEN not available for API testing")

        self.generator.initialize()

        if not self.generator.api_ready:
            self.skipTest("API generator not available")

        # Mark the thematic generator as unavailable for this test
        self.generator.thematic_ready = False

        # Test with sample word data: (word, similarity, frequency tier)
        mock_words = [("CAT", 0.8, "tier_5_common"), ("DOG", 0.7, "tier_4_highly_common")]

        entries = self.generator._generate_clues_for_words(mock_words, "animals")

        self.assertIsInstance(entries, list)
        for entry in entries:
            self.assertIsInstance(entry, CrosswordEntry)
            self.assertIsInstance(entry.word, str)
            self.assertIsInstance(entry.clue, str)
            self.assertGreater(len(entry.clue), 5)  # Clues should be substantial

    def test_full_integration(self):
        """Test complete integration when both components are available."""
        self.generator.initialize()

        if not (self.generator.thematic_ready and self.generator.api_ready):
            self.skipTest("Full integration requires both components")

        # Test complete pipeline
        entries = self.generator.generate_crossword_entries(
            topic="animals",
            num_words=3,
            difficulty="medium"
        )

        self.assertIsInstance(entries, list)
        self.assertLessEqual(len(entries), 3)  # Should not exceed requested count

        for entry in entries:
            self.assertIsInstance(entry, CrosswordEntry)
            self.assertIsInstance(entry.word, str)
            self.assertIsInstance(entry.clue, str)
            self.assertEqual(entry.topic, "animals")
            self.assertGreater(entry.similarity_score, 0.0)
            self.assertIn("tier_", entry.frequency_tier)

    def test_difficulty_filtering(self):
        """Test difficulty-based word filtering."""
        self.generator.initialize()

        if not self.generator.thematic_ready:
            self.skipTest("Requires thematic generator for difficulty testing")

        # Test different difficulty levels
        difficulties = ["easy", "medium", "hard"]

        for difficulty in difficulties:
            with self.subTest(difficulty=difficulty):
                mock_results = [
                    ("CAT", 0.8, "tier_3_very_common"),  # Easy word
                    ("ALGORITHM", 0.7, "tier_8_uncommon"),  # Hard word
                    ("COMPUTER", 0.6, "tier_5_common")   # Medium word
                ]

                filtered = self.generator._filter_by_difficulty(mock_results, difficulty)
                self.assertIsInstance(filtered, list)

                # Filtering must never add entries
                self.assertLessEqual(len(filtered), len(mock_results))

    def test_multiple_topics(self):
        """Test generation for multiple topics."""
        self.generator.initialize()

        if not self.generator.is_initialized:
            self.skipTest("Generator initialization failed")

        topics = ["animals", "technology"]
        results = self.generator.generate_by_multiple_topics(
            topics=topics,
            words_per_topic=2,
            difficulty="medium"
        )

        self.assertIsInstance(results, dict)
        self.assertEqual(len(results), len(topics))

        for topic in topics:
            self.assertIn(topic, results)
            self.assertIsInstance(results[topic], list)

    def test_stats_tracking(self):
        """Test performance statistics tracking.

        Verifies the stats dict exposes the expected counters and, when at
        least one component is ready, that generating an entry never makes
        those counters decrease. If generation itself raises, the test is
        reported as skipped rather than silently passing.
        """
        self.generator.initialize()

        # Initial stats
        initial_stats = self.generator.get_stats()
        self.assertIsInstance(initial_stats, dict)
        self.assertIn('words_discovered', initial_stats)
        self.assertIn('clues_generated', initial_stats)

        if not (self.generator.thematic_ready or self.generator.api_ready):
            return

        # Snapshot the counters now; get_stats() may hand back a live dict
        # that generation mutates in place (cannot be confirmed from here),
        # which would make the comparison below trivially true.
        words_before = initial_stats['words_discovered']
        clues_before = initial_stats['clues_generated']

        # Only the generation call is best-effort. The assertions stay
        # outside the try block: AssertionError is an Exception subclass, so
        # a broad handler around them would hide real failures.
        try:
            self.generator.generate_crossword_entries("test", 1, "medium")
        except Exception as exc:
            self.skipTest(f"Generation failed, stats update not verifiable: {exc}")

        updated_stats = self.generator.get_stats()

        # Counters must not go backwards
        self.assertGreaterEqual(updated_stats['words_discovered'], words_before)
        self.assertGreaterEqual(updated_stats['clues_generated'], clues_before)

    def test_fallback_behavior(self):
        """Test fallback behavior when components fail."""
        self.generator.initialize()

        # Test with unavailable topic that should trigger fallbacks
        entries = self.generator.generate_crossword_entries(
            topic="nonexistent_impossible_topic_xyz123",
            num_words=1,
            difficulty="medium"
        )

        # Should handle gracefully (empty list or basic entries)
        self.assertIsInstance(entries, list)

    def test_crossword_entry_structure(self):
        """Test CrosswordEntry dataclass structure."""
        # Create sample entry
        entry = CrosswordEntry(
            word="TEST",
            clue="Sample clue",
            topic="testing",
            similarity_score=0.75,
            frequency_tier="tier_5_common",
            tier_description="Common words",
            clue_quality="GOOD",
            clue_model="test_model"
        )

        # Verify all fields
        self.assertEqual(entry.word, "TEST")
        self.assertEqual(entry.clue, "Sample clue")
        self.assertEqual(entry.topic, "testing")
        self.assertEqual(entry.similarity_score, 0.75)
        self.assertEqual(entry.frequency_tier, "tier_5_common")
        self.assertEqual(entry.tier_description, "Common words")
        self.assertEqual(entry.clue_quality, "GOOD")
        self.assertEqual(entry.clue_model, "test_model")


class TestIntegrationScenarios(unittest.TestCase):
    """Test realistic integration scenarios.

    Each scenario builds and initializes its own generator against the
    ``model_cache`` directory next to this file, and is skipped when the
    generator does not report itself as initialized.
    """

    @classmethod
    def setUpClass(cls):
        """Skip the whole class if the generator could not be imported.

        Also records ``HF_TOKEN`` from the environment on ``cls.test_token``.

        Raises:
            unittest.SkipTest: when the integrated generator import failed.
        """
        if not INTEGRATED_AVAILABLE:
            # skipTest() is an instance method; raising SkipTest is the
            # supported way to skip from a class-level fixture.
            raise unittest.SkipTest("Integrated generator not available")

        cls.test_token = os.getenv('HF_TOKEN')

    def _create_initialized_generator(self):
        """Return an initialized generator using the cached 50K vocabulary.

        Skips the calling test when the generator does not report itself as
        initialized after ``initialize()``.
        """
        # Use cached vocabulary and embeddings
        cache_dir = str(Path(__file__).parent / 'model_cache')
        generator = IntegratedCrosswordGenerator(
            vocab_size_limit=50000,
            cache_dir=cache_dir
        )
        generator.initialize()

        if not generator.is_initialized:
            self.skipTest("Generator initialization failed")

        return generator

    def test_education_crossword_scenario(self):
        """Test generating educational crossword content."""
        generator = self._create_initialized_generator()

        # Educational topics
        topics = ["science", "history", "mathematics"]

        for topic in topics:
            with self.subTest(topic=topic):
                entries = generator.generate_crossword_entries(
                    topic=topic,
                    num_words=3,
                    difficulty="medium"
                )

                # Should produce educational content
                self.assertIsInstance(entries, list)
                for entry in entries:
                    self.assertEqual(entry.topic, topic)
                    # Educational words should be substantial
                    self.assertGreaterEqual(len(entry.word), 3)

    def test_themed_puzzle_scenario(self):
        """Test generating themed puzzle content."""
        generator = self._create_initialized_generator()

        # Theme-based generation
        theme = "ocean life"
        entries = generator.generate_crossword_entries(
            topic=theme,
            num_words=5,
            difficulty="medium"
        )

        # An empty result is tolerated; any entries returned must carry the
        # requested theme and a positive float similarity score.
        for entry in entries or ():
            self.assertEqual(entry.topic, theme)
            self.assertIsInstance(entry.similarity_score, float)
            self.assertGreater(entry.similarity_score, 0.0)

    def test_performance_benchmarking(self):
        """Test performance characteristics.

        Generation errors are treated as informational (the test is skipped
        with the error message), but once generation succeeds the timing
        limits are enforced.
        """
        generator = self._create_initialized_generator()

        # Benchmark generation time with a monotonic clock
        start_time = time.perf_counter()

        # Keep only the generation call inside the try block. The timing
        # assertions must stay outside it: AssertionError is an Exception
        # subclass, so a broad handler around them would make them unable
        # to fail.
        try:
            entries = generator.generate_crossword_entries(
                topic="technology",
                num_words=5,
                difficulty="medium"
            )
        except Exception as e:
            # Performance test is informational when generation itself fails
            print(f"Performance test encountered: {e}")
            self.skipTest(f"Performance test encountered: {e}")

        generation_time = time.perf_counter() - start_time

        # Performance expectations
        self.assertLess(generation_time, 60.0)  # Should complete within 1 minute

        if entries:
            avg_time_per_entry = generation_time / len(entries)
            self.assertLess(avg_time_per_entry, 20.0)  # Max ~20s per entry


def run_comprehensive_tests() -> bool:
    """Run all integration tests with detailed reporting.

    Loads both test classes into one suite, runs it with a verbose text
    runner writing to stdout, then prints a summary of failures, errors,
    skips and the success rate.

    Returns:
        True when every executed test passed; False when any test failed or
        errored, or when the integrated generator could not be imported.
    """
    print("🧪 Comprehensive Integration Tests")
    print("=" * 60)
    print("📂 Using cached 50K vocabulary and embeddings from model_cache/")
    print("⚡ This significantly speeds up testing by avoiding re-computation")

    # Check environment
    if not os.getenv('HF_TOKEN'):
        print("⚠️ HF_TOKEN not set - API tests may be limited")

    if not INTEGRATED_AVAILABLE:
        print("❌ Integrated system not available - cannot run tests")
        # Return an explicit bool so every path has the same return type.
        return False

    # Create test suite
    loader = unittest.TestLoader()
    suite = unittest.TestSuite()

    # Add test cases
    for test_case in (TestIntegratedCrosswordGenerator, TestIntegrationScenarios):
        suite.addTests(loader.loadTestsFromTestCase(test_case))

    # Run tests with detailed output
    runner = unittest.TextTestRunner(verbosity=2, stream=sys.stdout)
    result = runner.run(suite)

    # Summary
    print("\n" + "=" * 60)
    print("📊 TEST SUMMARY")
    print("=" * 60)
    print(f"Tests run: {result.testsRun}")
    print(f"Failures: {len(result.failures)}")
    print(f"Errors: {len(result.errors)}")
    print(f"Skipped: {len(result.skipped)}")

    # Failures and errors share one format: the test id followed by the last
    # line of its traceback, which carries the exception message.
    for heading, outcomes in (
        ("\n❌ FAILURES:", result.failures),
        ("\n❌ ERRORS:", result.errors),
    ):
        if outcomes:
            print(heading)
            for test, trace in outcomes:
                print(f"  - {test}: {trace.splitlines()[-1]}")

    if result.skipped:
        print("\n⏭️ SKIPPED:")
        for test, reason in result.skipped:
            print(f"  - {test}: {reason}")

    # Skipped tests count toward the success rate, as only failures and
    # errors are subtracted; guard against division by zero on an empty run.
    if result.testsRun > 0:
        not_failed = result.testsRun - len(result.failures) - len(result.errors)
        success_rate = not_failed / result.testsRun * 100
    else:
        success_rate = 0
    print(f"\n✅ Success rate: {success_rate:.1f}%")

    if result.wasSuccessful():
        print("🎉 All tests passed! Integration system is working correctly.")
    else:
        print("⚠️ Some tests failed. Check the system configuration.")

    return result.wasSuccessful()


def main():
    """Execute the comprehensive suite and exit with a matching status code.

    The process exits with status 0 when ``run_comprehensive_tests`` returns
    a truthy value and with status 1 otherwise.
    """
    exit_code = 0 if run_comprehensive_tests() else 1
    sys.exit(exit_code)


# Run the suite only when this file is executed as a script.
if __name__ == "__main__":
    main()