#!/usr/bin/env python3
"""
Quick Test: Improved Semantic Clue Generation
Test the enhanced semantic clue generator with specific examples.
"""

import sys
import logging
from pathlib import Path

# Add hack directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))

try:
    from semantic_clue_generator import SemanticClueGenerator
    from thematic_word_generator import UnifiedThematicWordGenerator
    GENERATOR_AVAILABLE = True
except ImportError as e:
    print(f"❌ Import error: {e}")
    GENERATOR_AVAILABLE = False

# Configure root logging once for this script: INFO level, with timestamp,
# logger name and severity prefixed to every record.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-scoped logger for diagnostics emitted by this script.
logger = logging.getLogger(__name__)


# Substrings whose presence marks a clue as concretely descriptive.
_SPECIFIC_KEYWORDS = (
    "player", "sport", "hub", "system",
    "device", "instrument", "creature", "feature",
)


def _classify_clue(word, topic, clue):
    """Grade one generated clue and return ``(bucket, verdict_line)``.

    Args:
        word: Target word the clue is meant to describe.
        topic: Topic the word was generated under.
        clue: Non-empty clue text to grade.

    Returns:
        A tuple of the tally bucket (``"excellent"``, ``"good"``,
        ``"generic"`` or ``"poor"``) and the line to print for it.
    """
    clue_lower = clue.lower()
    topic_lower = topic.lower()

    # A clue that leaks the answer is useless regardless of anything else.
    if word.lower() in clue_lower:
        return "poor", "❌ Quality: POOR (contains target word)"

    # Fallback-template phrasings. Both topic patterns are compared in lower
    # case so a mixed-case topic is still recognised.
    generic_markers = (
        "term related to",
        "associated with",
        f"{topic_lower} concept",
        f"{topic_lower} term",
    )
    if any(marker in clue_lower for marker in generic_markers):
        return "generic", "⚠️  Quality: GENERIC (fallback template)"

    word_count = len(clue.split())
    if word_count >= 4 and any(kw in clue_lower for kw in _SPECIFIC_KEYWORDS):
        return "excellent", "✅ Quality: EXCELLENT (specific and descriptive)"
    if word_count >= 3:
        return "good", "✅ Quality: GOOD (descriptive)"
    # Short but non-generic clues still count towards the "good" tally.
    return "good", "🔄 Quality: ACCEPTABLE (basic)"


def test_improved_clues():
    """Generate clues for a fixed set of word/topic pairs and report quality.

    Initializes ``UnifiedThematicWordGenerator`` and ``SemanticClueGenerator``,
    requests one "description"-style, medium-difficulty clue per test case,
    grades each clue, and prints per-case verdicts followed by a summary.
    Returns early (after printing a message) if the generator modules could
    not be imported or if either generator fails to initialize.

    Returns:
        None. All results are written to stdout.
    """
    if not GENERATOR_AVAILABLE:
        print("❌ Cannot run test - Enhanced generators not available")
        return

    print("🧪 Testing Improved Semantic Clue Generation")
    print("=" * 60)

    # Initialize thematic word generator first
    print("🔄 Initializing thematic word generator...")
    try:
        word_gen = UnifiedThematicWordGenerator(vocab_size_limit=50000)
        word_gen.initialize()
        print("✅ Thematic word generator initialized successfully")
    except Exception as e:
        print(f"❌ Failed to initialize thematic word generator: {e}")
        return

    # Initialize semantic clue generator with thematic integration
    print("🔄 Initializing semantic clue generator with thematic integration...")
    clue_gen = SemanticClueGenerator(thematic_word_generator=word_gen)

    try:
        clue_gen.initialize()
        print("✅ Semantic clue generator initialized successfully")
    except Exception as e:
        print(f"❌ Failed to initialize semantic clue generator: {e}")
        return

    # Test cases that were producing generic "Term related to X" clues
    test_cases = [
        # Specific problematic examples
        ("BATSMAN", "cricket"),
        ("SWIMMING", "sports"),
        ("AIRPORT", "transportation"),

        # Additional challenging cases
        ("DATABASE", "technology"),
        ("GUITAR", "music"),
        ("PIZZA", "food"),
        ("MOUNTAIN", "geography"),
        ("SCIENTIST", "science"),
        ("ELEPHANT", "animals"),
    ]

    print(f"\n🎯 Testing {len(test_cases)} improved word-topic combinations")
    print("=" * 60)

    tally = {"excellent": 0, "good": 0, "generic": 0, "poor": 0}

    for word, topic in test_cases:
        print(f"\n📝 Testing: '{word}' + '{topic}'")
        print("-" * 40)

        try:
            # Generate clue with enhanced descriptions
            best_clue = clue_gen.generate_clue(
                word=word,
                topic=topic,
                clue_style="description",  # Use description style for best results
                difficulty="medium"
            )

            print(f"🏆 Generated clue: {best_clue}")

            if best_clue:
                bucket, verdict = _classify_clue(word, topic, best_clue)
            else:
                bucket, verdict = "poor", "❌ No clue generated"

            print(verdict)
            tally[bucket] += 1

        except Exception as e:
            # Any failure while generating or grading counts as a poor result
            # so one bad case cannot abort the whole run.
            print(f"❌ Error generating clue: {e}")
            tally["poor"] += 1

    excellent_clues = tally["excellent"]
    good_clues = tally["good"]
    generic_clues = tally["generic"]
    poor_clues = tally["poor"]

    total_tests = len(test_cases)
    print("\n" + "=" * 60)
    print("📊 IMPROVED SEMANTIC RESULTS")
    print("=" * 60)
    print(f"Total tests: {total_tests}")
    print(f"Excellent clues: {excellent_clues}")
    print(f"Good clues: {good_clues}")
    print(f"Generic clues: {generic_clues}")
    print(f"Poor clues: {poor_clues}")
    print(f"Success rate: {((excellent_clues + good_clues)/total_tests)*100:.1f}%")
    print(f"Excellence rate: {(excellent_clues/total_tests)*100:.1f}%")

    # Evaluation
    if excellent_clues >= total_tests * 0.6:  # 60% excellent
        print("🎉 Major improvement! Semantic system produces excellent clues!")
    elif (excellent_clues + good_clues) >= total_tests * 0.8:  # 80% good+excellent
        print("🔄 Good improvement! Much better than generic templates")
    elif generic_clues <= total_tests * 0.3:  # At most 30% generic
        print("⚠️  Some improvement, but still needs work")
    else:
        print("❌ Still too many generic clues, consider alternative approach")


def main() -> None:
    """Script entry point: run the improved-clue quality check.

    Delegates to ``test_improved_clues``, which prints its report to stdout.
    """
    test_improved_clues()


# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()