File size: 6,257 Bytes
486eff6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#!/usr/bin/env python3
"""
Quick Test: Improved Semantic Clue Generation
Test the enhanced semantic clue generator with specific examples.
"""

import sys
import logging
from pathlib import Path

# Add hack directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))

# Import the two generator classes this script exercises. A failed import is
# reported and recorded in GENERATOR_AVAILABLE rather than raised, so the
# module still loads and the test function can bail out with a message.
try:
    from semantic_clue_generator import SemanticClueGenerator
    from thematic_word_generator import UnifiedThematicWordGenerator
    GENERATOR_AVAILABLE = True
except ImportError as e:
    print(f"❌ Import error: {e}")
    GENERATOR_AVAILABLE = False

# Set up logging: INFO level, each record formatted as
# "<timestamp> - <logger name> - <level> - <message>".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)


# Keywords whose presence (as a substring of the lower-cased clue) marks a clue
# as "specific". Defined once instead of being rebuilt for every test case.
# NOTE(review): substring matching means "sport" also matches "transportation";
# kept as in the original heuristic - confirm whether whole-word matching is wanted.
_SPECIFIC_CLUE_KEYWORDS = (
    "player", "sport", "hub", "system",
    "device", "instrument", "creature", "feature",
)

# rating -> (message printed for the clue, summary counter it is tallied under).
# "acceptable" is tallied as good and "missing" as poor, as before.
_CLUE_RATING_REPORT = {
    "missing": ("❌ No clue generated", "poor"),
    "poor": ("❌ Quality: POOR (contains target word)", "poor"),
    "excellent": ("✅ Quality: EXCELLENT (specific and descriptive)", "excellent"),
    "good": ("✅ Quality: GOOD (descriptive)", "good"),
    "acceptable": ("🔄 Quality: ACCEPTABLE (basic)", "good"),
    "generic": ("⚠️  Quality: GENERIC (fallback template)", "generic"),
}


def _rate_clue(word, topic, clue):
    """Classify a generated clue with the script's quality heuristic.

    Args:
        word: Target word the clue is for.
        topic: Topic the word was paired with.
        clue: Generated clue text; any falsy value means nothing was generated.

    Returns:
        One of "missing", "poor", "excellent", "good", "acceptable" or
        "generic" (the keys of ``_CLUE_RATING_REPORT``).
    """
    if not clue:
        return "missing"

    clue_lower = clue.lower()
    topic_lower = topic.lower()

    # A clue that spells out its own answer is useless regardless of style.
    if word.lower() in clue_lower:
        return "poor"

    # Fallback-template phrasings. The topic is lower-cased for both patterns
    # because the clue is compared in lower case; previously only the
    # "<topic> term" pattern was, so a mixed-case topic could never match
    # "<topic> concept".
    is_generic = (
        "term related to" in clue_lower
        or "associated with" in clue_lower
        or f"{topic_lower} concept" in clue_lower
        or f"{topic_lower} term" in clue_lower
    )
    if is_generic:
        return "generic"

    word_count = len(clue.split())
    if word_count >= 4 and any(kw in clue_lower for kw in _SPECIFIC_CLUE_KEYWORDS):
        return "excellent"
    if word_count >= 3:
        return "good"
    return "acceptable"


def test_improved_clues():
    """Generate clues for a fixed set of word/topic pairs and print a quality report.

    Initializes a ``UnifiedThematicWordGenerator`` and a ``SemanticClueGenerator``
    wired to it, requests one "description"-style, "medium"-difficulty clue per
    test pair, rates each clue with ``_rate_clue`` and prints per-clue results
    followed by summary counts, rates and an overall verdict.

    Returns:
        None. All results are written to stdout. The function returns early
        (after printing a message) when the generator modules could not be
        imported or when either generator fails to initialize.
    """
    if not GENERATOR_AVAILABLE:
        print("❌ Cannot run test - Enhanced generators not available")
        return

    print("🧪 Testing Improved Semantic Clue Generation")
    print("=" * 60)

    # Initialize thematic word generator first; the clue generator is built on it.
    print("🔄 Initializing thematic word generator...")
    try:
        word_gen = UnifiedThematicWordGenerator(vocab_size_limit=50000)
        word_gen.initialize()
        print("✅ Thematic word generator initialized successfully")
    except Exception as e:
        print(f"❌ Failed to initialize thematic word generator: {e}")
        return

    # Initialize semantic clue generator with thematic integration
    print("🔄 Initializing semantic clue generator with thematic integration...")
    clue_gen = SemanticClueGenerator(thematic_word_generator=word_gen)

    try:
        clue_gen.initialize()
        print("✅ Semantic clue generator initialized successfully")
    except Exception as e:
        print(f"❌ Failed to initialize semantic clue generator: {e}")
        return

    # Test cases that were producing generic "Term related to X" clues
    test_cases = [
        # Specific problematic examples
        ("BATSMAN", "cricket"),
        ("SWIMMING", "sports"),
        ("AIRPORT", "transportation"),

        # Additional challenging cases
        ("DATABASE", "technology"),
        ("GUITAR", "music"),
        ("PIZZA", "food"),
        ("MOUNTAIN", "geography"),
        ("SCIENTIST", "science"),
        ("ELEPHANT", "animals"),
    ]

    print(f"\n🎯 Testing {len(test_cases)} improved word-topic combinations")
    print("=" * 60)

    counts = {"excellent": 0, "good": 0, "generic": 0, "poor": 0}

    for word, topic in test_cases:
        print(f"\n📝 Testing: '{word}' + '{topic}'")
        print("-" * 40)

        # Broad catch is deliberate: one failing pair (generation error or an
        # unexpected non-string result) is counted as poor instead of aborting
        # the remaining cases.
        try:
            best_clue = clue_gen.generate_clue(
                word=word,
                topic=topic,
                clue_style="description",  # Use description style for best results
                difficulty="medium"
            )
            print(f"🏆 Generated clue: {best_clue}")
            message, bucket = _CLUE_RATING_REPORT[_rate_clue(word, topic, best_clue)]
        except Exception as e:
            print(f"❌ Error generating clue: {e}")
            counts["poor"] += 1
            continue

        print(message)
        counts[bucket] += 1

    total_tests = len(test_cases)
    excellent_clues = counts["excellent"]
    good_clues = counts["good"]
    generic_clues = counts["generic"]
    poor_clues = counts["poor"]

    # Guard the percentages so an emptied test list reports 0% instead of
    # raising ZeroDivisionError.
    if total_tests:
        success_rate = ((excellent_clues + good_clues) / total_tests) * 100
        excellence_rate = (excellent_clues / total_tests) * 100
    else:
        success_rate = excellence_rate = 0.0

    print("\n" + "=" * 60)
    print("📊 IMPROVED SEMANTIC RESULTS")
    print("=" * 60)
    print(f"Total tests: {total_tests}")
    print(f"Excellent clues: {excellent_clues}")
    print(f"Good clues: {good_clues}")
    print(f"Generic clues: {generic_clues}")
    print(f"Poor clues: {poor_clues}")
    print(f"Success rate: {success_rate:.1f}%")
    print(f"Excellence rate: {excellence_rate:.1f}%")

    # Overall verdict, checked from the strictest threshold downwards.
    if excellent_clues >= total_tests * 0.6:  # 60% excellent
        print("🎉 Major improvement! Semantic system produces excellent clues!")
    elif (excellent_clues + good_clues) >= total_tests * 0.8:  # 80% good+excellent
        print("🔄 Good improvement! Much better than generic templates")
    elif generic_clues <= total_tests * 0.3:  # At most 30% generic
        print("⚠️  Some improvement, but still needs work")
    else:
        print("❌ Still too many generic clues, consider alternative approach")


def main():
    """Script entry point: run the improved-clue quality check once."""
    test_improved_clues()


# Run the check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()