File size: 11,122 Bytes
486eff6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
#!/usr/bin/env python3
"""
Enhanced Test: Semantic Clue Generator with ThematicWordGenerator Integration
Test semantic clue generation with proper embedding integration for better quality.
"""

import sys
import logging
from pathlib import Path

# Put this script's own directory first on sys.path so the project modules
# imported below resolve when the script is launched from another directory.
sys.path.insert(0, str(Path(__file__).parent))

# The generator modules are treated as optional at import time: a failed
# import is reported on stdout and recorded in GENERATOR_AVAILABLE so callers
# can check the flag before touching the generator classes.
try:
    from semantic_clue_generator import SemanticClueGenerator
    from thematic_word_generator import UnifiedThematicWordGenerator
    GENERATOR_AVAILABLE = True
except ImportError as e:
    # The marker is U+274C (cross mark); it was previously stored as mojibake.
    print(f"❌ Import error: {e}")
    GENERATOR_AVAILABLE = False

# Configure root logging once at import; INFO level means logger.debug()
# calls in this module are not emitted.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


def test_enhanced_semantic_clues():
    """Run the batch clue-generation check and print a graded summary.

    Builds a ``UnifiedThematicWordGenerator`` and a ``SemanticClueGenerator``
    wired to it, asks the clue generator for one clue per style for each
    hard-coded (word, topic) pair, grades the first distinct clue, and prints
    per-word results followed by overall success and high-quality rates.

    The function reports through ``print`` only: it returns ``None`` and does
    not raise on generator failures. If the generators could not be imported,
    or either one fails to initialize, it prints the reason and returns early.

    Grading of the first candidate clue:
      * POOR       - 3 characters or fewer, or contains the answer word
                     (case-insensitive substring match).
      * ACCEPTABLE - passes the basic check but uses a generic fallback
                     phrase ("term related to" / "associated with").
      * EXCELLENT  - passes the basic check, not generic, three or more words.
      * GOOD       - passes the basic check, not generic, fewer than 3 words.
    Every grade except POOR counts as a successful clue; only EXCELLENT
    counts as high quality.
    """
    if not GENERATOR_AVAILABLE:
        print("❌ Cannot run test - Enhanced generators not available")
        return

    print("🧪 Testing Enhanced Semantic Clue Generation")
    print("=" * 60)

    # Initialize thematic word generator first
    print("🔄 Initializing thematic word generator...")
    try:
        word_gen = UnifiedThematicWordGenerator(vocab_size_limit=50000)
        word_gen.initialize()
        print("✅ Thematic word generator initialized successfully")
    except Exception as e:
        print(f"❌ Failed to initialize thematic word generator: {e}")
        return

    # Initialize semantic clue generator with thematic integration
    print("🔄 Initializing semantic clue generator with thematic integration...")
    clue_gen = SemanticClueGenerator(thematic_word_generator=word_gen)

    try:
        clue_gen.initialize()
        print("✅ Semantic clue generator initialized successfully")
    except Exception as e:
        print(f"❌ Failed to initialize semantic clue generator: {e}")
        return

    # Test cases that previously failed with LLM
    test_cases = [
        # Previously problematic examples
        ("CAT", "animals"),
        ("KITTY", "animals"),
        ("MEAL", "food"),
        ("HUNGER", "food"),
        ("TECH", "technology"),
        ("SCIENTIST", "science"),

        # Additional challenging cases
        ("DOG", "animals"),
        ("PYTHON", "technology"),
        ("GUITAR", "music"),
        ("OCEAN", "geography"),
        ("ATOM", "science"),
        ("PIZZA", "food"),
        ("MOUNTAIN", "geography"),
        ("VIOLIN", "music"),
        ("DATABASE", "technology"),
    ]

    # One clue is requested per style; the list never changes, so it is
    # built once instead of on every iteration.
    styles = ("category", "definition", "description")

    print(f"\n🎯 Testing {len(test_cases)} word-topic combinations with enhanced semantic analysis")
    print("=" * 60)

    successful_clues = 0
    total_tests = len(test_cases)
    high_quality_clues = 0

    for word, topic in test_cases:
        print(f"\n📝 Testing: '{word}' + '{topic}'")
        print("-" * 40)

        try:
            # Collect distinct, non-empty clues in style order.
            candidates = []
            for style in styles:
                clue = clue_gen.generate_clue(
                    word=word,
                    topic=topic,
                    clue_style=style,
                    difficulty="medium"
                )
                if clue and clue not in candidates:
                    candidates.append(clue)

            print(f"Generated {len(candidates)} candidates:")
            for i, candidate in enumerate(candidates, 1):
                print(f"  {i}. {candidate}")

            # Use the best clue (first one)
            best_clue = candidates[0] if candidates else None

            print(f"\n🏆 Best clue: {best_clue}")

            if not best_clue:
                print("❌ No clue generated")
                continue

            clue_lower = best_clue.lower()

            # Basic quality: long enough and does not leak the answer.
            basic_quality = (len(best_clue) > 3 and
                             word.lower() not in clue_lower)

            # Generic fallback phrasing is usable but not informative.
            is_generic = ("term related to" in clue_lower or
                          "associated with" in clue_lower)

            if not basic_quality:
                print("❌ Quality: POOR")
            elif is_generic:
                successful_clues += 1
                print("🔄 Quality: ACCEPTABLE (generic)")
            elif len(best_clue.split()) >= 3:
                high_quality_clues += 1
                successful_clues += 1
                print("✅ Quality: EXCELLENT")
            else:
                successful_clues += 1
                print("✅ Quality: GOOD")

        except Exception as e:
            # A failure for one word must not abort the remaining cases;
            # report it, log the traceback, and move on.
            print(f"❌ Error generating clue: {e}")
            logger.exception("Detailed error:")

    print("\n" + "=" * 60)
    print("📊 ENHANCED SEMANTIC RESULTS")
    print("=" * 60)
    print(f"Total tests: {total_tests}")
    print(f"Successful clues: {successful_clues}")
    print(f"High quality clues: {high_quality_clues}")
    print(f"Overall success rate: {(successful_clues/total_tests)*100:.1f}%")
    print(f"High quality rate: {(high_quality_clues/total_tests)*100:.1f}%")

    # Enhanced evaluation criteria
    if high_quality_clues >= total_tests * 0.6:  # 60% high quality
        print("🎉 Enhanced semantic approach produces excellent clues!")
        print("🚀 Ready for integration into main crossword application")
    elif successful_clues >= total_tests * 0.8:  # 80% acceptable
        print("🔄 Good improvement over LLM, suitable for production use")
    elif successful_clues >= total_tests * 0.6:  # 60% acceptable
        print("⚠️  Decent improvement, may need more template refinement")
    else:
        print("❌ Still struggling, consider alternative approaches")


def interactive_test():
    """Run a prompt loop that generates and grades clues for typed pairs.

    After initializing the thematic word generator and the semantic clue
    generator, repeatedly reads ``word,topic`` lines from stdin. For each
    pair it requests one clue per style, lists the distinct results, and
    grades the first one with the same rules as the batch test (POOR /
    ACCEPTABLE / GOOD / EXCELLENT).

    Commands:
      * ``quit``, ``exit`` or ``q`` - leave the loop.
      * ``batch`` - run ``test_enhanced_semantic_clues()`` and return to
        the prompt.

    The loop also ends on Ctrl-C or when stdin reaches end-of-file. The
    function returns ``None``; if the generators are unavailable or fail to
    initialize it prints the reason and returns early.
    """
    # Without this guard the generator class names are undefined and the
    # failure shows up below as a misleading "Failed to initialize" message.
    if not GENERATOR_AVAILABLE:
        print("❌ Cannot run test - Enhanced generators not available")
        return

    print("🧪 Interactive Semantic Clue Testing")
    print("=" * 60)

    # Initialize thematic word generator first
    print("🔄 Initializing thematic word generator...")
    try:
        word_gen = UnifiedThematicWordGenerator(vocab_size_limit=50000)
        word_gen.initialize()
        print("✅ Thematic word generator initialized successfully")
    except Exception as e:
        print(f"❌ Failed to initialize thematic word generator: {e}")
        return

    # Initialize semantic clue generator with thematic integration
    print("🔄 Initializing semantic clue generator with thematic integration...")
    clue_gen = SemanticClueGenerator(thematic_word_generator=word_gen)

    try:
        clue_gen.initialize()
        print("✅ Semantic clue generator initialized successfully")
    except Exception as e:
        print(f"❌ Failed to initialize semantic clue generator: {e}")
        return

    print("\n" + "=" * 60)
    print("🎯 INTERACTIVE MODE")
    print("=" * 60)
    print("Enter word-topic pairs to test clue generation.")
    print("Format: word,topic (e.g., 'cat,animals')")
    print("Type 'quit' or 'exit' to stop.")
    print("Type 'batch' to run the full test suite.")
    print("-" * 60)

    # One clue is requested per style; built once outside the loop.
    styles = ("category", "definition", "description")

    while True:
        try:
            user_input = input("\n📝 Enter word,topic: ").strip()
            command = user_input.lower()

            if command in ('quit', 'exit', 'q'):
                print("👋 Goodbye!")
                break
            if command == 'batch':
                print("\n🔄 Running full test suite...")
                test_enhanced_semantic_clues()
                print("\n" + "=" * 60)
                print("🎯 Back to interactive mode")
                print("-" * 60)
                continue
            if not user_input or ',' not in user_input:
                print("❌ Invalid format. Use: word,topic (e.g., 'cat,animals')")
                continue

            # Split on the first comma only, so the topic may contain commas.
            raw_word, _, raw_topic = user_input.partition(',')
            word = raw_word.strip().upper()
            topic = raw_topic.strip().lower()

            if not word or not topic:
                print("❌ Both word and topic are required")
                continue

            print(f"\n📝 Testing: '{word}' + '{topic}'")
            print("-" * 40)

            # Best effort per style: a failure in one style is logged at
            # debug level and the remaining styles are still tried.
            candidates = []
            for style in styles:
                try:
                    clue = clue_gen.generate_clue(
                        word=word,
                        topic=topic,
                        clue_style=style,
                        difficulty="medium"
                    )
                    if clue and clue not in candidates:
                        candidates.append(clue)
                except Exception as e:
                    logger.debug("Error with style %s: %s", style, e)

            if not candidates:
                print("❌ No clues generated")
                continue

            print(f"Generated {len(candidates)} candidates:")
            for i, candidate in enumerate(candidates, 1):
                print(f"  {i}. {candidate}")

            best_clue = candidates[0]
            print(f"\n🏆 Best clue: {best_clue}")

            # Quality evaluation
            clue_lower = best_clue.lower()
            basic_quality = (len(best_clue) > 3 and
                             word.lower() not in clue_lower)
            is_generic = ("term related to" in clue_lower or
                          "associated with" in clue_lower)

            if not basic_quality:
                print("❌ Quality: POOR")
            elif is_generic:
                print("🔄 Quality: ACCEPTABLE (generic)")
            elif len(best_clue.split()) >= 3:
                print("✅ Quality: EXCELLENT")
            else:
                print("✅ Quality: GOOD")

        except (KeyboardInterrupt, EOFError):
            # EOFError must end the loop too: once stdin is closed every
            # later input() call raises again, so the generic handler below
            # would otherwise print errors forever.
            print("\n👋 Goodbye!")
            break
        except Exception as e:
            # Top-level boundary for the prompt: report and keep the
            # session alive for the next pair.
            print(f"❌ Error: {e}")


def main():
    """Dispatch to interactive mode or the batch run based on argv.

    When the first command-line argument is exactly ``--interactive`` the
    prompt loop is started. Any other invocation prints a usage hint and
    then runs the full batch test.
    """
    import sys

    # A one-element slice is simply empty when no argument was passed, so
    # this comparison needs no separate length check.
    if sys.argv[1:2] == ['--interactive']:
        interactive_test()
        return

    print("Run with --interactive for user input mode, or without args for full test.")
    test_enhanced_semantic_clues()


if __name__ == "__main__":
    main()