File size: 11,122 Bytes
486eff6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 |
#!/usr/bin/env python3
"""
Enhanced Test: Semantic Clue Generator with ThematicWordGenerator Integration
Test semantic clue generation with proper embedding integration for better quality.
"""
import sys
import logging
from pathlib import Path
# Put this file's own directory at the front of the import search path so
# the generator modules imported below can be found.
sys.path[:0] = [str(Path(__file__).parent)]

# The generator modules are treated as optional at import time; whether
# they loaded is recorded in GENERATOR_AVAILABLE.
try:
    from semantic_clue_generator import SemanticClueGenerator
    from thematic_word_generator import UnifiedThematicWordGenerator
except ImportError as import_err:
    GENERATOR_AVAILABLE = False
    print(f"β Import error: {import_err}")
else:
    GENERATOR_AVAILABLE = True

# Root logging configuration plus the module-level logger.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
def test_enhanced_semantic_clues():
    """Run the batch clue-generation check over a fixed list of word/topic pairs.

    Creates a ``UnifiedThematicWordGenerator`` and a ``SemanticClueGenerator``
    wired to it, requests one clue per style ("category", "definition",
    "description") for every pair, grades the first distinct candidate, and
    prints per-word results followed by a summary with success rates.

    Returns early, after printing a message, when the generator modules were
    not importable or when either generator fails to initialize.
    """
    if not GENERATOR_AVAILABLE:
        print("β Cannot run test - Enhanced generators not available")
        return

    # NOTE(review): several status glyphs in the messages below look
    # mis-encoded (UTF-8 read as a single-byte Greek charset). Only the
    # literals that had been split across lines were repaired; confirm the
    # remaining glyphs against the original file.
    print("π§ͺ Testing Enhanced Semantic Clue Generation")
    print("=" * 60)

    # Initialize thematic word generator first
    print("π Initializing thematic word generator...")
    try:
        word_gen = UnifiedThematicWordGenerator(vocab_size_limit=50000)
        word_gen.initialize()
        print("✅ Thematic word generator initialized successfully")
    except Exception as e:
        print(f"β Failed to initialize thematic word generator: {e}")
        return

    # Initialize semantic clue generator with thematic integration
    print("π Initializing semantic clue generator with thematic integration...")
    clue_gen = SemanticClueGenerator(thematic_word_generator=word_gen)
    try:
        clue_gen.initialize()
        print("✅ Semantic clue generator initialized successfully")
    except Exception as e:
        print(f"β Failed to initialize semantic clue generator: {e}")
        return

    # Test cases that previously failed with LLM
    test_cases = [
        # Previously problematic examples
        ("CAT", "animals"),
        ("KITTY", "animals"),
        ("MEAL", "food"),
        ("HUNGER", "food"),
        ("TECH", "technology"),
        ("SCIENTIST", "science"),
        # Additional challenging cases
        ("DOG", "animals"),
        ("PYTHON", "technology"),
        ("GUITAR", "music"),
        ("OCEAN", "geography"),
        ("ATOM", "science"),
        ("PIZZA", "food"),
        ("MOUNTAIN", "geography"),
        ("VIOLIN", "music"),
        ("DATABASE", "technology"),
    ]

    print(f"\nπ― Testing {len(test_cases)} word-topic combinations with enhanced semantic analysis")
    print("=" * 60)

    successful_clues = 0
    total_tests = len(test_cases)
    high_quality_clues = 0

    # Generate multiple clues with different styles for variety
    styles = ["category", "definition", "description"]

    for word, topic in test_cases:
        print(f"\nπ Testing: '{word}' + '{topic}'")
        print("-" * 40)
        try:
            candidates = []
            for style in styles:
                clue = clue_gen.generate_clue(
                    word=word,
                    topic=topic,
                    clue_style=style,
                    difficulty="medium",
                )
                # Keep only non-empty clues, without duplicates, in style order.
                if clue and clue not in candidates:
                    candidates.append(clue)

            print(f"Generated {len(candidates)} candidates:")
            for i, candidate in enumerate(candidates, 1):
                print(f" {i}. {candidate}")

            # Use the best clue (first one)
            best_clue = candidates[0] if candidates else None
            print(f"\nπ Best clue: {best_clue}")

            if not best_clue:
                print("β No clue generated")
                continue

            # Basic quality: longer than 3 characters and does not contain
            # the answer word itself (case-insensitive).
            lowered = best_clue.lower()
            basic_quality = len(best_clue) > 3 and word.lower() not in lowered
            # Generic fallback phrasing is accepted but ranked lower.
            is_generic = "term related to" in lowered or "associated with" in lowered
            # Descriptive: at least three words, non-generic, passes basics.
            is_descriptive = (
                len(best_clue.split()) >= 3 and not is_generic and basic_quality
            )

            if is_descriptive:
                high_quality_clues += 1
                successful_clues += 1
                print("✅ Quality: EXCELLENT")
            elif basic_quality and not is_generic:
                successful_clues += 1
                print("✅ Quality: GOOD")
            elif basic_quality:
                successful_clues += 1
                print("π Quality: ACCEPTABLE (generic)")
            else:
                print("β Quality: POOR")
        except Exception as e:
            # A failure for one word is reported and logged with traceback;
            # the remaining words are still processed.
            print(f"β Error generating clue: {e}")
            logger.exception("Detailed error:")

    print("\n" + "=" * 60)
    print("π ENHANCED SEMANTIC RESULTS")
    print("=" * 60)
    print(f"Total tests: {total_tests}")
    print(f"Successful clues: {successful_clues}")
    print(f"High quality clues: {high_quality_clues}")
    print(f"Overall success rate: {(successful_clues/total_tests)*100:.1f}%")
    print(f"High quality rate: {(high_quality_clues/total_tests)*100:.1f}%")

    # Enhanced evaluation criteria
    if high_quality_clues >= total_tests * 0.6:  # 60% high quality
        print("π Enhanced semantic approach produces excellent clues!")
        print("π Ready for integration into main crossword application")
    elif successful_clues >= total_tests * 0.8:  # 80% acceptable
        print("π Good improvement over LLM, suitable for production use")
    elif successful_clues >= total_tests * 0.6:  # 60% acceptable
        print("β οΈ Decent improvement, may need more template refinement")
    else:
        print("β Still struggling, consider alternative approaches")
def interactive_test():
    """Interactive test mode for user-provided word-topic combinations.

    Initializes the thematic word generator and the semantic clue generator,
    then repeatedly reads ``word,topic`` lines from standard input, generates
    one clue per style ("category", "definition", "description"), and prints
    the candidates together with a quality grade for the first one.

    Special inputs: ``quit`` / ``exit`` / ``q`` leave the loop and ``batch``
    runs ``test_enhanced_semantic_clues()``. Ctrl-C and end-of-input also
    leave the loop.

    Returns early, after printing a message, when the generator modules were
    not importable or when either generator fails to initialize.
    """
    # Same availability guard as the batch test: without it a failed import
    # would only surface below as a confusing NameError-based init failure.
    if not GENERATOR_AVAILABLE:
        print("β Cannot run test - Enhanced generators not available")
        return

    # NOTE(review): several status glyphs in the messages below look
    # mis-encoded (UTF-8 read as a single-byte Greek charset). Only the
    # literals that had been split across lines were repaired; confirm the
    # remaining glyphs against the original file.
    print("π§ͺ Interactive Semantic Clue Testing")
    print("=" * 60)

    # Initialize thematic word generator first
    print("π Initializing thematic word generator...")
    try:
        word_gen = UnifiedThematicWordGenerator(vocab_size_limit=50000)
        word_gen.initialize()
        print("✅ Thematic word generator initialized successfully")
    except Exception as e:
        print(f"β Failed to initialize thematic word generator: {e}")
        return

    # Initialize semantic clue generator with thematic integration
    print("π Initializing semantic clue generator with thematic integration...")
    clue_gen = SemanticClueGenerator(thematic_word_generator=word_gen)
    try:
        clue_gen.initialize()
        print("✅ Semantic clue generator initialized successfully")
    except Exception as e:
        print(f"β Failed to initialize semantic clue generator: {e}")
        return

    print("\n" + "=" * 60)
    print("π― INTERACTIVE MODE")
    print("=" * 60)
    print("Enter word-topic pairs to test clue generation.")
    print("Format: word,topic (e.g., 'cat,animals')")
    print("Type 'quit' or 'exit' to stop.")
    print("Type 'batch' to run the full test suite.")
    print("-" * 60)

    # Generate multiple clues with different styles for variety
    styles = ["category", "definition", "description"]

    while True:
        try:
            user_input = input("\nπ Enter word,topic: ").strip()
            command = user_input.lower()

            if command in ['quit', 'exit', 'q']:
                print("π Goodbye!")
                break

            if command == 'batch':
                # The batch test performs its own generator initialization.
                print("\nπ Running full test suite...")
                test_enhanced_semantic_clues()
                print("\n" + "=" * 60)
                print("π― Back to interactive mode")
                print("-" * 60)
                continue

            if not user_input or ',' not in user_input:
                print("β Invalid format. Use: word,topic (e.g., 'cat,animals')")
                continue

            # Parse input: split on the first comma only, so the topic may
            # itself contain commas.
            raw_word, _, raw_topic = user_input.partition(',')
            word = raw_word.strip().upper()
            topic = raw_topic.strip().lower()
            if not word or not topic:
                print("β Both word and topic are required")
                continue

            print(f"\nπ Testing: '{word}' + '{topic}'")
            print("-" * 40)

            candidates = []
            for style in styles:
                try:
                    clue = clue_gen.generate_clue(
                        word=word,
                        topic=topic,
                        clue_style=style,
                        difficulty="medium",
                    )
                    # Keep only non-empty clues, without duplicates.
                    if clue and clue not in candidates:
                        candidates.append(clue)
                except Exception as e:
                    # One failing style must not discard the other styles.
                    logger.debug("Error with style %s: %s", style, e)

            if not candidates:
                print("β No clues generated")
                continue

            print(f"Generated {len(candidates)} candidates:")
            for i, candidate in enumerate(candidates, 1):
                print(f" {i}. {candidate}")

            best_clue = candidates[0]
            print(f"\nπ Best clue: {best_clue}")

            # Quality evaluation: the clue must be longer than 3 characters
            # and must not contain the answer word (case-insensitive).
            lowered = best_clue.lower()
            if len(best_clue) > 3 and word.lower() not in lowered:
                is_generic = ("term related to" in lowered or
                              "associated with" in lowered)
                if len(best_clue.split()) >= 3 and not is_generic:
                    print("✅ Quality: EXCELLENT")
                elif not is_generic:
                    print("✅ Quality: GOOD")
                else:
                    print("π Quality: ACCEPTABLE (generic)")
            else:
                print("β Quality: POOR")
        except (KeyboardInterrupt, EOFError):
            # EOFError is raised by input() once stdin is exhausted; if it
            # fell through to the generic handler the loop would never end.
            print("\nπ Goodbye!")
            break
        except Exception as e:
            print(f"β Error: {e}")
def main():
    """Run the enhanced semantic test.

    When the first command-line argument is exactly ``--interactive`` the
    interactive prompt is started; otherwise a usage hint is printed and
    the full batch test runs.
    """
    if len(sys.argv) > 1 and sys.argv[1] == '--interactive':
        interactive_test()
    else:
        print("Run with --interactive for user input mode, or without args for full test.")
        test_enhanced_semantic_clues()


if __name__ == "__main__":
    main()