abc123 / hack /test_enhanced_semantic_clues.py
vimalk78's picture
feat(crossword): generated crosswords with clues
486eff6
raw
history blame
11.1 kB
#!/usr/bin/env python3
"""
Enhanced Test: Semantic Clue Generator with ThematicWordGenerator Integration
Test semantic clue generation with proper embedding integration for better quality.
"""
import sys
import logging
from pathlib import Path
# Put this script's directory on the import path so the sibling generator
# modules can be imported by bare name.
sys.path.insert(0, str(Path(__file__).parent))

# The generator modules are optional at import time: remember whether they
# loaded so the functions below can check the flag before using them.
try:
    from semantic_clue_generator import SemanticClueGenerator
    from thematic_word_generator import UnifiedThematicWordGenerator
except ImportError as import_error:
    print(f"❌ Import error: {import_error}")
    GENERATOR_AVAILABLE = False
else:
    GENERATOR_AVAILABLE = True

# Timestamped INFO-level logging for the whole script.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
def test_enhanced_semantic_clues():
    """Run the batch clue-generation check over a fixed list of word/topic pairs.

    Builds a ``UnifiedThematicWordGenerator``, hands it to a
    ``SemanticClueGenerator``, requests one clue per style for every pair,
    rates the first distinct clue, and prints a summary with success rates.
    Returns ``None`` early (after printing a message) when the generators
    were not imported or fail to initialize.
    """
    if not GENERATOR_AVAILABLE:
        print("❌ Cannot run test - Enhanced generators not available")
        return

    rule = "=" * 60
    print("πŸ§ͺ Testing Enhanced Semantic Clue Generation")
    print(rule)

    # The thematic generator is set up first because the clue generator is
    # constructed around it.
    print("πŸ”„ Initializing thematic word generator...")
    try:
        thematic = UnifiedThematicWordGenerator(vocab_size_limit=50000)
        thematic.initialize()
        print("βœ… Thematic word generator initialized successfully")
    except Exception as init_error:
        print(f"❌ Failed to initialize thematic word generator: {init_error}")
        return

    print("πŸ”„ Initializing semantic clue generator with thematic integration...")
    generator = SemanticClueGenerator(thematic_word_generator=thematic)
    try:
        generator.initialize()
        print("βœ… Semantic clue generator initialized successfully")
    except Exception as init_error:
        print(f"❌ Failed to initialize semantic clue generator: {init_error}")
        return

    # Pairs that were previously problematic with the LLM approach.
    known_trouble = [
        ("CAT", "animals"),
        ("KITTY", "animals"),
        ("MEAL", "food"),
        ("HUNGER", "food"),
        ("TECH", "technology"),
        ("SCIENTIST", "science"),
    ]
    # Additional challenging pairs.
    extra_cases = [
        ("DOG", "animals"),
        ("PYTHON", "technology"),
        ("GUITAR", "music"),
        ("OCEAN", "geography"),
        ("ATOM", "science"),
        ("PIZZA", "food"),
        ("MOUNTAIN", "geography"),
        ("VIOLIN", "music"),
        ("DATABASE", "technology"),
    ]
    cases = known_trouble + extra_cases
    total = len(cases)

    print(f"\n🎯 Testing {total} word-topic combinations with enhanced semantic analysis")
    print(rule)

    successes = 0
    top_tier = 0
    clue_styles = ("category", "definition", "description")
    generic_markers = ("term related to", "associated with")

    for word, topic in cases:
        print(f"\nπŸ“ Testing: '{word}' + '{topic}'")
        print("-" * 40)
        try:
            # One request per style; keep only non-empty, distinct clues.
            unique_clues = []
            for clue_style in clue_styles:
                produced = generator.generate_clue(
                    word=word,
                    topic=topic,
                    clue_style=clue_style,
                    difficulty="medium",
                )
                if produced and produced not in unique_clues:
                    unique_clues.append(produced)

            print(f"Generated {len(unique_clues)} candidates:")
            for position, text in enumerate(unique_clues, 1):
                print(f" {position}. {text}")

            # The first distinct clue is treated as the best one.
            best = unique_clues[0] if unique_clues else None
            print(f"\nπŸ† Best clue: {best}")
            if not best:
                print("❌ No clue generated")
                continue

            # Basic bar: longer than three characters and does not leak the answer.
            long_enough = len(best) > 3
            lowered = best.lower()
            passes_basic = long_enough and word.lower() not in lowered
            # Fallback template phrasing marks a clue as generic.
            generic = any(marker in lowered for marker in generic_markers)

            if not passes_basic:
                print("❌ Quality: POOR")
            elif generic:
                successes += 1
                print("πŸ”„ Quality: ACCEPTABLE (generic)")
            elif len(best.split()) >= 3:
                # Non-generic clue of at least three words counts as high quality.
                top_tier += 1
                successes += 1
                print("βœ… Quality: EXCELLENT")
            else:
                successes += 1
                print("βœ… Quality: GOOD")
        except Exception as error:
            print(f"❌ Error generating clue: {error}")
            logger.exception("Detailed error:")

    print("\n" + rule)
    print("πŸ“Š ENHANCED SEMANTIC RESULTS")
    print(rule)
    print(f"Total tests: {total}")
    print(f"Successful clues: {successes}")
    print(f"High quality clues: {top_tier}")
    print(f"Overall success rate: {(successes / total) * 100:.1f}%")
    print(f"High quality rate: {(top_tier / total) * 100:.1f}%")

    # Verdict thresholds: 60% high quality, else 80% / 60% acceptable.
    if top_tier >= total * 0.6:
        print("πŸŽ‰ Enhanced semantic approach produces excellent clues!")
        print("πŸš€ Ready for integration into main crossword application")
    elif successes >= total * 0.8:
        print("πŸ”„ Good improvement over LLM, suitable for production use")
    elif successes >= total * 0.6:
        print("⚠️ Decent improvement, may need more template refinement")
    else:
        print("❌ Still struggling, consider alternative approaches")
def interactive_test():
    """Prompt for ``word,topic`` pairs and print the clues generated for each.

    Initializes a ``UnifiedThematicWordGenerator`` and a
    ``SemanticClueGenerator`` built around it, then loops reading lines from
    standard input:

    * ``quit`` / ``exit`` / ``q`` ends the session;
    * ``batch`` runs ``test_enhanced_semantic_clues()`` and returns to the prompt;
    * ``word,topic`` requests one clue per style and rates the first
      distinct clue.

    Returns ``None``. The function returns early, after printing a message,
    when the generators were not imported or fail to initialize. Ctrl-C or
    end-of-input on stdin ends the session.
    """
    # Same guard as the batch test: without the imports the class names below
    # are undefined, so fail with a clear message instead of a NameError.
    if not GENERATOR_AVAILABLE:
        print("❌ Cannot run interactive mode - Enhanced generators not available")
        return

    print("πŸ§ͺ Interactive Semantic Clue Testing")
    print("=" * 60)

    # Initialize thematic word generator first
    print("πŸ”„ Initializing thematic word generator...")
    try:
        word_gen = UnifiedThematicWordGenerator(vocab_size_limit=50000)
        word_gen.initialize()
        print("βœ… Thematic word generator initialized successfully")
    except Exception as e:
        print(f"❌ Failed to initialize thematic word generator: {e}")
        return

    # Initialize semantic clue generator with thematic integration
    print("πŸ”„ Initializing semantic clue generator with thematic integration...")
    clue_gen = SemanticClueGenerator(thematic_word_generator=word_gen)
    try:
        clue_gen.initialize()
        print("βœ… Semantic clue generator initialized successfully")
    except Exception as e:
        print(f"❌ Failed to initialize semantic clue generator: {e}")
        return

    print("\n" + "=" * 60)
    print("🎯 INTERACTIVE MODE")
    print("=" * 60)
    print("Enter word-topic pairs to test clue generation.")
    print("Format: word,topic (e.g., 'cat,animals')")
    print("Type 'quit' or 'exit' to stop.")
    print("Type 'batch' to run the full test suite.")
    print("-" * 60)

    styles = ("category", "definition", "description")

    while True:
        try:
            user_input = input("\nπŸ“ Enter word,topic: ").strip()
            command = user_input.lower()

            if command in ('quit', 'exit', 'q'):
                print("πŸ‘‹ Goodbye!")
                break
            if command == 'batch':
                print("\nπŸ”„ Running full test suite...")
                test_enhanced_semantic_clues()
                print("\n" + "=" * 60)
                print("🎯 Back to interactive mode")
                print("-" * 60)
                continue
            if ',' not in user_input:
                print("❌ Invalid format. Use: word,topic (e.g., 'cat,animals')")
                continue

            # Split on the first comma only, so the topic may contain commas.
            raw_word, _, raw_topic = user_input.partition(',')
            word = raw_word.strip().upper()
            topic = raw_topic.strip().lower()
            if not word or not topic:
                print("❌ Both word and topic are required")
                continue

            print(f"\nπŸ“ Testing: '{word}' + '{topic}'")
            print("-" * 40)

            # Generate one clue per style; a failing style must not discard
            # the clues produced by the others.
            candidates = []
            for style in styles:
                try:
                    clue = clue_gen.generate_clue(
                        word=word,
                        topic=topic,
                        clue_style=style,
                        difficulty="medium",
                    )
                except Exception as e:
                    # WARNING rather than DEBUG: the script configures logging
                    # at INFO, so a DEBUG record would never be shown.
                    logger.warning("Error with style %s: %s", style, e)
                    continue
                if clue and clue not in candidates:
                    candidates.append(clue)

            if not candidates:
                print("❌ No clues generated")
                continue

            print(f"Generated {len(candidates)} candidates:")
            for i, candidate in enumerate(candidates, 1):
                print(f" {i}. {candidate}")

            # The first distinct clue is treated as the best one.
            best_clue = candidates[0]
            print(f"\nπŸ† Best clue: {best_clue}")

            # Quality evaluation: the clue must be longer than three
            # characters and must not contain the answer word.
            lowered = best_clue.lower()
            if len(best_clue) > 3 and word.lower() not in lowered:
                is_generic = ("term related to" in lowered or
                              "associated with" in lowered)
                if is_generic:
                    print("πŸ”„ Quality: ACCEPTABLE (generic)")
                elif len(best_clue.split()) >= 3:
                    print("βœ… Quality: EXCELLENT")
                else:
                    print("βœ… Quality: GOOD")
            else:
                print("❌ Quality: POOR")
        except (KeyboardInterrupt, EOFError):
            # EOFError: stdin was closed or exhausted. Without this, the broad
            # handler below would catch it and the loop would spin forever.
            print("\nπŸ‘‹ Goodbye!")
            break
        except Exception as e:
            print(f"❌ Error: {e}")
def main():
    """Entry point: interactive mode with ``--interactive``, otherwise the batch run."""
    # Slice comparison is safe when no argument was given at all.
    wants_interactive = sys.argv[1:2] == ['--interactive']
    if wants_interactive:
        interactive_test()
        return

    print("Run with --interactive for user input mode, or without args for full test.")
    test_enhanced_semantic_clues()


if __name__ == "__main__":
    main()