abc123 / hack /test_improved_clues.py
vimalk78's picture
feat(crossword): generated crosswords with clues
486eff6
raw
history blame
6.26 kB
#!/usr/bin/env python3
"""
Quick Test: Improved Semantic Clue Generation
Test the enhanced semantic clue generator with specific examples.
"""
import sys
import logging
from pathlib import Path
# Add hack directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))
# The generator modules are optional at import time: record whether they could
# be loaded so the test can bail out politely instead of crashing on import.
try:
    from semantic_clue_generator import SemanticClueGenerator
    from thematic_word_generator import UnifiedThematicWordGenerator
except ImportError as import_error:
    print(f"❌ Import error: {import_error}")
    GENERATOR_AVAILABLE = False
else:
    # Both imports succeeded.
    GENERATOR_AVAILABLE = True
# Configure root logging at INFO level; each record shows the timestamp,
# logger name, level and message.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
# Logger named after this module.
logger = logging.getLogger(__name__)
# Substrings whose presence marks a clue as concretely descriptive rather
# than merely non-generic.
_DESCRIPTIVE_KEYWORDS = (
    "player", "sport", "hub", "system",
    "device", "instrument", "creature", "feature",
)

# Quality label -> (console message, summary bucket the label is tallied in).
# "acceptable" clues are counted together with "good" ones.
_QUALITY_REPORT = {
    "poor": ("❌ Quality: POOR (contains target word)", "poor"),
    "excellent": ("✅ Quality: EXCELLENT (specific and descriptive)", "excellent"),
    "good": ("✅ Quality: GOOD (descriptive)", "good"),
    "acceptable": ("🔄 Quality: ACCEPTABLE (basic)", "good"),
    "generic": ("⚠️ Quality: GENERIC (fallback template)", "generic"),
}


def _classify_clue(word: str, topic: str, clue: str) -> str:
    """Return the quality label of a non-empty clue for the given word/topic.

    The label is one of the keys of ``_QUALITY_REPORT``:

    * ``"poor"``       - the clue contains the target word itself;
    * ``"generic"``    - the clue matches a known fallback template;
    * ``"excellent"``  - at least four words and a descriptive keyword;
    * ``"good"``       - non-generic with at least three words;
    * ``"acceptable"`` - any other non-generic clue.

    All comparisons are case-insensitive for word, topic and clue alike.
    """
    clue_lower = clue.lower()
    topic_lower = topic.lower()

    # A clue that gives away the answer is useless regardless of anything else.
    if word.lower() in clue_lower:
        return "poor"

    generic_markers = (
        "term related to",
        "associated with",
        f"{topic_lower} concept",
        f"{topic_lower} term",
    )
    if any(marker in clue_lower for marker in generic_markers):
        return "generic"

    word_count = len(clue.split())
    if word_count >= 4 and any(
        keyword in clue_lower for keyword in _DESCRIPTIVE_KEYWORDS
    ):
        return "excellent"
    return "good" if word_count >= 3 else "acceptable"


def test_improved_clues():
    """Test improved semantic clue generation with problematic examples.

    Initializes the thematic word generator and the semantic clue generator,
    generates one "description"-style clue per (word, topic) pair, rates each
    clue with ``_classify_clue`` and prints a per-clue verdict followed by a
    summary. Returns ``None``; all results are reported on stdout. Returns
    early (after printing the reason) if the generator modules could not be
    imported or either generator fails to initialize.
    """
    if not GENERATOR_AVAILABLE:
        print("❌ Cannot run test - Enhanced generators not available")
        return

    print("🧪 Testing Improved Semantic Clue Generation")
    print("=" * 60)

    # Initialize thematic word generator first
    print("🔄 Initializing thematic word generator...")
    try:
        word_gen = UnifiedThematicWordGenerator(vocab_size_limit=50000)
        word_gen.initialize()
        print("✅ Thematic word generator initialized successfully")
    except Exception as e:
        print(f"❌ Failed to initialize thematic word generator: {e}")
        return

    # Initialize semantic clue generator with thematic integration
    print("🔄 Initializing semantic clue generator with thematic integration...")
    clue_gen = SemanticClueGenerator(thematic_word_generator=word_gen)
    try:
        clue_gen.initialize()
        print("✅ Semantic clue generator initialized successfully")
    except Exception as e:
        print(f"❌ Failed to initialize semantic clue generator: {e}")
        return

    # Test cases that were producing generic "Term related to X" clues
    test_cases = [
        # Originally reported problematic examples
        ("BATSMAN", "cricket"),
        ("SWIMMING", "sports"),
        ("AIRPORT", "transportation"),
        # Additional challenging cases
        ("DATABASE", "technology"),
        ("GUITAR", "music"),
        ("PIZZA", "food"),
        ("MOUNTAIN", "geography"),
        ("SCIENTIST", "science"),
        ("ELEPHANT", "animals"),
    ]

    print(f"\n🎯 Testing {len(test_cases)} improved word-topic combinations")
    print("=" * 60)

    tally = {"excellent": 0, "good": 0, "generic": 0, "poor": 0}

    for word, topic in test_cases:
        print(f"\n📝 Testing: '{word}' + '{topic}'")
        print("-" * 40)
        # Broad catch is deliberate: one failing word must not abort the run;
        # any error (generation or rating) is reported and counted as poor.
        try:
            best_clue = clue_gen.generate_clue(
                word=word,
                topic=topic,
                clue_style="description",  # Use description style for best results
                difficulty="medium",
            )
            print(f"🏆 Generated clue: {best_clue}")

            if not best_clue:
                print("❌ No clue generated")
                tally["poor"] += 1
                continue

            message, bucket = _QUALITY_REPORT[_classify_clue(word, topic, best_clue)]
            print(message)
            tally[bucket] += 1
        except Exception as e:
            print(f"❌ Error generating clue: {e}")
            tally["poor"] += 1

    total_tests = len(test_cases)
    excellent_clues = tally["excellent"]
    good_clues = tally["good"]
    generic_clues = tally["generic"]
    poor_clues = tally["poor"]

    print("\n" + "=" * 60)
    print("📊 IMPROVED SEMANTIC RESULTS")
    print("=" * 60)
    print(f"Total tests: {total_tests}")
    print(f"Excellent clues: {excellent_clues}")
    print(f"Good clues: {good_clues}")
    print(f"Generic clues: {generic_clues}")
    print(f"Poor clues: {poor_clues}")
    print(f"Success rate: {((excellent_clues + good_clues)/total_tests)*100:.1f}%")
    print(f"Excellence rate: {(excellent_clues/total_tests)*100:.1f}%")

    # Evaluation
    if excellent_clues >= total_tests * 0.6:  # at least 60% excellent
        print("🎉 Major improvement! Semantic system produces excellent clues!")
    elif (excellent_clues + good_clues) >= total_tests * 0.8:  # at least 80% good+excellent
        print("🔄 Good improvement! Much better than generic templates")
    elif generic_clues <= total_tests * 0.3:  # at most 30% generic
        print("⚠️ Some improvement, but still needs work")
    else:
        print("❌ Still too many generic clues, consider alternative approach")
def main():
    """Script entry point: execute the improved-clue quality check once."""
    test_improved_clues()


# Run the check only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()