Spaces:

vimalk78
/

abc123

Running

App Files Files Community

abc123 / hack /test_improved_clues.py

vimalk78

feat(crossword): generated crosswords with clues

486eff6 24 days ago

raw

history blame

6.26 kB

	#!/usr/bin/env python3
	"""
	Quick Test: Improved Semantic Clue Generation
	Test the enhanced semantic clue generator with specific examples.
	"""

	import sys
	import logging
	from pathlib import Path

	# Add hack directory to path for imports
	sys.path.insert(0, str(Path(__file__).parent))

	# The generator modules live next to this script; record whether they could
	# be imported so the test can bail out cleanly instead of crashing later.
	try:
	    from semantic_clue_generator import SemanticClueGenerator
	    from thematic_word_generator import UnifiedThematicWordGenerator
	except ImportError as e:
	    print(f"❌ Import error: {e}")
	    GENERATOR_AVAILABLE = False
	else:
	    GENERATOR_AVAILABLE = True

	# Root logging configuration: INFO and above, timestamped records.
	logging.basicConfig(
	    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
	    level=logging.INFO,
	)
	logger = logging.getLogger(__name__)


	def _rate_clue(word, topic, clue):
	    """Classify one generated clue.

	    Args:
	        word: Target answer word the clue was generated for.
	        topic: Topic string that was passed to the generator.
	        clue: The generated clue text (may be empty/None).

	    Returns:
	        A ``(bucket, message)`` pair. ``bucket`` is one of ``"excellent"``,
	        ``"good"``, ``"generic"`` or ``"poor"`` and selects the counter to
	        increment; ``message`` is the quality line to print.
	    """
	    if not clue:
	        return "poor", "❌ No clue generated"

	    clue_lower = clue.lower()
	    # Normalise the topic once so both template checks are case-insensitive
	    # (the clue is already lower-cased).
	    topic_lower = topic.lower()
	    word_count = len(clue.split())

	    # A clue that leaks the answer is unusable regardless of anything else.
	    if word.lower() in clue_lower:
	        return "poor", "❌ Quality: POOR (contains target word)"

	    # Phrases that indicate the generator fell back to a generic template.
	    is_generic = (
	        "term related to" in clue_lower
	        or "associated with" in clue_lower
	        or f"{topic_lower} concept" in clue_lower
	        or f"{topic_lower} term" in clue_lower
	    )
	    if is_generic:
	        return "generic", "⚠️ Quality: GENERIC (fallback template)"

	    # Substrings treated as evidence of concrete, descriptive content.
	    # NOTE: this is substring matching, so e.g. "sport" also matches
	    # "transport".
	    specific_keywords = (
	        "player", "sport", "hub", "system",
	        "device", "instrument", "creature", "feature",
	    )
	    if word_count >= 4 and any(kw in clue_lower for kw in specific_keywords):
	        return "excellent", "✅ Quality: EXCELLENT (specific and descriptive)"
	    if word_count >= 3:
	        return "good", "✅ Quality: GOOD (descriptive)"
	    # Short but non-generic clues are still counted as good.
	    return "good", "🔄 Quality: ACCEPTABLE (basic)"


	def _print_summary(counts, total_tests):
	    """Print the result table and the overall verdict for a test run.

	    Args:
	        counts: Mapping with integer values under the keys ``"excellent"``,
	            ``"good"``, ``"generic"`` and ``"poor"``.
	        total_tests: Number of word/topic pairs that were evaluated.
	    """
	    excellent_clues = counts["excellent"]
	    good_clues = counts["good"]
	    generic_clues = counts["generic"]
	    poor_clues = counts["poor"]

	    print("\n" + "=" * 60)
	    print("📊 IMPROVED SEMANTIC RESULTS")
	    print("=" * 60)
	    print(f"Total tests: {total_tests}")
	    print(f"Excellent clues: {excellent_clues}")
	    print(f"Good clues: {good_clues}")
	    print(f"Generic clues: {generic_clues}")
	    print(f"Poor clues: {poor_clues}")
	    print(f"Success rate: {((excellent_clues + good_clues)/total_tests)*100:.1f}%")
	    print(f"Excellence rate: {(excellent_clues/total_tests)*100:.1f}%")

	    # Verdict thresholds, checked from best to worst outcome.
	    if excellent_clues >= total_tests * 0.6:  # 60% excellent
	        print("🎉 Major improvement! Semantic system produces excellent clues!")
	    elif (excellent_clues + good_clues) >= total_tests * 0.8:  # 80% good+excellent
	        print("🔄 Good improvement! Much better than generic templates")
	    elif generic_clues <= total_tests * 0.3:  # Less than 30% generic
	        print("⚠️ Some improvement, but still needs work")
	    else:
	        print("❌ Still too many generic clues, consider alternative approach")


	def test_improved_clues():
	    """Test improved semantic clue generation with problematic examples.

	    Initialises the thematic word generator and the semantic clue generator,
	    generates a "description"-style, medium-difficulty clue for a fixed list
	    of word/topic pairs, rates each clue and prints a summary. Returns
	    ``None``; all results are reported on stdout. Returns early if the
	    generator modules could not be imported or fail to initialise.
	    """
	    if not GENERATOR_AVAILABLE:
	        print("❌ Cannot run test - Enhanced generators not available")
	        return

	    print("🧪 Testing Improved Semantic Clue Generation")
	    print("=" * 60)

	    # Initialize thematic word generator first
	    print("🔄 Initializing thematic word generator...")
	    try:
	        word_gen = UnifiedThematicWordGenerator(vocab_size_limit=50000)
	        word_gen.initialize()
	        print("✅ Thematic word generator initialized successfully")
	    except Exception as e:
	        print(f"❌ Failed to initialize thematic word generator: {e}")
	        return

	    # Initialize semantic clue generator with thematic integration
	    print("🔄 Initializing semantic clue generator with thematic integration...")
	    clue_gen = SemanticClueGenerator(thematic_word_generator=word_gen)

	    try:
	        clue_gen.initialize()
	        print("✅ Semantic clue generator initialized successfully")
	    except Exception as e:
	        print(f"❌ Failed to initialize semantic clue generator: {e}")
	        return

	    # Test cases that were producing generic "Term related to X" clues
	    test_cases = [
	        # Your specific problematic examples
	        ("BATSMAN", "cricket"),
	        ("SWIMMING", "sports"),
	        ("AIRPORT", "transportation"),

	        # Additional challenging cases
	        ("DATABASE", "technology"),
	        ("GUITAR", "music"),
	        ("PIZZA", "food"),
	        ("MOUNTAIN", "geography"),
	        ("SCIENTIST", "science"),
	        ("ELEPHANT", "animals"),
	    ]

	    print(f"\n🎯 Testing {len(test_cases)} improved word-topic combinations")
	    print("=" * 60)

	    counts = {"excellent": 0, "good": 0, "generic": 0, "poor": 0}

	    for word, topic in test_cases:
	        print(f"\n📝 Testing: '{word}' + '{topic}'")
	        print("-" * 40)

	        try:
	            # Generate clue with enhanced descriptions
	            best_clue = clue_gen.generate_clue(
	                word=word,
	                topic=topic,
	                clue_style="description",  # Use description style for best results
	                difficulty="medium",
	            )
	            print(f"🏆 Generated clue: {best_clue}")
	            # Rating stays inside the try so an unexpected clue type is
	            # reported as a failed case rather than aborting the run.
	            bucket, message = _rate_clue(word, topic, best_clue)
	        except Exception as e:
	            # Boundary of a diagnostic script: report and keep going.
	            print(f"❌ Error generating clue: {e}")
	            bucket = "poor"
	        else:
	            print(message)

	        counts[bucket] += 1

	    _print_summary(counts, len(test_cases))


	def main() -> None:
	    """Script entry point: execute the improved-clue quality check."""
	    test_improved_clues()


	# Only run the check when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()