Spaces:

vimalk78
/

abc123

Running

App Files Files Community

abc123 / hack /test_enhanced_semantic_clues.py

vimalk78

feat(crossword): generated crosswords with clues

486eff6 23 days ago

raw

history blame

11.1 kB

	#!/usr/bin/env python3
	"""
	Enhanced Test: Semantic Clue Generator with ThematicWordGenerator Integration
	Test semantic clue generation with proper embedding integration for better quality.
	"""

	import sys
	import logging
	from pathlib import Path

	# Add hack directory to path for imports
	sys.path.insert(0, str(Path(__file__).parent))

	# Import the project generators defensively: an ImportError is printed
	# and recorded in GENERATOR_AVAILABLE rather than aborting the module.
	try:
	    from semantic_clue_generator import SemanticClueGenerator
	    from thematic_word_generator import UnifiedThematicWordGenerator
	except ImportError as e:
	    print(f"❌ Import error: {e}")
	    GENERATOR_AVAILABLE = False
	else:
	    # Both imports succeeded.
	    GENERATOR_AVAILABLE = True

	# Logging setup: a module-level logger, and a root configuration that
	# emits INFO and above with timestamp, logger name and level prefixed.
	logger = logging.getLogger(__name__)
	logging.basicConfig(
	    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
	    level=logging.INFO,
	)


	def test_enhanced_semantic_clues():
	    """Run the batch clue-generation check over a fixed list of word/topic pairs.

	    Builds a ``UnifiedThematicWordGenerator`` and a ``SemanticClueGenerator``
	    wired to it, requests one clue per style ("category", "definition",
	    "description") for every pair, grades the first distinct clue, and
	    prints per-word results followed by a summary and a verdict.

	    Returns:
	        None. All results are reported on stdout.

	    Generator failures are not propagated: an initialization error prints
	    a message and ends the run, and an error while processing a word is
	    printed, logged with its traceback, and the loop moves to the next word.
	    """
	    if not GENERATOR_AVAILABLE:
	        print("❌ Cannot run test - Enhanced generators not available")
	        return

	    print("🧪 Testing Enhanced Semantic Clue Generation")
	    print("=" * 60)

	    # Initialize thematic word generator first
	    print("🔄 Initializing thematic word generator...")
	    try:
	        word_gen = UnifiedThematicWordGenerator(vocab_size_limit=50000)
	        word_gen.initialize()
	        print("✅ Thematic word generator initialized successfully")
	    except Exception as e:
	        print(f"❌ Failed to initialize thematic word generator: {e}")
	        return

	    # Initialize semantic clue generator with thematic integration
	    print("🔄 Initializing semantic clue generator with thematic integration...")
	    clue_gen = SemanticClueGenerator(thematic_word_generator=word_gen)

	    try:
	        clue_gen.initialize()
	        print("✅ Semantic clue generator initialized successfully")
	    except Exception as e:
	        print(f"❌ Failed to initialize semantic clue generator: {e}")
	        return

	    # Test cases that previously failed with LLM
	    test_cases = [
	        # Previously problematic examples
	        ("CAT", "animals"),
	        ("KITTY", "animals"),
	        ("MEAL", "food"),
	        ("HUNGER", "food"),
	        ("TECH", "technology"),
	        ("SCIENTIST", "science"),

	        # Additional challenging cases
	        ("DOG", "animals"),
	        ("PYTHON", "technology"),
	        ("GUITAR", "music"),
	        ("OCEAN", "geography"),
	        ("ATOM", "science"),
	        ("PIZZA", "food"),
	        ("MOUNTAIN", "geography"),
	        ("VIOLIN", "music"),
	        ("DATABASE", "technology"),
	    ]

	    print(f"\n🎯 Testing {len(test_cases)} word-topic combinations with enhanced semantic analysis")
	    print("=" * 60)

	    successful_clues = 0
	    total_tests = len(test_cases)
	    high_quality_clues = 0

	    # One clue is requested per style; duplicates are dropped.
	    styles = ["category", "definition", "description"]

	    for word, topic in test_cases:
	        print(f"\n📝 Testing: '{word}' + '{topic}'")
	        print("-" * 40)

	        try:
	            candidates = []
	            for style in styles:
	                clue = clue_gen.generate_clue(
	                    word=word,
	                    topic=topic,
	                    clue_style=style,
	                    difficulty="medium"
	                )
	                if clue and clue not in candidates:
	                    candidates.append(clue)

	            print(f"Generated {len(candidates)} candidates:")
	            for i, candidate in enumerate(candidates, 1):
	                print(f" {i}. {candidate}")

	            # Use the best clue (first one)
	            best_clue = candidates[0] if candidates else None

	            print(f"\n🏆 Best clue: {best_clue}")

	            if best_clue:
	                grade = _grade_clue(word, best_clue)
	                # Every tier except "poor" counts as a success; only
	                # "excellent" also counts as high quality.
	                if grade == "excellent":
	                    high_quality_clues += 1
	                if grade != "poor":
	                    successful_clues += 1
	                print(_QUALITY_MESSAGES[grade])
	            else:
	                print("❌ No clue generated")

	        except Exception as e:
	            print(f"❌ Error generating clue: {e}")
	            logger.exception("Detailed error:")

	    print("\n" + "=" * 60)
	    print("📊 ENHANCED SEMANTIC RESULTS")
	    print("=" * 60)
	    print(f"Total tests: {total_tests}")
	    print(f"Successful clues: {successful_clues}")
	    print(f"High quality clues: {high_quality_clues}")
	    print(f"Overall success rate: {(successful_clues/total_tests)*100:.1f}%")
	    print(f"High quality rate: {(high_quality_clues/total_tests)*100:.1f}%")

	    # Enhanced evaluation criteria
	    if high_quality_clues >= total_tests * 0.6:  # 60% high quality
	        print("🎉 Enhanced semantic approach produces excellent clues!")
	        print("🚀 Ready for integration into main crossword application")
	    elif successful_clues >= total_tests * 0.8:  # 80% acceptable
	        print("🔄 Good improvement over LLM, suitable for production use")
	    elif successful_clues >= total_tests * 0.6:  # 60% acceptable
	        print("⚠️ Decent improvement, may need more template refinement")
	    else:
	        print("❌ Still struggling, consider alternative approaches")


	# Console line printed for each quality tier returned by _grade_clue.
	_QUALITY_MESSAGES = {
	    "excellent": "✅ Quality: EXCELLENT",
	    "good": "✅ Quality: GOOD",
	    "acceptable": "🔄 Quality: ACCEPTABLE (generic)",
	    "poor": "❌ Quality: POOR",
	}


	def _grade_clue(word, clue):
	    """Classify ``clue`` for ``word`` as excellent, good, acceptable or poor.

	    Args:
	        word: The answer word the clue was generated for.
	        clue: The non-empty clue text to grade.

	    Returns:
	        One of the keys of ``_QUALITY_MESSAGES``.
	    """
	    lowered = clue.lower()
	    # Basic check: more than three characters and the answer is not
	    # contained in the clue (case-insensitive).
	    if len(clue) <= 3 or word.lower() in lowered:
	        return "poor"
	    # Generic fallback phrasing is accepted but never rated above "acceptable".
	    if "term related to" in lowered or "associated with" in lowered:
	        return "acceptable"
	    # Three or more words counts as descriptive.
	    return "excellent" if len(clue.split()) >= 3 else "good"


	def interactive_test():
	    """Prompt for ``word,topic`` pairs and print the clues generated for each.

	    Initializes the thematic word generator and the semantic clue generator,
	    then loops reading lines from stdin:

	    * ``quit`` / ``exit`` / ``q`` ends the session,
	    * ``batch`` runs ``test_enhanced_semantic_clues()`` and returns to the prompt,
	    * ``word,topic`` generates one clue per style and grades the first one.

	    The word is upper-cased and the topic lower-cased before generation.
	    Ctrl-C or end-of-input on stdin also ends the session.

	    Returns:
	        None. All results are reported on stdout.
	    """
	    if not GENERATOR_AVAILABLE:
	        # Same guard as the batch test. Without it the failed import only
	        # shows up below as a misleading "name ... is not defined" failure.
	        print("❌ Cannot run test - Enhanced generators not available")
	        return

	    print("🧪 Interactive Semantic Clue Testing")
	    print("=" * 60)

	    # Initialize thematic word generator first
	    print("🔄 Initializing thematic word generator...")
	    try:
	        word_gen = UnifiedThematicWordGenerator(vocab_size_limit=50000)
	        word_gen.initialize()
	        print("✅ Thematic word generator initialized successfully")
	    except Exception as e:
	        print(f"❌ Failed to initialize thematic word generator: {e}")
	        return

	    # Initialize semantic clue generator with thematic integration
	    print("🔄 Initializing semantic clue generator with thematic integration...")
	    clue_gen = SemanticClueGenerator(thematic_word_generator=word_gen)

	    try:
	        clue_gen.initialize()
	        print("✅ Semantic clue generator initialized successfully")
	    except Exception as e:
	        print(f"❌ Failed to initialize semantic clue generator: {e}")
	        return

	    print("\n" + "=" * 60)
	    print("🎯 INTERACTIVE MODE")
	    print("=" * 60)
	    print("Enter word-topic pairs to test clue generation.")
	    print("Format: word,topic (e.g., 'cat,animals')")
	    print("Type 'quit' or 'exit' to stop.")
	    print("Type 'batch' to run the full test suite.")
	    print("-" * 60)

	    while True:
	        try:
	            user_input = input("\n📝 Enter word,topic: ").strip()

	            if user_input.lower() in ['quit', 'exit', 'q']:
	                print("👋 Goodbye!")
	                break
	            elif user_input.lower() == 'batch':
	                print("\n🔄 Running full test suite...")
	                test_enhanced_semantic_clues()
	                print("\n" + "=" * 60)
	                print("🎯 Back to interactive mode")
	                print("-" * 60)
	                continue
	            elif not user_input or ',' not in user_input:
	                print("❌ Invalid format. Use: word,topic (e.g., 'cat,animals')")
	                continue

	            # Parse input; only the first comma separates word from topic.
	            parts = user_input.split(',', 1)
	            word = parts[0].strip().upper()
	            topic = parts[1].strip().lower()

	            if not word or not topic:
	                print("❌ Both word and topic are required")
	                continue

	            print(f"\n📝 Testing: '{word}' + '{topic}'")
	            print("-" * 40)

	            # Generate multiple clues with different styles for variety
	            styles = ["category", "definition", "description"]
	            candidates = []

	            for style in styles:
	                # A failure in one style must not discard the others, so
	                # each style is attempted independently.
	                try:
	                    clue = clue_gen.generate_clue(
	                        word=word,
	                        topic=topic,
	                        clue_style=style,
	                        difficulty="medium"
	                    )
	                    if clue and clue not in candidates:
	                        candidates.append(clue)
	                except Exception as e:
	                    logger.debug("Error with style %s: %s", style, e)

	            if candidates:
	                print(f"Generated {len(candidates)} candidates:")
	                for i, candidate in enumerate(candidates, 1):
	                    print(f" {i}. {candidate}")

	                best_clue = candidates[0]
	                print(f"\n🏆 Best clue: {best_clue}")

	                # Quality evaluation: the clue must be longer than three
	                # characters and must not contain the answer word.
	                if (best_clue and
	                        len(best_clue) > 3 and
	                        word.lower() not in best_clue.lower()):

	                    is_generic = ("term related to" in best_clue.lower() or
	                                  "associated with" in best_clue.lower())

	                    if len(best_clue.split()) >= 3 and not is_generic:
	                        print("✅ Quality: EXCELLENT")
	                    elif not is_generic:
	                        print("✅ Quality: GOOD")
	                    else:
	                        print("🔄 Quality: ACCEPTABLE (generic)")
	                else:
	                    print("❌ Quality: POOR")
	            else:
	                print("❌ No clues generated")

	        except (KeyboardInterrupt, EOFError):
	            # EOFError is raised by input() once stdin is exhausted. If it
	            # reached the generic handler below, the loop would print the
	            # same error forever.
	            print("\n👋 Goodbye!")
	            break
	        except Exception as e:
	            print(f"❌ Error: {e}")


	def main():
	    """Dispatch to interactive mode or the batch test based on the first CLI argument.

	    ``--interactive`` as the first argument starts ``interactive_test()``;
	    anything else (including no arguments) prints a usage hint and runs
	    ``test_enhanced_semantic_clues()``.
	    """
	    import sys

	    cli_args = sys.argv[1:]
	    if cli_args and cli_args[0] == '--interactive':
	        interactive_test()
	        return

	    print("Run with --interactive for user input mode, or without args for full test.")
	    test_enhanced_semantic_clues()


	# Only run when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()