#!/usr/bin/env python3
"""
Enhanced Test: Semantic Clue Generator with ThematicWordGenerator Integration

Test semantic clue generation with proper embedding integration for better quality.

Run without arguments for the full batch suite, or with ``--interactive`` to
enter word/topic pairs by hand.
"""

import sys
import logging
from pathlib import Path

# Add hack directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))

try:
    from semantic_clue_generator import SemanticClueGenerator
    from thematic_word_generator import UnifiedThematicWordGenerator
    GENERATOR_AVAILABLE = True
except ImportError as e:
    print(f"❌ Import error: {e}")
    GENERATOR_AVAILABLE = False

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Clue styles requested for every word, in order; the first clue produced is
# the one reported as "best".
CLUE_STYLES = ("category", "definition", "description")

# Phrases that mark a clue as a generic fallback pattern.
GENERIC_CLUE_MARKERS = ("term related to", "associated with")

# Console line printed for each label returned by classify_clue_quality().
_QUALITY_MESSAGES = {
    "EXCELLENT": "✅ Quality: EXCELLENT",
    "GOOD": "✅ Quality: GOOD",
    "ACCEPTABLE": "🔄 Quality: ACCEPTABLE (generic)",
    "POOR": "❌ Quality: POOR",
}


def classify_clue_quality(word, clue):
    """Grade a generated clue for a crossword answer.

    Args:
        word: The answer word the clue was generated for.
        clue: The clue text; may be ``None`` or empty.

    Returns:
        ``"POOR"`` when the clue is missing, is three characters or shorter,
        or contains the answer word (case-insensitive substring match);
        ``"ACCEPTABLE"`` when it is usable but contains a generic marker
        phrase; ``"EXCELLENT"`` when it is usable, non-generic and at least
        three words long; ``"GOOD"`` otherwise.
    """
    if not clue:
        return "POOR"

    clue_lower = clue.lower()
    # Basic check: long enough, and the answer must not appear in the clue.
    if len(clue) <= 3 or word.lower() in clue_lower:
        return "POOR"
    if any(marker in clue_lower for marker in GENERIC_CLUE_MARKERS):
        return "ACCEPTABLE"
    if len(clue.split()) >= 3:
        return "EXCELLENT"
    return "GOOD"


def _initialize_clue_generator():
    """Create and initialize the word generator and the clue generator.

    Progress and failure messages are printed to stdout.

    Returns:
        The initialized ``SemanticClueGenerator``, or ``None`` if either
        generator could not be constructed or initialized.
    """
    # Initialize thematic word generator first
    print("🔄 Initializing thematic word generator...")
    try:
        word_gen = UnifiedThematicWordGenerator(vocab_size_limit=50000)
        word_gen.initialize()
        print("✅ Thematic word generator initialized successfully")
    except Exception as e:
        print(f"❌ Failed to initialize thematic word generator: {e}")
        return None

    # Initialize semantic clue generator with thematic integration
    print("🔄 Initializing semantic clue generator with thematic integration...")
    try:
        clue_gen = SemanticClueGenerator(thematic_word_generator=word_gen)
        clue_gen.initialize()
        print("✅ Semantic clue generator initialized successfully")
    except Exception as e:
        print(f"❌ Failed to initialize semantic clue generator: {e}")
        return None

    return clue_gen


def _generate_candidates(clue_gen, word, topic, skip_failed_styles=False):
    """Generate one clue per style and return the distinct, non-empty ones.

    Args:
        clue_gen: Object exposing ``generate_clue(word=, topic=, clue_style=,
            difficulty=)``.
        word: Answer word.
        topic: Topic the word belongs to.
        skip_failed_styles: When true, an exception raised for one style is
            logged at debug level and the remaining styles are still tried;
            when false, the exception propagates to the caller.

    Returns:
        List of clues in style order, without duplicates.
    """
    candidates = []
    for style in CLUE_STYLES:
        try:
            clue = clue_gen.generate_clue(
                word=word,
                topic=topic,
                clue_style=style,
                difficulty="medium"
            )
        except Exception as e:
            if not skip_failed_styles:
                raise
            logger.debug("Error with style %s: %s", style, e)
            continue
        if clue and clue not in candidates:
            candidates.append(clue)
    return candidates


def _print_candidates(candidates):
    """Print the numbered list of candidate clues."""
    print(f"Generated {len(candidates)} candidates:")
    for i, candidate in enumerate(candidates, 1):
        print(f" {i}. {candidate}")


def test_enhanced_semantic_clues(clue_gen=None):
    """Test semantic clue generation with thematic word generator integration.

    Runs a fixed list of word/topic pairs, prints the clues generated for
    each, grades the first clue and prints summary statistics.

    Args:
        clue_gen: Optional, already initialized clue generator to reuse.
            When omitted, the generators are created and initialized here.
    """
    if clue_gen is None and not GENERATOR_AVAILABLE:
        print("❌ Cannot run test - Enhanced generators not available")
        return

    print("🧪 Testing Enhanced Semantic Clue Generation")
    print("=" * 60)

    if clue_gen is None:
        clue_gen = _initialize_clue_generator()
        if clue_gen is None:
            return

    # Test cases that previously failed with LLM
    test_cases = [
        # Previously problematic examples
        ("CAT", "animals"),
        ("KITTY", "animals"),
        ("MEAL", "food"),
        ("HUNGER", "food"),
        ("TECH", "technology"),
        ("SCIENTIST", "science"),

        # Additional challenging cases
        ("DOG", "animals"),
        ("PYTHON", "technology"),
        ("GUITAR", "music"),
        ("OCEAN", "geography"),
        ("ATOM", "science"),
        ("PIZZA", "food"),
        ("MOUNTAIN", "geography"),
        ("VIOLIN", "music"),
        ("DATABASE", "technology"),
    ]

    print(f"\n🎯 Testing {len(test_cases)} word-topic combinations with enhanced semantic analysis")
    print("=" * 60)

    successful_clues = 0
    total_tests = len(test_cases)
    high_quality_clues = 0

    for word, topic in test_cases:
        print(f"\n📝 Testing: '{word}' + '{topic}'")
        print("-" * 40)

        try:
            # Generate multiple clues with different styles for variety.
            # Any failure here abandons this word and is reported below.
            candidates = _generate_candidates(clue_gen, word, topic)
            _print_candidates(candidates)

            # Use the best clue (first one)
            best_clue = candidates[0] if candidates else None
            print(f"\n🏆 Best clue: {best_clue}")

            if not best_clue:
                print("❌ No clue generated")
                continue

            quality = classify_clue_quality(word, best_clue)
            if quality == "EXCELLENT":
                high_quality_clues += 1
            # Everything except POOR counts as a successful clue.
            if quality != "POOR":
                successful_clues += 1
            print(_QUALITY_MESSAGES[quality])

        except Exception as e:
            print(f"❌ Error generating clue: {e}")
            logger.exception("Detailed error:")

    print("\n" + "=" * 60)
    print("📊 ENHANCED SEMANTIC RESULTS")
    print("=" * 60)
    print(f"Total tests: {total_tests}")
    print(f"Successful clues: {successful_clues}")
    print(f"High quality clues: {high_quality_clues}")
    print(f"Overall success rate: {(successful_clues/total_tests)*100:.1f}%")
    print(f"High quality rate: {(high_quality_clues/total_tests)*100:.1f}%")

    # Enhanced evaluation criteria
    if high_quality_clues >= total_tests * 0.6:  # 60% high quality
        print("🎉 Enhanced semantic approach produces excellent clues!")
        print("🚀 Ready for integration into main crossword application")
    elif successful_clues >= total_tests * 0.8:  # 80% acceptable
        print("🔄 Good improvement over LLM, suitable for production use")
    elif successful_clues >= total_tests * 0.6:  # 60% acceptable
        print("⚠️ Decent improvement, may need more template refinement")
    else:
        print("❌ Still struggling, consider alternative approaches")


def interactive_test():
    """Interactive test mode for user-provided word-topic combinations.

    Reads ``word,topic`` pairs from stdin until the user types ``quit``,
    ``exit`` or ``q``, presses Ctrl-C, or stdin reaches end-of-file. The
    ``batch`` command runs the full test suite with the generator that is
    already initialized.
    """
    print("🧪 Interactive Semantic Clue Testing")
    print("=" * 60)

    if not GENERATOR_AVAILABLE:
        print("❌ Cannot run test - Enhanced generators not available")
        return

    clue_gen = _initialize_clue_generator()
    if clue_gen is None:
        return

    print("\n" + "=" * 60)
    print("🎯 INTERACTIVE MODE")
    print("=" * 60)
    print("Enter word-topic pairs to test clue generation.")
    print("Format: word,topic (e.g., 'cat,animals')")
    print("Type 'quit' or 'exit' to stop.")
    print("Type 'batch' to run the full test suite.")
    print("-" * 60)

    while True:
        try:
            user_input = input("\n📝 Enter word,topic: ").strip()
            command = user_input.lower()

            if command in ('quit', 'exit', 'q'):
                print("👋 Goodbye!")
                break
            if command == 'batch':
                print("\n🔄 Running full test suite...")
                # Reuse the live generator instead of initializing a new one.
                test_enhanced_semantic_clues(clue_gen=clue_gen)
                print("\n" + "=" * 60)
                print("🎯 Back to interactive mode")
                print("-" * 60)
                continue
            if not user_input or ',' not in user_input:
                print("❌ Invalid format. Use: word,topic (e.g., 'cat,animals')")
                continue

            # Parse input; only the first comma separates word from topic.
            raw_word, raw_topic = user_input.split(',', 1)
            word = raw_word.strip().upper()
            topic = raw_topic.strip().lower()

            if not word or not topic:
                print("❌ Both word and topic are required")
                continue

            print(f"\n📝 Testing: '{word}' + '{topic}'")
            print("-" * 40)

            # Generate multiple clues with different styles for variety; a
            # style that fails is skipped so the others can still be shown.
            candidates = _generate_candidates(
                clue_gen, word, topic, skip_failed_styles=True
            )

            if not candidates:
                print("❌ No clues generated")
                continue

            _print_candidates(candidates)
            best_clue = candidates[0]
            print(f"\n🏆 Best clue: {best_clue}")
            print(_QUALITY_MESSAGES[classify_clue_quality(word, best_clue)])

        except (KeyboardInterrupt, EOFError):
            # EOFError must end the session too: once stdin is exhausted,
            # every further input() call raises again, so treating it as an
            # ordinary error would loop forever.
            print("\n👋 Goodbye!")
            break
        except Exception as e:
            print(f"❌ Error: {e}")


def main():
    """Run the enhanced semantic test.

    With ``--interactive`` as the first command-line argument the interactive
    mode is started; otherwise the full batch suite runs.
    """
    if len(sys.argv) > 1 and sys.argv[1] == '--interactive':
        interactive_test()
    else:
        print("Run with --interactive for user input mode, or without args for full test.")
        test_enhanced_semantic_clues()


if __name__ == "__main__":
    main()