#!/usr/bin/env python3
"""
Quick Test: Semantic Template Clue Generation
Test the semantic template approach against the same problematic examples that failed with LLM.
"""
import sys
import logging
from pathlib import Path
# Add hack directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))
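# (semantic_clue_generator.py is expected to sit in the same directory as this
# script, so the import below works regardless of the current working directory)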
try:
    from semantic_clue_generator import SemanticClueGenerator
    GENERATOR_AVAILABLE = True
except ImportError as e:
    print(f"❌ Import error: {e}")
    GENERATOR_AVAILABLE = False
# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
def test_semantic_clues():
"""Test semantic template clue generation with problematic examples."""
if not GENERATOR_AVAILABLE:
print("β Cannot run test - Semantic generator not available")
return
print("π§ͺ Testing Semantic Template Clue Generation")
print("=" * 60)
# Initialize generator
print("π Initializing semantic clue generator...")
generator = SemanticClueGenerator()
try:
generator.initialize()
print("β
Generator initialized successfully")
except Exception as e:
print(f"β Failed to initialize generator: {e}")
return
    # Test cases that failed with LLM - the same examples the user reported as junk
    test_cases = [
        # Previously bad LLM examples
        ("CAT", "animals"),
        ("KITTY", "animals"),
        ("MEAL", "food"),
        ("HUNGER", "food"),
        ("TECH", "technology"),
        ("SCIENTIST", "science"),
        # Additional test cases
        ("DOG", "animals"),
        ("PYTHON", "technology"),
        ("GUITAR", "music"),
        ("OCEAN", "geography"),
        ("ATOM", "science"),
        ("PIZZA", "food"),
    ]
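    # Each tuple is (answer word, puzzle topic); both values are passed straight
    # through to generator.generate_clue() below.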
print(f"\nπ― Testing {len(test_cases)} word-topic combinations")
print("=" * 60)
successful_clues = 0
total_tests = len(test_cases)
for word, topic in test_cases:
print(f"\nπ Testing: '{word}' + '{topic}'")
print("-" * 40)
        try:
            # Generate multiple clues with different styles for variety
            styles = ["category", "definition", "description"]
            candidates = []
            for style in styles:
                clue = generator.generate_clue(
                    word=word,
                    topic=topic,
                    clue_style=style
                )
                if clue and clue not in candidates:
                    candidates.append(clue)

            print(f"Generated {len(candidates)} candidates:")
            for i, candidate in enumerate(candidates, 1):
                print(f" {i}. {candidate}")

            # Use the first/best clue
            best_clue = candidates[0] if candidates else None
            print(f"\n🏆 Best clue: {best_clue}")
            # Quality evaluation - more comprehensive than LLM test
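            # A clue counts as GOOD only if it is non-trivially long, does not
            # leak the answer word, and avoids junk phrases seen in the earlier
            # LLM output.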
            if (best_clue and
                    len(best_clue) > 3 and
                    word.lower() not in best_clue.lower() and
                    not any(junk in best_clue.lower() for junk in
                            ['trick and treating', 'gritting your teeth', 'fender', 'occurrence'])):
                successful_clues += 1
                print("✅ Quality: GOOD")
            else:
                print("❌ Quality: POOR")
        except Exception as e:
            print(f"❌ Error generating clue: {e}")
            logger.exception("Detailed error:")
print(f"\n" + "=" * 60)
print(f"π SEMANTIC TEMPLATE RESULTS")
print(f"=" * 60)
print(f"Total tests: {total_tests}")
print(f"Successful clues: {successful_clues}")
print(f"Success rate: {(successful_clues/total_tests)*100:.1f}%")
# Compare with LLM performance (which was ~0% success)
if successful_clues >= total_tests * 0.8: # 80% success rate
print("π Semantic templates show MAJOR improvement over LLM!")
elif successful_clues >= total_tests * 0.6: # 60% success rate
print("π Good improvement, semantic approach is viable")
elif successful_clues >= total_tests * 0.3: # 30% success rate
print("β οΈ Some improvement, but templates need refinement")
else:
print("β Semantic approach also struggling, may need hybrid method")
def main():
"""Run the semantic template test."""
test_semantic_clues()
if __name__ == "__main__":
main() |
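
# Run this file directly (e.g. python3 path/to/this_script.py). Per-clue results
# and the final success-rate summary are printed to stdout; full tracebacks for
# failed generations go through the module logger.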