"""
Quick Test: Semantic Template Clue Generation

Test the semantic template approach against the same problematic examples
that failed with the LLM.
"""
|
|
|
import sys |
|
import logging |
|
from pathlib import Path |
|
|
|
|
|
# Put this script's own directory first on the import path so the module
# imported below resolves regardless of the current working directory.
sys.path.insert(0, str(Path(__file__).parent))

# Record whether the generator could be imported; test_semantic_clues() checks
# this flag before touching SemanticClueGenerator.
try:
    from semantic_clue_generator import SemanticClueGenerator
    GENERATOR_AVAILABLE = True
except ImportError as e:
    # The status glyph here was mis-decoded in the source; restored to the
    # intended cross mark.
    print(f"❌ Import error: {e}")
    GENERATOR_AVAILABLE = False

# Configure root logging at INFO level with timestamped records.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
|
|
|
|
|
# Clue styles requested from the generator for every word, in this order; the
# first style that yields a clue supplies the "best" clue.
_CLUE_STYLES = ("category", "definition", "description")

# A clue containing any of these substrings (case-insensitive) is rated POOR.
_JUNK_PHRASES = (
    'trick and treating',
    'gritting your teeth',
    'fender',
    'occurrence',
)


def _collect_candidates(generator, word, topic):
    """Return the distinct, non-empty clues produced for *word*, one per style."""
    candidates = []
    for style in _CLUE_STYLES:
        clue = generator.generate_clue(
            word=word,
            topic=topic,
            clue_style=style,
        )
        if clue and clue not in candidates:
            candidates.append(clue)
    return candidates


def _is_good_clue(word, clue):
    """Return True if *clue* passes the quality heuristics for *word*.

    A clue passes when it is non-empty, longer than three characters, does not
    contain the answer word itself and contains none of ``_JUNK_PHRASES``
    (both comparisons are case-insensitive).
    """
    if not clue or len(clue) <= 3:
        return False
    lowered = clue.lower()
    if word.lower() in lowered:
        return False
    return not any(junk in lowered for junk in _JUNK_PHRASES)


def _print_summary(successful_clues, total_tests):
    """Print the totals, the success rate and a one-line verdict."""
    # Guard the division so an empty run reports 0.0% instead of raising.
    success_rate = (successful_clues / total_tests) * 100 if total_tests else 0.0

    print("\n" + "=" * 60)
    print("📊 SEMANTIC TEMPLATE RESULTS")
    print("=" * 60)
    print(f"Total tests: {total_tests}")
    print(f"Successful clues: {successful_clues}")
    print(f"Success rate: {success_rate:.1f}%")

    if successful_clues >= total_tests * 0.8:
        print("🎉 Semantic templates show MAJOR improvement over LLM!")
    elif successful_clues >= total_tests * 0.6:
        print("👍 Good improvement, semantic approach is viable")
    elif successful_clues >= total_tests * 0.3:
        print("⚠️ Some improvement, but templates need refinement")
    else:
        print("❌ Semantic approach also struggling, may need hybrid method")


def test_semantic_clues():
    """Test semantic template clue generation with problematic examples.

    Builds and initializes a ``SemanticClueGenerator``, requests a clue in
    every style for a fixed list of (word, topic) pairs, rates the first
    candidate of each pair and prints a per-word report followed by a summary.

    Returns None. If the generator module was not importable, or if
    ``initialize()`` raises, a message is printed and the function returns
    early. An exception while processing one word is printed and logged, and
    the run continues with the next word.

    NOTE(review): the status glyphs in the printed messages were mis-decoded
    in the source. The check mark, cross mark, test tube, target and warning
    sign were recoverable; the remaining ones (initializing, per-word header,
    best clue, results header and the two positive verdicts) had lost their
    bytes and are plausible stand-ins -- confirm against the original file.
    """
    if not GENERATOR_AVAILABLE:
        print("❌ Cannot run test - Semantic generator not available")
        return

    print("🧪 Testing Semantic Template Clue Generation")
    print("=" * 60)

    print("🔄 Initializing semantic clue generator...")
    generator = SemanticClueGenerator()

    try:
        generator.initialize()
        # This literal was split across two physical lines in the source,
        # which is a syntax error for a single-quoted string.
        print("✅ Generator initialized successfully")
    except Exception as e:
        print(f"❌ Failed to initialize generator: {e}")
        return

    test_cases = [
        ("CAT", "animals"),
        ("KITTY", "animals"),
        ("MEAL", "food"),
        ("HUNGER", "food"),
        ("TECH", "technology"),
        ("SCIENTIST", "science"),

        ("DOG", "animals"),
        ("PYTHON", "technology"),
        ("GUITAR", "music"),
        ("OCEAN", "geography"),
        ("ATOM", "science"),
        ("PIZZA", "food"),
    ]

    print(f"\n🎯 Testing {len(test_cases)} word-topic combinations")
    print("=" * 60)

    successful_clues = 0
    total_tests = len(test_cases)

    for word, topic in test_cases:
        print(f"\n📝 Testing: '{word}' + '{topic}'")
        print("-" * 40)

        # Deliberately broad: one failing word must not abort the whole run.
        try:
            candidates = _collect_candidates(generator, word, topic)

            print(f"Generated {len(candidates)} candidates:")
            for i, candidate in enumerate(candidates, 1):
                print(f" {i}. {candidate}")

            # Candidates keep style order, so the first one is the clue from
            # the earliest style that produced anything.
            best_clue = candidates[0] if candidates else None
            print(f"\n🏆 Best clue: {best_clue}")

            if _is_good_clue(word, best_clue):
                successful_clues += 1
                print("✅ Quality: GOOD")
            else:
                print("❌ Quality: POOR")
        except Exception as e:
            print(f"❌ Error generating clue: {e}")
            logger.exception("Detailed error:")

    _print_summary(successful_clues, total_tests)
|
|
|
|
|
def main():
    """Script entry point: execute the semantic-template clue check once."""
    test_semantic_clues()


# Run the check only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()