abc123 / hack /demo_complete_pipeline.py
vimalk78's picture
feat(crossword): generated crosswords with clues
486eff6
raw
history blame
15.7 kB
#!/usr/bin/env python3
"""
Complete Pipeline Demo for Integrated Crossword Generator
Demonstrates the full capabilities of the integrated system combining:
- UnifiedThematicWordGenerator: Smart word discovery with 100K+ vocabulary
- APIClueGenerator: High-quality clue generation using multiple AI models
Shows real crossword creation scenarios and advanced features.
"""
import sys
import os
import time
import asyncio
from pathlib import Path
# Make sibling modules in this script's directory importable.
sys.path.insert(0, str(Path(__file__).parent))

# The integrated generator is optional at import time: record whether it
# could be loaded so main() can bail out with a helpful message.
try:
    from integrated_crossword_generator import IntegratedCrosswordGenerator, CrosswordEntry
except ImportError as import_error:
    print(f"โŒ Integration import error: {import_error}")
    INTEGRATED_AVAILABLE = False
else:
    INTEGRATED_AVAILABLE = True
def demo_basic_integration():
    """Run demo 1: initialize the generator and produce a small entry set.

    Builds an ``IntegratedCrosswordGenerator`` with a 50,000-word vocabulary
    limit, initializes it, prints the readiness of every component reported
    by ``get_system_info()``, then requests five medium-difficulty entries
    for the topic "animals" and prints each one with its clue metadata.

    Returns:
        The generator instance, so the remaining demos can reuse it.
    """
    clue_quality_icons = {
        "EXCELLENT": "๐Ÿ†",
        "GOOD": "โœ…",
        "ACCEPTABLE": "๐Ÿ”„",
        "POOR": "โŒ",
        "BASIC": "๐Ÿ“",
    }

    print("๐Ÿ”— Demo 1: Basic Integration")
    print("=" * 50)

    crossword_generator = IntegratedCrosswordGenerator(vocab_size_limit=50000)
    crossword_generator.initialize()

    # Per-component readiness report.
    system_info = crossword_generator.get_system_info()
    print("๐Ÿ“Š System Components:")
    for name, info in system_info['components'].items():
        if info['ready']:
            status = "โœ… Ready"
            # Only the two known components carry extra detail.
            if name == 'thematic_generator':
                details = f" ({info['vocab_size']:,} words)"
            elif name == 'api_clue_generator':
                details = f" ({len(info['models'])} models)"
            else:
                details = ""
        else:
            status = "โŒ Not Ready"
            details = ""
        print(f" {name}: {status}{details}")

    # Basic generation test, timed with wall-clock time.
    print(f"\n๐ŸŽฏ Generating crossword entries for 'animals'...")
    started = time.time()
    entries = crossword_generator.generate_crossword_entries(
        topic="animals",
        num_words=5,
        difficulty="medium",
    )
    elapsed = time.time() - started

    if not entries:
        print("โŒ No entries generated")
        return crossword_generator

    print(f"โœ… Generated {len(entries)} entries in {elapsed:.2f}s")
    print("\nResults:")
    for position, entry in enumerate(entries, 1):
        icon = clue_quality_icons.get(entry.clue_quality, "?")
        print(f" {position}. {entry.word:<10} | {icon} {entry.clue}")
        print(f" Similarity: {entry.similarity_score:.3f} | Model: {entry.clue_model}")
    return crossword_generator
def demo_difficulty_levels(generator):
    """Run demo 2: generate "technology" entries at each difficulty level.

    For "easy", "medium" and "hard" in turn, asks ``generator`` for four
    entries and prints each word with its letter count, frequency tier and
    clue. A failure in one difficulty level is printed and does not stop
    the remaining levels.

    Args:
        generator: Object exposing ``generate_crossword_entries(topic=...,
            num_words=..., difficulty=...)`` whose entries provide ``word``,
            ``frequency_tier`` and ``clue`` attributes.
    """
    print("\n๐ŸŽš๏ธ Demo 2: Difficulty Levels")
    print("=" * 50)

    subject = "technology"
    for level in ["easy", "medium", "hard"]:
        print(f"\n๐Ÿ“ {level.upper()} difficulty:")
        print("-" * 25)
        try:
            found = generator.generate_crossword_entries(
                topic=subject,
                num_words=4,
                difficulty=level,
            )
            if not found:
                print(" No entries generated")
                continue
            for item in found:
                # Show word characteristics by difficulty.
                letters = f"({len(item.word)} letters)"
                if '_' in item.frequency_tier:
                    # Second underscore-separated field of the tier name.
                    tier_label = item.frequency_tier.split('_')[1]
                else:
                    tier_label = item.frequency_tier
                print(f" {item.word:<12} {letters:<12} Tier {tier_label} | {item.clue}")
        except Exception as e:
            print(f" โŒ Error: {e}")
def demo_multiple_topics(generator):
    """Run demo 3: generate entries for several topics in a single call.

    Requests three medium-difficulty words for each of "science", "music",
    "sports" and "food" through ``generator.generate_by_multiple_topics``,
    reports the elapsed wall-clock time and prints the entries per topic.

    Args:
        generator: Object exposing ``generate_by_multiple_topics(topics=...,
            words_per_topic=..., difficulty=...)`` returning a mapping of
            topic to a list of entries with ``word``, ``clue`` and
            ``clue_quality`` attributes.

    Returns:
        The mapping returned by ``generate_by_multiple_topics``, unchanged.
    """
    icons_by_quality = {
        "EXCELLENT": "๐Ÿ†",
        "GOOD": "โœ…",
        "ACCEPTABLE": "๐Ÿ”„",
    }
    default_icon = "๐Ÿ“"  # used for every other quality label

    print("\n๐ŸŒ Demo 3: Multiple Topics")
    print("=" * 50)

    topics = ["science", "music", "sports", "food"]
    print(f"๐ŸŽฏ Generating entries for {len(topics)} topics...")

    began = time.time()
    results = generator.generate_by_multiple_topics(
        topics=topics,
        words_per_topic=3,
        difficulty="medium",
    )
    elapsed = time.time() - began
    print(f"โœ… Generated entries for all topics in {elapsed:.2f}s")

    for topic_name, topic_entries in results.items():
        print(f"\n๐Ÿ“š {topic_name.upper()}:")
        print("-" * 20)
        if not topic_entries:
            print(" No entries generated")
            continue
        for item in topic_entries:
            icon = icons_by_quality.get(item.clue_quality, default_icon)
            print(f" {item.word:<10} | {icon} {item.clue}")
    return results
def demo_advanced_filtering(generator):
    """Run demo 4: generate a larger "nature" set and group it by clue quality.

    Requests eight medium-difficulty entries with ``min_similarity=0.4`` and
    prints them grouped by ``clue_quality``. The five known labels are shown
    in a fixed best-to-worst order; any other label the generator produces is
    listed afterwards with a "?" icon, so every generated entry appears in
    the output and the group sizes add up to the reported total. Within a
    group, entries are sorted by similarity score, highest first.

    Args:
        generator: Object exposing ``generate_crossword_entries(topic=...,
            num_words=..., difficulty=..., min_similarity=...)`` whose entries
            provide ``word``, ``clue``, ``clue_quality`` and
            ``similarity_score`` attributes.
    """
    quality_order = ["EXCELLENT", "GOOD", "ACCEPTABLE", "POOR", "BASIC"]
    quality_icons = {
        "EXCELLENT": "๐Ÿ†",
        "GOOD": "โœ…",
        "ACCEPTABLE": "๐Ÿ”„",
        "POOR": "โŒ",
        "BASIC": "๐Ÿ“",
    }

    print("\n๐Ÿ” Demo 4: Advanced Filtering")
    print("=" * 50)

    # Generate a larger set to show filtering.
    print("๐ŸŽฏ Generating large set with filtering...")
    entries = generator.generate_crossword_entries(
        topic="nature",
        num_words=8,
        difficulty="medium",
        min_similarity=0.4,  # Higher threshold for better quality
    )
    if not entries:
        print("โŒ No entries generated")
        return

    # Group by clue quality, remembering the first-seen order of labels.
    quality_groups = {}
    for entry in entries:
        quality_groups.setdefault(entry.clue_quality, []).append(entry)

    print(f"โœ… Generated {len(entries)} entries, grouped by quality:")

    # Known labels in ranked order first, then any unexpected labels so that
    # no generated entry is silently omitted from the listing.
    known_labels = [label for label in quality_order if label in quality_groups]
    other_labels = [label for label in quality_groups if label not in quality_icons]
    for quality in known_labels + other_labels:
        group = quality_groups[quality]
        icon = quality_icons.get(quality, "?")
        print(f"\n{icon} {quality} ({len(group)} entries):")
        for entry in sorted(group, key=lambda x: x.similarity_score, reverse=True):
            print(f" {entry.word:<12} (sim: {entry.similarity_score:.3f}) | {entry.clue}")
def demo_performance_analysis(generator):
    """Run demo 5: time three generation scenarios and print statistics.

    Resets ``generator.stats`` to zeroed counters, runs three fixed
    topic/size/difficulty scenarios while timing each with wall-clock time,
    then prints the totals reported by ``generator.get_stats()`` together
    with the distribution of clue quality over all generated entries.

    A scenario that yields no entries (an empty or ``None`` result) counts
    as zero entries instead of aborting the analysis.

    Args:
        generator: Object exposing a writable ``stats`` attribute,
            ``generate_crossword_entries(topic=..., num_words=...,
            difficulty=...)`` and ``get_stats()`` returning a mapping with
            ``words_discovered``, ``api_calls`` and ``total_time`` keys.
    """
    print("\n๐Ÿ“Š Demo 5: Performance Analysis")
    print("=" * 50)

    # Reset stats so the summary only reflects the scenarios below.
    generator.stats = {
        'words_discovered': 0,
        'clues_generated': 0,
        'api_calls': 0,
        'cache_hits': 0,
        'total_time': 0.0,
    }

    # Performance test scenarios.
    scenarios = [
        {"topic": "animals", "num_words": 3, "difficulty": "easy"},
        {"topic": "technology", "num_words": 5, "difficulty": "medium"},
        {"topic": "music", "num_words": 4, "difficulty": "hard"},
    ]

    print("๐Ÿƒโ€โ™‚๏ธ Running performance scenarios...")
    all_entries = []
    for i, scenario in enumerate(scenarios, 1):
        print(f"\nScenario {i}: {scenario['topic']} ({scenario['difficulty']})")
        start_time = time.time()
        # Normalize a falsy result (None / empty) to an empty list so the
        # bookkeeping below cannot fail on it.
        entries = generator.generate_crossword_entries(**scenario) or []
        scenario_time = time.time() - start_time
        all_entries.extend(entries)
        print(f" Time: {scenario_time:.2f}s | Entries: {len(entries)}")

    # Final statistics.
    final_stats = generator.get_stats()
    print("\n๐Ÿ“ˆ Performance Summary:")
    print(f" Total entries generated: {len(all_entries)}")
    print(f" Total words discovered: {final_stats['words_discovered']}")
    print(f" Total API calls: {final_stats['api_calls']}")
    print(f" Total time: {final_stats['total_time']:.2f}s")
    if all_entries:
        avg_time_per_entry = final_stats['total_time'] / len(all_entries)
        print(f" Avg time per entry: {avg_time_per_entry:.2f}s")

    # Quality distribution over every generated entry.
    quality_counts = {}
    for entry in all_entries:
        quality_counts[entry.clue_quality] = quality_counts.get(entry.clue_quality, 0) + 1

    print(f"\n๐ŸŽฏ Quality Distribution:")
    for quality, count in sorted(quality_counts.items()):
        percentage = (count / len(all_entries)) * 100
        print(f" {quality}: {count} ({percentage:.1f}%)")
def demo_educational_crossword(generator):
    """Run demo 6: generate classroom-style entries for four school subjects.

    For each subject a specific topic is passed to ``generator`` with a
    request for three medium-difficulty entries; each entry is printed with
    its clue, frequency tier and the model that produced the clue. A failure
    for one subject is printed and the remaining subjects still run. A
    subject that yields nothing (an empty or ``None`` result) is recorded as
    an empty list so the final total can always be computed.

    Args:
        generator: Object exposing ``generate_crossword_entries(topic=...,
            num_words=..., difficulty=...)`` whose entries provide ``word``,
            ``clue``, ``frequency_tier`` and ``clue_model`` attributes.
    """
    print("\n๐ŸŽ“ Demo 6: Educational Crossword Creation")
    print("=" * 50)

    # Educational themes: subject name -> topic handed to the generator.
    educational_topics = {
        "Biology": "cellular biology",
        "Chemistry": "chemical elements",
        "Physics": "fundamental forces",
        "Mathematics": "algebra concepts",
    }

    print("๐Ÿ“š Creating educational crossword content...")
    educational_entries = {}
    for subject, specific_topic in educational_topics.items():
        print(f"\n๐Ÿ”ฌ {subject} - {specific_topic}:")
        print("-" * 30)
        try:
            # Normalize a falsy result to [] so the summary's len() is safe.
            entries = generator.generate_crossword_entries(
                topic=specific_topic,
                num_words=3,
                difficulty="medium",
            ) or []
            educational_entries[subject] = entries
            if entries:
                for entry in entries:
                    # Show educational context: second underscore-separated
                    # field of the tier name, or "unknown" without one.
                    tier_level = entry.frequency_tier.split('_')[1] if '_' in entry.frequency_tier else "unknown"
                    print(f" {entry.word:<15} | {entry.clue}")
                    print(f" โ””โ”€ Difficulty tier: {tier_level}, Model: {entry.clue_model}")
                    print()
            else:
                print(" No suitable educational terms found")
        except Exception as e:
            print(f" โŒ Error generating {subject} content: {e}")

    # Summary of educational content.
    total_educational = sum(len(entries) for entries in educational_entries.values())
    print(f"โœ… Created {total_educational} educational crossword entries across {len(educational_topics)} subjects")
def _parse_interactive_request(user_input):
    """Split an interactive request into ``(topic, num_words, difficulty)``.

    The optional word count and difficulty are read from the END of the
    input, so everything in front of them is the topic. This keeps
    multi-word topics intact ("ice cream 5 hard") and lets either optional
    field be given on its own ("animals hard"). At least one token is always
    kept as the topic. Missing fields default to 4 words and "medium".
    """
    tokens = user_input.split()
    num_words = None
    difficulty = None
    while len(tokens) > 1:
        candidate = tokens[-1].lower()
        if difficulty is None and candidate in ('easy', 'medium', 'hard'):
            difficulty = candidate
        elif num_words is None and candidate.isdecimal():
            # isdecimal() only accepts strings that int() can convert.
            num_words = int(candidate)
        else:
            break
        tokens.pop()
    return (
        " ".join(tokens),
        4 if num_words is None else num_words,
        'medium' if difficulty is None else difficulty,
    )


def demo_interactive_generation():
    """Run demo 7: read topics from stdin and generate entries on demand.

    Creates its own ``IntegratedCrosswordGenerator`` (25,000-word vocabulary
    limit) and loops over user requests of the form
    ``<topic> [num_words] [difficulty]`` until the user types quit/exit/q,
    presses Ctrl-C, or standard input reaches end-of-file. Errors raised
    while handling one request are printed and the loop continues. Session
    statistics from ``get_stats()`` are printed on exit.

    Returns early, without prompting, when neither the thematic generator
    nor the API clue generator reports ready.
    """
    quality_icons = {
        "EXCELLENT": "๐Ÿ†",
        "GOOD": "โœ…",
        "ACCEPTABLE": "๐Ÿ”„",
        "POOR": "โŒ",
        "BASIC": "๐Ÿ“",
    }

    print("\n๐ŸŽฎ Demo 7: Interactive Generation")
    print("=" * 50)

    generator = IntegratedCrosswordGenerator(vocab_size_limit=25000)
    generator.initialize()
    if not (generator.thematic_ready or generator.api_ready):
        print("โŒ System not ready for interactive demo")
        return

    print("๐ŸŽฏ Interactive Crossword Generator")
    print("Enter topics to generate crossword entries (or 'quit' to exit)")
    print("Format: <topic> [num_words] [difficulty]")
    print("Example: animals 5 medium")
    print()

    while True:
        try:
            user_input = input("๐Ÿ“ Enter topic (or 'quit'): ").strip()
            if user_input.lower() in ['quit', 'exit', 'q']:
                break
            if not user_input:
                continue

            topic, num_words, difficulty = _parse_interactive_request(user_input)
            print(f"\n๐Ÿ” Generating {num_words} {difficulty} entries for '{topic}'...")

            start_time = time.time()
            entries = generator.generate_crossword_entries(
                topic=topic,
                num_words=num_words,
                difficulty=difficulty,
            )
            generation_time = time.time() - start_time

            if entries:
                print(f"โœ… Generated {len(entries)} entries in {generation_time:.2f}s:")
                print()
                for i, entry in enumerate(entries, 1):
                    quality_icon = quality_icons.get(entry.clue_quality, "?")
                    print(f" {i}. {entry.word:<12} | {quality_icon} {entry.clue}")
                    print(f" Similarity: {entry.similarity_score:.3f} | {entry.tier_description}")
                    print()
            else:
                print("โŒ No entries generated. Try a different topic.")
        except (KeyboardInterrupt, EOFError):
            # EOFError means stdin is closed (piped or redirected input).
            # Every later input() would fail the same way, so leave the loop
            # instead of reporting the error and retrying forever.
            print("\n\n๐Ÿ‘‹ Exiting interactive mode")
            break
        except Exception as e:
            print(f"โŒ Error: {e}")

    # Show final stats.
    final_stats = generator.get_stats()
    print("\n๐Ÿ“Š Interactive Session Stats:")
    print(f" Words discovered: {final_stats['words_discovered']}")
    print(f" Clues generated: {final_stats['clues_generated']}")
    print(f" Total time: {final_stats['total_time']:.2f}s")
def main():
    """Run the complete pipeline demonstration.

    Exits early with installation hints when the integrated generator could
    not be imported. Warns (but continues) when the ``HF_TOKEN`` environment
    variable is unset. Runs demo 1 and, if the generator it returns reports
    ``is_initialized``, demos 2-6 on that same generator. Finally offers the
    optional interactive demo; when standard input is closed (non-interactive
    run) the offer is treated as declined.

    Ctrl-C and any other exception raised by the demos are reported on
    stdout rather than propagated.
    """
    if not INTEGRATED_AVAILABLE:
        print("โŒ Integrated crossword generator not available")
        print("Please check that all dependencies are installed:")
        print(" - thematic_word_generator.py")
        print(" - api_clue_generator.py")
        print(" - All required Python packages")
        return

    # Check for required token; the demo still runs without it.
    hf_token = os.getenv('HF_TOKEN')
    if not hf_token:
        print("โŒ HF_TOKEN environment variable not set")
        print("Set your token: export HF_TOKEN='your_token_here'")
        print("Some features will be limited without API access")
        print()

    print("๐Ÿš€ Complete Pipeline Demo: Integrated Crossword Generator")
    print("=" * 70)
    print("Demonstrating the integration of:")
    print(" ๐Ÿ” Smart Word Discovery (Thematic Generator)")
    print(" ๐Ÿค– High-Quality Clue Generation (API Models)")
    print(" ๐Ÿ“Š Difficulty Control (Frequency Tiers)")
    print(" ๐ŸŽฏ Topic-Focused Generation")
    print()

    try:
        # Run all demos; demos 2-6 reuse the generator built by demo 1.
        generator = demo_basic_integration()
        if generator and generator.is_initialized:
            demo_difficulty_levels(generator)
            demo_multiple_topics(generator)
            demo_advanced_filtering(generator)
            demo_performance_analysis(generator)
            demo_educational_crossword(generator)

        # Optional interactive demo.
        print("\n" + "=" * 70)
        try:
            choice = input("๐ŸŽฎ Run interactive demo? (y/n): ").strip().lower()
        except EOFError:
            # No stdin available (piped/CI run): skip the optional demo
            # instead of reporting a failure after the demos succeeded.
            print()
            choice = ""
        if choice in ['y', 'yes']:
            demo_interactive_generation()

        print("\n" + "=" * 70)
        print("โœ… COMPLETE PIPELINE DEMO FINISHED")
        print("=" * 70)
        print("๐ŸŽ‰ Successfully demonstrated integrated crossword generation!")
        print("\n๐Ÿ’ก This system combines the best of both worlds:")
        print(" ๐Ÿ” Intelligent word discovery with 100K+ vocabulary")
        print(" ๐Ÿค– AI-powered clue generation using multiple models")
        print(" ๐Ÿ“Š Difficulty control using frequency analysis")
        print(" ๐ŸŽฏ Theme-focused content generation")
        print(" โšก Efficient caching and batch processing")
        print("\n๐Ÿ“ Key files created:")
        print(" โ€ข integrated_crossword_generator.py - Main integration class")
        print(" โ€ข test_integrated_system.py - Comprehensive tests")
        print(" โ€ข demo_complete_pipeline.py - This demonstration")
        print("\n๐Ÿš€ Ready for production use in crossword applications!")
    except KeyboardInterrupt:
        print("\n\n๐Ÿ‘‹ Demo interrupted by user")
    except Exception as e:
        print(f"\nโŒ Demo error: {e}")
        print("Check system dependencies and configuration")
# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()