Spaces:

vimalk78
/

abc123

Running

App Files Files Community

abc123 / hack /test_integrated_system.py

vimalk78

feat(crossword): generated crosswords with clues

486eff6 22 days ago

raw

history blame

18.8 kB

	#!/usr/bin/env python3
	"""
	Comprehensive Test Suite for Integrated Crossword Generator

	Tests the complete integration between thematic word discovery and API clue generation,
	ensuring the system works correctly and produces high-quality results.

	This test suite uses pre-cached embeddings and vocabulary files (50K words) from
	model_cache/ directory for faster test execution, avoiding re-initialization of
	the sentence transformer model and vocabulary generation.

	Performance: ~93s initialization with cache vs ~250s without cache (~2.7x faster)

	To verify cache setup before running tests:
	python verify_cached_tests.py

	To run the full test suite:
	export HF_TOKEN='your_token' && python test_integrated_system.py
	"""

	import sys
	import os
	import time
	import unittest
	from pathlib import Path
	from unittest.mock import Mock, patch

	# Put this file's own directory at the front of sys.path so the generator
	# module that lives next to it can be imported.
	sys.path.insert(0, str(Path(__file__).parent))

	# INTEGRATED_AVAILABLE records whether the import succeeded; on failure the
	# error is printed instead of raised.
	try:
	    from integrated_crossword_generator import IntegratedCrosswordGenerator, CrosswordEntry
	except ImportError as e:
	    print(f"❌ Integration import error: {e}")
	    INTEGRATED_AVAILABLE = False
	else:
	    INTEGRATED_AVAILABLE = True


	class TestIntegratedCrosswordGenerator(unittest.TestCase):
	    """Test cases for the integrated crossword generator.

	    ``setUp`` builds a fresh ``IntegratedCrosswordGenerator`` for every test,
	    pointed at the ``model_cache`` directory next to this file. Tests call
	    ``initialize()`` themselves and skip when the component they need is
	    not reported as ready.
	    """

	    # Vocabulary size passed to the generator and expected from the cache.
	    VOCAB_SIZE = 50000
	    # Embedding width asserted for the cached all-mpnet-base-v2 vectors.
	    EMBEDDING_DIM = 768

	    @classmethod
	    def setUpClass(cls):
	        """Skip the whole class when the generator module failed to import."""
	        if not INTEGRATED_AVAILABLE:
	            # TestCase.skipTest is an instance method; raising SkipTest is the
	            # documented way to skip from a class-level fixture.
	            raise unittest.SkipTest("Integrated generator not available")

	        # Use test token if available
	        cls.test_token = os.getenv('HF_TOKEN')
	        if not cls.test_token:
	            print("⚠️ HF_TOKEN not set - some tests may be skipped")

	    def setUp(self):
	        """Create an uninitialized generator that reads from model_cache."""
	        cache_dir = str(Path(__file__).parent / 'model_cache')
	        self.generator = IntegratedCrosswordGenerator(
	            vocab_size_limit=self.VOCAB_SIZE,  # Use cached 50K vocabulary
	            cache_dir=cache_dir
	        )

	    def test_initialization(self):
	        """Test generator initialization."""
	        self.assertFalse(self.generator.is_initialized)

	        # perf_counter is monotonic, so the measured duration cannot be
	        # distorted by wall-clock adjustments during the long initialization.
	        start_time = time.perf_counter()
	        self.generator.initialize()
	        init_time = time.perf_counter() - start_time

	        self.assertTrue(self.generator.is_initialized)

	        # Check system info
	        system_info = self.generator.get_system_info()
	        self.assertIn('components', system_info)
	        self.assertIn('stats', system_info)

	        # Verify cached files are being used (should still be reasonable time even with model loading)
	        # Note: Model download/loading takes ~90s, but vocabulary/embeddings load from cache
	        self.assertLess(init_time, 120.0, "Initialization should complete within 2 minutes with cached files")

	        # If thematic generator is ready, verify it used cached data
	        if self.generator.thematic_ready:
	            vocab_size = self.generator.thematic_generator.get_vocabulary_size()
	            self.assertEqual(vocab_size, self.VOCAB_SIZE, "Should use full 50K cached vocabulary")

	    def test_cached_files_usage(self):
	        """Test that cached vocabulary and embeddings are being used."""
	        cache_dir = Path(self.generator.cache_dir)

	        # Verify expected cache files exist
	        vocab_file = cache_dir / f"unified_vocabulary_{self.VOCAB_SIZE}.pkl"
	        freq_file = cache_dir / f"unified_frequencies_{self.VOCAB_SIZE}.pkl"
	        embeddings_file = cache_dir / f"unified_embeddings_all-mpnet-base-v2_{self.VOCAB_SIZE}.npy"

	        self.assertTrue(vocab_file.exists(), f"Vocabulary cache file should exist: {vocab_file}")
	        self.assertTrue(freq_file.exists(), f"Frequency cache file should exist: {freq_file}")
	        self.assertTrue(embeddings_file.exists(), f"Embeddings cache file should exist: {embeddings_file}")

	        # Initialize and verify vocabulary size
	        self.generator.initialize()

	        if self.generator.thematic_ready:
	            thematic = self.generator.thematic_generator
	            self.assertEqual(thematic.get_vocabulary_size(), self.VOCAB_SIZE, "Should use cached 50K vocabulary")

	            # Verify embeddings are loaded
	            self.assertIsNotNone(thematic.vocab_embeddings)
	            embeddings_shape = thematic.vocab_embeddings.shape
	            self.assertEqual(embeddings_shape[0], self.VOCAB_SIZE, "Embeddings should have 50K entries")
	            self.assertEqual(embeddings_shape[1], self.EMBEDDING_DIM, "Should use all-mpnet-base-v2 embeddings (768 dims)")

	    def test_component_availability(self):
	        """Test availability of required components."""
	        self.generator.initialize()

	        # At least one component should be available
	        has_thematic = self.generator.thematic_ready
	        has_api = self.generator.api_ready

	        self.assertTrue(has_thematic or has_api, "At least one component should be available")

	        if has_thematic:
	            self.assertIsNotNone(self.generator.thematic_generator)
	            vocab_size = self.generator.thematic_generator.get_vocabulary_size()
	            self.assertGreater(vocab_size, 0)

	        if has_api:
	            self.assertIsNotNone(self.generator.api_clue_generator)

	    def test_word_discovery_only(self):
	        """Test word discovery when only thematic generator is available."""
	        self.generator.initialize()

	        if not self.generator.thematic_ready:
	            self.skipTest("Thematic generator not available")

	        # Flag the API generator as unavailable for this test
	        self.generator.api_ready = False

	        # Test word discovery
	        words = self.generator._discover_words("animals", 5, "medium", 0.3)

	        if words:  # Only test if words are found
	            self.assertIsInstance(words, list)
	            for word, similarity, tier in words:
	                self.assertIsInstance(word, str)
	                self.assertIsInstance(similarity, float)
	                self.assertIsInstance(tier, str)
	                self.assertGreater(len(word), 2)
	                self.assertGreaterEqual(similarity, 0.0)

	    def test_api_clue_generation_only(self):
	        """Test API clue generation when only API generator is available."""
	        if not self.test_token:
	            self.skipTest("HF_TOKEN not available for API testing")

	        self.generator.initialize()

	        if not self.generator.api_ready:
	            self.skipTest("API generator not available")

	        # Flag the thematic generator as unavailable for this test
	        self.generator.thematic_ready = False

	        # Test with sample word data: (word, similarity, frequency tier)
	        mock_words = [("CAT", 0.8, "tier_5_common"), ("DOG", 0.7, "tier_4_highly_common")]

	        entries = self.generator._generate_clues_for_words(mock_words, "animals")

	        self.assertIsInstance(entries, list)
	        for entry in entries:
	            self.assertIsInstance(entry, CrosswordEntry)
	            self.assertIsInstance(entry.word, str)
	            self.assertIsInstance(entry.clue, str)
	            self.assertGreater(len(entry.clue), 5)  # Clues should be substantial

	    def test_full_integration(self):
	        """Test complete integration when both components are available."""
	        self.generator.initialize()

	        if not (self.generator.thematic_ready and self.generator.api_ready):
	            self.skipTest("Full integration requires both components")

	        # Test complete pipeline
	        entries = self.generator.generate_crossword_entries(
	            topic="animals",
	            num_words=3,
	            difficulty="medium"
	        )

	        self.assertIsInstance(entries, list)
	        self.assertLessEqual(len(entries), 3)  # Should not exceed requested count

	        for entry in entries:
	            self.assertIsInstance(entry, CrosswordEntry)
	            self.assertIsInstance(entry.word, str)
	            self.assertIsInstance(entry.clue, str)
	            self.assertEqual(entry.topic, "animals")
	            self.assertGreater(entry.similarity_score, 0.0)
	            self.assertIn("tier_", entry.frequency_tier)

	    def test_difficulty_filtering(self):
	        """Test difficulty-based word filtering."""
	        self.generator.initialize()

	        if not self.generator.thematic_ready:
	            self.skipTest("Requires thematic generator for difficulty testing")

	        # Test different difficulty levels
	        difficulties = ["easy", "medium", "hard"]

	        for difficulty in difficulties:
	            with self.subTest(difficulty=difficulty):
	                mock_results = [
	                    ("CAT", 0.8, "tier_3_very_common"),  # Easy word
	                    ("ALGORITHM", 0.7, "tier_8_uncommon"),  # Hard word
	                    ("COMPUTER", 0.6, "tier_5_common")  # Medium word
	                ]

	                filtered = self.generator._filter_by_difficulty(mock_results, difficulty)
	                self.assertIsInstance(filtered, list)

	                # Filtering may drop entries but must never add any
	                self.assertLessEqual(len(filtered), len(mock_results))

	    def test_multiple_topics(self):
	        """Test generation for multiple topics."""
	        self.generator.initialize()

	        if not self.generator.is_initialized:
	            self.skipTest("Generator initialization failed")

	        topics = ["animals", "technology"]
	        results = self.generator.generate_by_multiple_topics(
	            topics=topics,
	            words_per_topic=2,
	            difficulty="medium"
	        )

	        self.assertIsInstance(results, dict)
	        self.assertEqual(len(results), len(topics))

	        for topic in topics:
	            self.assertIn(topic, results)
	            self.assertIsInstance(results[topic], list)

	    def test_stats_tracking(self):
	        """Test performance statistics tracking."""
	        self.generator.initialize()

	        # Initial stats
	        initial_stats = self.generator.get_stats()
	        self.assertIsInstance(initial_stats, dict)
	        self.assertIn('words_discovered', initial_stats)
	        self.assertIn('clues_generated', initial_stats)

	        # Without any ready component there is nothing to generate
	        if not (self.generator.thematic_ready or self.generator.api_ready):
	            return

	        # Only the generation call is guarded: a failure there makes the stats
	        # comparison meaningless, but assertion failures below must still
	        # surface (AssertionError is an Exception subclass and would otherwise
	        # be swallowed by a broad handler).
	        try:
	            self.generator.generate_crossword_entries("test", 1, "medium")
	        except Exception as exc:
	            self.skipTest(f"Generation failed, stats update not verified: {exc}")

	        updated_stats = self.generator.get_stats()

	        # Counters must never decrease after a generation run
	        self.assertGreaterEqual(updated_stats['words_discovered'], initial_stats['words_discovered'])
	        self.assertGreaterEqual(updated_stats['clues_generated'], initial_stats['clues_generated'])

	    def test_fallback_behavior(self):
	        """Test fallback behavior when components fail."""
	        self.generator.initialize()

	        # Test with unavailable topic that should trigger fallbacks
	        entries = self.generator.generate_crossword_entries(
	            topic="nonexistent_impossible_topic_xyz123",
	            num_words=1,
	            difficulty="medium"
	        )

	        # Should handle gracefully (empty list or basic entries)
	        self.assertIsInstance(entries, list)

	    def test_crossword_entry_structure(self):
	        """Test CrosswordEntry dataclass structure."""
	        # Field values used both to build the entry and to verify it
	        expected = {
	            "word": "TEST",
	            "clue": "Sample clue",
	            "topic": "testing",
	            "similarity_score": 0.75,
	            "frequency_tier": "tier_5_common",
	            "tier_description": "Common words",
	            "clue_quality": "GOOD",
	            "clue_model": "test_model",
	        }
	        entry = CrosswordEntry(**expected)

	        # Verify all fields round-trip unchanged
	        for field_name, value in expected.items():
	            with self.subTest(field=field_name):
	                self.assertEqual(getattr(entry, field_name), value)


	class TestIntegrationScenarios(unittest.TestCase):
	    """Test realistic integration scenarios.

	    Each scenario builds and initializes its own generator against the
	    ``model_cache`` directory next to this file and is skipped when the
	    generator does not report itself as initialized.
	    """

	    @classmethod
	    def setUpClass(cls):
	        """Skip the whole class when the generator module failed to import."""
	        if not INTEGRATED_AVAILABLE:
	            # TestCase.skipTest is an instance method; raising SkipTest is the
	            # documented way to skip from a class-level fixture.
	            raise unittest.SkipTest("Integrated generator not available")

	        cls.test_token = os.getenv('HF_TOKEN')

	    def _initialized_generator(self):
	        """Return an initialized generator on the cached data, or skip the test."""
	        cache_dir = str(Path(__file__).parent / 'model_cache')
	        generator = IntegratedCrosswordGenerator(
	            vocab_size_limit=50000,
	            cache_dir=cache_dir
	        )
	        generator.initialize()

	        if not generator.is_initialized:
	            self.skipTest("Generator initialization failed")
	        return generator

	    def test_education_crossword_scenario(self):
	        """Test generating educational crossword content."""
	        generator = self._initialized_generator()

	        # Educational topics
	        topics = ["science", "history", "mathematics"]

	        for topic in topics:
	            with self.subTest(topic=topic):
	                entries = generator.generate_crossword_entries(
	                    topic=topic,
	                    num_words=3,
	                    difficulty="medium"
	                )

	                # Should produce educational content
	                self.assertIsInstance(entries, list)
	                for entry in entries:
	                    self.assertEqual(entry.topic, topic)
	                    # Educational words should be substantial
	                    self.assertGreaterEqual(len(entry.word), 3)

	    def test_themed_puzzle_scenario(self):
	        """Test generating themed puzzle content."""
	        generator = self._initialized_generator()

	        # Theme-based generation
	        theme = "ocean life"
	        entries = generator.generate_crossword_entries(
	            topic=theme,
	            num_words=5,
	            difficulty="medium"
	        )

	        # An empty result is tolerated; the loop simply has nothing to check
	        for entry in entries or []:
	            self.assertEqual(entry.topic, theme)
	            self.assertIsInstance(entry.similarity_score, float)
	            self.assertGreater(entry.similarity_score, 0.0)

	    def test_performance_benchmarking(self):
	        """Test performance characteristics."""
	        generator = self._initialized_generator()

	        # Benchmark generation time only (initialization is excluded);
	        # perf_counter is monotonic and intended for measuring intervals.
	        start_time = time.perf_counter()

	        # NOTE(review): this handler also catches AssertionError, so the
	        # timing expectations below are reported but never fail the test.
	        # That matches the "informational" intent stated in the handler.
	        try:
	            entries = generator.generate_crossword_entries(
	                topic="technology",
	                num_words=5,
	                difficulty="medium"
	            )

	            generation_time = time.perf_counter() - start_time

	            # Performance expectations
	            self.assertLess(generation_time, 60.0)  # Should complete within 1 minute

	            if entries:
	                avg_time_per_entry = generation_time / len(entries)
	                self.assertLess(avg_time_per_entry, 20.0)  # Max ~20s per entry

	        except Exception as e:
	            # Performance test is informational
	            print(f"Performance test encountered: {e}")

	def run_comprehensive_tests():
	    """Run all integration tests with detailed reporting.

	    Loads both test classes into one suite, runs it with a verbose text
	    runner writing to stdout, then prints a summary of failures, errors and
	    skips.

	    Returns:
	        bool: ``result.wasSuccessful()`` for the run, or ``False`` when the
	        integrated system could not be imported and no tests were run.
	    """
	    print("🧪 Comprehensive Integration Tests")
	    print("=" * 60)
	    print("📂 Using cached 50K vocabulary and embeddings from model_cache/")
	    print("⚡ This significantly speeds up testing by avoiding re-computation")

	    # Check environment
	    hf_token = os.getenv('HF_TOKEN')
	    if not hf_token:
	        print("⚠️ HF_TOKEN not set - API tests may be limited")

	    if not INTEGRATED_AVAILABLE:
	        print("❌ Integrated system not available - cannot run tests")
	        # Explicit False keeps the return type consistent for callers that
	        # turn it into an exit status.
	        return False

	    # Create test suite
	    loader = unittest.TestLoader()
	    suite = unittest.TestSuite()

	    # Add test cases
	    for test_case in (TestIntegratedCrosswordGenerator, TestIntegrationScenarios):
	        suite.addTests(loader.loadTestsFromTestCase(test_case))

	    # Run tests with detailed output
	    runner = unittest.TextTestRunner(verbosity=2, stream=sys.stdout)
	    result = runner.run(suite)

	    # Summary
	    print("\n" + "=" * 60)
	    print("📊 TEST SUMMARY")
	    print("=" * 60)
	    print(f"Tests run: {result.testsRun}")
	    print(f"Failures: {len(result.failures)}")
	    print(f"Errors: {len(result.errors)}")
	    print(f"Skipped: {len(result.skipped)}")

	    # Failures and errors are both (test, traceback) pairs; show only the
	    # final traceback line for each.
	    for heading, records in (
	        ("\n❌ FAILURES:", result.failures),
	        ("\n❌ ERRORS:", result.errors),
	    ):
	        if not records:
	            continue
	        print(heading)
	        for test, trace in records:
	            trace_lines = trace.splitlines()
	            # Guard against an empty traceback string
	            last_line = trace_lines[-1] if trace_lines else ""
	            print(f" - {test}: {last_line}")

	    if result.skipped:
	        print("\n⏭️ SKIPPED:")
	        for test, reason in result.skipped:
	            print(f" - {test}: {reason}")

	    # Skipped tests are not subtracted, so they count toward the success rate.
	    if result.testsRun > 0:
	        not_failed = result.testsRun - len(result.failures) - len(result.errors)
	        success_rate = not_failed / result.testsRun * 100
	    else:
	        success_rate = 0
	    print(f"\n✅ Success rate: {success_rate:.1f}%")

	    if result.wasSuccessful():
	        print("🎉 All tests passed! Integration system is working correctly.")
	    else:
	        print("⚠️ Some tests failed. Check the system configuration.")

	    return result.wasSuccessful()


	def main():
	    """Run the suite and exit with status 0 on success, 1 otherwise."""
	    passed = run_comprehensive_tests()
	    exit_code = 0 if passed else 1
	    sys.exit(exit_code)


	# Only run the suite when this file is executed directly.
	if __name__ == "__main__":
	    main()