#!/usr/bin/env python3
"""
Test Adaptive Beta with Cricket+Sports Example

Tests that the adaptive beta mechanism generates more words for constrained
cases like "cricket sentence" + "sports topic".

The checks are report-only: each test prints its findings (and a traceback
on error) instead of raising, so the script can be run directly with
``python`` as well as being collected by a test runner.
"""

import logging
import os
import sys
import traceback
import warnings

# Configure logging to see the adaptive beta messages
logging.basicConfig(level=logging.INFO, format='%(message)s')

# Suppress warnings for cleaner output
warnings.filterwarnings("ignore")

# Environment configuration shared by every scenario in this file.  The
# values are exported as environment variables before ThematicWordService is
# constructed (presumably the service reads them at construction or
# initialization time -- confirm against the service implementation).
_SOFT_MIN_ENV = {
    'MULTI_TOPIC_METHOD': 'soft_minimum',
    'SOFT_MIN_BETA': '10.0',
    'SOFT_MIN_MIN_WORDS': '15',
    'SOFT_MIN_MAX_RETRIES': '5',
    'SOFT_MIN_BETA_DECAY': '0.7',
    'THEMATIC_VOCAB_SIZE_LIMIT': '5000',  # Smaller vocab for faster testing
}


def setup_environment() -> None:
    """Point the model caches at ``../cache-dir`` and expose the backend sources.

    Sets ``HF_HOME``, ``TRANSFORMERS_CACHE`` and ``SENTENCE_TRANSFORMERS_HOME``
    to the absolute path of the ``cache-dir`` folder located one level above
    this file, and inserts ``../crossword-app/backend-py/src`` at the front of
    ``sys.path`` if it is not already present.  Safe to call repeatedly.
    """
    here = os.path.dirname(__file__)

    # Set cache directory to root cache-dir folder
    cache_dir = os.path.abspath(os.path.join(here, '..', 'cache-dir'))
    for variable in ('HF_HOME', 'TRANSFORMERS_CACHE', 'SENTENCE_TRANSFORMERS_HOME'):
        os.environ[variable] = cache_dir

    # Add backend source to path
    backend_path = os.path.abspath(
        os.path.join(here, '..', 'crossword-app', 'backend-py', 'src')
    )
    if backend_path not in sys.path:
        sys.path.insert(0, backend_path)

    print(f"Using cache directory: {cache_dir}")


def _configure_soft_minimum(*, adaptive: bool) -> None:
    """Export the soft-minimum settings, toggling ``SOFT_MIN_ADAPTIVE``.

    Each test calls this itself so that it does not depend on another test
    having populated ``os.environ`` first.
    """
    os.environ.update(_SOFT_MIN_ENV)
    os.environ['SOFT_MIN_ADAPTIVE'] = 'true' if adaptive else 'false'


def test_adaptive_beta_cricket_sports() -> None:
    """Test the cricket+sports case that previously generated only 16 words.

    Runs three input sets through ``generate_thematic_words`` with adaptive
    beta enabled and prints, for each, the number of words returned, the top
    15 entries, and whether the 15-word minimum was reached.

    Any exception is caught, printed with its traceback, and not re-raised.
    """
    setup_environment()

    print("🧪 Testing Adaptive Beta with Cricket+Sports Example")
    print("=" * 60)

    # Set environment variables for soft minimum with adaptive beta
    _configure_soft_minimum(adaptive=True)

    try:
        # Imported lazily: the module only becomes importable once
        # setup_environment() has added the backend sources to sys.path.
        from services.thematic_word_service import ThematicWordService

        print("Creating ThematicWordService with adaptive soft minimum...")
        service = ThematicWordService()

        print("Initializing service (adaptive beta configuration will be logged)...")
        service.initialize()

        # Test cases ("expected" and "description" are informational only;
        # they are printed, not asserted).
        test_cases = [
            {
                "name": "Cricket sentence only",
                "inputs": ["india won test series against england"],
                "expected": ">30 words (no constraint)",
                "description": "Single sentence - should generate many words",
            },
            {
                "name": "Cricket sentence + Sports topic",
                "inputs": ["india won test series against england", "Sports"],
                "expected": "~15-25 words (adaptive beta should kick in)",
                "description": "Sentence + topic - adaptive beta should relax to get more words",
            },
            {
                "name": "Multiple sports topics",
                "inputs": ["Cricket", "Tennis", "Football"],
                "expected": "~15-20 words (adaptive beta for 3 topics)",
                "description": "Three topics - should auto-adapt for more words",
            },
        ]

        for i, test_case in enumerate(test_cases, 1):
            print(f"\n📊 Test {i}: {test_case['name']}")
            print(f" Description: {test_case['description']}")
            print(f" Expected: {test_case['expected']}")
            print(f" Inputs: {test_case['inputs']}")
            print("-" * 50)

            # Generate words
            results = service.generate_thematic_words(
                test_case['inputs'],
                num_words=50,
                min_similarity=0.3,
                multi_theme=False,
            )

            print(f"✅ Generated {len(results)} words")
            print("Top 15 words:")
            # Each result is unpacked as a (word, similarity, tier) triple.
            for j, (word, similarity, tier) in enumerate(results[:15], 1):
                print(f" {j:2d}. {word:15s}: {similarity:.4f} ({tier})")

            # Analysis
            if len(results) >= 15:
                print(f" ✅ Success: Generated {len(results)} words (≥ 15 minimum)")
            else:
                print(f" ⚠️ Warning: Only {len(results)} words generated (< 15 minimum)")
                print(" This suggests adaptive beta may need tuning")

    except Exception as e:
        # Report-only by design: print the failure instead of propagating it.
        print(f"❌ Test failed: {e}")
        traceback.print_exc()


def test_adaptive_beta_disabled() -> None:
    """Test with adaptive beta disabled for comparison.

    Runs the cricket sentence + "Sports" input with ``SOFT_MIN_ADAPTIVE`` set
    to ``'false'`` and prints the number of words returned plus the top 10.

    The environment is prepared here as well, so this test gives the same
    result whether or not ``test_adaptive_beta_cricket_sports`` ran first.
    Any exception is caught, printed with its traceback, and not re-raised.
    """
    setup_environment()

    print("\n\n🔒 Testing with Adaptive Beta DISABLED")
    print("=" * 60)

    # Disable adaptive beta (all other soft-minimum settings stay the same)
    _configure_soft_minimum(adaptive=False)

    try:
        # Imported lazily: requires the sys.path entry added above.
        from services.thematic_word_service import ThematicWordService

        service = ThematicWordService()
        service.initialize()

        # Test the problematic case
        inputs = ["india won test series against england", "Sports"]

        print("Testing cricket+sports with fixed beta=10.0...")
        results = service.generate_thematic_words(
            inputs,
            num_words=50,
            min_similarity=0.3,
            multi_theme=False,
        )

        print(f"✅ Generated {len(results)} words (with fixed beta)")
        print("Top 10 words:")
        # The tier element is unpacked but intentionally not displayed here.
        for j, (word, similarity, tier) in enumerate(results[:10], 1):
            print(f" {j:2d}. {word:15s}: {similarity:.4f}")

        if len(results) < 15:
            print(f" ⚠️ As expected: Only {len(results)} words with fixed beta (too strict)")
        else:
            print(f" ✅ Surprisingly good: {len(results)} words even with fixed beta")

    except Exception as e:
        # Report-only by design: print the failure instead of propagating it.
        print(f"❌ Test failed: {e}")
        traceback.print_exc()


def main() -> None:
    """Main test runner.

    Runs the adaptive scenario followed by the fixed-beta comparison and then
    prints a summary of what the output is expected to demonstrate.
    """
    print("🧪 Adaptive Beta Integration Test")
    print("Testing automatic beta relaxation for constrained word generation")
    print("=" * 70)

    try:
        # Test with adaptive beta enabled
        test_adaptive_beta_cricket_sports()

        # Test with adaptive beta disabled for comparison
        test_adaptive_beta_disabled()

        print("\n" + "=" * 70)
        print("🎯 ADAPTIVE BETA TEST RESULTS:")
        print("1. Adaptive beta should automatically relax when < 15 words found")
        print("2. Cricket+Sports should now generate 15+ words (was 16)")
        print("3. Complex multi-topic queries should auto-adapt for sufficient words")
        print("4. Logging shows beta adjustment process")
        print("=" * 70)

    except Exception as e:
        print(f"❌ Adaptive beta test failed: {e}")
        traceback.print_exc()


if __name__ == "__main__":
    main()