#!/usr/bin/env python3
"""
Final test to validate that the crossword generator produces clean grids
without unwanted prefixes, suffixes, or unintended letter sequences.

Run directly (exit status 0 when every scenario is clean, 1 otherwise) or
through pytest, which discovers ``test_clean_crossword_generation`` by name.
"""

import sys
from itertools import groupby
from pathlib import Path

# Add project root to path
project_root = Path(__file__).parent.parent  # Go up from test-integration to backend-py
sys.path.insert(0, str(project_root))

# Star-imports expose only the reusable grid helpers; the test entry point is
# found by name, not through ``__all__``.
__all__ = ["print_clean_grid", "validate_grid_cleanliness"]

# Marker this module treats as "no letter in this cell".
EMPTY_CELL = "."


def test_clean_crossword_generation():
    """Generate a grid per scenario and check it for stray letter sequences.

    Each scenario's word list is passed to ``CrosswordGeneratorFixed._create_grid``.
    A truthy result is expected to carry ``"grid"``, ``"placed_words"`` and
    ``"clues"``; the grid is printed and checked with
    ``validate_grid_cleanliness``.

    A scenario fails when validation reports issues or when an exception is
    raised while generating or inspecting the grid. A falsy result from the
    generator only prints a warning and does not fail the run.

    Returns:
        bool: True when no scenario failed, False otherwise.
    """
    # Imported lazily so the grid helpers in this module stay importable
    # (and testable) without the backend package on the path.
    from src.services.crossword_generator_fixed import CrosswordGeneratorFixed

    print("🧪 Final Crossword Validation Test\n")

    generator = CrosswordGeneratorFixed(vector_service=None)

    # Test multiple scenarios that previously caused issues
    test_scenarios = [
        {
            "name": "Basic Technology Words",
            "words": [
                {"word": "COMPUTER", "clue": "Electronic device"},
                {"word": "MACHINE", "clue": "Device with moving parts"},
                {"word": "SCIENCE", "clue": "Systematic study"},
                {"word": "EXPERT", "clue": "Specialist"},
            ],
        },
        {
            "name": "Similar Words (MACHINE/MACHINERY)",
            "words": [
                {"word": "MACHINE", "clue": "Device with moving parts"},
                {"word": "MACHINERY", "clue": "Mechanical equipment"},
                {"word": "TECHNOLOGY", "clue": "Applied science"},
                {"word": "RESEARCH", "clue": "Investigation"},
            ],
        },
        {
            "name": "Animal Words",
            "words": [
                {"word": "ELEPHANT", "clue": "Large mammal"},
                {"word": "TIGER", "clue": "Striped cat"},
                {"word": "BEAR", "clue": "Large carnivore"},
                {"word": "HORSE", "clue": "Riding animal"},
                {"word": "BIRD", "clue": "Flying creature"},
            ],
        },
        {
            "name": "Mixed Length Words",
            "words": [
                {"word": "CAT", "clue": "Feline pet"},
                {"word": "COMPUTER", "clue": "Electronic device"},
                {"word": "A", "clue": "First letter"},  # Edge case
                {"word": "TECHNOLOGY", "clue": "Applied science"},
            ],
        },
    ]

    all_passed = True

    for number, scenario in enumerate(test_scenarios, start=1):
        print("=" * 60)
        print(f"TEST {number}: {scenario['name']}")
        print("=" * 60)

        words = scenario["words"]
        print(f"Testing with {len(words)} words: {[w['word'] for w in words]}")

        # Broad catch on purpose: this is the reporting boundary of the test,
        # and one broken scenario must not hide the results of the others.
        try:
            result = generator._create_grid(words)

            if result:
                grid = result["grid"]
                placed_words = result["placed_words"]
                clues = result["clues"]

                print("✅ Grid generated successfully")
                print(f" Grid size: {len(grid)}x{len(grid[0])}")
                print(f" Words placed: {len(placed_words)}")
                print(f" Clues generated: {len(clues)}")

                # Print the grid
                print("\nGenerated Grid:")
                print_clean_grid(grid)

                # Validate the grid
                validation_result = validate_grid_cleanliness(grid, placed_words)

                if validation_result["is_clean"]:
                    print("✅ Grid validation: CLEAN - No unwanted sequences")
                else:
                    print("❌ Grid validation: ISSUES FOUND")
                    for issue in validation_result["issues"]:
                        print(f" - {issue}")
                    all_passed = False

                # Print word placements
                print("\nWord Placements:")
                for position, word_info in enumerate(placed_words, start=1):
                    print(
                        f" {position}. {word_info['word']} at "
                        f"({word_info['row']}, {word_info['col']}) "
                        f"{word_info['direction']}"
                    )
            else:
                # This might happen if validation is too restrictive; it is
                # reported but intentionally not counted as a failure.
                print("⚠️ Grid generation returned None - algorithm may be too strict")

        except Exception as e:
            print(f"❌ Grid generation failed: {e}")
            all_passed = False

        print()

    # Summary
    print("=" * 60)
    print("FINAL SUMMARY")
    print("=" * 60)

    if all_passed:
        print("🎉 ALL TESTS PASSED!")
        print("✅ Crossword generator produces clean grids without unwanted sequences")
        print("✅ No more issues with unwanted prefixes, suffixes, or letter combinations")
    else:
        print("❌ Some tests failed - additional improvements needed")

    return all_passed


def print_clean_grid(grid):
    """Print grid in a clean, readable format.

    Writes a header of column indices followed by one line per row, each
    prefixed with its row index. Every cell, including the ``"."`` empty
    marker, is printed with a single leading space.

    Args:
        grid: Sequence of rows; the width of the header is taken from the
            first row. A falsy grid prints a placeholder line instead.
    """
    if not grid:
        print(" Empty grid")
        return

    # Pad the header by the width of a row label (" NN: ") so each column
    # index sits directly above the cells of that column.
    header_pad = " " * len(f" {0:2d}: ")
    print(header_pad + "".join(f"{c:2d}" for c in range(len(grid[0]))))

    for r, row in enumerate(grid):
        print(f" {r:2d}: " + "".join(f" {cell}" for cell in row))


def _runs_along_line(cells, direction):
    """Yield ``(row, col, direction, text)`` for each run of 2+ letters.

    ``cells`` is an iterable of ``(row, col, value)`` triples walking one grid
    line in order; ``row``/``col`` in the result are those of the run's first
    cell. Isolated single letters are not reported.
    """
    for is_letter, group in groupby(cells, key=lambda cell: cell[2] != EMPTY_CELL):
        if not is_letter:
            continue
        run = list(group)
        if len(run) > 1:
            start_row, start_col, _ = run[0]
            yield start_row, start_col, direction, "".join(value for _, _, value in run)


def _find_letter_sequences(grid):
    """Return all horizontal, then all vertical, letter runs found in ``grid``."""
    sequences = []
    if not grid:
        return sequences

    width = len(grid[0])
    for r, row in enumerate(grid):
        sequences.extend(
            _runs_along_line(((r, c, row[c]) for c in range(width)), "horizontal")
        )
    for c in range(width):
        sequences.extend(
            _runs_along_line(((r, c, row[c]) for r, row in enumerate(grid)), "vertical")
        )
    return sequences


def validate_grid_cleanliness(grid, placed_words):
    """Validate that grid contains only intended words without unwanted sequences.

    Every run of two or more adjacent non-``"."`` cells, read left-to-right
    and top-to-bottom, is compared against the placed words. Reported issues:

    * a run that does not match a placed word's row, column, direction and
      text exactly (directions are compared against ``"horizontal"`` and
      ``"vertical"``);
    * any run of exactly two letters, whether or not it was placed;
    * a run that starts or ends with a placed word but is not itself the
      text of any placed word.

    Args:
        grid: Sequence of rows of single-character strings; ``"."`` marks an
            empty cell. An empty grid contains no sequences.
        placed_words: Iterable of dicts with ``"word"``, ``"row"``, ``"col"``
            and ``"direction"`` keys.

    Returns:
        dict: ``"is_clean"`` (bool), ``"issues"`` (list of messages),
        ``"total_sequences"`` (runs found) and ``"intended_sequences"``
        (number of distinct placements).
    """
    all_sequences = _find_letter_sequences(grid)

    intended_words = {
        (info["row"], info["col"], info["direction"], info["word"])
        for info in placed_words
    }
    placed_word_set = {info["word"] for info in placed_words}

    # Check if all sequences correspond to intended words
    issues = [
        f"Unintended sequence: '{sequence}' at ({row}, {col}) {direction}"
        for row, col, direction, sequence in all_sequences
        if (row, col, direction, sequence) not in intended_words
    ]

    # Check for specific problematic patterns
    for row, col, direction, sequence in all_sequences:
        # Check for 2-letter sequences (should not exist)
        if len(sequence) == 2:
            issues.append(
                f"Unwanted 2-letter sequence: '{sequence}' at ({row}, {col}) {direction}"
            )

        # A run whose text is itself a placed word is intentional, so it can
        # never count as an accidental extension of a shorter word.
        if sequence in placed_word_set:
            continue

        # Check for words that appear to extend beyond their intended
        # boundaries. The run is not a placed word here, so it necessarily
        # differs from every word it is compared with.
        for word_info in placed_words:
            word = word_info["word"]
            if sequence.startswith(word) or sequence.endswith(word):
                issues.append(
                    f"Word '{word}' appears extended as '{sequence}' "
                    f"at ({row}, {col}) {direction}"
                )

    return {
        "is_clean": not issues,
        "issues": issues,
        "total_sequences": len(all_sequences),
        "intended_sequences": len(intended_words),
    }


if __name__ == "__main__":
    # Surface failures to the shell / CI through the exit status.
    sys.exit(0 if test_clean_crossword_generation() else 1)