Spaces:

vimalk78
/

abc123

Sleeping

File size: 8,793 Bytes

38c016b

#!/usr/bin/env python3
"""
Final test to validate that the crossword generator produces clean grids 
without unwanted prefixes, suffixes, or unintended letter sequences.
"""

import sys
from pathlib import Path

# Add project root to path.
# project_root is the parent of the directory that contains this file; it is
# placed at the front of sys.path so that the `src.services...` import below
# is resolved from there. This must run before that import.
project_root = Path(__file__).parent.parent  # Go up from test-integration to backend-py
sys.path.insert(0, str(project_root))

from src.services.crossword_generator_fixed import CrosswordGeneratorFixed

def test_clean_crossword_generation():
    """Run every word-list scenario through the generator and report grid cleanliness.

    For each scenario a grid is built with ``generator._create_grid``, printed,
    and checked with ``validate_grid_cleanliness``. A scenario counts as failed
    when validation reports issues or when any exception is raised while
    processing it; a falsy generator result only produces a warning.

    Returns:
        bool: True when no scenario failed, False otherwise.
    """
    print("🧪 Final Crossword Validation Test\n")

    generator = CrosswordGeneratorFixed(vector_service=None)

    # Scenarios that previously caused issues: (title, ((word, clue), ...)).
    scenarios = (
        (
            "Basic Technology Words",
            (
                ("COMPUTER", "Electronic device"),
                ("MACHINE", "Device with moving parts"),
                ("SCIENCE", "Systematic study"),
                ("EXPERT", "Specialist"),
            ),
        ),
        (
            "Similar Words (MACHINE/MACHINERY)",
            (
                ("MACHINE", "Device with moving parts"),
                ("MACHINERY", "Mechanical equipment"),
                ("TECHNOLOGY", "Applied science"),
                ("RESEARCH", "Investigation"),
            ),
        ),
        (
            "Animal Words",
            (
                ("ELEPHANT", "Large mammal"),
                ("TIGER", "Striped cat"),
                ("BEAR", "Large carnivore"),
                ("HORSE", "Riding animal"),
                ("BIRD", "Flying creature"),
            ),
        ),
        (
            "Mixed Length Words",
            (
                ("CAT", "Feline pet"),
                ("COMPUTER", "Electronic device"),
                ("A", "First letter"),  # Edge case: single-letter word
                ("TECHNOLOGY", "Applied science"),
            ),
        ),
    )

    banner = "=" * 60
    all_passed = True

    for number, (title, pairs) in enumerate(scenarios, start=1):
        print(banner)
        print(f"TEST {number}: {title}")
        print(banner)

        words = [{"word": text, "clue": hint} for text, hint in pairs]
        print(f"Testing with {len(words)} words: {[text for text, _ in pairs]}")

        # Everything up to the placement listing stays inside the try so that
        # any error while reporting is counted as a failure of this scenario.
        try:
            outcome = generator._create_grid(words)

            if not outcome:
                # This might happen if validation is too restrictive
                print("⚠️ Grid generation returned None - algorithm may be too strict")
            else:
                grid, placements, clues = (
                    outcome["grid"],
                    outcome["placed_words"],
                    outcome["clues"],
                )

                print("✅ Grid generated successfully")
                print(f"   Grid size: {len(grid)}x{len(grid[0])}")
                print(f"   Words placed: {len(placements)}")
                print(f"   Clues generated: {len(clues)}")

                print("\nGenerated Grid:")
                print_clean_grid(grid)

                report = validate_grid_cleanliness(grid, placements)

                if not report["is_clean"]:
                    print("❌ Grid validation: ISSUES FOUND")
                    for problem in report["issues"]:
                        print(f"   - {problem}")
                    all_passed = False
                else:
                    print("✅ Grid validation: CLEAN - No unwanted sequences")

                print("\nWord Placements:")
                for position, placed in enumerate(placements, start=1):
                    print(
                        f"   {position}. {placed['word']} at "
                        f"({placed['row']}, {placed['col']}) {placed['direction']}"
                    )

        except Exception as e:
            print(f"❌ Grid generation failed: {e}")
            all_passed = False

        print()

    # Summary
    print(banner)
    print("FINAL SUMMARY")
    print(banner)

    if not all_passed:
        print("❌ Some tests failed - additional improvements needed")
    else:
        print("🎉 ALL TESTS PASSED!")
        print("✅ Crossword generator produces clean grids without unwanted sequences")
        print("✅ No more issues with unwanted prefixes, suffixes, or letter combinations")

    return all_passed

def print_clean_grid(grid):
    """Print the grid with column and row indices for visual inspection.

    Args:
        grid: Sequence of rows, each a sequence of cells that are printed
            as-is (alignment assumes one character per cell). The number of
            columns is taken from the first row.

    An empty grid prints a single placeholder line.
    """
    if not grid:
        print("  Empty grid")
        return

    width = len(grid[0])

    # Row labels (" {r:2d}: ") are five characters wide, so the header must be
    # indented by five spaces for each column index to sit above its cell.
    print("     " + "".join(f"{c:2d}" for c in range(width)))

    # Each cell is rendered as a space followed by its content; empty cells
    # (".") need no special casing because they render the same way.
    for r, row in enumerate(grid):
        print(f" {r:2d}: " + "".join(f" {row[c]}" for c in range(width)))

def _iter_letter_runs(cells):
    """Yield ``(start_index, text)`` for each run of filled cells longer than one character.

    A cell equal to "." is treated as empty and terminates the current run.
    """
    run_start = None
    text = ""
    for index, cell in enumerate(cells):
        if cell != ".":
            if run_start is None:
                run_start = index
            text += cell
            continue
        if len(text) > 1:
            yield run_start, text
        run_start = None
        text = ""
    # A run that touches the end of the line has no trailing "." to flush it.
    if len(text) > 1:
        yield run_start, text


def validate_grid_cleanliness(grid, placed_words):
    """Check that every multi-letter run in the grid is an intended word placement.

    Args:
        grid: Sequence of rows of cells, where "." marks an empty cell. The
            number of columns is taken from the first row. An empty grid is
            accepted and contains no sequences.
        placed_words: Iterable of dicts with "word", "row", "col" and
            "direction" keys. Runs found in the grid are matched against
            these using the direction values "horizontal" and "vertical".

    Returns:
        dict with:
            "is_clean": True when no issues were found.
            "issues": list of human-readable issue strings.
            "total_sequences": number of multi-letter runs found in the grid.
            "intended_sequences": number of distinct placements in
                ``placed_words``.
    """
    issues = []

    # Every run of two or more letters, as (row, col, direction, text).
    all_sequences = []
    if grid:  # an empty grid has no first row to measure
        width = len(grid[0])

        for r, row in enumerate(grid):
            for start_col, text in _iter_letter_runs(row[c] for c in range(width)):
                all_sequences.append((r, start_col, "horizontal", text))

        for c in range(width):
            for start_row, text in _iter_letter_runs(row[c] for row in grid):
                all_sequences.append((start_row, c, "vertical", text))

    intended_words = {
        (info["row"], info["col"], info["direction"], info["word"])
        for info in placed_words
    }
    placed_word_set = {info["word"] for info in placed_words}

    # First pass: any run that is not exactly one of the intended placements.
    for row, col, direction, sequence in all_sequences:
        if (row, col, direction, sequence) not in intended_words:
            issues.append(f"Unintended sequence: '{sequence}' at ({row}, {col}) {direction}")

    # Second pass: specific problematic patterns.
    for row, col, direction, sequence in all_sequences:
        # 2-letter sequences should not exist at all.
        if len(sequence) == 2:
            issues.append(f"Unwanted 2-letter sequence: '{sequence}' at ({row}, {col}) {direction}")

        # A placed word that shows up as a strict prefix/suffix of a longer run
        # has been extended - unless the longer run is itself a placed word
        # (e.g. MACHINE and MACHINERY both placed intentionally).
        if sequence in placed_word_set:
            continue
        for info in placed_words:
            word = info["word"]
            if sequence != word and (sequence.startswith(word) or sequence.endswith(word)):
                issues.append(f"Word '{word}' appears extended as '{sequence}' at ({row}, {col}) {direction}")

    return {
        "is_clean": len(issues) == 0,
        "issues": issues,
        "total_sequences": len(all_sequences),
        "intended_sequences": len(intended_words),
    }

if __name__ == "__main__":
    # Report the outcome through the process exit status (0 = all scenarios
    # passed, 1 = at least one failed) so shells and CI can detect failures.
    sys.exit(0 if test_clean_crossword_generation() else 1)