File size: 8,793 Bytes
38c016b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
#!/usr/bin/env python3
"""
Final test to validate that the crossword generator produces clean grids 
without unwanted prefixes, suffixes, or unintended letter sequences.
"""

import sys
from pathlib import Path

# Put the project root (two directory levels above this file) at the front of
# sys.path so the `src.` package import that follows resolves when this file
# is executed directly as a script.
project_root = Path(__file__).parent.parent  # Go up from test-integration to backend-py
sys.path.insert(0, str(project_root))

from src.services.crossword_generator_fixed import CrosswordGeneratorFixed

def _run_scenario(generator, words):
    """Generate one grid from *words*, print a report, and return the verdict.

    Args:
        generator: Object exposing ``_create_grid(words)``; a truthy result is
            read as a dict with ``"grid"``, ``"placed_words"`` and ``"clues"``.
        words: List of ``{"word": ..., "clue": ...}`` dicts for this scenario.

    Returns:
        False when generation/validation/reporting raises, or when the
        validator reports issues. True otherwise, including when the
        generator returns a falsy result (reported as a warning only).
    """
    # The whole scenario sits inside the try so that any error raised while
    # generating, validating or reporting is recorded as a failed scenario
    # instead of aborting the remaining scenarios.
    try:
        result = generator._create_grid(words)

        if not result:
            # Deliberately not counted as a failure: the generator may simply
            # have rejected every candidate layout.
            print("⚠️ Grid generation returned None - algorithm may be too strict")
            return True

        grid = result["grid"]
        placed_words = result["placed_words"]
        clues = result["clues"]

        print("✅ Grid generated successfully")
        print(f"   Grid size: {len(grid)}x{len(grid[0])}")
        print(f"   Words placed: {len(placed_words)}")
        print(f"   Clues generated: {len(clues)}")

        print("\nGenerated Grid:")
        print_clean_grid(grid)

        validation_result = validate_grid_cleanliness(grid, placed_words)
        is_clean = validation_result["is_clean"]

        if is_clean:
            print("✅ Grid validation: CLEAN - No unwanted sequences")
        else:
            print("❌ Grid validation: ISSUES FOUND")
            for issue in validation_result["issues"]:
                print(f"   - {issue}")

        print("\nWord Placements:")
        for number, word_info in enumerate(placed_words, start=1):
            print(
                f"   {number}. {word_info['word']} at "
                f"({word_info['row']}, {word_info['col']}) {word_info['direction']}"
            )

        return is_clean
    except Exception as e:
        print(f"❌ Grid generation failed: {e}")
        return False


def test_clean_crossword_generation():
    """Run every scenario through the generator and report grid cleanliness.

    Each scenario's word list is passed to ``CrosswordGeneratorFixed._create_grid``;
    the resulting grid is printed and checked with ``validate_grid_cleanliness``.

    Returns:
        True if no scenario raised and no generated grid had validation
        issues, otherwise False. A scenario whose generator result is falsy
        only produces a warning and does not affect the return value.
    """
    print("🧪 Final Crossword Validation Test\n")

    generator = CrosswordGeneratorFixed(vector_service=None)

    # Test multiple scenarios that previously caused issues
    test_scenarios = [
        {
            "name": "Basic Technology Words",
            "words": [
                {"word": "COMPUTER", "clue": "Electronic device"},
                {"word": "MACHINE", "clue": "Device with moving parts"},
                {"word": "SCIENCE", "clue": "Systematic study"},
                {"word": "EXPERT", "clue": "Specialist"},
            ],
        },
        {
            "name": "Similar Words (MACHINE/MACHINERY)",
            "words": [
                {"word": "MACHINE", "clue": "Device with moving parts"},
                {"word": "MACHINERY", "clue": "Mechanical equipment"},
                {"word": "TECHNOLOGY", "clue": "Applied science"},
                {"word": "RESEARCH", "clue": "Investigation"},
            ],
        },
        {
            "name": "Animal Words",
            "words": [
                {"word": "ELEPHANT", "clue": "Large mammal"},
                {"word": "TIGER", "clue": "Striped cat"},
                {"word": "BEAR", "clue": "Large carnivore"},
                {"word": "HORSE", "clue": "Riding animal"},
                {"word": "BIRD", "clue": "Flying creature"},
            ],
        },
        {
            "name": "Mixed Length Words",
            "words": [
                {"word": "CAT", "clue": "Feline pet"},
                {"word": "COMPUTER", "clue": "Electronic device"},
                {"word": "A", "clue": "First letter"},  # Edge case
                {"word": "TECHNOLOGY", "clue": "Applied science"},
            ],
        },
    ]

    separator = "=" * 60
    all_passed = True

    for number, scenario in enumerate(test_scenarios, start=1):
        print(separator)
        print(f"TEST {number}: {scenario['name']}")
        print(separator)

        words = scenario["words"]
        print(f"Testing with {len(words)} words: {[w['word'] for w in words]}")

        if not _run_scenario(generator, words):
            all_passed = False

        print()

    # Summary
    print(separator)
    print("FINAL SUMMARY")
    print(separator)

    if all_passed:
        print("🎉 ALL TESTS PASSED!")
        print("✅ Crossword generator produces clean grids without unwanted sequences")
        print("✅ No more issues with unwanted prefixes, suffixes, or letter combinations")
    else:
        print("❌ Some tests failed - additional improvements needed")

    return all_passed

def print_clean_grid(grid):
    """Print the grid to stdout with row and column indices.

    Args:
        grid: Sequence of rows, each an indexable sequence of cells. The
            printed width is taken from the first row, so every row must be
            at least that long.

    Output is one header line of column indices followed by one line per
    row, each prefixed with its row index. A falsy grid (for example an
    empty list) prints ``"  Empty grid"`` instead.
    """
    if not grid:
        print("  Empty grid")
        return

    width = len(grid[0])

    # Each row line starts with the five-character prefix " rr: ", so the
    # header needs a five-character pad for the column numbers to line up
    # with the cells beneath them.
    print(" " * 5 + "".join(f"{col:2d}" for col in range(width)))

    for row_index, row in enumerate(grid):
        cells = "".join(f" {row[col]}" for col in range(width))
        print(f" {row_index:2d}: {cells}")

def _scan_runs(cells):
    """Yield ``(start_index, text)`` for each run of non-"." cells longer than one character."""
    start = None
    text = ""
    for index, cell in enumerate(cells):
        if cell != ".":
            if start is None:
                start = index
            text += cell
            continue
        if len(text) > 1:
            yield start, text
        start = None
        text = ""
    # A run that reaches the end of the line has no terminating "." cell.
    if len(text) > 1:
        yield start, text


def validate_grid_cleanliness(grid, placed_words):
    """Check that every multi-letter run in the grid is an intended word.

    Args:
        grid: Sequence of rows of single-character cells, with ``"."``
            marking an empty cell. The width is taken from the first row.
            An empty (or ``None``) grid is treated as having no sequences.
        placed_words: Iterable of dicts with ``"row"``, ``"col"``,
            ``"direction"`` and ``"word"`` keys. ``"direction"`` is compared
            against the strings ``"horizontal"`` and ``"vertical"``.

    Returns:
        A dict with:
            ``"is_clean"``: True when no issues were found.
            ``"issues"``: list of human-readable issue strings.
            ``"total_sequences"``: number of 2+ letter runs found in the grid.
            ``"intended_sequences"``: number of distinct placed-word entries.

    Reported issues, in order: every run that does not match a placed word's
    exact position, direction and text; then, per run, any two-letter run
    (flagged even if it was placed intentionally) and any run that starts or
    ends with a placed word without itself being a placed word.
    """
    height = len(grid) if grid is not None else 0
    width = len(grid[0]) if height else 0

    # Collect all runs as (row, col, direction, text), rows first then columns.
    all_sequences = []
    for r in range(height):
        row_cells = [grid[r][c] for c in range(width)]
        for start_col, text in _scan_runs(row_cells):
            all_sequences.append((r, start_col, "horizontal", text))
    for c in range(width):
        column_cells = [grid[r][c] for r in range(height)]
        for start_row, text in _scan_runs(column_cells):
            all_sequences.append((start_row, c, "vertical", text))

    intended_words = {
        (info["row"], info["col"], info["direction"], info["word"])
        for info in placed_words
    }
    placed_word_set = {info["word"] for info in placed_words}

    # Pass 1: runs that are not an intended placement.
    issues = [
        f"Unintended sequence: '{sequence}' at ({row}, {col}) {direction}"
        for row, col, direction, sequence in all_sequences
        if (row, col, direction, sequence) not in intended_words
    ]

    # Pass 2: specific problematic patterns.
    for row, col, direction, sequence in all_sequences:
        # Two-letter runs should not exist in a clean grid.
        if len(sequence) == 2:
            issues.append(
                f"Unwanted 2-letter sequence: '{sequence}' at ({row}, {col}) {direction}"
            )

        # A run that is itself a placed word is a legitimate longer word
        # (e.g. MACHINERY next to MACHINE), not an accidental extension.
        if sequence in placed_word_set:
            continue

        for info in placed_words:
            word = info["word"]
            if sequence.startswith(word) or sequence.endswith(word):
                issues.append(
                    f"Word '{word}' appears extended as '{sequence}' at ({row}, {col}) {direction}"
                )

    return {
        "is_clean": len(issues) == 0,
        "issues": issues,
        "total_sequences": len(all_sequences),
        "intended_sequences": len(intended_words),
    }

if __name__ == "__main__":
    # Propagate the overall result as the process exit status (0 = all
    # scenarios passed, 1 = at least one failed) so shells and CI can detect
    # a failed validation run instead of always seeing success.
    sys.exit(0 if test_clean_crossword_generation() else 1)