abc123 / crossword-app /backend-py /test-integration /test_final_crossword_validation.py
vimalk78's picture
Add complete Python backend with AI-powered crossword generation
38c016b
raw
history blame
8.79 kB
#!/usr/bin/env python3
"""
Final test to validate that the crossword generator produces clean grids
without unwanted prefixes, suffixes, or unintended letter sequences.
"""
import sys
from pathlib import Path
# Add project root to path.
# This must run before the `src.services...` import that follows: the insert
# puts the backend-py directory (two levels up from this file) at the front of
# sys.path so that the top-level `src` package can be located.
project_root = Path(__file__).parent.parent # Go up from test-integration to backend-py
sys.path.insert(0, str(project_root))
from src.services.crossword_generator_fixed import CrosswordGeneratorFixed
def test_clean_crossword_generation():
    """Generate a grid for several word lists and report whether each one is clean.

    Every scenario is passed to ``CrosswordGeneratorFixed._create_grid``; a
    truthy result is printed with ``print_clean_grid`` and checked with
    ``validate_grid_cleanliness``. Progress and a final summary are written
    to stdout.

    Returns:
        bool: True when no scenario raised an exception and none reported
        validation issues, otherwise False. A scenario whose generation
        returns a falsy result only prints a warning; it does not count as
        a failure.
    """
    print("🧪 Final Crossword Validation Test\n")

    generator = CrosswordGeneratorFixed(vector_service=None)

    def entry(word, clue):
        # Same {"word", "clue"} shape that the generator is fed per word.
        return {"word": word, "clue": clue}

    # Word sets that previously caused issues, as (scenario name, word entries).
    scenarios = [
        (
            "Basic Technology Words",
            [
                entry("COMPUTER", "Electronic device"),
                entry("MACHINE", "Device with moving parts"),
                entry("SCIENCE", "Systematic study"),
                entry("EXPERT", "Specialist"),
            ],
        ),
        (
            "Similar Words (MACHINE/MACHINERY)",
            [
                entry("MACHINE", "Device with moving parts"),
                entry("MACHINERY", "Mechanical equipment"),
                entry("TECHNOLOGY", "Applied science"),
                entry("RESEARCH", "Investigation"),
            ],
        ),
        (
            "Animal Words",
            [
                entry("ELEPHANT", "Large mammal"),
                entry("TIGER", "Striped cat"),
                entry("BEAR", "Large carnivore"),
                entry("HORSE", "Riding animal"),
                entry("BIRD", "Flying creature"),
            ],
        ),
        (
            "Mixed Length Words",
            [
                entry("CAT", "Feline pet"),
                entry("COMPUTER", "Electronic device"),
                entry("A", "First letter"),  # Edge case: single-letter word
                entry("TECHNOLOGY", "Applied science"),
            ],
        ),
    ]

    banner = "=" * 60
    all_passed = True

    for number, (title, words) in enumerate(scenarios, start=1):
        print(banner)
        print(f"TEST {number}: {title}")
        print(banner)
        print(f"Testing with {len(words)} words: {[item['word'] for item in words]}")

        try:
            result = generator._create_grid(words)
            if not result:
                # Not treated as a failure: the generator's own validation
                # may simply have been too restrictive for this word set.
                print("⚠️ Grid generation returned None - algorithm may be too strict")
            else:
                grid = result["grid"]
                placed_words = result["placed_words"]
                clues = result["clues"]

                print("✅ Grid generated successfully")
                print(f" Grid size: {len(grid)}x{len(grid[0])}")
                print(f" Words placed: {len(placed_words)}")
                print(f" Clues generated: {len(clues)}")

                print("\nGenerated Grid:")
                print_clean_grid(grid)

                report = validate_grid_cleanliness(grid, placed_words)
                if not report["is_clean"]:
                    print("❌ Grid validation: ISSUES FOUND")
                    for issue in report["issues"]:
                        print(f" - {issue}")
                    all_passed = False
                else:
                    print("✅ Grid validation: CLEAN - No unwanted sequences")

                print("\nWord Placements:")
                for position, placement in enumerate(placed_words, start=1):
                    print(
                        f" {position}. {placement['word']} at "
                        f"({placement['row']}, {placement['col']}) {placement['direction']}"
                    )
        except Exception as exc:
            # Anything raised while generating, printing or validating marks
            # the whole run as failed but lets the remaining scenarios run.
            print(f"❌ Grid generation failed: {exc}")
            all_passed = False

        print()

    # Summary
    print(banner)
    print("FINAL SUMMARY")
    print(banner)
    if not all_passed:
        print("❌ Some tests failed - additional improvements needed")
    else:
        print("🎉 ALL TESTS PASSED!")
        print("✅ Crossword generator produces clean grids without unwanted sequences")
        print("✅ No more issues with unwanted prefixes, suffixes, or letter combinations")
    return all_passed
def print_clean_grid(grid):
    """Write *grid* to stdout with a column-index header and row-index prefixes.

    Args:
        grid: Sequence of rows, each indexable by column. The width of the
            first row is used for every row. A falsy grid prints a
            placeholder line instead.
    """
    if not grid:
        print(" Empty grid")
        return

    columns = range(len(grid[0]))

    # Header: one 2-wide column index per grid column.
    print(" " + "".join(f"{index:2d}" for index in columns))

    # Body: row index, then every cell preceded by a single space.
    for row_number, row in enumerate(grid):
        cells = "".join(f" {row[index]}" for index in columns)
        print(f" {row_number:2d}: {cells}")
def _find_letter_runs(cells):
    """Return ``(start_index, text)`` for each run of filled cells longer than one letter.

    A cell is "filled" when it is not ``"."``. Runs are returned in order of
    their start index, and a run that reaches the end of *cells* is included.
    """
    runs = []
    start = None
    for index, cell in enumerate(cells):
        if cell != ".":
            if start is None:
                start = index
            continue
        if start is not None:
            runs.append((start, "".join(cells[start:index])))
            start = None
    if start is not None:
        # Run touches the end of the row/column.
        runs.append((start, "".join(cells[start:])))
    # Isolated single letters are not sequences.
    return [(begin, text) for begin, text in runs if len(text) > 1]


def validate_grid_cleanliness(grid, placed_words):
    """Validate that the grid contains only intended words without unwanted sequences.

    Every horizontal and vertical run of two or more filled cells (anything
    other than ``"."``) is collected and compared with the placements.

    Args:
        grid: Sequence of rows of single-character cells; the width of the
            first row is used for every row. An empty or ``None`` grid is
            treated as containing no sequences.
        placed_words: Iterable of dicts with ``"word"``, ``"row"``, ``"col"``
            and ``"direction"`` keys. Directions must be ``"horizontal"`` or
            ``"vertical"`` to match the sequences found here.

    Returns:
        dict with:
            ``is_clean``: True when no issues were found.
            ``issues``: list of human-readable problem descriptions. All
                "Unintended sequence" entries come first, followed by the
                per-sequence 2-letter and "appears extended" entries.
            ``total_sequences``: number of 2+ letter runs found in the grid.
            ``intended_sequences``: number of distinct placements supplied.
    """
    rows = grid or []
    # Guarding here keeps an empty grid from raising IndexError on rows[0].
    width = len(rows[0]) if rows else 0

    # Find all letter sequences in the grid: horizontals row by row, then
    # verticals column by column.
    all_sequences = []
    for r, row in enumerate(rows):
        line = [row[c] for c in range(width)]
        all_sequences.extend(
            (r, start, "horizontal", text) for start, text in _find_letter_runs(line)
        )
    for c in range(width):
        column = [row[c] for row in rows]
        all_sequences.extend(
            (start, c, "vertical", text) for start, text in _find_letter_runs(column)
        )

    # A sequence is intended only if position, direction and text all match
    # a placement exactly.
    intended_words = {
        (info["row"], info["col"], info["direction"], info["word"])
        for info in placed_words
    }
    # Built once: used to excuse a longer sequence that is itself a placed
    # word (e.g. MACHINE and MACHINERY both placed on purpose).
    placed_word_set = {info["word"] for info in placed_words}

    issues = [
        f"Unintended sequence: '{sequence}' at ({row}, {col}) {direction}"
        for row, col, direction, sequence in all_sequences
        if (row, col, direction, sequence) not in intended_words
    ]

    # Check for specific problematic patterns
    for row, col, direction, sequence in all_sequences:
        # 2-letter sequences should not exist at all.
        if len(sequence) == 2:
            issues.append(
                f"Unwanted 2-letter sequence: '{sequence}' at ({row}, {col}) {direction}"
            )

        # A sequence that is itself a placed word is never an "extension".
        if sequence in placed_word_set:
            continue

        # Placed word that appears with extra letters glued to its start or end.
        for info in placed_words:
            word = info["word"]
            if word != sequence and (
                sequence.startswith(word) or sequence.endswith(word)
            ):
                issues.append(
                    f"Word '{word}' appears extended as '{sequence}' at ({row}, {col}) {direction}"
                )

    return {
        "is_clean": len(issues) == 0,
        "issues": issues,
        "total_sequences": len(all_sequences),
        "intended_sequences": len(intended_words),
    }
if __name__ == "__main__":
    # Propagate the overall result as the process exit status so that shells
    # and CI runners can detect a failing run: 0 when every scenario was
    # clean, 1 otherwise. Previously the returned flag was discarded and the
    # script always exited with status 0.
    sys.exit(0 if test_clean_crossword_generation() else 1)