Spaces:

vimalk78
/

abc123

Sleeping

App Files Files

xet

Community

abc123 / crossword-app /backend-py /test-integration /test_final_crossword_validation.py

vimalk78

Add complete Python backend with AI-powered crossword generation

38c016b about 2 months ago

raw

history blame

8.79 kB

	#!/usr/bin/env python3
	"""
	Final test to validate that the crossword generator produces clean grids
	without unwanted prefixes, suffixes, or unintended letter sequences.
	"""

	import sys
	from pathlib import Path

	# Put the backend-py directory (two levels up from this file) at the front of
	# sys.path so the `src.` package import below resolves when this file is run
	# directly as a script.
	project_root = Path(__file__).parent.parent # Go up from test-integration to backend-py
	sys.path.insert(0, str(project_root))

	from src.services.crossword_generator_fixed import CrosswordGeneratorFixed

	def test_clean_crossword_generation():
	    """Generate a grid for several word lists and report whether each is clean.

	    Each scenario's words are handed to ``generator._create_grid``. A truthy
	    result is printed and then checked with ``validate_grid_cleanliness``; a
	    falsy result only produces a warning and is not counted as a failure.

	    Returns:
	        True when no scenario raised an exception and every generated grid
	        validated as clean, False otherwise.
	    """
	    # NOTE(review): this function is named like a pytest test but reports its
	    # outcome through the return value instead of asserting - confirm how it
	    # is meant to be run.
	    print("🧪 Final Crossword Validation Test\n")

	    generator = CrosswordGeneratorFixed(vector_service=None)

	    # Word lists that previously caused issues, as (scenario name, [(word, clue), ...]).
	    scenario_table = [
	        ("Basic Technology Words", [
	            ("COMPUTER", "Electronic device"),
	            ("MACHINE", "Device with moving parts"),
	            ("SCIENCE", "Systematic study"),
	            ("EXPERT", "Specialist"),
	        ]),
	        ("Similar Words (MACHINE/MACHINERY)", [
	            ("MACHINE", "Device with moving parts"),
	            ("MACHINERY", "Mechanical equipment"),
	            ("TECHNOLOGY", "Applied science"),
	            ("RESEARCH", "Investigation"),
	        ]),
	        ("Animal Words", [
	            ("ELEPHANT", "Large mammal"),
	            ("TIGER", "Striped cat"),
	            ("BEAR", "Large carnivore"),
	            ("HORSE", "Riding animal"),
	            ("BIRD", "Flying creature"),
	        ]),
	        ("Mixed Length Words", [
	            ("CAT", "Feline pet"),
	            ("COMPUTER", "Electronic device"),
	            ("A", "First letter"),  # Edge case: single-letter word
	            ("TECHNOLOGY", "Applied science"),
	        ]),
	    ]
	    test_scenarios = [
	        {
	            "name": title,
	            "words": [{"word": word, "clue": clue} for word, clue in pairs],
	        }
	        for title, pairs in scenario_table
	    ]

	    rule = "=" * 60
	    every_scenario_ok = True

	    for number, scenario in enumerate(test_scenarios, start=1):
	        print(rule)
	        print(f"TEST {number}: {scenario['name']}")
	        print(rule)

	        words = scenario["words"]
	        word_names = [entry["word"] for entry in words]
	        print(f"Testing with {len(words)} words: {word_names}")

	        # Everything for one scenario runs inside the try so that a failure in
	        # generation, printing or validation is reported as a failed scenario.
	        try:
	            outcome = generator._create_grid(words)

	            if not outcome:
	                # Reported only; a too-strict generator is not treated as a failure.
	                print("⚠️ Grid generation returned None - algorithm may be too strict")
	            else:
	                grid = outcome["grid"]
	                placements = outcome["placed_words"]
	                clues = outcome["clues"]

	                print("✅ Grid generated successfully")
	                print(f" Grid size: {len(grid)}x{len(grid[0])}")
	                print(f" Words placed: {len(placements)}")
	                print(f" Clues generated: {len(clues)}")

	                print("\nGenerated Grid:")
	                print_clean_grid(grid)

	                report = validate_grid_cleanliness(grid, placements)
	                if not report["is_clean"]:
	                    print("❌ Grid validation: ISSUES FOUND")
	                    for issue in report["issues"]:
	                        print(f" - {issue}")
	                    every_scenario_ok = False
	                else:
	                    print("✅ Grid validation: CLEAN - No unwanted sequences")

	                print("\nWord Placements:")
	                for position, placed in enumerate(placements, start=1):
	                    print(f" {position}. {placed['word']} at ({placed['row']}, {placed['col']}) {placed['direction']}")

	        except Exception as exc:
	            print(f"❌ Grid generation failed: {exc}")
	            every_scenario_ok = False

	        print()

	    # Summary
	    print(rule)
	    print("FINAL SUMMARY")
	    print(rule)

	    if not every_scenario_ok:
	        print("❌ Some tests failed - additional improvements needed")
	    else:
	        print("🎉 ALL TESTS PASSED!")
	        print("✅ Crossword generator produces clean grids without unwanted sequences")
	        print("✅ No more issues with unwanted prefixes, suffixes, or letter combinations")

	    return every_scenario_ok

	def print_clean_grid(grid):
	    """Print the grid with a column-index header and a row-index prefix per row.

	    An empty (falsy) grid prints a placeholder line instead. The column count
	    is taken from the first row and applied to every row.
	    """
	    if not grid:
	        print(" Empty grid")
	        return

	    width = len(grid[0])

	    # Header line: every column index right-aligned in a two-character field.
	    header = "".join(f"{col:2d}" for col in range(width))
	    print(" " + header)

	    # One output line per row: the row index, then each cell behind a space.
	    for row_number, row in enumerate(grid):
	        rendered = "".join(
	            " ." if row[col] == "." else f" {row[col]}"
	            for col in range(width)
	        )
	        print(f" {row_number:2d}: " + rendered)

	def validate_grid_cleanliness(grid, placed_words):
	    """Check that every multi-letter run in the grid is an intended word.

	    Args:
	        grid: Rows of cells, where the string "." marks an empty cell and any
	            other value is treated as part of a letter run. The column count
	            is taken from the first row. An empty (falsy) grid is treated as
	            having no sequences instead of raising.
	        placed_words: Dicts with the keys "word", "row", "col" and
	            "direction". Runs found in the grid are labelled "horizontal" or
	            "vertical", so "direction" must use those values to match.

	    Returns:
	        A dict with:
	            "is_clean": True when no issues were found.
	            "issues": human-readable issue descriptions.
	            "total_sequences": number of runs of two or more cells found.
	            "intended_sequences": number of distinct placed-word keys.
	    """
	    # Guard the empty grid: without it, grid[0] raises IndexError.
	    num_rows = len(grid) if grid else 0
	    num_cols = len(grid[0]) if grid else 0

	    # Collect every run as (row, col, direction, text): rows first, then columns.
	    all_sequences = []
	    for r in range(num_rows):
	        row_cells = [grid[r][c] for c in range(num_cols)]
	        for start_col, text in _iter_letter_runs(row_cells):
	            all_sequences.append((r, start_col, "horizontal", text))
	    for c in range(num_cols):
	        column_cells = [grid[r][c] for r in range(num_rows)]
	        for start_row, text in _iter_letter_runs(column_cells):
	            all_sequences.append((start_row, c, "vertical", text))

	    intended_words = {
	        (info["row"], info["col"], info["direction"], info["word"])
	        for info in placed_words
	    }
	    # Built once up front; it does not change while the sequences are checked.
	    placed_word_set = {info["word"] for info in placed_words}

	    # Any run that is not exactly a placed word at its recorded position.
	    issues = [
	        f"Unintended sequence: '{sequence}' at ({row}, {col}) {direction}"
	        for row, col, direction, sequence in all_sequences
	        if (row, col, direction, sequence) not in intended_words
	    ]

	    # Specific problematic patterns, reported in addition to the check above.
	    for row, col, direction, sequence in all_sequences:
	        # 2-letter sequences should not exist.
	        if len(sequence) == 2:
	            issues.append(f"Unwanted 2-letter sequence: '{sequence}' at ({row}, {col}) {direction}")

	        # A run that is itself a placed word is an intentional longer word
	        # (e.g. MACHINERY next to MACHINE), not an accidental extension.
	        if sequence in placed_word_set:
	            continue

	        for info in placed_words:
	            word = info["word"]
	            if sequence.startswith(word) or sequence.endswith(word):
	                issues.append(f"Word '{word}' appears extended as '{sequence}' at ({row}, {col}) {direction}")

	    return {
	        "is_clean": len(issues) == 0,
	        "issues": issues,
	        "total_sequences": len(all_sequences),
	        "intended_sequences": len(intended_words),
	    }


	def _iter_letter_runs(cells):
	    """Yield (start_index, text) for each run of non-"." cells longer than one character."""
	    run_start = None
	    letters = []
	    for index, cell in enumerate(cells):
	        if cell != ".":
	            if run_start is None:
	                run_start = index
	            letters.append(cell)
	            continue
	        # An empty cell closes whatever run was in progress.
	        text = "".join(letters)
	        if len(text) > 1:
	            yield run_start, text
	        run_start = None
	        letters = []
	    # A run may extend to the end of the line.
	    text = "".join(letters)
	    if len(text) > 1:
	        yield run_start, text

	if __name__ == "__main__":
	    # Turn the boolean outcome into the process exit status so a shell or CI
	    # job can detect a failed validation run; discarding it would make the
	    # script exit 0 even when issues were reported.
	    sys.exit(0 if test_clean_crossword_generation() else 1)