Add complete Python backend with AI-powered crossword generation
- Implement FastAPI backend with vector search and machine learning capabilities
- Add comprehensive crossword generation algorithm with bounds checking fixes
- Include multi-layer word caching system with graceful fallback to static words
- Add extensive test suite (unit tests, integration tests, boundary condition tests)
- Update Docker configuration for Python backend deployment
- Add comprehensive documentation and development setup guides
- Integrate sentence-transformers and FAISS for semantic word discovery
- Maintain API compatibility with existing Node.js backend
Signed-off-by: Vimal Kumar <[email protected]>
This view is limited to 50 files because it contains too many changes.
- .gitignore +6 -1
- CLAUDE.md +217 -0
- Dockerfile +64 -15
- crossword-app/Dockerfile +63 -15
- crossword-app/backend-py/.coverage +0 -0
- crossword-app/backend-py/.env.example +20 -0
- crossword-app/backend-py/README-local-setup.md +78 -0
- crossword-app/backend-py/README.md +332 -0
- crossword-app/backend-py/__pycache__/test_bounds_comprehensive.cpython-313-pytest-8.4.1.pyc +0 -0
- crossword-app/backend-py/app.py +146 -0
- crossword-app/backend-py/data/data +1 -0
- crossword-app/backend-py/data/word-lists/animals.json +165 -0
- crossword-app/backend-py/data/word-lists/geography.json +161 -0
- crossword-app/backend-py/data/word-lists/science.json +170 -0
- crossword-app/backend-py/data/word-lists/technology.json +221 -0
- crossword-app/backend-py/debug_full_generation.py +316 -0
- crossword-app/backend-py/debug_grid_direct.py +293 -0
- crossword-app/backend-py/debug_index_error.py +307 -0
- crossword-app/backend-py/debug_simple.py +142 -0
- crossword-app/backend-py/pytest.ini +16 -0
- crossword-app/backend-py/requirements-dev.txt +18 -0
- crossword-app/backend-py/requirements.txt +48 -0
- crossword-app/backend-py/run_tests.py +89 -0
- crossword-app/backend-py/src/__init__.py +1 -0
- crossword-app/backend-py/src/__pycache__/__init__.cpython-313.pyc +0 -0
- crossword-app/backend-py/src/routes/__init__.py +1 -0
- crossword-app/backend-py/src/routes/__pycache__/__init__.cpython-313.pyc +0 -0
- crossword-app/backend-py/src/routes/__pycache__/api.cpython-313.pyc +0 -0
- crossword-app/backend-py/src/routes/api.py +186 -0
- crossword-app/backend-py/src/services/__init__.py +1 -0
- crossword-app/backend-py/src/services/__pycache__/__init__.cpython-313.pyc +0 -0
- crossword-app/backend-py/src/services/__pycache__/crossword_generator.cpython-313.pyc +0 -0
- crossword-app/backend-py/src/services/__pycache__/crossword_generator_fixed.cpython-313.pyc +0 -0
- crossword-app/backend-py/src/services/__pycache__/crossword_generator_wrapper.cpython-313.pyc +0 -0
- crossword-app/backend-py/src/services/__pycache__/vector_search.cpython-313.pyc +0 -0
- crossword-app/backend-py/src/services/__pycache__/word_cache.cpython-313.pyc +0 -0
- crossword-app/backend-py/src/services/crossword_generator.py +722 -0
- crossword-app/backend-py/src/services/crossword_generator_wrapper.py +58 -0
- crossword-app/backend-py/src/services/vector_search.py +587 -0
- crossword-app/backend-py/src/services/word_cache.py +347 -0
- crossword-app/backend-py/test-integration/test_boundary_fix.py +147 -0
- crossword-app/backend-py/test-integration/test_bounds_comprehensive.py +266 -0
- crossword-app/backend-py/test-integration/test_bounds_fix.py +90 -0
- crossword-app/backend-py/test-integration/test_cache_permissions.py +88 -0
- crossword-app/backend-py/test-integration/test_cache_system.py +127 -0
- crossword-app/backend-py/test-integration/test_crossword_display.py +85 -0
- crossword-app/backend-py/test-integration/test_final_crossword_validation.py +239 -0
- crossword-app/backend-py/test-integration/test_final_validation.py +133 -0
- crossword-app/backend-py/test-integration/test_intersection_issues.py +247 -0
- crossword-app/backend-py/test-integration/test_local.py +98 -0
.gitignore
CHANGED
```diff
@@ -47,4 +47,9 @@ pids
 .Spotlight-V100
 .Trashes
 ehthumbs.db
-Thumbs.db
+Thumbs.db
+
+hack
+issues/
+samples/
+venv/
```
CLAUDE.md
ADDED
@@ -0,0 +1,217 @@
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Project Structure

This is a full-stack crossword puzzle generator with two backend implementations:
- **Node.js Backend** (`backend/`) - Original implementation with static word lists
- **Python Backend** (`backend-py/`) - New implementation with AI-powered vector search
- **React Frontend** (`frontend/`) - Modern React app with Vite

Current deployment uses the Python backend with Docker containerization.

## Development Commands

### Frontend Development
```bash
cd frontend
npm install
npm run dev        # Start development server on http://localhost:5173
npm run build      # Build for production
npm run preview    # Preview production build
```

### Backend Development (Python - Primary)
```bash
cd backend-py

# Testing
python run_tests.py                              # Run all tests
python run_tests.py crossword_generator_fixed    # Run specific test
pytest tests/ -v                                 # Direct pytest
pytest tests/test_index_bug_fix.py -v            # Core functionality tests
python test_local.py                             # Quick test without ML deps

# Development server
python app.py                                    # Start FastAPI server on port 7860

# Debug/development tools
python test_simple_generation.py                 # Test crossword generation
python debug_grid_direct.py                      # Debug grid placement
```

### Backend Development (Node.js - Legacy)
```bash
cd backend
npm install
npm run dev    # Start Express server on http://localhost:3000
npm test       # Run tests
```

### Docker Deployment
```bash
# Build and run locally
docker build -t crossword-app .
docker run -p 7860:7860 -e NODE_ENV=production crossword-app

# Test deployment
curl http://localhost:7860/api/topics
curl http://localhost:7860/health
```

### Linting and Type Checking
```bash
# Python backend
cd backend-py
mypy src/       # Type checking (if mypy installed)
ruff src/       # Linting (if ruff installed)

# Frontend
cd frontend
npm run lint    # ESLint (if configured)
```

## Architecture Overview

### Full-Stack Components

**Frontend** (`frontend/`)
- React 18 with hooks and functional components
- Key components: `TopicSelector.jsx`, `PuzzleGrid.jsx`, `ClueList.jsx`
- Custom hook: `useCrossword.js` manages puzzle state
- Grid rendering using CSS Grid with interactive cell filling

**Python Backend** (`backend-py/` - Primary)
- FastAPI web framework serving both API and static frontend files
- AI-powered word generation using vector similarity search
- Comprehensive bounds checking fixes for crossword generation
- Multi-layer caching system with graceful fallback to static words

**Node.js Backend** (`backend/` - Legacy)
- Express.js with file-based word storage
- Original crossword generation algorithm
- Static word lists organized by topic (animals.json, science.json, etc.)

### Core Python Backend Components

**CrosswordGeneratorFixed** (`backend-py/src/services/crossword_generator_fixed.py`)
- Main crossword generation algorithm using backtracking
- Handles grid placement, bounds checking, and word intersections
- Contains fixes for "list index out of range" errors with comprehensive bounds validation
- Key methods: `_create_grid()`, `_backtrack_placement()`, `_can_place_word()`, `_place_word()`
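To make the bounds-validation pattern concrete, here is a minimal sketch of a `_can_place_word()`-style check. It illustrates the approach described above rather than reproducing the code in `crossword_generator_fixed.py`, and the grid representation (a square list of lists with empty strings for unused cells) is an assumption.

```python
# Illustrative sketch only - not the actual crossword_generator_fixed.py method.
def can_place_word(grid, word, row, col, direction):
    """Return True if `word` fits at (row, col) without indexing outside the grid."""
    size = len(grid)
    dr, dc = (0, 1) if direction == "across" else (1, 0)

    # Reject placements whose last letter would fall outside the grid.
    end_row = row + dr * (len(word) - 1)
    end_col = col + dc * (len(word) - 1)
    if row < 0 or col < 0 or end_row >= size or end_col >= size:
        return False

    for i, letter in enumerate(word):
        r, c = row + dr * i, col + dc * i
        # Defensive per-cell bounds check before any grid[r][c] access.
        if not (0 <= r < size and 0 <= c < len(grid[r])):
            return False
        cell = grid[r][c]
        # An occupied cell is acceptable only if it already holds the same
        # letter (a legitimate intersection with a crossing word).
        if cell and cell != letter:
            return False
    return True
```

A placement that passes this kind of check can then be written and later removed using the same coordinate arithmetic, which is what eliminates the "list index out of range" failures.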
**VectorSearchService** (`backend-py/src/services/vector_search.py`)
- AI-powered word discovery using sentence-transformers + FAISS
- Extracts 30K+ words from model vocabulary vs static word lists
- Implements semantic similarity search with caching and fallback systems
- Requires torch/sentence-transformers dependencies (optional for core functionality)

**WordCache** (`backend-py/src/services/word_cache.py`)
- Multi-layer caching system for vector-discovered words
- Handles permission issues with fallback mechanisms
- Reduces dependency on static word files
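As a rough illustration of the multi-layer idea (not the actual `WordCache` API, whose layer order, file layout, and method names may differ), a cache like this checks memory first, then an on-disk JSON cache, and finally falls back to the static word lists when the disk layer is unavailable or unwritable:

```python
# Minimal sketch of a memory -> disk -> static-files lookup chain; all paths
# and method names here are assumptions for illustration.
import json
from pathlib import Path

class SimpleWordCache:
    def __init__(self, cache_dir="cache", static_dir="data/word-lists"):
        self.memory = {}                      # layer 1: in-process dict
        self.cache_dir = Path(cache_dir)      # layer 2: on-disk JSON cache
        self.static_dir = Path(static_dir)    # layer 3: static word lists

    def get(self, topic):
        if topic in self.memory:
            return self.memory[topic]
        cache_file = self.cache_dir / f"{topic}.json"
        try:
            if cache_file.exists():
                words = json.loads(cache_file.read_text())
                self.memory[topic] = words
                return words
        except (OSError, PermissionError, json.JSONDecodeError):
            pass  # e.g. read-only filesystem: fall through to static lists
        static_file = self.static_dir / f"{topic.lower()}.json"
        if static_file.exists():
            return json.loads(static_file.read_text())
        return []

    def put(self, topic, words):
        self.memory[topic] = words
        try:
            self.cache_dir.mkdir(parents=True, exist_ok=True)
            (self.cache_dir / f"{topic}.json").write_text(json.dumps(words))
        except (OSError, PermissionError):
            pass  # the disk layer is best-effort; the memory layer still works
```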
### Data Flow

1. **User Interaction** → React frontend (TopicSelector, PuzzleGrid)
2. **API Request** → FastAPI backend (`backend-py/routes/api.py`)
3. **Word Selection** → VectorSearchService (AI) or static word fallback
4. **Grid Generation** → CrosswordGeneratorFixed backtracking algorithm
5. **Response** → JSON with grid, clues, and metadata
6. **Frontend Rendering** → Interactive crossword grid with clues

### Critical Dependencies

**Frontend:**
- React 18, Vite (development/build)
- Node.js 18+ and npm 9+

**Python Backend (Primary):**
- FastAPI, uvicorn, pydantic (web framework)
- pytest, pytest-asyncio (testing)

**Optional AI Features:**
- torch, sentence-transformers, faiss-cpu (vector search)
- httpx (for API testing)

**Node.js Backend (Legacy):**
- Express.js, cors, helmet
- JSON file-based word storage

The Python backend gracefully degrades to static word lists when AI dependencies are missing.

### API Endpoints

Both backends provide compatible REST APIs:
- `GET /api/topics` - Get available topics
- `POST /api/generate` - Generate crossword puzzle
- `POST /api/validate` - Validate user answers
- `GET /api/health` - Health check
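Once either backend is running on port 7860, the shared API surface can be exercised with a short httpx script. The `/api/generate` payload below is an assumed shape for illustration; the authoritative schema is whatever `src/routes/api.py` validates.

```python
# Quick manual smoke test of the API, assuming a backend on localhost:7860.
import httpx

BASE = "http://localhost:7860"

print(httpx.get(f"{BASE}/api/topics").json())
print(httpx.get(f"{BASE}/api/health").json())

resp = httpx.post(
    f"{BASE}/api/generate",
    json={"topics": ["animals"], "difficulty": "medium"},  # assumed field names
    timeout=60,
)
print(resp.status_code)
print(resp.json())
```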
### Testing Strategy

**Python Backend Tests:**
- `test_crossword_generator_fixed.py` - Grid generation logic
- `test_index_bug_fix.py` - Bounds checking and index error fixes (CRITICAL)
- `test_vector_search.py` - AI word generation (needs torch)
- `test_api_routes.py` - FastAPI endpoints (needs httpx)

**Frontend Tests:**
- Component testing with React Testing Library (if configured)
- E2E testing with Playwright/Cypress (if configured)
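The bounds-checking work is the kind of behavior these tests pin down. A hypothetical regression test (not taken from `test_index_bug_fix.py`, which will differ) looks like this: invalid placements should be rejected cleanly instead of surfacing as an `IndexError` from raw grid access.

```python
# Toy regression test for the bounds-checking behavior described above;
# the helper and assertions are illustrative, not the project's real tests.
import pytest

def place_across(grid, word, row, col):
    """Toy placement helper used only by this sketch."""
    if row < 0 or col < 0 or row >= len(grid) or col + len(word) > len(grid[row]):
        raise ValueError("placement out of bounds")
    for i, letter in enumerate(word):
        grid[row][col + i] = letter

def test_out_of_bounds_placement_is_rejected_cleanly():
    grid = [["" for _ in range(5)] for _ in range(5)]
    # 6 letters starting at column 2 overrun a 5-wide grid.
    with pytest.raises(ValueError):
        place_across(grid, "PYTHON", 0, 2)

def test_in_bounds_placement_succeeds():
    grid = [["" for _ in range(5)] for _ in range(5)]
    place_across(grid, "CAT", 4, 2)
    assert grid[4][2:5] == ["C", "A", "T"]
```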
### Key Fixes Applied

**Index Error Resolution:**
- Added comprehensive bounds checking in `_can_place_word()`, `_place_word()`, `_remove_word()`
- Fixed `_calculate_placement_score()` to validate grid coordinates before access
- All grid access operations now validate row/col bounds

**Word Boundary Issues:**
- 2-letter sequences at crossword intersections are normal behavior, not bugs
- Removed overly strict validation that was rejecting valid crossword patterns
- Grid placement logic maintains compatibility with JavaScript backend quality

### Environment Configuration

**Python Backend (Production):**
```bash
NODE_ENV=production
PORT=7860
EMBEDDING_MODEL=sentence-transformers/all-mpnet-base-v2
WORD_SIMILARITY_THRESHOLD=0.65
PYTHONPATH=/app/backend-py
PYTHONUNBUFFERED=1
```

**Frontend Development:**
```bash
VITE_API_BASE_URL=http://localhost:7860    # Points to Python backend
```

**Node.js Backend (Legacy):**
```bash
NODE_ENV=development
PORT=3000
DATABASE_URL=postgresql://user:pass@host:port/db    # Optional
```

### Performance Notes

**Python Backend:**
- **Startup**: ~30-60 seconds with AI (model download), ~2 seconds without
- **Memory**: ~500MB-1GB with AI, ~100MB without
- **Response Time**: ~200-500ms with vector search, ~100ms with static words
- FAISS index building is the main startup bottleneck

**Frontend:**
- **Development**: Hot reload with Vite (~200ms)
- **Build Time**: ~10-30 seconds for production build
- **Bundle Size**: Optimized with Vite tree-shaking

**Deployment:**
- Docker build time: ~5-10 minutes (includes frontend build + Python deps)
- Container size: ~1.5GB (includes ML models and dependencies)
- Hugging Face Spaces deployment: Automatic on git push
- Run unit tests after fixing a bug
Dockerfile
CHANGED
@@ -1,36 +1,85 @@
```dockerfile
# Multi-stage build to optimize performance and security
# Stage 1: Builder - Install dependencies and build as root
FROM python:3.11-slim as builder

# Set working directory
WORKDIR /app

# Install system dependencies for sentence-transformers and FAISS
RUN apt-get update && apt-get install -y \
    gcc \
    g++ \
    wget \
    curl \
    git \
    && rm -rf /var/lib/apt/lists/*

# Install Node.js for frontend build
RUN curl -fsSL https://deb.nodesource.com/setup_18.x | bash - && \
    apt-get install -y nodejs

# Copy frontend package files and install dependencies first (for better caching)
COPY crossword-app/frontend/package*.json ./frontend/
RUN cd frontend && npm ci

# Copy Python backend requirements and install dependencies
COPY crossword-app/backend-py/requirements.txt ./backend-py/
COPY crossword-app/backend-py/requirements-dev.txt ./backend-py/
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r backend-py/requirements-dev.txt

# Copy all source code
COPY crossword-app/frontend/ ./frontend/
COPY crossword-app/backend/ ./backend/
COPY crossword-app/backend-py/ ./backend-py/

# Build the React frontend
RUN cd frontend && npm run build

# Copy built frontend files to Python backend public directory
RUN mkdir -p backend-py/public && cp -r frontend/dist/* backend-py/public/

# Create symlink for shared data (word lists)
RUN cd backend-py && ln -sf ../backend/data data

# Stage 2: Runtime - Copy only necessary files as non-root user
FROM python:3.11-slim as runtime

# Copy Python packages from builder stage
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Install minimal runtime dependencies
RUN apt-get update && apt-get install -y \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Create non-root user
RUN useradd -m -u 1000 appuser

# Set working directory
WORKDIR /app/backend-py

# Copy built application files with correct ownership
COPY --from=builder --chown=appuser:appuser /app/backend-py ./
COPY --from=builder --chown=appuser:appuser /app/backend/data ./data

# Switch to non-root user
USER appuser

# Expose port 7860 (Hugging Face Spaces standard)
EXPOSE 7860

# Set environment variables for production
ENV NODE_ENV=production
ENV PORT=7860
ENV PYTHONPATH=/app/backend-py
ENV PYTHONUNBUFFERED=1
ENV PIP_NO_CACHE_DIR=1

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Start the Python backend server with uvicorn for better production performance
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
```
crossword-app/Dockerfile
CHANGED
@@ -1,36 +1,84 @@
```dockerfile
# Multi-stage build to optimize performance and security
# Stage 1: Builder - Install dependencies and build as root
FROM python:3.11-slim as builder

# Set working directory
WORKDIR /app

# Install system dependencies for sentence-transformers and FAISS
RUN apt-get update && apt-get install -y \
    gcc \
    g++ \
    wget \
    curl \
    git \
    && rm -rf /var/lib/apt/lists/*

# Install Node.js for frontend build
RUN curl -fsSL https://deb.nodesource.com/setup_18.x | bash - && \
    apt-get install -y nodejs

# Copy frontend package files and install dependencies first (for better caching)
COPY frontend/package*.json ./frontend/
RUN cd frontend && npm ci

# Copy Python backend requirements and install dependencies
COPY backend-py/requirements.txt ./backend-py/
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r backend-py/requirements.txt

# Copy all source code
COPY frontend/ ./frontend/
COPY backend/ ./backend/
COPY backend-py/ ./backend-py/

# Build the React frontend
RUN cd frontend && npm run build

# Copy built frontend files to Python backend public directory
RUN mkdir -p backend-py/public && cp -r frontend/dist/* backend-py/public/

# Create symlink for shared data (word lists)
RUN cd backend-py && ln -sf ../backend/data data

# Stage 2: Runtime - Copy only necessary files as non-root user
FROM python:3.11-slim as runtime

# Copy Python packages from builder stage
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Install minimal runtime dependencies
RUN apt-get update && apt-get install -y \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Create non-root user
RUN useradd -m -u 1000 appuser

# Set working directory
WORKDIR /app/backend-py

# Copy built application files with correct ownership
COPY --from=builder --chown=appuser:appuser /app/backend-py ./
COPY --from=builder --chown=appuser:appuser /app/backend/data ./data

# Switch to non-root user
USER appuser

# Expose port 7860 (Hugging Face Spaces standard)
EXPOSE 7860

# Set environment variables for production
ENV NODE_ENV=production
ENV PORT=7860
ENV PYTHONPATH=/app/backend-py
ENV PYTHONUNBUFFERED=1
ENV PIP_NO_CACHE_DIR=1

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Start the Python backend server with uvicorn for better production performance
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
```
crossword-app/backend-py/.coverage
ADDED
Binary file (53.2 kB)
crossword-app/backend-py/.env.example
ADDED
@@ -0,0 +1,20 @@
```
# Python Backend Environment Configuration

# Server Configuration
PORT=7860
HOST=0.0.0.0
NODE_ENV=production

# AI/ML Configuration
EMBEDDING_MODEL=sentence-transformers/all-mpnet-base-v2
WORD_SIMILARITY_THRESHOLD=0.65
MAX_VOCAB_SIZE=30000

# HuggingFace Configuration (if needed for cloud inference)
HUGGINGFACE_API_KEY=your_huggingface_api_key_here

# Logging
LOG_LEVEL=INFO

# Development settings
RELOAD=false
```
crossword-app/backend-py/README-local-setup.md
ADDED
@@ -0,0 +1,78 @@
# Local Development Setup

## Quick Start

```bash
# Install all dependencies (same as production)
pip install -r requirements.txt
```

## Python Version Support
- **Recommended**: Python 3.10-3.12
- **Minimum**: Python 3.10 (matches Docker)

## Installation Troubleshooting

### If you get PyTorch installation errors:
```bash
# Install PyTorch first with CPU support
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

# Then install remaining dependencies
pip install -r requirements-local.txt --no-deps
pip install fastapi uvicorn[standard] python-dotenv python-multipart
```

### For M1/M2 Macs:
```bash
# Use conda for better compatibility
conda install pytorch::pytorch torchvision torchaudio -c pytorch
pip install -r requirements-local.txt --no-deps
pip install sentence-transformers faiss-cpu transformers huggingface-hub
```

## Running Locally

```bash
cd crossword-app/backend-py
python app.py
```

The server will start on http://localhost:7860

## Features Available

- ✅ AI word generation via vector search
- ✅ 30K+ vocabulary from sentence-transformers
- ✅ Static word fallback
- ✅ All crossword features
- ✅ Same as production environment

## Environment Variables

Create a `.env` file:
```bash
# Optional - defaults to sentence-transformers/all-mpnet-base-v2
EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2

# Optional - similarity threshold for AI words
WORD_SIMILARITY_THRESHOLD=0.65

# Optional - logging level
LOG_LEVEL=INFO
```

## Testing

```bash
# Test basic components
python test_local.py

# Test with pytest
pytest
```

## Docker vs Local Development

Both use the same `requirements.txt` with modern, compatible versions that work across Python 3.9-3.12 and different platforms.
crossword-app/backend-py/README.md
ADDED
@@ -0,0 +1,332 @@
# Python Backend with Vector Similarity Search

This is the Python implementation of the crossword generator backend, featuring true AI word generation via vector similarity search.

## Features

- **True Vector Search**: Uses sentence-transformers + FAISS for semantic word discovery
- **30K+ Vocabulary**: Searches through full model vocabulary instead of limited static lists
- **FastAPI**: Modern, fast Python web framework
- **Same API**: Compatible with existing React frontend
- **Hybrid Approach**: AI vector search with static word fallback

## Differences from JavaScript Backend

| Feature | JavaScript Backend | Python Backend |
|---------|-------------------|----------------|
| **Word Generation** | Embedding filtering of static lists | True vector similarity search |
| **Vocabulary Size** | ~100 words per topic | 30K+ words from model |
| **AI Approach** | Semantic similarity filtering | Nearest neighbor search |
| **Performance** | Fast but limited | Slower startup, better results |
| **Dependencies** | Node.js + HuggingFace API | Python + ML libraries |

## Setup & Installation

### Prerequisites
- Python 3.11+ (3.11 recommended for Docker compatibility)
- pip (Python package manager)

### Basic Setup (Core Functionality)
```bash
# Clone and navigate to backend directory
cd crossword-app/backend-py

# Create virtual environment (recommended)
python -m venv venv
source venv/bin/activate    # On Windows: venv\Scripts\activate

# Install core dependencies
pip install -r requirements.txt

# Start the server
python app.py
```

### Full Development Setup (with AI features)
```bash
# Install development dependencies including AI/ML libraries
pip install -r requirements-dev.txt

# This includes:
# - All core dependencies
# - AI/ML libraries (torch, sentence-transformers, etc.)
# - Development tools (pytest, coverage, etc.)
```

### Requirements Files
- **`requirements.txt`**: Core dependencies for basic functionality
- **`requirements-dev.txt`**: Full development environment with AI features

> **Note**: The AI/ML dependencies are large (~2GB). For basic testing without AI features, use `requirements.txt` only.

> **Python Version**: Both local development and Docker use Python 3.11+ for optimal performance and latest package compatibility.

## Structure

```
backend-py/
├── app.py                     # FastAPI application entry point
├── requirements.txt           # Core Python dependencies
├── requirements-dev.txt       # Full development dependencies
├── src/
│   ├── services/
│   │   ├── vector_search.py          # Core vector similarity search
│   │   └── crossword_generator.py    # Puzzle generation logic
│   └── routes/
│       └── api.py                    # API endpoints (matches JS backend)
├── test-unit/                 # Unit tests (pytest framework) - 5 files
│   ├── test_crossword_generator.py
│   ├── test_api_routes.py
│   └── test_vector_search.py
├── test-integration/          # Integration tests (standalone scripts) - 16 files
│   ├── test_simple_generation.py
│   ├── test_boundary_fix.py
│   └── test_local.py          # (+ 13 more test files)
├── data/ -> ../backend/data/  # Symlink to shared word data
└── public/                    # Frontend static files (copied during build)
```

## Dependencies

### Core ML Stack
- `sentence-transformers`: Local model loading and embeddings
- `faiss-cpu`: Fast vector similarity search
- `torch`: PyTorch for model inference
- `numpy`: Vector operations

### Web Framework
- `fastapi`: Modern Python web framework
- `uvicorn`: ASGI server
- `pydantic`: Data validation

### Testing
- `pytest`: Testing framework
- `pytest-asyncio`: Async test support

## Testing

### Test Organization (Reorganized for Clarity)

**We've reorganized the test structure for better developer experience:**

| Test Type | Location | Purpose | Framework | Count |
|-----------|----------|---------|-----------|-------|
| **Unit Tests** | `test-unit/` | Test individual components in isolation | pytest | 5 files |
| **Integration Tests** | `test-integration/` | Test complete workflows end-to-end | Standalone scripts | 16 files |

**Benefits of this structure:**
- ✅ **Clear separation** between unit and integration testing
- ✅ **Intuitive naming** - developers immediately understand test types
- ✅ **Better tooling** - can run different test types independently
- ✅ **Easier maintenance** - organized by testing strategy

> **Note**: Previously tests were mixed in the `tests/` folder and root-level `test_*.py` files. The new structure provides much better organization.

### Unit Tests Details (`test-unit/`)

**What they test:** Individual components with mocking and isolation
- `test_crossword_generator.py` - Core crossword generation logic
- `test_api_routes.py` - FastAPI endpoint handlers
- `test_crossword_generator_wrapper.py` - Service wrapper layer
- `test_index_bug_fix.py` - Specific bug fix validations
- `test_vector_search.py` - AI vector search functionality (requires torch)

### Run Unit Tests (Formal Test Suite)
```bash
# Run all unit tests
python run_tests.py

# Run specific test modules
python run_tests.py crossword_generator
pytest test-unit/test_crossword_generator.py -v

# Run core tests (excluding AI dependencies)
pytest test-unit/ -v --ignore=test-unit/test_vector_search.py

# Run individual unit test classes
pytest test-unit/test_crossword_generator.py::TestCrosswordGenerator::test_init -v
```

### Integration Tests Details (`test-integration/`)

**What they test:** Complete workflows without mocking - real functionality
- `test_simple_generation.py` - End-to-end crossword generation
- `test_boundary_fix.py` - Word boundary validation (our major fix!)
- `test_local.py` - Local environment and dependencies
- `test_word_boundaries.py` - Comprehensive boundary testing
- `test_bounds_comprehensive.py` - Advanced bounds checking
- `test_final_validation.py` - API integration testing
- And 10 more specialized feature tests...

### Run Integration Tests (End-to-End Scripts)
```bash
# Test core functionality
python test-integration/test_simple_generation.py
python test-integration/test_boundary_fix.py
python test-integration/test_local.py

# Test specific features
python test-integration/test_word_boundaries.py
python test-integration/test_bounds_comprehensive.py

# Test API integration
python test-integration/test_final_validation.py
```

### Test Coverage
```bash
# Run core tests with coverage (requires requirements-dev.txt)
pytest test-unit/test_crossword_generator.py --cov=src --cov-report=html
pytest test-unit/test_crossword_generator.py --cov=src --cov-report=term

# Full coverage report (may fail without AI dependencies)
pytest test-unit/ --cov=src --cov-report=html --ignore=test-unit/test_vector_search.py
```

### Test Status
- ✅ **Core crossword generation**: 15/19 unit tests passing
- ✅ **Boundary validation**: All integration tests passing
- ⚠️ **AI/Vector search**: Requires torch dependencies
- ⚠️ **Some async mocking**: Minor test infrastructure issues

### Migration Guide (For Existing Developers)

**If you had previous commands, update them:**

| Old Command | New Command |
|-------------|-------------|
| `pytest tests/` | `pytest test-unit/` |
| `python test_simple_generation.py` | `python test-integration/test_simple_generation.py` |
| `pytest tests/ --cov=src` | `pytest test-unit/ --cov=src` |

**All functionality is preserved** - just organized better!

## Configuration

Environment variables (set in HuggingFace Spaces):

```bash
# Core settings
PORT=7860
NODE_ENV=production

# AI Configuration
EMBEDDING_MODEL=sentence-transformers/all-mpnet-base-v2
WORD_SIMILARITY_THRESHOLD=0.65

# Optional
LOG_LEVEL=INFO
```

## Vector Search Process

1. **Initialization**:
   - Load sentence-transformers model locally
   - Extract 30K+ vocabulary from model tokenizer
   - Pre-compute embeddings for all vocabulary words
   - Build FAISS index for fast similarity search

2. **Word Generation**:
   - Get topic embedding: `"Animals" → [768-dim vector]`
   - Search FAISS index for nearest neighbors
   - Filter by similarity threshold (0.65+)
   - Filter by difficulty (word length)
   - Return top matches with generated clues

3. **Fallback**:
   - If vector search fails → use static word lists
   - If insufficient AI words → supplement with static words
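For orientation, the core of steps 1-2 can be reproduced in a few lines with sentence-transformers and FAISS. This is a minimal sketch of the nearest-neighbour idea only; the real `vector_search.py` adds vocabulary extraction from the tokenizer, difficulty filtering, clue generation, and caching, and its internals may differ.

```python
# Minimal vector-search sketch: embed a small stand-in vocabulary, index it,
# and query it with a topic embedding. The tiny word list is illustrative only.
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

vocab = ["dog", "cat", "lion", "sparrow", "volcano", "laptop", "python"]
vocab_emb = np.asarray(model.encode(vocab, normalize_embeddings=True), dtype="float32")

# Inner product over normalized vectors is cosine similarity.
index = faiss.IndexFlatIP(vocab_emb.shape[1])
index.add(vocab_emb)

topic_emb = np.asarray(model.encode(["Animals"], normalize_embeddings=True), dtype="float32")
scores, ids = index.search(topic_emb, k=5)

threshold = 0.65  # mirrors WORD_SIMILARITY_THRESHOLD
matches = [(vocab[i], float(s)) for i, s in zip(ids[0], scores[0]) if s >= threshold]
print(matches)
```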
## Testing

```bash
# Local testing (without full vector search)
cd backend-py
python test_local.py

# Start development server
python app.py
```

## Docker Deployment

The Dockerfile has been updated to use the Python backend:

```dockerfile
FROM python:3.9-slim
# ... install dependencies
# ... build frontend (same as before)
# ... copy to backend-py/public/
CMD ["python", "app.py"]
```

## Testing

### Quick Test
```bash
# Basic functionality test (no model download)
python test_local.py
```

### Comprehensive Unit Tests
```bash
# Run all unit tests
python run_tests.py

# Or use pytest directly
pytest tests/ -v

# Run specific test file
python run_tests.py crossword_generator_fixed
pytest tests/test_crossword_generator_fixed.py -v

# Run with coverage
pytest tests/ --cov=src --cov-report=html
```

### Test Structure
- `tests/test_crossword_generator_fixed.py` - Core grid generation logic
- `tests/test_vector_search.py` - Vector similarity search
- `tests/test_crossword_generator_wrapper.py` - Service wrapper
- `tests/test_api_routes.py` - FastAPI endpoints

### Key Test Features
- ✅ **Index alignment fix**: Tests the list index out of range bug fix
- ✅ **Mocked vector search**: Tests without downloading models
- ✅ **API validation**: Tests all endpoints and error cases
- ✅ **Async support**: Full pytest-asyncio integration
- ✅ **Error handling**: Tests malformed inputs and edge cases

## Performance Comparison

**Startup Time**:
- JavaScript: ~2 seconds
- Python: ~30-60 seconds (model download + index building)

**Word Quality**:
- JavaScript: Limited by static word lists
- Python: Access to full model vocabulary with semantic understanding

**Memory Usage**:
- JavaScript: ~100MB
- Python: ~500MB-1GB (model + embeddings + FAISS index)

**API Response Time**:
- JavaScript: ~100ms (after cache warm-up)
- Python: ~200-500ms (vector search + filtering)

## Migration Strategy

1. **Phase 1** ✅: Basic Python backend structure
2. **Phase 2**: Test vector search functionality
3. **Phase 3**: Docker deployment and production testing
4. **Phase 4**: Compare with JavaScript backend
5. **Phase 5**: Production switch with rollback capability

## Next Steps

- [ ] Test vector search with real model
- [ ] Optimize FAISS index performance
- [ ] Add more sophisticated crossword grid generation
- [ ] Implement LLM-based clue generation
- [ ] Add caching for frequently requested topics
crossword-app/backend-py/__pycache__/test_bounds_comprehensive.cpython-313-pytest-8.4.1.pyc
ADDED
Binary file (39.4 kB)
crossword-app/backend-py/app.py
ADDED
@@ -0,0 +1,146 @@
```python
"""
FastAPI backend for crossword puzzle generator with vector similarity search.
"""

import os
import logging
import time
from datetime import datetime
from contextlib import asynccontextmanager
from pathlib import Path

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
import uvicorn
from dotenv import load_dotenv

from src.routes.api import router as api_router
from src.services.vector_search import VectorSearchService

# Load environment variables
load_dotenv()

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def log_with_timestamp(message):
    """Helper to log with precise timestamp."""
    timestamp = datetime.now().strftime("%H:%M:%S.%f")[:-3]
    logger.info(f"[{timestamp}] {message}")

# Global vector search service instance
vector_service = None

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Initialize and cleanup application resources."""
    global vector_service

    # Startup
    startup_time = time.time()
    log_with_timestamp("Initializing Python backend with vector search...")

    # Initialize vector search service
    try:
        service_start = time.time()
        log_with_timestamp("Creating VectorSearchService instance...")
        vector_service = VectorSearchService()

        log_with_timestamp("Starting vector search initialization...")
        await vector_service.initialize()

        init_time = time.time() - service_start
        log_with_timestamp(f"Vector search service initialized in {init_time:.2f}s")
    except Exception as e:
        logger.error(f"Failed to initialize vector search service: {e}")
        # Continue without vector search (will fallback to static words)

    # Make vector service available to routes
    app.state.vector_service = vector_service

    yield

    # Shutdown
    logger.info("Shutting down Python backend...")
    if vector_service:
        await vector_service.cleanup()

# Create FastAPI app
app = FastAPI(
    title="Crossword Puzzle Generator API",
    description="Python backend with AI-powered vector similarity search",
    version="2.0.0",
    lifespan=lifespan
)

# CORS configuration
cors_origins = []
if os.getenv("NODE_ENV") == "production":
    # Production: same origin
    cors_origins = ["*"]  # HuggingFace Spaces
else:
    # Development: allow dev servers
    cors_origins = [
        "http://localhost:5173",  # Vite dev server
        "http://localhost:3000",  # Alternative dev server
        "http://localhost:7860",  # Local production test
    ]

app.add_middleware(
    CORSMiddleware,
    allow_origins=cors_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include API routes
app.include_router(api_router, prefix="/api")

# Serve static files (frontend)
static_path = Path(__file__).parent / "public"
if static_path.exists():
    app.mount("/assets", StaticFiles(directory=static_path / "assets"), name="assets")

@app.get("/")
async def serve_frontend():
    """Serve the React frontend."""
    index_path = static_path / "index.html"
    if index_path.exists():
        return FileResponse(index_path)
    else:
        raise HTTPException(status_code=404, detail="Frontend not found")

@app.get("/{full_path:path}")
async def serve_spa_routes(full_path: str):
    """Serve React SPA routes."""
    # For any non-API route, serve the React app
    if not full_path.startswith("api/"):
        index_path = static_path / "index.html"
        if index_path.exists():
            return FileResponse(index_path)
    raise HTTPException(status_code=404, detail="Not found")

@app.get("/health")
async def health_check():
    """Health check endpoint."""
    return {
        "status": "healthy",
        "backend": "python",
        "vector_search": vector_service.is_initialized if vector_service else False
    }

if __name__ == "__main__":
    port = int(os.getenv("PORT", 7860))
    host = "0.0.0.0" if os.getenv("NODE_ENV") == "production" else "127.0.0.1"

    logger.info(f"Starting Python backend on {host}:{port}")
    uvicorn.run(
        "app:app",
        host=host,
        port=port,
        reload=os.getenv("NODE_ENV") != "production"
    )
```
crossword-app/backend-py/data/data
ADDED
@@ -0,0 +1 @@
../backend/data
crossword-app/backend-py/data/word-lists/animals.json
ADDED
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{ "word": "DOG", "clue": "Man's best friend" },
|
3 |
+
{ "word": "CAT", "clue": "Feline pet that purrs" },
|
4 |
+
{ "word": "ELEPHANT", "clue": "Large mammal with a trunk" },
|
5 |
+
{ "word": "TIGER", "clue": "Striped big cat" },
|
6 |
+
{ "word": "WHALE", "clue": "Largest marine mammal" },
|
7 |
+
{ "word": "BUTTERFLY", "clue": "Colorful flying insect" },
|
8 |
+
{ "word": "BIRD", "clue": "Flying creature with feathers" },
|
9 |
+
{ "word": "FISH", "clue": "Aquatic animal with gills" },
|
10 |
+
{ "word": "LION", "clue": "King of the jungle" },
|
11 |
+
{ "word": "BEAR", "clue": "Large mammal that hibernates" },
|
12 |
+
{ "word": "RABBIT", "clue": "Hopping mammal with long ears" },
|
13 |
+
{ "word": "HORSE", "clue": "Riding animal with hooves" },
|
14 |
+
{ "word": "SHEEP", "clue": "Woolly farm animal" },
|
15 |
+
{ "word": "GOAT", "clue": "Horned farm animal" },
|
16 |
+
{ "word": "DUCK", "clue": "Water bird that quacks" },
|
17 |
+
{ "word": "CHICKEN", "clue": "Farm bird that lays eggs" },
|
18 |
+
{ "word": "SNAKE", "clue": "Slithering reptile" },
|
19 |
+
{ "word": "TURTLE", "clue": "Shelled reptile" },
|
20 |
+
{ "word": "FROG", "clue": "Amphibian that croaks" },
|
21 |
+
{ "word": "SHARK", "clue": "Predatory ocean fish" },
|
22 |
+
{ "word": "DOLPHIN", "clue": "Intelligent marine mammal" },
|
23 |
+
{ "word": "PENGUIN", "clue": "Flightless Antarctic bird" },
|
24 |
+
{ "word": "MONKEY", "clue": "Primate that swings in trees" },
|
25 |
+
{ "word": "ZEBRA", "clue": "Striped African animal" },
|
26 |
+
{ "word": "GIRAFFE", "clue": "Tallest land animal" },
|
27 |
+
{ "word": "WOLF", "clue": "Wild canine that howls" },
|
28 |
+
{ "word": "FOX", "clue": "Cunning red-furred animal" },
|
29 |
+
{ "word": "DEER", "clue": "Graceful forest animal with antlers" },
|
30 |
+
{ "word": "MOOSE", "clue": "Large antlered animal" },
|
31 |
+
{ "word": "SQUIRREL", "clue": "Tree-climbing nut gatherer" },
|
32 |
+
{ "word": "RACCOON", "clue": "Masked nocturnal animal" },
|
33 |
+
{ "word": "BEAVER", "clue": "Dam-building rodent" },
|
34 |
+
{ "word": "OTTER", "clue": "Playful water mammal" },
|
35 |
+
{ "word": "SEAL", "clue": "Marine mammal with flippers" },
|
36 |
+
{ "word": "WALRUS", "clue": "Tusked Arctic marine mammal" },
|
37 |
+
{ "word": "RHINO", "clue": "Horned thick-skinned mammal" },
|
38 |
+
{ "word": "HIPPO", "clue": "Large African river mammal" },
|
39 |
+
{ "word": "CHEETAH", "clue": "Fastest land animal" },
|
40 |
+
{ "word": "LEOPARD", "clue": "Spotted big cat" },
|
41 |
+
{ "word": "JAGUAR", "clue": "South American big cat" },
|
42 |
+
{ "word": "PUMA", "clue": "Mountain lion" },
|
43 |
+
{ "word": "LYNX", "clue": "Wild cat with tufted ears" },
|
44 |
+
{ "word": "KANGAROO", "clue": "Hopping Australian marsupial" },
|
45 |
+
{ "word": "KOALA", "clue": "Eucalyptus-eating marsupial" },
|
46 |
+
{ "word": "PANDA", "clue": "Black and white bamboo eater" },
|
47 |
+
{ "word": "SLOTH", "clue": "Slow-moving tree dweller" },
|
48 |
+
{ "word": "ARMADILLO", "clue": "Armored mammal" },
|
49 |
+
{ "word": "ANTEATER", "clue": "Long-snouted insect eater" },
|
50 |
+
{ "word": "PLATYPUS", "clue": "Egg-laying mammal with a bill" },
|
51 |
+
{ "word": "BAT", "clue": "Flying mammal" },
|
52 |
+
{ "word": "MOLE", "clue": "Underground tunnel digger" },
|
53 |
+
{ "word": "HEDGEHOG", "clue": "Spiny small mammal" },
|
54 |
+
{ "word": "PORCUPINE", "clue": "Quill-covered rodent" },
|
55 |
+
{ "word": "SKUNK", "clue": "Black and white scent-spraying mammal" },
|
56 |
+
{ "word": "WEASEL", "clue": "Small carnivorous mammal" },
|
57 |
+
{ "word": "BADGER", "clue": "Burrowing black and white mammal" },
|
58 |
+
{ "word": "FERRET", "clue": "Domesticated hunting animal" },
|
59 |
+
{ "word": "MINK", "clue": "Valuable fur-bearing animal" },
|
60 |
+
{ "word": "EAGLE", "clue": "Majestic bird of prey" },
|
61 |
+
{ "word": "HAWK", "clue": "Sharp-eyed hunting bird" },
|
62 |
+
{ "word": "OWL", "clue": "Nocturnal bird with large eyes" },
|
63 |
+
{ "word": "FALCON", "clue": "Fast diving bird of prey" },
|
64 |
+
{ "word": "VULTURE", "clue": "Scavenging bird" },
|
65 |
+
{ "word": "CROW", "clue": "Black intelligent bird" },
|
66 |
+
{ "word": "RAVEN", "clue": "Large black corvid" },
|
67 |
+
{ "word": "ROBIN", "clue": "Red-breasted songbird" },
|
68 |
+
{ "word": "SPARROW", "clue": "Small brown songbird" },
|
69 |
+
{ "word": "CARDINAL", "clue": "Bright red songbird" },
|
70 |
+
{ "word": "BLUEJAY", "clue": "Blue crested bird" },
|
71 |
+
{ "word": "WOODPECKER", "clue": "Tree-pecking bird" },
|
72 |
+
{ "word": "HUMMINGBIRD", "clue": "Tiny fast-flying bird" },
|
73 |
+
{ "word": "PELICAN", "clue": "Large-billed water bird" },
|
 74 +   { "word": "FLAMINGO", "clue": "Pink wading bird" },
 75 +   { "word": "STORK", "clue": "Long-legged wading bird" },
 76 +   { "word": "HERON", "clue": "Tall fishing bird" },
 77 +   { "word": "CRANE", "clue": "Large wading bird" },
 78 +   { "word": "SWAN", "clue": "Elegant white water bird" },
 79 +   { "word": "GOOSE", "clue": "Large waterfowl" },
 80 +   { "word": "TURKEY", "clue": "Large ground bird" },
 81 +   { "word": "PHEASANT", "clue": "Colorful game bird" },
 82 +   { "word": "QUAIL", "clue": "Small ground bird" },
 83 +   { "word": "PEACOCK", "clue": "Bird with spectacular tail feathers" },
 84 +   { "word": "OSTRICH", "clue": "Largest flightless bird" },
 85 +   { "word": "EMU", "clue": "Australian flightless bird" },
 86 +   { "word": "KIWI", "clue": "Small flightless New Zealand bird" },
 87 +   { "word": "PARROT", "clue": "Colorful talking bird" },
 88 +   { "word": "TOUCAN", "clue": "Large-billed tropical bird" },
 89 +   { "word": "MACAW", "clue": "Large colorful parrot" },
 90 +   { "word": "COCKATOO", "clue": "Crested parrot" },
 91 +   { "word": "CANARY", "clue": "Yellow singing bird" },
 92 +   { "word": "FINCH", "clue": "Small seed-eating bird" },
 93 +   { "word": "PIGEON", "clue": "Common city bird" },
 94 +   { "word": "DOVE", "clue": "Symbol of peace" },
 95 +   { "word": "SEAGULL", "clue": "Coastal scavenging bird" },
 96 +   { "word": "ALBATROSS", "clue": "Large ocean bird" },
 97 +   { "word": "PUFFIN", "clue": "Colorful-billed seabird" },
 98 +   { "word": "LIZARD", "clue": "Small scaly reptile" },
 99 +   { "word": "IGUANA", "clue": "Large tropical lizard" },
100 +   { "word": "GECKO", "clue": "Wall-climbing lizard" },
101 +   { "word": "CHAMELEON", "clue": "Color-changing reptile" },
102 +   { "word": "ALLIGATOR", "clue": "Large American crocodilian" },
103 +   { "word": "CROCODILE", "clue": "Large aquatic reptile" },
104 +   { "word": "PYTHON", "clue": "Large constricting snake" },
105 +   { "word": "COBRA", "clue": "Venomous hooded snake" },
106 +   { "word": "VIPER", "clue": "Poisonous snake" },
107 +   { "word": "RATTLESNAKE", "clue": "Snake with warning tail" },
108 +   { "word": "SALAMANDER", "clue": "Amphibian that can regrow limbs" },
109 +   { "word": "NEWT", "clue": "Small aquatic salamander" },
110 +   { "word": "TOAD", "clue": "Warty amphibian" },
111 +   { "word": "TADPOLE", "clue": "Frog larva" },
112 +   { "word": "SALMON", "clue": "Fish that swims upstream" },
113 +   { "word": "TROUT", "clue": "Freshwater game fish" },
114 +   { "word": "BASS", "clue": "Popular sport fish" },
115 +   { "word": "TUNA", "clue": "Large ocean fish" },
116 +   { "word": "SWORDFISH", "clue": "Fish with long pointed bill" },
117 +   { "word": "MARLIN", "clue": "Large billfish" },
118 +   { "word": "MANTA", "clue": "Large ray fish" },
119 +   { "word": "STINGRAY", "clue": "Flat fish with barbed tail" },
120 +   { "word": "EEL", "clue": "Snake-like fish" },
121 +   { "word": "SEAHORSE", "clue": "Horse-shaped fish" },
122 +   { "word": "ANGELFISH", "clue": "Colorful tropical fish" },
123 +   { "word": "GOLDFISH", "clue": "Common pet fish" },
124 +   { "word": "CLOWNFISH", "clue": "Orange and white anemone fish" },
125 +   { "word": "JELLYFISH", "clue": "Transparent stinging sea creature" },
126 +   { "word": "OCTOPUS", "clue": "Eight-armed sea creature" },
127 +   { "word": "SQUID", "clue": "Ten-armed cephalopod" },
128 +   { "word": "CRAB", "clue": "Sideways-walking crustacean" },
129 +   { "word": "LOBSTER", "clue": "Large marine crustacean" },
130 +   { "word": "SHRIMP", "clue": "Small crustacean" },
131 +   { "word": "STARFISH", "clue": "Five-armed sea creature" },
132 +   { "word": "URCHIN", "clue": "Spiny sea creature" },
133 +   { "word": "CORAL", "clue": "Marine organism that builds reefs" },
134 +   { "word": "SPONGE", "clue": "Simple marine animal" },
135 +   { "word": "OYSTER", "clue": "Pearl-producing mollusk" },
136 +   { "word": "CLAM", "clue": "Burrowing shellfish" },
137 +   { "word": "MUSSEL", "clue": "Dark-shelled mollusk" },
138 +   { "word": "SNAIL", "clue": "Spiral-shelled gastropod" },
139 +   { "word": "SLUG", "clue": "Shell-less gastropod" },
140 +   { "word": "WORM", "clue": "Segmented invertebrate" },
141 +   { "word": "SPIDER", "clue": "Eight-legged web spinner" },
142 +   { "word": "SCORPION", "clue": "Arachnid with stinging tail" },
143 +   { "word": "ANT", "clue": "Social insect worker" },
144 +   { "word": "BEE", "clue": "Honey-making insect" },
145 +   { "word": "WASP", "clue": "Stinging flying insect" },
146 +   { "word": "HORNET", "clue": "Large aggressive wasp" },
147 +   { "word": "FLY", "clue": "Common buzzing insect" },
148 +   { "word": "MOSQUITO", "clue": "Blood-sucking insect" },
149 +   { "word": "BEETLE", "clue": "Hard-shelled insect" },
150 +   { "word": "LADYBUG", "clue": "Red spotted beneficial insect" },
151 +   { "word": "DRAGONFLY", "clue": "Large-winged flying insect" },
152 +   { "word": "GRASSHOPPER", "clue": "Jumping green insect" },
153 +   { "word": "CRICKET", "clue": "Chirping insect" },
154 +   { "word": "MANTIS", "clue": "Praying insect predator" },
155 +   { "word": "MOTH", "clue": "Nocturnal butterfly relative" },
156 +   { "word": "CATERPILLAR", "clue": "Butterfly larva" },
157 +   { "word": "COCOON", "clue": "Insect transformation casing" },
158 +   { "word": "TERMITE", "clue": "Wood-eating social insect" },
159 +   { "word": "TICK", "clue": "Blood-sucking parasite" },
160 +   { "word": "FLEA", "clue": "Jumping parasite" },
161 +   { "word": "LOUSE", "clue": "Small parasitic insect" },
162 +   { "word": "APHID", "clue": "Plant-sucking insect" },
163 +   { "word": "MAGGOT", "clue": "Fly larva" },
164 +   { "word": "GRUB", "clue": "Beetle larva" }
165 + ]
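Each of these word-list files is a flat JSON array of `{"word", "clue"}` objects, so a topic's static vocabulary can be read with nothing but the standard library. The helper below is a minimal sketch of that loading step, assuming the lists live under `data/word-lists/`; the function name and the length filter are illustrative and are not the actual `word_cache.py` API.

```python
import json
from pathlib import Path

WORD_LIST_DIR = Path("data/word-lists")  # assumed location, relative to backend-py

def load_topic_words(topic: str, min_len: int = 3, max_len: int = 15) -> list[dict]:
    """Hypothetical helper: load a topic's static list and keep grid-friendly lengths."""
    with open(WORD_LIST_DIR / f"{topic}.json", encoding="utf-8") as fh:
        entries = json.load(fh)
    # Drop answers too short or too long to place comfortably in a crossword grid.
    return [e for e in entries if min_len <= len(e["word"]) <= max_len]

# Example: load_topic_words("animals") yields dicts such as
# {"word": "FLAMINGO", "clue": "Pink wading bird"}
```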
crossword-app/backend-py/data/word-lists/geography.json
ADDED
@@ -0,0 +1,161 @@
  1 + [
  2 +   { "word": "MOUNTAIN", "clue": "High elevation landform" },
  3 +   { "word": "OCEAN", "clue": "Large body of salt water" },
  4 +   { "word": "DESERT", "clue": "Dry, arid region" },
  5 +   { "word": "CONTINENT", "clue": "Large landmass" },
  6 +   { "word": "RIVER", "clue": "Flowing body of water" },
  7 +   { "word": "ISLAND", "clue": "Land surrounded by water" },
  8 +   { "word": "FOREST", "clue": "Dense area of trees" },
  9 +   { "word": "VALLEY", "clue": "Low area between hills" },
 10 +   { "word": "LAKE", "clue": "Body of freshwater" },
 11 +   { "word": "BEACH", "clue": "Sandy shore by water" },
 12 +   { "word": "CLIFF", "clue": "Steep rock face" },
 13 +   { "word": "PLATEAU", "clue": "Elevated flat area" },
 14 +   { "word": "CANYON", "clue": "Deep gorge with steep sides" },
 15 +   { "word": "GLACIER", "clue": "Moving mass of ice" },
 16 +   { "word": "VOLCANO", "clue": "Mountain that erupts" },
 17 +   { "word": "PENINSULA", "clue": "Land surrounded by water on three sides" },
 18 +   { "word": "ARCHIPELAGO", "clue": "Group of islands" },
 19 +   { "word": "PRAIRIE", "clue": "Grassland plain" },
 20 +   { "word": "TUNDRA", "clue": "Cold, treeless region" },
 21 +   { "word": "SAVANNA", "clue": "Tropical grassland" },
 22 +   { "word": "EQUATOR", "clue": "Earth's middle line" },
 23 +   { "word": "LATITUDE", "clue": "Distance from equator" },
 24 +   { "word": "LONGITUDE", "clue": "Distance from prime meridian" },
 25 +   { "word": "CLIMATE", "clue": "Long-term weather pattern" },
 26 +   { "word": "MONSOON", "clue": "Seasonal wind pattern" },
 27 +   { "word": "CAPITAL", "clue": "Main city of country" },
 28 +   { "word": "BORDER", "clue": "Boundary between countries" },
 29 +   { "word": "COAST", "clue": "Land meeting the sea" },
 30 +   { "word": "STRAIT", "clue": "Narrow water passage" },
 31 +   { "word": "DELTA", "clue": "River mouth formation" },
 32 +   { "word": "FJORD", "clue": "Narrow inlet between cliffs" },
 33 +   { "word": "ATOLL", "clue": "Ring-shaped coral island" },
 34 +   { "word": "MESA", "clue": "Flat-topped hill" },
 35 +   { "word": "BUTTE", "clue": "Isolated hill with steep sides" },
 36 +   { "word": "GORGE", "clue": "Deep narrow valley" },
 37 +   { "word": "RAVINE", "clue": "Small narrow gorge" },
 38 +   { "word": "RIDGE", "clue": "Long narrow hilltop" },
 39 +   { "word": "PEAK", "clue": "Mountain summit" },
 40 +   { "word": "SUMMIT", "clue": "Highest point" },
 41 +   { "word": "FOOTHILLS", "clue": "Hills at base of mountains" },
 42 +   { "word": "RANGE", "clue": "Chain of mountains" },
 43 +   { "word": "BASIN", "clue": "Low-lying area" },
 44 +   { "word": "WATERSHED", "clue": "Drainage area" },
 45 +   { "word": "ESTUARY", "clue": "Where river meets sea" },
 46 +   { "word": "BAY", "clue": "Curved inlet of water" },
 47 +   { "word": "GULF", "clue": "Large bay" },
 48 +   { "word": "CAPE", "clue": "Point of land into water" },
 49 +   { "word": "HEADLAND", "clue": "High point of land" },
 50 +   { "word": "LAGOON", "clue": "Shallow coastal body of water" },
 51 +   { "word": "REEF", "clue": "Underwater rock formation" },
 52 +   { "word": "SHOAL", "clue": "Shallow area in water" },
 53 +   { "word": "CHANNEL", "clue": "Deep water passage" },
 54 +   { "word": "SOUND", "clue": "Large sea inlet" },
 55 +   { "word": "HARBOR", "clue": "Sheltered port area" },
 56 +   { "word": "INLET", "clue": "Small bay" },
 57 +   { "word": "COVE", "clue": "Small sheltered bay" },
 58 +   { "word": "MARSH", "clue": "Wetland area" },
 59 +   { "word": "SWAMP", "clue": "Forested wetland" },
 60 +   { "word": "BOG", "clue": "Acidic wetland" },
 61 +   { "word": "OASIS", "clue": "Fertile spot in desert" },
 62 +   { "word": "DUNE", "clue": "Sand hill" },
 63 +   { "word": "PLAIN", "clue": "Flat grassland" },
 64 +   { "word": "STEPPE", "clue": "Dry grassland" },
 65 +   { "word": "TAIGA", "clue": "Northern coniferous forest" },
 66 +   { "word": "RAINFOREST", "clue": "Dense tropical forest" },
 67 +   { "word": "JUNGLE", "clue": "Dense tropical vegetation" },
 68 +   { "word": "WOODLAND", "clue": "Area with scattered trees" },
 69 +   { "word": "GROVE", "clue": "Small group of trees" },
 70 +   { "word": "MEADOW", "clue": "Grassy field" },
 71 +   { "word": "PASTURE", "clue": "Grazing land" },
 72 +   { "word": "FIELD", "clue": "Open area of land" },
 73 +   { "word": "MOOR", "clue": "Open uncultivated land" },
 74 +   { "word": "HEATH", "clue": "Shrubland area" },
 75 +   { "word": "ARCTIC", "clue": "Cold northern region" },
 76 +   { "word": "ANTARCTIC", "clue": "Cold southern region" },
 77 +   { "word": "POLAR", "clue": "Of the poles" },
 78 +   { "word": "TROPICAL", "clue": "Hot humid climate zone" },
 79 +   { "word": "TEMPERATE", "clue": "Moderate climate zone" },
 80 +   { "word": "ARID", "clue": "Very dry" },
 81 +   { "word": "HUMID", "clue": "Moist air" },
 82 +   { "word": "ALTITUDE", "clue": "Height above sea level" },
 83 +   { "word": "ELEVATION", "clue": "Height of land" },
 84 +   { "word": "TERRAIN", "clue": "Physical features of land" },
 85 +   { "word": "TOPOGRAPHY", "clue": "Surface features of area" },
 86 +   { "word": "GEOGRAPHY", "clue": "Study of Earth's features" },
 87 +   { "word": "CARTOGRAPHY", "clue": "Map making" },
 88 +   { "word": "MERIDIAN", "clue": "Longitude line" },
 89 +   { "word": "PARALLEL", "clue": "Latitude line" },
 90 +   { "word": "HEMISPHERE", "clue": "Half of Earth" },
 91 +   { "word": "TROPICS", "clue": "Hot climate zone" },
 92 +   { "word": "POLES", "clue": "Earth's endpoints" },
 93 +   { "word": "AXIS", "clue": "Earth's rotation line" },
 94 +   { "word": "ORBIT", "clue": "Path around sun" },
 95 +   { "word": "SEASON", "clue": "Time of year" },
 96 +   { "word": "SOLSTICE", "clue": "Longest or shortest day" },
 97 +   { "word": "EQUINOX", "clue": "Equal day and night" },
 98 +   { "word": "COMPASS", "clue": "Direction-finding tool" },
 99 +   { "word": "NAVIGATION", "clue": "Finding your way" },
100 +   { "word": "BEARING", "clue": "Direction or course" },
101 +   { "word": "AZIMUTH", "clue": "Compass direction" },
102 +   { "word": "SCALE", "clue": "Map size ratio" },
103 +   { "word": "LEGEND", "clue": "Map symbol key" },
104 +   { "word": "CONTOUR", "clue": "Elevation line on map" },
105 +   { "word": "GRID", "clue": "Map reference system" },
106 +   { "word": "PROJECTION", "clue": "Map flattening method" },
107 +   { "word": "SURVEY", "clue": "Land measurement" },
108 +   { "word": "BOUNDARY", "clue": "Dividing line" },
109 +   { "word": "FRONTIER", "clue": "Border region" },
110 +   { "word": "TERRITORY", "clue": "Area of land" },
111 +   { "word": "REGION", "clue": "Geographic area" },
112 +   { "word": "ZONE", "clue": "Designated area" },
113 +   { "word": "DISTRICT", "clue": "Administrative area" },
114 +   { "word": "PROVINCE", "clue": "Political subdivision" },
115 +   { "word": "STATE", "clue": "Political entity" },
116 +   { "word": "COUNTY", "clue": "Local government area" },
117 +   { "word": "CITY", "clue": "Large urban area" },
118 +   { "word": "TOWN", "clue": "Small urban area" },
119 +   { "word": "VILLAGE", "clue": "Small rural community" },
120 +   { "word": "HAMLET", "clue": "Very small village" },
121 +   { "word": "SUBURB", "clue": "Residential area outside city" },
122 +   { "word": "URBAN", "clue": "City-like" },
123 +   { "word": "RURAL", "clue": "Countryside" },
124 +   { "word": "METROPOLITAN", "clue": "Large city area" },
125 +   { "word": "POPULATION", "clue": "Number of people" },
126 +   { "word": "DENSITY", "clue": "Crowdedness" },
127 +   { "word": "SETTLEMENT", "clue": "Place where people live" },
128 +   { "word": "COLONY", "clue": "Overseas territory" },
129 +   { "word": "NATION", "clue": "Country" },
130 +   { "word": "REPUBLIC", "clue": "Democratic state" },
131 +   { "word": "KINGDOM", "clue": "Monarchy" },
132 +   { "word": "EMPIRE", "clue": "Large political entity" },
133 +   { "word": "FEDERATION", "clue": "Union of states" },
134 +   { "word": "ALLIANCE", "clue": "Partnership of nations" },
135 +   { "word": "TREATY", "clue": "International agreement" },
136 +   { "word": "TRADE", "clue": "Commercial exchange" },
137 +   { "word": "EXPORT", "clue": "Goods sent abroad" },
138 +   { "word": "IMPORT", "clue": "Goods brought in" },
139 +   { "word": "COMMERCE", "clue": "Business activity" },
140 +   { "word": "INDUSTRY", "clue": "Manufacturing" },
141 +   { "word": "AGRICULTURE", "clue": "Farming" },
142 +   { "word": "MINING", "clue": "Extracting minerals" },
143 +   { "word": "FORESTRY", "clue": "Tree management" },
144 +   { "word": "FISHING", "clue": "Catching fish" },
145 +   { "word": "TOURISM", "clue": "Travel industry" },
146 +   { "word": "TRANSPORTATION", "clue": "Moving people and goods" },
147 +   { "word": "INFRASTRUCTURE", "clue": "Basic facilities" },
148 +   { "word": "COMMUNICATION", "clue": "Information exchange" },
149 +   { "word": "CULTURE", "clue": "Way of life" },
150 +   { "word": "LANGUAGE", "clue": "Communication system" },
151 +   { "word": "RELIGION", "clue": "Belief system" },
152 +   { "word": "ETHNICITY", "clue": "Cultural group" },
153 +   { "word": "MIGRATION", "clue": "Movement of people" },
154 +   { "word": "IMMIGRATION", "clue": "Moving into country" },
155 +   { "word": "EMIGRATION", "clue": "Moving out of country" },
156 +   { "word": "DIASPORA", "clue": "Scattered population" },
157 +   { "word": "NOMAD", "clue": "Wandering person" },
158 +   { "word": "REFUGEE", "clue": "Displaced person" },
159 +   { "word": "CENSUS", "clue": "Population count" },
160 +   { "word": "DEMOGRAPHIC", "clue": "Population characteristic" }
161 + ]
crossword-app/backend-py/data/word-lists/science.json
ADDED
@@ -0,0 +1,170 @@
  1 + [
  2 +   { "word": "ATOM", "clue": "Smallest unit of matter" },
  3 +   { "word": "GRAVITY", "clue": "Force that pulls objects down" },
  4 +   { "word": "MOLECULE", "clue": "Group of atoms bonded together" },
  5 +   { "word": "PHOTON", "clue": "Particle of light" },
  6 +   { "word": "CHEMISTRY", "clue": "Study of matter and reactions" },
  7 +   { "word": "PHYSICS", "clue": "Study of matter and energy" },
  8 +   { "word": "BIOLOGY", "clue": "Study of living organisms" },
  9 +   { "word": "ELEMENT", "clue": "Pure chemical substance" },
 10 +   { "word": "OXYGEN", "clue": "Gas essential for breathing" },
 11 +   { "word": "CARBON", "clue": "Element found in all life" },
 12 +   { "word": "HYDROGEN", "clue": "Lightest chemical element" },
 13 +   { "word": "ENERGY", "clue": "Capacity to do work" },
 14 +   { "word": "FORCE", "clue": "Push or pull on an object" },
 15 +   { "word": "VELOCITY", "clue": "Speed with direction" },
 16 +   { "word": "MASS", "clue": "Amount of matter in object" },
 17 +   { "word": "VOLUME", "clue": "Amount of space occupied" },
 18 +   { "word": "DENSITY", "clue": "Mass per unit volume" },
 19 +   { "word": "PRESSURE", "clue": "Force per unit area" },
 20 +   { "word": "TEMPERATURE", "clue": "Measure of heat" },
 21 +   { "word": "ELECTRON", "clue": "Negatively charged particle" },
 22 +   { "word": "PROTON", "clue": "Positively charged particle" },
 23 +   { "word": "NEUTRON", "clue": "Neutral atomic particle" },
 24 +   { "word": "NUCLEUS", "clue": "Center of an atom" },
 25 +   { "word": "CELL", "clue": "Basic unit of life" },
 26 +   { "word": "DNA", "clue": "Genetic blueprint molecule" },
 27 +   { "word": "PROTEIN", "clue": "Complex biological molecule" },
 28 +   { "word": "ENZYME", "clue": "Biological catalyst" },
 29 +   { "word": "VIRUS", "clue": "Infectious agent" },
 30 +   { "word": "BACTERIA", "clue": "Single-celled organisms" },
 31 +   { "word": "EVOLUTION", "clue": "Change in species over time" },
 32 +   { "word": "ISOTOPE", "clue": "Atom variant with different neutrons" },
 33 +   { "word": "ION", "clue": "Charged atom or molecule" },
 34 +   { "word": "COMPOUND", "clue": "Chemical combination of elements" },
 35 +   { "word": "MIXTURE", "clue": "Combined substances retaining properties" },
 36 +   { "word": "SOLUTION", "clue": "Dissolved mixture" },
 37 +   { "word": "ACID", "clue": "Sour chemical with low pH" },
 38 +   { "word": "BASE", "clue": "Alkaline substance with high pH" },
 39 +   { "word": "SALT", "clue": "Ionic compound from acid-base reaction" },
 40 +   { "word": "CATALYST", "clue": "Substance that speeds reactions" },
 41 +   { "word": "RNA", "clue": "Genetic messenger molecule" },
 42 +   { "word": "GENE", "clue": "Heredity unit on chromosome" },
 43 +   { "word": "CHROMOSOME", "clue": "Gene-carrying structure" },
 44 +   { "word": "TISSUE", "clue": "Group of similar cells" },
 45 +   { "word": "ORGAN", "clue": "Body part with specific function" },
 46 +   { "word": "SYSTEM", "clue": "Group of organs working together" },
 47 +   { "word": "ORGANISM", "clue": "Living individual entity" },
 48 +   { "word": "SPECIES", "clue": "Group of similar organisms" },
 49 +   { "word": "ADAPTATION", "clue": "Survival-enhancing change" },
 50 +   { "word": "MUTATION", "clue": "Genetic change in DNA" },
 51 +   { "word": "HEREDITY", "clue": "Passing traits to offspring" },
 52 +   { "word": "ECOSYSTEM", "clue": "Community and environment" },
 53 +   { "word": "HABITAT", "clue": "Natural living environment" },
 54 +   { "word": "BIODIVERSITY", "clue": "Variety of life forms" },
 55 +   { "word": "PHOTOSYNTHESIS", "clue": "Plant energy-making process" },
 56 +   { "word": "RESPIRATION", "clue": "Cellular breathing process" },
 57 +   { "word": "METABOLISM", "clue": "Chemical processes in body" },
 58 +   { "word": "HOMEOSTASIS", "clue": "Body's internal balance" },
 59 +   { "word": "MITOSIS", "clue": "Cell division for growth" },
 60 +   { "word": "MEIOSIS", "clue": "Cell division for reproduction" },
 61 +   { "word": "EMBRYO", "clue": "Early development stage" },
 62 +   { "word": "FOSSIL", "clue": "Preserved ancient remains" },
 63 +   { "word": "GEOLOGY", "clue": "Study of Earth's structure" },
 64 +   { "word": "MINERAL", "clue": "Natural inorganic crystal" },
 65 +   { "word": "ROCK", "clue": "Solid earth material" },
 66 +   { "word": "SEDIMENT", "clue": "Settled particles" },
 67 +   { "word": "EROSION", "clue": "Gradual wearing away" },
 68 +   { "word": "VOLCANO", "clue": "Earth opening spewing lava" },
 69 +   { "word": "EARTHQUAKE", "clue": "Ground shaking from plate movement" },
 70 +   { "word": "PLATE", "clue": "Earth's crust section" },
 71 +   { "word": "MAGMA", "clue": "Molten rock beneath surface" },
 72 +   { "word": "LAVA", "clue": "Molten rock on surface" },
 73 +   { "word": "CRYSTAL", "clue": "Ordered atomic structure" },
 74 +   { "word": "ATMOSPHERE", "clue": "Layer of gases around Earth" },
 75 +   { "word": "CLIMATE", "clue": "Long-term weather pattern" },
 76 +   { "word": "WEATHER", "clue": "Short-term atmospheric conditions" },
 77 +   { "word": "PRECIPITATION", "clue": "Water falling from clouds" },
 78 +   { "word": "HUMIDITY", "clue": "Moisture in air" },
 79 +   { "word": "WIND", "clue": "Moving air mass" },
 80 +   { "word": "STORM", "clue": "Violent weather event" },
 81 +   { "word": "HURRICANE", "clue": "Powerful tropical cyclone" },
 82 +   { "word": "TORNADO", "clue": "Rotating column of air" },
 83 +   { "word": "LIGHTNING", "clue": "Electrical discharge in sky" },
 84 +   { "word": "THUNDER", "clue": "Sound of lightning" },
 85 +   { "word": "RAINBOW", "clue": "Spectrum of light in sky" },
 86 +   { "word": "ASTRONOMY", "clue": "Study of celestial objects" },
 87 +   { "word": "GALAXY", "clue": "Collection of stars and planets" },
 88 +   { "word": "PLANET", "clue": "Large orbiting celestial body" },
 89 +   { "word": "STAR", "clue": "Self-luminous celestial body" },
 90 +   { "word": "MOON", "clue": "Natural satellite of planet" },
 91 +   { "word": "COMET", "clue": "Icy body with tail" },
 92 +   { "word": "ASTEROID", "clue": "Rocky space object" },
 93 +   { "word": "METEOR", "clue": "Space rock entering atmosphere" },
 94 +   { "word": "ORBIT", "clue": "Curved path around object" },
 95 +   { "word": "LIGHT", "clue": "Electromagnetic radiation" },
 96 +   { "word": "SPECTRUM", "clue": "Range of electromagnetic radiation" },
 97 +   { "word": "WAVELENGTH", "clue": "Distance between wave peaks" },
 98 +   { "word": "FREQUENCY", "clue": "Waves per unit time" },
 99 +   { "word": "AMPLITUDE", "clue": "Wave height or intensity" },
100 +   { "word": "SOUND", "clue": "Vibrations in air" },
101 +   { "word": "ECHO", "clue": "Reflected sound" },
102 +   { "word": "RESONANCE", "clue": "Vibration amplification" },
103 +   { "word": "DOPPLER", "clue": "Wave frequency shift effect" },
104 +   { "word": "MOTION", "clue": "Change in position" },
105 +   { "word": "ACCELERATION", "clue": "Change in velocity" },
106 +   { "word": "MOMENTUM", "clue": "Mass times velocity" },
107 +   { "word": "INERTIA", "clue": "Resistance to motion change" },
108 +   { "word": "FRICTION", "clue": "Resistance to sliding" },
109 +   { "word": "HEAT", "clue": "Thermal energy transfer" },
110 +   { "word": "COMBUSTION", "clue": "Burning chemical reaction" },
111 +   { "word": "OXIDATION", "clue": "Reaction with oxygen" },
112 +   { "word": "REDUCTION", "clue": "Gain of electrons" },
113 +   { "word": "ELECTROLYSIS", "clue": "Chemical breakdown by electricity" },
114 +   { "word": "CONDUCTIVITY", "clue": "Ability to transfer energy" },
115 +   { "word": "INSULATOR", "clue": "Material blocking energy flow" },
116 +   { "word": "SEMICONDUCTOR", "clue": "Partial electrical conductor" },
117 +   { "word": "MAGNETISM", "clue": "Force of magnetic attraction" },
118 +   { "word": "FIELD", "clue": "Region of force influence" },
119 +   { "word": "CIRCUIT", "clue": "Closed electrical path" },
120 +   { "word": "CURRENT", "clue": "Flow of electric charge" },
121 +   { "word": "VOLTAGE", "clue": "Electric potential difference" },
122 +   { "word": "RESISTANCE", "clue": "Opposition to current flow" },
123 +   { "word": "CAPACITOR", "clue": "Device storing electric charge" },
124 +   { "word": "INDUCTOR", "clue": "Device storing magnetic energy" },
125 +   { "word": "TRANSISTOR", "clue": "Electronic switching device" },
126 +   { "word": "LASER", "clue": "Focused beam of light" },
127 +   { "word": "RADAR", "clue": "Radio detection system" },
128 +   { "word": "SONAR", "clue": "Sound detection system" },
129 +   { "word": "TELESCOPE", "clue": "Instrument for viewing distant objects" },
130 +   { "word": "MICROSCOPE", "clue": "Instrument for viewing small objects" },
131 +   { "word": "HYPOTHESIS", "clue": "Testable scientific prediction" },
132 +   { "word": "THEORY", "clue": "Well-tested scientific explanation" },
133 +   { "word": "LAW", "clue": "Consistently observed scientific rule" },
134 +   { "word": "EXPERIMENT", "clue": "Controlled scientific test" },
135 +   { "word": "OBSERVATION", "clue": "Careful scientific watching" },
136 +   { "word": "MEASUREMENT", "clue": "Quantified observation" },
137 +   { "word": "ANALYSIS", "clue": "Detailed examination of data" },
138 +   { "word": "SYNTHESIS", "clue": "Combining elements into whole" },
139 +   { "word": "VARIABLE", "clue": "Factor that can change" },
140 +   { "word": "CONTROL", "clue": "Unchanged comparison group" },
141 +   { "word": "DATA", "clue": "Information collected from tests" },
142 +   { "word": "STATISTICS", "clue": "Mathematical analysis of data" },
143 +   { "word": "PROBABILITY", "clue": "Likelihood of occurrence" },
144 +   { "word": "PRECISION", "clue": "Exactness of measurement" },
145 +   { "word": "ACCURACY", "clue": "Correctness of measurement" },
146 +   { "word": "ERROR", "clue": "Difference from true value" },
147 +   { "word": "UNCERTAINTY", "clue": "Range of doubt in measurement" },
148 +   { "word": "CALIBRATION", "clue": "Adjusting instrument accuracy" },
149 +   { "word": "STANDARD", "clue": "Reference for measurement" },
150 +   { "word": "UNIT", "clue": "Base measure of quantity" },
151 +   { "word": "METRIC", "clue": "Decimal measurement system" },
152 +   { "word": "WEIGHT", "clue": "Force of gravity on mass" },
153 +   { "word": "CONCENTRATION", "clue": "Amount of substance per volume" },
154 +   { "word": "MOLARITY", "clue": "Moles of solute per liter" },
155 +   { "word": "EQUILIBRIUM", "clue": "State of balanced forces" },
156 +   { "word": "STABILITY", "clue": "Resistance to change" },
157 +   { "word": "DECAY", "clue": "Gradual breakdown process" },
158 +   { "word": "RADIATION", "clue": "Energy emitted from source" },
159 +   { "word": "RADIOACTIVE", "clue": "Emitting nuclear radiation" },
160 +   { "word": "HALFLIFE", "clue": "Time for half to decay" },
161 +   { "word": "FUSION", "clue": "Nuclear combining reaction" },
162 +   { "word": "FISSION", "clue": "Nuclear splitting reaction" },
163 +   { "word": "QUANTUM", "clue": "Discrete packet of energy" },
164 +   { "word": "PARTICLE", "clue": "Tiny piece of matter" },
165 +   { "word": "WAVE", "clue": "Energy transfer disturbance" },
166 +   { "word": "INTERFERENCE", "clue": "Wave interaction effect" },
167 +   { "word": "DIFFRACTION", "clue": "Wave bending around obstacle" },
168 +   { "word": "REFLECTION", "clue": "Bouncing back of waves" },
169 +   { "word": "REFRACTION", "clue": "Bending of waves through medium" }
170 + ]
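The generator is called with a difficulty level (the debug scripts later in this commit pass "medium" to `generate_puzzle`). One simple way a static list like this could be narrowed by difficulty is by answer length; the bands and the helper below are purely an illustrative assumption and are not the selection logic in `crossword_generator.py`.

```python
import random

# Assumed length bands per difficulty level; the real generator may instead
# weight candidates by semantic similarity scores from the vector search.
LENGTH_BANDS = {"easy": (3, 6), "medium": (4, 9), "hard": (6, 15)}

def sample_by_difficulty(entries: list[dict], difficulty: str, count: int = 10) -> list[dict]:
    """Sample up to `count` entries whose answers fit the difficulty's length band."""
    lo, hi = LENGTH_BANDS.get(difficulty, (3, 15))
    pool = [e for e in entries if lo <= len(e["word"]) <= hi]
    return random.sample(pool, min(count, len(pool)))
```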
crossword-app/backend-py/data/word-lists/technology.json
ADDED
@@ -0,0 +1,221 @@
  1 + [
  2 +   { "word": "COMPUTER", "clue": "Electronic processing device" },
  3 +   { "word": "INTERNET", "clue": "Global computer network" },
  4 +   { "word": "ALGORITHM", "clue": "Set of rules for solving problems" },
  5 +   { "word": "DATABASE", "clue": "Organized collection of data" },
  6 +   { "word": "SOFTWARE", "clue": "Computer programs" },
  7 +   { "word": "HARDWARE", "clue": "Physical computer components" },
  8 +   { "word": "NETWORK", "clue": "Connected system of computers" },
  9 +   { "word": "CODE", "clue": "Programming instructions" },
 10 +   { "word": "ROBOT", "clue": "Automated machine" },
 11 +   { "word": "ARTIFICIAL", "clue": "Made by humans, not natural" },
 12 +   { "word": "DIGITAL", "clue": "Using binary data" },
 13 +   { "word": "BINARY", "clue": "Base-2 number system" },
 14 +   { "word": "PROCESSOR", "clue": "Computer's brain" },
 15 +   { "word": "MEMORY", "clue": "Data storage component" },
 16 +   { "word": "KEYBOARD", "clue": "Input device with keys" },
 17 +   { "word": "MONITOR", "clue": "Computer display screen" },
 18 +   { "word": "MOUSE", "clue": "Pointing input device" },
 19 +   { "word": "PRINTER", "clue": "Device that prints documents" },
 20 +   { "word": "SCANNER", "clue": "Device that digitizes images" },
 21 +   { "word": "CAMERA", "clue": "Device that captures images" },
 22 +   { "word": "SMARTPHONE", "clue": "Portable computing device" },
 23 +   { "word": "TABLET", "clue": "Touchscreen computing device" },
 24 +   { "word": "LAPTOP", "clue": "Portable computer" },
 25 +   { "word": "SERVER", "clue": "Computer that serves data" },
 26 +   { "word": "CLOUD", "clue": "Internet-based computing" },
 27 +   { "word": "WEBSITE", "clue": "Collection of web pages" },
 28 +   { "word": "EMAIL", "clue": "Electronic mail" },
 29 +   { "word": "BROWSER", "clue": "Web navigation software" },
 30 +   { "word": "SEARCH", "clue": "Look for information" },
 31 +   { "word": "DOWNLOAD", "clue": "Transfer data to device" },
 32 +   { "word": "UPLOAD", "clue": "Transfer data from device" },
 33 +   { "word": "BANDWIDTH", "clue": "Data transfer capacity" },
 34 +   { "word": "PROTOCOL", "clue": "Communication rules" },
 35 +   { "word": "FIREWALL", "clue": "Network security barrier" },
 36 +   { "word": "ENCRYPTION", "clue": "Data scrambling for security" },
 37 +   { "word": "PASSWORD", "clue": "Secret access code" },
 38 +   { "word": "SECURITY", "clue": "Protection from threats" },
 39 +   { "word": "VIRUS", "clue": "Malicious computer program" },
 40 +   { "word": "MALWARE", "clue": "Harmful software" },
 41 +   { "word": "ANTIVIRUS", "clue": "Protection software" },
 42 +   { "word": "BACKUP", "clue": "Data safety copy" },
 43 +   { "word": "RECOVERY", "clue": "Data restoration process" },
 44 +   { "word": "STORAGE", "clue": "Data keeping capacity" },
 45 +   { "word": "HARDDRIVE", "clue": "Magnetic storage device" },
 46 +   { "word": "FLASH", "clue": "Solid state storage" },
 47 +   { "word": "RAM", "clue": "Random access memory" },
 48 +   { "word": "ROM", "clue": "Read-only memory" },
 49 +   { "word": "CPU", "clue": "Central processing unit" },
 50 +   { "word": "GPU", "clue": "Graphics processing unit" },
 51 +   { "word": "MOTHERBOARD", "clue": "Main circuit board" },
 52 +   { "word": "CHIP", "clue": "Integrated circuit" },
 53 +   { "word": "CIRCUIT", "clue": "Electronic pathway" },
 54 +   { "word": "TRANSISTOR", "clue": "Electronic switch" },
 55 +   { "word": "SILICON", "clue": "Semiconductor material" },
 56 +   { "word": "NANOTECHNOLOGY", "clue": "Extremely small scale tech" },
 57 +   { "word": "AUTOMATION", "clue": "Self-operating technology" },
 58 +   { "word": "MACHINE", "clue": "Mechanical device" },
 59 +   { "word": "SENSOR", "clue": "Detection device" },
 60 +   { "word": "ACTUATOR", "clue": "Movement device" },
 61 +   { "word": "FEEDBACK", "clue": "System response information" },
 62 +   { "word": "PROGRAMMING", "clue": "Writing computer instructions" },
 63 +   { "word": "FUNCTION", "clue": "Reusable code block" },
 64 +   { "word": "VARIABLE", "clue": "Data storage container" },
 65 +   { "word": "LOOP", "clue": "Repeating code structure" },
 66 +   { "word": "CONDITION", "clue": "Decision-making logic" },
 67 +   { "word": "DEBUG", "clue": "Find and fix errors" },
 68 +   { "word": "COMPILE", "clue": "Convert code to executable" },
 69 +   { "word": "RUNTIME", "clue": "Program execution time" },
 70 +   { "word": "API", "clue": "Application programming interface" },
 71 +   { "word": "FRAMEWORK", "clue": "Code structure foundation" },
 72 +   { "word": "LIBRARY", "clue": "Reusable code collection" },
 73 +   { "word": "MODULE", "clue": "Self-contained code unit" },
 74 +   { "word": "OBJECT", "clue": "Data and methods container" },
 75 +   { "word": "CLASS", "clue": "Object blueprint" },
 76 +   { "word": "INHERITANCE", "clue": "Code reuse mechanism" },
 77 +   { "word": "INTERFACE", "clue": "System interaction boundary" },
 78 +   { "word": "PROTOCOL", "clue": "Communication standard" },
 79 +   { "word": "FORMAT", "clue": "Data structure standard" },
 80 +   { "word": "SYNTAX", "clue": "Language rules" },
 81 +   { "word": "SEMANTIC", "clue": "Meaning in code" },
 82 +   { "word": "PARSING", "clue": "Analyzing code structure" },
 83 +   { "word": "COMPILER", "clue": "Code translation program" },
 84 +   { "word": "INTERPRETER", "clue": "Code execution program" },
 85 +   { "word": "VIRTUAL", "clue": "Simulated environment" },
 86 +   { "word": "SIMULATION", "clue": "Computer modeling" },
 87 +   { "word": "EMULATION", "clue": "System imitation" },
 88 +   { "word": "OPTIMIZATION", "clue": "Performance improvement" },
 89 +   { "word": "EFFICIENCY", "clue": "Resource usage effectiveness" },
 90 +   { "word": "PERFORMANCE", "clue": "System speed and quality" },
 91 +   { "word": "BENCHMARK", "clue": "Performance measurement" },
 92 +   { "word": "TESTING", "clue": "Quality verification process" },
 93 +   { "word": "VALIDATION", "clue": "Correctness checking" },
 94 +   { "word": "VERIFICATION", "clue": "Accuracy confirmation" },
 95 +   { "word": "QUALITY", "clue": "Standard of excellence" },
 96 +   { "word": "MAINTENANCE", "clue": "System upkeep" },
 97 +   { "word": "UPDATE", "clue": "Software improvement" },
 98 +   { "word": "PATCH", "clue": "Software fix" },
 99 +   { "word": "VERSION", "clue": "Software release number" },
100 +   { "word": "RELEASE", "clue": "Software distribution" },
101 +   { "word": "DEPLOYMENT", "clue": "Software installation" },
102 +   { "word": "CONFIGURATION", "clue": "System setup" },
103 +   { "word": "INSTALLATION", "clue": "Software setup process" },
104 +   { "word": "MIGRATION", "clue": "System transition" },
105 +   { "word": "INTEGRATION", "clue": "System combination" },
106 +   { "word": "COMPATIBILITY", "clue": "System cooperation ability" },
107 +   { "word": "INTEROPERABILITY", "clue": "Cross-system communication" },
108 +   { "word": "SCALABILITY", "clue": "Growth accommodation ability" },
109 +   { "word": "RELIABILITY", "clue": "Consistent performance" },
110 +   { "word": "AVAILABILITY", "clue": "System accessibility" },
111 +   { "word": "REDUNDANCY", "clue": "Backup system duplication" },
112 +   { "word": "FAULT", "clue": "System error condition" },
113 +   { "word": "TOLERANCE", "clue": "Error handling ability" },
114 +   { "word": "RECOVERY", "clue": "System restoration" },
115 +   { "word": "MONITORING", "clue": "System observation" },
116 +   { "word": "LOGGING", "clue": "Event recording" },
117 +   { "word": "ANALYTICS", "clue": "Data analysis" },
118 +   { "word": "METRICS", "clue": "Measurement data" },
119 +   { "word": "DASHBOARD", "clue": "Information display panel" },
120 +   { "word": "INTERFACE", "clue": "User interaction design" },
121 +   { "word": "EXPERIENCE", "clue": "User interaction quality" },
122 +   { "word": "USABILITY", "clue": "Ease of use" },
123 +   { "word": "ACCESSIBILITY", "clue": "Universal design principle" },
124 +   { "word": "RESPONSIVE", "clue": "Adaptive design" },
125 +   { "word": "MOBILE", "clue": "Portable device category" },
126 +   { "word": "TOUCHSCREEN", "clue": "Touch-sensitive display" },
127 +   { "word": "GESTURE", "clue": "Touch movement command" },
128 +   { "word": "VOICE", "clue": "Speech interaction" },
129 +   { "word": "RECOGNITION", "clue": "Pattern identification" },
130 +   { "word": "LEARNING", "clue": "Adaptive improvement" },
131 +   { "word": "INTELLIGENCE", "clue": "Artificial reasoning" },
132 +   { "word": "NEURAL", "clue": "Brain-inspired network" },
133 +   { "word": "DEEP", "clue": "Multi-layered learning" },
134 +   { "word": "MACHINE", "clue": "Automated learning system" },
135 +   { "word": "DATA", "clue": "Information collection" },
136 +   { "word": "BIG", "clue": "Large scale data" },
137 +   { "word": "MINING", "clue": "Data pattern extraction" },
138 +   { "word": "ANALYSIS", "clue": "Data examination" },
139 +   { "word": "VISUALIZATION", "clue": "Data graphic representation" },
140 +   { "word": "DASHBOARD", "clue": "Data monitoring panel" },
141 +   { "word": "REPORT", "clue": "Data summary document" },
142 +   { "word": "QUERY", "clue": "Data search request" },
143 +   { "word": "INDEX", "clue": "Data location reference" },
144 +   { "word": "SCHEMA", "clue": "Data structure blueprint" },
145 +   { "word": "TABLE", "clue": "Data organization structure" },
146 +   { "word": "RECORD", "clue": "Data entry" },
147 +   { "word": "FIELD", "clue": "Data element" },
148 +   { "word": "PRIMARY", "clue": "Main identifier key" },
149 +   { "word": "FOREIGN", "clue": "Reference relationship key" },
150 +   { "word": "RELATION", "clue": "Data connection" },
151 +   { "word": "JOIN", "clue": "Data combination operation" },
152 +   { "word": "TRANSACTION", "clue": "Data operation sequence" },
153 +   { "word": "COMMIT", "clue": "Data change confirmation" },
154 +   { "word": "ROLLBACK", "clue": "Data change reversal" },
155 +   { "word": "CONCURRENCY", "clue": "Simultaneous access handling" },
156 +   { "word": "LOCK", "clue": "Data access control" },
157 +   { "word": "SYNCHRONIZATION", "clue": "Timing coordination" },
158 +   { "word": "THREAD", "clue": "Execution sequence" },
159 +   { "word": "PROCESS", "clue": "Running program instance" },
160 +   { "word": "MULTITASKING", "clue": "Multiple process handling" },
161 +   { "word": "PARALLEL", "clue": "Simultaneous execution" },
162 +   { "word": "DISTRIBUTED", "clue": "Spread across multiple systems" },
163 +   { "word": "CLUSTER", "clue": "Group of connected computers" },
164 +   { "word": "GRID", "clue": "Distributed computing network" },
165 +   { "word": "PEER", "clue": "Equal network participant" },
166 +   { "word": "CLIENT", "clue": "Service requesting system" },
167 +   { "word": "SERVICE", "clue": "System functionality provider" },
168 +   { "word": "MICROSERVICE", "clue": "Small independent service" },
169 +   { "word": "CONTAINER", "clue": "Isolated application environment" },
170 +   { "word": "DOCKER", "clue": "Containerization platform" },
171 +   { "word": "KUBERNETES", "clue": "Container orchestration" },
172 +   { "word": "DEVOPS", "clue": "Development operations practice" },
173 +   { "word": "AGILE", "clue": "Flexible development method" },
174 +   { "word": "SCRUM", "clue": "Iterative development framework" },
175 +   { "word": "SPRINT", "clue": "Short development cycle" },
176 +   { "word": "KANBAN", "clue": "Visual workflow management" },
177 +   { "word": "CONTINUOUS", "clue": "Ongoing integration practice" },
178 +   { "word": "PIPELINE", "clue": "Automated workflow" },
179 +   { "word": "BUILD", "clue": "Software compilation process" },
180 +   { "word": "TESTING", "clue": "Quality assurance process" },
181 +   { "word": "AUTOMATION", "clue": "Manual task elimination" },
182 +   { "word": "SCRIPT", "clue": "Automated task sequence" },
183 +   { "word": "BATCH", "clue": "Group processing" },
184 +   { "word": "STREAMING", "clue": "Continuous data flow" },
185 +   { "word": "REALTIME", "clue": "Immediate processing" },
186 +   { "word": "LATENCY", "clue": "Response delay time" },
187 +   { "word": "THROUGHPUT", "clue": "Processing capacity" },
188 +   { "word": "BOTTLENECK", "clue": "Performance limitation point" },
189 +   { "word": "CACHE", "clue": "Fast temporary storage" },
190 +   { "word": "BUFFER", "clue": "Temporary data holder" },
191 +   { "word": "QUEUE", "clue": "Ordered waiting line" },
192 +   { "word": "STACK", "clue": "Last-in-first-out structure" },
193 +   { "word": "HEAP", "clue": "Dynamic memory area" },
194 +   { "word": "POINTER", "clue": "Memory address reference" },
195 +   { "word": "REFERENCE", "clue": "Object location indicator" },
196 +   { "word": "GARBAGE", "clue": "Unused memory collection" },
197 +   { "word": "ALLOCATION", "clue": "Memory assignment" },
198 +   { "word": "DEALLOCATION", "clue": "Memory release" },
199 +   { "word": "LEAK", "clue": "Memory usage error" },
200 +   { "word": "OVERFLOW", "clue": "Capacity exceeding error" },
201 +   { "word": "UNDERFLOW", "clue": "Insufficient data error" },
202 +   { "word": "EXCEPTION", "clue": "Error handling mechanism" },
203 +   { "word": "INTERRUPT", "clue": "Process suspension signal" },
204 +   { "word": "SIGNAL", "clue": "Process communication" },
205 +   { "word": "EVENT", "clue": "System occurrence" },
206 +   { "word": "HANDLER", "clue": "Event processing function" },
207 +   { "word": "CALLBACK", "clue": "Function reference" },
208 +   { "word": "PROMISE", "clue": "Future value placeholder" },
209 +   { "word": "ASYNC", "clue": "Non-blocking operation" },
210 +   { "word": "AWAIT", "clue": "Pause for completion" },
211 +   { "word": "YIELD", "clue": "Temporary function pause" },
212 +   { "word": "GENERATOR", "clue": "Value sequence producer" },
213 +   { "word": "ITERATOR", "clue": "Sequential access pattern" },
214 +   { "word": "RECURSION", "clue": "Self-calling function" },
215 +   { "word": "CLOSURE", "clue": "Function scope retention" },
216 +   { "word": "LAMBDA", "clue": "Anonymous function" },
217 +   { "word": "FUNCTIONAL", "clue": "Function-based programming" },
218 +   { "word": "PROCEDURAL", "clue": "Step-by-step programming" },
219 +   { "word": "DECLARATIVE", "clue": "What-not-how programming" },
220 +   { "word": "IMPERATIVE", "clue": "Command-based programming" }
221 + ]
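technology.json reuses a few answers under different clues (PROTOCOL at lines 34 and 78, INTERFACE at 77 and 120, DASHBOARD at 119 and 140, among others). If that is unintentional, a small validation pass over the word lists catches it; the script below is only a sketch and is not part of the committed test suite.

```python
import json
from collections import Counter
from pathlib import Path

def report_duplicate_answers(word_list_dir: str = "data/word-lists") -> None:
    """Print any answer that appears more than once within a single topic file."""
    for path in sorted(Path(word_list_dir).glob("*.json")):
        with open(path, encoding="utf-8") as fh:
            answers = [entry["word"] for entry in json.load(fh)]
        duplicates = {w: n for w, n in Counter(answers).items() if n > 1}
        if duplicates:
            print(f"{path.name}: {duplicates}")

if __name__ == "__main__":
    report_duplicate_answers()
```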
crossword-app/backend-py/debug_full_generation.py
ADDED
@@ -0,0 +1,316 @@
  1 + #!/usr/bin/env python3
  2 + """
  3 + Debug the complete crossword generation process to identify display/numbering issues.
  4 + """
  5 +
  6 + import asyncio
  7 + import sys
  8 + import json
  9 + from pathlib import Path
 10 +
 11 + # Add project root to path
 12 + project_root = Path(__file__).parent
 13 + sys.path.insert(0, str(project_root))
 14 +
 15 + from src.services.crossword_generator_fixed import CrosswordGeneratorFixed
 16 +
 17 + async def debug_complete_generation():
 18 +     """Debug the complete crossword generation process."""
 19 +
 20 +     print("🔍 Debugging Complete Crossword Generation Process\n")
 21 +
 22 +     # Create generator with no vector service to use static words
 23 +     generator = CrosswordGeneratorFixed(vector_service=None)
 24 +
 25 +     # Override the word selection to use controlled test words
 26 +     test_words = [
 27 +         {"word": "MACHINE", "clue": "Device with moving parts"},
 28 +         {"word": "COMPUTER", "clue": "Electronic device"},
 29 +         {"word": "EXPERT", "clue": "Person with specialized knowledge"},
 30 +         {"word": "SCIENCE", "clue": "Systematic study"},
 31 +         {"word": "TECHNOLOGY", "clue": "Applied science"},
 32 +         {"word": "RESEARCH", "clue": "Systematic investigation"},
 33 +         {"word": "ANALYSIS", "clue": "Detailed examination"},
 34 +         {"word": "METHOD", "clue": "Systematic approach"}
 35 +     ]
 36 +
 37 +     # Mock the word selection method
 38 +     async def mock_select_words(topics, difficulty, use_ai):
 39 +         return test_words
 40 +     generator._select_words = mock_select_words
 41 +
 42 +     print("=" * 70)
 43 +     print("GENERATING COMPLETE CROSSWORD")
 44 +     print("=" * 70)
 45 +
 46 +     try:
 47 +         result = await generator.generate_puzzle(["technology"], "medium", use_ai=False)
 48 +
 49 +         if result:
 50 +             print("✅ Crossword generation successful!")
 51 +
 52 +             # Analyze the complete result
 53 +             analyze_crossword_result(result)
 54 +         else:
 55 +             print("❌ Crossword generation failed - returned None")
 56 +
 57 +     except Exception as e:
 58 +         print(f"❌ Crossword generation failed with error: {e}")
 59 +         import traceback
 60 +         traceback.print_exc()
 61 +
 62 + def analyze_crossword_result(result):
 63 +     """Analyze the complete crossword result for potential issues."""
 64 +
 65 +     print("\n" + "=" * 70)
 66 +     print("CROSSWORD RESULT ANALYSIS")
 67 +     print("=" * 70)
 68 +
 69 +     # Print basic metadata
 70 +     metadata = result.get("metadata", {})
 71 +     print("Metadata:")
 72 +     for key, value in metadata.items():
 73 +         print(f"  {key}: {value}")
 74 +
 75 +     # Analyze the grid
 76 +     grid = result.get("grid", [])
 77 +     print(f"\nGrid dimensions: {len(grid)}x{len(grid[0]) if grid else 0}")
 78 +
 79 +     print("\nGrid layout:")
 80 +     print_numbered_grid(grid)
 81 +
 82 +     # Analyze placed words vs clues
 83 +     clues = result.get("clues", [])
 84 +     print(f"\nNumber of clues generated: {len(clues)}")
 85 +
 86 +     print("\nClue analysis:")
 87 +     for i, clue in enumerate(clues):
 88 +         print(f"  Clue {i+1}:")
 89 +         print(f"    Number: {clue.get('number', 'MISSING')}")
 90 +         print(f"    Word: {clue.get('word', 'MISSING')}")
 91 +         print(f"    Direction: {clue.get('direction', 'MISSING')}")
 92 +         print(f"    Position: {clue.get('position', 'MISSING')}")
 93 +         print(f"    Text: {clue.get('text', 'MISSING')}")
 94 +
 95 +     # Check for potential issues
 96 +     print("\n" + "=" * 70)
 97 +     print("ISSUE DETECTION")
 98 +     print("=" * 70)
 99 +
100 +     check_word_boundary_consistency(grid, clues)
101 +     check_numbering_consistency(clues)
102 +     check_grid_word_alignment(grid, clues)
103 +
104 + def print_numbered_grid(grid):
105 +     """Print grid with coordinates for analysis."""
106 +     if not grid:
107 +         print("  Empty grid")
108 +         return
109 +
110 +     # Print column headers
111 +     print("     ", end="")
112 +     for c in range(len(grid[0])):
113 +         print(f"{c:2d}", end="")
114 +     print()
115 +
116 +     # Print rows with row numbers
117 +     for r in range(len(grid)):
118 +         print(f" {r:2d}: ", end="")
119 +         for c in range(len(grid[0])):
120 +             cell = grid[r][c]
121 +             if cell == ".":
122 +                 print(" .", end="")
123 +             else:
124 +                 print(f" {cell}", end="")
125 +         print()
126 +
127 + def check_word_boundary_consistency(grid, clues):
128 +     """Check if words in clues match what's actually in the grid."""
129 +
130 +     print("Checking word boundary consistency:")
131 +
132 +     issues_found = []
133 +
134 +     for clue in clues:
135 +         word = clue.get("word", "")
136 +         position = clue.get("position", {})
137 +         direction = clue.get("direction", "")
138 +
139 +         if not all([word, position, direction]):
140 +             issues_found.append(f"Incomplete clue data: {clue}")
141 +             continue
142 +
143 +         row = position.get("row", -1)
144 +         col = position.get("col", -1)
145 +
146 +         if row < 0 or col < 0:
147 +             issues_found.append(f"Invalid position for word '{word}': {position}")
148 +             continue
149 +
150 +         # Extract the actual word from the grid
151 +         grid_word = extract_word_from_grid(grid, row, col, direction, len(word))
152 +
153 +         if grid_word != word:
154 +             issues_found.append(f"Mismatch for '{word}' at ({row}, {col}) {direction}: grid shows '{grid_word}'")
155 +
156 +     if issues_found:
157 +         print("  ❌ Issues found:")
158 +         for issue in issues_found:
159 +             print(f"    {issue}")
160 +     else:
161 +         print("  ✅ All words match grid positions")
162 +
163 + def extract_word_from_grid(grid, row, col, direction, expected_length):
164 +     """Extract a word from the grid at the given position and direction."""
165 +
166 +     if row >= len(grid) or col >= len(grid[0]):
167 +         return "OUT_OF_BOUNDS"
168 +
169 +     word = ""
170 +
171 +     if direction == "across":  # horizontal
172 +         for i in range(expected_length):
173 +             if col + i >= len(grid[0]):
174 +                 return word + "TRUNCATED"
175 +             word += grid[row][col + i]
176 +
177 +     elif direction == "down":  # vertical
178 +         for i in range(expected_length):
179 +             if row + i >= len(grid):
180 +                 return word + "TRUNCATED"
181 +             word += grid[row + i][col]
182 +
183 +     return word
184 +
185 + def check_numbering_consistency(clues):
186 +     """Check if clue numbering is consistent and logical."""
187 +
188 +     print("\nChecking numbering consistency:")
189 +
190 +     numbers = [clue.get("number", -1) for clue in clues]
191 +     issues = []
192 +
193 +     # Check for duplicate numbers
194 +     if len(numbers) != len(set(numbers)):
195 +         issues.append("Duplicate clue numbers found")
196 +
197 +     # Check for missing numbers in sequence
198 +     if numbers:
199 +         min_num = min(numbers)
200 +         max_num = max(numbers)
201 +         expected = set(range(min_num, max_num + 1))
202 +         actual = set(numbers)
203 +
204 +         if expected != actual:
205 +             missing = expected - actual
206 +             extra = actual - expected
207 +             if missing:
208 +                 issues.append(f"Missing numbers: {sorted(missing)}")
209 +             if extra:
210 +                 issues.append(f"Extra numbers: {sorted(extra)}")
211 +
212 +     if issues:
213 +         print("  ❌ Numbering issues:")
214 +         for issue in issues:
215 +             print(f"    {issue}")
216 +     else:
217 +         print("  ✅ Numbering is consistent")
218 +
219 + def check_grid_word_alignment(grid, clues):
220 +     """Check if all words are properly aligned and don't create unintended extensions."""
221 +
222 +     print("\nChecking grid word alignment:")
223 +
224 +     # Find all letter sequences in the grid
225 +     horizontal_sequences = find_horizontal_sequences(grid)
226 +     vertical_sequences = find_vertical_sequences(grid)
227 +
228 +     print(f"  Found {len(horizontal_sequences)} horizontal sequences")
229 +     print(f"  Found {len(vertical_sequences)} vertical sequences")
230 +
231 +     # Check if each sequence corresponds to a clue
232 +     clue_words = {}
233 +     for clue in clues:
234 +         pos = clue.get("position", {})
235 +         key = (pos.get("row"), pos.get("col"), clue.get("direction"))
236 +         clue_words[key] = clue.get("word", "")
237 +
238 +     issues = []
239 +
240 +     # Check horizontal sequences
241 +     for seq in horizontal_sequences:
242 +         row, start_col, word = seq
243 +         key = (row, start_col, "across")
244 +         if key not in clue_words:
245 +             issues.append(f"Unaccounted horizontal sequence: '{word}' at ({row}, {start_col})")
246 +         elif clue_words[key] != word:
247 +             issues.append(f"Mismatch: clue says '{clue_words[key]}' but grid shows '{word}' at ({row}, {start_col})")
248 +
249 +     # Check vertical sequences
250 +     for seq in vertical_sequences:
251 +         col, start_row, word = seq
252 +         key = (start_row, col, "down")
253 +         if key not in clue_words:
254 +             issues.append(f"Unaccounted vertical sequence: '{word}' at ({start_row}, {col})")
255 +         elif clue_words[key] != word:
256 +             issues.append(f"Mismatch: clue says '{clue_words[key]}' but grid shows '{word}' at ({start_row}, {col})")
257 +
258 +     if issues:
259 +         print("  ❌ Alignment issues found:")
260 +         for issue in issues:
261 +             print(f"    {issue}")
262 +     else:
263 +         print("  ✅ All words are properly aligned")
264 +
265 + def find_horizontal_sequences(grid):
266 +     """Find all horizontal letter sequences of length > 1."""
267 +     sequences = []
268 +
269 +     for r in range(len(grid)):
270 +         current_word = ""
271 +         start_col = None
272 +
273 +         for c in range(len(grid[0])):
274 +             if grid[r][c] != ".":
275 +                 if start_col is None:
276 +                     start_col = c
277 +                 current_word += grid[r][c]
278 +             else:
279 +                 if current_word and len(current_word) > 1:
280 +                     sequences.append((r, start_col, current_word))
281 +                 current_word = ""
282 +                 start_col = None
283 +
284 +         # Handle word at end of row
285 +         if current_word and len(current_word) > 1:
286 +             sequences.append((r, start_col, current_word))
287 +
288 +     return sequences
289 +
290 + def find_vertical_sequences(grid):
291 +     """Find all vertical letter sequences of length > 1."""
292 +     sequences = []
293 +
294 +     for c in range(len(grid[0])):
295 +         current_word = ""
296 +         start_row = None
297 +
298 +         for r in range(len(grid)):
299 +             if grid[r][c] != ".":
300 +                 if start_row is None:
301 +                     start_row = r
302 +                 current_word += grid[r][c]
303 +             else:
304 +                 if current_word and len(current_word) > 1:
305 +                     sequences.append((c, start_row, current_word))
306 +                 current_word = ""
307 +                 start_row = None
308 +
309 +         # Handle word at end of column
310 +         if current_word and len(current_word) > 1:
311 +             sequences.append((c, start_row, current_word))
312 +
313 +     return sequences
314 +
315 + if __name__ == "__main__":
316 +     asyncio.run(debug_complete_generation())
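The script runs end to end with `python debug_full_generation.py` thanks to its `__main__` guard, and its helpers are plain module-level functions that can also be exercised on a hand-built grid. The snippet below is an illustrative check only, assuming it is executed from the backend-py directory so that importing the module (and its own import of `CrosswordGeneratorFixed`) resolves; it is not part of the committed tests.

```python
# Illustrative check of the debug helpers against a tiny hand-built grid:
# MACHINE placed across at (0, 0), crossing HAT placed down at (0, 3).
from debug_full_generation import extract_word_from_grid, find_horizontal_sequences

grid = [
    list("MACHINE"),
    list("...A..."),
    list("...T..."),
]

assert extract_word_from_grid(grid, 0, 0, "across", 7) == "MACHINE"
assert extract_word_from_grid(grid, 0, 3, "down", 3) == "HAT"
assert find_horizontal_sequences(grid) == [(0, 0, "MACHINE")]
```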
crossword-app/backend-py/debug_grid_direct.py
ADDED
@@ -0,0 +1,293 @@
  1 + #!/usr/bin/env python3
  2 + """
  3 + Direct grid generation test to identify word boundary/display issues.
  4 + """
  5 +
  6 + import sys
  7 + from pathlib import Path
  8 +
  9 + # Add project root to path
 10 + project_root = Path(__file__).parent
 11 + sys.path.insert(0, str(project_root))
 12 +
 13 + from src.services.crossword_generator_fixed import CrosswordGeneratorFixed
 14 +
 15 + def test_direct_grid_generation():
 16 +     """Test grid generation directly with controlled words."""
 17 +
 18 +     print("🔍 Direct Grid Generation Test\n")
 19 +
 20 +     generator = CrosswordGeneratorFixed(vector_service=None)
 21 +
 22 +     # Test words that might cause the issues seen in the images
 23 +     test_words = [
 24 +         {"word": "MACHINE", "clue": "Device with moving parts"},
 25 +         {"word": "COMPUTER", "clue": "Electronic device"},
 26 +         {"word": "EXPERT", "clue": "Person with specialized knowledge"},
 27 +         {"word": "SCIENCE", "clue": "Systematic study"},
 28 +         {"word": "CAMERA", "clue": "Device for taking photos"},
 29 +         {"word": "METHOD", "clue": "Systematic approach"}
 30 +     ]
 31 +
 32 +     print("=" * 60)
 33 +     print("TEST 1: Direct grid creation")
 34 +     print("=" * 60)
 35 +
 36 +     # Test the _create_grid method directly
 37 +     result = generator._create_grid(test_words)
 38 +
 39 +     if result:
 40 +         print("✅ Grid generation successful!")
 41 +
 42 +         grid = result["grid"]
 43 +         placed_words = result["placed_words"]
 44 +         clues = result["clues"]
 45 +
 46 +         print(f"Grid size: {len(grid)}x{len(grid[0])}")
 47 +         print(f"Words placed: {len(placed_words)}")
 48 +         print(f"Clues generated: {len(clues)}")
 49 +
 50 +         # Print the grid
 51 +         print("\nGenerated Grid:")
 52 +         print_grid_with_coordinates(grid)
 53 +
 54 +         # Print placed words details
 55 +         print("\nPlaced Words:")
 56 +         for i, word_info in enumerate(placed_words):
 57 +             print(f"  {i+1}. {word_info['word']} at ({word_info['row']}, {word_info['col']}) {word_info['direction']}")
 58 +
 59 +         # Print clues
 60 +         print("\nGenerated Clues:")
 61 +         for clue in clues:
 62 +             print(f"  {clue['number']}. {clue['direction']}: {clue['word']} - {clue['text']}")
 63 +
 64 +         # Analyze for potential issues
 65 +         print("\n" + "=" * 60)
 66 +         print("ANALYSIS")
 67 +         print("=" * 60)
 68 +
 69 +         analyze_grid_issues(grid, placed_words, clues)
 70 +
 71 +     else:
 72 +         print("❌ Grid generation failed")
 73 +
 74 +     # Test another scenario that might reproduce the image issues
 75 +     print("\n" + "=" * 60)
 76 +     print("TEST 2: Scenario with potential extension words")
 77 +     print("=" * 60)
 78 +
 79 +     # Words that might create the "MACHINERY" type issue
 80 +     extension_words = [
 81 +         {"word": "MACHINE", "clue": "Device with moving parts"},
 82 +         {"word": "MACHINERY", "clue": "Mechanical equipment"},  # Might cause confusion
 83 +         {"word": "EXPERT", "clue": "Specialist"},
 84 +         {"word": "TECHNOLOGY", "clue": "Applied science"},
 85 +     ]
 86 +
 87 +     result2 = generator._create_grid(extension_words)
 88 +
 89 +     if result2:
 90 +         print("✅ Extension test grid generated!")
 91 +
 92 +         grid2 = result2["grid"]
 93 +         placed_words2 = result2["placed_words"]
 94 +
 95 +         print("\nExtension Test Grid:")
 96 +         print_grid_with_coordinates(grid2)
 97 +
 98 +         print("\nPlaced Words:")
 99 +         for i, word_info in enumerate(placed_words2):
100 +             print(f"  {i+1}. {word_info['word']} at ({word_info['row']}, {word_info['col']}) {word_info['direction']}")
101 +
102 +         # Check specifically for MACHINE vs MACHINERY issues
103 +         check_machine_machinery_issue(grid2, placed_words2)
104 +
105 +     else:
106 +         print("❌ Extension test grid generation failed")
107 +
108 + def print_grid_with_coordinates(grid):
109 +     """Print grid with row and column coordinates."""
110 +     if not grid:
111 +         print("  Empty grid")
112 +         return
113 +
114 +     # Print column headers
115 +     print("     ", end="")
116 +     for c in range(len(grid[0])):
117 +         print(f"{c:2d}", end="")
118 +     print()
119 +
120 +     # Print rows
121 +     for r in range(len(grid)):
122 +         print(f" {r:2d}: ", end="")
123 +         for c in range(len(grid[0])):
124 +             cell = grid[r][c]
125 +             if cell == ".":
126 +                 print(" .", end="")
127 +             else:
128 +                 print(f" {cell}", end="")
129 +         print()
130 +
131 + def analyze_grid_issues(grid, placed_words, clues):
132 +     """Analyze the grid for potential boundary/display issues."""
133 +
134 +     print("Checking for potential issues...")
135 +
136 +     issues = []
137 +
138 +     # Check 1: Verify each placed word actually exists in the grid
139 +     for word_info in placed_words:
140 +         word = word_info["word"]
141 +         row = word_info["row"]
142 +         col = word_info["col"]
143 +         direction = word_info["direction"]
144 +
145 +         grid_word = extract_word_from_grid(grid, row, col, direction, len(word))
146 +
147 +         if grid_word != word:
|
148 |
+
issues.append(f"Word mismatch: '{word}' expected at ({row},{col}) {direction}, but grid shows '{grid_word}'")
|
149 |
+
|
150 |
+
# Check 2: Look for unintended letter sequences
|
151 |
+
all_sequences = find_all_letter_sequences(grid)
|
152 |
+
intended_words = {(w["row"], w["col"], w["direction"]): w["word"] for w in placed_words}
|
153 |
+
|
154 |
+
for seq_info in all_sequences:
|
155 |
+
row, col, direction, seq_word = seq_info
|
156 |
+
key = (row, col, direction)
|
157 |
+
|
158 |
+
if key not in intended_words:
|
159 |
+
if len(seq_word) > 1: # Only care about multi-letter sequences
|
160 |
+
issues.append(f"Unintended sequence: '{seq_word}' at ({row},{col}) {direction}")
|
161 |
+
elif intended_words[key] != seq_word:
|
162 |
+
issues.append(f"Sequence mismatch: expected '{intended_words[key]}' but found '{seq_word}' at ({row},{col}) {direction}")
|
163 |
+
|
164 |
+
# Check 3: Verify clue consistency
|
165 |
+
for clue in clues:
|
166 |
+
clue_word = clue["word"]
|
167 |
+
pos = clue["position"]
|
168 |
+
clue_row = pos["row"]
|
169 |
+
clue_col = pos["col"]
|
170 |
+
clue_direction = clue["direction"]
|
171 |
+
|
172 |
+
# Convert direction format if needed
|
173 |
+
direction_map = {"across": "horizontal", "down": "vertical"}
|
174 |
+
normalized_direction = direction_map.get(clue_direction, clue_direction)
|
175 |
+
|
176 |
+
grid_word = extract_word_from_grid(grid, clue_row, clue_col, normalized_direction, len(clue_word))
|
177 |
+
|
178 |
+
if grid_word != clue_word:
|
179 |
+
issues.append(f"Clue mismatch: clue says '{clue_word}' at ({clue_row},{clue_col}) {clue_direction}, but grid shows '{grid_word}'")
|
180 |
+
|
181 |
+
# Report results
|
182 |
+
if issues:
|
183 |
+
print("β Issues found:")
|
184 |
+
for issue in issues:
|
185 |
+
print(f" {issue}")
|
186 |
+
else:
|
187 |
+
print("β
No issues detected - grid appears consistent")
|
188 |
+
|
189 |
+
def extract_word_from_grid(grid, row, col, direction, expected_length):
|
190 |
+
"""Extract word from grid at given position and direction."""
|
191 |
+
if row >= len(grid) or col >= len(grid[0]) or row < 0 or col < 0:
|
192 |
+
return "OUT_OF_BOUNDS"
|
193 |
+
|
194 |
+
word = ""
|
195 |
+
|
196 |
+
if direction in ["horizontal", "across"]:
|
197 |
+
for i in range(expected_length):
|
198 |
+
if col + i >= len(grid[0]):
|
199 |
+
return word + "[TRUNCATED]"
|
200 |
+
word += grid[row][col + i]
|
201 |
+
elif direction in ["vertical", "down"]:
|
202 |
+
for i in range(expected_length):
|
203 |
+
if row + i >= len(grid):
|
204 |
+
return word + "[TRUNCATED]"
|
205 |
+
word += grid[row + i][col]
|
206 |
+
|
207 |
+
return word
|
208 |
+
|
209 |
+
def find_all_letter_sequences(grid):
|
210 |
+
"""Find all letter sequences (horizontal and vertical) in the grid."""
|
211 |
+
sequences = []
|
212 |
+
|
213 |
+
# Horizontal sequences
|
214 |
+
for r in range(len(grid)):
|
215 |
+
current_word = ""
|
216 |
+
start_col = None
|
217 |
+
|
218 |
+
for c in range(len(grid[0])):
|
219 |
+
if grid[r][c] != ".":
|
220 |
+
if start_col is None:
|
221 |
+
start_col = c
|
222 |
+
current_word += grid[r][c]
|
223 |
+
else:
|
224 |
+
if current_word and len(current_word) > 1:
|
225 |
+
sequences.append((r, start_col, "horizontal", current_word))
|
226 |
+
current_word = ""
|
227 |
+
start_col = None
|
228 |
+
|
229 |
+
# Handle end of row
|
230 |
+
if current_word and len(current_word) > 1:
|
231 |
+
sequences.append((r, start_col, "horizontal", current_word))
|
232 |
+
|
233 |
+
# Vertical sequences
|
234 |
+
for c in range(len(grid[0])):
|
235 |
+
current_word = ""
|
236 |
+
start_row = None
|
237 |
+
|
238 |
+
for r in range(len(grid)):
|
239 |
+
if grid[r][c] != ".":
|
240 |
+
if start_row is None:
|
241 |
+
start_row = r
|
242 |
+
current_word += grid[r][c]
|
243 |
+
else:
|
244 |
+
if current_word and len(current_word) > 1:
|
245 |
+
sequences.append((start_row, c, "vertical", current_word))
|
246 |
+
current_word = ""
|
247 |
+
start_row = None
|
248 |
+
|
249 |
+
# Handle end of column
|
250 |
+
if current_word and len(current_word) > 1:
|
251 |
+
sequences.append((start_row, c, "vertical", current_word))
|
252 |
+
|
253 |
+
return sequences
|
254 |
+
|
255 |
+
def check_machine_machinery_issue(grid, placed_words):
|
256 |
+
"""Specifically check for MACHINE vs MACHINERY confusion."""
|
257 |
+
|
258 |
+
print("\nChecking for MACHINE/MACHINERY issue:")
|
259 |
+
|
260 |
+
machine_words = [w for w in placed_words if "MACHINE" in w["word"]]
|
261 |
+
|
262 |
+
if not machine_words:
|
263 |
+
print(" No MACHINE-related words found")
|
264 |
+
return
|
265 |
+
|
266 |
+
for word_info in machine_words:
|
267 |
+
word = word_info["word"]
|
268 |
+
row = word_info["row"]
|
269 |
+
col = word_info["col"]
|
270 |
+
direction = word_info["direction"]
|
271 |
+
|
272 |
+
print(f" Found: '{word}' at ({row},{col}) {direction}")
|
273 |
+
|
274 |
+
# Check what's actually in the grid at this location
|
275 |
+
grid_word = extract_word_from_grid(grid, row, col, direction, len(word))
|
276 |
+
print(f" Grid shows: '{grid_word}'")
|
277 |
+
|
278 |
+
# Check if there are extra letters that might create confusion
|
279 |
+
if direction == "horizontal":
|
280 |
+
# Check for letters after the word
|
281 |
+
end_col = col + len(word)
|
282 |
+
if end_col < len(grid[0]) and grid[row][end_col] != ".":
|
283 |
+
extra_letters = ""
|
284 |
+
check_col = end_col
|
285 |
+
while check_col < len(grid[0]) and grid[row][check_col] != ".":
|
286 |
+
extra_letters += grid[row][check_col]
|
287 |
+
check_col += 1
|
288 |
+
if extra_letters:
|
289 |
+
print(f" β οΈ Extra letters after word: '{extra_letters}'")
|
290 |
+
print(f" This might make '{word}' appear as '{word + extra_letters}'")
|
291 |
+
|
292 |
+
if __name__ == "__main__":
|
293 |
+
test_direct_grid_generation()
|
crossword-app/backend-py/debug_index_error.py
ADDED
@@ -0,0 +1,307 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
"""
|
3 |
+
Debug the recurring index error by adding comprehensive bounds checking.
|
4 |
+
"""
|
5 |
+
|
6 |
+
import asyncio
|
7 |
+
import sys
|
8 |
+
import logging
|
9 |
+
from pathlib import Path
|
10 |
+
|
11 |
+
# Add project root to path
|
12 |
+
project_root = Path(__file__).parent
|
13 |
+
sys.path.insert(0, str(project_root))
|
14 |
+
|
15 |
+
from src.services.crossword_generator_fixed import CrosswordGeneratorFixed
|
16 |
+
from src.services.vector_search import VectorSearchService
|
17 |
+
|
18 |
+
# Enable debug logging
|
19 |
+
logging.basicConfig(level=logging.DEBUG)
|
20 |
+
logger = logging.getLogger(__name__)
|
21 |
+
|
22 |
+
class DebugCrosswordGenerator(CrosswordGeneratorFixed):
|
23 |
+
"""Debug version with comprehensive bounds checking."""
|
24 |
+
|
25 |
+
def _can_place_word(self, grid, word, row, col, direction):
|
26 |
+
"""Enhanced _can_place_word with comprehensive bounds checking."""
|
27 |
+
try:
|
28 |
+
size = len(grid)
|
29 |
+
logger.debug(f"_can_place_word: word={word}, row={row}, col={col}, direction={direction}, grid_size={size}")
|
30 |
+
|
31 |
+
# Check initial boundaries
|
32 |
+
if row < 0 or col < 0 or row >= size or col >= size:
|
33 |
+
logger.debug(f"Initial bounds check failed: row={row}, col={col}, size={size}")
|
34 |
+
return False
|
35 |
+
|
36 |
+
if direction == "horizontal":
|
37 |
+
if col + len(word) > size:
|
38 |
+
logger.debug(f"Horizontal bounds check failed: col+len(word)={col + len(word)} > size={size}")
|
39 |
+
return False
|
40 |
+
|
41 |
+
# Check word boundaries (no adjacent letters) - with bounds check
|
42 |
+
if col > 0:
|
43 |
+
if row >= size or col - 1 >= size or row < 0 or col - 1 < 0:
|
44 |
+
logger.debug(f"Horizontal left boundary check failed: row={row}, col-1={col-1}, size={size}")
|
45 |
+
return False
|
46 |
+
if grid[row][col - 1] != ".":
|
47 |
+
logger.debug(f"Horizontal left boundary has adjacent letter")
|
48 |
+
return False
|
49 |
+
|
50 |
+
if col + len(word) < size:
|
51 |
+
if row >= size or col + len(word) >= size or row < 0 or col + len(word) < 0:
|
52 |
+
logger.debug(f"Horizontal right boundary check failed: row={row}, col+len={col + len(word)}, size={size}")
|
53 |
+
return False
|
54 |
+
if grid[row][col + len(word)] != ".":
|
55 |
+
logger.debug(f"Horizontal right boundary has adjacent letter")
|
56 |
+
return False
|
57 |
+
|
58 |
+
# Check each letter position
|
59 |
+
for i, letter in enumerate(word):
|
60 |
+
check_row = row
|
61 |
+
check_col = col + i
|
62 |
+
if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
|
63 |
+
logger.debug(f"Horizontal letter position check failed: letter {i}, row={check_row}, col={check_col}, size={size}")
|
64 |
+
return False
|
65 |
+
current_cell = grid[check_row][check_col]
|
66 |
+
if current_cell != "." and current_cell != letter:
|
67 |
+
logger.debug(f"Horizontal letter conflict: expected {letter}, found {current_cell}")
|
68 |
+
return False
|
69 |
+
|
70 |
+
else: # vertical
|
71 |
+
if row + len(word) > size:
|
72 |
+
logger.debug(f"Vertical bounds check failed: row+len(word)={row + len(word)} > size={size}")
|
73 |
+
return False
|
74 |
+
|
75 |
+
# Check word boundaries - with bounds check
|
76 |
+
if row > 0:
|
77 |
+
if row - 1 >= size or col >= size or row - 1 < 0 or col < 0:
|
78 |
+
logger.debug(f"Vertical top boundary check failed: row-1={row-1}, col={col}, size={size}")
|
79 |
+
return False
|
80 |
+
if grid[row - 1][col] != ".":
|
81 |
+
logger.debug(f"Vertical top boundary has adjacent letter")
|
82 |
+
return False
|
83 |
+
|
84 |
+
if row + len(word) < size:
|
85 |
+
if row + len(word) >= size or col >= size or row + len(word) < 0 or col < 0:
|
86 |
+
logger.debug(f"Vertical bottom boundary check failed: row+len={row + len(word)}, col={col}, size={size}")
|
87 |
+
return False
|
88 |
+
if grid[row + len(word)][col] != ".":
|
89 |
+
logger.debug(f"Vertical bottom boundary has adjacent letter")
|
90 |
+
return False
|
91 |
+
|
92 |
+
# Check each letter position
|
93 |
+
for i, letter in enumerate(word):
|
94 |
+
check_row = row + i
|
95 |
+
check_col = col
|
96 |
+
if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
|
97 |
+
logger.debug(f"Vertical letter position check failed: letter {i}, row={check_row}, col={check_col}, size={size}")
|
98 |
+
return False
|
99 |
+
current_cell = grid[check_row][check_col]
|
100 |
+
if current_cell != "." and current_cell != letter:
|
101 |
+
logger.debug(f"Vertical letter conflict: expected {letter}, found {current_cell}")
|
102 |
+
return False
|
103 |
+
|
104 |
+
logger.debug(f"_can_place_word: SUCCESS for word={word}")
|
105 |
+
return True
|
106 |
+
|
107 |
+
except Exception as e:
|
108 |
+
logger.error(f"β ERROR in _can_place_word: {e}")
|
109 |
+
logger.error(f" word={word}, row={row}, col={col}, direction={direction}")
|
110 |
+
logger.error(f" grid_size={len(grid) if grid else 'None'}")
|
111 |
+
import traceback
|
112 |
+
traceback.print_exc()
|
113 |
+
return False
|
114 |
+
|
115 |
+
def _place_word(self, grid, word, row, col, direction):
|
116 |
+
"""Enhanced _place_word with comprehensive bounds checking."""
|
117 |
+
try:
|
118 |
+
size = len(grid)
|
119 |
+
logger.debug(f"_place_word: word={word}, row={row}, col={col}, direction={direction}, grid_size={size}")
|
120 |
+
|
121 |
+
original_state = []
|
122 |
+
|
123 |
+
if direction == "horizontal":
|
124 |
+
for i, letter in enumerate(word):
|
125 |
+
check_row = row
|
126 |
+
check_col = col + i
|
127 |
+
if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
|
128 |
+
logger.error(f"β _place_word horizontal bounds error: row={check_row}, col={check_col}, size={size}")
|
129 |
+
raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}")
|
130 |
+
|
131 |
+
original_state.append({
|
132 |
+
"row": check_row,
|
133 |
+
"col": check_col,
|
134 |
+
"value": grid[check_row][check_col]
|
135 |
+
})
|
136 |
+
grid[check_row][check_col] = letter
|
137 |
+
else:
|
138 |
+
for i, letter in enumerate(word):
|
139 |
+
check_row = row + i
|
140 |
+
check_col = col
|
141 |
+
if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
|
142 |
+
logger.error(f"β _place_word vertical bounds error: row={check_row}, col={check_col}, size={size}")
|
143 |
+
raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}")
|
144 |
+
|
145 |
+
original_state.append({
|
146 |
+
"row": check_row,
|
147 |
+
"col": check_col,
|
148 |
+
"value": grid[check_row][check_col]
|
149 |
+
})
|
150 |
+
grid[check_row][check_col] = letter
|
151 |
+
|
152 |
+
logger.debug(f"_place_word: SUCCESS for word={word}")
|
153 |
+
return original_state
|
154 |
+
|
155 |
+
except Exception as e:
|
156 |
+
logger.error(f"β ERROR in _place_word: {e}")
|
157 |
+
logger.error(f" word={word}, row={row}, col={col}, direction={direction}")
|
158 |
+
logger.error(f" grid_size={len(grid) if grid else 'None'}")
|
159 |
+
import traceback
|
160 |
+
traceback.print_exc()
|
161 |
+
raise
|
162 |
+
|
163 |
+
def _remove_word(self, grid, original_state):
|
164 |
+
"""Enhanced _remove_word with comprehensive bounds checking."""
|
165 |
+
try:
|
166 |
+
size = len(grid)
|
167 |
+
logger.debug(f"_remove_word: restoring {len(original_state)} positions, grid_size={size}")
|
168 |
+
|
169 |
+
for state in original_state:
|
170 |
+
check_row = state["row"]
|
171 |
+
check_col = state["col"]
|
172 |
+
if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
|
173 |
+
logger.error(f"β _remove_word bounds error: row={check_row}, col={check_col}, size={size}")
|
174 |
+
raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}")
|
175 |
+
|
176 |
+
grid[check_row][check_col] = state["value"]
|
177 |
+
|
178 |
+
logger.debug(f"_remove_word: SUCCESS")
|
179 |
+
|
180 |
+
except Exception as e:
|
181 |
+
logger.error(f"β ERROR in _remove_word: {e}")
|
182 |
+
logger.error(f" grid_size={len(grid) if grid else 'None'}")
|
183 |
+
logger.error(f" original_state={original_state}")
|
184 |
+
import traceback
|
185 |
+
traceback.print_exc()
|
186 |
+
raise
|
187 |
+
|
188 |
+
def _create_simple_cross(self, word_list, word_objs):
|
189 |
+
"""Enhanced _create_simple_cross with comprehensive bounds checking."""
|
190 |
+
try:
|
191 |
+
logger.debug(f"_create_simple_cross: words={word_list}")
|
192 |
+
|
193 |
+
if len(word_list) < 2:
|
194 |
+
logger.debug("Not enough words for simple cross")
|
195 |
+
return None
|
196 |
+
|
197 |
+
word1, word2 = word_list[0], word_list[1]
|
198 |
+
intersections = self._find_word_intersections(word1, word2)
|
199 |
+
|
200 |
+
if not intersections:
|
201 |
+
logger.debug("No intersections found")
|
202 |
+
return None
|
203 |
+
|
204 |
+
# Use first intersection
|
205 |
+
intersection = intersections[0]
|
206 |
+
size = max(len(word1), len(word2)) + 4
|
207 |
+
logger.debug(f"Creating grid of size {size} for simple cross")
|
208 |
+
|
209 |
+
grid = [["." for _ in range(size)] for _ in range(size)]
|
210 |
+
|
211 |
+
# Place first word horizontally in center
|
212 |
+
center_row = size // 2
|
213 |
+
center_col = (size - len(word1)) // 2
|
214 |
+
|
215 |
+
logger.debug(f"Placing word1 '{word1}' at row={center_row}, col={center_col}")
|
216 |
+
|
217 |
+
for i, letter in enumerate(word1):
|
218 |
+
check_row = center_row
|
219 |
+
check_col = center_col + i
|
220 |
+
if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
|
221 |
+
logger.error(f"β _create_simple_cross word1 bounds error: row={check_row}, col={check_col}, size={size}")
|
222 |
+
raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}")
|
223 |
+
grid[check_row][check_col] = letter
|
224 |
+
|
225 |
+
# Place second word vertically at intersection
|
226 |
+
intersection_col = center_col + intersection["word_pos"]
|
227 |
+
word2_start_row = center_row - intersection["placed_pos"]
|
228 |
+
|
229 |
+
logger.debug(f"Placing word2 '{word2}' at row={word2_start_row}, col={intersection_col}")
|
230 |
+
|
231 |
+
for i, letter in enumerate(word2):
|
232 |
+
check_row = word2_start_row + i
|
233 |
+
check_col = intersection_col
|
234 |
+
if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
|
235 |
+
logger.error(f"β _create_simple_cross word2 bounds error: row={check_row}, col={check_col}, size={size}")
|
236 |
+
raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}")
|
237 |
+
grid[check_row][check_col] = letter
|
238 |
+
|
239 |
+
placed_words = [
|
240 |
+
{"word": word1, "row": center_row, "col": center_col, "direction": "horizontal", "number": 1},
|
241 |
+
{"word": word2, "row": word2_start_row, "col": intersection_col, "direction": "vertical", "number": 2}
|
242 |
+
]
|
243 |
+
|
244 |
+
logger.debug(f"_create_simple_cross: SUCCESS")
|
245 |
+
|
246 |
+
trimmed = self._trim_grid(grid, placed_words)
|
247 |
+
clues = self._generate_clues(word_objs[:2], trimmed["placed_words"])
|
248 |
+
|
249 |
+
return {
|
250 |
+
"grid": trimmed["grid"],
|
251 |
+
"placed_words": trimmed["placed_words"],
|
252 |
+
"clues": clues
|
253 |
+
}
|
254 |
+
|
255 |
+
except Exception as e:
|
256 |
+
logger.error(f"β ERROR in _create_simple_cross: {e}")
|
257 |
+
import traceback
|
258 |
+
traceback.print_exc()
|
259 |
+
raise
|
260 |
+
|
261 |
+
async def test_debug_generator():
|
262 |
+
"""Test the debug generator to catch index errors."""
|
263 |
+
try:
|
264 |
+
print("π§ͺ Testing debug crossword generator...")
|
265 |
+
|
266 |
+
# Create mock vector service
|
267 |
+
vector_service = VectorSearchService()
|
268 |
+
|
269 |
+
# Create debug generator
|
270 |
+
generator = DebugCrosswordGenerator(vector_service)
|
271 |
+
|
272 |
+
# Test with various topics and difficulties
|
273 |
+
test_cases = [
|
274 |
+
(["animals"], "medium"),
|
275 |
+
(["science"], "hard"),
|
276 |
+
(["technology"], "easy"),
|
277 |
+
(["animals", "science"], "medium"),
|
278 |
+
]
|
279 |
+
|
280 |
+
for i, (topics, difficulty) in enumerate(test_cases):
|
281 |
+
print(f"\n㪠Test {i+1}: topics={topics}, difficulty={difficulty}")
|
282 |
+
try:
|
283 |
+
result = await generator.generate_puzzle(topics, difficulty, use_ai=False)
|
284 |
+
if result:
|
285 |
+
print(f"β
Test {i+1} succeeded")
|
286 |
+
grid_size = len(result['grid'])
|
287 |
+
word_count = len(result['clues'])
|
288 |
+
print(f" Grid: {grid_size}x{grid_size}, Words: {word_count}")
|
289 |
+
else:
|
290 |
+
print(f"β οΈ Test {i+1} returned None")
|
291 |
+
except Exception as e:
|
292 |
+
print(f"β Test {i+1} failed: {e}")
|
293 |
+
import traceback
|
294 |
+
traceback.print_exc()
|
295 |
+
return False
|
296 |
+
|
297 |
+
print(f"\nβ
All debug tests completed!")
|
298 |
+
return True
|
299 |
+
|
300 |
+
except Exception as e:
|
301 |
+
print(f"β Debug test setup failed: {e}")
|
302 |
+
import traceback
|
303 |
+
traceback.print_exc()
|
304 |
+
return False
|
305 |
+
|
306 |
+
if __name__ == "__main__":
|
307 |
+
asyncio.run(test_debug_generator())
|
crossword-app/backend-py/debug_simple.py
ADDED
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
"""
|
3 |
+
Simple debug test for crossword generator index errors.
|
4 |
+
"""
|
5 |
+
|
6 |
+
import asyncio
|
7 |
+
import sys
|
8 |
+
import logging
|
9 |
+
from pathlib import Path
|
10 |
+
|
11 |
+
# Add project root to path
|
12 |
+
project_root = Path(__file__).parent
|
13 |
+
sys.path.insert(0, str(project_root))
|
14 |
+
|
15 |
+
from src.services.crossword_generator_fixed import CrosswordGeneratorFixed
|
16 |
+
|
17 |
+
# Enable debug logging
|
18 |
+
logging.basicConfig(level=logging.DEBUG)
|
19 |
+
logger = logging.getLogger(__name__)
|
20 |
+
|
21 |
+
async def test_with_static_words():
|
22 |
+
"""Test generator with static word lists."""
|
23 |
+
|
24 |
+
# Create generator without vector service
|
25 |
+
generator = CrosswordGeneratorFixed(vector_service=None)
|
26 |
+
|
27 |
+
# Create test words
|
28 |
+
test_words = [
|
29 |
+
{"word": "CAT", "clue": "Feline pet"},
|
30 |
+
{"word": "DOG", "clue": "Man's best friend"},
|
31 |
+
{"word": "BIRD", "clue": "Flying animal"},
|
32 |
+
{"word": "FISH", "clue": "Aquatic animal"},
|
33 |
+
{"word": "ELEPHANT", "clue": "Large mammal"},
|
34 |
+
{"word": "TIGER", "clue": "Striped cat"},
|
35 |
+
{"word": "HORSE", "clue": "Riding animal"},
|
36 |
+
{"word": "BEAR", "clue": "Large carnivore"}
|
37 |
+
]
|
38 |
+
|
39 |
+
print(f"π§ͺ Testing crossword generation with {len(test_words)} words...")
|
40 |
+
|
41 |
+
try:
|
42 |
+
# Test multiple times to catch intermittent errors
|
43 |
+
for attempt in range(10):
|
44 |
+
print(f"\n㪠Attempt {attempt + 1}/10")
|
45 |
+
|
46 |
+
# Shuffle words to create different scenarios
|
47 |
+
import random
|
48 |
+
random.shuffle(test_words)
|
49 |
+
|
50 |
+
# Override the word selection to use our test words
|
51 |
+
generator._select_words = lambda topics, difficulty, use_ai: test_words
|
52 |
+
|
53 |
+
result = await generator.generate_puzzle(["animals"], "medium", use_ai=False)
|
54 |
+
|
55 |
+
if result:
|
56 |
+
grid_size = len(result['grid'])
|
57 |
+
word_count = len(result['clues'])
|
58 |
+
print(f"β
Attempt {attempt + 1} succeeded: {grid_size}x{grid_size} grid, {word_count} words")
|
59 |
+
else:
|
60 |
+
print(f"β οΈ Attempt {attempt + 1} returned None")
|
61 |
+
|
62 |
+
except IndexError as e:
|
63 |
+
print(f"β INDEX ERROR caught on attempt {attempt + 1}: {e}")
|
64 |
+
import traceback
|
65 |
+
traceback.print_exc()
|
66 |
+
return False
|
67 |
+
except Exception as e:
|
68 |
+
print(f"β Other error on attempt {attempt + 1}: {e}")
|
69 |
+
import traceback
|
70 |
+
traceback.print_exc()
|
71 |
+
return False
|
72 |
+
|
73 |
+
print(f"\nβ
All 10 attempts completed successfully!")
|
74 |
+
return True
|
75 |
+
|
76 |
+
async def test_grid_placement_directly():
|
77 |
+
"""Test grid placement functions directly with problematic data."""
|
78 |
+
|
79 |
+
generator = CrosswordGeneratorFixed(vector_service=None)
|
80 |
+
|
81 |
+
# Test data that might cause issues
|
82 |
+
test_cases = [
|
83 |
+
{
|
84 |
+
"words": ["A", "I"], # Very short words
|
85 |
+
"description": "Very short words"
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"words": ["VERYLONGWORDTHATMIGHTCAUSEISSUES", "SHORT"],
|
89 |
+
"description": "Very long word with short word"
|
90 |
+
},
|
91 |
+
{
|
92 |
+
"words": ["ABCDEFGHIJKLMNOP", "QRSTUVWXYZ"], # Long words
|
93 |
+
"description": "Two long words"
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"words": ["TEST", "SETS", "NETS", "PETS"], # Multiple similar words
|
97 |
+
"description": "Similar words with same endings"
|
98 |
+
}
|
99 |
+
]
|
100 |
+
|
101 |
+
for i, test_case in enumerate(test_cases):
|
102 |
+
print(f"\n㪠Grid test {i+1}: {test_case['description']}")
|
103 |
+
|
104 |
+
try:
|
105 |
+
word_list = test_case["words"]
|
106 |
+
word_objs = [{"word": w, "clue": f"Clue for {w}"} for w in word_list]
|
107 |
+
|
108 |
+
result = generator._create_grid(word_objs)
|
109 |
+
|
110 |
+
if result:
|
111 |
+
grid_size = len(result['grid'])
|
112 |
+
word_count = len(result['placed_words'])
|
113 |
+
print(f"β
Grid test {i+1} succeeded: {grid_size}x{grid_size} grid, {word_count} words")
|
114 |
+
else:
|
115 |
+
print(f"β οΈ Grid test {i+1} returned None")
|
116 |
+
|
117 |
+
except IndexError as e:
|
118 |
+
print(f"β INDEX ERROR in grid test {i+1}: {e}")
|
119 |
+
import traceback
|
120 |
+
traceback.print_exc()
|
121 |
+
return False
|
122 |
+
except Exception as e:
|
123 |
+
print(f"β Other error in grid test {i+1}: {e}")
|
124 |
+
import traceback
|
125 |
+
traceback.print_exc()
|
126 |
+
return False
|
127 |
+
|
128 |
+
return True
|
129 |
+
|
130 |
+
if __name__ == "__main__":
|
131 |
+
print("π§ͺ Starting debug tests for crossword generator...")
|
132 |
+
|
133 |
+
async def run_tests():
|
134 |
+
success1 = await test_with_static_words()
|
135 |
+
success2 = await test_grid_placement_directly()
|
136 |
+
|
137 |
+
if success1 and success2:
|
138 |
+
print("\nπ All debug tests passed! No index errors detected.")
|
139 |
+
else:
|
140 |
+
print("\nβ Some debug tests failed.")
|
141 |
+
|
142 |
+
asyncio.run(run_tests())
|
crossword-app/backend-py/pytest.ini
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[tool:pytest]
|
2 |
+
testpaths = test-unit
|
3 |
+
python_files = test_*.py
|
4 |
+
python_classes = Test*
|
5 |
+
python_functions = test_*
|
6 |
+
addopts =
|
7 |
+
-v
|
8 |
+
--tb=short
|
9 |
+
--strict-markers
|
10 |
+
--disable-warnings
|
11 |
+
--color=yes
|
12 |
+
markers =
|
13 |
+
slow: marks tests as slow (deselect with '-m "not slow"')
|
14 |
+
integration: marks tests as integration tests
|
15 |
+
unit: marks tests as unit tests
|
16 |
+
asyncio_mode = auto
|
crossword-app/backend-py/requirements-dev.txt
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Development requirements with AI/ML dependencies
|
2 |
+
# This file includes ALL dependencies for full development environment
|
3 |
+
|
4 |
+
# Include base requirements
|
5 |
+
-r requirements.txt
|
6 |
+
|
7 |
+
# AI/ML dependencies for vector-powered word generation
|
8 |
+
sentence-transformers==3.3.0
|
9 |
+
torch==2.5.1
|
10 |
+
transformers==4.47.1
|
11 |
+
scikit-learn==1.5.2
|
12 |
+
huggingface-hub==0.26.2
|
13 |
+
faiss-cpu==1.9.0
|
14 |
+
|
15 |
+
# Additional development tools
|
16 |
+
pytest-cov==6.0.0 # For test coverage reports
|
17 |
+
black==24.8.0 # Code formatter (optional)
|
18 |
+
flake8==7.1.1 # Linting (optional)
|
crossword-app/backend-py/requirements.txt
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Core FastAPI and web server dependencies
|
2 |
+
fastapi==0.116.1
|
3 |
+
uvicorn[standard]==0.32.1
|
4 |
+
starlette==0.47.2
|
5 |
+
python-dotenv==1.0.1
|
6 |
+
python-multipart==0.0.12
|
7 |
+
|
8 |
+
# Data validation and serialization
|
9 |
+
pydantic==2.11.7
|
10 |
+
pydantic-core==2.33.2
|
11 |
+
typing-extensions==4.14.1
|
12 |
+
typing-inspection==0.4.1
|
13 |
+
|
14 |
+
# HTTP client dependencies
|
15 |
+
httpx==0.28.1
|
16 |
+
httpcore==1.0.9
|
17 |
+
h11==0.16.0
|
18 |
+
anyio==4.10.0
|
19 |
+
requests==2.32.4
|
20 |
+
certifi==2025.8.3
|
21 |
+
idna==3.10
|
22 |
+
|
23 |
+
# Core data processing
|
24 |
+
numpy==2.3.2
|
25 |
+
|
26 |
+
# Logging and monitoring
|
27 |
+
structlog==25.4.0
|
28 |
+
|
29 |
+
# Development and testing dependencies
|
30 |
+
pytest==8.4.1
|
31 |
+
pytest-asyncio==1.1.0
|
32 |
+
iniconfig==2.1.0
|
33 |
+
packaging==25.0
|
34 |
+
pluggy==1.6.0
|
35 |
+
pygments==2.19.2
|
36 |
+
|
37 |
+
# AI/ML dependencies (optional - install separately if needed)
|
38 |
+
# Uncomment these lines if you want AI-powered word generation:
|
39 |
+
# sentence-transformers==3.3.0
|
40 |
+
# torch==2.5.1
|
41 |
+
# transformers==4.47.1
|
42 |
+
# scikit-learn==1.5.2
|
43 |
+
# huggingface-hub==0.26.2
|
44 |
+
# faiss-cpu==1.9.0
|
45 |
+
|
46 |
+
# Additional utility dependencies
|
47 |
+
annotated-types==0.7.0
|
48 |
+
sniffio==1.3.1
|
crossword-app/backend-py/run_tests.py
ADDED
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
"""
|
3 |
+
Test runner script for the backend-py project.
|
4 |
+
Run this script to execute all unit tests.
|
5 |
+
"""
|
6 |
+
|
7 |
+
import sys
|
8 |
+
import subprocess
|
9 |
+
from pathlib import Path
|
10 |
+
|
11 |
+
def run_tests():
|
12 |
+
"""Run all tests using pytest."""
|
13 |
+
print("π§ͺ Running Python Backend Unit Tests\n")
|
14 |
+
|
15 |
+
# Change to project directory
|
16 |
+
project_root = Path(__file__).parent
|
17 |
+
|
18 |
+
try:
|
19 |
+
# Run pytest with coverage if available
|
20 |
+
cmd = [
|
21 |
+
sys.executable, "-m", "pytest",
|
22 |
+
"test-unit/",
|
23 |
+
"-v",
|
24 |
+
"--tb=short",
|
25 |
+
"--color=yes"
|
26 |
+
]
|
27 |
+
|
28 |
+
# Try to add coverage if pytest-cov is available
|
29 |
+
try:
|
30 |
+
import pytest_cov
|
31 |
+
cmd.extend([
|
32 |
+
"--cov=src",
|
33 |
+
"--cov-report=term-missing",
|
34 |
+
"--cov-report=html:htmlcov"
|
35 |
+
])
|
36 |
+
print("π Running tests with coverage analysis")
|
37 |
+
except ImportError:
|
38 |
+
print("π Running tests without coverage (install pytest-cov for coverage)")
|
39 |
+
|
40 |
+
print(f"π Command: {' '.join(cmd)}\n")
|
41 |
+
|
42 |
+
result = subprocess.run(cmd, cwd=project_root)
|
43 |
+
|
44 |
+
if result.returncode == 0:
|
45 |
+
print("\nβ
All tests passed!")
|
46 |
+
if 'pytest_cov' in locals():
|
47 |
+
print("π Coverage report generated in htmlcov/index.html")
|
48 |
+
else:
|
49 |
+
print(f"\nβ Tests failed with exit code {result.returncode}")
|
50 |
+
|
51 |
+
return result.returncode
|
52 |
+
|
53 |
+
except FileNotFoundError:
|
54 |
+
print("β pytest not found. Install it with: pip install pytest pytest-asyncio")
|
55 |
+
return 1
|
56 |
+
except Exception as e:
|
57 |
+
print(f"β Error running tests: {e}")
|
58 |
+
return 1
|
59 |
+
|
60 |
+
def run_specific_test(test_file):
|
61 |
+
"""Run a specific test file."""
|
62 |
+
print(f"π― Running specific test: {test_file}\n")
|
63 |
+
|
64 |
+
try:
|
65 |
+
cmd = [sys.executable, "-m", "pytest", f"test-unit/{test_file}", "-v"]
|
66 |
+
result = subprocess.run(cmd, cwd=Path(__file__).parent)
|
67 |
+
return result.returncode
|
68 |
+
except Exception as e:
|
69 |
+
print(f"β Error running test {test_file}: {e}")
|
70 |
+
return 1
|
71 |
+
|
72 |
+
def main():
|
73 |
+
"""Main entry point."""
|
74 |
+
if len(sys.argv) > 1:
|
75 |
+
# Run specific test file
|
76 |
+
test_file = sys.argv[1]
|
77 |
+
if not test_file.startswith("test_"):
|
78 |
+
test_file = f"test_{test_file}"
|
79 |
+
if not test_file.endswith(".py"):
|
80 |
+
test_file = f"{test_file}.py"
|
81 |
+
|
82 |
+
return run_specific_test(test_file)
|
83 |
+
else:
|
84 |
+
# Run all tests
|
85 |
+
return run_tests()
|
86 |
+
|
87 |
+
if __name__ == "__main__":
|
88 |
+
exit_code = main()
|
89 |
+
sys.exit(exit_code)
|
crossword-app/backend-py/src/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Python backend package
|
crossword-app/backend-py/src/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (179 Bytes). View file
|
|
crossword-app/backend-py/src/routes/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Routes package
|
crossword-app/backend-py/src/routes/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (186 Bytes). View file
|
|
crossword-app/backend-py/src/routes/__pycache__/api.cpython-313.pyc
ADDED
Binary file (8.43 kB). View file
|
|
crossword-app/backend-py/src/routes/api.py
ADDED
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
API routes for crossword puzzle generator.
|
3 |
+
Matches the existing JavaScript API for frontend compatibility.
|
4 |
+
"""
|
5 |
+
|
6 |
+
import logging
|
7 |
+
from typing import List, Dict, Any, Optional
|
8 |
+
from datetime import datetime
|
9 |
+
|
10 |
+
from fastapi import APIRouter, HTTPException, Request, Depends
|
11 |
+
from pydantic import BaseModel, Field
|
12 |
+
|
13 |
+
from ..services.crossword_generator_wrapper import CrosswordGenerator
|
14 |
+
|
15 |
+
logger = logging.getLogger(__name__)
|
16 |
+
|
17 |
+
router = APIRouter()
|
18 |
+
|
19 |
+
# Request/Response models
|
20 |
+
class GeneratePuzzleRequest(BaseModel):
|
21 |
+
topics: List[str] = Field(..., description="List of topics for the puzzle")
|
22 |
+
difficulty: str = Field(default="medium", description="Difficulty level: easy, medium, hard")
|
23 |
+
useAI: bool = Field(default=False, description="Use AI vector search for word generation")
|
24 |
+
|
25 |
+
class WordInfo(BaseModel):
|
26 |
+
word: str
|
27 |
+
clue: str
|
28 |
+
similarity: Optional[float] = None
|
29 |
+
source: Optional[str] = None
|
30 |
+
|
31 |
+
class ClueInfo(BaseModel):
|
32 |
+
number: int
|
33 |
+
word: str
|
34 |
+
text: str
|
35 |
+
direction: str # "across" or "down"
|
36 |
+
position: Dict[str, int] # {"row": int, "col": int}
|
37 |
+
|
38 |
+
class PuzzleMetadata(BaseModel):
|
39 |
+
topics: List[str]
|
40 |
+
difficulty: str
|
41 |
+
wordCount: int
|
42 |
+
size: int
|
43 |
+
aiGenerated: bool
|
44 |
+
|
45 |
+
class PuzzleResponse(BaseModel):
|
46 |
+
grid: List[List[str]]
|
47 |
+
clues: List[ClueInfo]
|
48 |
+
metadata: PuzzleMetadata
|
49 |
+
|
50 |
+
class TopicInfo(BaseModel):
|
51 |
+
id: str
|
52 |
+
name: str
|
53 |
+
|
54 |
+
# Global crossword generator instance (will be initialized in lifespan)
|
55 |
+
generator = None
|
56 |
+
|
57 |
+
def get_crossword_generator(request: Request) -> CrosswordGenerator:
|
58 |
+
"""Dependency to get the crossword generator with vector search service."""
|
59 |
+
global generator
|
60 |
+
if generator is None:
|
61 |
+
vector_service = getattr(request.app.state, 'vector_service', None)
|
62 |
+
generator = CrosswordGenerator(vector_service)
|
63 |
+
return generator
|
64 |
+
|
65 |
+
@router.get("/topics", response_model=List[TopicInfo])
|
66 |
+
async def get_topics():
|
67 |
+
"""Get available topics for puzzle generation."""
|
68 |
+
# Return the same topics as JavaScript backend for consistency
|
69 |
+
topics = [
|
70 |
+
{"id": "animals", "name": "Animals"},
|
71 |
+
{"id": "geography", "name": "Geography"},
|
72 |
+
{"id": "science", "name": "Science"},
|
73 |
+
{"id": "technology", "name": "Technology"}
|
74 |
+
]
|
75 |
+
return topics
|
76 |
+
|
77 |
+
@router.post("/generate", response_model=PuzzleResponse)
|
78 |
+
async def generate_puzzle(
|
79 |
+
request: GeneratePuzzleRequest,
|
80 |
+
crossword_gen: CrosswordGenerator = Depends(get_crossword_generator)
|
81 |
+
):
|
82 |
+
"""
|
83 |
+
Generate a crossword puzzle with optional AI vector search.
|
84 |
+
|
85 |
+
This endpoint matches the JavaScript API exactly for frontend compatibility.
|
86 |
+
"""
|
87 |
+
try:
|
88 |
+
logger.info(f"π― Generating puzzle for topics: {request.topics}, difficulty: {request.difficulty}, useAI: {request.useAI}")
|
89 |
+
|
90 |
+
# Validate topics
|
91 |
+
if not request.topics:
|
92 |
+
raise HTTPException(status_code=400, detail="At least one topic is required")
|
93 |
+
|
94 |
+
valid_difficulties = ["easy", "medium", "hard"]
|
95 |
+
if request.difficulty not in valid_difficulties:
|
96 |
+
raise HTTPException(
|
97 |
+
status_code=400,
|
98 |
+
detail=f"Invalid difficulty. Must be one of: {valid_difficulties}"
|
99 |
+
)
|
100 |
+
|
101 |
+
# Generate puzzle
|
102 |
+
puzzle_data = await crossword_gen.generate_puzzle(
|
103 |
+
topics=request.topics,
|
104 |
+
difficulty=request.difficulty,
|
105 |
+
use_ai=request.useAI
|
106 |
+
)
|
107 |
+
|
108 |
+
if not puzzle_data:
|
109 |
+
raise HTTPException(status_code=500, detail="Failed to generate puzzle")
|
110 |
+
|
111 |
+
logger.info(f"β
Generated puzzle with {puzzle_data['metadata']['wordCount']} words")
|
112 |
+
return puzzle_data
|
113 |
+
|
114 |
+
except HTTPException:
|
115 |
+
raise
|
116 |
+
except Exception as e:
|
117 |
+
logger.error(f"β Error generating puzzle: {e}")
|
118 |
+
raise HTTPException(status_code=500, detail=str(e))
|
119 |
+
|
120 |
+
@router.post("/words")
|
121 |
+
async def generate_words(
|
122 |
+
request: GeneratePuzzleRequest,
|
123 |
+
crossword_gen: CrosswordGenerator = Depends(get_crossword_generator)
|
124 |
+
):
|
125 |
+
"""
|
126 |
+
Generate words for given topics (debug endpoint).
|
127 |
+
|
128 |
+
This endpoint allows testing word generation without full puzzle creation.
|
129 |
+
"""
|
130 |
+
try:
|
131 |
+
words = await crossword_gen.generate_words_for_topics(
|
132 |
+
topics=request.topics,
|
133 |
+
difficulty=request.difficulty,
|
134 |
+
use_ai=request.useAI
|
135 |
+
)
|
136 |
+
|
137 |
+
return {
|
138 |
+
"topics": request.topics,
|
139 |
+
"difficulty": request.difficulty,
|
140 |
+
"useAI": request.useAI,
|
141 |
+
"wordCount": len(words),
|
142 |
+
"words": words
|
143 |
+
}
|
144 |
+
|
145 |
+
except Exception as e:
|
146 |
+
logger.error(f"β Error generating words: {e}")
|
147 |
+
raise HTTPException(status_code=500, detail=str(e))
|
148 |
+
|
149 |
+
@router.get("/health")
|
150 |
+
async def api_health():
|
151 |
+
"""API health check."""
|
152 |
+
return {
|
153 |
+
"status": "healthy",
|
154 |
+
"timestamp": datetime.utcnow().isoformat(),
|
155 |
+
"backend": "python",
|
156 |
+
"version": "2.0.0"
|
157 |
+
}
|
158 |
+
|
159 |
+
@router.get("/debug/vector-search")
|
160 |
+
async def debug_vector_search(
|
161 |
+
topic: str,
|
162 |
+
difficulty: str = "medium",
|
163 |
+
max_words: int = 10,
|
164 |
+
request: Request = None
|
165 |
+
):
|
166 |
+
"""
|
167 |
+
Debug endpoint to test vector search directly.
|
168 |
+
"""
|
169 |
+
try:
|
170 |
+
vector_service = getattr(request.app.state, 'vector_service', None)
|
171 |
+
if not vector_service or not vector_service.is_initialized:
|
172 |
+
raise HTTPException(status_code=503, detail="Vector search service not available")
|
173 |
+
|
174 |
+
words = await vector_service.find_similar_words(topic, difficulty, max_words)
|
175 |
+
|
176 |
+
return {
|
177 |
+
"topic": topic,
|
178 |
+
"difficulty": difficulty,
|
179 |
+
"max_words": max_words,
|
180 |
+
"found_words": len(words),
|
181 |
+
"words": words
|
182 |
+
}
|
183 |
+
|
184 |
+
except Exception as e:
|
185 |
+
logger.error(f"β Vector search debug failed: {e}")
|
186 |
+
raise HTTPException(status_code=500, detail=str(e))
|
crossword-app/backend-py/src/services/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Services package
|
crossword-app/backend-py/src/services/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (188 Bytes). View file
|
|
crossword-app/backend-py/src/services/__pycache__/crossword_generator.cpython-313.pyc
ADDED
Binary file (33.3 kB). View file
|
|
crossword-app/backend-py/src/services/__pycache__/crossword_generator_fixed.cpython-313.pyc
ADDED
Binary file (33.4 kB). View file
|
|
crossword-app/backend-py/src/services/__pycache__/crossword_generator_wrapper.cpython-313.pyc
ADDED
Binary file (2.91 kB). View file
|
|
crossword-app/backend-py/src/services/__pycache__/vector_search.cpython-313.pyc
ADDED
Binary file (28.5 kB). View file
|
|
crossword-app/backend-py/src/services/__pycache__/word_cache.cpython-313.pyc
ADDED
Binary file (17.3 kB). View file
|
|
crossword-app/backend-py/src/services/crossword_generator.py
ADDED
@@ -0,0 +1,722 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Fixed Crossword Generator - Ported from working JavaScript implementation.
|
3 |
+
"""
|
4 |
+
|
5 |
+
import asyncio
|
6 |
+
import json
|
7 |
+
import random
|
8 |
+
import time
|
9 |
+
from pathlib import Path
|
10 |
+
from typing import Dict, List, Optional, Any, Tuple
|
11 |
+
import structlog
|
12 |
+
|
13 |
+
logger = structlog.get_logger(__name__)
|
14 |
+
|
15 |
+
class CrosswordGenerator:
|
16 |
+
def __init__(self, vector_service=None):
|
17 |
+
self.max_attempts = 100
|
18 |
+
self.min_words = 6
|
19 |
+
self.max_words = 10 # Reduced from 12 to 10 for better success rate
|
20 |
+
self.vector_service = vector_service
|
21 |
+
|
22 |
+
async def generate_puzzle(self, topics: List[str], difficulty: str = "medium", use_ai: bool = False) -> Optional[Dict[str, Any]]:
|
23 |
+
"""
|
24 |
+
Generate a complete crossword puzzle.
|
25 |
+
"""
|
26 |
+
try:
|
27 |
+
# Import here to avoid circular imports - with fallback
|
28 |
+
try:
|
29 |
+
from .vector_search import VectorSearchService
|
30 |
+
except ImportError as import_error:
|
31 |
+
logger.warning(f"β οΈ Could not import VectorSearchService: {import_error}. Using static words only.")
|
32 |
+
# Continue without vector service
|
33 |
+
|
34 |
+
logger.info(f"π― Generating puzzle for topics: {topics}, difficulty: {difficulty}, AI: {use_ai}")
|
35 |
+
|
36 |
+
# Get words (from AI or static)
|
37 |
+
words = await self._select_words(topics, difficulty, use_ai)
|
38 |
+
|
39 |
+
if len(words) < self.min_words:
|
40 |
+
logger.error(f"β Not enough words: {len(words)} < {self.min_words}")
|
41 |
+
raise Exception(f"Not enough words generated: {len(words)} < {self.min_words}")
|
42 |
+
|
43 |
+
# Create grid
|
44 |
+
grid_result = self._create_grid(words)
|
45 |
+
|
46 |
+
if not grid_result:
|
47 |
+
logger.error("β Grid creation failed")
|
48 |
+
raise Exception("Could not create crossword grid")
|
49 |
+
|
50 |
+
logger.info(f"β
Generated crossword with {len(grid_result['placed_words'])} words")
|
51 |
+
|
52 |
+
return {
|
53 |
+
"grid": grid_result["grid"],
|
54 |
+
"clues": grid_result["clues"],
|
55 |
+
"metadata": {
|
56 |
+
"topics": topics,
|
57 |
+
"difficulty": difficulty,
|
58 |
+
"wordCount": len(grid_result["placed_words"]),
|
59 |
+
"size": len(grid_result["grid"]),
|
60 |
+
"aiGenerated": use_ai
|
61 |
+
}
|
62 |
+
}
|
63 |
+
|
64 |
+
except Exception as e:
|
65 |
+
logger.error(f"β Error generating puzzle: {e}")
|
66 |
+
raise
|
67 |
+
|
68 |
+
async def _select_words(self, topics: List[str], difficulty: str, use_ai: bool) -> List[Dict[str, Any]]:
|
69 |
+
"""Select words for the crossword."""
|
70 |
+
all_words = []
|
71 |
+
|
72 |
+
if use_ai and self.vector_service:
|
73 |
+
# Use the initialized vector service
|
74 |
+
logger.info(f"π€ Using initialized vector service for AI word generation")
|
75 |
+
for topic in topics:
|
76 |
+
ai_words = await self.vector_service.find_similar_words(topic, difficulty, self.max_words // len(topics))
|
77 |
+
all_words.extend(ai_words)
|
78 |
+
|
79 |
+
if len(all_words) >= self.min_words:
|
80 |
+
logger.info(f"β
AI generated {len(all_words)} words")
|
81 |
+
return self._sort_words_for_crossword(all_words[:self.max_words])
|
82 |
+
else:
|
83 |
+
logger.warning(f"β οΈ AI only generated {len(all_words)} words, falling back to static")
|
84 |
+
|
85 |
+
# Fallback to cached words
|
86 |
+
if self.vector_service:
|
87 |
+
# Use the cached words from the initialized service
|
88 |
+
logger.info(f"π¦ Using cached words from initialized vector service")
|
89 |
+
for topic in topics:
|
90 |
+
cached_words = await self.vector_service._get_cached_fallback(topic, difficulty, self.max_words // len(topics))
|
91 |
+
all_words.extend(cached_words)
|
92 |
+
else:
|
93 |
+
# Last resort: load static words directly
|
94 |
+
logger.warning(f"β οΈ No vector service available, loading static words directly")
|
95 |
+
all_words = await self._get_static_words(topics, difficulty)
|
96 |
+
|
97 |
+
return self._sort_words_for_crossword(all_words[:self.max_words])
|
98 |
+
|
99 |
+
async def _get_static_words(self, topics: List[str], difficulty: str) -> List[Dict[str, Any]]:
|
100 |
+
"""Get static words from JSON files."""
|
101 |
+
all_words = []
|
102 |
+
|
103 |
+
for topic in topics:
|
104 |
+
# Try multiple case variations
|
105 |
+
for topic_variation in [topic, topic.capitalize(), topic.lower()]:
|
106 |
+
word_file = Path(__file__).parent.parent.parent / "data" / "word-lists" / f"{topic_variation.lower()}.json"
|
107 |
+
|
108 |
+
if word_file.exists():
|
109 |
+
with open(word_file, 'r') as f:
|
110 |
+
words = json.load(f)
|
111 |
+
# Filter by difficulty
|
112 |
+
filtered = self._filter_by_difficulty(words, difficulty)
|
113 |
+
all_words.extend(filtered)
|
114 |
+
break
|
115 |
+
|
116 |
+
return all_words
|
117 |
+
|
118 |
+
def _filter_by_difficulty(self, words: List[Dict[str, Any]], difficulty: str) -> List[Dict[str, Any]]:
|
119 |
+
"""Filter words by difficulty (length)."""
|
120 |
+
difficulty_map = {
|
121 |
+
"easy": {"min_len": 3, "max_len": 8},
|
122 |
+
"medium": {"min_len": 4, "max_len": 10},
|
123 |
+
"hard": {"min_len": 5, "max_len": 15}
|
124 |
+
}
|
125 |
+
|
126 |
+
criteria = difficulty_map.get(difficulty, difficulty_map["medium"])
|
127 |
+
return [w for w in words if criteria["min_len"] <= len(w["word"]) <= criteria["max_len"]]
|
128 |
+
|
129 |
+
def _sort_words_for_crossword(self, words: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
130 |
+
"""Sort words by crossword suitability."""
|
131 |
+
scored_words = []
|
132 |
+
|
133 |
+
        for word_obj in words:
            word = word_obj["word"].upper()
            score = 0

            # Strongly prefer shorter words for crossword viability
            if 3 <= len(word) <= 5:
                score += 20  # Short words get highest priority
            elif 6 <= len(word) <= 7:
                score += 15  # Medium words get good priority
            elif len(word) == 8:
                score += 8  # Long words get lower priority
            elif len(word) == 9:
                score += 4  # Very long words get much lower priority
            elif len(word) >= 10:
                score += 1  # Extremely long words get minimal priority

            # Bonus for common letters
            common_letters = ['E', 'A', 'R', 'I', 'O', 'T', 'N', 'S']
            for letter in word:
                if letter in common_letters:
                    score += 1

            # Vowel distribution bonus
            vowels = ['A', 'E', 'I', 'O', 'U']
            vowel_count = sum(1 for letter in word if letter in vowels)
            score += vowel_count

            # Penalty for very long words to discourage their selection
            if len(word) >= 9:
                score -= 5

            scored_words.append({**word_obj, "crossword_score": score})

        # Sort by score with some randomization
        scored_words.sort(key=lambda w: w["crossword_score"] + random.randint(-2, 2), reverse=True)
        return scored_words

    def _create_grid(self, words: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
        """Create crossword grid using backtracking algorithm."""
        if not words:
            logger.error(f"❌ No words provided to grid generator")
            return None

        logger.info(f"🎯 Creating crossword grid with {len(words)} words")

        # Debug: log the structure of words
        logger.info(f"Word structures: {[type(w) for w in words[:3]]}")
        if words:
            logger.info(f"First word sample: {words[0]}")

        # Sort words by length (longest first) - keeping objects aligned
        try:
            # Create paired list of (word_string, word_object)
            word_pairs = []
            for i, w in enumerate(words):
                if isinstance(w, dict) and "word" in w:
                    word_pairs.append((w["word"].upper(), w))
                elif isinstance(w, str):
                    # Create dict for string-only words
                    word_obj = {"word": w.upper(), "clue": f"Clue for {w.upper()}"}
                    word_pairs.append((w.upper(), word_obj))
                else:
                    logger.warning(f"⚠️ Unexpected word format at index {i}: {w}")

            # Sort pairs by word length (longest first)
            word_pairs.sort(key=lambda pair: len(pair[0]), reverse=True)

            # Extract sorted lists
            word_list = [pair[0] for pair in word_pairs]
            sorted_word_objs = [pair[1] for pair in word_pairs]

            logger.info(f"🎯 Processed {len(word_list)} words for grid: {word_list[:5]}")
        except Exception as e:
            logger.error(f"❌ Error processing words: {e}")
            return None

        size = self._calculate_grid_size(word_list)

        # Try multiple attempts
        for attempt in range(3):
            current_size = size + attempt

            try:
                logger.info(f"🔧 Attempt {attempt + 1}: word_list length={len(word_list)}, sorted_word_objs length={len(sorted_word_objs)}")
                result = self._place_words_in_grid(word_list, sorted_word_objs, current_size)
                if result:
                    return result
            except Exception as e:
                logger.error(f"❌ Grid placement attempt {attempt + 1} failed: {e}")
                import traceback
                traceback.print_exc()

            # Try with fewer words
            if len(word_list) > 7:
                reduced_words = word_list[:len(word_list) - 1]
                reduced_word_objs = sorted_word_objs[:len(reduced_words)]
                try:
                    logger.info(f"🔧 Reduced attempt {attempt + 1}: reduced_words length={len(reduced_words)}, reduced_word_objs length={len(reduced_word_objs)}")
                    result = self._place_words_in_grid(reduced_words, reduced_word_objs, current_size)
                    if result:
                        return result
                except Exception as e:
                    logger.error(f"❌ Reduced grid placement attempt {attempt + 1} failed: {e}")
                    import traceback
                    traceback.print_exc()

        # Last resort: simple cross with 2 words
        if len(word_list) >= 2:
            return self._create_simple_cross(word_list[:2], sorted_word_objs[:2])

        return None

    def _calculate_grid_size(self, words: List[str]) -> int:
        """Calculate appropriate grid size with more generous spacing."""
        total_chars = sum(len(word) for word in words)
        longest_word = max(len(word) for word in words) if words else 8

        # More generous grid size calculation
        base_size = int((total_chars * 2.0) ** 0.5)  # Increased multiplier from 1.5 to 2.0

        return max(
            base_size,
            longest_word + 4,  # Add padding to longest word
            12  # Minimum grid size increased from 8 to 12
        )

    def _place_words_in_grid(self, word_list: List[str], word_objs: List[Dict[str, Any]], size: int) -> Optional[Dict[str, Any]]:
        """Place words in grid using backtracking."""
        logger.info(f"🔧 _place_words_in_grid: word_list={len(word_list)}, word_objs={len(word_objs)}, size={size}")

        grid = [["." for _ in range(size)] for _ in range(size)]
        placed_words = []

        start_time = time.time()
        timeout = 5.0  # 5 second timeout

        try:
            if self._backtrack_placement(grid, word_list, word_objs, 0, placed_words, start_time, timeout):
                logger.info(f"🔧 Backtrack successful, trimming grid...")
                trimmed = self._trim_grid(grid, placed_words)
                logger.info(f"🔧 Grid trimmed, generating clues...")
                clues = self._generate_clues(word_objs, trimmed["placed_words"])

                return {
                    "grid": trimmed["grid"],
                    "placed_words": trimmed["placed_words"],
                    "clues": clues
                }
            else:
                logger.info(f"🔧 Backtrack failed")
                return None
        except Exception as e:
            logger.error(f"❌ Error in _place_words_in_grid: {e}")
            import traceback
            traceback.print_exc()
            return None

    def _backtrack_placement(self, grid: List[List[str]], word_list: List[str], word_objs: List[Dict[str, Any]],
                             word_index: int, placed_words: List[Dict[str, Any]], start_time: float,
                             timeout: float, call_count: int = 0) -> bool:
        """Backtracking algorithm for word placement."""
        # Timeout check
        if call_count % 50 == 0 and time.time() - start_time > timeout:
            return False

        if word_index >= len(word_list):
            return True

        word = word_list[word_index]
        size = len(grid)

        # First word: place horizontally in center
        if word_index == 0:
            center_row = size // 2
            center_col = (size - len(word)) // 2

            if self._can_place_word(grid, word, center_row, center_col, "horizontal"):
                original_state = self._place_word(grid, word, center_row, center_col, "horizontal")
                placed_words.append({
                    "word": word,
                    "row": center_row,
                    "col": center_col,
                    "direction": "horizontal",
                    "number": 1
                })

                if self._backtrack_placement(grid, word_list, word_objs, word_index + 1, placed_words, start_time, timeout, call_count + 1):
                    return True

                self._remove_word(grid, original_state)
                placed_words.pop()

            return False

        # Subsequent words: find intersections
        all_placements = self._find_all_intersection_placements(grid, word, placed_words)
        all_placements.sort(key=lambda p: p["score"], reverse=True)

        for placement in all_placements:
            row, col, direction = placement["row"], placement["col"], placement["direction"]

            if self._can_place_word(grid, word, row, col, direction):
                original_state = self._place_word(grid, word, row, col, direction)
                placed_words.append({
                    "word": word,
                    "row": row,
                    "col": col,
                    "direction": direction,
                    "number": word_index + 1
                })

                if self._backtrack_placement(grid, word_list, word_objs, word_index + 1, placed_words, start_time, timeout, call_count + 1):
                    return True

                self._remove_word(grid, original_state)
                placed_words.pop()

        return False

    def _can_place_word(self, grid: List[List[str]], word: str, row: int, col: int, direction: str) -> bool:
        """Check if word can be placed at position."""
        size = len(grid)

        # Check boundaries
        if row < 0 or col < 0 or row >= size or col >= size:
            return False

        if direction == "horizontal":
            if col + len(word) > size:
                return False

            # CRITICAL: Check word boundaries - no letters immediately before/after
            if col > 0 and grid[row][col - 1] != ".":
                return False  # Word would have a preceding letter
            if col + len(word) < size and grid[row][col + len(word)] != ".":
                return False  # Word would have a trailing letter

            # Check each letter position
            for i, letter in enumerate(word):
                check_row = row
                check_col = col + i
                if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
                    return False
                current_cell = grid[check_row][check_col]
                if current_cell != "." and current_cell != letter:
                    return False

                # For empty cells, check perpendicular validity
                if current_cell == ".":
                    if not self._is_valid_perpendicular_placement(grid, letter, check_row, check_col, "vertical"):
                        return False

        else:  # vertical
            if row + len(word) > size:
                return False

            # CRITICAL: Check word boundaries - no letters immediately before/after
            if row > 0 and grid[row - 1][col] != ".":
                return False  # Word would have a preceding letter
            if row + len(word) < size and grid[row + len(word)][col] != ".":
                return False  # Word would have a trailing letter

            # Check each letter position
            for i, letter in enumerate(word):
                check_row = row + i
                check_col = col
                if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
                    return False
                current_cell = grid[check_row][check_col]
                if current_cell != "." and current_cell != letter:
                    return False

                # For empty cells, check perpendicular validity
                if current_cell == ".":
                    if not self._is_valid_perpendicular_placement(grid, letter, check_row, check_col, "horizontal"):
                        return False

        return True

    def _is_valid_perpendicular_placement(self, grid: List[List[str]], letter: str, row: int, col: int, check_direction: str) -> bool:
        """Check if placing a letter would create valid perpendicular word boundaries."""
        size = len(grid)

        if check_direction == "vertical":
            # Check if placing this letter would create an invalid vertical sequence
            has_above = row > 0 and grid[row - 1][col] != "."
            has_below = row < size - 1 and grid[row + 1][col] != "."

            # Don't allow this letter to extend an existing vertical word
            # unless it's exactly at an intersection point with matching letters
            if has_above or has_below:
                return grid[row][col] == letter
        else:  # horizontal
            # Check if placing this letter would create an invalid horizontal sequence
            has_left = col > 0 and grid[row][col - 1] != "."
            has_right = col < size - 1 and grid[row][col + 1] != "."

            # Don't allow this letter to extend an existing horizontal word
            # unless it's exactly at an intersection point with matching letters
            if has_left or has_right:
                return grid[row][col] == letter

        return True

    def _place_word(self, grid: List[List[str]], word: str, row: int, col: int, direction: str) -> List[Dict[str, Any]]:
        """Place word in grid and return original state."""
        original_state = []
        size = len(grid)

        if direction == "horizontal":
            for i, letter in enumerate(word):
                check_row = row
                check_col = col + i
                if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
                    raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}")
                original_state.append({
                    "row": check_row,
                    "col": check_col,
                    "value": grid[check_row][check_col]
                })
                grid[check_row][check_col] = letter
        else:
            for i, letter in enumerate(word):
                check_row = row + i
                check_col = col
                if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
                    raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}")
                original_state.append({
                    "row": check_row,
                    "col": check_col,
                    "value": grid[check_row][check_col]
                })
                grid[check_row][check_col] = letter

        return original_state

    def _remove_word(self, grid: List[List[str]], original_state: List[Dict[str, Any]]):
        """Remove word from grid."""
        size = len(grid)
        for state in original_state:
            check_row = state["row"]
            check_col = state["col"]
            if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
                raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}")
            grid[check_row][check_col] = state["value"]

    def _find_all_intersection_placements(self, grid: List[List[str]], word: str, placed_words: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Find all possible intersection placements for a word."""
        placements = []

        for placed_word in placed_words:
            intersections = self._find_word_intersections(word, placed_word["word"])

            for intersection in intersections:
                word_pos, placed_pos = intersection["word_pos"], intersection["placed_pos"]

                placement_info = self._calculate_intersection_placement(word, word_pos, placed_word, placed_pos)

                if placement_info:
                    score = self._calculate_placement_score(grid, word, placement_info, placed_words)
                    placements.append({
                        **placement_info,
                        "score": score
                    })

        return placements

    def _find_word_intersections(self, word1: str, word2: str) -> List[Dict[str, int]]:
        """Find letter intersections between two words."""
        intersections = []

        for i, letter1 in enumerate(word1):
            for j, letter2 in enumerate(word2):
                if letter1 == letter2:
                    intersections.append({
                        "word_pos": i,
                        "placed_pos": j
                    })

        return intersections

    def _calculate_intersection_placement(self, new_word: str, new_word_pos: int,
                                          placed_word: Dict[str, Any], placed_word_pos: int) -> Optional[Dict[str, Any]]:
        """Calculate where new word should be placed for intersection."""
        placed_row, placed_col = placed_word["row"], placed_word["col"]
        placed_direction = placed_word["direction"]

        # Find intersection point in grid
        if placed_direction == "horizontal":
            intersection_row = placed_row
            intersection_col = placed_col + placed_word_pos
        else:
            intersection_row = placed_row + placed_word_pos
            intersection_col = placed_col

        # Calculate new word position
        new_direction = "vertical" if placed_direction == "horizontal" else "horizontal"

        if new_direction == "horizontal":
            new_row = intersection_row
            new_col = intersection_col - new_word_pos
        else:
            new_row = intersection_row - new_word_pos
            new_col = intersection_col

        return {
            "row": new_row,
            "col": new_col,
            "direction": new_direction
        }

    def _calculate_placement_score(self, grid: List[List[str]], word: str, placement: Dict[str, Any],
                                   placed_words: List[Dict[str, Any]]) -> int:
        """Score a placement for quality."""
        row, col, direction = placement["row"], placement["col"], placement["direction"]
        grid_size = len(grid)
        score = 100  # Base score for intersection

        # Count intersections - with bounds checking
        intersection_count = 0
        if direction == "horizontal":
            for i, letter in enumerate(word):
                target_row = row
                target_col = col + i
                # Check bounds before accessing grid
                if (0 <= target_row < grid_size and
                        0 <= target_col < grid_size and
                        grid[target_row][target_col] == letter):
                    intersection_count += 1
        else:  # vertical
            for i, letter in enumerate(word):
                target_row = row + i
                target_col = col
                # Check bounds before accessing grid
                if (0 <= target_row < grid_size and
                        0 <= target_col < grid_size and
                        grid[target_row][target_col] == letter):
                    intersection_count += 1

        score += intersection_count * 200

        # Bonus for central placement
        center = grid_size // 2
        distance_from_center = abs(row - center) + abs(col - center)
        score -= distance_from_center * 5

        return score

    def _trim_grid(self, grid: List[List[str]], placed_words: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Trim grid to remove excess empty space."""
        if not placed_words:
            return {"grid": grid, "placed_words": placed_words}

        # Find bounds
        min_row = min_col = len(grid)
        max_row = max_col = -1

        for word in placed_words:
            row, col, direction, word_text = word["row"], word["col"], word["direction"], word["word"]

            min_row = min(min_row, row)
            min_col = min(min_col, col)

            if direction == "horizontal":
                max_row = max(max_row, row)
                max_col = max(max_col, col + len(word_text) - 1)
            else:
                max_row = max(max_row, row + len(word_text) - 1)
                max_col = max(max_col, col)

        # Add padding with proper bounds checking
        min_row = max(0, min_row - 1)
        min_col = max(0, min_col - 1)
        max_row = min(len(grid) - 1, max_row + 1)
        max_col = min(len(grid[0]) - 1, max_col + 1)

        # Ensure bounds are valid
        max_row = min(max_row, len(grid) - 1)
        max_col = min(max_col, len(grid[0]) - 1)

        # Create trimmed grid
        trimmed_grid = []
        for r in range(min_row, max_row + 1):
            row = []
            for c in range(min_col, max_col + 1):
                # Double-check bounds before accessing
                if r < 0 or r >= len(grid) or c < 0 or c >= len(grid[0]):
                    logger.error(f"Invalid bounds: r={r}, c={c}, grid_size={len(grid)}x{len(grid[0])}")
                    continue
                row.append(grid[r][c])
            trimmed_grid.append(row)

        # Update word positions
        updated_words = []
        for word in placed_words:
            updated_words.append({
                **word,
                "row": word["row"] - min_row,
                "col": word["col"] - min_col
            })

        return {"grid": trimmed_grid, "placed_words": updated_words}

    def _create_simple_cross(self, word_list: List[str], word_objs: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
        """Create simple cross with two words."""
        if len(word_list) < 2:
            return None

        word1, word2 = word_list[0], word_list[1]
        intersections = self._find_word_intersections(word1, word2)

        if not intersections:
            return None

        # Use first intersection
        intersection = intersections[0]
        size = max(len(word1), len(word2)) + 4
        grid = [["." for _ in range(size)] for _ in range(size)]

        # Place first word horizontally in center
        center_row = size // 2
        center_col = (size - len(word1)) // 2

        for i, letter in enumerate(word1):
            check_row = center_row
            check_col = center_col + i
            if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
                raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}")
            grid[check_row][check_col] = letter

        # Place second word vertically at intersection
        intersection_col = center_col + intersection["word_pos"]
        word2_start_row = center_row - intersection["placed_pos"]

        for i, letter in enumerate(word2):
            check_row = word2_start_row + i
            check_col = intersection_col
            if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
                raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}")
            grid[check_row][check_col] = letter

        placed_words = [
            {"word": word1, "row": center_row, "col": center_col, "direction": "horizontal", "number": 1},
            {"word": word2, "row": word2_start_row, "col": intersection_col, "direction": "vertical", "number": 2}
        ]

        trimmed = self._trim_grid(grid, placed_words)
        clues = self._generate_clues(word_objs[:2], trimmed["placed_words"])

        return {
            "grid": trimmed["grid"],
            "placed_words": trimmed["placed_words"],
            "clues": clues
        }

    def _generate_clues(self, word_objs: List[Dict[str, Any]], placed_words: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Generate clues for placed words."""
        logger.info(f"🔧 _generate_clues: word_objs={len(word_objs)}, placed_words={len(placed_words)}")
        clues = []

        try:
            for i, placed_word in enumerate(placed_words):
                logger.info(f"🔧 Processing placed word {i}: {placed_word.get('word', 'UNKNOWN')}")

                # Find matching word object
                word_obj = next((w for w in word_objs if w["word"].upper() == placed_word["word"]), None)

                if word_obj:
                    logger.info(f"🔧 Found matching word_obj: {word_obj.get('word', 'UNKNOWN')}")
                    clue_text = word_obj["clue"] if "clue" in word_obj else f"Clue for {placed_word['word']}"
                else:
                    logger.warning(f"⚠️ No matching word_obj found for {placed_word['word']}")
                    clue_text = f"Clue for {placed_word['word']}"

                clues.append({
                    "number": placed_word["number"],
                    "word": placed_word["word"],
                    "text": clue_text,
                    "direction": "across" if placed_word["direction"] == "horizontal" else "down",
                    "position": {"row": placed_word["row"], "col": placed_word["col"]}
                })

            logger.info(f"🔧 Generated {len(clues)} clues")
            return clues
        except Exception as e:
            logger.error(f"❌ Error in _generate_clues: {e}")
            import traceback
            traceback.print_exc()
            raise
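The word-boundary test inside `_can_place_word` is the heart of the bounds-checking fix mentioned in the commit message: a word may not be placed so that an existing letter sits immediately before or after it. The snippet below is a minimal, standalone sketch of just the horizontal half of that rule, useful for reasoning about it outside the service; the helper name and demo grid are illustrative and are not part of the committed file.

    # Illustrative sketch only (not part of the commit): the horizontal boundary rule.
    def can_place_horizontal(grid, word, row, col):
        """Reject placements that would touch an existing letter right before or after the word."""
        size = len(grid)
        if row < 0 or col < 0 or row >= size or col + len(word) > size:
            return False
        if col > 0 and grid[row][col - 1] != ".":
            return False  # a letter immediately before would merge two words
        if col + len(word) < size and grid[row][col + len(word)] != ".":
            return False  # a letter immediately after would extend the word
        # each cell must be empty or already hold the matching letter (an intersection)
        return all(grid[row][col + i] in (".", ch) for i, ch in enumerate(word))

    if __name__ == "__main__":
        grid = [["." for _ in range(6)] for _ in range(6)]
        grid[2][0] = "A"  # pretend an existing word ends here
        print(can_place_horizontal(grid, "CAT", 2, 1))  # False: touches the "A" in column 0
        print(can_place_horizontal(grid, "CAT", 4, 1))  # True: no neighbouring letters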
crossword-app/backend-py/src/services/crossword_generator_wrapper.py
ADDED
@@ -0,0 +1,58 @@
"""
Crossword Generator - Simple wrapper for the fixed implementation
"""

import logging
from typing import List, Dict, Any

logger = logging.getLogger(__name__)

class CrosswordGenerator:
    """
    Wrapper that uses the fixed crossword generator implementation.
    """

    def __init__(self, vector_service=None):
        self.vector_service = vector_service
        self.min_words = 8
        self.max_words = 15

    async def generate_puzzle(
        self,
        topics: List[str],
        difficulty: str = "medium",
        use_ai: bool = False
    ) -> Dict[str, Any]:
        """
        Generate a complete crossword puzzle using the fixed generator.

        Args:
            topics: List of topic strings
            difficulty: "easy", "medium", or "hard"
            use_ai: Whether to use vector search for word generation

        Returns:
            Dictionary containing grid, clues, and metadata
        """
        try:
            logger.info(f"🎯 Using fixed crossword generator for topics: {topics}")

            # Use the fixed generator implementation with the initialized vector service
            from .crossword_generator import CrosswordGenerator as ActualGenerator
            actual_generator = ActualGenerator(vector_service=self.vector_service)

            puzzle = await actual_generator.generate_puzzle(topics, difficulty, use_ai)

            logger.info(f"✅ Generated crossword with fixed algorithm")
            return puzzle

        except Exception as e:
            logger.error(f"❌ Failed to generate puzzle: {e}")
            raise

    async def generate_words_for_topics(self, topics: List[str], difficulty: str, use_ai: bool) -> List[Dict[str, Any]]:
        """Backward compatibility method."""
        # This method is kept for compatibility but delegates to the fixed generator
        from .crossword_generator import CrosswordGenerator as ActualGenerator
        actual_generator = ActualGenerator()
        return await actual_generator._select_words(topics, difficulty, use_ai)
crossword-app/backend-py/src/services/vector_search.py
ADDED
@@ -0,0 +1,587 @@
"""
Vector similarity search service using sentence-transformers and FAISS.
This implements true AI word generation via vector space nearest neighbor search.
"""

import os
import logging
import asyncio
import time
from datetime import datetime
from typing import List, Dict, Any, Optional, Tuple
import json

import numpy as np
import torch
from sentence_transformers import SentenceTransformer
import faiss
from pathlib import Path

logger = logging.getLogger(__name__)

def log_with_timestamp(message):
    """Helper to log with precise timestamp."""
    timestamp = datetime.now().strftime("%H:%M:%S.%f")[:-3]
    logger.info(f"[{timestamp}] {message}")

class VectorSearchService:
    """
    Service for finding semantically similar words using vector similarity search.

    This replaces the old approach of filtering static word lists with true
    vector space search through the model's full vocabulary.
    """

    def __init__(self):
        self.model = None
        self.vocab = None
        self.word_embeddings = None
        self.faiss_index = None
        self.is_initialized = False

        # Configuration
        self.model_name = os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-mpnet-base-v2")
        self.similarity_threshold = float(os.getenv("WORD_SIMILARITY_THRESHOLD", "0.3"))
        self.max_results = 20

        # Cache manager for word fallback
        self.cache_manager = None

    async def initialize(self):
        """Initialize the vector search service."""
        try:
            start_time = time.time()
            log_with_timestamp(f"🔧 Loading model: {self.model_name}")

            # Load sentence transformer model
            model_start = time.time()
            self.model = SentenceTransformer(self.model_name)
            model_time = time.time() - model_start
            log_with_timestamp(f"✅ Model loaded in {model_time:.2f}s: {self.model_name}")

            # Get model vocabulary from tokenizer
            vocab_start = time.time()
            tokenizer = self.model.tokenizer
            vocab_dict = tokenizer.get_vocab()

            # Filter vocabulary for crossword-suitable words
            self.vocab = self._filter_vocabulary(vocab_dict)
            vocab_time = time.time() - vocab_start
            log_with_timestamp(f"Filtered vocabulary in {vocab_time:.2f}s: {len(self.vocab)} words")

            # Pre-compute embeddings for all vocabulary words
            embedding_start = time.time()
            log_with_timestamp("Starting embedding generation...")
            await self._build_embeddings_index()
            embedding_time = time.time() - embedding_start
            log_with_timestamp(f"Embeddings built in {embedding_time:.2f}s")

            # Initialize cache manager
            cache_start = time.time()
            log_with_timestamp("📦 Initializing word cache manager...")
            try:
                from .word_cache import WordCacheManager
                self.cache_manager = WordCacheManager()
                await self.cache_manager.initialize()
                cache_time = time.time() - cache_start
                log_with_timestamp(f"📦 Cache manager initialized in {cache_time:.2f}s")
            except Exception as e:
                cache_time = time.time() - cache_start
                log_with_timestamp(f"⚠️ Cache manager initialization failed in {cache_time:.2f}s: {e}")
                log_with_timestamp("Continuing without persistent caching (in-memory only)")
                self.cache_manager = None

            self.is_initialized = True
            total_time = time.time() - start_time
            log_with_timestamp(f"✅ Vector search service fully initialized in {total_time:.2f}s")

        except Exception as e:
            logger.error(f"❌ Failed to initialize vector search: {e}")
            self.is_initialized = False
            raise

    def _filter_vocabulary(self, vocab_dict: Dict[str, int]) -> List[str]:
        """Filter vocabulary to keep only crossword-suitable words."""
        filtered = []

        # Words to exclude - boring, generic, or problematic for crosswords
        excluded_words = {
            # Generic/boring words
            'THE', 'AND', 'FOR', 'ARE', 'BUT', 'NOT', 'YOU', 'ALL', 'THIS', 'THAT', 'WITH', 'FROM', 'THEY', 'WERE', 'BEEN', 'HAVE', 'THEIR', 'SAID', 'EACH', 'WHICH', 'WHAT', 'THERE', 'WILL', 'MORE', 'WHEN', 'SOME', 'LIKE', 'INTO', 'TIME', 'VERY', 'ONLY', 'HAS', 'HAD', 'WHO', 'OIL', 'ITS', 'NOW', 'FIND', 'LONG', 'DOWN', 'DAY', 'DID', 'GET', 'COME', 'MADE', 'MAY', 'PART',
            # Topic words that are too obvious
            'ANIMAL', 'ANIMALS', 'CREATURE', 'CREATURES', 'BEAST', 'BEASTS', 'THING', 'THINGS'
        }

        for word, _ in vocab_dict.items():
            # Clean word (remove special tokens)
            clean_word = word.strip("##").upper()

            # Filter criteria for crossword words
            if (
                len(clean_word) >= 3 and  # Minimum length
                len(clean_word) <= 12 and  # Reasonable max length
                clean_word.isalpha() and  # Only letters
                not clean_word.startswith('[') and  # No special tokens
                not clean_word.startswith('<') and  # No special tokens
                clean_word not in excluded_words and  # Avoid boring words
                not self._is_plural(clean_word) and  # No plurals
                not self._is_boring_word(clean_word)  # No boring patterns
            ):
                filtered.append(clean_word)

        # Remove duplicates and sort
        return sorted(list(set(filtered)))

    def _is_plural(self, word: str) -> bool:
        """Check if word is likely a plural."""
        # Simple plural detection
        if len(word) < 4:
            return False
        return (
            word.endswith('S') and not word.endswith('SS') and
            not word.endswith('US') and not word.endswith('IS')
        )

    def _is_boring_word(self, word: str) -> bool:
        """Check if word is boring or too generic for crosswords."""
        boring_patterns = [
            # Words ending in common suffixes that are often generic
            word.endswith('ING') and len(word) > 6,
            word.endswith('TION') and len(word) > 7,
            word.endswith('NESS') and len(word) > 6,
            # Very common short words
            word in ['GET', 'GOT', 'PUT', 'SET', 'LET', 'RUN', 'CUT', 'HIT', 'SIT', 'WIN', 'BIG', 'NEW', 'OLD', 'BAD', 'GOOD', 'BEST', 'LAST', 'NEXT', 'REAL']
        ]
        return any(boring_patterns)

    async def _build_embeddings_index(self):
        """Build FAISS index with pre-computed embeddings for all vocabulary."""
        logger.info("Building embeddings index...")

        # Compute embeddings in batches to avoid memory issues
        batch_size = 100
        embeddings_list = []

        for i in range(0, len(self.vocab), batch_size):
            batch = self.vocab[i:i + batch_size]
            batch_embeddings = self.model.encode(batch, convert_to_numpy=True)
            embeddings_list.append(batch_embeddings)

            if i % 1000 == 0:
                logger.info(f"Processed {i}/{len(self.vocab)} words")

        # Combine all embeddings
        self.word_embeddings = np.vstack(embeddings_list)
        logger.info(f"Generated embeddings shape: {self.word_embeddings.shape}")

        # Build FAISS index for fast similarity search
        dimension = self.word_embeddings.shape[1]
        self.faiss_index = faiss.IndexFlatIP(dimension)  # Inner product similarity

        # Normalize embeddings for cosine similarity
        faiss.normalize_L2(self.word_embeddings)
        self.faiss_index.add(self.word_embeddings)

        logger.info(f"FAISS index built with {self.faiss_index.ntotal} vectors")

    async def find_similar_words(
        self,
        topic: str,
        difficulty: str = "medium",
        max_words: int = 15
    ) -> List[Dict[str, Any]]:
        """
        Find words similar to the given topic using vector similarity search.

        This is the core function that replaces embedding filtering with true
        vector space nearest neighbor search.
        """
        logger.info(f"Starting word search for topic: '{topic}', difficulty: '{difficulty}', max_words: {max_words}")
        logger.info(f"Vector search initialized: {self.is_initialized}")

        if not self.is_initialized:
            logger.warning("Vector search not initialized, using cached fallback")
            return await self._get_cached_fallback(topic, difficulty, max_words)

        try:
            # Get topic embedding
            topic_embedding = self.model.encode([topic], convert_to_numpy=True)

            # Add small amount of noise to create variety in search results (with fallback)
            import numpy as np
            noise_factor = float(os.getenv("SEARCH_RANDOMNESS", "0.02"))  # 2% noise by default
            if noise_factor > 0:
                try:
                    noise = np.random.normal(0, noise_factor, topic_embedding.shape)
                    topic_embedding_noisy = topic_embedding + noise
                    # Ensure the array is contiguous and correct type for FAISS
                    topic_embedding = np.ascontiguousarray(topic_embedding_noisy, dtype=np.float32)
                except Exception as noise_error:
                    logger.warning(f"⚠️ Failed to add search noise: {noise_error}, using original embedding")
                    topic_embedding = np.ascontiguousarray(topic_embedding, dtype=np.float32)
            else:
                topic_embedding = np.ascontiguousarray(topic_embedding, dtype=np.float32)

            # Normalize for cosine similarity with error handling
            try:
                faiss.normalize_L2(topic_embedding)
            except Exception as norm_error:
                logger.warning(f"⚠️ FAISS normalization failed: {norm_error}, trying without noise")
                # Fallback: use original embedding without noise
                topic_embedding = self.model.encode([topic], convert_to_numpy=True)
                topic_embedding = np.ascontiguousarray(topic_embedding, dtype=np.float32)
                faiss.normalize_L2(topic_embedding)

            # Search for similar words using FAISS (get more results for diversity)
            search_size = min(self.max_results * 6, 150)  # Get many more candidates for variety
            scores, indices = self.faiss_index.search(topic_embedding, search_size)

            # Debug: log search results
            logger.info(f"FAISS search returned {len(scores[0])} results")
            logger.info(f"Top 5 scores: {scores[0][:5]}")
            logger.info(f"Similarity threshold: {self.similarity_threshold}")

            # Collect candidates with scores
            candidates = []
            above_threshold = 0
            difficulty_passed = 0
            interesting_passed = 0

            for score, idx in zip(scores[0], indices[0]):
                if score < self.similarity_threshold:
                    continue
                above_threshold += 1

                word = self.vocab[idx]

                # Filter by difficulty and quality
                if self._matches_difficulty(word, difficulty):
                    difficulty_passed += 1
                    if self._is_interesting_word(word, topic):
                        interesting_passed += 1
                        candidates.append({
                            "word": word,
                            "clue": self._generate_clue(word, topic),
                            "similarity": float(score),
                            "source": "vector_search"
                        })

            logger.info(f"Filtering results: {len(scores[0])} total → {above_threshold} above threshold → {difficulty_passed} difficulty OK → {interesting_passed} interesting → {len(candidates)} final")

            # Smart randomization: favor good words but add variety
            import random

            if len(candidates) > max_words * 2:
                # Weighted random selection favoring higher similarity scores
                similar_words = self._weighted_random_selection(candidates, max_words)
            else:
                # If not many candidates, use all but in random order
                random.shuffle(candidates)
                similar_words = candidates[:max_words]

            logger.info(f"🎯 Found {len(similar_words)} similar words for '{topic}' via vector search")

            # Cache successful results for future use
            if similar_words:
                await self._cache_successful_search(topic, difficulty, similar_words)

            # If not enough words found, supplement with cached words
            if len(similar_words) < max_words // 2:
                cached_supplement = await self._get_cached_fallback(
                    topic, difficulty, max_words - len(similar_words)
                )
                similar_words.extend(cached_supplement)
                logger.info(f"Supplemented with {len(cached_supplement)} cached words")

            return similar_words[:max_words]

        except Exception as e:
            logger.error(f"❌ Vector search failed for '{topic}': {e}")
            # Try cached fallback first
            cached_words = await self._get_cached_fallback(topic, difficulty, max_words)
            if cached_words:
                return cached_words

            # Last resort: bootstrap with simple topic-related words
            logger.warning(f"⚠️ No cached words available, using emergency bootstrap for '{topic}'")
            return self._get_emergency_bootstrap(topic, difficulty, max_words)

    def _matches_difficulty(self, word: str, difficulty: str) -> bool:
        """Check if word matches difficulty criteria."""
        difficulty_map = {
            "easy": {"min_len": 3, "max_len": 8},
            "medium": {"min_len": 4, "max_len": 10},
            "hard": {"min_len": 5, "max_len": 15}
        }

        criteria = difficulty_map.get(difficulty, difficulty_map["medium"])
        return criteria["min_len"] <= len(word) <= criteria["max_len"]

    def _generate_clue(self, word: str, topic: str) -> str:
        """Generate a simple clue for the word."""
        # Basic clue templates - can be enhanced with LLM generation later
        clue_templates = {
            "Animals": f"{word.lower()} (animal)",
            "Technology": f"{word.lower()} (tech term)",
            "Science": f"{word.lower()} (scientific term)",
            "Geography": f"{word.lower()} (geographic feature)"
        }

        return clue_templates.get(topic, f"{word.lower()} (related to {topic.lower()})")

    def _is_interesting_word(self, word: str, topic: str) -> bool:
        """Check if word is interesting enough for crosswords."""
        # Exclude words that are too obvious for the topic
        topic_lower = topic.lower()
        word_lower = word.lower()

        # Don't include the topic itself or obvious variations
        if word_lower == topic_lower or word_lower in topic_lower:
            return False

        # Topic-specific filtering
        if topic_lower == 'animals':
            obvious_animals = ['mammal', 'mammals', 'wildlife', 'organism', 'organisms', 'livestock']
            if word_lower in obvious_animals:
                return False

        # Prefer concrete nouns over abstract concepts
        abstract_endings = ['tion', 'ness', 'ment', 'ity', 'ism']
        if any(word_lower.endswith(ending) for ending in abstract_endings) and len(word) > 8:
            return False

        return True

    def _weighted_random_selection(self, candidates: List[Dict[str, Any]], max_words: int) -> List[Dict[str, Any]]:
        """
        Weighted random selection that favors higher similarity scores but adds variety.

        This ensures we don't always get the exact same words, while still preferring
        high-quality matches.
        """
        import random

        if len(candidates) <= max_words:
            return candidates

        # Create tiers based on similarity scores
        candidates_sorted = sorted(candidates, key=lambda w: w["similarity"], reverse=True)

        # Tier 1: Top 25% - very high probability
        tier1_size = max(1, len(candidates_sorted) // 4)
        tier1 = candidates_sorted[:tier1_size]

        # Tier 2: Next 25% - high probability
        tier2_size = max(1, len(candidates_sorted) // 4)
        tier2 = candidates_sorted[tier1_size:tier1_size + tier2_size]

        # Tier 3: Next 35% - medium probability
        tier3_size = max(1, len(candidates_sorted) * 35 // 100)
        tier3 = candidates_sorted[tier1_size + tier2_size:tier1_size + tier2_size + tier3_size]

        # Tier 4: Remaining - low probability
        tier4 = candidates_sorted[tier1_size + tier2_size + tier3_size:]

        selected = []

        # Always include some from tier 1 (but not all)
        tier1_count = min(max_words // 3, len(tier1))
        selected.extend(random.sample(tier1, tier1_count))

        # Fill remaining slots with weighted random selection
        remaining_slots = max_words - len(selected)

        if remaining_slots > 0:
            # Create weighted pool
            weighted_pool = []
            weighted_pool.extend([(w, 3) for w in tier2])  # 3x weight
            weighted_pool.extend([(w, 2) for w in tier3])  # 2x weight
            weighted_pool.extend([(w, 1) for w in tier4])  # 1x weight

            # Also add remaining tier1 words with high weight
            remaining_tier1 = [w for w in tier1 if w not in selected]
            weighted_pool.extend([(w, 4) for w in remaining_tier1])  # 4x weight

            # Weighted random selection
            for _ in range(remaining_slots):
                if not weighted_pool:
                    break

                # Create weighted list
                weighted_words = []
                for word, weight in weighted_pool:
                    weighted_words.extend([word] * weight)

                if weighted_words:
                    chosen = random.choice(weighted_words)
                    selected.append(chosen)

                    # Remove chosen word from pool
                    weighted_pool = [(w, wt) for w, wt in weighted_pool if w != chosen]

        # Final shuffle to mix up the order
        random.shuffle(selected)

        logger.info(f"🎲 Weighted selection: {len(selected)} words from {len(candidates)} candidates")
        return selected[:max_words]

    async def _get_cached_fallback(
        self,
        topic: str,
        difficulty: str,
        max_words: int
    ) -> List[Dict[str, Any]]:
        """Fallback to cached words when vector search fails."""
        if not self.cache_manager:
            logger.warning(f"No cache manager available for fallback")
            return []

        logger.info(f"Looking for cached words for topic: '{topic}', difficulty: '{difficulty}'")

        try:
            cached_words = await self.cache_manager.get_cached_words(topic, difficulty, max_words)

            if cached_words:
                logger.info(f"📦 Found {len(cached_words)} cached words for '{topic}/{difficulty}'")
                return cached_words
            else:
                logger.info(f"No cached words available for '{topic}/{difficulty}'")
                return []

        except Exception as e:
            logger.error(f"❌ Failed to get cached fallback for '{topic}': {e}")
            return []

    async def _cache_successful_search(
        self,
        topic: str,
        difficulty: str,
        words: List[Dict[str, Any]]
    ):
        """Cache successful vector search results for future use."""
        if not self.cache_manager:
            return

        try:
            # Filter out any existing cached words to avoid duplicates
            vector_words = [w for w in words if w.get("source") == "vector_search"]

            if vector_words:
                success = await self.cache_manager.cache_words(topic, difficulty, vector_words)
                if success:
                    logger.info(f"💾 Successfully cached {len(vector_words)} words for {topic}/{difficulty}")

        except Exception as e:
            logger.error(f"❌ Failed to cache search results: {e}")

    def _get_emergency_bootstrap(self, topic: str, difficulty: str, max_words: int) -> List[Dict[str, Any]]:
        """
        Emergency bootstrap words when vector search and cache both fail.
        This prevents complete failure by providing basic topic-related words.
        """
        bootstrap_words = {
            "animals": [
                {"word": "DOG", "clue": "Man's best friend"},
                {"word": "CAT", "clue": "Feline pet"},
                {"word": "ELEPHANT", "clue": "Large mammal with trunk"},
                {"word": "TIGER", "clue": "Striped big cat"},
                {"word": "BIRD", "clue": "Flying creature"},
                {"word": "FISH", "clue": "Aquatic animal"},
                {"word": "HORSE", "clue": "Riding animal"},
                {"word": "BEAR", "clue": "Large mammal"},
                {"word": "WHALE", "clue": "Marine mammal"},
                {"word": "LION", "clue": "King of jungle"},
                {"word": "RABBIT", "clue": "Hopping mammal"},
                {"word": "SNAKE", "clue": "Slithering reptile"}
            ],
            "science": [
                {"word": "ATOM", "clue": "Basic unit of matter"},
                {"word": "CELL", "clue": "Basic unit of life"},
                {"word": "DNA", "clue": "Genetic material"},
                {"word": "ENERGY", "clue": "Capacity to do work"},
                {"word": "FORCE", "clue": "Push or pull"},
                {"word": "GRAVITY", "clue": "Force of attraction"},
                {"word": "LIGHT", "clue": "Electromagnetic radiation"},
                {"word": "MATTER", "clue": "Physical substance"},
                {"word": "MOTION", "clue": "Change in position"},
                {"word": "OXYGEN", "clue": "Essential gas"},
                {"word": "PHYSICS", "clue": "Study of matter and energy"},
                {"word": "THEORY", "clue": "Scientific explanation"}
            ],
            "technology": [
                {"word": "COMPUTER", "clue": "Electronic device"},
                {"word": "INTERNET", "clue": "Global network"},
                {"word": "SOFTWARE", "clue": "Computer programs"},
                {"word": "ROBOT", "clue": "Automated machine"},
                {"word": "DATA", "clue": "Information"},
                {"word": "CODE", "clue": "Programming instructions"},
                {"word": "DIGITAL", "clue": "Electronic format"},
                {"word": "NETWORK", "clue": "Connected systems"},
                {"word": "SYSTEM", "clue": "Organized whole"},
                {"word": "DEVICE", "clue": "Technical apparatus"},
                {"word": "MOBILE", "clue": "Portable technology"},
                {"word": "SCREEN", "clue": "Display surface"}
            ],
            "geography": [
                {"word": "MOUNTAIN", "clue": "High landform"},
                {"word": "RIVER", "clue": "Flowing water"},
                {"word": "OCEAN", "clue": "Large body of water"},
                {"word": "DESERT", "clue": "Arid region"},
                {"word": "FOREST", "clue": "Dense trees"},
                {"word": "ISLAND", "clue": "Land surrounded by water"},
                {"word": "VALLEY", "clue": "Low area between hills"},
                {"word": "LAKE", "clue": "Inland water body"},
                {"word": "COAST", "clue": "Land by the sea"},
                {"word": "PLAIN", "clue": "Flat land"},
                {"word": "HILL", "clue": "Small elevation"},
                {"word": "CLIFF", "clue": "Steep rock face"}
            ]
        }

        topic_lower = topic.lower()
        words = bootstrap_words.get(topic_lower, [])

        if not words:
            # Generic fallback for unknown topics
            words = [
                {"word": "WORD", "clue": "Unit of language"},
                {"word": "PUZZLE", "clue": "Brain teaser"},
                {"word": "GAME", "clue": "Form of play"},
                {"word": "CROSS", "clue": "Intersecting lines"},
                {"word": "GRID", "clue": "Pattern of squares"},
                {"word": "CLUE", "clue": "Helpful hint"}
            ]

        # Filter by difficulty and format
        filtered_words = []
        for word_obj in words:
            word = word_obj["word"]
            if self._matches_difficulty(word, difficulty):
                filtered_words.append({
                    "word": word,
                    "clue": word_obj["clue"],
                    "similarity": 0.7,  # Moderate relevance
                    "source": "emergency_bootstrap"
                })

        # Shuffle and limit
        import random
        random.shuffle(filtered_words)
        result = filtered_words[:max_words]

        logger.info(f"Emergency bootstrap provided {len(result)} words for '{topic}'")
        return result

    async def cleanup(self):
        """Cleanup resources."""
        logger.info("🧹 Cleaning up vector search service")
        if hasattr(self, 'model'):
            del self.model
        if hasattr(self, 'word_embeddings'):
            del self.word_embeddings
        if hasattr(self, 'faiss_index'):
            del self.faiss_index
        if self.cache_manager:
            await self.cache_manager.cleanup_expired_caches()
        self.is_initialized = False
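The service above gets cosine similarity out of FAISS by L2-normalizing both the vocabulary embeddings and the query vector and then searching an inner-product index, so the returned scores are cosine similarities. Below is a minimal standalone sketch of that pattern; random vectors stand in for sentence-transformer embeddings, and the sizes are illustrative only.

    # Illustrative sketch (not part of the commit): cosine similarity via normalize_L2 + IndexFlatIP.
    import numpy as np
    import faiss

    rng = np.random.default_rng(0)
    vocab_vectors = rng.standard_normal((1000, 768)).astype(np.float32)  # stand-in vocabulary embeddings
    query = rng.standard_normal((1, 768)).astype(np.float32)             # stand-in topic embedding

    faiss.normalize_L2(vocab_vectors)  # unit-length rows ...
    faiss.normalize_L2(query)          # ... so inner product equals cosine similarity

    index = faiss.IndexFlatIP(vocab_vectors.shape[1])
    index.add(vocab_vectors)

    scores, indices = index.search(query, 5)  # top-5 nearest neighbours for the query
    print(scores[0], indices[0])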
crossword-app/backend-py/src/services/word_cache.py
ADDED
@@ -0,0 +1,347 @@
"""
Word Cache Manager - Replaces static word file dependencies with intelligent caching.
Caches vector-discovered words with quality clues for fast retrieval.
"""

import os
import json
import logging
import time
from datetime import datetime, timedelta
from typing import List, Dict, Any, Optional
from pathlib import Path
import asyncio

logger = logging.getLogger(__name__)

class WordCacheManager:
    """
    Manages cached word data to replace static word file dependencies.

    Features:
    - Caches vector-discovered words with quality clues
    - Supports cache expiration and refresh
    - Fallback for when vector search fails
    - Progressive cache building from successful searches
    """

    def __init__(self, cache_dir: str = None):
        # Use appropriate default cache directory for the environment
        if cache_dir is None:
            # Check if we're in a Docker container or HuggingFace Spaces
            if os.path.exists("/.dockerenv") or os.getenv("SPACE_ID"):
                # Use /tmp for containers/spaces where write permissions are limited
                cache_dir = os.getenv("WORD_CACHE_DIR", "/tmp/crossword_cache")
            else:
                # Use local cache directory for development
                cache_dir = os.getenv("WORD_CACHE_DIR", "cache")

        self.cache_dir = Path(cache_dir)

        # Try to create cache directory with fallback
        try:
            self.cache_dir.mkdir(parents=True, exist_ok=True)
            logger.info(f"Cache directory created: {self.cache_dir}")
        except (PermissionError, OSError) as e:
            # Fallback to temp directory
            try:
                import tempfile
                temp_cache = Path(tempfile.gettempdir()) / "crossword_cache"
                temp_cache.mkdir(exist_ok=True)
                self.cache_dir = temp_cache
                logger.warning(f"Permission denied for '{cache_dir}', using temp: {self.cache_dir}")
            except Exception as temp_error:
                # Last resort: use in-memory only
                logger.error(f"Failed to create temp cache directory: {temp_error}")
                logger.warning("Using in-memory cache only (no persistence)")
                self.cache_dir = None
        except Exception as e:
            # Last resort: use in-memory only
            logger.error(f"Failed to create cache directory: {e}")
            logger.warning("Using in-memory cache only (no persistence)")
            self.cache_dir = None

        # Cache configuration
        self.cache_expiry_hours = int(os.getenv("CACHE_EXPIRY_HOURS", "24"))
        self.max_cached_words_per_topic = int(os.getenv("MAX_CACHED_WORDS", "100"))
        self.cache_version = "1.0"

        # In-memory cache for fast access
        self.memory_cache: Dict[str, List[Dict[str, Any]]] = {}
        self.cache_metadata: Dict[str, Dict[str, Any]] = {}

        logger.info(f"WordCacheManager initialized with cache_dir: {self.cache_dir}")

    async def initialize(self):
        """Initialize cache manager by loading existing cache files."""
        try:
            logger.info("Loading existing cache files...")

            # Skip file loading if no cache directory (in-memory only)
            if self.cache_dir is None:
                logger.info("In-memory cache mode - no file loading")
                return

            # Load all cache files into memory
            cache_files = list(self.cache_dir.glob("*.json"))
            loaded_count = 0

            for cache_file in cache_files:
                if cache_file.stem.endswith("_meta"):
                    continue  # Skip metadata files

                try:
                    cache_key = cache_file.stem
                    with open(cache_file, 'r') as f:
                        cached_data = json.load(f)

                    # Validate cache structure
                    if self._validate_cache_data(cached_data):
                        self.memory_cache[cache_key] = cached_data["words"]
                        self.cache_metadata[cache_key] = cached_data["metadata"]
                        loaded_count += 1
                        logger.info(f"Loaded cache: {cache_key} ({len(cached_data['words'])} words)")
                    else:
                        logger.warning(f"Invalid cache file: {cache_file}")

                except Exception as e:
                    logger.error(f"Failed to load cache file {cache_file}: {e}")

            logger.info(f"Cache manager initialized with {loaded_count} cached topics")

        except Exception as e:
            logger.error(f"Failed to initialize cache manager: {e}")

    def _validate_cache_data(self, data: Dict[str, Any]) -> bool:
        """Validate cache data structure."""
        required_keys = ["words", "metadata", "version"]
        if not all(key in data for key in required_keys):
            return False

        # Check metadata structure
        metadata = data["metadata"]
        required_meta_keys = ["created_at", "topic", "difficulty", "word_count"]
        if not all(key in metadata for key in required_meta_keys):
            return False

        # Check words structure
        words = data["words"]
        if not isinstance(words, list) or not words:
            return True  # Empty cache is valid

        # Validate first word structure
        sample_word = words[0]
        required_word_keys = ["word", "clue", "similarity", "source"]
        return all(key in sample_word for key in required_word_keys)

    async def get_cached_words(
        self,
        topic: str,
        difficulty: str = "medium",
        max_words: int = 15
    ) -> List[Dict[str, Any]]:
        """
        Get cached words for a topic and difficulty.

        Returns cached words if available and fresh, empty list otherwise.
        """
        cache_key = self._get_cache_key(topic, difficulty)

        # Check memory cache first
        if cache_key in self.memory_cache:
            # Check if cache is still fresh
            if self._is_cache_fresh(cache_key):
                cached_words = self.memory_cache[cache_key]
                logger.info(f"Using cached words for {cache_key}: {len(cached_words)} words")

                # Return requested number of words
                return cached_words[:max_words]
            else:
                logger.info(f"Cache expired for {cache_key}")
                await self._remove_expired_cache(cache_key)

        logger.info(f"No fresh cache available for {cache_key}")
        return []

    async def cache_words(
        self,
        topic: str,
        difficulty: str,
        words: List[Dict[str, Any]],
        source: str = "vector_search"
    ) -> bool:
        """
        Cache words for future use.

        Args:
            topic: Topic name
            difficulty: Difficulty level
            words: List of word objects with clues
            source: Source of the words (e.g., "vector_search")
        """
        try:
            cache_key = self._get_cache_key(topic, difficulty)

            # Enhance words with caching metadata
            enhanced_words = []
            for word in words[:self.max_cached_words_per_topic]:
                enhanced_word = {
                    **word,
                    "cached_at": datetime.utcnow().isoformat(),
                    "cache_source": source
                }
                enhanced_words.append(enhanced_word)

            # Create cache data structure
            cache_data = {
                "version": self.cache_version,
                "words": enhanced_words,
                "metadata": {
                    "topic": topic,
                    "difficulty": difficulty,
                    "word_count": len(enhanced_words),
                    "created_at": datetime.utcnow().isoformat(),
                    "source": source,
                    "expiry_hours": self.cache_expiry_hours
                }
            }

            # Save to file (if cache directory available)
            if self.cache_dir is not None:
                cache_file = self.cache_dir / f"{cache_key}.json"
                with open(cache_file, 'w') as f:
                    json.dump(cache_data, f, indent=2)

            # Update memory cache
            self.memory_cache[cache_key] = enhanced_words
            self.cache_metadata[cache_key] = cache_data["metadata"]

            logger.info(f"Cached {len(enhanced_words)} words for {cache_key}")
            return True

        except Exception as e:
            logger.error(f"Failed to cache words for {topic}/{difficulty}: {e}")
            return False

    def _get_cache_key(self, topic: str, difficulty: str) -> str:
        """Generate cache key from topic and difficulty."""
        return f"{topic.lower()}_{difficulty.lower()}"

    def _is_cache_fresh(self, cache_key: str) -> bool:
        """Check if cache is still fresh (not expired)."""
        if cache_key not in self.cache_metadata:
            return False

        metadata = self.cache_metadata[cache_key]
        created_at = datetime.fromisoformat(metadata["created_at"])
        expiry_hours = metadata.get("expiry_hours", self.cache_expiry_hours)

        expiry_time = created_at + timedelta(hours=expiry_hours)
        return datetime.utcnow() < expiry_time

    async def _remove_expired_cache(self, cache_key: str):
        """Remove expired cache from memory and disk."""
        try:
            # Remove from memory
            if cache_key in self.memory_cache:
                del self.memory_cache[cache_key]
            if cache_key in self.cache_metadata:
                del self.cache_metadata[cache_key]

            # Remove from disk (if cache directory available)
            if self.cache_dir is not None:
                cache_file = self.cache_dir / f"{cache_key}.json"
                if cache_file.exists():
                    cache_file.unlink()

            logger.info(f"Removed expired cache: {cache_key}")

        except Exception as e:
            logger.error(f"Failed to remove expired cache {cache_key}: {e}")

    async def warm_cache_from_static(self, static_words: Dict[str, List[Dict[str, Any]]]):
        """
        Warm cache with high-quality static words as bootstrap data.
        This converts the existing static words to cache format.
        """
        try:
            logger.info("Warming cache with bootstrap data from static words...")

            cached_count = 0
            for topic, words in static_words.items():
                if not words:
                    continue

                # Convert static words to cache format
                cache_words = []
                for word_obj in words:
                    cache_word = {
                        "word": word_obj["word"].upper(),
                        "clue": word_obj.get("clue", f"Related to {topic.lower()}"),
                        "similarity": 0.9,  # Mark as high quality
                        "source": "bootstrap_static",
                        "quality_score": 100  # High quality bootstrap data
                    }
                    cache_words.append(cache_word)

                # Cache for different difficulties
                for difficulty in ["easy", "medium", "hard"]:
                    # Filter by difficulty
                    filtered_words = self._filter_words_by_difficulty(cache_words, difficulty)

                    if filtered_words:
                        success = await self.cache_words(topic, difficulty, filtered_words, "bootstrap")
                        if success:
                            cached_count += 1

            logger.info(f"Cache warming completed: {cached_count} topic/difficulty combinations cached")

        except Exception as e:
            logger.error(f"Failed to warm cache: {e}")

    def _filter_words_by_difficulty(self, words: List[Dict[str, Any]], difficulty: str) -> List[Dict[str, Any]]:
        """Filter words by difficulty level."""
        difficulty_map = {
            "easy": {"min_len": 3, "max_len": 8},
            "medium": {"min_len": 4, "max_len": 10},
            "hard": {"min_len": 5, "max_len": 15}
        }

        criteria = difficulty_map.get(difficulty, difficulty_map["medium"])

        filtered = []
        for word_obj in words:
            word_len = len(word_obj["word"])
            if criteria["min_len"] <= word_len <= criteria["max_len"]:
                filtered.append(word_obj)

        return filtered

    def get_cache_stats(self) -> Dict[str, Any]:
        """Get cache statistics for monitoring."""
        total_words = sum(len(words) for words in self.memory_cache.values())

        # Count fresh vs expired caches
        fresh_caches = sum(1 for key in self.memory_cache.keys() if self._is_cache_fresh(key))
        total_caches = len(self.memory_cache)

        return {
            "total_cached_topics": total_caches,
            "fresh_caches": fresh_caches,
            "expired_caches": total_caches - fresh_caches,
            "total_cached_words": total_words,
            "cache_directory": str(self.cache_dir),
            "cache_expiry_hours": self.cache_expiry_hours
        }

    async def cleanup_expired_caches(self):
        """Clean up all expired caches."""
        expired_keys = [
            key for key in self.memory_cache.keys()
            if not self._is_cache_fresh(key)
        ]

        for key in expired_keys:
            await self._remove_expired_cache(key)

        logger.info(f"Cleaned up {len(expired_keys)} expired caches")

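A minimal usage sketch of WordCacheManager as defined above, assuming the backend package is importable as src.services.word_cache (the same path the integration tests use); the topic and word data are illustrative placeholders, not project data.

import asyncio

from src.services.word_cache import WordCacheManager

async def demo():
    # Point the manager at a scratch directory; it falls back to /tmp or
    # in-memory mode if the directory cannot be created.
    cache = WordCacheManager(cache_dir="/tmp/crossword_cache_demo")
    await cache.initialize()

    # Store a few discovered words for a topic/difficulty pair.
    await cache.cache_words("animals", "medium", [
        {"word": "OTTER", "clue": "Playful river mammal", "similarity": 0.82, "source": "vector_search"},
    ])

    # Later lookups hit the in-memory cache first; persisted JSON survives restarts.
    words = await cache.get_cached_words("animals", "medium", max_words=10)
    print(words)
    print(cache.get_cache_stats())

if __name__ == "__main__":
    asyncio.run(demo())
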
crossword-app/backend-py/test-integration/test_boundary_fix.py
ADDED
@@ -0,0 +1,147 @@
#!/usr/bin/env python3

import sys
import asyncio
from pathlib import Path

# Add project root to path
project_root = Path(__file__).parent.parent  # Go up from test-integration to backend-py
sys.path.insert(0, str(project_root))

from src.services.crossword_generator import CrosswordGenerator

async def test_boundary_fix():
    """Test that the boundary fix works correctly."""

    # Sample words that are known to cause boundary issues
    test_words = [
        {"word": "COMPUTER", "clue": "Electronic device"},
        {"word": "MACHINE", "clue": "Device with moving parts"},
        {"word": "SCIENCE", "clue": "Systematic study"},
        {"word": "EXPERT", "clue": "Specialist"},
        {"word": "CODE", "clue": "Programming text"},
        {"word": "DATA", "clue": "Information"}
    ]

    generator = CrosswordGenerator()

    print("Testing Boundary Fix")
    print("=" * 50)

    # Generate a crossword
    result = generator._create_grid(test_words)

    if not result:
        print("Grid generation failed")
        return False

    grid = result["grid"]
    placed_words = result["placed_words"]

    print(f"Generated grid with {len(placed_words)} words")
    print(f"Grid size: {len(grid)}x{len(grid[0])}")

    # Display the grid
    print("\nGenerated Grid:")
    for i, row in enumerate(grid):
        row_str = " ".join(cell if cell != "." else " " for cell in row)
        print(f"{i:2d} | {row_str}")

    print(f"\nPlaced Words:")
    for word in placed_words:
        print(f"  {word['word']} at ({word['row']},{word['col']}) {word['direction']}")

    # Analyze for boundary violations
    print(f"\nAnalyzing for boundary violations...")

    violations = []

    # Check horizontal words
    for r in range(len(grid)):
        current_word = ""
        word_start = -1

        for c in range(len(grid[r])):
            if grid[r][c] != ".":
                if current_word == "":
                    word_start = c
                current_word += grid[r][c]
            else:
                if current_word:
                    # Word ended - check if it's a valid placed word
                    is_valid_word = any(
                        placed['word'] == current_word and
                        placed['row'] == r and
                        placed['col'] == word_start and
                        placed['direction'] == 'horizontal'
                        for placed in placed_words
                    )
                    if not is_valid_word and len(current_word) > 1:
                        violations.append(f"Invalid horizontal word '{current_word}' at ({r},{word_start})")
                current_word = ""

        # Check word at end of row
        if current_word:
            is_valid_word = any(
                placed['word'] == current_word and
                placed['row'] == r and
                placed['col'] == word_start and
                placed['direction'] == 'horizontal'
                for placed in placed_words
            )
            if not is_valid_word and len(current_word) > 1:
                violations.append(f"Invalid horizontal word '{current_word}' at ({r},{word_start})")

    # Check vertical words
    for c in range(len(grid[0])):
        current_word = ""
        word_start = -1

        for r in range(len(grid)):
            if grid[r][c] != ".":
                if current_word == "":
                    word_start = r
                current_word += grid[r][c]
            else:
                if current_word:
                    # Word ended - check if it's a valid placed word
                    is_valid_word = any(
                        placed['word'] == current_word and
                        placed['row'] == word_start and
                        placed['col'] == c and
                        placed['direction'] == 'vertical'
                        for placed in placed_words
                    )
                    if not is_valid_word and len(current_word) > 1:
                        violations.append(f"Invalid vertical word '{current_word}' at ({word_start},{c})")
                current_word = ""

        # Check word at end of column
        if current_word:
            is_valid_word = any(
                placed['word'] == current_word and
                placed['row'] == word_start and
                placed['col'] == c and
                placed['direction'] == 'vertical'
                for placed in placed_words
            )
            if not is_valid_word and len(current_word) > 1:
                violations.append(f"Invalid vertical word '{current_word}' at ({word_start},{c})")

    # Report results
    if violations:
        print(f"Found {len(violations)} boundary violations:")
        for violation in violations:
            print(f"  - {violation}")
        return False
    else:
        print(f"No boundary violations found!")
        print(f"All words in grid are properly placed and bounded")
        return True

if __name__ == "__main__":
    success = asyncio.run(test_boundary_fix())
    if success:
        print(f"\nBoundary fix is working correctly!")
    else:
        print(f"\nBoundary fix needs more work!")

crossword-app/backend-py/test-integration/test_bounds_comprehensive.py
ADDED
@@ -0,0 +1,266 @@
#!/usr/bin/env python3
"""
Comprehensive test for bounds checking fixes in crossword generator.
"""

import asyncio
import sys
import pytest
from pathlib import Path

# Add project root to path
project_root = Path(__file__).parent.parent  # Go up from test-integration to backend-py
sys.path.insert(0, str(project_root))

from src.services.crossword_generator_fixed import CrosswordGeneratorFixed

class TestBoundsChecking:
    """Test all bounds checking in crossword generator."""

    def setup_method(self):
        """Setup test instance."""
        self.generator = CrosswordGeneratorFixed(vector_service=None)

    def test_can_place_word_bounds_horizontal(self):
        """Test _can_place_word bounds checking for horizontal placement."""
        # Create small grid
        grid = [["." for _ in range(5)] for _ in range(5)]

        # Test cases that should fail bounds checking
        assert not self.generator._can_place_word(grid, "TOOLONG", 2, 1, "horizontal")  # Word too long
        assert not self.generator._can_place_word(grid, "TEST", -1, 1, "horizontal")  # Negative row
        assert not self.generator._can_place_word(grid, "TEST", 1, -1, "horizontal")  # Negative col
        assert not self.generator._can_place_word(grid, "TEST", 5, 1, "horizontal")  # Row >= size
        assert not self.generator._can_place_word(grid, "TEST", 1, 5, "horizontal")  # Col >= size
        assert not self.generator._can_place_word(grid, "TEST", 1, 3, "horizontal")  # Word extends beyond grid

        # Test cases that should pass
        assert self.generator._can_place_word(grid, "TEST", 2, 1, "horizontal")  # Valid placement
        assert self.generator._can_place_word(grid, "A", 0, 0, "horizontal")  # Single letter

    def test_can_place_word_bounds_vertical(self):
        """Test _can_place_word bounds checking for vertical placement."""
        # Create small grid
        grid = [["." for _ in range(5)] for _ in range(5)]

        # Test cases that should fail bounds checking
        assert not self.generator._can_place_word(grid, "TOOLONG", 1, 2, "vertical")  # Word too long
        assert not self.generator._can_place_word(grid, "TEST", -1, 1, "vertical")  # Negative row
        assert not self.generator._can_place_word(grid, "TEST", 1, -1, "vertical")  # Negative col
        assert not self.generator._can_place_word(grid, "TEST", 5, 1, "vertical")  # Row >= size
        assert not self.generator._can_place_word(grid, "TEST", 1, 5, "vertical")  # Col >= size
        assert not self.generator._can_place_word(grid, "TEST", 3, 1, "vertical")  # Word extends beyond grid

        # Test cases that should pass
        assert self.generator._can_place_word(grid, "TEST", 1, 2, "vertical")  # Valid placement
        assert self.generator._can_place_word(grid, "A", 0, 0, "vertical")  # Single letter

    def test_place_word_bounds_horizontal(self):
        """Test _place_word bounds checking for horizontal placement."""
        grid = [["." for _ in range(5)] for _ in range(5)]

        # Valid placement should work
        original_state = self.generator._place_word(grid, "TEST", 2, 1, "horizontal")
        assert len(original_state) == 4
        assert grid[2][1] == "T"
        assert grid[2][4] == "T"

        # Test out-of-bounds placement should raise IndexError
        with pytest.raises(IndexError):
            self.generator._place_word(grid, "TOOLONG", 2, 1, "horizontal")

        with pytest.raises(IndexError):
            self.generator._place_word(grid, "TEST", -1, 1, "horizontal")

        with pytest.raises(IndexError):
            self.generator._place_word(grid, "TEST", 5, 1, "horizontal")

        with pytest.raises(IndexError):
            self.generator._place_word(grid, "TEST", 1, 5, "horizontal")

    def test_place_word_bounds_vertical(self):
        """Test _place_word bounds checking for vertical placement."""
        grid = [["." for _ in range(5)] for _ in range(5)]

        # Valid placement should work
        original_state = self.generator._place_word(grid, "TEST", 1, 2, "vertical")
        assert len(original_state) == 4
        assert grid[1][2] == "T"
        assert grid[4][2] == "T"

        # Test out-of-bounds placement should raise IndexError
        with pytest.raises(IndexError):
            self.generator._place_word(grid, "TOOLONG", 1, 2, "vertical")

        with pytest.raises(IndexError):
            self.generator._place_word(grid, "TEST", -1, 2, "vertical")

        with pytest.raises(IndexError):
            self.generator._place_word(grid, "TEST", 5, 2, "vertical")

        with pytest.raises(IndexError):
            self.generator._place_word(grid, "TEST", 2, 5, "vertical")

    def test_remove_word_bounds(self):
        """Test _remove_word bounds checking."""
        grid = [["." for _ in range(5)] for _ in range(5)]

        # Place a word first
        original_state = self.generator._place_word(grid, "TEST", 2, 1, "horizontal")

        # Normal removal should work
        self.generator._remove_word(grid, original_state)
        assert grid[2][1] == "."

        # Test invalid original state should raise IndexError
        bad_state = [{"row": -1, "col": 1, "value": "."}]
        with pytest.raises(IndexError):
            self.generator._remove_word(grid, bad_state)

        bad_state = [{"row": 5, "col": 1, "value": "."}]
        with pytest.raises(IndexError):
            self.generator._remove_word(grid, bad_state)

        bad_state = [{"row": 1, "col": -1, "value": "."}]
        with pytest.raises(IndexError):
            self.generator._remove_word(grid, bad_state)

        bad_state = [{"row": 1, "col": 5, "value": "."}]
        with pytest.raises(IndexError):
            self.generator._remove_word(grid, bad_state)

    def test_create_simple_cross_bounds(self):
        """Test _create_simple_cross bounds checking."""
        # Test with words that have intersections
        word_list = ["CAT", "TOY"]  # 'T' intersection
        word_objs = [{"word": w, "clue": f"Clue for {w}"} for w in word_list]

        # This should work without bounds errors
        result = self.generator._create_simple_cross(word_list, word_objs)
        assert result is not None
        assert len(result["placed_words"]) == 2

        # Test with words that might cause issues
        word_list = ["A", "A"]  # Same single letter
        word_objs = [{"word": w, "clue": f"Clue for {w}"} for w in word_list]

        # This should not crash with bounds errors
        result = self.generator._create_simple_cross(word_list, word_objs)
        # May return None due to placement issues, but should not crash

    def test_trim_grid_bounds(self):
        """Test _trim_grid bounds checking."""
        # Create a grid with words placed
        grid = [["." for _ in range(10)] for _ in range(10)]

        # Place some letters
        grid[5][3] = "T"
        grid[5][4] = "E"
        grid[5][5] = "S"
        grid[5][6] = "T"

        placed_words = [{
            "word": "TEST",
            "row": 5,
            "col": 3,
            "direction": "horizontal",
            "number": 1
        }]

        # This should work without bounds errors
        result = self.generator._trim_grid(grid, placed_words)
        assert result is not None
        assert "grid" in result
        assert "placed_words" in result

        # Test with edge case placements
        placed_words = [{
            "word": "A",
            "row": 0,
            "col": 0,
            "direction": "horizontal",
            "number": 1
        }]

        grid[0][0] = "A"
        result = self.generator._trim_grid(grid, placed_words)
        assert result is not None

    def test_calculation_placement_score_bounds(self):
        """Test _calculate_placement_score bounds checking."""
        grid = [["." for _ in range(5)] for _ in range(5)]

        # Place some letters for intersection testing
        grid[2][2] = "T"
        grid[2][3] = "E"

        placement = {"row": 2, "col": 2, "direction": "horizontal"}
        placed_words = []

        # This should work without bounds errors
        score = self.generator._calculate_placement_score(grid, "TEST", placement, placed_words)
        assert isinstance(score, int)

        # Test with out-of-bounds placement (should handle gracefully)
        placement = {"row": 4, "col": 3, "direction": "horizontal"}  # Would extend beyond grid
        score = self.generator._calculate_placement_score(grid, "TEST", placement, placed_words)
        assert isinstance(score, int)

        # Test with negative placement (should handle gracefully)
        placement = {"row": -1, "col": 0, "direction": "horizontal"}
        score = self.generator._calculate_placement_score(grid, "TEST", placement, placed_words)
        assert isinstance(score, int)

async def test_full_generation_stress():
    """Stress test full generation to catch index errors."""
    generator = CrosswordGeneratorFixed(vector_service=None)

    # Mock word selection to return test words
    test_words = [
        {"word": "CAT", "clue": "Feline pet"},
        {"word": "DOG", "clue": "Man's best friend"},
        {"word": "BIRD", "clue": "Flying animal"},
        {"word": "FISH", "clue": "Aquatic animal"},
        {"word": "ELEPHANT", "clue": "Large mammal"},
        {"word": "TIGER", "clue": "Striped cat"},
        {"word": "HORSE", "clue": "Riding animal"},
        {"word": "BEAR", "clue": "Large carnivore"},
        {"word": "WOLF", "clue": "Pack animal"},
        {"word": "LION", "clue": "King of jungle"}
    ]

    generator._select_words = lambda topics, difficulty, use_ai: test_words

    # Run multiple generation attempts
    for i in range(20):
        try:
            result = await generator.generate_puzzle(["animals"], "medium", use_ai=False)
            if result:
                print(f"Generation {i+1} succeeded")
            else:
                print(f"Generation {i+1} returned None")
        except IndexError as e:
            print(f"Index error in generation {i+1}: {e}")
            raise
        except Exception as e:
            print(f"Other error in generation {i+1}: {e}")
            # Don't raise for other errors, just continue

    print("All stress test generations completed without index errors!")

if __name__ == "__main__":
    # Run tests
    print("Running comprehensive bounds checking tests...")

    # Run pytest on this file
    import subprocess
    result = subprocess.run([sys.executable, "-m", "pytest", __file__, "-v"],
                            capture_output=True, text=True)

    print("STDOUT:", result.stdout)
    if result.stderr:
        print("STDERR:", result.stderr)

    # Run stress test
    print("\nRunning stress test...")
    asyncio.run(test_full_generation_stress())

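The cases above reduce to a start-position check plus an end-position check per direction. The sketch below is a simplified, standalone illustration of that guard, not the project's _can_place_word (which also rejects letter conflicts with words already on the grid); the function name is hypothetical.

def fits_in_grid(grid, word, row, col, direction):
    # Illustrative bounds guard only: reject placements whose first or last
    # letter would fall outside the grid, matching the cases the tests exercise.
    rows, cols = len(grid), len(grid[0])
    if row < 0 or col < 0 or row >= rows or col >= cols:
        return False  # start cell outside the grid
    if direction == "horizontal":
        return col + len(word) <= cols  # last letter must stay inside the row
    return row + len(word) <= rows  # vertical: last letter must stay inside the column
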
crossword-app/backend-py/test-integration/test_bounds_fix.py
ADDED
@@ -0,0 +1,90 @@
#!/usr/bin/env python3
"""
Quick test to verify the bounds checking fix.
"""

import sys
from pathlib import Path

# Add project root to path
project_root = Path(__file__).parent.parent  # Go up from test-integration to backend-py
sys.path.insert(0, str(project_root))

from src.services.crossword_generator_fixed import CrosswordGeneratorFixed

def test_bounds_checking():
    """Test that placement score calculation doesn't crash with out-of-bounds access."""
    print("Testing bounds checking fix...")

    generator = CrosswordGeneratorFixed()

    # Create a small grid
    grid = [["." for _ in range(5)] for _ in range(5)]

    # Test placement that would go out of bounds
    placement = {
        "row": 3,  # Starting at row 3
        "col": 2,  # Starting at col 2
        "direction": "vertical"
    }

    # Word that would extend beyond grid (3+8=11 > 5)
    word = "ELEPHANT"  # 8 letters, would go from row 3 to row 10 (out of bounds)

    try:
        # This should NOT crash with bounds checking
        score = generator._calculate_placement_score(grid, word, placement, [])
        print(f"Success! Placement score calculated: {score}")
        print("Bounds checking is working correctly")
        return True
    except IndexError as e:
        print(f"IndexError still occurs: {e}")
        return False
    except Exception as e:
        print(f"Other error: {e}")
        return False

def test_valid_placement():
    """Test that valid placements still work correctly."""
    print("\nTesting valid placement scoring...")

    generator = CrosswordGeneratorFixed()

    # Create a grid with some letters
    grid = [["." for _ in range(8)] for _ in range(8)]
    grid[2][2] = "A"  # Place an 'A' at position (2,2)

    # Test placement that intersects properly
    placement = {
        "row": 2,
        "col": 1,
        "direction": "horizontal"
    }

    word = "CAT"  # Should intersect at the 'A'

    try:
        score = generator._calculate_placement_score(grid, word, placement, [])
        print(f"Valid placement score: {score}")

        # Should have intersection bonus (score > 100)
        if score > 300:  # Base 100 + intersection 200
            print("Intersection detection working")
        else:
            print(f"Expected intersection bonus, got score {score}")

        return True
    except Exception as e:
        print(f"Error with valid placement: {e}")
        return False

if __name__ == "__main__":
    print("Testing crossword generator bounds fix\n")

    test1_pass = test_bounds_checking()
    test2_pass = test_valid_placement()

    if test1_pass and test2_pass:
        print("\nAll tests passed! The bounds checking fix is working.")
    else:
        print("\nSome tests failed. More work needed.")

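The thresholds in the test above come from the comment "Base 100 + intersection 200"; the real scorer evidently adds further bonuses, since the test expects strictly more than 300 for a single intersection. A toy scorer with that general shape, for illustration only (the function name is hypothetical, not the project's _calculate_placement_score):

def toy_placement_score(grid, word, row, col, direction):
    # Base score of 100, plus 200 per cell that reuses a letter already on the
    # grid; out-of-bounds placements return 0 instead of raising IndexError.
    rows, cols = len(grid), len(grid[0])
    score = 100
    for i, letter in enumerate(word):
        r = row + (i if direction == "vertical" else 0)
        c = col + (i if direction == "horizontal" else 0)
        if not (0 <= r < rows and 0 <= c < cols):
            return 0  # reject gracefully rather than crash
        if grid[r][c] == letter:
            score += 200  # intersection with an existing letter
    return score
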
crossword-app/backend-py/test-integration/test_cache_permissions.py
ADDED
@@ -0,0 +1,88 @@
#!/usr/bin/env python3
"""
Test cache permission handling.
"""

import asyncio
import sys
import tempfile
import os
from pathlib import Path
from unittest.mock import patch

# Add project root to path
project_root = Path(__file__).parent.parent  # Go up from test-integration to backend-py
sys.path.insert(0, str(project_root))

from src.services.word_cache import WordCacheManager

async def test_permission_denied():
    """Test cache handling when permissions are denied."""
    print("Testing permission denied scenario...")

    # Mock Path.mkdir to raise PermissionError
    with patch.object(Path, 'mkdir', side_effect=PermissionError("Permission denied")):
        try:
            cache_manager = WordCacheManager(cache_dir="/some/protected/path")
            await cache_manager.initialize()

            print(f"Cache manager created with fallback: {cache_manager.cache_dir}")

            # Test caching still works (in-memory or temp dir)
            test_words = [
                {"word": "TEST", "clue": "A test word", "similarity": 0.8, "source": "test"}
            ]

            success = await cache_manager.cache_words("TestTopic", "medium", test_words)
            print(f"Caching {'succeeded' if success else 'failed'}")

            cached_words = await cache_manager.get_cached_words("TestTopic", "medium", 5)
            print(f"Retrieved {len(cached_words)} cached words")

            return True

        except Exception as e:
            print(f"Permission handling failed: {e}")
            return False

async def test_in_memory_mode():
    """Test pure in-memory cache mode."""
    print("\nTesting in-memory only mode...")

    # Force in-memory mode by setting cache_dir to None
    cache_manager = WordCacheManager()
    cache_manager.cache_dir = None  # Force in-memory mode

    await cache_manager.initialize()

    # Test that caching still works in memory
    test_words = [
        {"word": "MEMORY", "clue": "Stored in RAM", "similarity": 0.9, "source": "test"}
    ]

    success = await cache_manager.cache_words("Memory", "medium", test_words)
    print(f"In-memory caching {'succeeded' if success else 'failed'}")

    cached_words = await cache_manager.get_cached_words("Memory", "medium", 5)
    print(f"Retrieved {len(cached_words)} words from memory")

    stats = cache_manager.get_cache_stats()
    print(f"Cache stats: {stats}")

    return len(cached_words) > 0

async def main():
    """Run permission tests."""
    print("Testing Cache Permission Handling\n")

    test1 = await test_permission_denied()
    test2 = await test_in_memory_mode()

    if test1 and test2:
        print("\nAll permission tests passed!")
        print("Cache system gracefully handles permission issues")
    else:
        print("\nSome permission tests failed")

if __name__ == "__main__":
    asyncio.run(main())

crossword-app/backend-py/test-integration/test_cache_system.py
ADDED
@@ -0,0 +1,127 @@
#!/usr/bin/env python3
"""
Test the new cache system to verify it works correctly.
"""

import asyncio
import sys
import tempfile
import shutil
from pathlib import Path

# Add project root to path
project_root = Path(__file__).parent.parent  # Go up from test-integration to backend-py
sys.path.insert(0, str(project_root))

from src.services.word_cache import WordCacheManager

async def test_cache_system():
    """Test the cache system functionality."""
    print("Testing Word Cache System\n")

    # Create temporary cache directory
    temp_dir = tempfile.mkdtemp()
    print(f"Using temporary cache directory: {temp_dir}")

    try:
        # Initialize cache manager
        cache_manager = WordCacheManager(cache_dir=temp_dir)
        await cache_manager.initialize()

        # Test 1: Cache some words
        print("\nTest 1: Caching words")
        test_words = [
            {"word": "ELEPHANT", "clue": "Large mammal with trunk", "similarity": 0.8, "source": "vector_search"},
            {"word": "TIGER", "clue": "Striped big cat", "similarity": 0.7, "source": "vector_search"},
            {"word": "LION", "clue": "King of jungle", "similarity": 0.75, "source": "vector_search"},
        ]

        success = await cache_manager.cache_words("Animals", "medium", test_words)
        print(f"Cache operation {'succeeded' if success else 'failed'}")

        # Test 2: Retrieve cached words
        print("\nTest 2: Retrieving cached words")
        cached_words = await cache_manager.get_cached_words("Animals", "medium", 5)
        print(f"Retrieved {len(cached_words)} cached words")

        if cached_words:
            print("Cached words:")
            for word in cached_words:
                print(f"  - {word['word']}: {word['clue']}")

        # Test 3: Cache statistics
        print("\nTest 3: Cache statistics")
        stats = cache_manager.get_cache_stats()
        print(f"Cache stats: {stats}")

        # Test 4: Test non-existent topic
        print("\nTest 4: Non-existent topic")
        empty_words = await cache_manager.get_cached_words("NonExistent", "medium", 5)
        print(f"Non-existent topic returned {len(empty_words)} words (expected 0)")

        # Test 5: Test bootstrap warming (if static data exists)
        print("\nTest 5: Bootstrap warming simulation")
        static_data = {
            "Technology": [
                {"word": "COMPUTER", "clue": "Electronic device"},
                {"word": "ROBOT", "clue": "Automated machine"},
            ]
        }
        await cache_manager.warm_cache_from_static(static_data)

        tech_words = await cache_manager.get_cached_words("Technology", "medium", 5)
        print(f"Bootstrap warming: Retrieved {len(tech_words)} tech words")

        print("\nAll cache system tests completed!")
        return True

    except Exception as e:
        print(f"\nCache system test failed: {e}")
        import traceback
        traceback.print_exc()
        return False

    finally:
        # Cleanup temporary directory
        shutil.rmtree(temp_dir)
        print(f"Cleaned up temporary directory")

async def test_vector_integration():
    """Test integration with vector search service."""
    print("\nTesting Vector Search Integration\n")

    try:
        from src.services.vector_search import VectorSearchService

        # Create vector service (won't initialize model, just test cache integration)
        vector_service = VectorSearchService()

        # Test cache fallback without initialization
        print("Testing cache fallback when vector search not initialized")
        fallback_words = await vector_service._get_cached_fallback("Animals", "medium", 5)
        print(f"Fallback returned {len(fallback_words)} words")

        print("Vector integration test completed!")
        return True

    except Exception as e:
        print(f"Vector integration test failed: {e}")
        import traceback
        traceback.print_exc()
        return False

async def main():
    """Run all tests."""
    print("Testing Cache System Replacement\n")

    cache_test = await test_cache_system()
    integration_test = await test_vector_integration()

    if cache_test and integration_test:
        print("\nAll tests passed! Cache system is working correctly.")
        print("Static word dependencies have been successfully replaced with caching.")
    else:
        print("\nSome tests failed. Check the output above.")

if __name__ == "__main__":
    asyncio.run(main())

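For reference, the on-disk JSON that cache_words() writes and _validate_cache_data() accepts has the shape below (one file per topic/difficulty key, e.g. animals_medium.json); the concrete values and timestamps here are illustrative.

# Illustrative shape of a cached topic file; field names match what
# cache_words() writes and _validate_cache_data() requires.
example_cache_file = {
    "version": "1.0",
    "words": [
        {
            "word": "TIGER",
            "clue": "Striped big cat",
            "similarity": 0.7,
            "source": "vector_search",
            "cached_at": "2024-01-01T00:00:00",      # added by cache_words()
            "cache_source": "vector_search",          # added by cache_words()
        }
    ],
    "metadata": {
        "topic": "Animals",
        "difficulty": "medium",
        "word_count": 1,
        "created_at": "2024-01-01T00:00:00",
        "source": "vector_search",
        "expiry_hours": 24,
    },
}
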
crossword-app/backend-py/test-integration/test_crossword_display.py
ADDED
@@ -0,0 +1,85 @@
#!/usr/bin/env python3

import json

# The crossword grid from the API response
grid = [
    [".", ".", ".", ".", ".", ".", ".", ".", "D", ".", "."],
    [".", ".", ".", ".", ".", "M", "I", "C", "E", ".", "."],
    [".", ".", ".", "H", "U", "M", "A", "N", "E", ".", "."],
    [".", ".", ".", "W", "H", "A", "L", "E", "R", ".", "."],
    [".", "P", "Z", ".", ".", ".", "L", ".", ".", ".", "."],
    ["Z", "O", "O", "L", "O", "G", "I", "C", "A", "L", "."],
    [".", "U", "O", ".", ".", ".", "G", "E", "E", "S", "E"],
    [".", "L", "L", "H", "U", "M", "A", "N", "I", "T", "Y"],
    [".", "T", "O", ".", ".", ".", "T", "I", "G", "E", "R"],
    [".", "R", "G", ".", "B", "I", "O", "L", "O", "G", "Y"],
    [".", "Y", "Y", ".", ".", ".", "R", ".", ".", ".", "."]
]

print("Generated Crossword Grid:")
print("=" * 50)

for i, row in enumerate(grid):
    row_str = ""
    for j, cell in enumerate(row):
        if cell == ".":
            row_str += "  "  # Empty space
        else:
            row_str += f"{cell} "
    print(f"{i:2d} | {row_str}")

print("=" * 50)

# Check for word boundaries
def check_word_boundaries(grid):
    issues = []

    # Horizontal words
    for r in range(len(grid)):
        in_word = False
        word_start = -1
        for c in range(len(grid[r])):
            if grid[r][c] != ".":
                if not in_word:
                    in_word = True
                    word_start = c
            else:
                if in_word:
                    # Word ended
                    word_length = c - word_start
                    word = "".join(grid[r][word_start:c])
                    print(f"Horizontal word at ({r},{word_start}): {word} (length {word_length})")
                    in_word = False

        # Check if word extends to end of row
        if in_word:
            word_length = len(grid[r]) - word_start
            word = "".join(grid[r][word_start:])
            print(f"Horizontal word at ({r},{word_start}): {word} (length {word_length})")

    # Vertical words
    for c in range(len(grid[0])):
        in_word = False
        word_start = -1
        for r in range(len(grid)):
            if grid[r][c] != ".":
                if not in_word:
                    in_word = True
                    word_start = r
            else:
                if in_word:
                    # Word ended
                    word_length = r - word_start
                    word = "".join([grid[i][c] for i in range(word_start, r)])
                    print(f"Vertical word at ({word_start},{c}): {word} (length {word_length})")
                    in_word = False

        # Check if word extends to end of column
        if in_word:
            word_length = len(grid) - word_start
            word = "".join([grid[i][c] for i in range(word_start, len(grid))])
            print(f"Vertical word at ({word_start},{c}): {word} (length {word_length})")

print("\nWord boundary analysis:")
check_word_boundaries(grid)

crossword-app/backend-py/test-integration/test_final_crossword_validation.py
ADDED
@@ -0,0 +1,239 @@
#!/usr/bin/env python3
"""
Final test to validate that the crossword generator produces clean grids
without unwanted prefixes, suffixes, or unintended letter sequences.
"""

import sys
from pathlib import Path

# Add project root to path
project_root = Path(__file__).parent.parent  # Go up from test-integration to backend-py
sys.path.insert(0, str(project_root))

from src.services.crossword_generator_fixed import CrosswordGeneratorFixed

def test_clean_crossword_generation():
    """Test that crossword generation produces clean grids without unwanted sequences."""

    print("Final Crossword Validation Test\n")

    generator = CrosswordGeneratorFixed(vector_service=None)

    # Test multiple scenarios that previously caused issues
    test_scenarios = [
        {
            "name": "Basic Technology Words",
            "words": [
                {"word": "COMPUTER", "clue": "Electronic device"},
                {"word": "MACHINE", "clue": "Device with moving parts"},
                {"word": "SCIENCE", "clue": "Systematic study"},
                {"word": "EXPERT", "clue": "Specialist"},
            ]
        },
        {
            "name": "Similar Words (MACHINE/MACHINERY)",
            "words": [
                {"word": "MACHINE", "clue": "Device with moving parts"},
                {"word": "MACHINERY", "clue": "Mechanical equipment"},
                {"word": "TECHNOLOGY", "clue": "Applied science"},
                {"word": "RESEARCH", "clue": "Investigation"},
            ]
        },
        {
            "name": "Animal Words",
            "words": [
                {"word": "ELEPHANT", "clue": "Large mammal"},
                {"word": "TIGER", "clue": "Striped cat"},
                {"word": "BEAR", "clue": "Large carnivore"},
                {"word": "HORSE", "clue": "Riding animal"},
                {"word": "BIRD", "clue": "Flying creature"},
            ]
        },
        {
            "name": "Mixed Length Words",
            "words": [
                {"word": "CAT", "clue": "Feline pet"},
                {"word": "COMPUTER", "clue": "Electronic device"},
                {"word": "A", "clue": "First letter"},  # Edge case
                {"word": "TECHNOLOGY", "clue": "Applied science"},
            ]
        }
    ]

    all_passed = True

    for i, scenario in enumerate(test_scenarios):
        print(f"=" * 60)
        print(f"TEST {i+1}: {scenario['name']}")
        print(f"=" * 60)

        words = scenario["words"]
        print(f"Testing with {len(words)} words: {[w['word'] for w in words]}")

        try:
            result = generator._create_grid(words)

            if result:
                grid = result["grid"]
                placed_words = result["placed_words"]
                clues = result["clues"]

                print(f"Grid generated successfully")
                print(f"  Grid size: {len(grid)}x{len(grid[0])}")
                print(f"  Words placed: {len(placed_words)}")
                print(f"  Clues generated: {len(clues)}")

                # Print the grid
                print("\nGenerated Grid:")
                print_clean_grid(grid)

                # Validate the grid
                validation_result = validate_grid_cleanliness(grid, placed_words)

                if validation_result["is_clean"]:
                    print("Grid validation: CLEAN - No unwanted sequences")
                else:
                    print("Grid validation: ISSUES FOUND")
                    for issue in validation_result["issues"]:
                        print(f"  - {issue}")
                    all_passed = False

                # Print word placements
                print("\nWord Placements:")
                for j, word_info in enumerate(placed_words):
                    print(f"  {j+1}. {word_info['word']} at ({word_info['row']}, {word_info['col']}) {word_info['direction']}")

            else:
                print("Grid generation returned None - algorithm may be too strict")
                # This might happen if validation is too restrictive

        except Exception as e:
            print(f"Grid generation failed: {e}")
            all_passed = False

        print()

    # Summary
    print("=" * 60)
    print("FINAL SUMMARY")
    print("=" * 60)

    if all_passed:
        print("ALL TESTS PASSED!")
        print("Crossword generator produces clean grids without unwanted sequences")
        print("No more issues with unwanted prefixes, suffixes, or letter combinations")
    else:
        print("Some tests failed - additional improvements needed")

    return all_passed

def print_clean_grid(grid):
    """Print grid in a clean, readable format."""
    if not grid:
        print("  Empty grid")
        return

    # Print column headers
    print("     ", end="")
    for c in range(len(grid[0])):
        print(f"{c:2d}", end="")
    print()

    # Print rows
    for r in range(len(grid)):
        print(f"  {r:2d}: ", end="")
        for c in range(len(grid[0])):
            cell = grid[r][c]
            if cell == ".":
                print(" .", end="")
            else:
                print(f" {cell}", end="")
        print()

def validate_grid_cleanliness(grid, placed_words):
    """Validate that grid contains only intended words without unwanted sequences."""

    issues = []

    # Find all letter sequences in the grid
    all_sequences = []

    # Horizontal sequences
    for r in range(len(grid)):
        current_seq = ""
        start_col = None

        for c in range(len(grid[0])):
            if grid[r][c] != ".":
                if start_col is None:
                    start_col = c
                current_seq += grid[r][c]
            else:
                if current_seq and len(current_seq) > 1:
                    all_sequences.append((r, start_col, "horizontal", current_seq))
                current_seq = ""
                start_col = None

        # Handle end of row
        if current_seq and len(current_seq) > 1:
            all_sequences.append((r, start_col, "horizontal", current_seq))

    # Vertical sequences
    for c in range(len(grid[0])):
        current_seq = ""
        start_row = None

        for r in range(len(grid)):
            if grid[r][c] != ".":
                if start_row is None:
                    start_row = r
                current_seq += grid[r][c]
            else:
                if current_seq and len(current_seq) > 1:
                    all_sequences.append((start_row, c, "vertical", current_seq))
                current_seq = ""
                start_row = None

        # Handle end of column
        if current_seq and len(current_seq) > 1:
            all_sequences.append((start_row, c, "vertical", current_seq))

    # Check if all sequences correspond to intended words
    intended_words = set()
    for word_info in placed_words:
        key = (word_info["row"], word_info["col"], word_info["direction"], word_info["word"])
        intended_words.add(key)

    # Check each sequence
    for row, col, direction, sequence in all_sequences:
        key = (row, col, direction, sequence)
        if key not in intended_words:
            issues.append(f"Unintended sequence: '{sequence}' at ({row}, {col}) {direction}")

    # Check for specific problematic patterns
    for row, col, direction, sequence in all_sequences:
        # Check for 2-letter sequences (should not exist)
        if len(sequence) == 2:
            issues.append(f"Unwanted 2-letter sequence: '{sequence}' at ({row}, {col}) {direction}")

        # Check for words that appear to extend beyond their intended boundaries
        # But exclude cases where both the shorter and longer words are intentionally placed
        placed_word_set = {w["word"] for w in placed_words}
        for word_info in placed_words:
            word = word_info["word"]
            if word in sequence and sequence != word:
                if sequence.startswith(word) or sequence.endswith(word):
                    # Check if the sequence itself is also an intended word
                    if sequence not in placed_word_set:
                        issues.append(f"Word '{word}' appears extended as '{sequence}' at ({row}, {col}) {direction}")

    return {
        "is_clean": len(issues) == 0,
        "issues": issues,
        "total_sequences": len(all_sequences),
        "intended_sequences": len(intended_words)
    }

if __name__ == "__main__":
    test_clean_crossword_generation()

crossword-app/backend-py/test-integration/test_final_validation.py
ADDED
@@ -0,0 +1,133 @@
  1 + #!/usr/bin/env python3
  2 +
  3 + import requests
  4 + import json
  5 +
  6 + def test_api_crossword():
  7 +     """Test that the API generates valid crosswords without boundary issues."""
  8 +
  9 +     url = "http://localhost:7860/api/generate"
 10 +     data = {
 11 +         "topics": ["animals"],
 12 +         "difficulty": "medium",
 13 +         "useAI": True
 14 +     }
 15 +
 16 +     print("🧪 Testing API Crossword Generation")
 17 +     print("=" * 50)
 18 +
 19 +     try:
 20 +         response = requests.post(url, json=data, timeout=30)
 21 +
 22 +         if response.status_code != 200:
 23 +             print(f"❌ API Error: {response.status_code}")
 24 +             print(response.text)
 25 +             return False
 26 +
 27 +         result = response.json()
 28 +
 29 +         if 'detail' in result:
 30 +             print(f"❌ Error: {result['detail']}")
 31 +             return False
 32 +
 33 +         grid = result['grid']
 34 +         clues = result['clues']
 35 +         metadata = result['metadata']
 36 +
 37 +         print(f"✅ Generated crossword with {metadata['wordCount']} words")
 38 +         print(f"Grid size: {len(grid)}x{len(grid[0])}")
 39 +         print(f"AI Generated: {metadata['aiGenerated']}")
 40 +
 41 +         # Validate boundary issues
 42 +         violations = validate_word_boundaries(grid, clues)
 43 +
 44 +         if violations:
 45 +             print(f"\n❌ Found {len(violations)} boundary violations:")
 46 +             for violation in violations:
 47 +                 print(f"  - {violation}")
 48 +             return False
 49 +         else:
 50 +             print(f"\n✅ No boundary violations found!")
 51 +             print(f"✅ All words are properly bounded")
 52 +
 53 +         # Display sample of the grid
 54 +         print(f"\nSample Grid (first 8 rows):")
 55 +         for i, row in enumerate(grid[:8]):
 56 +             row_str = " ".join(cell if cell != "." else " " for cell in row)
 57 +             print(f"{i:2d} | {row_str}")
 58 +
 59 +         return True
 60 +
 61 +     except Exception as e:
 62 +         print(f"❌ Test failed: {e}")
 63 +         return False
 64 +
 65 + def validate_word_boundaries(grid, clues):
 66 +     """Validate that all words in the grid have proper boundaries."""
 67 +     violations = []
 68 +
 69 +     # Create a set of valid word placements from clues
 70 +     valid_words = set()
 71 +     for clue in clues:
 72 +         word = clue['word']
 73 +         pos = clue['position']
 74 +         direction = clue['direction']
 75 +         row, col = pos['row'], pos['col']
 76 +
 77 +         if direction == 'across':
 78 +             valid_words.add((word, row, col, 'horizontal'))
 79 +         else:
 80 +             valid_words.add((word, row, col, 'vertical'))
 81 +
 82 +     # Check all horizontal sequences in grid
 83 +     for r in range(len(grid)):
 84 +         current_word = ""
 85 +         word_start = -1
 86 +
 87 +         for c in range(len(grid[r])):
 88 +             if grid[r][c] != ".":
 89 +                 if current_word == "":
 90 +                     word_start = c
 91 +                 current_word += grid[r][c]
 92 +             else:
 93 +                 if current_word and len(current_word) > 1:
 94 +                     # Check if this is a valid placed word
 95 +                     if (current_word, r, word_start, 'horizontal') not in valid_words:
 96 +                         violations.append(f"Invalid horizontal word '{current_word}' at ({r},{word_start})")
 97 +                 current_word = ""
 98 +
 99 +         # Check word at end of row
100 +         if current_word and len(current_word) > 1:
101 +             if (current_word, r, word_start, 'horizontal') not in valid_words:
102 +                 violations.append(f"Invalid horizontal word '{current_word}' at ({r},{word_start})")
103 +
104 +     # Check all vertical sequences in grid
105 +     for c in range(len(grid[0])):
106 +         current_word = ""
107 +         word_start = -1
108 +
109 +         for r in range(len(grid)):
110 +             if grid[r][c] != ".":
111 +                 if current_word == "":
112 +                     word_start = r
113 +                 current_word += grid[r][c]
114 +             else:
115 +                 if current_word and len(current_word) > 1:
116 +                     # Check if this is a valid placed word
117 +                     if (current_word, word_start, c, 'vertical') not in valid_words:
118 +                         violations.append(f"Invalid vertical word '{current_word}' at ({word_start},{c})")
119 +                 current_word = ""
120 +
121 +         # Check word at end of column
122 +         if current_word and len(current_word) > 1:
123 +             if (current_word, word_start, c, 'vertical') not in valid_words:
124 +                 violations.append(f"Invalid vertical word '{current_word}' at ({word_start},{c})")
125 +
126 +     return violations
127 +
128 + if __name__ == "__main__":
129 +     success = test_api_crossword()
130 +     if success:
131 +         print(f"\n🎉 All tests passed! The boundary fix is working correctly.")
132 +     else:
133 +         print(f"\n💥 Tests failed! The boundary issue still exists.")
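Note: test_final_validation.py needs the backend reachable on http://localhost:7860 before it can do anything, but the validator itself can be exercised offline. Below is a small illustrative snippet, assuming the script's directory is on sys.path so validate_word_boundaries is importable; the grid/clue data is hand-built in the shape the API test above expects.

# Illustrative only: run validate_word_boundaries() against hand-built data.
# Assumes this is executed from the test-integration directory.
from test_final_validation import validate_word_boundaries

grid = [
    ["C", "A", "T"],
    [".", ".", "."],
    [".", ".", "."],
]
clues = [
    {"word": "CAT", "position": {"row": 0, "col": 0}, "direction": "across"},
]

print(validate_word_boundaries(grid, clues))  # [] -> no violations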
crossword-app/backend-py/test-integration/test_intersection_issues.py
ADDED
@@ -0,0 +1,247 @@
  1 + #!/usr/bin/env python3
  2 + """
  3 + Test to reproduce the exact intersection and boundary issues seen in the crossword images.
  4 + """
  5 +
  6 + import sys
  7 + from pathlib import Path
  8 +
  9 + # Add project root to path
 10 + project_root = Path(__file__).parent.parent  # Go up from test-integration to backend-py
 11 + sys.path.insert(0, str(project_root))
 12 +
 13 + from src.services.crossword_generator_fixed import CrosswordGeneratorFixed
 14 +
 15 + def reproduce_image_issues():
 16 +     """Try to reproduce the specific issues seen in the crossword images."""
 17 +
 18 +     print("🔍 Reproducing crossword boundary issues from images...\n")
 19 +
 20 +     generator = CrosswordGeneratorFixed(vector_service=None)
 21 +
 22 +     # Test Case 1: Try to reproduce the "MACHINERY" extension issue
 23 +     print("=" * 60)
 24 +     print("TEST 1: Reproducing MACHINERY extension issue")
 25 +     print("=" * 60)
 26 +
 27 +     grid = [["." for _ in range(15)] for _ in range(15)]
 28 +     placed_words = []
 29 +
 30 +     # Place MACHINE first
 31 +     if generator._can_place_word(grid, "MACHINE", 6, 3, "horizontal"):
 32 +         generator._place_word(grid, "MACHINE", 6, 3, "horizontal")
 33 +         placed_words.append({
 34 +             "word": "MACHINE", "row": 6, "col": 3, "direction": "horizontal", "number": 1
 35 +         })
 36 +         print("✅ Placed MACHINE")
 37 +         print_grid(grid, 4, 10, 0, 12)
 38 +
 39 +     # Now try to place words that might create the extension
 40 +     test_placements = [
 41 +         ("VERY", 4, 8, "vertical"),         # V-E-R-Y going down, might intersect with E in MACHINE
 42 +         ("EXPERT", 5, 8, "horizontal"),     # Horizontal word that might extend MACHINE
 43 +         ("PROTOTYPE", 6, 9, "horizontal"),  # Direct extension after MACHINE
 44 +     ]
 45 +
 46 +     for word, row, col, direction in test_placements:
 47 +         print(f"\n🔍 Testing: '{word}' at ({row}, {col}) {direction}")
 48 +
 49 +         can_place = generator._can_place_word(grid, word, row, col, direction)
 50 +         print(f"Can place: {can_place}")
 51 +
 52 +         if can_place:
 53 +             # Make a copy and test the placement
 54 +             test_grid = [r[:] for r in grid]
 55 +             generator._place_word(test_grid, word, row, col, direction)
 56 +             print("After placement:")
 57 +             print_grid(test_grid, 4, 10, 0, 15)
 58 +
 59 +             # Check if MACHINE now appears to be extended
 60 +             machine_row = 6
 61 +             extended_word = ""
 62 +             for c in range(15):
 63 +                 if test_grid[machine_row][c] != ".":
 64 +                     extended_word += test_grid[machine_row][c]
 65 +                 elif extended_word:
 66 +                     break
 67 +
 68 +             if extended_word != "MACHINE":
 69 +                 print(f"⚠️ MACHINE appears extended to: '{extended_word}'")
 70 +
 71 +         print("-" * 40)
 72 +
 73 +     # Test Case 2: Check intersection logic specifically
 74 +     print("\n" + "=" * 60)
 75 +     print("TEST 2: Checking intersection calculation logic")
 76 +     print("=" * 60)
 77 +
 78 +     # Test the intersection finding logic
 79 +     word1 = "MACHINE"
 80 +     word2 = "EXPERT"
 81 +
 82 +     intersections = generator._find_word_intersections(word1, word2)
 83 +     print(f"Intersections between '{word1}' and '{word2}': {intersections}")
 84 +
 85 +     for intersection in intersections:
 86 +         word_pos = intersection["word_pos"]
 87 +         placed_pos = intersection["placed_pos"]
 88 +         print(f"  Letter '{word1[word_pos]}' at pos {word_pos} in '{word1}' matches")
 89 +         print(f"  Letter '{word2[placed_pos]}' at pos {placed_pos} in '{word2}'")
 90 +
 91 +         # Calculate where EXPERT would be placed to intersect with MACHINE
 92 +         machine_placement = {"word": "MACHINE", "row": 6, "col": 3, "direction": "horizontal"}
 93 +         placement = generator._calculate_intersection_placement(
 94 +             word2, placed_pos, machine_placement, word_pos
 95 +         )
 96 +
 97 +         if placement:
 98 +             print(f"  EXPERT would be placed at: row={placement['row']}, col={placement['col']}, dir={placement['direction']}")
 99 +
100 +             # Check if this would be valid
101 +             can_place = generator._can_place_word(grid, word2, placement['row'], placement['col'], placement['direction'])
102 +             print(f"  Valid placement: {can_place}")
103 +
104 +     # Test Case 3: Multi-word intersection scenario
105 +     print("\n" + "=" * 60)
106 +     print("TEST 3: Multi-word intersection scenario")
107 +     print("=" * 60)
108 +
109 +     # Create a more complex scenario like in the images
110 +     complex_grid = [["." for _ in range(15)] for _ in range(15)]
111 +     complex_words = []
112 +
113 +     # Place several words to create intersection opportunities
114 +     word_placements = [
115 +         ("MACHINE", 7, 4, "horizontal"),
116 +         ("EXPERT", 5, 6, "vertical"),   # Try to intersect at 'E'
117 +         ("SMART", 6, 8, "vertical"),    # Try to intersect at another letter
118 +     ]
119 +
120 +     for word, row, col, direction in word_placements:
121 +         print(f"\nPlacing '{word}' at ({row}, {col}) {direction}")
122 +
123 +         if generator._can_place_word(complex_grid, word, row, col, direction):
124 +             generator._place_word(complex_grid, word, row, col, direction)
125 +             complex_words.append({
126 +                 "word": word, "row": row, "col": col, "direction": direction, "number": len(complex_words) + 1
127 +             })
128 +             print(f"✅ Placed '{word}'")
129 +         else:
130 +             print(f"❌ Cannot place '{word}'")
131 +
132 +     print_grid(complex_grid, 4, 11, 2, 13)
133 +
134 +     # Check for any unintended word formations
135 +     print("\nChecking for unintended word formations:")
136 +     check_unintended_words(complex_grid, complex_words)
137 +
138 + def print_grid(grid, start_row, end_row, start_col, end_col):
139 +     """Print a section of the grid."""
140 +     print("Grid:")
141 +     for r in range(max(0, start_row), min(end_row, len(grid))):
142 +         row_str = f"R{r:2d}: "
143 +         for c in range(max(0, start_col), min(end_col, len(grid[0]))):
144 +             if grid[r][c] == ".":
145 +                 row_str += ". "
146 +             else:
147 +                 row_str += f"{grid[r][c]} "
148 +         print(row_str)
149 +     print()
150 +
151 + def check_unintended_words(grid, placed_words):
152 +     """Check for unintended word formations in the grid."""
153 +     unintended = []
154 +
155 +     # Check all horizontal sequences
156 +     for r in range(len(grid)):
157 +         current_word = ""
158 +         start_col = None
159 +
160 +         for c in range(len(grid[0])):
161 +             if grid[r][c] != ".":
162 +                 if start_col is None:
163 +                     start_col = c
164 +                 current_word += grid[r][c]
165 +             else:
166 +                 if current_word and len(current_word) > 1:
167 +                     # Check if this is an intended word
168 +                     intended = False
169 +                     for word_info in placed_words:
170 +                         if (word_info["direction"] == "horizontal" and
171 +                             word_info["row"] == r and
172 +                             word_info["col"] == start_col and
173 +                             word_info["word"] == current_word):
174 +                             intended = True
175 +                             break
176 +
177 +                     if not intended:
178 +                         unintended.append(f"Horizontal '{current_word}' at row {r}, col {start_col}")
179 +
180 +                 current_word = ""
181 +                 start_col = None
182 +
183 +         # Check final word if row ends with letters
184 +         if current_word and len(current_word) > 1:
185 +             intended = False
186 +             for word_info in placed_words:
187 +                 if (word_info["direction"] == "horizontal" and
188 +                     word_info["row"] == r and
189 +                     word_info["col"] == start_col and
190 +                     word_info["word"] == current_word):
191 +                     intended = True
192 +                     break
193 +
194 +             if not intended:
195 +                 unintended.append(f"Horizontal '{current_word}' at row {r}, col {start_col}")
196 +
197 +     # Check all vertical sequences
198 +     for c in range(len(grid[0])):
199 +         current_word = ""
200 +         start_row = None
201 +
202 +         for r in range(len(grid)):
203 +             if grid[r][c] != ".":
204 +                 if start_row is None:
205 +                     start_row = r
206 +                 current_word += grid[r][c]
207 +             else:
208 +                 if current_word and len(current_word) > 1:
209 +                     # Check if this is an intended word
210 +                     intended = False
211 +                     for word_info in placed_words:
212 +                         if (word_info["direction"] == "vertical" and
213 +                             word_info["col"] == c and
214 +                             word_info["row"] == start_row and
215 +                             word_info["word"] == current_word):
216 +                             intended = True
217 +                             break
218 +
219 +                     if not intended:
220 +                         unintended.append(f"Vertical '{current_word}' at row {start_row}, col {c}")
221 +
222 +                 current_word = ""
223 +                 start_row = None
224 +
225 +         # Check final word if column ends with letters
226 +         if current_word and len(current_word) > 1:
227 +             intended = False
228 +             for word_info in placed_words:
229 +                 if (word_info["direction"] == "vertical" and
230 +                     word_info["col"] == c and
231 +                     word_info["row"] == start_row and
232 +                     word_info["word"] == current_word):
233 +                     intended = True
234 +                     break
235 +
236 +             if not intended:
237 +                 unintended.append(f"Vertical '{current_word}' at row {start_row}, col {c}")
238 +
239 +     if unintended:
240 +         print("❌ Unintended words found:")
241 +         for word in unintended:
242 +             print(f"  {word}")
243 +     else:
244 +         print("✅ No unintended words detected")
245 +
246 + if __name__ == "__main__":
247 +     reproduce_image_issues()
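Note: the MACHINE/MACHINERY check in TEST 1 above simply reads off the contiguous letters on the row where a word was placed and compares the result with the word that was intended. Below is a compact standalone version of that read-off step, using hypothetical names not taken from the commit.

# Illustrative helper mirroring the extension check in TEST 1 above.
def read_horizontal_word(grid, row, start_col):
    """Read contiguous letters in `row` starting at `start_col` until a '.' or the edge."""
    letters = []
    c = start_col
    while c < len(grid[row]) and grid[row][c] != ".":
        letters.append(grid[row][c])
        c += 1
    return "".join(letters)

if __name__ == "__main__":
    grid = [["." for _ in range(15)] for _ in range(3)]
    for i, ch in enumerate("MACHINE"):
        grid[1][3 + i] = ch
    grid[1][10] = "R"  # simulate an unwanted trailing letter right after MACHINE
    word = read_horizontal_word(grid, 1, 3)
    print(word, "extended!" if word != "MACHINE" else "ok")  # MACHINER extended!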
crossword-app/backend-py/test-integration/test_local.py
ADDED
@@ -0,0 +1,98 @@
  1 + #!/usr/bin/env python3
  2 + """
  3 + Simple test script to verify Python backend works locally.
  4 + """
  5 +
  6 + import asyncio
  7 + import sys
  8 + import os
  9 + from pathlib import Path
 10 +
 11 + # Add project root to path
 12 + project_root = Path(__file__).parent.parent  # Go up from test-integration to backend-py
 13 + sys.path.insert(0, str(project_root))
 14 +
 15 + async def test_vector_search():
 16 +     """Test vector search service initialization."""
 17 +     try:
 18 +         from src.services.vector_search import VectorSearchService
 19 +
 20 +         print("🔧 Testing Vector Search Service...")
 21 +
 22 +         # Set minimal configuration for testing
 23 +         os.environ["EMBEDDING_MODEL"] = "sentence-transformers/all-MiniLM-L6-v2"  # Smaller model for testing
 24 +         os.environ["WORD_SIMILARITY_THRESHOLD"] = "0.6"
 25 +
 26 +         service = VectorSearchService()
 27 +
 28 +         print("📦 Initializing service (this may take a moment)...")
 29 +         await service.initialize()
 30 +
 31 +         if service.is_initialized:
 32 +             print("✅ Vector search service initialized successfully!")
 33 +
 34 +             # Test word generation
 35 +             print("\n🧪 Testing word generation for 'Animals'...")
 36 +             words = await service.find_similar_words("Animals", "medium", 5)
 37 +
 38 +             print(f"Found {len(words)} words:")
 39 +             for i, word_obj in enumerate(words, 1):
 40 +                 word = word_obj["word"]
 41 +                 similarity = word_obj.get("similarity", 0)
 42 +                 source = word_obj.get("source", "unknown")
 43 +                 print(f"  {i}. {word} (similarity: {similarity:.3f}, source: {source})")
 44 +         else:
 45 +             print("❌ Service initialization failed")
 46 +
 47 +         await service.cleanup()
 48 +
 49 +     except Exception as e:
 50 +         print(f"❌ Test failed: {e}")
 51 +         import traceback
 52 +         traceback.print_exc()
 53 +
 54 + async def test_crossword_generator():
 55 +     """Test crossword generator."""
 56 +     try:
 57 +         from src.services.crossword_generator_wrapper import CrosswordGenerator
 58 +
 59 +         print("\n🎯 Testing Crossword Generator...")
 60 +
 61 +         generator = CrosswordGenerator()
 62 +
 63 +         # Test static word generation
 64 +         words = await generator.generate_words_for_topics(
 65 +             topics=["Animals"],
 66 +             difficulty="medium",
 67 +             use_ai=False
 68 +         )
 69 +
 70 +         print(f"✅ Generated {len(words)} static words for Animals:")
 71 +         for word_obj in words[:3]:  # Show first 3
 72 +             print(f"  - {word_obj['word']}: {word_obj['clue']}")
 73 +
 74 +     except Exception as e:
 75 +         print(f"❌ Crossword generator test failed: {e}")
 76 +         import traceback
 77 +         traceback.print_exc()
 78 +
 79 + async def main():
 80 +     """Run all tests."""
 81 +     print("🚀 Testing Python Backend Components\n")
 82 +
 83 +     # Test individual components
 84 +     await test_crossword_generator()
 85 +
 86 +     # Test vector search (commented out as it requires large download)
 87 +     print("\n⚠️ Skipping vector search test (requires model download)")
 88 +     print("💡 To test vector search, uncomment the line below:")
 89 +     print("# await test_vector_search()")
 90 +
 91 +     print("\n✅ Basic tests completed!")
 92 +     print("🚀 Ready to test with FastAPI server")
 93 +     print("\n🧪 For comprehensive unit tests, run:")
 94 +     print("   python run_tests.py")
 95 +     print("   or: pytest tests/ -v")
 96 +
 97 + if __name__ == "__main__":
 98 +     asyncio.run(main())
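Note: the same smoke test can also be phrased as a regular pytest case. Below is a minimal sketch, assuming pytest-asyncio (or an equivalent async test plugin) is available in the dev environment and reusing the wrapper API shown in the script above; it is illustrative, not part of this commit.

# Minimal pytest sketch mirroring test_crossword_generator() above.
# Assumes pytest-asyncio is installed and that this file lives in test-integration/.
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))  # put backend-py on sys.path, as the scripts above do

import pytest
from src.services.crossword_generator_wrapper import CrosswordGenerator

@pytest.mark.asyncio
async def test_static_words_for_animals():
    generator = CrosswordGenerator()
    words = await generator.generate_words_for_topics(
        topics=["Animals"], difficulty="medium", use_ai=False
    )
    assert words, "expected at least one static word for the Animals topic"
    assert all("word" in w and "clue" in w for w in words)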