diff --git a/.gitignore b/.gitignore
index 8033876eb5591057ff4e032358820b4919d50bdc..307e0388086eb0b74cc97e1114d0633afc525936 100644
--- a/.gitignore
+++ b/.gitignore
@@ -47,4 +47,9 @@ pids
 .Spotlight-V100
 .Trashes
 ehthumbs.db
-Thumbs.db
\ No newline at end of file
+Thumbs.db
+
+hack
+issues/
+samples/
+venv/
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000000000000000000000000000000000000..4706828551cc021eaa607163494e6f0f4636db83
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,217 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Project Structure
+
+This is a full-stack crossword puzzle generator with two backend implementations:
+- **Node.js Backend** (`backend/`) - Original implementation with static word lists
+- **Python Backend** (`backend-py/`) - New implementation with AI-powered vector search
+- **React Frontend** (`frontend/`) - Modern React app with Vite
+
+Current deployment uses the Python backend with Docker containerization.
+
+## Development Commands
+
+### Frontend Development
+```bash
+cd frontend
+npm install
+npm run dev      # Start development server on http://localhost:5173
+npm run build    # Build for production
+npm run preview  # Preview production build
+```
+
+### Backend Development (Python - Primary)
+```bash
+cd backend-py
+
+# Testing
+python run_tests.py                              # Run all tests
+python run_tests.py crossword_generator_fixed    # Run specific test
+pytest test-unit/ -v                             # Direct pytest
+pytest test-unit/test_index_bug_fix.py -v        # Core functionality tests
+python test-integration/test_local.py            # Quick test without ML deps
+
+# Development server
+python app.py                                    # Start FastAPI server on port 7860
+
+# Debug/development tools
+python test-integration/test_simple_generation.py  # Test crossword generation
+python debug_grid_direct.py                      # Debug grid placement
+```
+
+### Backend Development (Node.js - Legacy)
+```bash
+cd backend
+npm install
+npm run dev      # Start Express server on http://localhost:3000
+npm test         # Run tests
+```
+
+### Docker Deployment
+```bash
+# Build and run locally
+docker build -t crossword-app .
+docker run -p 7860:7860 -e NODE_ENV=production crossword-app
+
+# Test deployment
+curl http://localhost:7860/api/topics
+curl http://localhost:7860/health
+```
+
+### Linting and Type Checking
+```bash
+# Python backend
+cd backend-py
+mypy src/        # Type checking (if mypy installed)
+ruff check src/  # Linting (if ruff installed)
+
+# Frontend
+cd frontend
+npm run lint     # ESLint (if configured)
+```
+
+## Architecture Overview
+
+### Full-Stack Components
+
+**Frontend** (`frontend/`)
+- React 18 with hooks and functional components
+- Key components: `TopicSelector.jsx`, `PuzzleGrid.jsx`, `ClueList.jsx`
+- Custom hook: `useCrossword.js` manages puzzle state
+- Grid rendering using CSS Grid with interactive cell filling
+
+**Python Backend** (`backend-py/` - Primary)
+- FastAPI web framework serving both API and static frontend files
+- AI-powered word generation using vector similarity search
+- Comprehensive bounds checking fixes for crossword generation
+- Multi-layer caching system with graceful fallback to static words
+
+**Node.js Backend** (`backend/` - Legacy)
+- Express.js with file-based word storage
+- Original crossword generation algorithm
+- Static word lists organized by topic (animals.json, science.json, etc.)
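+Both backends share the same word-list shape (see the `data/word-lists/*.json` files added in this diff: a flat array of `{"word", "clue"}` objects). A minimal sketch of loading that static data — the helper name and path are illustrative assumptions, not the actual service API:
+
+```python
+import json
+from pathlib import Path
+
+def load_topic_words(topic: str, data_dir: str = "data/word-lists") -> list[dict]:
+    """Load one topic's static word list, e.g. animals.json -> [{"word": "DOG", "clue": "Man's best friend"}, ...]."""
+    path = Path(data_dir) / f"{topic.lower()}.json"
+    with path.open(encoding="utf-8") as f:
+        entries = json.load(f)
+    # Keep only well-formed entries; crossword answers are stored as uppercase letters.
+    return [e for e in entries if e.get("word", "").isalpha()]
+```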
+ +### Core Python Backend Components + +**CrosswordGeneratorFixed** (`backend-py/src/services/crossword_generator_fixed.py`) +- Main crossword generation algorithm using backtracking +- Handles grid placement, bounds checking, and word intersections +- Contains fixes for "list index out of range" errors with comprehensive bounds validation +- Key methods: `_create_grid()`, `_backtrack_placement()`, `_can_place_word()`, `_place_word()` + +**VectorSearchService** (`backend-py/src/services/vector_search.py`) +- AI-powered word discovery using sentence-transformers + FAISS +- Extracts 30K+ words from model vocabulary vs static word lists +- Implements semantic similarity search with caching and fallback systems +- Requires torch/sentence-transformers dependencies (optional for core functionality) + +**WordCache** (`backend-py/src/services/word_cache.py`) +- Multi-layer caching system for vector-discovered words +- Handles permission issues with fallback mechanisms +- Reduces dependency on static word files + +### Data Flow + +1. **User Interaction** → React frontend (TopicSelector, PuzzleGrid) +2. **API Request** → FastAPI backend (`backend-py/routes/api.py`) +3. **Word Selection** → VectorSearchService (AI) or static word fallback +4. **Grid Generation** → CrosswordGeneratorFixed backtracking algorithm +5. **Response** → JSON with grid, clues, and metadata +6. **Frontend Rendering** → Interactive crossword grid with clues + +### Critical Dependencies + +**Frontend:** +- React 18, Vite (development/build) +- Node.js 18+ and npm 9+ + +**Python Backend (Primary):** +- FastAPI, uvicorn, pydantic (web framework) +- pytest, pytest-asyncio (testing) + +**Optional AI Features:** +- torch, sentence-transformers, faiss-cpu (vector search) +- httpx (for API testing) + +**Node.js Backend (Legacy):** +- Express.js, cors, helmet +- JSON file-based word storage + +The Python backend gracefully degrades to static word lists when AI dependencies are missing. 
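+The vector search flow described above reduces to "embed the topic, then run nearest-neighbour search over a pre-embedded vocabulary". A self-contained sketch of that technique (not the actual `VectorSearchService` code; the tiny vocabulary is illustrative, and the 0.65 cutoff mirrors `WORD_SIMILARITY_THRESHOLD`):
+
+```python
+import faiss  # requires faiss-cpu
+from sentence_transformers import SentenceTransformer
+
+model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")  # 768-dim embeddings
+vocab = ["TIGER", "OXYGEN", "VOLCANO", "GUITAR"]  # the real service extracts ~30K words from the tokenizer
+
+# Pre-compute normalized embeddings once, then index them (inner product == cosine similarity).
+vocab_emb = model.encode(vocab, normalize_embeddings=True).astype("float32")
+index = faiss.IndexFlatIP(vocab_emb.shape[1])
+index.add(vocab_emb)
+
+def similar_words(topic: str, k: int = 3, threshold: float = 0.65) -> list[str]:
+    query = model.encode([topic], normalize_embeddings=True).astype("float32")
+    scores, ids = index.search(query, k)
+    return [vocab[i] for score, i in zip(scores[0], ids[0]) if score >= threshold]
+
+print(similar_words("Animals"))
+```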
+ +### API Endpoints + +Both backends provide compatible REST APIs: +- `GET /api/topics` - Get available topics +- `POST /api/generate` - Generate crossword puzzle +- `POST /api/validate` - Validate user answers +- `GET /api/health` - Health check + +### Testing Strategy + +**Python Backend Tests:** +- `test_crossword_generator_fixed.py` - Grid generation logic +- `test_index_bug_fix.py` - Bounds checking and index error fixes (CRITICAL) +- `test_vector_search.py` - AI word generation (needs torch) +- `test_api_routes.py` - FastAPI endpoints (needs httpx) + +**Frontend Tests:** +- Component testing with React Testing Library (if configured) +- E2E testing with Playwright/Cypress (if configured) + +### Key Fixes Applied + +**Index Error Resolution:** +- Added comprehensive bounds checking in `_can_place_word()`, `_place_word()`, `_remove_word()` +- Fixed `_calculate_placement_score()` to validate grid coordinates before access +- All grid access operations now validate row/col bounds + +**Word Boundary Issues:** +- 2-letter sequences at crossword intersections are normal behavior, not bugs +- Removed overly strict validation that was rejecting valid crossword patterns +- Grid placement logic maintains compatibility with JavaScript backend quality + +### Environment Configuration + +**Python Backend (Production):** +```bash +NODE_ENV=production +PORT=7860 +EMBEDDING_MODEL=sentence-transformers/all-mpnet-base-v2 +WORD_SIMILARITY_THRESHOLD=0.65 +PYTHONPATH=/app/backend-py +PYTHONUNBUFFERED=1 +``` + +**Frontend Development:** +```bash +VITE_API_BASE_URL=http://localhost:7860 # Points to Python backend +``` + +**Node.js Backend (Legacy):** +```bash +NODE_ENV=development +PORT=3000 +DATABASE_URL=postgresql://user:pass@host:port/db # Optional +``` + +### Performance Notes + +**Python Backend:** +- **Startup**: ~30-60 seconds with AI (model download), ~2 seconds without +- **Memory**: ~500MB-1GB with AI, ~100MB without +- **Response Time**: ~200-500ms with vector search, ~100ms with static words +- FAISS index building is the main startup bottleneck + +**Frontend:** +- **Development**: Hot reload with Vite (~200ms) +- **Build Time**: ~10-30 seconds for production build +- **Bundle Size**: Optimized with Vite tree-shaking + +**Deployment:** +- Docker build time: ~5-10 minutes (includes frontend build + Python deps) +- Container size: ~1.5GB (includes ML models and dependencies) +- Hugging Face Spaces deployment: Automatic on git push +- run unit tests after fixing a bug \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 883c5c9cfff1ede273d897a84ca1df1d36704f38..f6e1da1ec1d273580b98988df310df686c643df8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,36 +1,85 @@ -# Use Node.js 18 as base image -FROM node:18-alpine +# Multi-stage build to optimize performance and security +# Stage 1: Builder - Install dependencies and build as root +FROM python:3.11-slim as builder # Set working directory WORKDIR /app -# Copy package files for both frontend and backend -COPY crossword-app/frontend/package*.json ./frontend/ -COPY crossword-app/backend/package*.json ./backend/ +# Install system dependencies for sentence-transformers and FAISS +RUN apt-get update && apt-get install -y \ + gcc \ + g++ \ + wget \ + curl \ + git \ + && rm -rf /var/lib/apt/lists/* + +# Install Node.js for frontend build +RUN curl -fsSL https://deb.nodesource.com/setup_18.x | bash - && \ + apt-get install -y nodejs -# Install dependencies for both frontend and backend +# Copy frontend package files and install dependencies 
first (for better caching) +COPY crossword-app/frontend/package*.json ./frontend/ RUN cd frontend && npm ci -RUN cd backend && npm ci --only=production -# Copy source code +# Copy Python backend requirements and install dependencies +COPY crossword-app/backend-py/requirements.txt ./backend-py/ +COPY crossword-app/backend-py/requirements-dev.txt ./backend-py/ +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r backend-py/requirements-dev.txt + +# Copy all source code COPY crossword-app/frontend/ ./frontend/ COPY crossword-app/backend/ ./backend/ +COPY crossword-app/backend-py/ ./backend-py/ # Build the React frontend RUN cd frontend && npm run build -# Copy built frontend files to backend public directory -RUN mkdir -p backend/public && cp -r frontend/dist/* backend/public/ +# Copy built frontend files to Python backend public directory +RUN mkdir -p backend-py/public && cp -r frontend/dist/* backend-py/public/ + +# Create symlink for shared data (word lists) +RUN cd backend-py && ln -sf ../backend/data data + +# Stage 2: Runtime - Copy only necessary files as non-root user +FROM python:3.11-slim as runtime + +# Copy Python packages from builder stage +COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages +COPY --from=builder /usr/local/bin /usr/local/bin -# Set working directory to backend for runtime -WORKDIR /app/backend +# Install minimal runtime dependencies +RUN apt-get update && apt-get install -y \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Create non-root user +RUN useradd -m -u 1000 appuser + +# Set working directory +WORKDIR /app/backend-py + +# Copy built application files with correct ownership +COPY --from=builder --chown=appuser:appuser /app/backend-py ./ +COPY --from=builder --chown=appuser:appuser /app/backend/data ./data + +# Switch to non-root user +USER appuser # Expose port 7860 (Hugging Face Spaces standard) EXPOSE 7860 -# Set environment to production +# Set environment variables for production ENV NODE_ENV=production ENV PORT=7860 +ENV PYTHONPATH=/app/backend-py +ENV PYTHONUNBUFFERED=1 +ENV PIP_NO_CACHE_DIR=1 + +# Health check +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD curl -f http://localhost:7860/health || exit 1 -# Start the backend server -CMD ["npm", "start"] \ No newline at end of file +# Start the Python backend server with uvicorn for better production performance +CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"] \ No newline at end of file diff --git a/crossword-app/Dockerfile b/crossword-app/Dockerfile index 07850e1308b66b83b8658fed3bded0988e9a9172..866f0b086570787abe94e3fb47b20044b272dc77 100644 --- a/crossword-app/Dockerfile +++ b/crossword-app/Dockerfile @@ -1,36 +1,84 @@ -# Use Node.js 18 as base image -FROM node:18-alpine +# Multi-stage build to optimize performance and security +# Stage 1: Builder - Install dependencies and build as root +FROM python:3.11-slim as builder # Set working directory WORKDIR /app -# Copy package files for both frontend and backend -COPY frontend/package*.json ./frontend/ -COPY backend/package*.json ./backend/ +# Install system dependencies for sentence-transformers and FAISS +RUN apt-get update && apt-get install -y \ + gcc \ + g++ \ + wget \ + curl \ + git \ + && rm -rf /var/lib/apt/lists/* + +# Install Node.js for frontend build +RUN curl -fsSL https://deb.nodesource.com/setup_18.x | bash - && \ + apt-get install -y nodejs -# Install dependencies for both frontend and 
backend +# Copy frontend package files and install dependencies first (for better caching) +COPY frontend/package*.json ./frontend/ RUN cd frontend && npm ci -RUN cd backend && npm ci --only=production -# Copy source code +# Copy Python backend requirements and install dependencies +COPY backend-py/requirements.txt ./backend-py/ +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r backend-py/requirements.txt + +# Copy all source code COPY frontend/ ./frontend/ COPY backend/ ./backend/ +COPY backend-py/ ./backend-py/ # Build the React frontend RUN cd frontend && npm run build -# Copy built frontend files to backend public directory -RUN mkdir -p backend/public && cp -r frontend/dist/* backend/public/ +# Copy built frontend files to Python backend public directory +RUN mkdir -p backend-py/public && cp -r frontend/dist/* backend-py/public/ + +# Create symlink for shared data (word lists) +RUN cd backend-py && ln -sf ../backend/data data + +# Stage 2: Runtime - Copy only necessary files as non-root user +FROM python:3.11-slim as runtime + +# Copy Python packages from builder stage +COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages +COPY --from=builder /usr/local/bin /usr/local/bin -# Set working directory to backend for runtime -WORKDIR /app/backend +# Install minimal runtime dependencies +RUN apt-get update && apt-get install -y \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Create non-root user +RUN useradd -m -u 1000 appuser + +# Set working directory +WORKDIR /app/backend-py + +# Copy built application files with correct ownership +COPY --from=builder --chown=appuser:appuser /app/backend-py ./ +COPY --from=builder --chown=appuser:appuser /app/backend/data ./data + +# Switch to non-root user +USER appuser # Expose port 7860 (Hugging Face Spaces standard) EXPOSE 7860 -# Set environment to production +# Set environment variables for production ENV NODE_ENV=production ENV PORT=7860 +ENV PYTHONPATH=/app/backend-py +ENV PYTHONUNBUFFERED=1 +ENV PIP_NO_CACHE_DIR=1 + +# Health check +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD curl -f http://localhost:7860/health || exit 1 -# Start the backend server -CMD ["npm", "start"] \ No newline at end of file +# Start the Python backend server with uvicorn for better production performance +CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"] \ No newline at end of file diff --git a/crossword-app/backend-py/.coverage b/crossword-app/backend-py/.coverage new file mode 100644 index 0000000000000000000000000000000000000000..9713289bb347784a20f075bf476b419f60c80de6 Binary files /dev/null and b/crossword-app/backend-py/.coverage differ diff --git a/crossword-app/backend-py/.env.example b/crossword-app/backend-py/.env.example new file mode 100644 index 0000000000000000000000000000000000000000..6df26a314dcb491b1aa2d0d0b3adbe1b7d13fbb6 --- /dev/null +++ b/crossword-app/backend-py/.env.example @@ -0,0 +1,20 @@ +# Python Backend Environment Configuration + +# Server Configuration +PORT=7860 +HOST=0.0.0.0 +NODE_ENV=production + +# AI/ML Configuration +EMBEDDING_MODEL=sentence-transformers/all-mpnet-base-v2 +WORD_SIMILARITY_THRESHOLD=0.65 +MAX_VOCAB_SIZE=30000 + +# HuggingFace Configuration (if needed for cloud inference) +HUGGINGFACE_API_KEY=your_huggingface_api_key_here + +# Logging +LOG_LEVEL=INFO + +# Development settings +RELOAD=false \ No newline at end of file diff --git 
a/crossword-app/backend-py/README-local-setup.md b/crossword-app/backend-py/README-local-setup.md
new file mode 100644
index 0000000000000000000000000000000000000000..bb7dc2930350fe0a69c3496c51600a5c70d0887d
--- /dev/null
+++ b/crossword-app/backend-py/README-local-setup.md
@@ -0,0 +1,78 @@
+# Local Development Setup
+
+## Quick Start
+
+```bash
+# Install all dependencies (same as production)
+pip install -r requirements.txt
+```
+
+## Python Version Support
+- **Recommended**: Python 3.11 (matches Docker)
+- **Supported**: Python 3.10-3.12
+
+## Installation Troubleshooting
+
+### If you get PyTorch installation errors:
+```bash
+# Install PyTorch first with CPU support
+pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+
+# Then install remaining dependencies
+pip install -r requirements.txt --no-deps
+pip install fastapi uvicorn[standard] python-dotenv python-multipart
+```
+
+### For M1/M2 Macs:
+```bash
+# Use conda for better compatibility
+conda install pytorch::pytorch torchvision torchaudio -c pytorch
+pip install -r requirements.txt --no-deps
+pip install sentence-transformers faiss-cpu transformers huggingface-hub
+```
+
+## Running Locally
+
+```bash
+cd crossword-app/backend-py
+python app.py
+```
+
+The server will start on http://localhost:7860.
+
+## Features Available
+
+- ✅ AI word generation via vector search
+- ✅ 30K+ vocabulary from sentence-transformers
+- ✅ Static word fallback
+- ✅ All crossword features
+- ✅ Same as production environment
+
+## Environment Variables
+
+Create a `.env` file:
+```bash
+# Optional - defaults to sentence-transformers/all-mpnet-base-v2
+EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
+
+# Optional - similarity threshold for AI words
+WORD_SIMILARITY_THRESHOLD=0.65
+
+# Optional - logging level
+LOG_LEVEL=INFO
+```
+
+## Testing
+
+```bash
+# Test basic components
+python test-integration/test_local.py
+
+# Test with pytest
+pytest
+```
+
+## Docker vs Local Development
+
+Both use the same `requirements.txt` with modern, compatible versions that work across Python 3.10-3.12 and different platforms.
\ No newline at end of file
diff --git a/crossword-app/backend-py/README.md b/crossword-app/backend-py/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..db6dbd39fe3dd285f9f6241e2ef00ac7c3463541
--- /dev/null
+++ b/crossword-app/backend-py/README.md
@@ -0,0 +1,332 @@
+# Python Backend with Vector Similarity Search
+
+This is the Python implementation of the crossword generator backend, featuring true AI word generation via vector similarity search.
+ +## 🚀 Features + +- **True Vector Search**: Uses sentence-transformers + FAISS for semantic word discovery +- **30K+ Vocabulary**: Searches through full model vocabulary instead of limited static lists +- **FastAPI**: Modern, fast Python web framework +- **Same API**: Compatible with existing React frontend +- **Hybrid Approach**: AI vector search with static word fallback + +## 🔄 Differences from JavaScript Backend + +| Feature | JavaScript Backend | Python Backend | +|---------|-------------------|----------------| +| **Word Generation** | Embedding filtering of static lists | True vector similarity search | +| **Vocabulary Size** | ~100 words per topic | 30K+ words from model | +| **AI Approach** | Semantic similarity filtering | Nearest neighbor search | +| **Performance** | Fast but limited | Slower startup, better results | +| **Dependencies** | Node.js + HuggingFace API | Python + ML libraries | + +## 🛠️ Setup & Installation + +### Prerequisites +- Python 3.11+ (3.11 recommended for Docker compatibility) +- pip (Python package manager) + +### Basic Setup (Core Functionality) +```bash +# Clone and navigate to backend directory +cd crossword-app/backend-py + +# Create virtual environment (recommended) +python -m venv venv +source venv/bin/activate # On Windows: venv\Scripts\activate + +# Install core dependencies +pip install -r requirements.txt + +# Start the server +python app.py +``` + +### Full Development Setup (with AI features) +```bash +# Install development dependencies including AI/ML libraries +pip install -r requirements-dev.txt + +# This includes: +# - All core dependencies +# - AI/ML libraries (torch, sentence-transformers, etc.) +# - Development tools (pytest, coverage, etc.) +``` + +### Requirements Files +- **`requirements.txt`**: Core dependencies for basic functionality +- **`requirements-dev.txt`**: Full development environment with AI features + +> **Note**: The AI/ML dependencies are large (~2GB). For basic testing without AI features, use `requirements.txt` only. + +> **Python Version**: Both local development and Docker use Python 3.11+ for optimal performance and latest package compatibility. 
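+Because the AI stack is optional, the backend has to detect it at startup. A minimal sketch of that fallback pattern, using hypothetical names (the real logic lives in `src/services/vector_search.py`):
+
+```python
+# Try the heavy AI stack first; fall back to static word lists if it is absent.
+try:
+    from sentence_transformers import SentenceTransformer  # installed via requirements-dev.txt
+    AI_AVAILABLE = True
+except ImportError:
+    SentenceTransformer = None
+    AI_AVAILABLE = False
+
+def pick_word_source() -> str:
+    """Report which word source the backend would use in this environment."""
+    return "vector search (sentence-transformers + FAISS)" if AI_AVAILABLE else "static word lists"
+
+if __name__ == "__main__":
+    print(f"Word source: {pick_word_source()}")
+```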
+ +## 📁 Structure + +``` +backend-py/ +├── app.py # FastAPI application entry point +├── requirements.txt # Core Python dependencies +├── requirements-dev.txt # Full development dependencies +├── src/ +│ ├── services/ +│ │ ├── vector_search.py # Core vector similarity search +│ │ └── crossword_generator.py # Puzzle generation logic +│ └── routes/ +│ └── api.py # API endpoints (matches JS backend) +├── test-unit/ # Unit tests (pytest framework) - 5 files +│ ├── test_crossword_generator.py +│ ├── test_api_routes.py +│ └── test_vector_search.py +├── test-integration/ # Integration tests (standalone scripts) - 16 files +│ ├── test_simple_generation.py +│ ├── test_boundary_fix.py +│ └── test_local.py # (+ 13 more test files) +├── data/ -> ../backend/data/ # Symlink to shared word data +└── public/ # Frontend static files (copied during build) +``` + +## 🛠 Dependencies + +### Core ML Stack +- `sentence-transformers`: Local model loading and embeddings +- `faiss-cpu`: Fast vector similarity search +- `torch`: PyTorch for model inference +- `numpy`: Vector operations + +### Web Framework +- `fastapi`: Modern Python web framework +- `uvicorn`: ASGI server +- `pydantic`: Data validation + +### Testing +- `pytest`: Testing framework +- `pytest-asyncio`: Async test support + +## 🧪 Testing + +### 📁 Test Organization (Reorganized for Clarity) + +**We've reorganized the test structure for better developer experience:** + +| Test Type | Location | Purpose | Framework | Count | +|-----------|----------|---------|-----------|-------| +| **Unit Tests** | `test-unit/` | Test individual components in isolation | pytest | 5 files | +| **Integration Tests** | `test-integration/` | Test complete workflows end-to-end | Standalone scripts | 16 files | + +**Benefits of this structure:** +- ✅ **Clear separation** between unit and integration testing +- ✅ **Intuitive naming** - developers immediately understand test types +- ✅ **Better tooling** - can run different test types independently +- ✅ **Easier maintenance** - organized by testing strategy + +> **Note**: Previously tests were mixed in `tests/` folder and root-level `test_*.py` files. The new structure provides much better organization. + +### Unit Tests Details (`test-unit/`) + +**What they test:** Individual components with mocking and isolation +- `test_crossword_generator.py` - Core crossword generation logic +- `test_api_routes.py` - FastAPI endpoint handlers +- `test_crossword_generator_wrapper.py` - Service wrapper layer +- `test_index_bug_fix.py` - Specific bug fix validations +- `test_vector_search.py` - AI vector search functionality (requires torch) + +### Run Unit Tests (Formal Test Suite) +```bash +# Run all unit tests +python run_tests.py + +# Run specific test modules +python run_tests.py crossword_generator +pytest test-unit/test_crossword_generator.py -v + +# Run core tests (excluding AI dependencies) +pytest test-unit/ -v --ignore=test-unit/test_vector_search.py + +# Run individual unit test classes +pytest test-unit/test_crossword_generator.py::TestCrosswordGenerator::test_init -v +``` + +### Integration Tests Details (`test-integration/`) + +**What they test:** Complete workflows without mocking - real functionality +- `test_simple_generation.py` - End-to-end crossword generation +- `test_boundary_fix.py` - Word boundary validation (our major fix!) 
+- `test_local.py` - Local environment and dependencies (quick check, no model download)
+- `test_word_boundaries.py` - Comprehensive boundary testing
+- `test_bounds_comprehensive.py` - Advanced bounds checking
+- `test_final_validation.py` - API integration testing
+- And 10 more specialized feature tests...
+
+### Run Integration Tests (End-to-End Scripts)
+```bash
+# Test core functionality
+python test-integration/test_simple_generation.py
+python test-integration/test_boundary_fix.py
+python test-integration/test_local.py
+
+# Test specific features
+python test-integration/test_word_boundaries.py
+python test-integration/test_bounds_comprehensive.py
+
+# Test API integration
+python test-integration/test_final_validation.py
+```
+
+### Test Coverage
+```bash
+# Run core tests with coverage (requires requirements-dev.txt)
+pytest test-unit/test_crossword_generator.py --cov=src --cov-report=html
+pytest test-unit/test_crossword_generator.py --cov=src --cov-report=term
+
+# Full coverage report (may fail without AI dependencies)
+pytest test-unit/ --cov=src --cov-report=html --ignore=test-unit/test_vector_search.py
+```
+
+### Test Status
+- ✅ **Core crossword generation**: 15/19 unit tests passing
+- ✅ **Boundary validation**: All integration tests passing
+- ⚠️ **AI/Vector search**: Requires torch dependencies
+- ⚠️ **Some async mocking**: Minor test infrastructure issues
+
+### Key Test Features
+- ✅ **Index alignment fix**: Tests the list index out of range bug fix
+- ✅ **Mocked vector search**: Tests without downloading models
+- ✅ **API validation**: Tests all endpoints and error cases
+- ✅ **Async support**: Full pytest-asyncio integration
+- ✅ **Error handling**: Tests malformed inputs and edge cases
+
+### 🔄 Migration Guide (For Existing Developers)
+
+**If you had previous commands, update them:**
+
+| Old Command | New Command |
+|-------------|-------------|
+| `pytest tests/` | `pytest test-unit/` |
+| `python test_simple_generation.py` | `python test-integration/test_simple_generation.py` |
+| `pytest tests/ --cov=src` | `pytest test-unit/ --cov=src` |
+
+**All functionality is preserved** - just organized better!
+
+## 🔧 Configuration
+
+Environment variables (set in HuggingFace Spaces):
+
+```bash
+# Core settings
+PORT=7860
+NODE_ENV=production
+
+# AI Configuration
+EMBEDDING_MODEL=sentence-transformers/all-mpnet-base-v2
+WORD_SIMILARITY_THRESHOLD=0.65
+
+# Optional
+LOG_LEVEL=INFO
+```
+
+## 🎯 Vector Search Process
+
+1. **Initialization**:
+   - Load sentence-transformers model locally
+   - Extract 30K+ vocabulary from model tokenizer
+   - Pre-compute embeddings for all vocabulary words
+   - Build FAISS index for fast similarity search
+
+2. **Word Generation**:
+   - Get topic embedding: `"Animals" → [768-dim vector]`
+   - Search FAISS index for nearest neighbors
+   - Filter by similarity threshold (0.65+)
+   - Filter by difficulty (word length)
+   - Return top matches with generated clues
+
+3. **Fallback**:
+   - If vector search fails → use static word lists
+   - If insufficient AI words → supplement with static words
+
+## 🐳 Docker Deployment
+
+The Dockerfile has been updated to use the Python backend:
+
+```dockerfile
+FROM python:3.11-slim
+# ... install dependencies
+# ... build frontend (same as before)
+# ... copy to backend-py/public/
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
+```
+
+## 📊 Performance Comparison
+
+**Startup Time**:
+- JavaScript: ~2 seconds
+- Python: ~30-60 seconds (model download + index building)
+
+**Word Quality**:
+- JavaScript: Limited by static word lists
+- Python: Access to full model vocabulary with semantic understanding
+
+**Memory Usage**:
+- JavaScript: ~100MB
+- Python: ~500MB-1GB (model + embeddings + FAISS index)
+
+**API Response Time**:
+- JavaScript: ~100ms (after cache warm-up)
+- Python: ~200-500ms (vector search + filtering)
+
+## 🔄 Migration Strategy
+
+1. **Phase 1** ✅: Basic Python backend structure
+2. **Phase 2**: Test vector search functionality
+3. **Phase 3**: Docker deployment and production testing
+4. **Phase 4**: Compare with JavaScript backend
+5. **Phase 5**: Production switch with rollback capability
+
+## 🎯 Next Steps
+
+- [ ] Test vector search with real model
+- [ ] Optimize FAISS index performance
+- [ ] Add more sophisticated crossword grid generation
+- [ ] Implement LLM-based clue generation
+- [ ] Add caching for frequently requested topics
\ No newline at end of file
diff --git a/crossword-app/backend-py/__pycache__/test_bounds_comprehensive.cpython-313-pytest-8.4.1.pyc b/crossword-app/backend-py/__pycache__/test_bounds_comprehensive.cpython-313-pytest-8.4.1.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..74a0e8cf7830e9f15236719bd4f5a89ad6a84433
Binary files /dev/null and b/crossword-app/backend-py/__pycache__/test_bounds_comprehensive.cpython-313-pytest-8.4.1.pyc differ
diff --git a/crossword-app/backend-py/app.py b/crossword-app/backend-py/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..28949a214fe6b9e10e2fc1f7fd47e507b354438e
--- /dev/null
+++ b/crossword-app/backend-py/app.py
@@ -0,0 +1,146 @@
+"""
+FastAPI backend for crossword puzzle generator with vector similarity search.
+""" + +import os +import logging +import time +from datetime import datetime +from contextlib import asynccontextmanager +from pathlib import Path + +from fastapi import FastAPI, HTTPException +from fastapi.middleware.cors import CORSMiddleware +from fastapi.staticfiles import StaticFiles +from fastapi.responses import FileResponse +import uvicorn +from dotenv import load_dotenv + +from src.routes.api import router as api_router +from src.services.vector_search import VectorSearchService + +# Load environment variables +load_dotenv() + +# Set up logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +def log_with_timestamp(message): + """Helper to log with precise timestamp.""" + timestamp = datetime.now().strftime("%H:%M:%S.%f")[:-3] + logger.info(f"[{timestamp}] {message}") + +# Global vector search service instance +vector_service = None + +@asynccontextmanager +async def lifespan(app: FastAPI): + """Initialize and cleanup application resources.""" + global vector_service + + # Startup + startup_time = time.time() + log_with_timestamp("🚀 Initializing Python backend with vector search...") + + # Initialize vector search service + try: + service_start = time.time() + log_with_timestamp("🔧 Creating VectorSearchService instance...") + vector_service = VectorSearchService() + + log_with_timestamp("⚡ Starting vector search initialization...") + await vector_service.initialize() + + init_time = time.time() - service_start + log_with_timestamp(f"✅ Vector search service initialized in {init_time:.2f}s") + except Exception as e: + logger.error(f"❌ Failed to initialize vector search service: {e}") + # Continue without vector search (will fallback to static words) + + # Make vector service available to routes + app.state.vector_service = vector_service + + yield + + # Shutdown + logger.info("🛑 Shutting down Python backend...") + if vector_service: + await vector_service.cleanup() + +# Create FastAPI app +app = FastAPI( + title="Crossword Puzzle Generator API", + description="Python backend with AI-powered vector similarity search", + version="2.0.0", + lifespan=lifespan +) + +# CORS configuration +cors_origins = [] +if os.getenv("NODE_ENV") == "production": + # Production: same origin + cors_origins = ["*"] # HuggingFace Spaces +else: + # Development: allow dev servers + cors_origins = [ + "http://localhost:5173", # Vite dev server + "http://localhost:3000", # Alternative dev server + "http://localhost:7860", # Local production test + ] + +app.add_middleware( + CORSMiddleware, + allow_origins=cors_origins, + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Include API routes +app.include_router(api_router, prefix="/api") + +# Serve static files (frontend) +static_path = Path(__file__).parent / "public" +if static_path.exists(): + app.mount("/assets", StaticFiles(directory=static_path / "assets"), name="assets") + + @app.get("/") + async def serve_frontend(): + """Serve the React frontend.""" + index_path = static_path / "index.html" + if index_path.exists(): + return FileResponse(index_path) + else: + raise HTTPException(status_code=404, detail="Frontend not found") + + @app.get("/{full_path:path}") + async def serve_spa_routes(full_path: str): + """Serve React SPA routes.""" + # For any non-API route, serve the React app + if not full_path.startswith("api/"): + index_path = static_path / "index.html" + if index_path.exists(): + return FileResponse(index_path) + raise HTTPException(status_code=404, detail="Not found") + 
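+# NOTE: FastAPI matches routes in registration order, so when public/ exists the
+# "/{full_path:path}" catch-all above (which only excludes paths starting with "api/")
+# also answers GET /health with index.html; the /health route below is only reached
+# when the frontend build is absent.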
+@app.get("/health") +async def health_check(): + """Health check endpoint.""" + return { + "status": "healthy", + "backend": "python", + "vector_search": vector_service.is_initialized if vector_service else False + } + +if __name__ == "__main__": + port = int(os.getenv("PORT", 7860)) + host = "0.0.0.0" if os.getenv("NODE_ENV") == "production" else "127.0.0.1" + + logger.info(f"🐍 Starting Python backend on {host}:{port}") + uvicorn.run( + "app:app", + host=host, + port=port, + reload=os.getenv("NODE_ENV") != "production" + ) \ No newline at end of file diff --git a/crossword-app/backend-py/data/data b/crossword-app/backend-py/data/data new file mode 120000 index 0000000000000000000000000000000000000000..20bf75b0d9bf305d9096b954bf9b9b881704fa66 --- /dev/null +++ b/crossword-app/backend-py/data/data @@ -0,0 +1 @@ +../backend/data \ No newline at end of file diff --git a/crossword-app/backend-py/data/word-lists/animals.json b/crossword-app/backend-py/data/word-lists/animals.json new file mode 100644 index 0000000000000000000000000000000000000000..40d8c4e0ec8f2b27a7ba5d7336ea91c47583ddaf --- /dev/null +++ b/crossword-app/backend-py/data/word-lists/animals.json @@ -0,0 +1,165 @@ +[ + { "word": "DOG", "clue": "Man's best friend" }, + { "word": "CAT", "clue": "Feline pet that purrs" }, + { "word": "ELEPHANT", "clue": "Large mammal with a trunk" }, + { "word": "TIGER", "clue": "Striped big cat" }, + { "word": "WHALE", "clue": "Largest marine mammal" }, + { "word": "BUTTERFLY", "clue": "Colorful flying insect" }, + { "word": "BIRD", "clue": "Flying creature with feathers" }, + { "word": "FISH", "clue": "Aquatic animal with gills" }, + { "word": "LION", "clue": "King of the jungle" }, + { "word": "BEAR", "clue": "Large mammal that hibernates" }, + { "word": "RABBIT", "clue": "Hopping mammal with long ears" }, + { "word": "HORSE", "clue": "Riding animal with hooves" }, + { "word": "SHEEP", "clue": "Woolly farm animal" }, + { "word": "GOAT", "clue": "Horned farm animal" }, + { "word": "DUCK", "clue": "Water bird that quacks" }, + { "word": "CHICKEN", "clue": "Farm bird that lays eggs" }, + { "word": "SNAKE", "clue": "Slithering reptile" }, + { "word": "TURTLE", "clue": "Shelled reptile" }, + { "word": "FROG", "clue": "Amphibian that croaks" }, + { "word": "SHARK", "clue": "Predatory ocean fish" }, + { "word": "DOLPHIN", "clue": "Intelligent marine mammal" }, + { "word": "PENGUIN", "clue": "Flightless Antarctic bird" }, + { "word": "MONKEY", "clue": "Primate that swings in trees" }, + { "word": "ZEBRA", "clue": "Striped African animal" }, + { "word": "GIRAFFE", "clue": "Tallest land animal" }, + { "word": "WOLF", "clue": "Wild canine that howls" }, + { "word": "FOX", "clue": "Cunning red-furred animal" }, + { "word": "DEER", "clue": "Graceful forest animal with antlers" }, + { "word": "MOOSE", "clue": "Large antlered animal" }, + { "word": "SQUIRREL", "clue": "Tree-climbing nut gatherer" }, + { "word": "RACCOON", "clue": "Masked nocturnal animal" }, + { "word": "BEAVER", "clue": "Dam-building rodent" }, + { "word": "OTTER", "clue": "Playful water mammal" }, + { "word": "SEAL", "clue": "Marine mammal with flippers" }, + { "word": "WALRUS", "clue": "Tusked Arctic marine mammal" }, + { "word": "RHINO", "clue": "Horned thick-skinned mammal" }, + { "word": "HIPPO", "clue": "Large African river mammal" }, + { "word": "CHEETAH", "clue": "Fastest land animal" }, + { "word": "LEOPARD", "clue": "Spotted big cat" }, + { "word": "JAGUAR", "clue": "South American big cat" }, + { "word": "PUMA", "clue": "Mountain 
lion" }, + { "word": "LYNX", "clue": "Wild cat with tufted ears" }, + { "word": "KANGAROO", "clue": "Hopping Australian marsupial" }, + { "word": "KOALA", "clue": "Eucalyptus-eating marsupial" }, + { "word": "PANDA", "clue": "Black and white bamboo eater" }, + { "word": "SLOTH", "clue": "Slow-moving tree dweller" }, + { "word": "ARMADILLO", "clue": "Armored mammal" }, + { "word": "ANTEATER", "clue": "Long-snouted insect eater" }, + { "word": "PLATYPUS", "clue": "Egg-laying mammal with a bill" }, + { "word": "BAT", "clue": "Flying mammal" }, + { "word": "MOLE", "clue": "Underground tunnel digger" }, + { "word": "HEDGEHOG", "clue": "Spiny small mammal" }, + { "word": "PORCUPINE", "clue": "Quill-covered rodent" }, + { "word": "SKUNK", "clue": "Black and white scent-spraying mammal" }, + { "word": "WEASEL", "clue": "Small carnivorous mammal" }, + { "word": "BADGER", "clue": "Burrowing black and white mammal" }, + { "word": "FERRET", "clue": "Domesticated hunting animal" }, + { "word": "MINK", "clue": "Valuable fur-bearing animal" }, + { "word": "EAGLE", "clue": "Majestic bird of prey" }, + { "word": "HAWK", "clue": "Sharp-eyed hunting bird" }, + { "word": "OWL", "clue": "Nocturnal bird with large eyes" }, + { "word": "FALCON", "clue": "Fast diving bird of prey" }, + { "word": "VULTURE", "clue": "Scavenging bird" }, + { "word": "CROW", "clue": "Black intelligent bird" }, + { "word": "RAVEN", "clue": "Large black corvid" }, + { "word": "ROBIN", "clue": "Red-breasted songbird" }, + { "word": "SPARROW", "clue": "Small brown songbird" }, + { "word": "CARDINAL", "clue": "Bright red songbird" }, + { "word": "BLUEJAY", "clue": "Blue crested bird" }, + { "word": "WOODPECKER", "clue": "Tree-pecking bird" }, + { "word": "HUMMINGBIRD", "clue": "Tiny fast-flying bird" }, + { "word": "PELICAN", "clue": "Large-billed water bird" }, + { "word": "FLAMINGO", "clue": "Pink wading bird" }, + { "word": "STORK", "clue": "Long-legged wading bird" }, + { "word": "HERON", "clue": "Tall fishing bird" }, + { "word": "CRANE", "clue": "Large wading bird" }, + { "word": "SWAN", "clue": "Elegant white water bird" }, + { "word": "GOOSE", "clue": "Large waterfowl" }, + { "word": "TURKEY", "clue": "Large ground bird" }, + { "word": "PHEASANT", "clue": "Colorful game bird" }, + { "word": "QUAIL", "clue": "Small ground bird" }, + { "word": "PEACOCK", "clue": "Bird with spectacular tail feathers" }, + { "word": "OSTRICH", "clue": "Largest flightless bird" }, + { "word": "EMU", "clue": "Australian flightless bird" }, + { "word": "KIWI", "clue": "Small flightless New Zealand bird" }, + { "word": "PARROT", "clue": "Colorful talking bird" }, + { "word": "TOUCAN", "clue": "Large-billed tropical bird" }, + { "word": "MACAW", "clue": "Large colorful parrot" }, + { "word": "COCKATOO", "clue": "Crested parrot" }, + { "word": "CANARY", "clue": "Yellow singing bird" }, + { "word": "FINCH", "clue": "Small seed-eating bird" }, + { "word": "PIGEON", "clue": "Common city bird" }, + { "word": "DOVE", "clue": "Symbol of peace" }, + { "word": "SEAGULL", "clue": "Coastal scavenging bird" }, + { "word": "ALBATROSS", "clue": "Large ocean bird" }, + { "word": "PUFFIN", "clue": "Colorful-billed seabird" }, + { "word": "LIZARD", "clue": "Small scaly reptile" }, + { "word": "IGUANA", "clue": "Large tropical lizard" }, + { "word": "GECKO", "clue": "Wall-climbing lizard" }, + { "word": "CHAMELEON", "clue": "Color-changing reptile" }, + { "word": "ALLIGATOR", "clue": "Large American crocodilian" }, + { "word": "CROCODILE", "clue": "Large aquatic reptile" }, 
+ { "word": "PYTHON", "clue": "Large constricting snake" }, + { "word": "COBRA", "clue": "Venomous hooded snake" }, + { "word": "VIPER", "clue": "Poisonous snake" }, + { "word": "RATTLESNAKE", "clue": "Snake with warning tail" }, + { "word": "SALAMANDER", "clue": "Amphibian that can regrow limbs" }, + { "word": "NEWT", "clue": "Small aquatic salamander" }, + { "word": "TOAD", "clue": "Warty amphibian" }, + { "word": "TADPOLE", "clue": "Frog larva" }, + { "word": "SALMON", "clue": "Fish that swims upstream" }, + { "word": "TROUT", "clue": "Freshwater game fish" }, + { "word": "BASS", "clue": "Popular sport fish" }, + { "word": "TUNA", "clue": "Large ocean fish" }, + { "word": "SWORDFISH", "clue": "Fish with long pointed bill" }, + { "word": "MARLIN", "clue": "Large billfish" }, + { "word": "MANTA", "clue": "Large ray fish" }, + { "word": "STINGRAY", "clue": "Flat fish with barbed tail" }, + { "word": "EEL", "clue": "Snake-like fish" }, + { "word": "SEAHORSE", "clue": "Horse-shaped fish" }, + { "word": "ANGELFISH", "clue": "Colorful tropical fish" }, + { "word": "GOLDFISH", "clue": "Common pet fish" }, + { "word": "CLOWNFISH", "clue": "Orange and white anemone fish" }, + { "word": "JELLYFISH", "clue": "Transparent stinging sea creature" }, + { "word": "OCTOPUS", "clue": "Eight-armed sea creature" }, + { "word": "SQUID", "clue": "Ten-armed cephalopod" }, + { "word": "CRAB", "clue": "Sideways-walking crustacean" }, + { "word": "LOBSTER", "clue": "Large marine crustacean" }, + { "word": "SHRIMP", "clue": "Small crustacean" }, + { "word": "STARFISH", "clue": "Five-armed sea creature" }, + { "word": "URCHIN", "clue": "Spiny sea creature" }, + { "word": "CORAL", "clue": "Marine organism that builds reefs" }, + { "word": "SPONGE", "clue": "Simple marine animal" }, + { "word": "OYSTER", "clue": "Pearl-producing mollusk" }, + { "word": "CLAM", "clue": "Burrowing shellfish" }, + { "word": "MUSSEL", "clue": "Dark-shelled mollusk" }, + { "word": "SNAIL", "clue": "Spiral-shelled gastropod" }, + { "word": "SLUG", "clue": "Shell-less gastropod" }, + { "word": "WORM", "clue": "Segmented invertebrate" }, + { "word": "SPIDER", "clue": "Eight-legged web spinner" }, + { "word": "SCORPION", "clue": "Arachnid with stinging tail" }, + { "word": "ANT", "clue": "Social insect worker" }, + { "word": "BEE", "clue": "Honey-making insect" }, + { "word": "WASP", "clue": "Stinging flying insect" }, + { "word": "HORNET", "clue": "Large aggressive wasp" }, + { "word": "FLY", "clue": "Common buzzing insect" }, + { "word": "MOSQUITO", "clue": "Blood-sucking insect" }, + { "word": "BEETLE", "clue": "Hard-shelled insect" }, + { "word": "LADYBUG", "clue": "Red spotted beneficial insect" }, + { "word": "DRAGONFLY", "clue": "Large-winged flying insect" }, + { "word": "GRASSHOPPER", "clue": "Jumping green insect" }, + { "word": "CRICKET", "clue": "Chirping insect" }, + { "word": "MANTIS", "clue": "Praying insect predator" }, + { "word": "MOTH", "clue": "Nocturnal butterfly relative" }, + { "word": "CATERPILLAR", "clue": "Butterfly larva" }, + { "word": "COCOON", "clue": "Insect transformation casing" }, + { "word": "TERMITE", "clue": "Wood-eating social insect" }, + { "word": "TICK", "clue": "Blood-sucking parasite" }, + { "word": "FLEA", "clue": "Jumping parasite" }, + { "word": "LOUSE", "clue": "Small parasitic insect" }, + { "word": "APHID", "clue": "Plant-sucking insect" }, + { "word": "MAGGOT", "clue": "Fly larva" }, + { "word": "GRUB", "clue": "Beetle larva" } +] \ No newline at end of file diff --git 
a/crossword-app/backend-py/data/word-lists/geography.json b/crossword-app/backend-py/data/word-lists/geography.json new file mode 100644 index 0000000000000000000000000000000000000000..188cb6f22f4efe3eccfaa80ab841936e75354956 --- /dev/null +++ b/crossword-app/backend-py/data/word-lists/geography.json @@ -0,0 +1,161 @@ +[ + { "word": "MOUNTAIN", "clue": "High elevation landform" }, + { "word": "OCEAN", "clue": "Large body of salt water" }, + { "word": "DESERT", "clue": "Dry, arid region" }, + { "word": "CONTINENT", "clue": "Large landmass" }, + { "word": "RIVER", "clue": "Flowing body of water" }, + { "word": "ISLAND", "clue": "Land surrounded by water" }, + { "word": "FOREST", "clue": "Dense area of trees" }, + { "word": "VALLEY", "clue": "Low area between hills" }, + { "word": "LAKE", "clue": "Body of freshwater" }, + { "word": "BEACH", "clue": "Sandy shore by water" }, + { "word": "CLIFF", "clue": "Steep rock face" }, + { "word": "PLATEAU", "clue": "Elevated flat area" }, + { "word": "CANYON", "clue": "Deep gorge with steep sides" }, + { "word": "GLACIER", "clue": "Moving mass of ice" }, + { "word": "VOLCANO", "clue": "Mountain that erupts" }, + { "word": "PENINSULA", "clue": "Land surrounded by water on three sides" }, + { "word": "ARCHIPELAGO", "clue": "Group of islands" }, + { "word": "PRAIRIE", "clue": "Grassland plain" }, + { "word": "TUNDRA", "clue": "Cold, treeless region" }, + { "word": "SAVANNA", "clue": "Tropical grassland" }, + { "word": "EQUATOR", "clue": "Earth's middle line" }, + { "word": "LATITUDE", "clue": "Distance from equator" }, + { "word": "LONGITUDE", "clue": "Distance from prime meridian" }, + { "word": "CLIMATE", "clue": "Long-term weather pattern" }, + { "word": "MONSOON", "clue": "Seasonal wind pattern" }, + { "word": "CAPITAL", "clue": "Main city of country" }, + { "word": "BORDER", "clue": "Boundary between countries" }, + { "word": "COAST", "clue": "Land meeting the sea" }, + { "word": "STRAIT", "clue": "Narrow water passage" }, + { "word": "DELTA", "clue": "River mouth formation" }, + { "word": "FJORD", "clue": "Narrow inlet between cliffs" }, + { "word": "ATOLL", "clue": "Ring-shaped coral island" }, + { "word": "MESA", "clue": "Flat-topped hill" }, + { "word": "BUTTE", "clue": "Isolated hill with steep sides" }, + { "word": "GORGE", "clue": "Deep narrow valley" }, + { "word": "RAVINE", "clue": "Small narrow gorge" }, + { "word": "RIDGE", "clue": "Long narrow hilltop" }, + { "word": "PEAK", "clue": "Mountain summit" }, + { "word": "SUMMIT", "clue": "Highest point" }, + { "word": "FOOTHILLS", "clue": "Hills at base of mountains" }, + { "word": "RANGE", "clue": "Chain of mountains" }, + { "word": "BASIN", "clue": "Low-lying area" }, + { "word": "WATERSHED", "clue": "Drainage area" }, + { "word": "ESTUARY", "clue": "Where river meets sea" }, + { "word": "BAY", "clue": "Curved inlet of water" }, + { "word": "GULF", "clue": "Large bay" }, + { "word": "CAPE", "clue": "Point of land into water" }, + { "word": "HEADLAND", "clue": "High point of land" }, + { "word": "LAGOON", "clue": "Shallow coastal body of water" }, + { "word": "REEF", "clue": "Underwater rock formation" }, + { "word": "SHOAL", "clue": "Shallow area in water" }, + { "word": "CHANNEL", "clue": "Deep water passage" }, + { "word": "SOUND", "clue": "Large sea inlet" }, + { "word": "HARBOR", "clue": "Sheltered port area" }, + { "word": "INLET", "clue": "Small bay" }, + { "word": "COVE", "clue": "Small sheltered bay" }, + { "word": "MARSH", "clue": "Wetland area" }, + { "word": "SWAMP", "clue": 
"Forested wetland" }, + { "word": "BOG", "clue": "Acidic wetland" }, + { "word": "OASIS", "clue": "Fertile spot in desert" }, + { "word": "DUNE", "clue": "Sand hill" }, + { "word": "PLAIN", "clue": "Flat grassland" }, + { "word": "STEPPE", "clue": "Dry grassland" }, + { "word": "TAIGA", "clue": "Northern coniferous forest" }, + { "word": "RAINFOREST", "clue": "Dense tropical forest" }, + { "word": "JUNGLE", "clue": "Dense tropical vegetation" }, + { "word": "WOODLAND", "clue": "Area with scattered trees" }, + { "word": "GROVE", "clue": "Small group of trees" }, + { "word": "MEADOW", "clue": "Grassy field" }, + { "word": "PASTURE", "clue": "Grazing land" }, + { "word": "FIELD", "clue": "Open area of land" }, + { "word": "MOOR", "clue": "Open uncultivated land" }, + { "word": "HEATH", "clue": "Shrubland area" }, + { "word": "ARCTIC", "clue": "Cold northern region" }, + { "word": "ANTARCTIC", "clue": "Cold southern region" }, + { "word": "POLAR", "clue": "Of the poles" }, + { "word": "TROPICAL", "clue": "Hot humid climate zone" }, + { "word": "TEMPERATE", "clue": "Moderate climate zone" }, + { "word": "ARID", "clue": "Very dry" }, + { "word": "HUMID", "clue": "Moist air" }, + { "word": "ALTITUDE", "clue": "Height above sea level" }, + { "word": "ELEVATION", "clue": "Height of land" }, + { "word": "TERRAIN", "clue": "Physical features of land" }, + { "word": "TOPOGRAPHY", "clue": "Surface features of area" }, + { "word": "GEOGRAPHY", "clue": "Study of Earth's features" }, + { "word": "CARTOGRAPHY", "clue": "Map making" }, + { "word": "MERIDIAN", "clue": "Longitude line" }, + { "word": "PARALLEL", "clue": "Latitude line" }, + { "word": "HEMISPHERE", "clue": "Half of Earth" }, + { "word": "TROPICS", "clue": "Hot climate zone" }, + { "word": "POLES", "clue": "Earth's endpoints" }, + { "word": "AXIS", "clue": "Earth's rotation line" }, + { "word": "ORBIT", "clue": "Path around sun" }, + { "word": "SEASON", "clue": "Time of year" }, + { "word": "SOLSTICE", "clue": "Longest or shortest day" }, + { "word": "EQUINOX", "clue": "Equal day and night" }, + { "word": "COMPASS", "clue": "Direction-finding tool" }, + { "word": "NAVIGATION", "clue": "Finding your way" }, + { "word": "BEARING", "clue": "Direction or course" }, + { "word": "AZIMUTH", "clue": "Compass direction" }, + { "word": "SCALE", "clue": "Map size ratio" }, + { "word": "LEGEND", "clue": "Map symbol key" }, + { "word": "CONTOUR", "clue": "Elevation line on map" }, + { "word": "GRID", "clue": "Map reference system" }, + { "word": "PROJECTION", "clue": "Map flattening method" }, + { "word": "SURVEY", "clue": "Land measurement" }, + { "word": "BOUNDARY", "clue": "Dividing line" }, + { "word": "FRONTIER", "clue": "Border region" }, + { "word": "TERRITORY", "clue": "Area of land" }, + { "word": "REGION", "clue": "Geographic area" }, + { "word": "ZONE", "clue": "Designated area" }, + { "word": "DISTRICT", "clue": "Administrative area" }, + { "word": "PROVINCE", "clue": "Political subdivision" }, + { "word": "STATE", "clue": "Political entity" }, + { "word": "COUNTY", "clue": "Local government area" }, + { "word": "CITY", "clue": "Large urban area" }, + { "word": "TOWN", "clue": "Small urban area" }, + { "word": "VILLAGE", "clue": "Small rural community" }, + { "word": "HAMLET", "clue": "Very small village" }, + { "word": "SUBURB", "clue": "Residential area outside city" }, + { "word": "URBAN", "clue": "City-like" }, + { "word": "RURAL", "clue": "Countryside" }, + { "word": "METROPOLITAN", "clue": "Large city area" }, + { "word": "POPULATION", 
"clue": "Number of people" }, + { "word": "DENSITY", "clue": "Crowdedness" }, + { "word": "SETTLEMENT", "clue": "Place where people live" }, + { "word": "COLONY", "clue": "Overseas territory" }, + { "word": "NATION", "clue": "Country" }, + { "word": "REPUBLIC", "clue": "Democratic state" }, + { "word": "KINGDOM", "clue": "Monarchy" }, + { "word": "EMPIRE", "clue": "Large political entity" }, + { "word": "FEDERATION", "clue": "Union of states" }, + { "word": "ALLIANCE", "clue": "Partnership of nations" }, + { "word": "TREATY", "clue": "International agreement" }, + { "word": "TRADE", "clue": "Commercial exchange" }, + { "word": "EXPORT", "clue": "Goods sent abroad" }, + { "word": "IMPORT", "clue": "Goods brought in" }, + { "word": "COMMERCE", "clue": "Business activity" }, + { "word": "INDUSTRY", "clue": "Manufacturing" }, + { "word": "AGRICULTURE", "clue": "Farming" }, + { "word": "MINING", "clue": "Extracting minerals" }, + { "word": "FORESTRY", "clue": "Tree management" }, + { "word": "FISHING", "clue": "Catching fish" }, + { "word": "TOURISM", "clue": "Travel industry" }, + { "word": "TRANSPORTATION", "clue": "Moving people and goods" }, + { "word": "INFRASTRUCTURE", "clue": "Basic facilities" }, + { "word": "COMMUNICATION", "clue": "Information exchange" }, + { "word": "CULTURE", "clue": "Way of life" }, + { "word": "LANGUAGE", "clue": "Communication system" }, + { "word": "RELIGION", "clue": "Belief system" }, + { "word": "ETHNICITY", "clue": "Cultural group" }, + { "word": "MIGRATION", "clue": "Movement of people" }, + { "word": "IMMIGRATION", "clue": "Moving into country" }, + { "word": "EMIGRATION", "clue": "Moving out of country" }, + { "word": "DIASPORA", "clue": "Scattered population" }, + { "word": "NOMAD", "clue": "Wandering person" }, + { "word": "REFUGEE", "clue": "Displaced person" }, + { "word": "CENSUS", "clue": "Population count" }, + { "word": "DEMOGRAPHIC", "clue": "Population characteristic" } +] \ No newline at end of file diff --git a/crossword-app/backend-py/data/word-lists/science.json b/crossword-app/backend-py/data/word-lists/science.json new file mode 100644 index 0000000000000000000000000000000000000000..3a66731c0c7f2fb90025b0a6b1b7bbe1e8f8daf9 --- /dev/null +++ b/crossword-app/backend-py/data/word-lists/science.json @@ -0,0 +1,170 @@ +[ + { "word": "ATOM", "clue": "Smallest unit of matter" }, + { "word": "GRAVITY", "clue": "Force that pulls objects down" }, + { "word": "MOLECULE", "clue": "Group of atoms bonded together" }, + { "word": "PHOTON", "clue": "Particle of light" }, + { "word": "CHEMISTRY", "clue": "Study of matter and reactions" }, + { "word": "PHYSICS", "clue": "Study of matter and energy" }, + { "word": "BIOLOGY", "clue": "Study of living organisms" }, + { "word": "ELEMENT", "clue": "Pure chemical substance" }, + { "word": "OXYGEN", "clue": "Gas essential for breathing" }, + { "word": "CARBON", "clue": "Element found in all life" }, + { "word": "HYDROGEN", "clue": "Lightest chemical element" }, + { "word": "ENERGY", "clue": "Capacity to do work" }, + { "word": "FORCE", "clue": "Push or pull on an object" }, + { "word": "VELOCITY", "clue": "Speed with direction" }, + { "word": "MASS", "clue": "Amount of matter in object" }, + { "word": "VOLUME", "clue": "Amount of space occupied" }, + { "word": "DENSITY", "clue": "Mass per unit volume" }, + { "word": "PRESSURE", "clue": "Force per unit area" }, + { "word": "TEMPERATURE", "clue": "Measure of heat" }, + { "word": "ELECTRON", "clue": "Negatively charged particle" }, + { "word": "PROTON", "clue": 
"Positively charged particle" }, + { "word": "NEUTRON", "clue": "Neutral atomic particle" }, + { "word": "NUCLEUS", "clue": "Center of an atom" }, + { "word": "CELL", "clue": "Basic unit of life" }, + { "word": "DNA", "clue": "Genetic blueprint molecule" }, + { "word": "PROTEIN", "clue": "Complex biological molecule" }, + { "word": "ENZYME", "clue": "Biological catalyst" }, + { "word": "VIRUS", "clue": "Infectious agent" }, + { "word": "BACTERIA", "clue": "Single-celled organisms" }, + { "word": "EVOLUTION", "clue": "Change in species over time" }, + { "word": "ISOTOPE", "clue": "Atom variant with different neutrons" }, + { "word": "ION", "clue": "Charged atom or molecule" }, + { "word": "COMPOUND", "clue": "Chemical combination of elements" }, + { "word": "MIXTURE", "clue": "Combined substances retaining properties" }, + { "word": "SOLUTION", "clue": "Dissolved mixture" }, + { "word": "ACID", "clue": "Sour chemical with low pH" }, + { "word": "BASE", "clue": "Alkaline substance with high pH" }, + { "word": "SALT", "clue": "Ionic compound from acid-base reaction" }, + { "word": "CATALYST", "clue": "Substance that speeds reactions" }, + { "word": "RNA", "clue": "Genetic messenger molecule" }, + { "word": "GENE", "clue": "Heredity unit on chromosome" }, + { "word": "CHROMOSOME", "clue": "Gene-carrying structure" }, + { "word": "TISSUE", "clue": "Group of similar cells" }, + { "word": "ORGAN", "clue": "Body part with specific function" }, + { "word": "SYSTEM", "clue": "Group of organs working together" }, + { "word": "ORGANISM", "clue": "Living individual entity" }, + { "word": "SPECIES", "clue": "Group of similar organisms" }, + { "word": "ADAPTATION", "clue": "Survival-enhancing change" }, + { "word": "MUTATION", "clue": "Genetic change in DNA" }, + { "word": "HEREDITY", "clue": "Passing traits to offspring" }, + { "word": "ECOSYSTEM", "clue": "Community and environment" }, + { "word": "HABITAT", "clue": "Natural living environment" }, + { "word": "BIODIVERSITY", "clue": "Variety of life forms" }, + { "word": "PHOTOSYNTHESIS", "clue": "Plant energy-making process" }, + { "word": "RESPIRATION", "clue": "Cellular breathing process" }, + { "word": "METABOLISM", "clue": "Chemical processes in body" }, + { "word": "HOMEOSTASIS", "clue": "Body's internal balance" }, + { "word": "MITOSIS", "clue": "Cell division for growth" }, + { "word": "MEIOSIS", "clue": "Cell division for reproduction" }, + { "word": "EMBRYO", "clue": "Early development stage" }, + { "word": "FOSSIL", "clue": "Preserved ancient remains" }, + { "word": "GEOLOGY", "clue": "Study of Earth's structure" }, + { "word": "MINERAL", "clue": "Natural inorganic crystal" }, + { "word": "ROCK", "clue": "Solid earth material" }, + { "word": "SEDIMENT", "clue": "Settled particles" }, + { "word": "EROSION", "clue": "Gradual wearing away" }, + { "word": "VOLCANO", "clue": "Earth opening spewing lava" }, + { "word": "EARTHQUAKE", "clue": "Ground shaking from plate movement" }, + { "word": "PLATE", "clue": "Earth's crust section" }, + { "word": "MAGMA", "clue": "Molten rock beneath surface" }, + { "word": "LAVA", "clue": "Molten rock on surface" }, + { "word": "CRYSTAL", "clue": "Ordered atomic structure" }, + { "word": "ATMOSPHERE", "clue": "Layer of gases around Earth" }, + { "word": "CLIMATE", "clue": "Long-term weather pattern" }, + { "word": "WEATHER", "clue": "Short-term atmospheric conditions" }, + { "word": "PRECIPITATION", "clue": "Water falling from clouds" }, + { "word": "HUMIDITY", "clue": "Moisture in air" }, + { "word": "WIND", 
"clue": "Moving air mass" }, + { "word": "STORM", "clue": "Violent weather event" }, + { "word": "HURRICANE", "clue": "Powerful tropical cyclone" }, + { "word": "TORNADO", "clue": "Rotating column of air" }, + { "word": "LIGHTNING", "clue": "Electrical discharge in sky" }, + { "word": "THUNDER", "clue": "Sound of lightning" }, + { "word": "RAINBOW", "clue": "Spectrum of light in sky" }, + { "word": "ASTRONOMY", "clue": "Study of celestial objects" }, + { "word": "GALAXY", "clue": "Collection of stars and planets" }, + { "word": "PLANET", "clue": "Large orbiting celestial body" }, + { "word": "STAR", "clue": "Self-luminous celestial body" }, + { "word": "MOON", "clue": "Natural satellite of planet" }, + { "word": "COMET", "clue": "Icy body with tail" }, + { "word": "ASTEROID", "clue": "Rocky space object" }, + { "word": "METEOR", "clue": "Space rock entering atmosphere" }, + { "word": "ORBIT", "clue": "Curved path around object" }, + { "word": "LIGHT", "clue": "Electromagnetic radiation" }, + { "word": "SPECTRUM", "clue": "Range of electromagnetic radiation" }, + { "word": "WAVELENGTH", "clue": "Distance between wave peaks" }, + { "word": "FREQUENCY", "clue": "Waves per unit time" }, + { "word": "AMPLITUDE", "clue": "Wave height or intensity" }, + { "word": "SOUND", "clue": "Vibrations in air" }, + { "word": "ECHO", "clue": "Reflected sound" }, + { "word": "RESONANCE", "clue": "Vibration amplification" }, + { "word": "DOPPLER", "clue": "Wave frequency shift effect" }, + { "word": "MOTION", "clue": "Change in position" }, + { "word": "ACCELERATION", "clue": "Change in velocity" }, + { "word": "MOMENTUM", "clue": "Mass times velocity" }, + { "word": "INERTIA", "clue": "Resistance to motion change" }, + { "word": "FRICTION", "clue": "Resistance to sliding" }, + { "word": "HEAT", "clue": "Thermal energy transfer" }, + { "word": "COMBUSTION", "clue": "Burning chemical reaction" }, + { "word": "OXIDATION", "clue": "Reaction with oxygen" }, + { "word": "REDUCTION", "clue": "Gain of electrons" }, + { "word": "ELECTROLYSIS", "clue": "Chemical breakdown by electricity" }, + { "word": "CONDUCTIVITY", "clue": "Ability to transfer energy" }, + { "word": "INSULATOR", "clue": "Material blocking energy flow" }, + { "word": "SEMICONDUCTOR", "clue": "Partial electrical conductor" }, + { "word": "MAGNETISM", "clue": "Force of magnetic attraction" }, + { "word": "FIELD", "clue": "Region of force influence" }, + { "word": "CIRCUIT", "clue": "Closed electrical path" }, + { "word": "CURRENT", "clue": "Flow of electric charge" }, + { "word": "VOLTAGE", "clue": "Electric potential difference" }, + { "word": "RESISTANCE", "clue": "Opposition to current flow" }, + { "word": "CAPACITOR", "clue": "Device storing electric charge" }, + { "word": "INDUCTOR", "clue": "Device storing magnetic energy" }, + { "word": "TRANSISTOR", "clue": "Electronic switching device" }, + { "word": "LASER", "clue": "Focused beam of light" }, + { "word": "RADAR", "clue": "Radio detection system" }, + { "word": "SONAR", "clue": "Sound detection system" }, + { "word": "TELESCOPE", "clue": "Instrument for viewing distant objects" }, + { "word": "MICROSCOPE", "clue": "Instrument for viewing small objects" }, + { "word": "HYPOTHESIS", "clue": "Testable scientific prediction" }, + { "word": "THEORY", "clue": "Well-tested scientific explanation" }, + { "word": "LAW", "clue": "Consistently observed scientific rule" }, + { "word": "EXPERIMENT", "clue": "Controlled scientific test" }, + { "word": "OBSERVATION", "clue": "Careful scientific watching" }, 
+ { "word": "MEASUREMENT", "clue": "Quantified observation" }, + { "word": "ANALYSIS", "clue": "Detailed examination of data" }, + { "word": "SYNTHESIS", "clue": "Combining elements into whole" }, + { "word": "VARIABLE", "clue": "Factor that can change" }, + { "word": "CONTROL", "clue": "Unchanged comparison group" }, + { "word": "DATA", "clue": "Information collected from tests" }, + { "word": "STATISTICS", "clue": "Mathematical analysis of data" }, + { "word": "PROBABILITY", "clue": "Likelihood of occurrence" }, + { "word": "PRECISION", "clue": "Exactness of measurement" }, + { "word": "ACCURACY", "clue": "Correctness of measurement" }, + { "word": "ERROR", "clue": "Difference from true value" }, + { "word": "UNCERTAINTY", "clue": "Range of doubt in measurement" }, + { "word": "CALIBRATION", "clue": "Adjusting instrument accuracy" }, + { "word": "STANDARD", "clue": "Reference for measurement" }, + { "word": "UNIT", "clue": "Base measure of quantity" }, + { "word": "METRIC", "clue": "Decimal measurement system" }, + { "word": "WEIGHT", "clue": "Force of gravity on mass" }, + { "word": "CONCENTRATION", "clue": "Amount of substance per volume" }, + { "word": "MOLARITY", "clue": "Moles of solute per liter" }, + { "word": "EQUILIBRIUM", "clue": "State of balanced forces" }, + { "word": "STABILITY", "clue": "Resistance to change" }, + { "word": "DECAY", "clue": "Gradual breakdown process" }, + { "word": "RADIATION", "clue": "Energy emitted from source" }, + { "word": "RADIOACTIVE", "clue": "Emitting nuclear radiation" }, + { "word": "HALFLIFE", "clue": "Time for half to decay" }, + { "word": "FUSION", "clue": "Nuclear combining reaction" }, + { "word": "FISSION", "clue": "Nuclear splitting reaction" }, + { "word": "QUANTUM", "clue": "Discrete packet of energy" }, + { "word": "PARTICLE", "clue": "Tiny piece of matter" }, + { "word": "WAVE", "clue": "Energy transfer disturbance" }, + { "word": "INTERFERENCE", "clue": "Wave interaction effect" }, + { "word": "DIFFRACTION", "clue": "Wave bending around obstacle" }, + { "word": "REFLECTION", "clue": "Bouncing back of waves" }, + { "word": "REFRACTION", "clue": "Bending of waves through medium" } +] \ No newline at end of file diff --git a/crossword-app/backend-py/data/word-lists/technology.json b/crossword-app/backend-py/data/word-lists/technology.json new file mode 100644 index 0000000000000000000000000000000000000000..ce9828c32c8615ab31f77da9c27ad1de5711a411 --- /dev/null +++ b/crossword-app/backend-py/data/word-lists/technology.json @@ -0,0 +1,221 @@ +[ + { "word": "COMPUTER", "clue": "Electronic processing device" }, + { "word": "INTERNET", "clue": "Global computer network" }, + { "word": "ALGORITHM", "clue": "Set of rules for solving problems" }, + { "word": "DATABASE", "clue": "Organized collection of data" }, + { "word": "SOFTWARE", "clue": "Computer programs" }, + { "word": "HARDWARE", "clue": "Physical computer components" }, + { "word": "NETWORK", "clue": "Connected system of computers" }, + { "word": "CODE", "clue": "Programming instructions" }, + { "word": "ROBOT", "clue": "Automated machine" }, + { "word": "ARTIFICIAL", "clue": "Made by humans, not natural" }, + { "word": "DIGITAL", "clue": "Using binary data" }, + { "word": "BINARY", "clue": "Base-2 number system" }, + { "word": "PROCESSOR", "clue": "Computer's brain" }, + { "word": "MEMORY", "clue": "Data storage component" }, + { "word": "KEYBOARD", "clue": "Input device with keys" }, + { "word": "MONITOR", "clue": "Computer display screen" }, + { "word": "MOUSE", "clue": "Pointing 
input device" }, + { "word": "PRINTER", "clue": "Device that prints documents" }, + { "word": "SCANNER", "clue": "Device that digitizes images" }, + { "word": "CAMERA", "clue": "Device that captures images" }, + { "word": "SMARTPHONE", "clue": "Portable computing device" }, + { "word": "TABLET", "clue": "Touchscreen computing device" }, + { "word": "LAPTOP", "clue": "Portable computer" }, + { "word": "SERVER", "clue": "Computer that serves data" }, + { "word": "CLOUD", "clue": "Internet-based computing" }, + { "word": "WEBSITE", "clue": "Collection of web pages" }, + { "word": "EMAIL", "clue": "Electronic mail" }, + { "word": "BROWSER", "clue": "Web navigation software" }, + { "word": "SEARCH", "clue": "Look for information" }, + { "word": "DOWNLOAD", "clue": "Transfer data to device" }, + { "word": "UPLOAD", "clue": "Transfer data from device" }, + { "word": "BANDWIDTH", "clue": "Data transfer capacity" }, + { "word": "PROTOCOL", "clue": "Communication rules" }, + { "word": "FIREWALL", "clue": "Network security barrier" }, + { "word": "ENCRYPTION", "clue": "Data scrambling for security" }, + { "word": "PASSWORD", "clue": "Secret access code" }, + { "word": "SECURITY", "clue": "Protection from threats" }, + { "word": "VIRUS", "clue": "Malicious computer program" }, + { "word": "MALWARE", "clue": "Harmful software" }, + { "word": "ANTIVIRUS", "clue": "Protection software" }, + { "word": "BACKUP", "clue": "Data safety copy" }, + { "word": "RECOVERY", "clue": "Data restoration process" }, + { "word": "STORAGE", "clue": "Data keeping capacity" }, + { "word": "HARDDRIVE", "clue": "Magnetic storage device" }, + { "word": "FLASH", "clue": "Solid state storage" }, + { "word": "RAM", "clue": "Random access memory" }, + { "word": "ROM", "clue": "Read-only memory" }, + { "word": "CPU", "clue": "Central processing unit" }, + { "word": "GPU", "clue": "Graphics processing unit" }, + { "word": "MOTHERBOARD", "clue": "Main circuit board" }, + { "word": "CHIP", "clue": "Integrated circuit" }, + { "word": "CIRCUIT", "clue": "Electronic pathway" }, + { "word": "TRANSISTOR", "clue": "Electronic switch" }, + { "word": "SILICON", "clue": "Semiconductor material" }, + { "word": "NANOTECHNOLOGY", "clue": "Extremely small scale tech" }, + { "word": "AUTOMATION", "clue": "Self-operating technology" }, + { "word": "MACHINE", "clue": "Mechanical device" }, + { "word": "SENSOR", "clue": "Detection device" }, + { "word": "ACTUATOR", "clue": "Movement device" }, + { "word": "FEEDBACK", "clue": "System response information" }, + { "word": "PROGRAMMING", "clue": "Writing computer instructions" }, + { "word": "FUNCTION", "clue": "Reusable code block" }, + { "word": "VARIABLE", "clue": "Data storage container" }, + { "word": "LOOP", "clue": "Repeating code structure" }, + { "word": "CONDITION", "clue": "Decision-making logic" }, + { "word": "DEBUG", "clue": "Find and fix errors" }, + { "word": "COMPILE", "clue": "Convert code to executable" }, + { "word": "RUNTIME", "clue": "Program execution time" }, + { "word": "API", "clue": "Application programming interface" }, + { "word": "FRAMEWORK", "clue": "Code structure foundation" }, + { "word": "LIBRARY", "clue": "Reusable code collection" }, + { "word": "MODULE", "clue": "Self-contained code unit" }, + { "word": "OBJECT", "clue": "Data and methods container" }, + { "word": "CLASS", "clue": "Object blueprint" }, + { "word": "INHERITANCE", "clue": "Code reuse mechanism" }, + { "word": "INTERFACE", "clue": "System interaction boundary" }, + { "word": "PROTOCOL", "clue": 
"Communication standard" }, + { "word": "FORMAT", "clue": "Data structure standard" }, + { "word": "SYNTAX", "clue": "Language rules" }, + { "word": "SEMANTIC", "clue": "Meaning in code" }, + { "word": "PARSING", "clue": "Analyzing code structure" }, + { "word": "COMPILER", "clue": "Code translation program" }, + { "word": "INTERPRETER", "clue": "Code execution program" }, + { "word": "VIRTUAL", "clue": "Simulated environment" }, + { "word": "SIMULATION", "clue": "Computer modeling" }, + { "word": "EMULATION", "clue": "System imitation" }, + { "word": "OPTIMIZATION", "clue": "Performance improvement" }, + { "word": "EFFICIENCY", "clue": "Resource usage effectiveness" }, + { "word": "PERFORMANCE", "clue": "System speed and quality" }, + { "word": "BENCHMARK", "clue": "Performance measurement" }, + { "word": "TESTING", "clue": "Quality verification process" }, + { "word": "VALIDATION", "clue": "Correctness checking" }, + { "word": "VERIFICATION", "clue": "Accuracy confirmation" }, + { "word": "QUALITY", "clue": "Standard of excellence" }, + { "word": "MAINTENANCE", "clue": "System upkeep" }, + { "word": "UPDATE", "clue": "Software improvement" }, + { "word": "PATCH", "clue": "Software fix" }, + { "word": "VERSION", "clue": "Software release number" }, + { "word": "RELEASE", "clue": "Software distribution" }, + { "word": "DEPLOYMENT", "clue": "Software installation" }, + { "word": "CONFIGURATION", "clue": "System setup" }, + { "word": "INSTALLATION", "clue": "Software setup process" }, + { "word": "MIGRATION", "clue": "System transition" }, + { "word": "INTEGRATION", "clue": "System combination" }, + { "word": "COMPATIBILITY", "clue": "System cooperation ability" }, + { "word": "INTEROPERABILITY", "clue": "Cross-system communication" }, + { "word": "SCALABILITY", "clue": "Growth accommodation ability" }, + { "word": "RELIABILITY", "clue": "Consistent performance" }, + { "word": "AVAILABILITY", "clue": "System accessibility" }, + { "word": "REDUNDANCY", "clue": "Backup system duplication" }, + { "word": "FAULT", "clue": "System error condition" }, + { "word": "TOLERANCE", "clue": "Error handling ability" }, + { "word": "RECOVERY", "clue": "System restoration" }, + { "word": "MONITORING", "clue": "System observation" }, + { "word": "LOGGING", "clue": "Event recording" }, + { "word": "ANALYTICS", "clue": "Data analysis" }, + { "word": "METRICS", "clue": "Measurement data" }, + { "word": "DASHBOARD", "clue": "Information display panel" }, + { "word": "INTERFACE", "clue": "User interaction design" }, + { "word": "EXPERIENCE", "clue": "User interaction quality" }, + { "word": "USABILITY", "clue": "Ease of use" }, + { "word": "ACCESSIBILITY", "clue": "Universal design principle" }, + { "word": "RESPONSIVE", "clue": "Adaptive design" }, + { "word": "MOBILE", "clue": "Portable device category" }, + { "word": "TOUCHSCREEN", "clue": "Touch-sensitive display" }, + { "word": "GESTURE", "clue": "Touch movement command" }, + { "word": "VOICE", "clue": "Speech interaction" }, + { "word": "RECOGNITION", "clue": "Pattern identification" }, + { "word": "LEARNING", "clue": "Adaptive improvement" }, + { "word": "INTELLIGENCE", "clue": "Artificial reasoning" }, + { "word": "NEURAL", "clue": "Brain-inspired network" }, + { "word": "DEEP", "clue": "Multi-layered learning" }, + { "word": "MACHINE", "clue": "Automated learning system" }, + { "word": "DATA", "clue": "Information collection" }, + { "word": "BIG", "clue": "Large scale data" }, + { "word": "MINING", "clue": "Data pattern extraction" }, + { "word": 
"ANALYSIS", "clue": "Data examination" }, + { "word": "VISUALIZATION", "clue": "Data graphic representation" }, + { "word": "DASHBOARD", "clue": "Data monitoring panel" }, + { "word": "REPORT", "clue": "Data summary document" }, + { "word": "QUERY", "clue": "Data search request" }, + { "word": "INDEX", "clue": "Data location reference" }, + { "word": "SCHEMA", "clue": "Data structure blueprint" }, + { "word": "TABLE", "clue": "Data organization structure" }, + { "word": "RECORD", "clue": "Data entry" }, + { "word": "FIELD", "clue": "Data element" }, + { "word": "PRIMARY", "clue": "Main identifier key" }, + { "word": "FOREIGN", "clue": "Reference relationship key" }, + { "word": "RELATION", "clue": "Data connection" }, + { "word": "JOIN", "clue": "Data combination operation" }, + { "word": "TRANSACTION", "clue": "Data operation sequence" }, + { "word": "COMMIT", "clue": "Data change confirmation" }, + { "word": "ROLLBACK", "clue": "Data change reversal" }, + { "word": "CONCURRENCY", "clue": "Simultaneous access handling" }, + { "word": "LOCK", "clue": "Data access control" }, + { "word": "SYNCHRONIZATION", "clue": "Timing coordination" }, + { "word": "THREAD", "clue": "Execution sequence" }, + { "word": "PROCESS", "clue": "Running program instance" }, + { "word": "MULTITASKING", "clue": "Multiple process handling" }, + { "word": "PARALLEL", "clue": "Simultaneous execution" }, + { "word": "DISTRIBUTED", "clue": "Spread across multiple systems" }, + { "word": "CLUSTER", "clue": "Group of connected computers" }, + { "word": "GRID", "clue": "Distributed computing network" }, + { "word": "PEER", "clue": "Equal network participant" }, + { "word": "CLIENT", "clue": "Service requesting system" }, + { "word": "SERVICE", "clue": "System functionality provider" }, + { "word": "MICROSERVICE", "clue": "Small independent service" }, + { "word": "CONTAINER", "clue": "Isolated application environment" }, + { "word": "DOCKER", "clue": "Containerization platform" }, + { "word": "KUBERNETES", "clue": "Container orchestration" }, + { "word": "DEVOPS", "clue": "Development operations practice" }, + { "word": "AGILE", "clue": "Flexible development method" }, + { "word": "SCRUM", "clue": "Iterative development framework" }, + { "word": "SPRINT", "clue": "Short development cycle" }, + { "word": "KANBAN", "clue": "Visual workflow management" }, + { "word": "CONTINUOUS", "clue": "Ongoing integration practice" }, + { "word": "PIPELINE", "clue": "Automated workflow" }, + { "word": "BUILD", "clue": "Software compilation process" }, + { "word": "TESTING", "clue": "Quality assurance process" }, + { "word": "AUTOMATION", "clue": "Manual task elimination" }, + { "word": "SCRIPT", "clue": "Automated task sequence" }, + { "word": "BATCH", "clue": "Group processing" }, + { "word": "STREAMING", "clue": "Continuous data flow" }, + { "word": "REALTIME", "clue": "Immediate processing" }, + { "word": "LATENCY", "clue": "Response delay time" }, + { "word": "THROUGHPUT", "clue": "Processing capacity" }, + { "word": "BOTTLENECK", "clue": "Performance limitation point" }, + { "word": "CACHE", "clue": "Fast temporary storage" }, + { "word": "BUFFER", "clue": "Temporary data holder" }, + { "word": "QUEUE", "clue": "Ordered waiting line" }, + { "word": "STACK", "clue": "Last-in-first-out structure" }, + { "word": "HEAP", "clue": "Dynamic memory area" }, + { "word": "POINTER", "clue": "Memory address reference" }, + { "word": "REFERENCE", "clue": "Object location indicator" }, + { "word": "GARBAGE", "clue": "Unused memory collection" }, + 
{ "word": "ALLOCATION", "clue": "Memory assignment" }, + { "word": "DEALLOCATION", "clue": "Memory release" }, + { "word": "LEAK", "clue": "Memory usage error" }, + { "word": "OVERFLOW", "clue": "Capacity exceeding error" }, + { "word": "UNDERFLOW", "clue": "Insufficient data error" }, + { "word": "EXCEPTION", "clue": "Error handling mechanism" }, + { "word": "INTERRUPT", "clue": "Process suspension signal" }, + { "word": "SIGNAL", "clue": "Process communication" }, + { "word": "EVENT", "clue": "System occurrence" }, + { "word": "HANDLER", "clue": "Event processing function" }, + { "word": "CALLBACK", "clue": "Function reference" }, + { "word": "PROMISE", "clue": "Future value placeholder" }, + { "word": "ASYNC", "clue": "Non-blocking operation" }, + { "word": "AWAIT", "clue": "Pause for completion" }, + { "word": "YIELD", "clue": "Temporary function pause" }, + { "word": "GENERATOR", "clue": "Value sequence producer" }, + { "word": "ITERATOR", "clue": "Sequential access pattern" }, + { "word": "RECURSION", "clue": "Self-calling function" }, + { "word": "CLOSURE", "clue": "Function scope retention" }, + { "word": "LAMBDA", "clue": "Anonymous function" }, + { "word": "FUNCTIONAL", "clue": "Function-based programming" }, + { "word": "PROCEDURAL", "clue": "Step-by-step programming" }, + { "word": "DECLARATIVE", "clue": "What-not-how programming" }, + { "word": "IMPERATIVE", "clue": "Command-based programming" } +] \ No newline at end of file diff --git a/crossword-app/backend-py/debug_full_generation.py b/crossword-app/backend-py/debug_full_generation.py new file mode 100644 index 0000000000000000000000000000000000000000..ede3a8c6bb5bd00d5038de74109685f035b210fe --- /dev/null +++ b/crossword-app/backend-py/debug_full_generation.py @@ -0,0 +1,316 @@ +#!/usr/bin/env python3 +""" +Debug the complete crossword generation process to identify display/numbering issues. 
+""" + +import asyncio +import sys +import json +from pathlib import Path + +# Add project root to path +project_root = Path(__file__).parent +sys.path.insert(0, str(project_root)) + +from src.services.crossword_generator_fixed import CrosswordGeneratorFixed + +async def debug_complete_generation(): + """Debug the complete crossword generation process.""" + + print("🔍 Debugging Complete Crossword Generation Process\n") + + # Create generator with no vector service to use static words + generator = CrosswordGeneratorFixed(vector_service=None) + + # Override the word selection to use controlled test words + test_words = [ + {"word": "MACHINE", "clue": "Device with moving parts"}, + {"word": "COMPUTER", "clue": "Electronic device"}, + {"word": "EXPERT", "clue": "Person with specialized knowledge"}, + {"word": "SCIENCE", "clue": "Systematic study"}, + {"word": "TECHNOLOGY", "clue": "Applied science"}, + {"word": "RESEARCH", "clue": "Systematic investigation"}, + {"word": "ANALYSIS", "clue": "Detailed examination"}, + {"word": "METHOD", "clue": "Systematic approach"} + ] + + # Mock the word selection method + async def mock_select_words(topics, difficulty, use_ai): + return test_words + generator._select_words = mock_select_words + + print("=" * 70) + print("GENERATING COMPLETE CROSSWORD") + print("=" * 70) + + try: + result = await generator.generate_puzzle(["technology"], "medium", use_ai=False) + + if result: + print("✅ Crossword generation successful!") + + # Analyze the complete result + analyze_crossword_result(result) + else: + print("❌ Crossword generation failed - returned None") + + except Exception as e: + print(f"❌ Crossword generation failed with error: {e}") + import traceback + traceback.print_exc() + +def analyze_crossword_result(result): + """Analyze the complete crossword result for potential issues.""" + + print("\n" + "=" * 70) + print("CROSSWORD RESULT ANALYSIS") + print("=" * 70) + + # Print basic metadata + metadata = result.get("metadata", {}) + print("Metadata:") + for key, value in metadata.items(): + print(f" {key}: {value}") + + # Analyze the grid + grid = result.get("grid", []) + print(f"\nGrid dimensions: {len(grid)}x{len(grid[0]) if grid else 0}") + + print("\nGrid layout:") + print_numbered_grid(grid) + + # Analyze placed words vs clues + clues = result.get("clues", []) + print(f"\nNumber of clues generated: {len(clues)}") + + print("\nClue analysis:") + for i, clue in enumerate(clues): + print(f" Clue {i+1}:") + print(f" Number: {clue.get('number', 'MISSING')}") + print(f" Word: {clue.get('word', 'MISSING')}") + print(f" Direction: {clue.get('direction', 'MISSING')}") + print(f" Position: {clue.get('position', 'MISSING')}") + print(f" Text: {clue.get('text', 'MISSING')}") + + # Check for potential issues + print("\n" + "=" * 70) + print("ISSUE DETECTION") + print("=" * 70) + + check_word_boundary_consistency(grid, clues) + check_numbering_consistency(clues) + check_grid_word_alignment(grid, clues) + +def print_numbered_grid(grid): + """Print grid with coordinates for analysis.""" + if not grid: + print(" Empty grid") + return + + # Print column headers + print(" ", end="") + for c in range(len(grid[0])): + print(f"{c:2d}", end="") + print() + + # Print rows with row numbers + for r in range(len(grid)): + print(f" {r:2d}: ", end="") + for c in range(len(grid[0])): + cell = grid[r][c] + if cell == ".": + print(" .", end="") + else: + print(f" {cell}", end="") + print() + +def check_word_boundary_consistency(grid, clues): + """Check if words in clues match what's 
actually in the grid.""" + + print("Checking word boundary consistency:") + + issues_found = [] + + for clue in clues: + word = clue.get("word", "") + position = clue.get("position", {}) + direction = clue.get("direction", "") + + if not all([word, position, direction]): + issues_found.append(f"Incomplete clue data: {clue}") + continue + + row = position.get("row", -1) + col = position.get("col", -1) + + if row < 0 or col < 0: + issues_found.append(f"Invalid position for word '{word}': {position}") + continue + + # Extract the actual word from the grid + grid_word = extract_word_from_grid(grid, row, col, direction, len(word)) + + if grid_word != word: + issues_found.append(f"Mismatch for '{word}' at ({row}, {col}) {direction}: grid shows '{grid_word}'") + + if issues_found: + print(" ❌ Issues found:") + for issue in issues_found: + print(f" {issue}") + else: + print(" ✅ All words match grid positions") + +def extract_word_from_grid(grid, row, col, direction, expected_length): + """Extract a word from the grid at the given position and direction.""" + + if row >= len(grid) or col >= len(grid[0]): + return "OUT_OF_BOUNDS" + + word = "" + + if direction == "across": # horizontal + for i in range(expected_length): + if col + i >= len(grid[0]): + return word + "TRUNCATED" + word += grid[row][col + i] + + elif direction == "down": # vertical + for i in range(expected_length): + if row + i >= len(grid): + return word + "TRUNCATED" + word += grid[row + i][col] + + return word + +def check_numbering_consistency(clues): + """Check if clue numbering is consistent and logical.""" + + print("\nChecking numbering consistency:") + + numbers = [clue.get("number", -1) for clue in clues] + issues = [] + + # Check for duplicate numbers + if len(numbers) != len(set(numbers)): + issues.append("Duplicate clue numbers found") + + # Check for missing numbers in sequence + if numbers: + min_num = min(numbers) + max_num = max(numbers) + expected = set(range(min_num, max_num + 1)) + actual = set(numbers) + + if expected != actual: + missing = expected - actual + extra = actual - expected + if missing: + issues.append(f"Missing numbers: {sorted(missing)}") + if extra: + issues.append(f"Extra numbers: {sorted(extra)}") + + if issues: + print(" ❌ Numbering issues:") + for issue in issues: + print(f" {issue}") + else: + print(" ✅ Numbering is consistent") + +def check_grid_word_alignment(grid, clues): + """Check if all words are properly aligned and don't create unintended extensions.""" + + print("\nChecking grid word alignment:") + + # Find all letter sequences in the grid + horizontal_sequences = find_horizontal_sequences(grid) + vertical_sequences = find_vertical_sequences(grid) + + print(f" Found {len(horizontal_sequences)} horizontal sequences") + print(f" Found {len(vertical_sequences)} vertical sequences") + + # Check if each sequence corresponds to a clue + clue_words = {} + for clue in clues: + pos = clue.get("position", {}) + key = (pos.get("row"), pos.get("col"), clue.get("direction")) + clue_words[key] = clue.get("word", "") + + issues = [] + + # Check horizontal sequences + for seq in horizontal_sequences: + row, start_col, word = seq + key = (row, start_col, "across") + if key not in clue_words: + issues.append(f"Unaccounted horizontal sequence: '{word}' at ({row}, {start_col})") + elif clue_words[key] != word: + issues.append(f"Mismatch: clue says '{clue_words[key]}' but grid shows '{word}' at ({row}, {start_col})") + + # Check vertical sequences + for seq in vertical_sequences: + col, start_row, word = 
seq + key = (start_row, col, "down") + if key not in clue_words: + issues.append(f"Unaccounted vertical sequence: '{word}' at ({start_row}, {col})") + elif clue_words[key] != word: + issues.append(f"Mismatch: clue says '{clue_words[key]}' but grid shows '{word}' at ({start_row}, {col})") + + if issues: + print(" ❌ Alignment issues found:") + for issue in issues: + print(f" {issue}") + else: + print(" ✅ All words are properly aligned") + +def find_horizontal_sequences(grid): + """Find all horizontal letter sequences of length > 1.""" + sequences = [] + + for r in range(len(grid)): + current_word = "" + start_col = None + + for c in range(len(grid[0])): + if grid[r][c] != ".": + if start_col is None: + start_col = c + current_word += grid[r][c] + else: + if current_word and len(current_word) > 1: + sequences.append((r, start_col, current_word)) + current_word = "" + start_col = None + + # Handle word at end of row + if current_word and len(current_word) > 1: + sequences.append((r, start_col, current_word)) + + return sequences + +def find_vertical_sequences(grid): + """Find all vertical letter sequences of length > 1.""" + sequences = [] + + for c in range(len(grid[0])): + current_word = "" + start_row = None + + for r in range(len(grid)): + if grid[r][c] != ".": + if start_row is None: + start_row = r + current_word += grid[r][c] + else: + if current_word and len(current_word) > 1: + sequences.append((c, start_row, current_word)) + current_word = "" + start_row = None + + # Handle word at end of column + if current_word and len(current_word) > 1: + sequences.append((c, start_row, current_word)) + + return sequences + +if __name__ == "__main__": + asyncio.run(debug_complete_generation()) \ No newline at end of file diff --git a/crossword-app/backend-py/debug_grid_direct.py b/crossword-app/backend-py/debug_grid_direct.py new file mode 100644 index 0000000000000000000000000000000000000000..01d60e1e91cc6a36886689dfd124df48d14840f6 --- /dev/null +++ b/crossword-app/backend-py/debug_grid_direct.py @@ -0,0 +1,293 @@ +#!/usr/bin/env python3 +""" +Direct grid generation test to identify word boundary/display issues. 
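Both debug scripts rely on the same scan: walk each row (and column) of the grid, collect maximal runs of letters, and compare them against the words the generator claims to have placed. A self-contained toy version of the horizontal scan is shown below; the grid and expected output are invented purely for illustration, with "." marking empty cells as in the scripts above.

```python
def horizontal_runs(grid):
    """Collect every maximal horizontal run of letters longer than one cell."""
    runs = []
    for r, row in enumerate(grid):
        word, start = "", None
        for c, cell in enumerate(row + ["."]):  # trailing sentinel flushes the last run
            if cell != ".":
                if start is None:
                    start = c
                word += cell
            else:
                if len(word) > 1:
                    runs.append((r, start, word))
                word, start = "", None
    return runs

grid = [
    ["C", "A", "T", "."],
    [".", ".", "O", "."],
    ["D", "O", "G", "."],
]
print(horizontal_runs(grid))  # -> [(0, 0, 'CAT'), (2, 0, 'DOG')]
```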
+""" + +import sys +from pathlib import Path + +# Add project root to path +project_root = Path(__file__).parent +sys.path.insert(0, str(project_root)) + +from src.services.crossword_generator_fixed import CrosswordGeneratorFixed + +def test_direct_grid_generation(): + """Test grid generation directly with controlled words.""" + + print("🔍 Direct Grid Generation Test\n") + + generator = CrosswordGeneratorFixed(vector_service=None) + + # Test words that might cause the issues seen in the images + test_words = [ + {"word": "MACHINE", "clue": "Device with moving parts"}, + {"word": "COMPUTER", "clue": "Electronic device"}, + {"word": "EXPERT", "clue": "Person with specialized knowledge"}, + {"word": "SCIENCE", "clue": "Systematic study"}, + {"word": "CAMERA", "clue": "Device for taking photos"}, + {"word": "METHOD", "clue": "Systematic approach"} + ] + + print("=" * 60) + print("TEST 1: Direct grid creation") + print("=" * 60) + + # Test the _create_grid method directly + result = generator._create_grid(test_words) + + if result: + print("✅ Grid generation successful!") + + grid = result["grid"] + placed_words = result["placed_words"] + clues = result["clues"] + + print(f"Grid size: {len(grid)}x{len(grid[0])}") + print(f"Words placed: {len(placed_words)}") + print(f"Clues generated: {len(clues)}") + + # Print the grid + print("\nGenerated Grid:") + print_grid_with_coordinates(grid) + + # Print placed words details + print("\nPlaced Words:") + for i, word_info in enumerate(placed_words): + print(f" {i+1}. {word_info['word']} at ({word_info['row']}, {word_info['col']}) {word_info['direction']}") + + # Print clues + print("\nGenerated Clues:") + for clue in clues: + print(f" {clue['number']}. {clue['direction']}: {clue['word']} - {clue['text']}") + + # Analyze for potential issues + print("\n" + "=" * 60) + print("ANALYSIS") + print("=" * 60) + + analyze_grid_issues(grid, placed_words, clues) + + else: + print("❌ Grid generation failed") + + # Test another scenario that might reproduce the image issues + print("\n" + "=" * 60) + print("TEST 2: Scenario with potential extension words") + print("=" * 60) + + # Words that might create the "MACHINERY" type issue + extension_words = [ + {"word": "MACHINE", "clue": "Device with moving parts"}, + {"word": "MACHINERY", "clue": "Mechanical equipment"}, # Might cause confusion + {"word": "EXPERT", "clue": "Specialist"}, + {"word": "TECHNOLOGY", "clue": "Applied science"}, + ] + + result2 = generator._create_grid(extension_words) + + if result2: + print("✅ Extension test grid generated!") + + grid2 = result2["grid"] + placed_words2 = result2["placed_words"] + + print("\nExtension Test Grid:") + print_grid_with_coordinates(grid2) + + print("\nPlaced Words:") + for i, word_info in enumerate(placed_words2): + print(f" {i+1}. 
{word_info['word']} at ({word_info['row']}, {word_info['col']}) {word_info['direction']}") + + # Check specifically for MACHINE vs MACHINERY issues + check_machine_machinery_issue(grid2, placed_words2) + + else: + print("❌ Extension test grid generation failed") + +def print_grid_with_coordinates(grid): + """Print grid with row and column coordinates.""" + if not grid: + print(" Empty grid") + return + + # Print column headers + print(" ", end="") + for c in range(len(grid[0])): + print(f"{c:2d}", end="") + print() + + # Print rows + for r in range(len(grid)): + print(f" {r:2d}: ", end="") + for c in range(len(grid[0])): + cell = grid[r][c] + if cell == ".": + print(" .", end="") + else: + print(f" {cell}", end="") + print() + +def analyze_grid_issues(grid, placed_words, clues): + """Analyze the grid for potential boundary/display issues.""" + + print("Checking for potential issues...") + + issues = [] + + # Check 1: Verify each placed word actually exists in the grid + for word_info in placed_words: + word = word_info["word"] + row = word_info["row"] + col = word_info["col"] + direction = word_info["direction"] + + grid_word = extract_word_from_grid(grid, row, col, direction, len(word)) + + if grid_word != word: + issues.append(f"Word mismatch: '{word}' expected at ({row},{col}) {direction}, but grid shows '{grid_word}'") + + # Check 2: Look for unintended letter sequences + all_sequences = find_all_letter_sequences(grid) + intended_words = {(w["row"], w["col"], w["direction"]): w["word"] for w in placed_words} + + for seq_info in all_sequences: + row, col, direction, seq_word = seq_info + key = (row, col, direction) + + if key not in intended_words: + if len(seq_word) > 1: # Only care about multi-letter sequences + issues.append(f"Unintended sequence: '{seq_word}' at ({row},{col}) {direction}") + elif intended_words[key] != seq_word: + issues.append(f"Sequence mismatch: expected '{intended_words[key]}' but found '{seq_word}' at ({row},{col}) {direction}") + + # Check 3: Verify clue consistency + for clue in clues: + clue_word = clue["word"] + pos = clue["position"] + clue_row = pos["row"] + clue_col = pos["col"] + clue_direction = clue["direction"] + + # Convert direction format if needed + direction_map = {"across": "horizontal", "down": "vertical"} + normalized_direction = direction_map.get(clue_direction, clue_direction) + + grid_word = extract_word_from_grid(grid, clue_row, clue_col, normalized_direction, len(clue_word)) + + if grid_word != clue_word: + issues.append(f"Clue mismatch: clue says '{clue_word}' at ({clue_row},{clue_col}) {clue_direction}, but grid shows '{grid_word}'") + + # Report results + if issues: + print("❌ Issues found:") + for issue in issues: + print(f" {issue}") + else: + print("✅ No issues detected - grid appears consistent") + +def extract_word_from_grid(grid, row, col, direction, expected_length): + """Extract word from grid at given position and direction.""" + if row >= len(grid) or col >= len(grid[0]) or row < 0 or col < 0: + return "OUT_OF_BOUNDS" + + word = "" + + if direction in ["horizontal", "across"]: + for i in range(expected_length): + if col + i >= len(grid[0]): + return word + "[TRUNCATED]" + word += grid[row][col + i] + elif direction in ["vertical", "down"]: + for i in range(expected_length): + if row + i >= len(grid): + return word + "[TRUNCATED]" + word += grid[row + i][col] + + return word + +def find_all_letter_sequences(grid): + """Find all letter sequences (horizontal and vertical) in the grid.""" + sequences = [] + + # Horizontal 
sequences + for r in range(len(grid)): + current_word = "" + start_col = None + + for c in range(len(grid[0])): + if grid[r][c] != ".": + if start_col is None: + start_col = c + current_word += grid[r][c] + else: + if current_word and len(current_word) > 1: + sequences.append((r, start_col, "horizontal", current_word)) + current_word = "" + start_col = None + + # Handle end of row + if current_word and len(current_word) > 1: + sequences.append((r, start_col, "horizontal", current_word)) + + # Vertical sequences + for c in range(len(grid[0])): + current_word = "" + start_row = None + + for r in range(len(grid)): + if grid[r][c] != ".": + if start_row is None: + start_row = r + current_word += grid[r][c] + else: + if current_word and len(current_word) > 1: + sequences.append((start_row, c, "vertical", current_word)) + current_word = "" + start_row = None + + # Handle end of column + if current_word and len(current_word) > 1: + sequences.append((start_row, c, "vertical", current_word)) + + return sequences + +def check_machine_machinery_issue(grid, placed_words): + """Specifically check for MACHINE vs MACHINERY confusion.""" + + print("\nChecking for MACHINE/MACHINERY issue:") + + machine_words = [w for w in placed_words if "MACHINE" in w["word"]] + + if not machine_words: + print(" No MACHINE-related words found") + return + + for word_info in machine_words: + word = word_info["word"] + row = word_info["row"] + col = word_info["col"] + direction = word_info["direction"] + + print(f" Found: '{word}' at ({row},{col}) {direction}") + + # Check what's actually in the grid at this location + grid_word = extract_word_from_grid(grid, row, col, direction, len(word)) + print(f" Grid shows: '{grid_word}'") + + # Check if there are extra letters that might create confusion + if direction == "horizontal": + # Check for letters after the word + end_col = col + len(word) + if end_col < len(grid[0]) and grid[row][end_col] != ".": + extra_letters = "" + check_col = end_col + while check_col < len(grid[0]) and grid[row][check_col] != ".": + extra_letters += grid[row][check_col] + check_col += 1 + if extra_letters: + print(f" ⚠️ Extra letters after word: '{extra_letters}'") + print(f" This might make '{word}' appear as '{word + extra_letters}'") + +if __name__ == "__main__": + test_direct_grid_generation() \ No newline at end of file diff --git a/crossword-app/backend-py/debug_index_error.py b/crossword-app/backend-py/debug_index_error.py new file mode 100644 index 0000000000000000000000000000000000000000..eb7548b92fdc7b1849d93ce9d3442e3c5b4ee2c7 --- /dev/null +++ b/crossword-app/backend-py/debug_index_error.py @@ -0,0 +1,307 @@ +#!/usr/bin/env python3 +""" +Debug the recurring index error by adding comprehensive bounds checking. 
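The subclass that follows wraps every grid access in explicit bounds checks and logging. Stripped of the logging and of the word-boundary (adjacency) rules the real generator enforces, the invariant it guards can be written as a small pure function; the sketch below is that simplification, not the project's actual placement logic.

```python
def fits(grid, word, row, col, direction):
    """Simplified check: the word stays inside the square grid and only crosses matching letters."""
    size = len(grid)
    dr, dc = (0, 1) if direction == "horizontal" else (1, 0)
    end_row = row + dr * (len(word) - 1)
    end_col = col + dc * (len(word) - 1)
    if row < 0 or col < 0 or end_row >= size or end_col >= size:
        return False
    return all(grid[row + dr * i][col + dc * i] in (".", letter)
               for i, letter in enumerate(word))

empty = [["."] * 5 for _ in range(5)]
print(fits(empty, "CAT", 2, 1, "horizontal"))    # True
print(fits(empty, "CAMEL", 2, 3, "horizontal"))  # False: would run past column 4
```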
+""" + +import asyncio +import sys +import logging +from pathlib import Path + +# Add project root to path +project_root = Path(__file__).parent +sys.path.insert(0, str(project_root)) + +from src.services.crossword_generator_fixed import CrosswordGeneratorFixed +from src.services.vector_search import VectorSearchService + +# Enable debug logging +logging.basicConfig(level=logging.DEBUG) +logger = logging.getLogger(__name__) + +class DebugCrosswordGenerator(CrosswordGeneratorFixed): + """Debug version with comprehensive bounds checking.""" + + def _can_place_word(self, grid, word, row, col, direction): + """Enhanced _can_place_word with comprehensive bounds checking.""" + try: + size = len(grid) + logger.debug(f"_can_place_word: word={word}, row={row}, col={col}, direction={direction}, grid_size={size}") + + # Check initial boundaries + if row < 0 or col < 0 or row >= size or col >= size: + logger.debug(f"Initial bounds check failed: row={row}, col={col}, size={size}") + return False + + if direction == "horizontal": + if col + len(word) > size: + logger.debug(f"Horizontal bounds check failed: col+len(word)={col + len(word)} > size={size}") + return False + + # Check word boundaries (no adjacent letters) - with bounds check + if col > 0: + if row >= size or col - 1 >= size or row < 0 or col - 1 < 0: + logger.debug(f"Horizontal left boundary check failed: row={row}, col-1={col-1}, size={size}") + return False + if grid[row][col - 1] != ".": + logger.debug(f"Horizontal left boundary has adjacent letter") + return False + + if col + len(word) < size: + if row >= size or col + len(word) >= size or row < 0 or col + len(word) < 0: + logger.debug(f"Horizontal right boundary check failed: row={row}, col+len={col + len(word)}, size={size}") + return False + if grid[row][col + len(word)] != ".": + logger.debug(f"Horizontal right boundary has adjacent letter") + return False + + # Check each letter position + for i, letter in enumerate(word): + check_row = row + check_col = col + i + if check_row >= size or check_col >= size or check_row < 0 or check_col < 0: + logger.debug(f"Horizontal letter position check failed: letter {i}, row={check_row}, col={check_col}, size={size}") + return False + current_cell = grid[check_row][check_col] + if current_cell != "." 
and current_cell != letter: + logger.debug(f"Horizontal letter conflict: expected {letter}, found {current_cell}") + return False + + else: # vertical + if row + len(word) > size: + logger.debug(f"Vertical bounds check failed: row+len(word)={row + len(word)} > size={size}") + return False + + # Check word boundaries - with bounds check + if row > 0: + if row - 1 >= size or col >= size or row - 1 < 0 or col < 0: + logger.debug(f"Vertical top boundary check failed: row-1={row-1}, col={col}, size={size}") + return False + if grid[row - 1][col] != ".": + logger.debug(f"Vertical top boundary has adjacent letter") + return False + + if row + len(word) < size: + if row + len(word) >= size or col >= size or row + len(word) < 0 or col < 0: + logger.debug(f"Vertical bottom boundary check failed: row+len={row + len(word)}, col={col}, size={size}") + return False + if grid[row + len(word)][col] != ".": + logger.debug(f"Vertical bottom boundary has adjacent letter") + return False + + # Check each letter position + for i, letter in enumerate(word): + check_row = row + i + check_col = col + if check_row >= size or check_col >= size or check_row < 0 or check_col < 0: + logger.debug(f"Vertical letter position check failed: letter {i}, row={check_row}, col={check_col}, size={size}") + return False + current_cell = grid[check_row][check_col] + if current_cell != "." and current_cell != letter: + logger.debug(f"Vertical letter conflict: expected {letter}, found {current_cell}") + return False + + logger.debug(f"_can_place_word: SUCCESS for word={word}") + return True + + except Exception as e: + logger.error(f"❌ ERROR in _can_place_word: {e}") + logger.error(f" word={word}, row={row}, col={col}, direction={direction}") + logger.error(f" grid_size={len(grid) if grid else 'None'}") + import traceback + traceback.print_exc() + return False + + def _place_word(self, grid, word, row, col, direction): + """Enhanced _place_word with comprehensive bounds checking.""" + try: + size = len(grid) + logger.debug(f"_place_word: word={word}, row={row}, col={col}, direction={direction}, grid_size={size}") + + original_state = [] + + if direction == "horizontal": + for i, letter in enumerate(word): + check_row = row + check_col = col + i + if check_row >= size or check_col >= size or check_row < 0 or check_col < 0: + logger.error(f"❌ _place_word horizontal bounds error: row={check_row}, col={check_col}, size={size}") + raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}") + + original_state.append({ + "row": check_row, + "col": check_col, + "value": grid[check_row][check_col] + }) + grid[check_row][check_col] = letter + else: + for i, letter in enumerate(word): + check_row = row + i + check_col = col + if check_row >= size or check_col >= size or check_row < 0 or check_col < 0: + logger.error(f"❌ _place_word vertical bounds error: row={check_row}, col={check_col}, size={size}") + raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}") + + original_state.append({ + "row": check_row, + "col": check_col, + "value": grid[check_row][check_col] + }) + grid[check_row][check_col] = letter + + logger.debug(f"_place_word: SUCCESS for word={word}") + return original_state + + except Exception as e: + logger.error(f"❌ ERROR in _place_word: {e}") + logger.error(f" word={word}, row={row}, col={col}, direction={direction}") + logger.error(f" grid_size={len(grid) if grid else 'None'}") + import traceback + traceback.print_exc() + raise + + def 
_remove_word(self, grid, original_state): + """Enhanced _remove_word with comprehensive bounds checking.""" + try: + size = len(grid) + logger.debug(f"_remove_word: restoring {len(original_state)} positions, grid_size={size}") + + for state in original_state: + check_row = state["row"] + check_col = state["col"] + if check_row >= size or check_col >= size or check_row < 0 or check_col < 0: + logger.error(f"❌ _remove_word bounds error: row={check_row}, col={check_col}, size={size}") + raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}") + + grid[check_row][check_col] = state["value"] + + logger.debug(f"_remove_word: SUCCESS") + + except Exception as e: + logger.error(f"❌ ERROR in _remove_word: {e}") + logger.error(f" grid_size={len(grid) if grid else 'None'}") + logger.error(f" original_state={original_state}") + import traceback + traceback.print_exc() + raise + + def _create_simple_cross(self, word_list, word_objs): + """Enhanced _create_simple_cross with comprehensive bounds checking.""" + try: + logger.debug(f"_create_simple_cross: words={word_list}") + + if len(word_list) < 2: + logger.debug("Not enough words for simple cross") + return None + + word1, word2 = word_list[0], word_list[1] + intersections = self._find_word_intersections(word1, word2) + + if not intersections: + logger.debug("No intersections found") + return None + + # Use first intersection + intersection = intersections[0] + size = max(len(word1), len(word2)) + 4 + logger.debug(f"Creating grid of size {size} for simple cross") + + grid = [["." for _ in range(size)] for _ in range(size)] + + # Place first word horizontally in center + center_row = size // 2 + center_col = (size - len(word1)) // 2 + + logger.debug(f"Placing word1 '{word1}' at row={center_row}, col={center_col}") + + for i, letter in enumerate(word1): + check_row = center_row + check_col = center_col + i + if check_row >= size or check_col >= size or check_row < 0 or check_col < 0: + logger.error(f"❌ _create_simple_cross word1 bounds error: row={check_row}, col={check_col}, size={size}") + raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}") + grid[check_row][check_col] = letter + + # Place second word vertically at intersection + intersection_col = center_col + intersection["word_pos"] + word2_start_row = center_row - intersection["placed_pos"] + + logger.debug(f"Placing word2 '{word2}' at row={word2_start_row}, col={intersection_col}") + + for i, letter in enumerate(word2): + check_row = word2_start_row + i + check_col = intersection_col + if check_row >= size or check_col >= size or check_row < 0 or check_col < 0: + logger.error(f"❌ _create_simple_cross word2 bounds error: row={check_row}, col={check_col}, size={size}") + raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}") + grid[check_row][check_col] = letter + + placed_words = [ + {"word": word1, "row": center_row, "col": center_col, "direction": "horizontal", "number": 1}, + {"word": word2, "row": word2_start_row, "col": intersection_col, "direction": "vertical", "number": 2} + ] + + logger.debug(f"_create_simple_cross: SUCCESS") + + trimmed = self._trim_grid(grid, placed_words) + clues = self._generate_clues(word_objs[:2], trimmed["placed_words"]) + + return { + "grid": trimmed["grid"], + "placed_words": trimmed["placed_words"], + "clues": clues + } + + except Exception as e: + logger.error(f"❌ ERROR in _create_simple_cross: {e}") + import traceback + 
traceback.print_exc() + raise + +async def test_debug_generator(): + """Test the debug generator to catch index errors.""" + try: + print("🧪 Testing debug crossword generator...") + + # Create mock vector service + vector_service = VectorSearchService() + + # Create debug generator + generator = DebugCrosswordGenerator(vector_service) + + # Test with various topics and difficulties + test_cases = [ + (["animals"], "medium"), + (["science"], "hard"), + (["technology"], "easy"), + (["animals", "science"], "medium"), + ] + + for i, (topics, difficulty) in enumerate(test_cases): + print(f"\n🔬 Test {i+1}: topics={topics}, difficulty={difficulty}") + try: + result = await generator.generate_puzzle(topics, difficulty, use_ai=False) + if result: + print(f"✅ Test {i+1} succeeded") + grid_size = len(result['grid']) + word_count = len(result['clues']) + print(f" Grid: {grid_size}x{grid_size}, Words: {word_count}") + else: + print(f"⚠️ Test {i+1} returned None") + except Exception as e: + print(f"❌ Test {i+1} failed: {e}") + import traceback + traceback.print_exc() + return False + + print(f"\n✅ All debug tests completed!") + return True + + except Exception as e: + print(f"❌ Debug test setup failed: {e}") + import traceback + traceback.print_exc() + return False + +if __name__ == "__main__": + asyncio.run(test_debug_generator()) \ No newline at end of file diff --git a/crossword-app/backend-py/debug_simple.py b/crossword-app/backend-py/debug_simple.py new file mode 100644 index 0000000000000000000000000000000000000000..9e0edb1926263311bda3f0b4d88c800b0d5115c0 --- /dev/null +++ b/crossword-app/backend-py/debug_simple.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 +""" +Simple debug test for crossword generator index errors. +""" + +import asyncio +import sys +import logging +from pathlib import Path + +# Add project root to path +project_root = Path(__file__).parent +sys.path.insert(0, str(project_root)) + +from src.services.crossword_generator_fixed import CrosswordGeneratorFixed + +# Enable debug logging +logging.basicConfig(level=logging.DEBUG) +logger = logging.getLogger(__name__) + +async def test_with_static_words(): + """Test generator with static word lists.""" + + # Create generator without vector service + generator = CrosswordGeneratorFixed(vector_service=None) + + # Create test words + test_words = [ + {"word": "CAT", "clue": "Feline pet"}, + {"word": "DOG", "clue": "Man's best friend"}, + {"word": "BIRD", "clue": "Flying animal"}, + {"word": "FISH", "clue": "Aquatic animal"}, + {"word": "ELEPHANT", "clue": "Large mammal"}, + {"word": "TIGER", "clue": "Striped cat"}, + {"word": "HORSE", "clue": "Riding animal"}, + {"word": "BEAR", "clue": "Large carnivore"} + ] + + print(f"🧪 Testing crossword generation with {len(test_words)} words...") + + try: + # Test multiple times to catch intermittent errors + for attempt in range(10): + print(f"\n🔬 Attempt {attempt + 1}/10") + + # Shuffle words to create different scenarios + import random + random.shuffle(test_words) + + # Override the word selection to use our test words + generator._select_words = lambda topics, difficulty, use_ai: test_words + + result = await generator.generate_puzzle(["animals"], "medium", use_ai=False) + + if result: + grid_size = len(result['grid']) + word_count = len(result['clues']) + print(f"✅ Attempt {attempt + 1} succeeded: {grid_size}x{grid_size} grid, {word_count} words") + else: + print(f"⚠️ Attempt {attempt + 1} returned None") + + except IndexError as e: + print(f"❌ INDEX ERROR caught on attempt {attempt + 1}: {e}") 
+ import traceback + traceback.print_exc() + return False + except Exception as e: + print(f"❌ Other error on attempt {attempt + 1}: {e}") + import traceback + traceback.print_exc() + return False + + print(f"\n✅ All 10 attempts completed successfully!") + return True + +async def test_grid_placement_directly(): + """Test grid placement functions directly with problematic data.""" + + generator = CrosswordGeneratorFixed(vector_service=None) + + # Test data that might cause issues + test_cases = [ + { + "words": ["A", "I"], # Very short words + "description": "Very short words" + }, + { + "words": ["VERYLONGWORDTHATMIGHTCAUSEISSUES", "SHORT"], + "description": "Very long word with short word" + }, + { + "words": ["ABCDEFGHIJKLMNOP", "QRSTUVWXYZ"], # Long words + "description": "Two long words" + }, + { + "words": ["TEST", "SETS", "NETS", "PETS"], # Multiple similar words + "description": "Similar words with same endings" + } + ] + + for i, test_case in enumerate(test_cases): + print(f"\n🔬 Grid test {i+1}: {test_case['description']}") + + try: + word_list = test_case["words"] + word_objs = [{"word": w, "clue": f"Clue for {w}"} for w in word_list] + + result = generator._create_grid(word_objs) + + if result: + grid_size = len(result['grid']) + word_count = len(result['placed_words']) + print(f"✅ Grid test {i+1} succeeded: {grid_size}x{grid_size} grid, {word_count} words") + else: + print(f"⚠️ Grid test {i+1} returned None") + + except IndexError as e: + print(f"❌ INDEX ERROR in grid test {i+1}: {e}") + import traceback + traceback.print_exc() + return False + except Exception as e: + print(f"❌ Other error in grid test {i+1}: {e}") + import traceback + traceback.print_exc() + return False + + return True + +if __name__ == "__main__": + print("🧪 Starting debug tests for crossword generator...") + + async def run_tests(): + success1 = await test_with_static_words() + success2 = await test_grid_placement_directly() + + if success1 and success2: + print("\n🎉 All debug tests passed! 
No index errors detected.") + else: + print("\n❌ Some debug tests failed.") + + asyncio.run(run_tests()) \ No newline at end of file diff --git a/crossword-app/backend-py/pytest.ini b/crossword-app/backend-py/pytest.ini new file mode 100644 index 0000000000000000000000000000000000000000..9fd7eb7ef0cae2d10e98559e859b9eab46b4f5aa --- /dev/null +++ b/crossword-app/backend-py/pytest.ini @@ -0,0 +1,16 @@ +[tool:pytest] +testpaths = test-unit +python_files = test_*.py +python_classes = Test* +python_functions = test_* +addopts = + -v + --tb=short + --strict-markers + --disable-warnings + --color=yes +markers = + slow: marks tests as slow (deselect with '-m "not slow"') + integration: marks tests as integration tests + unit: marks tests as unit tests +asyncio_mode = auto \ No newline at end of file diff --git a/crossword-app/backend-py/requirements-dev.txt b/crossword-app/backend-py/requirements-dev.txt new file mode 100644 index 0000000000000000000000000000000000000000..2537e2bae7f656735af8523743681a796ec3ec2a --- /dev/null +++ b/crossword-app/backend-py/requirements-dev.txt @@ -0,0 +1,18 @@ +# Development requirements with AI/ML dependencies +# This file includes ALL dependencies for full development environment + +# Include base requirements +-r requirements.txt + +# AI/ML dependencies for vector-powered word generation +sentence-transformers==3.3.0 +torch==2.5.1 +transformers==4.47.1 +scikit-learn==1.5.2 +huggingface-hub==0.26.2 +faiss-cpu==1.9.0 + +# Additional development tools +pytest-cov==6.0.0 # For test coverage reports +black==24.8.0 # Code formatter (optional) +flake8==7.1.1 # Linting (optional) \ No newline at end of file diff --git a/crossword-app/backend-py/requirements.txt b/crossword-app/backend-py/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..d49939898f34ad2dc55a65cc95df33e613bdfcd1 --- /dev/null +++ b/crossword-app/backend-py/requirements.txt @@ -0,0 +1,48 @@ +# Core FastAPI and web server dependencies +fastapi==0.116.1 +uvicorn[standard]==0.32.1 +starlette==0.47.2 +python-dotenv==1.0.1 +python-multipart==0.0.12 + +# Data validation and serialization +pydantic==2.11.7 +pydantic-core==2.33.2 +typing-extensions==4.14.1 +typing-inspection==0.4.1 + +# HTTP client dependencies +httpx==0.28.1 +httpcore==1.0.9 +h11==0.16.0 +anyio==4.10.0 +requests==2.32.4 +certifi==2025.8.3 +idna==3.10 + +# Core data processing +numpy==2.3.2 + +# Logging and monitoring +structlog==25.4.0 + +# Development and testing dependencies +pytest==8.4.1 +pytest-asyncio==1.1.0 +iniconfig==2.1.0 +packaging==25.0 +pluggy==1.6.0 +pygments==2.19.2 + +# AI/ML dependencies (optional - install separately if needed) +# Uncomment these lines if you want AI-powered word generation: +# sentence-transformers==3.3.0 +# torch==2.5.1 +# transformers==4.47.1 +# scikit-learn==1.5.2 +# huggingface-hub==0.26.2 +# faiss-cpu==1.9.0 + +# Additional utility dependencies +annotated-types==0.7.0 +sniffio==1.3.1 \ No newline at end of file diff --git a/crossword-app/backend-py/run_tests.py b/crossword-app/backend-py/run_tests.py new file mode 100644 index 0000000000000000000000000000000000000000..eb4777fe6f9d0fba3ff50c1225ef9e2e871aede5 --- /dev/null +++ b/crossword-app/backend-py/run_tests.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python3 +""" +Test runner script for the backend-py project. +Run this script to execute all unit tests. 
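requirements.txt above ships only the lightweight core and leaves the AI/ML packages commented out, so any code path that touches them has to degrade gracefully. One common pattern for that is sketched below; the `AI_AVAILABLE` flag, the `build_word_source` helper, and the model id are illustrative assumptions, not code from this change.

```python
try:
    from sentence_transformers import SentenceTransformer  # optional, heavy dependency
    AI_AVAILABLE = True
except ImportError:
    SentenceTransformer = None
    AI_AVAILABLE = False

def build_word_source(static_words: list[dict]):
    """Prefer the vector model when its dependencies are installed, else fall back to static lists."""
    if AI_AVAILABLE:
        model = SentenceTransformer("all-MiniLM-L6-v2")  # model id chosen only as an example
        return "vector", model
    return "static", static_words
```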
+""" + +import sys +import subprocess +from pathlib import Path + +def run_tests(): + """Run all tests using pytest.""" + print("🧪 Running Python Backend Unit Tests\n") + + # Change to project directory + project_root = Path(__file__).parent + + try: + # Run pytest with coverage if available + cmd = [ + sys.executable, "-m", "pytest", + "test-unit/", + "-v", + "--tb=short", + "--color=yes" + ] + + # Try to add coverage if pytest-cov is available + try: + import pytest_cov + cmd.extend([ + "--cov=src", + "--cov-report=term-missing", + "--cov-report=html:htmlcov" + ]) + print("📊 Running tests with coverage analysis") + except ImportError: + print("📝 Running tests without coverage (install pytest-cov for coverage)") + + print(f"🏃 Command: {' '.join(cmd)}\n") + + result = subprocess.run(cmd, cwd=project_root) + + if result.returncode == 0: + print("\n✅ All tests passed!") + if 'pytest_cov' in locals(): + print("📊 Coverage report generated in htmlcov/index.html") + else: + print(f"\n❌ Tests failed with exit code {result.returncode}") + + return result.returncode + + except FileNotFoundError: + print("❌ pytest not found. Install it with: pip install pytest pytest-asyncio") + return 1 + except Exception as e: + print(f"❌ Error running tests: {e}") + return 1 + +def run_specific_test(test_file): + """Run a specific test file.""" + print(f"🎯 Running specific test: {test_file}\n") + + try: + cmd = [sys.executable, "-m", "pytest", f"test-unit/{test_file}", "-v"] + result = subprocess.run(cmd, cwd=Path(__file__).parent) + return result.returncode + except Exception as e: + print(f"❌ Error running test {test_file}: {e}") + return 1 + +def main(): + """Main entry point.""" + if len(sys.argv) > 1: + # Run specific test file + test_file = sys.argv[1] + if not test_file.startswith("test_"): + test_file = f"test_{test_file}" + if not test_file.endswith(".py"): + test_file = f"{test_file}.py" + + return run_specific_test(test_file) + else: + # Run all tests + return run_tests() + +if __name__ == "__main__": + exit_code = main() + sys.exit(exit_code) \ No newline at end of file diff --git a/crossword-app/backend-py/src/__init__.py b/crossword-app/backend-py/src/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ad1102f294ca5224009ae1f11c69a3d2ce7ffde5 --- /dev/null +++ b/crossword-app/backend-py/src/__init__.py @@ -0,0 +1 @@ +# Python backend package \ No newline at end of file diff --git a/crossword-app/backend-py/src/__pycache__/__init__.cpython-313.pyc b/crossword-app/backend-py/src/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b3ca00ed93b33a3affefa06a029e1eb9166121ce Binary files /dev/null and b/crossword-app/backend-py/src/__pycache__/__init__.cpython-313.pyc differ diff --git a/crossword-app/backend-py/src/routes/__init__.py b/crossword-app/backend-py/src/routes/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7e1d20f35c4902ddd7c228972badf7ee20916776 --- /dev/null +++ b/crossword-app/backend-py/src/routes/__init__.py @@ -0,0 +1 @@ +# Routes package \ No newline at end of file diff --git a/crossword-app/backend-py/src/routes/__pycache__/__init__.cpython-313.pyc b/crossword-app/backend-py/src/routes/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..980436bc6176e62c1e993bf476ea1edef3f98845 Binary files /dev/null and b/crossword-app/backend-py/src/routes/__pycache__/__init__.cpython-313.pyc differ diff --git 
a/crossword-app/backend-py/src/routes/__pycache__/api.cpython-313.pyc b/crossword-app/backend-py/src/routes/__pycache__/api.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b980a8795548d12460e0355c02686f49e0707170 Binary files /dev/null and b/crossword-app/backend-py/src/routes/__pycache__/api.cpython-313.pyc differ diff --git a/crossword-app/backend-py/src/routes/api.py b/crossword-app/backend-py/src/routes/api.py new file mode 100644 index 0000000000000000000000000000000000000000..b5f31f986390c901a142adba3738954b1446dac8 --- /dev/null +++ b/crossword-app/backend-py/src/routes/api.py @@ -0,0 +1,186 @@ +""" +API routes for crossword puzzle generator. +Matches the existing JavaScript API for frontend compatibility. +""" + +import logging +from typing import List, Dict, Any, Optional +from datetime import datetime + +from fastapi import APIRouter, HTTPException, Request, Depends +from pydantic import BaseModel, Field + +from ..services.crossword_generator_wrapper import CrosswordGenerator + +logger = logging.getLogger(__name__) + +router = APIRouter() + +# Request/Response models +class GeneratePuzzleRequest(BaseModel): + topics: List[str] = Field(..., description="List of topics for the puzzle") + difficulty: str = Field(default="medium", description="Difficulty level: easy, medium, hard") + useAI: bool = Field(default=False, description="Use AI vector search for word generation") + +class WordInfo(BaseModel): + word: str + clue: str + similarity: Optional[float] = None + source: Optional[str] = None + +class ClueInfo(BaseModel): + number: int + word: str + text: str + direction: str # "across" or "down" + position: Dict[str, int] # {"row": int, "col": int} + +class PuzzleMetadata(BaseModel): + topics: List[str] + difficulty: str + wordCount: int + size: int + aiGenerated: bool + +class PuzzleResponse(BaseModel): + grid: List[List[str]] + clues: List[ClueInfo] + metadata: PuzzleMetadata + +class TopicInfo(BaseModel): + id: str + name: str + +# Global crossword generator instance (will be initialized in lifespan) +generator = None + +def get_crossword_generator(request: Request) -> CrosswordGenerator: + """Dependency to get the crossword generator with vector search service.""" + global generator + if generator is None: + vector_service = getattr(request.app.state, 'vector_service', None) + generator = CrosswordGenerator(vector_service) + return generator + +@router.get("/topics", response_model=List[TopicInfo]) +async def get_topics(): + """Get available topics for puzzle generation.""" + # Return the same topics as JavaScript backend for consistency + topics = [ + {"id": "animals", "name": "Animals"}, + {"id": "geography", "name": "Geography"}, + {"id": "science", "name": "Science"}, + {"id": "technology", "name": "Technology"} + ] + return topics + +@router.post("/generate", response_model=PuzzleResponse) +async def generate_puzzle( + request: GeneratePuzzleRequest, + crossword_gen: CrosswordGenerator = Depends(get_crossword_generator) +): + """ + Generate a crossword puzzle with optional AI vector search. + + This endpoint matches the JavaScript API exactly for frontend compatibility. 
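+
+    Example request body (fields defined by GeneratePuzzleRequest above; values
+    illustrative):
+
+        {"topics": ["animals"], "difficulty": "medium", "useAI": false}
+
+    The response follows PuzzleResponse: a letter grid, a list of clues, and
+    puzzle metadata.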
+ """ + try: + logger.info(f"🎯 Generating puzzle for topics: {request.topics}, difficulty: {request.difficulty}, useAI: {request.useAI}") + + # Validate topics + if not request.topics: + raise HTTPException(status_code=400, detail="At least one topic is required") + + valid_difficulties = ["easy", "medium", "hard"] + if request.difficulty not in valid_difficulties: + raise HTTPException( + status_code=400, + detail=f"Invalid difficulty. Must be one of: {valid_difficulties}" + ) + + # Generate puzzle + puzzle_data = await crossword_gen.generate_puzzle( + topics=request.topics, + difficulty=request.difficulty, + use_ai=request.useAI + ) + + if not puzzle_data: + raise HTTPException(status_code=500, detail="Failed to generate puzzle") + + logger.info(f"✅ Generated puzzle with {puzzle_data['metadata']['wordCount']} words") + return puzzle_data + + except HTTPException: + raise + except Exception as e: + logger.error(f"❌ Error generating puzzle: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@router.post("/words") +async def generate_words( + request: GeneratePuzzleRequest, + crossword_gen: CrosswordGenerator = Depends(get_crossword_generator) +): + """ + Generate words for given topics (debug endpoint). + + This endpoint allows testing word generation without full puzzle creation. + """ + try: + words = await crossword_gen.generate_words_for_topics( + topics=request.topics, + difficulty=request.difficulty, + use_ai=request.useAI + ) + + return { + "topics": request.topics, + "difficulty": request.difficulty, + "useAI": request.useAI, + "wordCount": len(words), + "words": words + } + + except Exception as e: + logger.error(f"❌ Error generating words: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@router.get("/health") +async def api_health(): + """API health check.""" + return { + "status": "healthy", + "timestamp": datetime.utcnow().isoformat(), + "backend": "python", + "version": "2.0.0" + } + +@router.get("/debug/vector-search") +async def debug_vector_search( + topic: str, + difficulty: str = "medium", + max_words: int = 10, + request: Request = None +): + """ + Debug endpoint to test vector search directly. 
+ """ + try: + vector_service = getattr(request.app.state, 'vector_service', None) + if not vector_service or not vector_service.is_initialized: + raise HTTPException(status_code=503, detail="Vector search service not available") + + words = await vector_service.find_similar_words(topic, difficulty, max_words) + + return { + "topic": topic, + "difficulty": difficulty, + "max_words": max_words, + "found_words": len(words), + "words": words + } + + except Exception as e: + logger.error(f"❌ Vector search debug failed: {e}") + raise HTTPException(status_code=500, detail=str(e)) \ No newline at end of file diff --git a/crossword-app/backend-py/src/services/__init__.py b/crossword-app/backend-py/src/services/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c66a0b2ba5713d1d2c25f309422234f995ae1fb4 --- /dev/null +++ b/crossword-app/backend-py/src/services/__init__.py @@ -0,0 +1 @@ +# Services package \ No newline at end of file diff --git a/crossword-app/backend-py/src/services/__pycache__/__init__.cpython-313.pyc b/crossword-app/backend-py/src/services/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c55a5f4fa560e34ad3ba2bf2fa66db2a5cb3ce87 Binary files /dev/null and b/crossword-app/backend-py/src/services/__pycache__/__init__.cpython-313.pyc differ diff --git a/crossword-app/backend-py/src/services/__pycache__/crossword_generator.cpython-313.pyc b/crossword-app/backend-py/src/services/__pycache__/crossword_generator.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c1eaaad1be169f3693ee277a6395c4641e9095a0 Binary files /dev/null and b/crossword-app/backend-py/src/services/__pycache__/crossword_generator.cpython-313.pyc differ diff --git a/crossword-app/backend-py/src/services/__pycache__/crossword_generator_fixed.cpython-313.pyc b/crossword-app/backend-py/src/services/__pycache__/crossword_generator_fixed.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..26bfdb0932126ec3401c4098b2a367b10701e921 Binary files /dev/null and b/crossword-app/backend-py/src/services/__pycache__/crossword_generator_fixed.cpython-313.pyc differ diff --git a/crossword-app/backend-py/src/services/__pycache__/crossword_generator_wrapper.cpython-313.pyc b/crossword-app/backend-py/src/services/__pycache__/crossword_generator_wrapper.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..901e1496257a9e8db6d6d54393654276c56d28e7 Binary files /dev/null and b/crossword-app/backend-py/src/services/__pycache__/crossword_generator_wrapper.cpython-313.pyc differ diff --git a/crossword-app/backend-py/src/services/__pycache__/vector_search.cpython-313.pyc b/crossword-app/backend-py/src/services/__pycache__/vector_search.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5852370f246a8acec8a082a7f1d8f4ebeb5a07e0 Binary files /dev/null and b/crossword-app/backend-py/src/services/__pycache__/vector_search.cpython-313.pyc differ diff --git a/crossword-app/backend-py/src/services/__pycache__/word_cache.cpython-313.pyc b/crossword-app/backend-py/src/services/__pycache__/word_cache.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..492802a26dc86ff31b9fca358946821cf4b6a946 Binary files /dev/null and b/crossword-app/backend-py/src/services/__pycache__/word_cache.cpython-313.pyc differ diff --git a/crossword-app/backend-py/src/services/crossword_generator.py 
b/crossword-app/backend-py/src/services/crossword_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..994a44833d7e2ca8d067897b758b4d08591f0099 --- /dev/null +++ b/crossword-app/backend-py/src/services/crossword_generator.py @@ -0,0 +1,722 @@ +""" +Fixed Crossword Generator - Ported from working JavaScript implementation. +""" + +import asyncio +import json +import random +import time +from pathlib import Path +from typing import Dict, List, Optional, Any, Tuple +import structlog + +logger = structlog.get_logger(__name__) + +class CrosswordGenerator: + def __init__(self, vector_service=None): + self.max_attempts = 100 + self.min_words = 6 + self.max_words = 10 # Reduced from 12 to 10 for better success rate + self.vector_service = vector_service + + async def generate_puzzle(self, topics: List[str], difficulty: str = "medium", use_ai: bool = False) -> Optional[Dict[str, Any]]: + """ + Generate a complete crossword puzzle. + """ + try: + # Import here to avoid circular imports - with fallback + try: + from .vector_search import VectorSearchService + except ImportError as import_error: + logger.warning(f"⚠️ Could not import VectorSearchService: {import_error}. Using static words only.") + # Continue without vector service + + logger.info(f"🎯 Generating puzzle for topics: {topics}, difficulty: {difficulty}, AI: {use_ai}") + + # Get words (from AI or static) + words = await self._select_words(topics, difficulty, use_ai) + + if len(words) < self.min_words: + logger.error(f"❌ Not enough words: {len(words)} < {self.min_words}") + raise Exception(f"Not enough words generated: {len(words)} < {self.min_words}") + + # Create grid + grid_result = self._create_grid(words) + + if not grid_result: + logger.error("❌ Grid creation failed") + raise Exception("Could not create crossword grid") + + logger.info(f"✅ Generated crossword with {len(grid_result['placed_words'])} words") + + return { + "grid": grid_result["grid"], + "clues": grid_result["clues"], + "metadata": { + "topics": topics, + "difficulty": difficulty, + "wordCount": len(grid_result["placed_words"]), + "size": len(grid_result["grid"]), + "aiGenerated": use_ai + } + } + + except Exception as e: + logger.error(f"❌ Error generating puzzle: {e}") + raise + + async def _select_words(self, topics: List[str], difficulty: str, use_ai: bool) -> List[Dict[str, Any]]: + """Select words for the crossword.""" + all_words = [] + + if use_ai and self.vector_service: + # Use the initialized vector service + logger.info(f"🤖 Using initialized vector service for AI word generation") + for topic in topics: + ai_words = await self.vector_service.find_similar_words(topic, difficulty, self.max_words // len(topics)) + all_words.extend(ai_words) + + if len(all_words) >= self.min_words: + logger.info(f"✅ AI generated {len(all_words)} words") + return self._sort_words_for_crossword(all_words[:self.max_words]) + else: + logger.warning(f"⚠️ AI only generated {len(all_words)} words, falling back to static") + + # Fallback to cached words + if self.vector_service: + # Use the cached words from the initialized service + logger.info(f"📦 Using cached words from initialized vector service") + for topic in topics: + cached_words = await self.vector_service._get_cached_fallback(topic, difficulty, self.max_words // len(topics)) + all_words.extend(cached_words) + else: + # Last resort: load static words directly + logger.warning(f"⚠️ No vector service available, loading static words directly") + all_words = await self._get_static_words(topics, 
difficulty) + + return self._sort_words_for_crossword(all_words[:self.max_words]) + + async def _get_static_words(self, topics: List[str], difficulty: str) -> List[Dict[str, Any]]: + """Get static words from JSON files.""" + all_words = [] + + for topic in topics: + # Try multiple case variations + for topic_variation in [topic, topic.capitalize(), topic.lower()]: + word_file = Path(__file__).parent.parent.parent / "data" / "word-lists" / f"{topic_variation.lower()}.json" + + if word_file.exists(): + with open(word_file, 'r') as f: + words = json.load(f) + # Filter by difficulty + filtered = self._filter_by_difficulty(words, difficulty) + all_words.extend(filtered) + break + + return all_words + + def _filter_by_difficulty(self, words: List[Dict[str, Any]], difficulty: str) -> List[Dict[str, Any]]: + """Filter words by difficulty (length).""" + difficulty_map = { + "easy": {"min_len": 3, "max_len": 8}, + "medium": {"min_len": 4, "max_len": 10}, + "hard": {"min_len": 5, "max_len": 15} + } + + criteria = difficulty_map.get(difficulty, difficulty_map["medium"]) + return [w for w in words if criteria["min_len"] <= len(w["word"]) <= criteria["max_len"]] + + def _sort_words_for_crossword(self, words: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Sort words by crossword suitability.""" + scored_words = [] + + for word_obj in words: + word = word_obj["word"].upper() + score = 0 + + # Strongly prefer shorter words for crossword viability + if 3 <= len(word) <= 5: + score += 20 # Short words get highest priority + elif 6 <= len(word) <= 7: + score += 15 # Medium words get good priority + elif len(word) == 8: + score += 8 # Long words get lower priority + elif len(word) == 9: + score += 4 # Very long words get much lower priority + elif len(word) >= 10: + score += 1 # Extremely long words get minimal priority + + # Bonus for common letters + common_letters = ['E', 'A', 'R', 'I', 'O', 'T', 'N', 'S'] + for letter in word: + if letter in common_letters: + score += 1 + + # Vowel distribution bonus + vowels = ['A', 'E', 'I', 'O', 'U'] + vowel_count = sum(1 for letter in word if letter in vowels) + score += vowel_count + + # Penalty for very long words to discourage their selection + if len(word) >= 9: + score -= 5 + + scored_words.append({**word_obj, "crossword_score": score}) + + # Sort by score with some randomization + scored_words.sort(key=lambda w: w["crossword_score"] + random.randint(-2, 2), reverse=True) + return scored_words + + def _create_grid(self, words: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]: + """Create crossword grid using backtracking algorithm.""" + if not words: + logger.error(f"❌ No words provided to grid generator") + return None + + logger.info(f"🎯 Creating crossword grid with {len(words)} words") + + # Debug: log the structure of words + logger.info(f"🔍 Word structures: {[type(w) for w in words[:3]]}") + if words: + logger.info(f"🔍 First word sample: {words[0]}") + + # Sort words by length (longest first) - keeping objects aligned + try: + # Create paired list of (word_string, word_object) + word_pairs = [] + for i, w in enumerate(words): + if isinstance(w, dict) and "word" in w: + word_pairs.append((w["word"].upper(), w)) + elif isinstance(w, str): + # Create dict for string-only words + word_obj = {"word": w.upper(), "clue": f"Clue for {w.upper()}"} + word_pairs.append((w.upper(), word_obj)) + else: + logger.warning(f"⚠️ Unexpected word format at index {i}: {w}") + + # Sort pairs by word length (longest first) + word_pairs.sort(key=lambda pair: len(pair[0]), 
reverse=True) + + # Extract sorted lists + word_list = [pair[0] for pair in word_pairs] + sorted_word_objs = [pair[1] for pair in word_pairs] + + logger.info(f"🎯 Processed {len(word_list)} words for grid: {word_list[:5]}") + except Exception as e: + logger.error(f"❌ Error processing words: {e}") + return None + + size = self._calculate_grid_size(word_list) + + # Try multiple attempts + for attempt in range(3): + current_size = size + attempt + + try: + logger.info(f"🔧 Attempt {attempt + 1}: word_list length={len(word_list)}, sorted_word_objs length={len(sorted_word_objs)}") + result = self._place_words_in_grid(word_list, sorted_word_objs, current_size) + if result: + return result + except Exception as e: + logger.error(f"❌ Grid placement attempt {attempt + 1} failed: {e}") + import traceback + traceback.print_exc() + + # Try with fewer words + if len(word_list) > 7: + reduced_words = word_list[:len(word_list) - 1] + reduced_word_objs = sorted_word_objs[:len(reduced_words)] + try: + logger.info(f"🔧 Reduced attempt {attempt + 1}: reduced_words length={len(reduced_words)}, reduced_word_objs length={len(reduced_word_objs)}") + result = self._place_words_in_grid(reduced_words, reduced_word_objs, current_size) + if result: + return result + except Exception as e: + logger.error(f"❌ Reduced grid placement attempt {attempt + 1} failed: {e}") + import traceback + traceback.print_exc() + + # Last resort: simple cross with 2 words + if len(word_list) >= 2: + return self._create_simple_cross(word_list[:2], sorted_word_objs[:2]) + + return None + + def _calculate_grid_size(self, words: List[str]) -> int: + """Calculate appropriate grid size with more generous spacing.""" + total_chars = sum(len(word) for word in words) + longest_word = max(len(word) for word in words) if words else 8 + + # More generous grid size calculation + base_size = int((total_chars * 2.0) ** 0.5) # Increased multiplier from 1.5 to 2.0 + + return max( + base_size, + longest_word + 4, # Add padding to longest word + 12 # Minimum grid size increased from 8 to 12 + ) + + def _place_words_in_grid(self, word_list: List[str], word_objs: List[Dict[str, Any]], size: int) -> Optional[Dict[str, Any]]: + """Place words in grid using backtracking.""" + logger.info(f"🔧 _place_words_in_grid: word_list={len(word_list)}, word_objs={len(word_objs)}, size={size}") + + grid = [["." 
for _ in range(size)] for _ in range(size)] + placed_words = [] + + start_time = time.time() + timeout = 5.0 # 5 second timeout + + try: + if self._backtrack_placement(grid, word_list, word_objs, 0, placed_words, start_time, timeout): + logger.info(f"🔧 Backtrack successful, trimming grid...") + trimmed = self._trim_grid(grid, placed_words) + logger.info(f"🔧 Grid trimmed, generating clues...") + clues = self._generate_clues(word_objs, trimmed["placed_words"]) + + return { + "grid": trimmed["grid"], + "placed_words": trimmed["placed_words"], + "clues": clues + } + else: + logger.info(f"🔧 Backtrack failed") + return None + except Exception as e: + logger.error(f"❌ Error in _place_words_in_grid: {e}") + import traceback + traceback.print_exc() + return None + + def _backtrack_placement(self, grid: List[List[str]], word_list: List[str], word_objs: List[Dict[str, Any]], + word_index: int, placed_words: List[Dict[str, Any]], start_time: float, + timeout: float, call_count: int = 0) -> bool: + """Backtracking algorithm for word placement.""" + # Timeout check + if call_count % 50 == 0 and time.time() - start_time > timeout: + return False + + if word_index >= len(word_list): + return True + + word = word_list[word_index] + size = len(grid) + + # First word: place horizontally in center + if word_index == 0: + center_row = size // 2 + center_col = (size - len(word)) // 2 + + if self._can_place_word(grid, word, center_row, center_col, "horizontal"): + original_state = self._place_word(grid, word, center_row, center_col, "horizontal") + placed_words.append({ + "word": word, + "row": center_row, + "col": center_col, + "direction": "horizontal", + "number": 1 + }) + + if self._backtrack_placement(grid, word_list, word_objs, word_index + 1, placed_words, start_time, timeout, call_count + 1): + return True + + self._remove_word(grid, original_state) + placed_words.pop() + + return False + + # Subsequent words: find intersections + all_placements = self._find_all_intersection_placements(grid, word, placed_words) + all_placements.sort(key=lambda p: p["score"], reverse=True) + + for placement in all_placements: + row, col, direction = placement["row"], placement["col"], placement["direction"] + + if self._can_place_word(grid, word, row, col, direction): + original_state = self._place_word(grid, word, row, col, direction) + placed_words.append({ + "word": word, + "row": row, + "col": col, + "direction": direction, + "number": word_index + 1 + }) + + if self._backtrack_placement(grid, word_list, word_objs, word_index + 1, placed_words, start_time, timeout, call_count + 1): + return True + + self._remove_word(grid, original_state) + placed_words.pop() + + return False + + def _can_place_word(self, grid: List[List[str]], word: str, row: int, col: int, direction: str) -> bool: + """Check if word can be placed at position.""" + size = len(grid) + + # Check boundaries + if row < 0 or col < 0 or row >= size or col >= size: + return False + + if direction == "horizontal": + if col + len(word) > size: + return False + + # CRITICAL: Check word boundaries - no letters immediately before/after + if col > 0 and grid[row][col - 1] != ".": + return False # Word would have a preceding letter + if col + len(word) < size and grid[row][col + len(word)] != ".": + return False # Word would have a trailing letter + + # Check each letter position + for i, letter in enumerate(word): + check_row = row + check_col = col + i + if check_row >= size or check_col >= size or check_row < 0 or check_col < 0: + return False + current_cell = 
grid[check_row][check_col] + if current_cell != "." and current_cell != letter: + return False + + # For empty cells, check perpendicular validity + if current_cell == ".": + if not self._is_valid_perpendicular_placement(grid, letter, check_row, check_col, "vertical"): + return False + + else: # vertical + if row + len(word) > size: + return False + + # CRITICAL: Check word boundaries - no letters immediately before/after + if row > 0 and grid[row - 1][col] != ".": + return False # Word would have a preceding letter + if row + len(word) < size and grid[row + len(word)][col] != ".": + return False # Word would have a trailing letter + + # Check each letter position + for i, letter in enumerate(word): + check_row = row + i + check_col = col + if check_row >= size or check_col >= size or check_row < 0 or check_col < 0: + return False + current_cell = grid[check_row][check_col] + if current_cell != "." and current_cell != letter: + return False + + # For empty cells, check perpendicular validity + if current_cell == ".": + if not self._is_valid_perpendicular_placement(grid, letter, check_row, check_col, "horizontal"): + return False + + return True + + def _is_valid_perpendicular_placement(self, grid: List[List[str]], letter: str, row: int, col: int, check_direction: str) -> bool: + """Check if placing a letter would create valid perpendicular word boundaries.""" + size = len(grid) + + if check_direction == "vertical": + # Check if placing this letter would create an invalid vertical sequence + has_above = row > 0 and grid[row - 1][col] != "." + has_below = row < size - 1 and grid[row + 1][col] != "." + + # Don't allow this letter to extend an existing vertical word + # unless it's exactly at an intersection point with matching letters + if has_above or has_below: + return grid[row][col] == letter + else: # horizontal + # Check if placing this letter would create an invalid horizontal sequence + has_left = col > 0 and grid[row][col - 1] != "." + has_right = col < size - 1 and grid[row][col + 1] != "." 
+ + # Don't allow this letter to extend an existing horizontal word + # unless it's exactly at an intersection point with matching letters + if has_left or has_right: + return grid[row][col] == letter + + return True + + def _place_word(self, grid: List[List[str]], word: str, row: int, col: int, direction: str) -> List[Dict[str, Any]]: + """Place word in grid and return original state.""" + original_state = [] + size = len(grid) + + if direction == "horizontal": + for i, letter in enumerate(word): + check_row = row + check_col = col + i + if check_row >= size or check_col >= size or check_row < 0 or check_col < 0: + raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}") + original_state.append({ + "row": check_row, + "col": check_col, + "value": grid[check_row][check_col] + }) + grid[check_row][check_col] = letter + else: + for i, letter in enumerate(word): + check_row = row + i + check_col = col + if check_row >= size or check_col >= size or check_row < 0 or check_col < 0: + raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}") + original_state.append({ + "row": check_row, + "col": check_col, + "value": grid[check_row][check_col] + }) + grid[check_row][check_col] = letter + + return original_state + + def _remove_word(self, grid: List[List[str]], original_state: List[Dict[str, Any]]): + """Remove word from grid.""" + size = len(grid) + for state in original_state: + check_row = state["row"] + check_col = state["col"] + if check_row >= size or check_col >= size or check_row < 0 or check_col < 0: + raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}") + grid[check_row][check_col] = state["value"] + + def _find_all_intersection_placements(self, grid: List[List[str]], word: str, placed_words: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Find all possible intersection placements for a word.""" + placements = [] + + for placed_word in placed_words: + intersections = self._find_word_intersections(word, placed_word["word"]) + + for intersection in intersections: + word_pos, placed_pos = intersection["word_pos"], intersection["placed_pos"] + + placement_info = self._calculate_intersection_placement(word, word_pos, placed_word, placed_pos) + + if placement_info: + score = self._calculate_placement_score(grid, word, placement_info, placed_words) + placements.append({ + **placement_info, + "score": score + }) + + return placements + + def _find_word_intersections(self, word1: str, word2: str) -> List[Dict[str, int]]: + """Find letter intersections between two words.""" + intersections = [] + + for i, letter1 in enumerate(word1): + for j, letter2 in enumerate(word2): + if letter1 == letter2: + intersections.append({ + "word_pos": i, + "placed_pos": j + }) + + return intersections + + def _calculate_intersection_placement(self, new_word: str, new_word_pos: int, + placed_word: Dict[str, Any], placed_word_pos: int) -> Optional[Dict[str, Any]]: + """Calculate where new word should be placed for intersection.""" + placed_row, placed_col = placed_word["row"], placed_word["col"] + placed_direction = placed_word["direction"] + + # Find intersection point in grid + if placed_direction == "horizontal": + intersection_row = placed_row + intersection_col = placed_col + placed_word_pos + else: + intersection_row = placed_row + placed_word_pos + intersection_col = placed_col + + # Calculate new word position + new_direction = "vertical" if placed_direction == "horizontal" else 
"horizontal" + + if new_direction == "horizontal": + new_row = intersection_row + new_col = intersection_col - new_word_pos + else: + new_row = intersection_row - new_word_pos + new_col = intersection_col + + return { + "row": new_row, + "col": new_col, + "direction": new_direction + } + + def _calculate_placement_score(self, grid: List[List[str]], word: str, placement: Dict[str, Any], + placed_words: List[Dict[str, Any]]) -> int: + """Score a placement for quality.""" + row, col, direction = placement["row"], placement["col"], placement["direction"] + grid_size = len(grid) + score = 100 # Base score for intersection + + # Count intersections - with bounds checking + intersection_count = 0 + if direction == "horizontal": + for i, letter in enumerate(word): + target_row = row + target_col = col + i + # Check bounds before accessing grid + if (0 <= target_row < grid_size and + 0 <= target_col < grid_size and + grid[target_row][target_col] == letter): + intersection_count += 1 + else: # vertical + for i, letter in enumerate(word): + target_row = row + i + target_col = col + # Check bounds before accessing grid + if (0 <= target_row < grid_size and + 0 <= target_col < grid_size and + grid[target_row][target_col] == letter): + intersection_count += 1 + + score += intersection_count * 200 + + # Bonus for central placement + center = grid_size // 2 + distance_from_center = abs(row - center) + abs(col - center) + score -= distance_from_center * 5 + + return score + + + def _trim_grid(self, grid: List[List[str]], placed_words: List[Dict[str, Any]]) -> Dict[str, Any]: + """Trim grid to remove excess empty space.""" + if not placed_words: + return {"grid": grid, "placed_words": placed_words} + + # Find bounds + min_row = min_col = len(grid) + max_row = max_col = -1 + + for word in placed_words: + row, col, direction, word_text = word["row"], word["col"], word["direction"], word["word"] + + min_row = min(min_row, row) + min_col = min(min_col, col) + + if direction == "horizontal": + max_row = max(max_row, row) + max_col = max(max_col, col + len(word_text) - 1) + else: + max_row = max(max_row, row + len(word_text) - 1) + max_col = max(max_col, col) + + # Add padding with proper bounds checking + min_row = max(0, min_row - 1) + min_col = max(0, min_col - 1) + max_row = min(len(grid) - 1, max_row + 1) + max_col = min(len(grid[0]) - 1, max_col + 1) + + # Ensure bounds are valid + max_row = min(max_row, len(grid) - 1) + max_col = min(max_col, len(grid[0]) - 1) + + # Create trimmed grid + trimmed_grid = [] + for r in range(min_row, max_row + 1): + row = [] + for c in range(min_col, max_col + 1): + # Double-check bounds before accessing + if r < 0 or r >= len(grid) or c < 0 or c >= len(grid[0]): + logger.error(f"Invalid bounds: r={r}, c={c}, grid_size={len(grid)}x{len(grid[0])}") + continue + row.append(grid[r][c]) + trimmed_grid.append(row) + + # Update word positions + updated_words = [] + for word in placed_words: + updated_words.append({ + **word, + "row": word["row"] - min_row, + "col": word["col"] - min_col + }) + + return {"grid": trimmed_grid, "placed_words": updated_words} + + def _create_simple_cross(self, word_list: List[str], word_objs: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]: + """Create simple cross with two words.""" + if len(word_list) < 2: + return None + + word1, word2 = word_list[0], word_list[1] + intersections = self._find_word_intersections(word1, word2) + + if not intersections: + return None + + # Use first intersection + intersection = intersections[0] + size = 
max(len(word1), len(word2)) + 4 + grid = [["." for _ in range(size)] for _ in range(size)] + + # Place first word horizontally in center + center_row = size // 2 + center_col = (size - len(word1)) // 2 + + for i, letter in enumerate(word1): + check_row = center_row + check_col = center_col + i + if check_row >= size or check_col >= size or check_row < 0 or check_col < 0: + raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}") + grid[check_row][check_col] = letter + + # Place second word vertically at intersection + intersection_col = center_col + intersection["word_pos"] + word2_start_row = center_row - intersection["placed_pos"] + + for i, letter in enumerate(word2): + check_row = word2_start_row + i + check_col = intersection_col + if check_row >= size or check_col >= size or check_row < 0 or check_col < 0: + raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}") + grid[check_row][check_col] = letter + + placed_words = [ + {"word": word1, "row": center_row, "col": center_col, "direction": "horizontal", "number": 1}, + {"word": word2, "row": word2_start_row, "col": intersection_col, "direction": "vertical", "number": 2} + ] + + trimmed = self._trim_grid(grid, placed_words) + clues = self._generate_clues(word_objs[:2], trimmed["placed_words"]) + + return { + "grid": trimmed["grid"], + "placed_words": trimmed["placed_words"], + "clues": clues + } + + def _generate_clues(self, word_objs: List[Dict[str, Any]], placed_words: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Generate clues for placed words.""" + logger.info(f"🔧 _generate_clues: word_objs={len(word_objs)}, placed_words={len(placed_words)}") + clues = [] + + try: + for i, placed_word in enumerate(placed_words): + logger.info(f"🔧 Processing placed word {i}: {placed_word.get('word', 'UNKNOWN')}") + + # Find matching word object + word_obj = next((w for w in word_objs if w["word"].upper() == placed_word["word"]), None) + + if word_obj: + logger.info(f"🔧 Found matching word_obj: {word_obj.get('word', 'UNKNOWN')}") + clue_text = word_obj["clue"] if "clue" in word_obj else f"Clue for {placed_word['word']}" + else: + logger.warning(f"⚠️ No matching word_obj found for {placed_word['word']}") + clue_text = f"Clue for {placed_word['word']}" + + clues.append({ + "number": placed_word["number"], + "word": placed_word["word"], + "text": clue_text, + "direction": "across" if placed_word["direction"] == "horizontal" else "down", + "position": {"row": placed_word["row"], "col": placed_word["col"]} + }) + + logger.info(f"🔧 Generated {len(clues)} clues") + return clues + except Exception as e: + logger.error(f"❌ Error in _generate_clues: {e}") + import traceback + traceback.print_exc() + raise \ No newline at end of file diff --git a/crossword-app/backend-py/src/services/crossword_generator_wrapper.py b/crossword-app/backend-py/src/services/crossword_generator_wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..ef6f22ec07b5d60b83105785b3f79e6c1028ca68 --- /dev/null +++ b/crossword-app/backend-py/src/services/crossword_generator_wrapper.py @@ -0,0 +1,58 @@ +""" +Crossword Generator - Simple wrapper for the fixed implementation +""" + +import logging +from typing import List, Dict, Any + +logger = logging.getLogger(__name__) + +class CrosswordGenerator: + """ + Wrapper that uses the fixed crossword generator implementation. 
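+
+    It constructs services.crossword_generator.CrosswordGenerator with the vector
+    search service supplied here and delegates generate_puzzle() to it.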
+ """ + + def __init__(self, vector_service=None): + self.vector_service = vector_service + self.min_words = 8 + self.max_words = 15 + + async def generate_puzzle( + self, + topics: List[str], + difficulty: str = "medium", + use_ai: bool = False + ) -> Dict[str, Any]: + """ + Generate a complete crossword puzzle using the fixed generator. + + Args: + topics: List of topic strings + difficulty: "easy", "medium", or "hard" + use_ai: Whether to use vector search for word generation + + Returns: + Dictionary containing grid, clues, and metadata + """ + try: + logger.info(f"🎯 Using fixed crossword generator for topics: {topics}") + + # Use the fixed generator implementation with the initialized vector service + from .crossword_generator import CrosswordGenerator as ActualGenerator + actual_generator = ActualGenerator(vector_service=self.vector_service) + + puzzle = await actual_generator.generate_puzzle(topics, difficulty, use_ai) + + logger.info(f"✅ Generated crossword with fixed algorithm") + return puzzle + + except Exception as e: + logger.error(f"❌ Failed to generate puzzle: {e}") + raise + + async def generate_words_for_topics(self, topics: List[str], difficulty: str, use_ai: bool) -> List[Dict[str, Any]]: + """Backward compatibility method.""" + # This method is kept for compatibility but delegates to the fixed generator + from .crossword_generator import CrosswordGenerator as ActualGenerator + actual_generator = ActualGenerator() + return await actual_generator._select_words(topics, difficulty, use_ai) \ No newline at end of file diff --git a/crossword-app/backend-py/src/services/vector_search.py b/crossword-app/backend-py/src/services/vector_search.py new file mode 100644 index 0000000000000000000000000000000000000000..5c653235a1cad121e839217da4f9458573f075d5 --- /dev/null +++ b/crossword-app/backend-py/src/services/vector_search.py @@ -0,0 +1,587 @@ +""" +Vector similarity search service using sentence-transformers and FAISS. +This implements true AI word generation via vector space nearest neighbor search. +""" + +import os +import logging +import asyncio +import time +from datetime import datetime +from typing import List, Dict, Any, Optional, Tuple +import json + +import numpy as np +import torch +from sentence_transformers import SentenceTransformer +import faiss +from pathlib import Path + +logger = logging.getLogger(__name__) + +def log_with_timestamp(message): + """Helper to log with precise timestamp.""" + timestamp = datetime.now().strftime("%H:%M:%S.%f")[:-3] + logger.info(f"[{timestamp}] {message}") + +class VectorSearchService: + """ + Service for finding semantically similar words using vector similarity search. + + This replaces the old approach of filtering static word lists with true + vector space search through the model's full vocabulary. 
+ """ + + def __init__(self): + self.model = None + self.vocab = None + self.word_embeddings = None + self.faiss_index = None + self.is_initialized = False + + # Configuration + self.model_name = os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-mpnet-base-v2") + self.similarity_threshold = float(os.getenv("WORD_SIMILARITY_THRESHOLD", "0.3")) + self.max_results = 20 + + # Cache manager for word fallback + self.cache_manager = None + + async def initialize(self): + """Initialize the vector search service.""" + try: + start_time = time.time() + log_with_timestamp(f"🔧 Loading model: {self.model_name}") + + # Load sentence transformer model + model_start = time.time() + self.model = SentenceTransformer(self.model_name) + model_time = time.time() - model_start + log_with_timestamp(f"✅ Model loaded in {model_time:.2f}s: {self.model_name}") + + # Get model vocabulary from tokenizer + vocab_start = time.time() + tokenizer = self.model.tokenizer + vocab_dict = tokenizer.get_vocab() + + # Filter vocabulary for crossword-suitable words + self.vocab = self._filter_vocabulary(vocab_dict) + vocab_time = time.time() - vocab_start + log_with_timestamp(f"📚 Filtered vocabulary in {vocab_time:.2f}s: {len(self.vocab)} words") + + # Pre-compute embeddings for all vocabulary words + embedding_start = time.time() + log_with_timestamp("🔄 Starting embedding generation...") + await self._build_embeddings_index() + embedding_time = time.time() - embedding_start + log_with_timestamp(f"🔄 Embeddings built in {embedding_time:.2f}s") + + # Initialize cache manager + cache_start = time.time() + log_with_timestamp("📦 Initializing word cache manager...") + try: + from .word_cache import WordCacheManager + self.cache_manager = WordCacheManager() + await self.cache_manager.initialize() + cache_time = time.time() - cache_start + log_with_timestamp(f"📦 Cache manager initialized in {cache_time:.2f}s") + except Exception as e: + cache_time = time.time() - cache_start + log_with_timestamp(f"⚠️ Cache manager initialization failed in {cache_time:.2f}s: {e}") + log_with_timestamp("📝 Continuing without persistent caching (in-memory only)") + self.cache_manager = None + + self.is_initialized = True + total_time = time.time() - start_time + log_with_timestamp(f"✅ Vector search service fully initialized in {total_time:.2f}s") + + except Exception as e: + logger.error(f"❌ Failed to initialize vector search: {e}") + self.is_initialized = False + raise + + def _filter_vocabulary(self, vocab_dict: Dict[str, int]) -> List[str]: + """Filter vocabulary to keep only crossword-suitable words.""" + filtered = [] + + # Words to exclude - boring, generic, or problematic for crosswords + excluded_words = { + # Generic/boring words + 'THE', 'AND', 'FOR', 'ARE', 'BUT', 'NOT', 'YOU', 'ALL', 'THIS', 'THAT', 'WITH', 'FROM', 'THEY', 'WERE', 'BEEN', 'HAVE', 'THEIR', 'SAID', 'EACH', 'WHICH', 'WHAT', 'THERE', 'WILL', 'MORE', 'WHEN', 'SOME', 'LIKE', 'INTO', 'TIME', 'VERY', 'ONLY', 'HAS', 'HAD', 'WHO', 'OIL', 'ITS', 'NOW', 'FIND', 'LONG', 'DOWN', 'DAY', 'DID', 'GET', 'COME', 'MADE', 'MAY', 'PART', + # Topic words that are too obvious + 'ANIMAL', 'ANIMALS', 'CREATURE', 'CREATURES', 'BEAST', 'BEASTS', 'THING', 'THINGS' + } + + for word, _ in vocab_dict.items(): + # Clean word (remove special tokens) + clean_word = word.strip("##").upper() + + # Filter criteria for crossword words + if ( + len(clean_word) >= 3 and # Minimum length + len(clean_word) <= 12 and # Reasonable max length + clean_word.isalpha() and # Only letters + not clean_word.startswith('[') 
and # No special tokens + not clean_word.startswith('<') and # No special tokens + clean_word not in excluded_words and # Avoid boring words + not self._is_plural(clean_word) and # No plurals + not self._is_boring_word(clean_word) # No boring patterns + ): + filtered.append(clean_word) + + # Remove duplicates and sort + return sorted(list(set(filtered))) + + def _is_plural(self, word: str) -> bool: + """Check if word is likely a plural.""" + # Simple plural detection + if len(word) < 4: + return False + return ( + word.endswith('S') and not word.endswith('SS') and + not word.endswith('US') and not word.endswith('IS') + ) + + def _is_boring_word(self, word: str) -> bool: + """Check if word is boring or too generic for crosswords.""" + boring_patterns = [ + # Words ending in common suffixes that are often generic + word.endswith('ING') and len(word) > 6, + word.endswith('TION') and len(word) > 7, + word.endswith('NESS') and len(word) > 6, + # Very common short words + word in ['GET', 'GOT', 'PUT', 'SET', 'LET', 'RUN', 'CUT', 'HIT', 'SIT', 'WIN', 'BIG', 'NEW', 'OLD', 'BAD', 'GOOD', 'BEST', 'LAST', 'NEXT', 'REAL'] + ] + return any(boring_patterns) + + async def _build_embeddings_index(self): + """Build FAISS index with pre-computed embeddings for all vocabulary.""" + logger.info("🔨 Building embeddings index...") + + # Compute embeddings in batches to avoid memory issues + batch_size = 100 + embeddings_list = [] + + for i in range(0, len(self.vocab), batch_size): + batch = self.vocab[i:i + batch_size] + batch_embeddings = self.model.encode(batch, convert_to_numpy=True) + embeddings_list.append(batch_embeddings) + + if i % 1000 == 0: + logger.info(f"📊 Processed {i}/{len(self.vocab)} words") + + # Combine all embeddings + self.word_embeddings = np.vstack(embeddings_list) + logger.info(f"📈 Generated embeddings shape: {self.word_embeddings.shape}") + + # Build FAISS index for fast similarity search + dimension = self.word_embeddings.shape[1] + self.faiss_index = faiss.IndexFlatIP(dimension) # Inner product similarity + + # Normalize embeddings for cosine similarity + faiss.normalize_L2(self.word_embeddings) + self.faiss_index.add(self.word_embeddings) + + logger.info(f"🔍 FAISS index built with {self.faiss_index.ntotal} vectors") + + + async def find_similar_words( + self, + topic: str, + difficulty: str = "medium", + max_words: int = 15 + ) -> List[Dict[str, Any]]: + """ + Find words similar to the given topic using vector similarity search. + + This is the core function that replaces embedding filtering with true + vector space nearest neighbor search. 
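+
+        Returns a list of word dicts, for example (values illustrative):
+
+            [{"word": "TIGER", "clue": "tiger (animal)", "similarity": 0.62,
+              "source": "vector_search"}, ...]
+
+        Falls back to cached words, and finally to a small emergency bootstrap
+        list, if vector search is unavailable or fails.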
+ """ + logger.info(f"🔍 Starting word search for topic: '{topic}', difficulty: '{difficulty}', max_words: {max_words}") + logger.info(f"🤖 Vector search initialized: {self.is_initialized}") + + if not self.is_initialized: + logger.warning("🔄 Vector search not initialized, using cached fallback") + return await self._get_cached_fallback(topic, difficulty, max_words) + + try: + # Get topic embedding + topic_embedding = self.model.encode([topic], convert_to_numpy=True) + + # Add small amount of noise to create variety in search results (with fallback) + import numpy as np + noise_factor = float(os.getenv("SEARCH_RANDOMNESS", "0.02")) # 2% noise by default + if noise_factor > 0: + try: + noise = np.random.normal(0, noise_factor, topic_embedding.shape) + topic_embedding_noisy = topic_embedding + noise + # Ensure the array is contiguous and correct type for FAISS + topic_embedding = np.ascontiguousarray(topic_embedding_noisy, dtype=np.float32) + except Exception as noise_error: + logger.warning(f"⚠️ Failed to add search noise: {noise_error}, using original embedding") + topic_embedding = np.ascontiguousarray(topic_embedding, dtype=np.float32) + else: + topic_embedding = np.ascontiguousarray(topic_embedding, dtype=np.float32) + + # Normalize for cosine similarity with error handling + try: + faiss.normalize_L2(topic_embedding) + except Exception as norm_error: + logger.warning(f"⚠️ FAISS normalization failed: {norm_error}, trying without noise") + # Fallback: use original embedding without noise + topic_embedding = self.model.encode([topic], convert_to_numpy=True) + topic_embedding = np.ascontiguousarray(topic_embedding, dtype=np.float32) + faiss.normalize_L2(topic_embedding) + + # Search for similar words using FAISS (get more results for diversity) + search_size = min(self.max_results * 6, 150) # Get many more candidates for variety + scores, indices = self.faiss_index.search(topic_embedding, search_size) + + # Debug: log search results + logger.info(f"🔍 FAISS search returned {len(scores[0])} results") + logger.info(f"🔍 Top 5 scores: {scores[0][:5]}") + logger.info(f"🔍 Similarity threshold: {self.similarity_threshold}") + + # Collect candidates with scores + candidates = [] + above_threshold = 0 + difficulty_passed = 0 + interesting_passed = 0 + + for score, idx in zip(scores[0], indices[0]): + if score < self.similarity_threshold: + continue + above_threshold += 1 + + word = self.vocab[idx] + + # Filter by difficulty and quality + if self._matches_difficulty(word, difficulty): + difficulty_passed += 1 + if self._is_interesting_word(word, topic): + interesting_passed += 1 + candidates.append({ + "word": word, + "clue": self._generate_clue(word, topic), + "similarity": float(score), + "source": "vector_search" + }) + + logger.info(f"🔍 Filtering results: {len(scores[0])} total → {above_threshold} above threshold → {difficulty_passed} difficulty OK → {interesting_passed} interesting → {len(candidates)} final") + + # Smart randomization: favor good words but add variety + import random + + if len(candidates) > max_words * 2: + # Weighted random selection favoring higher similarity scores + similar_words = self._weighted_random_selection(candidates, max_words) + else: + # If not many candidates, use all but in random order + random.shuffle(candidates) + similar_words = candidates[:max_words] + + logger.info(f"🎯 Found {len(similar_words)} similar words for '{topic}' via vector search") + + # Cache successful results for future use + if similar_words: + await self._cache_successful_search(topic, 
difficulty, similar_words) + + # If not enough words found, supplement with cached words + if len(similar_words) < max_words // 2: + cached_supplement = await self._get_cached_fallback( + topic, difficulty, max_words - len(similar_words) + ) + similar_words.extend(cached_supplement) + logger.info(f"🔄 Supplemented with {len(cached_supplement)} cached words") + + return similar_words[:max_words] + + except Exception as e: + logger.error(f"❌ Vector search failed for '{topic}': {e}") + # Try cached fallback first + cached_words = await self._get_cached_fallback(topic, difficulty, max_words) + if cached_words: + return cached_words + + # Last resort: bootstrap with simple topic-related words + logger.warning(f"⚠️ No cached words available, using emergency bootstrap for '{topic}'") + return self._get_emergency_bootstrap(topic, difficulty, max_words) + + def _matches_difficulty(self, word: str, difficulty: str) -> bool: + """Check if word matches difficulty criteria.""" + difficulty_map = { + "easy": {"min_len": 3, "max_len": 8}, + "medium": {"min_len": 4, "max_len": 10}, + "hard": {"min_len": 5, "max_len": 15} + } + + criteria = difficulty_map.get(difficulty, difficulty_map["medium"]) + return criteria["min_len"] <= len(word) <= criteria["max_len"] + + def _generate_clue(self, word: str, topic: str) -> str: + """Generate a simple clue for the word.""" + # Basic clue templates - can be enhanced with LLM generation later + clue_templates = { + "Animals": f"{word.lower()} (animal)", + "Technology": f"{word.lower()} (tech term)", + "Science": f"{word.lower()} (scientific term)", + "Geography": f"{word.lower()} (geographic feature)" + } + + return clue_templates.get(topic, f"{word.lower()} (related to {topic.lower()})") + + def _is_interesting_word(self, word: str, topic: str) -> bool: + """Check if word is interesting enough for crosswords.""" + # Exclude words that are too obvious for the topic + topic_lower = topic.lower() + word_lower = word.lower() + + # Don't include the topic itself or obvious variations + if word_lower == topic_lower or word_lower in topic_lower: + return False + + # Topic-specific filtering + if topic_lower == 'animals': + obvious_animals = ['mammal', 'mammals', 'wildlife', 'organism', 'organisms', 'livestock'] + if word_lower in obvious_animals: + return False + + # Prefer concrete nouns over abstract concepts + abstract_endings = ['tion', 'ness', 'ment', 'ity', 'ism'] + if any(word_lower.endswith(ending) for ending in abstract_endings) and len(word) > 8: + return False + + return True + + def _weighted_random_selection(self, candidates: List[Dict[str, Any]], max_words: int) -> List[Dict[str, Any]]: + """ + Weighted random selection that favors higher similarity scores but adds variety. + + This ensures we don't always get the exact same words, while still preferring + high-quality matches. 
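+
+        Candidates are split into similarity tiers (top 25%, next 25%, next 35%,
+        remainder); roughly a third of the requested words always come from the
+        top tier, and the remaining slots are filled by weighted random draws
+        (weights 4/3/2/1 from best to worst tier).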
+ """ + import random + + if len(candidates) <= max_words: + return candidates + + # Create tiers based on similarity scores + candidates_sorted = sorted(candidates, key=lambda w: w["similarity"], reverse=True) + + # Tier 1: Top 25% - very high probability + tier1_size = max(1, len(candidates_sorted) // 4) + tier1 = candidates_sorted[:tier1_size] + + # Tier 2: Next 25% - high probability + tier2_size = max(1, len(candidates_sorted) // 4) + tier2 = candidates_sorted[tier1_size:tier1_size + tier2_size] + + # Tier 3: Next 35% - medium probability + tier3_size = max(1, len(candidates_sorted) * 35 // 100) + tier3 = candidates_sorted[tier1_size + tier2_size:tier1_size + tier2_size + tier3_size] + + # Tier 4: Remaining - low probability + tier4 = candidates_sorted[tier1_size + tier2_size + tier3_size:] + + selected = [] + + # Always include some from tier 1 (but not all) + tier1_count = min(max_words // 3, len(tier1)) + selected.extend(random.sample(tier1, tier1_count)) + + # Fill remaining slots with weighted random selection + remaining_slots = max_words - len(selected) + + if remaining_slots > 0: + # Create weighted pool + weighted_pool = [] + weighted_pool.extend([(w, 3) for w in tier2]) # 3x weight + weighted_pool.extend([(w, 2) for w in tier3]) # 2x weight + weighted_pool.extend([(w, 1) for w in tier4]) # 1x weight + + # Also add remaining tier1 words with high weight + remaining_tier1 = [w for w in tier1 if w not in selected] + weighted_pool.extend([(w, 4) for w in remaining_tier1]) # 4x weight + + # Weighted random selection + for _ in range(remaining_slots): + if not weighted_pool: + break + + # Create weighted list + weighted_words = [] + for word, weight in weighted_pool: + weighted_words.extend([word] * weight) + + if weighted_words: + chosen = random.choice(weighted_words) + selected.append(chosen) + + # Remove chosen word from pool + weighted_pool = [(w, wt) for w, wt in weighted_pool if w != chosen] + + # Final shuffle to mix up the order + random.shuffle(selected) + + logger.info(f"🎲 Weighted selection: {len(selected)} words from {len(candidates)} candidates") + return selected[:max_words] + + async def _get_cached_fallback( + self, + topic: str, + difficulty: str, + max_words: int + ) -> List[Dict[str, Any]]: + """Fallback to cached words when vector search fails.""" + if not self.cache_manager: + logger.warning(f"📭 No cache manager available for fallback") + return [] + + logger.info(f"🔄 Looking for cached words for topic: '{topic}', difficulty: '{difficulty}'") + + try: + cached_words = await self.cache_manager.get_cached_words(topic, difficulty, max_words) + + if cached_words: + logger.info(f"📦 Found {len(cached_words)} cached words for '{topic}/{difficulty}'") + return cached_words + else: + logger.info(f"📭 No cached words available for '{topic}/{difficulty}'") + return [] + + except Exception as e: + logger.error(f"❌ Failed to get cached fallback for '{topic}': {e}") + return [] + + async def _cache_successful_search( + self, + topic: str, + difficulty: str, + words: List[Dict[str, Any]] + ): + """Cache successful vector search results for future use.""" + if not self.cache_manager: + return + + try: + # Filter out any existing cached words to avoid duplicates + vector_words = [w for w in words if w.get("source") == "vector_search"] + + if vector_words: + success = await self.cache_manager.cache_words(topic, difficulty, vector_words) + if success: + logger.info(f"💾 Successfully cached {len(vector_words)} words for {topic}/{difficulty}") + + except Exception as e: + 
logger.error(f"❌ Failed to cache search results: {e}") + + def _get_emergency_bootstrap(self, topic: str, difficulty: str, max_words: int) -> List[Dict[str, Any]]: + """ + Emergency bootstrap words when vector search and cache both fail. + This prevents complete failure by providing basic topic-related words. + """ + bootstrap_words = { + "animals": [ + {"word": "DOG", "clue": "Man's best friend"}, + {"word": "CAT", "clue": "Feline pet"}, + {"word": "ELEPHANT", "clue": "Large mammal with trunk"}, + {"word": "TIGER", "clue": "Striped big cat"}, + {"word": "BIRD", "clue": "Flying creature"}, + {"word": "FISH", "clue": "Aquatic animal"}, + {"word": "HORSE", "clue": "Riding animal"}, + {"word": "BEAR", "clue": "Large mammal"}, + {"word": "WHALE", "clue": "Marine mammal"}, + {"word": "LION", "clue": "King of jungle"}, + {"word": "RABBIT", "clue": "Hopping mammal"}, + {"word": "SNAKE", "clue": "Slithering reptile"} + ], + "science": [ + {"word": "ATOM", "clue": "Basic unit of matter"}, + {"word": "CELL", "clue": "Basic unit of life"}, + {"word": "DNA", "clue": "Genetic material"}, + {"word": "ENERGY", "clue": "Capacity to do work"}, + {"word": "FORCE", "clue": "Push or pull"}, + {"word": "GRAVITY", "clue": "Force of attraction"}, + {"word": "LIGHT", "clue": "Electromagnetic radiation"}, + {"word": "MATTER", "clue": "Physical substance"}, + {"word": "MOTION", "clue": "Change in position"}, + {"word": "OXYGEN", "clue": "Essential gas"}, + {"word": "PHYSICS", "clue": "Study of matter and energy"}, + {"word": "THEORY", "clue": "Scientific explanation"} + ], + "technology": [ + {"word": "COMPUTER", "clue": "Electronic device"}, + {"word": "INTERNET", "clue": "Global network"}, + {"word": "SOFTWARE", "clue": "Computer programs"}, + {"word": "ROBOT", "clue": "Automated machine"}, + {"word": "DATA", "clue": "Information"}, + {"word": "CODE", "clue": "Programming instructions"}, + {"word": "DIGITAL", "clue": "Electronic format"}, + {"word": "NETWORK", "clue": "Connected systems"}, + {"word": "SYSTEM", "clue": "Organized whole"}, + {"word": "DEVICE", "clue": "Technical apparatus"}, + {"word": "MOBILE", "clue": "Portable technology"}, + {"word": "SCREEN", "clue": "Display surface"} + ], + "geography": [ + {"word": "MOUNTAIN", "clue": "High landform"}, + {"word": "RIVER", "clue": "Flowing water"}, + {"word": "OCEAN", "clue": "Large body of water"}, + {"word": "DESERT", "clue": "Arid region"}, + {"word": "FOREST", "clue": "Dense trees"}, + {"word": "ISLAND", "clue": "Land surrounded by water"}, + {"word": "VALLEY", "clue": "Low area between hills"}, + {"word": "LAKE", "clue": "Inland water body"}, + {"word": "COAST", "clue": "Land by the sea"}, + {"word": "PLAIN", "clue": "Flat land"}, + {"word": "HILL", "clue": "Small elevation"}, + {"word": "CLIFF", "clue": "Steep rock face"} + ] + } + + topic_lower = topic.lower() + words = bootstrap_words.get(topic_lower, []) + + if not words: + # Generic fallback for unknown topics + words = [ + {"word": "WORD", "clue": "Unit of language"}, + {"word": "PUZZLE", "clue": "Brain teaser"}, + {"word": "GAME", "clue": "Form of play"}, + {"word": "CROSS", "clue": "Intersecting lines"}, + {"word": "GRID", "clue": "Pattern of squares"}, + {"word": "CLUE", "clue": "Helpful hint"} + ] + + # Filter by difficulty and format + filtered_words = [] + for word_obj in words: + word = word_obj["word"] + if self._matches_difficulty(word, difficulty): + filtered_words.append({ + "word": word, + "clue": word_obj["clue"], + "similarity": 0.7, # Moderate relevance + "source": 
"emergency_bootstrap" + }) + + # Shuffle and limit + import random + random.shuffle(filtered_words) + result = filtered_words[:max_words] + + logger.info(f"🆘 Emergency bootstrap provided {len(result)} words for '{topic}'") + return result + + async def cleanup(self): + """Cleanup resources.""" + logger.info("🧹 Cleaning up vector search service") + if hasattr(self, 'model'): + del self.model + if hasattr(self, 'word_embeddings'): + del self.word_embeddings + if hasattr(self, 'faiss_index'): + del self.faiss_index + if self.cache_manager: + await self.cache_manager.cleanup_expired_caches() + self.is_initialized = False \ No newline at end of file diff --git a/crossword-app/backend-py/src/services/word_cache.py b/crossword-app/backend-py/src/services/word_cache.py new file mode 100644 index 0000000000000000000000000000000000000000..567f89360fc362755a0bdd668720c486a2974167 --- /dev/null +++ b/crossword-app/backend-py/src/services/word_cache.py @@ -0,0 +1,347 @@ +""" +Word Cache Manager - Replaces static word file dependencies with intelligent caching. +Caches vector-discovered words with quality clues for fast retrieval. +""" + +import os +import json +import logging +import time +from datetime import datetime, timedelta +from typing import List, Dict, Any, Optional +from pathlib import Path +import asyncio + +logger = logging.getLogger(__name__) + +class WordCacheManager: + """ + Manages cached word data to replace static word file dependencies. + + Features: + - Caches vector-discovered words with quality clues + - Supports cache expiration and refresh + - Fallback for when vector search fails + - Progressive cache building from successful searches + """ + + def __init__(self, cache_dir: str = None): + # Use appropriate default cache directory for the environment + if cache_dir is None: + # Check if we're in a Docker container or HuggingFace Spaces + if os.path.exists("/.dockerenv") or os.getenv("SPACE_ID"): + # Use /tmp for containers/spaces where write permissions are limited + cache_dir = os.getenv("WORD_CACHE_DIR", "/tmp/crossword_cache") + else: + # Use local cache directory for development + cache_dir = os.getenv("WORD_CACHE_DIR", "cache") + + self.cache_dir = Path(cache_dir) + + # Try to create cache directory with fallback + try: + self.cache_dir.mkdir(parents=True, exist_ok=True) + logger.info(f"📁 Cache directory created: {self.cache_dir}") + except (PermissionError, OSError) as e: + # Fallback to temp directory + try: + import tempfile + temp_cache = Path(tempfile.gettempdir()) / "crossword_cache" + temp_cache.mkdir(exist_ok=True) + self.cache_dir = temp_cache + logger.warning(f"⚠️ Permission denied for '{cache_dir}', using temp: {self.cache_dir}") + except Exception as temp_error: + # Last resort: use in-memory only + logger.error(f"❌ Failed to create temp cache directory: {temp_error}") + logger.warning("⚠️ Using in-memory cache only (no persistence)") + self.cache_dir = None + except Exception as e: + # Last resort: use in-memory only + logger.error(f"❌ Failed to create cache directory: {e}") + logger.warning("⚠️ Using in-memory cache only (no persistence)") + self.cache_dir = None + + # Cache configuration + self.cache_expiry_hours = int(os.getenv("CACHE_EXPIRY_HOURS", "24")) + self.max_cached_words_per_topic = int(os.getenv("MAX_CACHED_WORDS", "100")) + self.cache_version = "1.0" + + # In-memory cache for fast access + self.memory_cache: Dict[str, List[Dict[str, Any]]] = {} + self.cache_metadata: Dict[str, Dict[str, Any]] = {} + + logger.info(f"📦 WordCacheManager initialized 
with cache_dir: {self.cache_dir}") + + async def initialize(self): + """Initialize cache manager by loading existing cache files.""" + try: + logger.info("🔧 Loading existing cache files...") + + # Skip file loading if no cache directory (in-memory only) + if self.cache_dir is None: + logger.info("📝 In-memory cache mode - no file loading") + return + + # Load all cache files into memory + cache_files = list(self.cache_dir.glob("*.json")) + loaded_count = 0 + + for cache_file in cache_files: + if cache_file.stem.endswith("_meta"): + continue # Skip metadata files + + try: + cache_key = cache_file.stem + with open(cache_file, 'r') as f: + cached_data = json.load(f) + + # Validate cache structure + if self._validate_cache_data(cached_data): + self.memory_cache[cache_key] = cached_data["words"] + self.cache_metadata[cache_key] = cached_data["metadata"] + loaded_count += 1 + logger.info(f"📥 Loaded cache: {cache_key} ({len(cached_data['words'])} words)") + else: + logger.warning(f"⚠️ Invalid cache file: {cache_file}") + + except Exception as e: + logger.error(f"❌ Failed to load cache file {cache_file}: {e}") + + logger.info(f"✅ Cache manager initialized with {loaded_count} cached topics") + + except Exception as e: + logger.error(f"❌ Failed to initialize cache manager: {e}") + + def _validate_cache_data(self, data: Dict[str, Any]) -> bool: + """Validate cache data structure.""" + required_keys = ["words", "metadata", "version"] + if not all(key in data for key in required_keys): + return False + + # Check metadata structure + metadata = data["metadata"] + required_meta_keys = ["created_at", "topic", "difficulty", "word_count"] + if not all(key in metadata for key in required_meta_keys): + return False + + # Check words structure + words = data["words"] + if not isinstance(words, list) or not words: + return True # Empty cache is valid + + # Validate first word structure + sample_word = words[0] + required_word_keys = ["word", "clue", "similarity", "source"] + return all(key in sample_word for key in required_word_keys) + + async def get_cached_words( + self, + topic: str, + difficulty: str = "medium", + max_words: int = 15 + ) -> List[Dict[str, Any]]: + """ + Get cached words for a topic and difficulty. + + Returns cached words if available and fresh, empty list otherwise. + """ + cache_key = self._get_cache_key(topic, difficulty) + + # Check memory cache first + if cache_key in self.memory_cache: + # Check if cache is still fresh + if self._is_cache_fresh(cache_key): + cached_words = self.memory_cache[cache_key] + logger.info(f"📦 Using cached words for {cache_key}: {len(cached_words)} words") + + # Return requested number of words + return cached_words[:max_words] + else: + logger.info(f"⏰ Cache expired for {cache_key}") + await self._remove_expired_cache(cache_key) + + logger.info(f"📭 No fresh cache available for {cache_key}") + return [] + + async def cache_words( + self, + topic: str, + difficulty: str, + words: List[Dict[str, Any]], + source: str = "vector_search" + ) -> bool: + """ + Cache words for future use. 
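+
+        Illustrative usage (a sketch only; assumes an already-initialized
+        WordCacheManager instance named cache_manager):
+            words = [{"word": "TIGER", "clue": "Striped big cat",
+                      "similarity": 0.7, "source": "vector_search"}]
+            ok = await cache_manager.cache_words("animals", "medium", words)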
+ + Args: + topic: Topic name + difficulty: Difficulty level + words: List of word objects with clues + source: Source of the words (e.g., "vector_search") + """ + try: + cache_key = self._get_cache_key(topic, difficulty) + + # Enhance words with caching metadata + enhanced_words = [] + for word in words[:self.max_cached_words_per_topic]: + enhanced_word = { + **word, + "cached_at": datetime.utcnow().isoformat(), + "cache_source": source + } + enhanced_words.append(enhanced_word) + + # Create cache data structure + cache_data = { + "version": self.cache_version, + "words": enhanced_words, + "metadata": { + "topic": topic, + "difficulty": difficulty, + "word_count": len(enhanced_words), + "created_at": datetime.utcnow().isoformat(), + "source": source, + "expiry_hours": self.cache_expiry_hours + } + } + + # Save to file (if cache directory available) + if self.cache_dir is not None: + cache_file = self.cache_dir / f"{cache_key}.json" + with open(cache_file, 'w') as f: + json.dump(cache_data, f, indent=2) + + # Update memory cache + self.memory_cache[cache_key] = enhanced_words + self.cache_metadata[cache_key] = cache_data["metadata"] + + logger.info(f"💾 Cached {len(enhanced_words)} words for {cache_key}") + return True + + except Exception as e: + logger.error(f"❌ Failed to cache words for {topic}/{difficulty}: {e}") + return False + + def _get_cache_key(self, topic: str, difficulty: str) -> str: + """Generate cache key from topic and difficulty.""" + return f"{topic.lower()}_{difficulty.lower()}" + + def _is_cache_fresh(self, cache_key: str) -> bool: + """Check if cache is still fresh (not expired).""" + if cache_key not in self.cache_metadata: + return False + + metadata = self.cache_metadata[cache_key] + created_at = datetime.fromisoformat(metadata["created_at"]) + expiry_hours = metadata.get("expiry_hours", self.cache_expiry_hours) + + expiry_time = created_at + timedelta(hours=expiry_hours) + return datetime.utcnow() < expiry_time + + async def _remove_expired_cache(self, cache_key: str): + """Remove expired cache from memory and disk.""" + try: + # Remove from memory + if cache_key in self.memory_cache: + del self.memory_cache[cache_key] + if cache_key in self.cache_metadata: + del self.cache_metadata[cache_key] + + # Remove from disk (if cache directory available) + if self.cache_dir is not None: + cache_file = self.cache_dir / f"{cache_key}.json" + if cache_file.exists(): + cache_file.unlink() + + logger.info(f"🗑️ Removed expired cache: {cache_key}") + + except Exception as e: + logger.error(f"❌ Failed to remove expired cache {cache_key}: {e}") + + async def warm_cache_from_static(self, static_words: Dict[str, List[Dict[str, Any]]]): + """ + Warm cache with high-quality static words as bootstrap data. + This converts the existing static words to cache format. 
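+
+        Expected static_words shape (illustrative example of the input mapping):
+            {"Animals": [{"word": "DOG", "clue": "Man's best friend"}, ...]}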
+ """ + try: + logger.info("🔥 Warming cache with bootstrap data from static words...") + + cached_count = 0 + for topic, words in static_words.items(): + if not words: + continue + + # Convert static words to cache format + cache_words = [] + for word_obj in words: + cache_word = { + "word": word_obj["word"].upper(), + "clue": word_obj.get("clue", f"Related to {topic.lower()}"), + "similarity": 0.9, # Mark as high quality + "source": "bootstrap_static", + "quality_score": 100 # High quality bootstrap data + } + cache_words.append(cache_word) + + # Cache for different difficulties + for difficulty in ["easy", "medium", "hard"]: + # Filter by difficulty + filtered_words = self._filter_words_by_difficulty(cache_words, difficulty) + + if filtered_words: + success = await self.cache_words(topic, difficulty, filtered_words, "bootstrap") + if success: + cached_count += 1 + + logger.info(f"🔥 Cache warming completed: {cached_count} topic/difficulty combinations cached") + + except Exception as e: + logger.error(f"❌ Failed to warm cache: {e}") + + def _filter_words_by_difficulty(self, words: List[Dict[str, Any]], difficulty: str) -> List[Dict[str, Any]]: + """Filter words by difficulty level.""" + difficulty_map = { + "easy": {"min_len": 3, "max_len": 8}, + "medium": {"min_len": 4, "max_len": 10}, + "hard": {"min_len": 5, "max_len": 15} + } + + criteria = difficulty_map.get(difficulty, difficulty_map["medium"]) + + filtered = [] + for word_obj in words: + word_len = len(word_obj["word"]) + if criteria["min_len"] <= word_len <= criteria["max_len"]: + filtered.append(word_obj) + + return filtered + + def get_cache_stats(self) -> Dict[str, Any]: + """Get cache statistics for monitoring.""" + total_words = sum(len(words) for words in self.memory_cache.values()) + + # Count fresh vs expired caches + fresh_caches = sum(1 for key in self.memory_cache.keys() if self._is_cache_fresh(key)) + total_caches = len(self.memory_cache) + + return { + "total_cached_topics": total_caches, + "fresh_caches": fresh_caches, + "expired_caches": total_caches - fresh_caches, + "total_cached_words": total_words, + "cache_directory": str(self.cache_dir), + "cache_expiry_hours": self.cache_expiry_hours + } + + async def cleanup_expired_caches(self): + """Clean up all expired caches.""" + expired_keys = [ + key for key in self.memory_cache.keys() + if not self._is_cache_fresh(key) + ] + + for key in expired_keys: + await self._remove_expired_cache(key) + + logger.info(f"🧹 Cleaned up {len(expired_keys)} expired caches") \ No newline at end of file diff --git a/crossword-app/backend-py/test-integration/test_boundary_fix.py b/crossword-app/backend-py/test-integration/test_boundary_fix.py new file mode 100644 index 0000000000000000000000000000000000000000..4981b07cb09ee9601bb0e91b8bc510ab6d3cf1da --- /dev/null +++ b/crossword-app/backend-py/test-integration/test_boundary_fix.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 + +import sys +import asyncio +from pathlib import Path + +# Add project root to path +project_root = Path(__file__).parent.parent # Go up from test-integration to backend-py +sys.path.insert(0, str(project_root)) + +from src.services.crossword_generator import CrosswordGenerator + +async def test_boundary_fix(): + """Test that the boundary fix works correctly.""" + + # Sample words that are known to cause boundary issues + test_words = [ + {"word": "COMPUTER", "clue": "Electronic device"}, + {"word": "MACHINE", "clue": "Device with moving parts"}, + {"word": "SCIENCE", "clue": "Systematic study"}, + {"word": 
"EXPERT", "clue": "Specialist"}, + {"word": "CODE", "clue": "Programming text"}, + {"word": "DATA", "clue": "Information"} + ] + + generator = CrosswordGenerator() + + print("🧪 Testing Boundary Fix") + print("=" * 50) + + # Generate a crossword + result = generator._create_grid(test_words) + + if not result: + print("❌ Grid generation failed") + return False + + grid = result["grid"] + placed_words = result["placed_words"] + + print(f"✅ Generated grid with {len(placed_words)} words") + print(f"Grid size: {len(grid)}x{len(grid[0])}") + + # Display the grid + print("\nGenerated Grid:") + for i, row in enumerate(grid): + row_str = " ".join(cell if cell != "." else " " for cell in row) + print(f"{i:2d} | {row_str}") + + print(f"\nPlaced Words:") + for word in placed_words: + print(f" {word['word']} at ({word['row']},{word['col']}) {word['direction']}") + + # Analyze for boundary violations + print(f"\n🔍 Analyzing for boundary violations...") + + violations = [] + + # Check horizontal words + for r in range(len(grid)): + current_word = "" + word_start = -1 + + for c in range(len(grid[r])): + if grid[r][c] != ".": + if current_word == "": + word_start = c + current_word += grid[r][c] + else: + if current_word: + # Word ended - check if it's a valid placed word + is_valid_word = any( + placed['word'] == current_word and + placed['row'] == r and + placed['col'] == word_start and + placed['direction'] == 'horizontal' + for placed in placed_words + ) + if not is_valid_word and len(current_word) > 1: + violations.append(f"Invalid horizontal word '{current_word}' at ({r},{word_start})") + current_word = "" + + # Check word at end of row + if current_word: + is_valid_word = any( + placed['word'] == current_word and + placed['row'] == r and + placed['col'] == word_start and + placed['direction'] == 'horizontal' + for placed in placed_words + ) + if not is_valid_word and len(current_word) > 1: + violations.append(f"Invalid horizontal word '{current_word}' at ({r},{word_start})") + + # Check vertical words + for c in range(len(grid[0])): + current_word = "" + word_start = -1 + + for r in range(len(grid)): + if grid[r][c] != ".": + if current_word == "": + word_start = r + current_word += grid[r][c] + else: + if current_word: + # Word ended - check if it's a valid placed word + is_valid_word = any( + placed['word'] == current_word and + placed['row'] == word_start and + placed['col'] == c and + placed['direction'] == 'vertical' + for placed in placed_words + ) + if not is_valid_word and len(current_word) > 1: + violations.append(f"Invalid vertical word '{current_word}' at ({word_start},{c})") + current_word = "" + + # Check word at end of column + if current_word: + is_valid_word = any( + placed['word'] == current_word and + placed['row'] == word_start and + placed['col'] == c and + placed['direction'] == 'vertical' + for placed in placed_words + ) + if not is_valid_word and len(current_word) > 1: + violations.append(f"Invalid vertical word '{current_word}' at ({word_start},{c})") + + # Report results + if violations: + print(f"❌ Found {len(violations)} boundary violations:") + for violation in violations: + print(f" - {violation}") + return False + else: + print(f"✅ No boundary violations found!") + print(f"✅ All words in grid are properly placed and bounded") + return True + +if __name__ == "__main__": + success = asyncio.run(test_boundary_fix()) + if success: + print(f"\n🎉 Boundary fix is working correctly!") + else: + print(f"\n💥 Boundary fix needs more work!") \ No newline at end of file diff --git 
a/crossword-app/backend-py/test-integration/test_bounds_comprehensive.py b/crossword-app/backend-py/test-integration/test_bounds_comprehensive.py new file mode 100644 index 0000000000000000000000000000000000000000..143919ee4aef2406b08b1e9f4f604c8f3a4a0aba --- /dev/null +++ b/crossword-app/backend-py/test-integration/test_bounds_comprehensive.py @@ -0,0 +1,266 @@ +#!/usr/bin/env python3 +""" +Comprehensive test for bounds checking fixes in crossword generator. +""" + +import asyncio +import sys +import pytest +from pathlib import Path + +# Add project root to path +project_root = Path(__file__).parent.parent # Go up from test-integration to backend-py +sys.path.insert(0, str(project_root)) + +from src.services.crossword_generator_fixed import CrosswordGeneratorFixed + +class TestBoundsChecking: + """Test all bounds checking in crossword generator.""" + + def setup_method(self): + """Setup test instance.""" + self.generator = CrosswordGeneratorFixed(vector_service=None) + + def test_can_place_word_bounds_horizontal(self): + """Test _can_place_word bounds checking for horizontal placement.""" + # Create small grid + grid = [["." for _ in range(5)] for _ in range(5)] + + # Test cases that should fail bounds checking + assert not self.generator._can_place_word(grid, "TOOLONG", 2, 1, "horizontal") # Word too long + assert not self.generator._can_place_word(grid, "TEST", -1, 1, "horizontal") # Negative row + assert not self.generator._can_place_word(grid, "TEST", 1, -1, "horizontal") # Negative col + assert not self.generator._can_place_word(grid, "TEST", 5, 1, "horizontal") # Row >= size + assert not self.generator._can_place_word(grid, "TEST", 1, 5, "horizontal") # Col >= size + assert not self.generator._can_place_word(grid, "TEST", 1, 3, "horizontal") # Word extends beyond grid + + # Test cases that should pass + assert self.generator._can_place_word(grid, "TEST", 2, 1, "horizontal") # Valid placement + assert self.generator._can_place_word(grid, "A", 0, 0, "horizontal") # Single letter + + def test_can_place_word_bounds_vertical(self): + """Test _can_place_word bounds checking for vertical placement.""" + # Create small grid + grid = [["." for _ in range(5)] for _ in range(5)] + + # Test cases that should fail bounds checking + assert not self.generator._can_place_word(grid, "TOOLONG", 1, 2, "vertical") # Word too long + assert not self.generator._can_place_word(grid, "TEST", -1, 1, "vertical") # Negative row + assert not self.generator._can_place_word(grid, "TEST", 1, -1, "vertical") # Negative col + assert not self.generator._can_place_word(grid, "TEST", 5, 1, "vertical") # Row >= size + assert not self.generator._can_place_word(grid, "TEST", 1, 5, "vertical") # Col >= size + assert not self.generator._can_place_word(grid, "TEST", 3, 1, "vertical") # Word extends beyond grid + + # Test cases that should pass + assert self.generator._can_place_word(grid, "TEST", 1, 2, "vertical") # Valid placement + assert self.generator._can_place_word(grid, "A", 0, 0, "vertical") # Single letter + + def test_place_word_bounds_horizontal(self): + """Test _place_word bounds checking for horizontal placement.""" + grid = [["." 
for _ in range(5)] for _ in range(5)] + + # Valid placement should work + original_state = self.generator._place_word(grid, "TEST", 2, 1, "horizontal") + assert len(original_state) == 4 + assert grid[2][1] == "T" + assert grid[2][4] == "T" + + # Test out-of-bounds placement should raise IndexError + with pytest.raises(IndexError): + self.generator._place_word(grid, "TOOLONG", 2, 1, "horizontal") + + with pytest.raises(IndexError): + self.generator._place_word(grid, "TEST", -1, 1, "horizontal") + + with pytest.raises(IndexError): + self.generator._place_word(grid, "TEST", 5, 1, "horizontal") + + with pytest.raises(IndexError): + self.generator._place_word(grid, "TEST", 1, 5, "horizontal") + + def test_place_word_bounds_vertical(self): + """Test _place_word bounds checking for vertical placement.""" + grid = [["." for _ in range(5)] for _ in range(5)] + + # Valid placement should work + original_state = self.generator._place_word(grid, "TEST", 1, 2, "vertical") + assert len(original_state) == 4 + assert grid[1][2] == "T" + assert grid[4][2] == "T" + + # Test out-of-bounds placement should raise IndexError + with pytest.raises(IndexError): + self.generator._place_word(grid, "TOOLONG", 1, 2, "vertical") + + with pytest.raises(IndexError): + self.generator._place_word(grid, "TEST", -1, 2, "vertical") + + with pytest.raises(IndexError): + self.generator._place_word(grid, "TEST", 5, 2, "vertical") + + with pytest.raises(IndexError): + self.generator._place_word(grid, "TEST", 2, 5, "vertical") + + def test_remove_word_bounds(self): + """Test _remove_word bounds checking.""" + grid = [["." for _ in range(5)] for _ in range(5)] + + # Place a word first + original_state = self.generator._place_word(grid, "TEST", 2, 1, "horizontal") + + # Normal removal should work + self.generator._remove_word(grid, original_state) + assert grid[2][1] == "." + + # Test invalid original state should raise IndexError + bad_state = [{"row": -1, "col": 1, "value": "."}] + with pytest.raises(IndexError): + self.generator._remove_word(grid, bad_state) + + bad_state = [{"row": 5, "col": 1, "value": "."}] + with pytest.raises(IndexError): + self.generator._remove_word(grid, bad_state) + + bad_state = [{"row": 1, "col": -1, "value": "."}] + with pytest.raises(IndexError): + self.generator._remove_word(grid, bad_state) + + bad_state = [{"row": 1, "col": 5, "value": "."}] + with pytest.raises(IndexError): + self.generator._remove_word(grid, bad_state) + + def test_create_simple_cross_bounds(self): + """Test _create_simple_cross bounds checking.""" + # Test with words that have intersections + word_list = ["CAT", "TOY"] # 'T' intersection + word_objs = [{"word": w, "clue": f"Clue for {w}"} for w in word_list] + + # This should work without bounds errors + result = self.generator._create_simple_cross(word_list, word_objs) + assert result is not None + assert len(result["placed_words"]) == 2 + + # Test with words that might cause issues + word_list = ["A", "A"] # Same single letter + word_objs = [{"word": w, "clue": f"Clue for {w}"} for w in word_list] + + # This should not crash with bounds errors + result = self.generator._create_simple_cross(word_list, word_objs) + # May return None due to placement issues, but should not crash + + def test_trim_grid_bounds(self): + """Test _trim_grid bounds checking.""" + # Create a grid with words placed + grid = [["." 
for _ in range(10)] for _ in range(10)] + + # Place some letters + grid[5][3] = "T" + grid[5][4] = "E" + grid[5][5] = "S" + grid[5][6] = "T" + + placed_words = [{ + "word": "TEST", + "row": 5, + "col": 3, + "direction": "horizontal", + "number": 1 + }] + + # This should work without bounds errors + result = self.generator._trim_grid(grid, placed_words) + assert result is not None + assert "grid" in result + assert "placed_words" in result + + # Test with edge case placements + placed_words = [{ + "word": "A", + "row": 0, + "col": 0, + "direction": "horizontal", + "number": 1 + }] + + grid[0][0] = "A" + result = self.generator._trim_grid(grid, placed_words) + assert result is not None + + def test_calculation_placement_score_bounds(self): + """Test _calculate_placement_score bounds checking.""" + grid = [["." for _ in range(5)] for _ in range(5)] + + # Place some letters for intersection testing + grid[2][2] = "T" + grid[2][3] = "E" + + placement = {"row": 2, "col": 2, "direction": "horizontal"} + placed_words = [] + + # This should work without bounds errors + score = self.generator._calculate_placement_score(grid, "TEST", placement, placed_words) + assert isinstance(score, int) + + # Test with out-of-bounds placement (should handle gracefully) + placement = {"row": 4, "col": 3, "direction": "horizontal"} # Would extend beyond grid + score = self.generator._calculate_placement_score(grid, "TEST", placement, placed_words) + assert isinstance(score, int) + + # Test with negative placement (should handle gracefully) + placement = {"row": -1, "col": 0, "direction": "horizontal"} + score = self.generator._calculate_placement_score(grid, "TEST", placement, placed_words) + assert isinstance(score, int) + +async def test_full_generation_stress(): + """Stress test full generation to catch index errors.""" + generator = CrosswordGeneratorFixed(vector_service=None) + + # Mock word selection to return test words + test_words = [ + {"word": "CAT", "clue": "Feline pet"}, + {"word": "DOG", "clue": "Man's best friend"}, + {"word": "BIRD", "clue": "Flying animal"}, + {"word": "FISH", "clue": "Aquatic animal"}, + {"word": "ELEPHANT", "clue": "Large mammal"}, + {"word": "TIGER", "clue": "Striped cat"}, + {"word": "HORSE", "clue": "Riding animal"}, + {"word": "BEAR", "clue": "Large carnivore"}, + {"word": "WOLF", "clue": "Pack animal"}, + {"word": "LION", "clue": "King of jungle"} + ] + + generator._select_words = lambda topics, difficulty, use_ai: test_words + + # Run multiple generation attempts + for i in range(20): + try: + result = await generator.generate_puzzle(["animals"], "medium", use_ai=False) + if result: + print(f"✅ Generation {i+1} succeeded") + else: + print(f"⚠️ Generation {i+1} returned None") + except IndexError as e: + print(f"❌ Index error in generation {i+1}: {e}") + raise + except Exception as e: + print(f"⚠️ Other error in generation {i+1}: {e}") + # Don't raise for other errors, just continue + + print("✅ All stress test generations completed without index errors!") + +if __name__ == "__main__": + # Run tests + print("🧪 Running comprehensive bounds checking tests...") + + # Run pytest on this file + import subprocess + result = subprocess.run([sys.executable, "-m", "pytest", __file__, "-v"], + capture_output=True, text=True) + + print("STDOUT:", result.stdout) + if result.stderr: + print("STDERR:", result.stderr) + + # Run stress test + print("\n🏋️ Running stress test...") + asyncio.run(test_full_generation_stress()) \ No newline at end of file diff --git 
a/crossword-app/backend-py/test-integration/test_bounds_fix.py b/crossword-app/backend-py/test-integration/test_bounds_fix.py new file mode 100644 index 0000000000000000000000000000000000000000..6f06a1ffba164cd58d2f220b8d9fa3f2aa78adde --- /dev/null +++ b/crossword-app/backend-py/test-integration/test_bounds_fix.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +""" +Quick test to verify the bounds checking fix. +""" + +import sys +from pathlib import Path + +# Add project root to path +project_root = Path(__file__).parent.parent # Go up from test-integration to backend-py +sys.path.insert(0, str(project_root)) + +from src.services.crossword_generator_fixed import CrosswordGeneratorFixed + +def test_bounds_checking(): + """Test that placement score calculation doesn't crash with out-of-bounds access.""" + print("🧪 Testing bounds checking fix...") + + generator = CrosswordGeneratorFixed() + + # Create a small grid + grid = [["." for _ in range(5)] for _ in range(5)] + + # Test placement that would go out of bounds + placement = { + "row": 3, # Starting at row 3 + "col": 2, # Starting at col 2 + "direction": "vertical" + } + + # Word that would extend beyond grid (3+8=11 > 5) + word = "ELEPHANT" # 8 letters, would go from row 3 to row 10 (out of bounds) + + try: + # This should NOT crash with bounds checking + score = generator._calculate_placement_score(grid, word, placement, []) + print(f"✅ Success! Placement score calculated: {score}") + print("✅ Bounds checking is working correctly") + return True + except IndexError as e: + print(f"❌ IndexError still occurs: {e}") + return False + except Exception as e: + print(f"❌ Other error: {e}") + return False + +def test_valid_placement(): + """Test that valid placements still work correctly.""" + print("\n🧪 Testing valid placement scoring...") + + generator = CrosswordGeneratorFixed() + + # Create a grid with some letters + grid = [["." for _ in range(8)] for _ in range(8)] + grid[2][2] = "A" # Place an 'A' at position (2,2) + + # Test placement that intersects properly + placement = { + "row": 2, + "col": 1, + "direction": "horizontal" + } + + word = "CAT" # Should intersect at the 'A' + + try: + score = generator._calculate_placement_score(grid, word, placement, []) + print(f"✅ Valid placement score: {score}") + + # Should have intersection bonus (score > 100) + if score > 300: # Base 100 + intersection 200 + print("✅ Intersection detection working") + else: + print(f"⚠️ Expected intersection bonus, got score {score}") + + return True + except Exception as e: + print(f"❌ Error with valid placement: {e}") + return False + +if __name__ == "__main__": + print("🔧 Testing crossword generator bounds fix\n") + + test1_pass = test_bounds_checking() + test2_pass = test_valid_placement() + + if test1_pass and test2_pass: + print("\n✅ All tests passed! The bounds checking fix is working.") + else: + print("\n❌ Some tests failed. More work needed.") \ No newline at end of file diff --git a/crossword-app/backend-py/test-integration/test_cache_permissions.py b/crossword-app/backend-py/test-integration/test_cache_permissions.py new file mode 100644 index 0000000000000000000000000000000000000000..70650d2ddf907c9370cd669079e3aa3c571529b9 --- /dev/null +++ b/crossword-app/backend-py/test-integration/test_cache_permissions.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +""" +Test cache permission handling. 
+""" + +import asyncio +import sys +import tempfile +import os +from pathlib import Path +from unittest.mock import patch + +# Add project root to path +project_root = Path(__file__).parent.parent # Go up from test-integration to backend-py +sys.path.insert(0, str(project_root)) + +from src.services.word_cache import WordCacheManager + +async def test_permission_denied(): + """Test cache handling when permissions are denied.""" + print("🧪 Testing permission denied scenario...") + + # Mock Path.mkdir to raise PermissionError + with patch.object(Path, 'mkdir', side_effect=PermissionError("Permission denied")): + try: + cache_manager = WordCacheManager(cache_dir="/some/protected/path") + await cache_manager.initialize() + + print(f"✅ Cache manager created with fallback: {cache_manager.cache_dir}") + + # Test caching still works (in-memory or temp dir) + test_words = [ + {"word": "TEST", "clue": "A test word", "similarity": 0.8, "source": "test"} + ] + + success = await cache_manager.cache_words("TestTopic", "medium", test_words) + print(f"✅ Caching {'succeeded' if success else 'failed'}") + + cached_words = await cache_manager.get_cached_words("TestTopic", "medium", 5) + print(f"✅ Retrieved {len(cached_words)} cached words") + + return True + + except Exception as e: + print(f"❌ Permission handling failed: {e}") + return False + +async def test_in_memory_mode(): + """Test pure in-memory cache mode.""" + print("\n🧪 Testing in-memory only mode...") + + # Force in-memory mode by setting cache_dir to None + cache_manager = WordCacheManager() + cache_manager.cache_dir = None # Force in-memory mode + + await cache_manager.initialize() + + # Test that caching still works in memory + test_words = [ + {"word": "MEMORY", "clue": "Stored in RAM", "similarity": 0.9, "source": "test"} + ] + + success = await cache_manager.cache_words("Memory", "medium", test_words) + print(f"✅ In-memory caching {'succeeded' if success else 'failed'}") + + cached_words = await cache_manager.get_cached_words("Memory", "medium", 5) + print(f"✅ Retrieved {len(cached_words)} words from memory") + + stats = cache_manager.get_cache_stats() + print(f"📊 Cache stats: {stats}") + + return len(cached_words) > 0 + +async def main(): + """Run permission tests.""" + print("🔐 Testing Cache Permission Handling\n") + + test1 = await test_permission_denied() + test2 = await test_in_memory_mode() + + if test1 and test2: + print("\n✅ All permission tests passed!") + print("📦 Cache system gracefully handles permission issues") + else: + print("\n❌ Some permission tests failed") + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/crossword-app/backend-py/test-integration/test_cache_system.py b/crossword-app/backend-py/test-integration/test_cache_system.py new file mode 100644 index 0000000000000000000000000000000000000000..3285fed3edaede3972dc7baa1fbdcbf41e39404d --- /dev/null +++ b/crossword-app/backend-py/test-integration/test_cache_system.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python3 +""" +Test the new cache system to verify it works correctly. 
+""" + +import asyncio +import sys +import tempfile +import shutil +from pathlib import Path + +# Add project root to path +project_root = Path(__file__).parent.parent # Go up from test-integration to backend-py +sys.path.insert(0, str(project_root)) + +from src.services.word_cache import WordCacheManager + +async def test_cache_system(): + """Test the cache system functionality.""" + print("🧪 Testing Word Cache System\n") + + # Create temporary cache directory + temp_dir = tempfile.mkdtemp() + print(f"📁 Using temporary cache directory: {temp_dir}") + + try: + # Initialize cache manager + cache_manager = WordCacheManager(cache_dir=temp_dir) + await cache_manager.initialize() + + # Test 1: Cache some words + print("\n🧪 Test 1: Caching words") + test_words = [ + {"word": "ELEPHANT", "clue": "Large mammal with trunk", "similarity": 0.8, "source": "vector_search"}, + {"word": "TIGER", "clue": "Striped big cat", "similarity": 0.7, "source": "vector_search"}, + {"word": "LION", "clue": "King of jungle", "similarity": 0.75, "source": "vector_search"}, + ] + + success = await cache_manager.cache_words("Animals", "medium", test_words) + print(f"✅ Cache operation {'succeeded' if success else 'failed'}") + + # Test 2: Retrieve cached words + print("\n🧪 Test 2: Retrieving cached words") + cached_words = await cache_manager.get_cached_words("Animals", "medium", 5) + print(f"📦 Retrieved {len(cached_words)} cached words") + + if cached_words: + print("📝 Cached words:") + for word in cached_words: + print(f" - {word['word']}: {word['clue']}") + + # Test 3: Cache statistics + print("\n🧪 Test 3: Cache statistics") + stats = cache_manager.get_cache_stats() + print(f"📊 Cache stats: {stats}") + + # Test 4: Test non-existent topic + print("\n🧪 Test 4: Non-existent topic") + empty_words = await cache_manager.get_cached_words("NonExistent", "medium", 5) + print(f"📭 Non-existent topic returned {len(empty_words)} words (expected 0)") + + # Test 5: Test bootstrap warming (if static data exists) + print("\n🧪 Test 5: Bootstrap warming simulation") + static_data = { + "Technology": [ + {"word": "COMPUTER", "clue": "Electronic device"}, + {"word": "ROBOT", "clue": "Automated machine"}, + ] + } + await cache_manager.warm_cache_from_static(static_data) + + tech_words = await cache_manager.get_cached_words("Technology", "medium", 5) + print(f"🔥 Bootstrap warming: Retrieved {len(tech_words)} tech words") + + print("\n✅ All cache system tests completed!") + return True + + except Exception as e: + print(f"\n❌ Cache system test failed: {e}") + import traceback + traceback.print_exc() + return False + + finally: + # Cleanup temporary directory + shutil.rmtree(temp_dir) + print(f"🧹 Cleaned up temporary directory") + +async def test_vector_integration(): + """Test integration with vector search service.""" + print("\n🔗 Testing Vector Search Integration\n") + + try: + from src.services.vector_search import VectorSearchService + + # Create vector service (won't initialize model, just test cache integration) + vector_service = VectorSearchService() + + # Test cache fallback without initialization + print("🧪 Testing cache fallback when vector search not initialized") + fallback_words = await vector_service._get_cached_fallback("Animals", "medium", 5) + print(f"📦 Fallback returned {len(fallback_words)} words") + + print("✅ Vector integration test completed!") + return True + + except Exception as e: + print(f"❌ Vector integration test failed: {e}") + import traceback + traceback.print_exc() + return False + +async def main(): + 
"""Run all tests.""" + print("🚀 Testing Cache System Replacement\n") + + cache_test = await test_cache_system() + integration_test = await test_vector_integration() + + if cache_test and integration_test: + print("\n🎉 All tests passed! Cache system is working correctly.") + print("📦 Static word dependencies have been successfully replaced with caching.") + else: + print("\n❌ Some tests failed. Check the output above.") + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/crossword-app/backend-py/test-integration/test_crossword_display.py b/crossword-app/backend-py/test-integration/test_crossword_display.py new file mode 100644 index 0000000000000000000000000000000000000000..9bc359f29650d09649acb10ad202633af22ebe0a --- /dev/null +++ b/crossword-app/backend-py/test-integration/test_crossword_display.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python3 + +import json + +# The crossword grid from the API response +grid = [ + [".", ".", ".", ".", ".", ".", ".", ".", "D", ".", "."], + [".", ".", ".", ".", ".", "M", "I", "C", "E", ".", "."], + [".", ".", ".", "H", "U", "M", "A", "N", "E", ".", "."], + [".", ".", ".", "W", "H", "A", "L", "E", "R", ".", "."], + [".", "P", "Z", ".", ".", ".", "L", ".", ".", ".", "."], + ["Z", "O", "O", "L", "O", "G", "I", "C", "A", "L", "."], + [".", "U", "O", ".", ".", ".", "G", "E", "E", "S", "E"], + [".", "L", "L", "H", "U", "M", "A", "N", "I", "T", "Y"], + [".", "T", "O", ".", ".", ".", "T", "I", "G", "E", "R"], + [".", "R", "G", ".", "B", "I", "O", "L", "O", "G", "Y"], + [".", "Y", "Y", ".", ".", ".", "R", ".", ".", ".", "."] +] + +print("Generated Crossword Grid:") +print("=" * 50) + +for i, row in enumerate(grid): + row_str = "" + for j, cell in enumerate(row): + if cell == ".": + row_str += " " # Empty space + else: + row_str += f"{cell} " + print(f"{i:2d} | {row_str}") + +print("=" * 50) + +# Check for word boundaries +def check_word_boundaries(grid): + issues = [] + + # Horizontal words + for r in range(len(grid)): + in_word = False + word_start = -1 + for c in range(len(grid[r])): + if grid[r][c] != ".": + if not in_word: + in_word = True + word_start = c + else: + if in_word: + # Word ended + word_length = c - word_start + word = "".join(grid[r][word_start:c]) + print(f"Horizontal word at ({r},{word_start}): {word} (length {word_length})") + in_word = False + + # Check if word extends to end of row + if in_word: + word_length = len(grid[r]) - word_start + word = "".join(grid[r][word_start:]) + print(f"Horizontal word at ({r},{word_start}): {word} (length {word_length})") + + # Vertical words + for c in range(len(grid[0])): + in_word = False + word_start = -1 + for r in range(len(grid)): + if grid[r][c] != ".": + if not in_word: + in_word = True + word_start = r + else: + if in_word: + # Word ended + word_length = r - word_start + word = "".join([grid[i][c] for i in range(word_start, r)]) + print(f"Vertical word at ({word_start},{c}): {word} (length {word_length})") + in_word = False + + # Check if word extends to end of column + if in_word: + word_length = len(grid) - word_start + word = "".join([grid[i][c] for i in range(word_start, len(grid))]) + print(f"Vertical word at ({word_start},{c}): {word} (length {word_length})") + +print("\nWord boundary analysis:") +check_word_boundaries(grid) \ No newline at end of file diff --git a/crossword-app/backend-py/test-integration/test_final_crossword_validation.py b/crossword-app/backend-py/test-integration/test_final_crossword_validation.py new file mode 100644 index 
0000000000000000000000000000000000000000..b33093aad10b7dc4adb1aff67c90d867d368683a --- /dev/null +++ b/crossword-app/backend-py/test-integration/test_final_crossword_validation.py @@ -0,0 +1,239 @@ +#!/usr/bin/env python3 +""" +Final test to validate that the crossword generator produces clean grids +without unwanted prefixes, suffixes, or unintended letter sequences. +""" + +import sys +from pathlib import Path + +# Add project root to path +project_root = Path(__file__).parent.parent # Go up from test-integration to backend-py +sys.path.insert(0, str(project_root)) + +from src.services.crossword_generator_fixed import CrosswordGeneratorFixed + +def test_clean_crossword_generation(): + """Test that crossword generation produces clean grids without unwanted sequences.""" + + print("🧪 Final Crossword Validation Test\n") + + generator = CrosswordGeneratorFixed(vector_service=None) + + # Test multiple scenarios that previously caused issues + test_scenarios = [ + { + "name": "Basic Technology Words", + "words": [ + {"word": "COMPUTER", "clue": "Electronic device"}, + {"word": "MACHINE", "clue": "Device with moving parts"}, + {"word": "SCIENCE", "clue": "Systematic study"}, + {"word": "EXPERT", "clue": "Specialist"}, + ] + }, + { + "name": "Similar Words (MACHINE/MACHINERY)", + "words": [ + {"word": "MACHINE", "clue": "Device with moving parts"}, + {"word": "MACHINERY", "clue": "Mechanical equipment"}, + {"word": "TECHNOLOGY", "clue": "Applied science"}, + {"word": "RESEARCH", "clue": "Investigation"}, + ] + }, + { + "name": "Animal Words", + "words": [ + {"word": "ELEPHANT", "clue": "Large mammal"}, + {"word": "TIGER", "clue": "Striped cat"}, + {"word": "BEAR", "clue": "Large carnivore"}, + {"word": "HORSE", "clue": "Riding animal"}, + {"word": "BIRD", "clue": "Flying creature"}, + ] + }, + { + "name": "Mixed Length Words", + "words": [ + {"word": "CAT", "clue": "Feline pet"}, + {"word": "COMPUTER", "clue": "Electronic device"}, + {"word": "A", "clue": "First letter"}, # Edge case + {"word": "TECHNOLOGY", "clue": "Applied science"}, + ] + } + ] + + all_passed = True + + for i, scenario in enumerate(test_scenarios): + print(f"=" * 60) + print(f"TEST {i+1}: {scenario['name']}") + print(f"=" * 60) + + words = scenario["words"] + print(f"Testing with {len(words)} words: {[w['word'] for w in words]}") + + try: + result = generator._create_grid(words) + + if result: + grid = result["grid"] + placed_words = result["placed_words"] + clues = result["clues"] + + print(f"✅ Grid generated successfully") + print(f" Grid size: {len(grid)}x{len(grid[0])}") + print(f" Words placed: {len(placed_words)}") + print(f" Clues generated: {len(clues)}") + + # Print the grid + print("\nGenerated Grid:") + print_clean_grid(grid) + + # Validate the grid + validation_result = validate_grid_cleanliness(grid, placed_words) + + if validation_result["is_clean"]: + print("✅ Grid validation: CLEAN - No unwanted sequences") + else: + print("❌ Grid validation: ISSUES FOUND") + for issue in validation_result["issues"]: + print(f" - {issue}") + all_passed = False + + # Print word placements + print("\nWord Placements:") + for j, word_info in enumerate(placed_words): + print(f" {j+1}. 
{word_info['word']} at ({word_info['row']}, {word_info['col']}) {word_info['direction']}") + + else: + print("⚠️ Grid generation returned None - algorithm may be too strict") + # This might happen if validation is too restrictive + + except Exception as e: + print(f"❌ Grid generation failed: {e}") + all_passed = False + + print() + + # Summary + print("=" * 60) + print("FINAL SUMMARY") + print("=" * 60) + + if all_passed: + print("🎉 ALL TESTS PASSED!") + print("✅ Crossword generator produces clean grids without unwanted sequences") + print("✅ No more issues with unwanted prefixes, suffixes, or letter combinations") + else: + print("❌ Some tests failed - additional improvements needed") + + return all_passed + +def print_clean_grid(grid): + """Print grid in a clean, readable format.""" + if not grid: + print(" Empty grid") + return + + # Print column headers + print(" ", end="") + for c in range(len(grid[0])): + print(f"{c:2d}", end="") + print() + + # Print rows + for r in range(len(grid)): + print(f" {r:2d}: ", end="") + for c in range(len(grid[0])): + cell = grid[r][c] + if cell == ".": + print(" .", end="") + else: + print(f" {cell}", end="") + print() + +def validate_grid_cleanliness(grid, placed_words): + """Validate that grid contains only intended words without unwanted sequences.""" + + issues = [] + + # Find all letter sequences in the grid + all_sequences = [] + + # Horizontal sequences + for r in range(len(grid)): + current_seq = "" + start_col = None + + for c in range(len(grid[0])): + if grid[r][c] != ".": + if start_col is None: + start_col = c + current_seq += grid[r][c] + else: + if current_seq and len(current_seq) > 1: + all_sequences.append((r, start_col, "horizontal", current_seq)) + current_seq = "" + start_col = None + + # Handle end of row + if current_seq and len(current_seq) > 1: + all_sequences.append((r, start_col, "horizontal", current_seq)) + + # Vertical sequences + for c in range(len(grid[0])): + current_seq = "" + start_row = None + + for r in range(len(grid)): + if grid[r][c] != ".": + if start_row is None: + start_row = r + current_seq += grid[r][c] + else: + if current_seq and len(current_seq) > 1: + all_sequences.append((start_row, c, "vertical", current_seq)) + current_seq = "" + start_row = None + + # Handle end of column + if current_seq and len(current_seq) > 1: + all_sequences.append((start_row, c, "vertical", current_seq)) + + # Check if all sequences correspond to intended words + intended_words = set() + for word_info in placed_words: + key = (word_info["row"], word_info["col"], word_info["direction"], word_info["word"]) + intended_words.add(key) + + # Check each sequence + for row, col, direction, sequence in all_sequences: + key = (row, col, direction, sequence) + if key not in intended_words: + issues.append(f"Unintended sequence: '{sequence}' at ({row}, {col}) {direction}") + + # Check for specific problematic patterns + for row, col, direction, sequence in all_sequences: + # Check for 2-letter sequences (should not exist) + if len(sequence) == 2: + issues.append(f"Unwanted 2-letter sequence: '{sequence}' at ({row}, {col}) {direction}") + + # Check for words that appear to extend beyond their intended boundaries + # But exclude cases where both the shorter and longer words are intentionally placed + placed_word_set = {w["word"] for w in placed_words} + for word_info in placed_words: + word = word_info["word"] + if word in sequence and sequence != word: + if sequence.startswith(word) or sequence.endswith(word): + # Check if the sequence itself 
is also an intended word + if sequence not in placed_word_set: + issues.append(f"Word '{word}' appears extended as '{sequence}' at ({row}, {col}) {direction}") + + return { + "is_clean": len(issues) == 0, + "issues": issues, + "total_sequences": len(all_sequences), + "intended_sequences": len(intended_words) + } + +if __name__ == "__main__": + test_clean_crossword_generation() \ No newline at end of file diff --git a/crossword-app/backend-py/test-integration/test_final_validation.py b/crossword-app/backend-py/test-integration/test_final_validation.py new file mode 100644 index 0000000000000000000000000000000000000000..9ff602b35c264aaef65284bba910eea4bb128992 --- /dev/null +++ b/crossword-app/backend-py/test-integration/test_final_validation.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python3 + +import requests +import json + +def test_api_crossword(): + """Test that the API generates valid crosswords without boundary issues.""" + + url = "http://localhost:7860/api/generate" + data = { + "topics": ["animals"], + "difficulty": "medium", + "useAI": True + } + + print("🧪 Testing API Crossword Generation") + print("=" * 50) + + try: + response = requests.post(url, json=data, timeout=30) + + if response.status_code != 200: + print(f"❌ API Error: {response.status_code}") + print(response.text) + return False + + result = response.json() + + if 'detail' in result: + print(f"❌ Error: {result['detail']}") + return False + + grid = result['grid'] + clues = result['clues'] + metadata = result['metadata'] + + print(f"✅ Generated crossword with {metadata['wordCount']} words") + print(f"Grid size: {len(grid)}x{len(grid[0])}") + print(f"AI Generated: {metadata['aiGenerated']}") + + # Validate boundary issues + violations = validate_word_boundaries(grid, clues) + + if violations: + print(f"\n❌ Found {len(violations)} boundary violations:") + for violation in violations: + print(f" - {violation}") + return False + else: + print(f"\n✅ No boundary violations found!") + print(f"✅ All words are properly bounded") + + # Display sample of the grid + print(f"\nSample Grid (first 8 rows):") + for i, row in enumerate(grid[:8]): + row_str = " ".join(cell if cell != "." 
else " " for cell in row) + print(f"{i:2d} | {row_str}") + + return True + + except Exception as e: + print(f"❌ Test failed: {e}") + return False + +def validate_word_boundaries(grid, clues): + """Validate that all words in the grid have proper boundaries.""" + violations = [] + + # Create a set of valid word placements from clues + valid_words = set() + for clue in clues: + word = clue['word'] + pos = clue['position'] + direction = clue['direction'] + row, col = pos['row'], pos['col'] + + if direction == 'across': + valid_words.add((word, row, col, 'horizontal')) + else: + valid_words.add((word, row, col, 'vertical')) + + # Check all horizontal sequences in grid + for r in range(len(grid)): + current_word = "" + word_start = -1 + + for c in range(len(grid[r])): + if grid[r][c] != ".": + if current_word == "": + word_start = c + current_word += grid[r][c] + else: + if current_word and len(current_word) > 1: + # Check if this is a valid placed word + if (current_word, r, word_start, 'horizontal') not in valid_words: + violations.append(f"Invalid horizontal word '{current_word}' at ({r},{word_start})") + current_word = "" + + # Check word at end of row + if current_word and len(current_word) > 1: + if (current_word, r, word_start, 'horizontal') not in valid_words: + violations.append(f"Invalid horizontal word '{current_word}' at ({r},{word_start})") + + # Check all vertical sequences in grid + for c in range(len(grid[0])): + current_word = "" + word_start = -1 + + for r in range(len(grid)): + if grid[r][c] != ".": + if current_word == "": + word_start = r + current_word += grid[r][c] + else: + if current_word and len(current_word) > 1: + # Check if this is a valid placed word + if (current_word, word_start, c, 'vertical') not in valid_words: + violations.append(f"Invalid vertical word '{current_word}' at ({word_start},{c})") + current_word = "" + + # Check word at end of column + if current_word and len(current_word) > 1: + if (current_word, word_start, c, 'vertical') not in valid_words: + violations.append(f"Invalid vertical word '{current_word}' at ({word_start},{c})") + + return violations + +if __name__ == "__main__": + success = test_api_crossword() + if success: + print(f"\n🎉 All tests passed! The boundary fix is working correctly.") + else: + print(f"\n💥 Tests failed! The boundary issue still exists.") \ No newline at end of file diff --git a/crossword-app/backend-py/test-integration/test_intersection_issues.py b/crossword-app/backend-py/test-integration/test_intersection_issues.py new file mode 100644 index 0000000000000000000000000000000000000000..1363145a634d68e6a09cabf34b89ad87feacebb9 --- /dev/null +++ b/crossword-app/backend-py/test-integration/test_intersection_issues.py @@ -0,0 +1,247 @@ +#!/usr/bin/env python3 +""" +Test to reproduce the exact intersection and boundary issues seen in the crossword images. 
+""" + +import sys +from pathlib import Path + +# Add project root to path +project_root = Path(__file__).parent.parent # Go up from test-integration to backend-py +sys.path.insert(0, str(project_root)) + +from src.services.crossword_generator_fixed import CrosswordGeneratorFixed + +def reproduce_image_issues(): + """Try to reproduce the specific issues seen in the crossword images.""" + + print("🔍 Reproducing crossword boundary issues from images...\n") + + generator = CrosswordGeneratorFixed(vector_service=None) + + # Test Case 1: Try to reproduce the "MACHINERY" extension issue + print("=" * 60) + print("TEST 1: Reproducing MACHINERY extension issue") + print("=" * 60) + + grid = [["." for _ in range(15)] for _ in range(15)] + placed_words = [] + + # Place MACHINE first + if generator._can_place_word(grid, "MACHINE", 6, 3, "horizontal"): + generator._place_word(grid, "MACHINE", 6, 3, "horizontal") + placed_words.append({ + "word": "MACHINE", "row": 6, "col": 3, "direction": "horizontal", "number": 1 + }) + print("✅ Placed MACHINE") + print_grid(grid, 4, 10, 0, 12) + + # Now try to place words that might create the extension + test_placements = [ + ("VERY", 4, 8, "vertical"), # V-E-R-Y going down, might intersect with E in MACHINE + ("EXPERT", 5, 8, "horizontal"), # Horizontal word that might extend MACHINE + ("PROTOTYPE", 6, 9, "horizontal"), # Direct extension after MACHINE + ] + + for word, row, col, direction in test_placements: + print(f"\n🔍 Testing: '{word}' at ({row}, {col}) {direction}") + + can_place = generator._can_place_word(grid, word, row, col, direction) + print(f"Can place: {can_place}") + + if can_place: + # Make a copy and test the placement + test_grid = [r[:] for r in grid] + generator._place_word(test_grid, word, row, col, direction) + print("After placement:") + print_grid(test_grid, 4, 10, 0, 15) + + # Check if MACHINE now appears to be extended + machine_row = 6 + extended_word = "" + for c in range(15): + if test_grid[machine_row][c] != ".": + extended_word += test_grid[machine_row][c] + elif extended_word: + break + + if extended_word != "MACHINE": + print(f"⚠️ MACHINE appears extended to: '{extended_word}'") + + print("-" * 40) + + # Test Case 2: Check intersection logic specifically + print("\n" + "=" * 60) + print("TEST 2: Checking intersection calculation logic") + print("=" * 60) + + # Test the intersection finding logic + word1 = "MACHINE" + word2 = "EXPERT" + + intersections = generator._find_word_intersections(word1, word2) + print(f"Intersections between '{word1}' and '{word2}': {intersections}") + + for intersection in intersections: + word_pos = intersection["word_pos"] + placed_pos = intersection["placed_pos"] + print(f" Letter '{word1[word_pos]}' at pos {word_pos} in '{word1}' matches") + print(f" Letter '{word2[placed_pos]}' at pos {placed_pos} in '{word2}'") + + # Calculate where EXPERT would be placed to intersect with MACHINE + machine_placement = {"word": "MACHINE", "row": 6, "col": 3, "direction": "horizontal"} + placement = generator._calculate_intersection_placement( + word2, placed_pos, machine_placement, word_pos + ) + + if placement: + print(f" EXPERT would be placed at: row={placement['row']}, col={placement['col']}, dir={placement['direction']}") + + # Check if this would be valid + can_place = generator._can_place_word(grid, word2, placement['row'], placement['col'], placement['direction']) + print(f" Valid placement: {can_place}") + + # Test Case 3: Multi-word intersection scenario + print("\n" + "=" * 60) + print("TEST 3: Multi-word 
intersection scenario") + print("=" * 60) + + # Create a more complex scenario like in the images + complex_grid = [["." for _ in range(15)] for _ in range(15)] + complex_words = [] + + # Place several words to create intersection opportunities + word_placements = [ + ("MACHINE", 7, 4, "horizontal"), + ("EXPERT", 5, 6, "vertical"), # Try to intersect at 'E' + ("SMART", 6, 8, "vertical"), # Try to intersect at another letter + ] + + for word, row, col, direction in word_placements: + print(f"\nPlacing '{word}' at ({row}, {col}) {direction}") + + if generator._can_place_word(complex_grid, word, row, col, direction): + generator._place_word(complex_grid, word, row, col, direction) + complex_words.append({ + "word": word, "row": row, "col": col, "direction": direction, "number": len(complex_words) + 1 + }) + print(f"✅ Placed '{word}'") + else: + print(f"❌ Cannot place '{word}'") + + print_grid(complex_grid, 4, 11, 2, 13) + + # Check for any unintended word formations + print("\nChecking for unintended word formations:") + check_unintended_words(complex_grid, complex_words) + +def print_grid(grid, start_row, end_row, start_col, end_col): + """Print a section of the grid.""" + print("Grid:") + for r in range(max(0, start_row), min(end_row, len(grid))): + row_str = f"R{r:2d}: " + for c in range(max(0, start_col), min(end_col, len(grid[0]))): + if grid[r][c] == ".": + row_str += ". " + else: + row_str += f"{grid[r][c]} " + print(row_str) + print() + +def check_unintended_words(grid, placed_words): + """Check for unintended word formations in the grid.""" + unintended = [] + + # Check all horizontal sequences + for r in range(len(grid)): + current_word = "" + start_col = None + + for c in range(len(grid[0])): + if grid[r][c] != ".": + if start_col is None: + start_col = c + current_word += grid[r][c] + else: + if current_word and len(current_word) > 1: + # Check if this is an intended word + intended = False + for word_info in placed_words: + if (word_info["direction"] == "horizontal" and + word_info["row"] == r and + word_info["col"] == start_col and + word_info["word"] == current_word): + intended = True + break + + if not intended: + unintended.append(f"Horizontal '{current_word}' at row {r}, col {start_col}") + + current_word = "" + start_col = None + + # Check final word if row ends with letters + if current_word and len(current_word) > 1: + intended = False + for word_info in placed_words: + if (word_info["direction"] == "horizontal" and + word_info["row"] == r and + word_info["col"] == start_col and + word_info["word"] == current_word): + intended = True + break + + if not intended: + unintended.append(f"Horizontal '{current_word}' at row {r}, col {start_col}") + + # Check all vertical sequences + for c in range(len(grid[0])): + current_word = "" + start_row = None + + for r in range(len(grid)): + if grid[r][c] != ".": + if start_row is None: + start_row = r + current_word += grid[r][c] + else: + if current_word and len(current_word) > 1: + # Check if this is an intended word + intended = False + for word_info in placed_words: + if (word_info["direction"] == "vertical" and + word_info["col"] == c and + word_info["row"] == start_row and + word_info["word"] == current_word): + intended = True + break + + if not intended: + unintended.append(f"Vertical '{current_word}' at row {start_row}, col {c}") + + current_word = "" + start_row = None + + # Check final word if column ends with letters + if current_word and len(current_word) > 1: + intended = False + for word_info in placed_words: + if 
(word_info["direction"] == "vertical" and + word_info["col"] == c and + word_info["row"] == start_row and + word_info["word"] == current_word): + intended = True + break + + if not intended: + unintended.append(f"Vertical '{current_word}' at row {start_row}, col {c}") + + if unintended: + print("❌ Unintended words found:") + for word in unintended: + print(f" {word}") + else: + print("✅ No unintended words detected") + +if __name__ == "__main__": + reproduce_image_issues() \ No newline at end of file diff --git a/crossword-app/backend-py/test-integration/test_local.py b/crossword-app/backend-py/test-integration/test_local.py new file mode 100644 index 0000000000000000000000000000000000000000..0f975673d4a12eb23c1c5baae469fec0a4bb64ef --- /dev/null +++ b/crossword-app/backend-py/test-integration/test_local.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 +""" +Simple test script to verify Python backend works locally. +""" + +import asyncio +import sys +import os +from pathlib import Path + +# Add project root to path +project_root = Path(__file__).parent.parent # Go up from test-integration to backend-py +sys.path.insert(0, str(project_root)) + +async def test_vector_search(): + """Test vector search service initialization.""" + try: + from src.services.vector_search import VectorSearchService + + print("🔧 Testing Vector Search Service...") + + # Set minimal configuration for testing + os.environ["EMBEDDING_MODEL"] = "sentence-transformers/all-MiniLM-L6-v2" # Smaller model for testing + os.environ["WORD_SIMILARITY_THRESHOLD"] = "0.6" + + service = VectorSearchService() + + print("📦 Initializing service (this may take a moment)...") + await service.initialize() + + if service.is_initialized: + print("✅ Vector search service initialized successfully!") + + # Test word generation + print("\n🧪 Testing word generation for 'Animals'...") + words = await service.find_similar_words("Animals", "medium", 5) + + print(f"Found {len(words)} words:") + for i, word_obj in enumerate(words, 1): + word = word_obj["word"] + similarity = word_obj.get("similarity", 0) + source = word_obj.get("source", "unknown") + print(f" {i}. 
{word} (similarity: {similarity:.3f}, source: {source})") + else: + print("❌ Service initialization failed") + + await service.cleanup() + + except Exception as e: + print(f"❌ Test failed: {e}") + import traceback + traceback.print_exc() + +async def test_crossword_generator(): + """Test crossword generator.""" + try: + from src.services.crossword_generator_wrapper import CrosswordGenerator + + print("\n🎯 Testing Crossword Generator...") + + generator = CrosswordGenerator() + + # Test static word generation + words = await generator.generate_words_for_topics( + topics=["Animals"], + difficulty="medium", + use_ai=False + ) + + print(f"✅ Generated {len(words)} static words for Animals:") + for word_obj in words[:3]: # Show first 3 + print(f" - {word_obj['word']}: {word_obj['clue']}") + + except Exception as e: + print(f"❌ Crossword generator test failed: {e}") + import traceback + traceback.print_exc() + +async def main(): + """Run all tests.""" + print("🐍 Testing Python Backend Components\n") + + # Test individual components + await test_crossword_generator() + + # Test vector search (commented out as it requires large download) + print("\n⚠️ Skipping vector search test (requires model download)") + print("💡 To test vector search, uncomment the line below:") + print("# await test_vector_search()") + + print("\n✅ Basic tests completed!") + print("🚀 Ready to test with FastAPI server") + print("\n🧪 For comprehensive unit tests, run:") + print(" python run_tests.py") + print(" or: pytest tests/ -v") + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/crossword-app/backend-py/test-integration/test_simple_generation.py b/crossword-app/backend-py/test-integration/test_simple_generation.py new file mode 100644 index 0000000000000000000000000000000000000000..b4945156e0ea2104784105e986498d927045a3fd --- /dev/null +++ b/crossword-app/backend-py/test-integration/test_simple_generation.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 +""" +Simple test to confirm crossword generation works correctly. +""" + +import sys +from pathlib import Path + +# Add project root to path +project_root = Path(__file__).parent.parent # Go up from test-integration to backend-py +sys.path.insert(0, str(project_root)) + +from src.services.crossword_generator import CrosswordGenerator + +def test_simple_generation(): + """Test basic crossword generation functionality.""" + + print("🧪 Simple Crossword Generation Test\n") + + generator = CrosswordGenerator(vector_service=None) + + # Simple test words that should work well + test_words = [ + {"word": "COMPUTER", "clue": "Electronic device"}, + {"word": "MACHINE", "clue": "Device with moving parts"}, + {"word": "EXPERT", "clue": "Specialist"}, + {"word": "SCIENCE", "clue": "Systematic study"}, + ] + + print("Testing with words:", [w["word"] for w in test_words]) + + try: + result = generator._create_grid(test_words) + + if result: + grid = result["grid"] + placed_words = result["placed_words"] + clues = result["clues"] + + print("✅ Grid generation successful!") + print(f" Grid size: {len(grid)}x{len(grid[0])}") + print(f" Words placed: {len(placed_words)}") + print(f" Clues generated: {len(clues)}") + + print("\nGenerated Grid:") + print_simple_grid(grid) + + print("\nPlaced Words:") + for i, word_info in enumerate(placed_words): + print(f" {i+1}. {word_info['word']} at ({word_info['row']}, {word_info['col']}) {word_info['direction']}") + + print("\nClues:") + for clue in clues: + print(f" {clue['number']}. 
{clue['direction']}: {clue['word']} - {clue['text']}") + + # Basic validation - just check that words are where they should be + print("\nBasic validation:") + validation_passed = True + + for word_info in placed_words: + word = word_info["word"] + row = word_info["row"] + col = word_info["col"] + direction = word_info["direction"] + + # Extract word from grid + extracted = "" + if direction == "horizontal": + for i in range(len(word)): + if col + i < len(grid[0]): + extracted += grid[row][col + i] + else: # vertical + for i in range(len(word)): + if row + i < len(grid): + extracted += grid[row + i][col] + + if extracted == word: + print(f" ✅ {word} correctly placed") + else: + print(f" ❌ {word} mismatch: expected '{word}', got '{extracted}'") + validation_passed = False + + if validation_passed: + print("\n🎉 SUCCESS! Crossword generator is working correctly.") + print("The algorithm creates valid crosswords with proper word intersections.") + print("2-letter sequences at intersections are normal crossword behavior.") + else: + print("\n❌ Validation failed - there are actual placement issues.") + + else: + print("❌ Grid generation failed - returned None") + + except Exception as e: + print(f"❌ Grid generation failed with error: {e}") + import traceback + traceback.print_exc() + +def print_simple_grid(grid): + """Print grid in a simple format.""" + if not grid: + print(" Empty grid") + return + + for r in range(len(grid)): + row_str = " " + for c in range(len(grid[0])): + if grid[r][c] == ".": + row_str += ". " + else: + row_str += f"{grid[r][c]} " + print(row_str) + +if __name__ == "__main__": + test_simple_generation() \ No newline at end of file diff --git a/crossword-app/backend-py/test-integration/test_simple_variety.py b/crossword-app/backend-py/test-integration/test_simple_variety.py new file mode 100644 index 0000000000000000000000000000000000000000..343ce5d2f5dfa4f178c138a5a335e1865ea4664a --- /dev/null +++ b/crossword-app/backend-py/test-integration/test_simple_variety.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python3 +""" +Simple test for word variety logic without dependencies. +""" + +import random +from typing import List, Dict, Any + +def weighted_random_selection(candidates: List[Dict[str, Any]], max_words: int) -> List[Dict[str, Any]]: + """ + Test version of weighted random selection. 
+ """ + if len(candidates) <= max_words: + return candidates + + # Create tiers based on similarity scores + candidates_sorted = sorted(candidates, key=lambda w: w["similarity"], reverse=True) + + # Tier 1: Top 25% - very high probability + tier1_size = max(1, len(candidates_sorted) // 4) + tier1 = candidates_sorted[:tier1_size] + + # Tier 2: Next 25% - high probability + tier2_size = max(1, len(candidates_sorted) // 4) + tier2 = candidates_sorted[tier1_size:tier1_size + tier2_size] + + # Tier 3: Next 35% - medium probability + tier3_size = max(1, len(candidates_sorted) * 35 // 100) + tier3 = candidates_sorted[tier1_size + tier2_size:tier1_size + tier2_size + tier3_size] + + # Tier 4: Remaining - low probability + tier4 = candidates_sorted[tier1_size + tier2_size + tier3_size:] + + selected = [] + + # Always include some from tier 1 (but not all) + tier1_count = min(max_words // 3, len(tier1)) + selected.extend(random.sample(tier1, tier1_count)) + + # Fill remaining slots with weighted random selection + remaining_slots = max_words - len(selected) + + if remaining_slots > 0: + # Create weighted pool + weighted_pool = [] + weighted_pool.extend([(w, 3) for w in tier2]) # 3x weight + weighted_pool.extend([(w, 2) for w in tier3]) # 2x weight + weighted_pool.extend([(w, 1) for w in tier4]) # 1x weight + + # Also add remaining tier1 words with high weight + remaining_tier1 = [w for w in tier1 if w not in selected] + weighted_pool.extend([(w, 4) for w in remaining_tier1]) # 4x weight + + # Weighted random selection + for _ in range(remaining_slots): + if not weighted_pool: + break + + # Create weighted list + weighted_words = [] + for word, weight in weighted_pool: + weighted_words.extend([word] * weight) + + if weighted_words: + chosen = random.choice(weighted_words) + selected.append(chosen) + + # Remove chosen word from pool + weighted_pool = [(w, wt) for w, wt in weighted_pool if w != chosen] + + # Final shuffle to mix up the order + random.shuffle(selected) + + return selected[:max_words] + +def create_test_candidates(): + """Create test word candidates.""" + words = [ + "SCIENTIFIC", "SCIENTIST", "CHEMISTRY", "ASTRONOMY", "BIOLOGIST", + "PHYSICIST", "RESEARCH", "ZOOLOGY", "GEOLOGY", "BIOLOGY", + "ECOLOGY", "BOTANY", "THEORY", "EXPERIMENT", "DISCOVERY", + "LABORATORY", "MOLECULE", "EQUATION", "HYPOTHESIS", "ANALYSIS", + "PHYSICS", "QUANTUM", "GENETICS", "EVOLUTION", "MICROSCOPE" + ] + + candidates = [] + for i, word in enumerate(words): + similarity = 0.9 - (i * 0.02) # Decreasing similarity scores + candidates.append({ + "word": word, + "clue": f"{word.lower()} (scientific term)", + "similarity": similarity, + "source": "vector_search" + }) + + return candidates + +def test_variety(): + """Test word variety.""" + print("🧪 Testing word variety\n") + + candidates = create_test_candidates() + + # Run selection multiple times + results = [] + for i in range(5): + selected = weighted_random_selection(candidates, 12) + word_list = [w["word"] for w in selected] + results.append(word_list) + print(f"Selection {i+1}: {word_list[:5]}...") + + # Check variety + unique_words_per_position = [] + for pos in range(5): + words_at_pos = [result[pos] for result in results if len(result) > pos] + unique_at_pos = len(set(words_at_pos)) + unique_words_per_position.append(unique_at_pos) + print(f"Position {pos}: {unique_at_pos} different words across 5 selections") + + total_variety = sum(unique_words_per_position) + max_possible = len(unique_words_per_position) * len(results) + variety_percentage = 
(total_variety / max_possible) * 100 + + print(f"\n📊 Variety Score: {variety_percentage:.1f}%") + + return variety_percentage > 60 + +if __name__ == "__main__": + success = test_variety() + if success: + print("✅ Word variety test passed!") + else: + print("❌ Word variety test failed!") \ No newline at end of file diff --git a/crossword-app/backend-py/test-integration/test_visual_analysis.py b/crossword-app/backend-py/test-integration/test_visual_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..590ab425bbbca081084d6daf19f02b29e34c6413 --- /dev/null +++ b/crossword-app/backend-py/test-integration/test_visual_analysis.py @@ -0,0 +1,265 @@ +#!/usr/bin/env python3 +""" +Test to analyze the visual representation issue that might cause words to appear extended. +""" + +import sys +from pathlib import Path + +# Add project root to path +project_root = Path(__file__).parent.parent # Go up from test-integration to backend-py +sys.path.insert(0, str(project_root)) + +from src.services.crossword_generator_fixed import CrosswordGeneratorFixed + +def analyze_visual_word_extensions(): + """Analyze how visual word extensions might occur in the crossword grid.""" + + print("🔍 Analyzing Visual Word Extension Issues\n") + + generator = CrosswordGeneratorFixed(vector_service=None) + + # Create a scenario that might produce the "visual extension" effect + # Based on the crossword images, it seems like words are appearing to extend + # when they shouldn't + + grid = [["." for _ in range(15)] for _ in range(15)] + placed_words = [] + + print("=" * 60) + print("SCENARIO 1: Creating potential visual extension") + print("=" * 60) + + # Step 1: Place MACHINE + if generator._can_place_word(grid, "MACHINE", 7, 3, "horizontal"): + generator._place_word(grid, "MACHINE", 7, 3, "horizontal") + placed_words.append({ + "word": "MACHINE", "row": 7, "col": 3, "direction": "horizontal", "number": 1 + }) + print("✅ Placed MACHINE at (7, 3) horizontal") + + # Step 2: Place a word that intersects at the end of MACHINE + # This might create the visual illusion of extension + if generator._can_place_word(grid, "EXPERT", 5, 9, "vertical"): # E intersects with E in MACHINE + generator._place_word(grid, "EXPERT", 5, 9, "vertical") + placed_words.append({ + "word": "EXPERT", "row": 5, "col": 9, "direction": "vertical", "number": 2 + }) + print("✅ Placed EXPERT at (5, 9) vertical") + + # Step 3: Place another intersecting word that might create confusion + if generator._can_place_word(grid, "TYPOGRAPHY", 7, 10, "horizontal"): # Starts right after MACHINE + generator._place_word(grid, "TYPOGRAPHY", 7, 10, "horizontal") + placed_words.append({ + "word": "TYPOGRAPHY", "row": 7, "col": 10, "direction": "horizontal", "number": 3 + }) + print("✅ Placed TYPOGRAPHY at (7, 10) horizontal") + + print("\nCurrent grid state:") + print_detailed_grid(grid, placed_words) + + # Analyze what the human eye might see + print("\n" + "=" * 60) + print("VISUAL ANALYSIS") + print("=" * 60) + + analyze_row_sequences(grid, placed_words, 7) # Analyze row 7 specifically + analyze_column_sequences(grid, placed_words, 9) # Analyze column 9 specifically + + # Test another scenario + print("\n" + "=" * 60) + print("SCENARIO 2: Adjacent word placement") + print("=" * 60) + + grid2 = [["." 
for _ in range(15)] for _ in range(15)] + placed_words2 = [] + + # Place words that are adjacent but should be separate + if generator._can_place_word(grid2, "SCIENCE", 6, 2, "horizontal"): + generator._place_word(grid2, "SCIENCE", 6, 2, "horizontal") + placed_words2.append({ + "word": "SCIENCE", "row": 6, "col": 2, "direction": "horizontal", "number": 1 + }) + + # Try to place a word immediately after with no gap + if generator._can_place_word(grid2, "TECHNOLOGY", 6, 9, "horizontal"): + generator._place_word(grid2, "TECHNOLOGY", 6, 9, "horizontal") + placed_words2.append({ + "word": "TECHNOLOGY", "row": 6, "col": 9, "direction": "horizontal", "number": 2 + }) + print("⚠️ Placed TECHNOLOGY immediately after SCIENCE") + else: + print("✅ Correctly prevented TECHNOLOGY placement immediately after SCIENCE") + + print("\nGrid 2 state:") + print_detailed_grid(grid2, placed_words2) + + # Check what boundary checking should prevent + print("\n" + "=" * 60) + print("BOUNDARY VALIDATION TEST") + print("=" * 60) + + test_boundary_edge_cases(generator) + +def print_detailed_grid(grid, placed_words, highlight_rows=None): + """Print grid with detailed analysis.""" + print("Grid with word boundaries marked:") + print(" ", end="") + for c in range(min(15, len(grid[0]))): + print(f"{c:2d}", end="") + print() + + for r in range(min(15, len(grid))): + print(f"{r:2d}: ", end="") + for c in range(min(15, len(grid[0]))): + cell = grid[r][c] + if cell == ".": + print(" .", end="") + else: + print(f" {cell}", end="") + print() + + print("\nPlaced words:") + for i, word_info in enumerate(placed_words): + print(f" {i+1}. {word_info['word']} at ({word_info['row']}, {word_info['col']}) {word_info['direction']}") + +def analyze_row_sequences(grid, placed_words, row): + """Analyze letter sequences in a specific row.""" + print(f"\nAnalyzing row {row} sequences:") + + # Extract all letters in the row + row_letters = [] + for c in range(len(grid[0])): + if grid[row][c] != ".": + row_letters.append((c, grid[row][c])) + + if not row_letters: + print(" No letters in this row") + return + + print(f" Letters found: {row_letters}") + + # Find continuous sequences + sequences = [] + current_seq = [] + + for i, (col, letter) in enumerate(row_letters): + if not current_seq or col == row_letters[i-1][0] + 1: + current_seq.append((col, letter)) + else: + if len(current_seq) > 1: + sequences.append(current_seq) + current_seq = [(col, letter)] + + if len(current_seq) > 1: + sequences.append(current_seq) + + print(f" Continuous sequences: {len(sequences)}") + for i, seq in enumerate(sequences): + word = "".join([letter for col, letter in seq]) + start_col = seq[0][0] + print(f" Sequence {i+1}: '{word}' starting at column {start_col}") + + # Check if this matches intended words + intended = False + for word_info in placed_words: + if (word_info["direction"] == "horizontal" and + word_info["row"] == row and + word_info["col"] == start_col and + word_info["word"] == word): + intended = True + print(f" ✅ This is the intended word '{word_info['word']}'") + break + + if not intended and len(word) > 1: + print(f" ❌ This appears to be an unintended word formation!") + +def analyze_column_sequences(grid, placed_words, col): + """Analyze letter sequences in a specific column.""" + print(f"\nAnalyzing column {col} sequences:") + + # Extract all letters in the column + col_letters = [] + for r in range(len(grid)): + if grid[r][col] != ".": + col_letters.append((r, grid[r][col])) + + if not col_letters: + print(" No letters in this column") + 
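# Nothing to scan in an empty column, so bail out early. + 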
return + + print(f" Letters found: {col_letters}") + + # Find continuous sequences + sequences = [] + current_seq = [] + + for i, (row, letter) in enumerate(col_letters): + if not current_seq or row == col_letters[i-1][0] + 1: + current_seq.append((row, letter)) + else: + if len(current_seq) > 1: + sequences.append(current_seq) + current_seq = [(row, letter)] + + if len(current_seq) > 1: + sequences.append(current_seq) + + print(f" Continuous sequences: {len(sequences)}") + for i, seq in enumerate(sequences): + word = "".join([letter for row, letter in seq]) + start_row = seq[0][0] + print(f" Sequence {i+1}: '{word}' starting at row {start_row}") + + # Check if this matches intended words + intended = False + for word_info in placed_words: + if (word_info["direction"] == "vertical" and + word_info["col"] == col and + word_info["row"] == start_row and + word_info["word"] == word): + intended = True + print(f" ✅ This is the intended word '{word_info['word']}'") + break + + if not intended and len(word) > 1: + print(f" ❌ This appears to be an unintended word formation!") + +def test_boundary_edge_cases(generator): + """Test specific boundary edge cases.""" + + # Test case 1: Words that touch but shouldn't form a continuous sequence + grid = [["." for _ in range(10)] for _ in range(10)] + + # Place first word + generator._place_word(grid, "CAT", 5, 2, "horizontal") + + # Try to place second word immediately adjacent (should be blocked) + can_place = generator._can_place_word(grid, "DOG", 5, 5, "horizontal") + print(f"Can place 'DOG' immediately after 'CAT': {can_place}") + + if can_place: + print("❌ PROBLEM: Adjacent words allowed - this could create visual extension!") + print("Current grid after CAT:") + for r in range(3, 8): + print(f"Row {r}: ", end="") + for c in range(8): + print(f"{grid[r][c]} ", end="") + print() + + generator._place_word(grid, "DOG", 5, 5, "horizontal") + print("After adding DOG:") + for r in range(3, 8): + print(f"Row {r}: ", end="") + for c in range(8): + print(f"{grid[r][c]} ", end="") + print() + else: + print("✅ Correctly prevented adjacent word placement") + + # Test case 2: Check gap requirement + can_place_with_gap = generator._can_place_word(grid, "DOG", 5, 6, "horizontal") + print(f"Can place 'DOG' with 1-cell gap after 'CAT': {can_place_with_gap}") + +if __name__ == "__main__": + analyze_visual_word_extensions() \ No newline at end of file diff --git a/crossword-app/backend-py/test-integration/test_word_boundaries.py b/crossword-app/backend-py/test-integration/test_word_boundaries.py new file mode 100644 index 0000000000000000000000000000000000000000..ff4b6eb8e247578826cdd86b633e9b5d844ef2c9 --- /dev/null +++ b/crossword-app/backend-py/test-integration/test_word_boundaries.py @@ -0,0 +1,245 @@ +#!/usr/bin/env python3 +""" +Test to identify and fix word boundary issues causing unwanted prefixes/suffixes. +""" + +import sys +from pathlib import Path + +# Add project root to path +project_root = Path(__file__).parent.parent # Go up from test-integration to backend-py +sys.path.insert(0, str(project_root)) + +from src.services.crossword_generator_fixed import CrosswordGeneratorFixed + +def test_word_boundary_violations(): + """Test for word boundary violations that create unwanted prefixes/suffixes.""" + + generator = CrosswordGeneratorFixed(vector_service=None) + + # Test case 1: Simple word placement with boundary checking + print("🧪 Testing word boundary violations...\n") + + # Create a grid and place a word + grid = [["." 
for _ in range(10)] for _ in range(10)] + + # Place "MACHINE" horizontally at row 5, col 2 + placed_words = [] + + # First, place MACHINE + if generator._can_place_word(grid, "MACHINE", 5, 2, "horizontal"): + original_state = generator._place_word(grid, "MACHINE", 5, 2, "horizontal") + placed_words.append({ + "word": "MACHINE", + "row": 5, + "col": 2, + "direction": "horizontal", + "number": 1 + }) + print("✅ Placed MACHINE horizontally") + print_grid_section(grid, 4, 7, 0, 10) + else: + print("❌ Cannot place MACHINE") + return False + + # Now try to place a word that might create boundary violations + # Try placing "CAR" vertically intersecting with "A" in MACHINE + test_words = [ + ("CAR", 3, 4, "vertical"), # Should intersect at 'A' in MACHINE + ("ACE", 4, 3, "vertical"), # Should intersect at 'C' in MACHINE + ("RIG", 6, 7, "vertical"), # Should intersect at 'I' in MACHINE + ] + + for word, row, col, direction in test_words: + print(f"\n🔍 Testing placement of '{word}' at ({row}, {col}) {direction}") + + if generator._can_place_word(grid, word, row, col, direction): + # Check if this placement would create boundary issues + print(f"✅ Can place '{word}' - checking for boundary violations...") + + # Simulate the placement + test_grid = [row[:] for row in grid] # Deep copy + test_original = generator._place_word(test_grid, word, row, col, direction) + + print_grid_section(test_grid, 2, 9, 0, 10) + + # Check if any unintended words are formed + violations = check_word_boundary_violations(test_grid, word, row, col, direction, placed_words) + if violations: + print(f"❌ Boundary violations detected: {violations}") + else: + print(f"✅ No boundary violations for '{word}'") + + # Restore grid + generator._remove_word(test_grid, test_original) + else: + print(f"❌ Cannot place '{word}' at ({row}, {col}) {direction}") + + print("\n" + "="*50) + return True + +def print_grid_section(grid, start_row, end_row, start_col, end_col): + """Print a section of the grid for visualization.""" + print("Grid section:") + for r in range(start_row, min(end_row, len(grid))): + row_str = "" + for c in range(start_col, min(end_col, len(grid[0]))): + if grid[r][c] == ".": + row_str += ". 
" + else: + row_str += f"{grid[r][c]} " + print(f"Row {r:2d}: {row_str}") + print() + +def check_word_boundary_violations(grid, new_word, row, col, direction, existing_words): + """Check if placing a word creates unintended word extensions.""" + violations = [] + + # Check the immediate boundaries of the new word + if direction == "horizontal": + # Check before the word + if col > 0 and grid[row][col - 1] != ".": + violations.append(f"Unwanted prefix: letter '{grid[row][col - 1]}' before '{new_word}'") + + # Check after the word + if col + len(new_word) < len(grid[0]) and grid[row][col + len(new_word)] != ".": + violations.append(f"Unwanted suffix: letter '{grid[row][col + len(new_word)]}' after '{new_word}'") + + # Check perpendicular extensions at each letter + for i, letter in enumerate(new_word): + letter_col = col + i + + # Check above and below each letter for unintended words + above_letters = [] + below_letters = [] + + # Collect letters above + r = row - 1 + while r >= 0 and grid[r][letter_col] != ".": + above_letters.insert(0, grid[r][letter_col]) + r -= 1 + + # Collect letters below + r = row + 1 + while r < len(grid) and grid[r][letter_col] != ".": + below_letters.append(grid[r][letter_col]) + r += 1 + + # Check if this forms an unintended word + if above_letters or below_letters: + full_vertical_word = "".join(above_letters) + letter + "".join(below_letters) + if len(full_vertical_word) > 1: + # Check if this is an intended word from existing placements + intended = False + for existing in existing_words: + if (existing["direction"] == "vertical" and + existing["col"] == letter_col and + existing["word"] == full_vertical_word): + intended = True + break + + if not intended and len(full_vertical_word) > 1: + violations.append(f"Unintended vertical word '{full_vertical_word}' at column {letter_col}") + + else: # vertical + # Check before the word (above) + if row > 0 and grid[row - 1][col] != ".": + violations.append(f"Unwanted prefix: letter '{grid[row - 1][col]}' above '{new_word}'") + + # Check after the word (below) + if row + len(new_word) < len(grid) and grid[row + len(new_word)][col] != ".": + violations.append(f"Unwanted suffix: letter '{grid[row + len(new_word)][col]}' below '{new_word}'") + + # Check perpendicular extensions at each letter + for i, letter in enumerate(new_word): + letter_row = row + i + + # Collect letters to the left and right + left_letters = [] + right_letters = [] + + # Collect letters to the left + c = col - 1 + while c >= 0 and grid[letter_row][c] != ".": + left_letters.insert(0, grid[letter_row][c]) + c -= 1 + + # Collect letters to the right + c = col + 1 + while c < len(grid[0]) and grid[letter_row][c] != ".": + right_letters.append(grid[letter_row][c]) + c += 1 + + # Check if this forms an unintended word + if left_letters or right_letters: + full_horizontal_word = "".join(left_letters) + letter + "".join(right_letters) + if len(full_horizontal_word) > 1: + # Check if this is an intended word from existing placements + intended = False + for existing in existing_words: + if (existing["direction"] == "horizontal" and + existing["row"] == letter_row and + existing["word"] == full_horizontal_word): + intended = True + break + + if not intended and len(full_horizontal_word) > 1: + violations.append(f"Unintended horizontal word '{full_horizontal_word}' at row {letter_row}") + + return violations + +def test_enhanced_boundary_checking(): + """Test enhanced boundary checking logic.""" + print("🧪 Testing enhanced boundary checking logic...\n") + + 
generator = CrosswordGeneratorFixed(vector_service=None) + + # Test problematic scenario from the images + # Create a scenario where MACHINE might get extended to MACHINERY + grid = [["." for _ in range(12)] for _ in range(12)] + + # Place MACHINE first + generator._place_word(grid, "MACHINE", 5, 2, "horizontal") + placed_words = [{ + "word": "MACHINE", "row": 5, "col": 2, "direction": "horizontal", "number": 1 + }] + + print("Initial grid with MACHINE:") + print_grid_section(grid, 4, 7, 0, 12) + + # Now try to place words that might create the "RY" suffix issue + # Place "Y" somewhere that might extend MACHINE + problem_placements = [ + ("RYOT", 5, 9, "horizontal"), # This would make MACHINE -> MACHINERYOT + ("CAR", 3, 8, "vertical"), # This might create unwanted extensions + ] + + for word, row, col, direction in problem_placements: + print(f"\n🔍 Testing problematic placement: '{word}' at ({row}, {col}) {direction}") + + # Check current can_place_word logic + can_place = generator._can_place_word(grid, word, row, col, direction) + print(f"Current _can_place_word result: {can_place}") + + if can_place: + # Show what would happen + test_grid = [row[:] for row in grid] + generator._place_word(test_grid, word, row, col, direction) + print("Result grid:") + print_grid_section(test_grid, 3, 8, 0, 12) + + # Check for violations manually + violations = check_word_boundary_violations(test_grid, word, row, col, direction, placed_words) + if violations: + print(f"❌ This placement creates violations: {violations}") + else: + print("✅ No violations detected") + +if __name__ == "__main__": + print("🔍 Testing Word Boundary Issues\n") + + test_word_boundary_violations() + print("\n" + "="*60 + "\n") + test_enhanced_boundary_checking() + + print("\n🎯 Analysis complete. Check output for boundary violation patterns.") \ No newline at end of file diff --git a/crossword-app/backend-py/test-integration/test_word_extension_bug.py b/crossword-app/backend-py/test-integration/test_word_extension_bug.py new file mode 100644 index 0000000000000000000000000000000000000000..3cb59e004003684cd7ae85c1f030c20cf9b9464f --- /dev/null +++ b/crossword-app/backend-py/test-integration/test_word_extension_bug.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python3 +""" +Test to identify the word extension bug where words appear to extend beyond their boundaries. +""" + +import sys +from pathlib import Path + +# Add project root to path +project_root = Path(__file__).parent.parent # Go up from test-integration to backend-py +sys.path.insert(0, str(project_root)) + +from src.services.crossword_generator_fixed import CrosswordGeneratorFixed + +def debug_word_extension_issue(): + """Debug the specific word extension issue.""" + + print("🔍 Debugging Word Extension Issue\n") + + generator = CrosswordGeneratorFixed(vector_service=None) + + # Manually create a scenario that reproduces the issue + grid = [["." for _ in range(10)] for _ in range(10)] + placed_words = [] + + print("Step-by-step placement test:") + print("=" * 50) + + # Step 1: Place COMPUTER horizontally + print("1. Placing COMPUTER at (4, 0) horizontal") + if generator._can_place_word(grid, "COMPUTER", 4, 0, "horizontal"): + generator._place_word(grid, "COMPUTER", 4, 0, "horizontal") + placed_words.append({ + "word": "COMPUTER", "row": 4, "col": 0, "direction": "horizontal", "number": 1 + }) + print("✅ COMPUTER placed successfully") + print_grid_section(grid, 2, 7, 0, 10) + + # Step 2: Place MACHINE vertically intersecting with C + print("\n2. 
Placing MACHINE at (2, 0) vertical") + if generator._can_place_word(grid, "MACHINE", 2, 0, "vertical"): + generator._place_word(grid, "MACHINE", 2, 0, "vertical") + placed_words.append({ + "word": "MACHINE", "row": 2, "col": 0, "direction": "vertical", "number": 2 + }) + print("✅ MACHINE placed successfully") + print_grid_section(grid, 2, 9, 0, 10) + + # Step 3: Try to place EXPERT horizontally + print("\n3. Testing EXPERT placement at various positions:") + + test_positions = [ + (7, 3, "horizontal"), # This is where the algorithm tried to place it + (7, 2, "horizontal"), # This might be where it actually went + (6, 3, "horizontal"), # Alternative position + ] + + for row, col, direction in test_positions: + print(f" Testing EXPERT at ({row}, {col}) {direction}") + + can_place = generator._can_place_word(grid, "EXPERT", row, col, direction) + print(f" Can place: {can_place}") + + if can_place: + # Test placement on a copy + test_grid = [r[:] for r in grid] + original_state = generator._place_word(test_grid, "EXPERT", row, col, direction) + + print(" Grid after placement:") + print_grid_section(test_grid, 5, 10, 0, 10) + + # Check what sequences exist + sequences = find_all_sequences(test_grid) + print(f" All sequences found: {sequences}") + + # Restore grid + generator._remove_word(test_grid, original_state) + print(" " + "-" * 30) + + # Step 4: Test the validation functions directly + print("\n4. Testing validation functions:") + print("=" * 50) + + # Create a grid with a known extension issue + test_grid = [["." for _ in range(10)] for _ in range(10)] + + # Place COMPUTER + for i, letter in enumerate("COMPUTER"): + test_grid[4][i] = letter + + # Place MACHINE + for i, letter in enumerate("MACHINE"): + test_grid[2 + i][0] = letter + + # Place EXPERT but with an extension issue + expert_letters = "AEXPERT" # Simulate the extension issue + for i, letter in enumerate(expert_letters): + test_grid[7][2 + i] = letter + + print("Test grid with extension issue:") + print_grid_section(test_grid, 2, 9, 0, 10) + + # Test validation + mock_placed_words = [ + {"word": "COMPUTER", "row": 4, "col": 0, "direction": "horizontal"}, + {"word": "MACHINE", "row": 2, "col": 0, "direction": "vertical"}, + {"word": "EXPERT", "row": 7, "col": 3, "direction": "horizontal"}, # Note: col 3, not 2 + ] + + print("\nValidation results:") + is_valid = generator._validate_final_grid(test_grid, mock_placed_words) + print(f"Grid validation result: {is_valid}") + +def print_grid_section(grid, start_row, end_row, start_col, end_col): + """Print a section of the grid.""" + print("Grid:") + for r in range(start_row, min(end_row, len(grid))): + row_str = f"R{r}: " + for c in range(start_col, min(end_col, len(grid[0]))): + if grid[r][c] == ".": + row_str += ". 
" + else: + row_str += f"{grid[r][c]} " + print(row_str) + +def find_all_sequences(grid): + """Find all letter sequences in the grid.""" + sequences = [] + + # Horizontal sequences + for r in range(len(grid)): + current_seq = "" + start_col = None + + for c in range(len(grid[0])): + if grid[r][c] != ".": + if start_col is None: + start_col = c + current_seq += grid[r][c] + else: + if current_seq and len(current_seq) > 1: + sequences.append(f"H({r},{start_col}): {current_seq}") + current_seq = "" + start_col = None + + # Handle end of row + if current_seq and len(current_seq) > 1: + sequences.append(f"H({r},{start_col}): {current_seq}") + + # Vertical sequences + for c in range(len(grid[0])): + current_seq = "" + start_row = None + + for r in range(len(grid)): + if grid[r][c] != ".": + if start_row is None: + start_row = r + current_seq += grid[r][c] + else: + if current_seq and len(current_seq) > 1: + sequences.append(f"V({start_row},{c}): {current_seq}") + current_seq = "" + start_row = None + + # Handle end of column + if current_seq and len(current_seq) > 1: + sequences.append(f"V({start_row},{c}): {current_seq}") + + return sequences + +if __name__ == "__main__": + debug_word_extension_issue() \ No newline at end of file diff --git a/crossword-app/backend-py/test-integration/test_word_variety.py b/crossword-app/backend-py/test-integration/test_word_variety.py new file mode 100644 index 0000000000000000000000000000000000000000..5ded849e9d1ae94e04454a874601022ff7506527 --- /dev/null +++ b/crossword-app/backend-py/test-integration/test_word_variety.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python3 +""" +Test word variety to ensure we get different words each time. +""" + +import asyncio +import sys +from pathlib import Path +from unittest.mock import Mock + +# Add project root to path +project_root = Path(__file__).parent.parent # Go up from test-integration to backend-py +sys.path.insert(0, str(project_root)) + +from src.services.vector_search import VectorSearchService + +def mock_vector_search(): + """Create a mock vector search with reproducible but varied results.""" + # Create mock candidates (simulating different similarity scores) + mock_candidates = [] + words = [ + "SCIENTIFIC", "SCIENTIST", "CHEMISTRY", "ASTRONOMY", "BIOLOGIST", + "PHYSICIST", "RESEARCH", "ZOOLOGY", "GEOLOGY", "BIOLOGY", + "ECOLOGY", "BOTANY", "THEORY", "EXPERIMENT", "DISCOVERY", + "LABORATORY", "MOLECULE", "EQUATION", "HYPOTHESIS", "ANALYSIS", + "PHYSICS", "QUANTUM", "GENETICS", "EVOLUTION", "MICROSCOPE" + ] + + for i, word in enumerate(words): + similarity = 0.9 - (i * 0.02) # Decreasing similarity scores + mock_candidates.append({ + "word": word, + "clue": f"{word.lower()} (scientific term)", + "similarity": similarity, + "source": "vector_search" + }) + + return mock_candidates + +async def test_word_variety(): + """Test that we get different words on multiple requests.""" + print("🧪 Testing word variety with weighted random selection\n") + + # Create mock vector service + vector_service = VectorSearchService() + + # Mock the weighted selection method with our test data + candidates = mock_vector_search() + + # Run selection multiple times + results = [] + for i in range(5): + selected = vector_service._weighted_random_selection(candidates, 12) + word_list = [w["word"] for w in selected] + results.append(word_list) + print(f"Selection {i+1}: {word_list[:5]}...") # Show first 5 words + + # Check variety + unique_words_per_position = [] + for pos in range(5): # Check first 5 positions + words_at_pos = [result[pos] 
for result in results if len(result) > pos] + unique_at_pos = len(set(words_at_pos)) + unique_words_per_position.append(unique_at_pos) + print(f"Position {pos}: {unique_at_pos} different words across 5 selections") + + # Calculate variety score + total_variety = sum(unique_words_per_position) + max_possible = len(unique_words_per_position) * len(results) + variety_percentage = (total_variety / max_possible) * 100 + + print(f"\n📊 Variety Score: {variety_percentage:.1f}% (higher is more varied)") + + if variety_percentage > 60: + print("✅ Good variety - words are sufficiently randomized") + return True + else: + print("⚠️ Low variety - same words appearing too often") + return False + +def test_weighted_tiers(): + """Test that weighted selection respects tier priorities.""" + print("\n🧪 Testing weighted tier selection\n") + + vector_service = VectorSearchService() + + # Create candidates with clear tier separation + candidates = [] + + # Tier 1: High similarity (should appear often) + tier1_words = ["EXCELLENT", "PERFECT", "AMAZING"] + for word in tier1_words: + candidates.append({ + "word": word, "clue": f"{word} clue", + "similarity": 0.95, "source": "test" + }) + + # Tier 2: Medium-high similarity + tier2_words = ["GOOD", "NICE", "FINE"] + for word in tier2_words: + candidates.append({ + "word": word, "clue": f"{word} clue", + "similarity": 0.75, "source": "test" + }) + + # Tier 3: Lower similarity (should appear rarely) + tier3_words = ["OKAY", "AVERAGE", "BASIC"] + for word in tier3_words: + candidates.append({ + "word": word, "clue": f"{word} clue", + "similarity": 0.50, "source": "test" + }) + + # Run many selections and count frequency + word_counts = {} + num_trials = 100 + + for _ in range(num_trials): + selected = vector_service._weighted_random_selection(candidates, 6) + for word_obj in selected: + word = word_obj["word"] + word_counts[word] = word_counts.get(word, 0) + 1 + + print("Word selection frequencies:") + + tier1_avg = sum(word_counts.get(w, 0) for w in tier1_words) / len(tier1_words) + tier2_avg = sum(word_counts.get(w, 0) for w in tier2_words) / len(tier2_words) + tier3_avg = sum(word_counts.get(w, 0) for w in tier3_words) / len(tier3_words) + + print(f"Tier 1 (high similarity): {tier1_avg:.1f} avg selections") + print(f"Tier 2 (medium similarity): {tier2_avg:.1f} avg selections") + print(f"Tier 3 (low similarity): {tier3_avg:.1f} avg selections") + + # Check if weighting is working (tier 1 should be selected more than tier 3) + if tier1_avg > tier2_avg > tier3_avg: + print("✅ Weighted selection working correctly") + return True + else: + print("⚠️ Weighted selection not respecting similarity scores") + return False + +async def main(): + """Run variety tests.""" + print("🎲 Testing Word Selection Variety\n") + + variety_test = await test_word_variety() + tier_test = test_weighted_tiers() + + if variety_test and tier_test: + print("\n🎉 All variety tests passed!") + print("🔄 Word selection should now be much more varied between requests") + else: + print("\n❌ Some variety tests failed") + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/crossword-app/backend-py/test-unit/__init__.py b/crossword-app/backend-py/test-unit/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/crossword-app/backend-py/test-unit/__pycache__/__init__.cpython-313.pyc b/crossword-app/backend-py/test-unit/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..526d3ff0663ffdf652e13a5df575272befc52c3d Binary files /dev/null and b/crossword-app/backend-py/test-unit/__pycache__/__init__.cpython-313.pyc differ diff --git a/crossword-app/backend-py/test-unit/__pycache__/test_api_routes.cpython-313-pytest-8.4.1.pyc b/crossword-app/backend-py/test-unit/__pycache__/test_api_routes.cpython-313-pytest-8.4.1.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6bc7e927ae3de3a83cf9d25273624f51a7f1d97c Binary files /dev/null and b/crossword-app/backend-py/test-unit/__pycache__/test_api_routes.cpython-313-pytest-8.4.1.pyc differ diff --git a/crossword-app/backend-py/test-unit/__pycache__/test_crossword_generator.cpython-313-pytest-8.4.1.pyc b/crossword-app/backend-py/test-unit/__pycache__/test_crossword_generator.cpython-313-pytest-8.4.1.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a0671c5a1a75c61181cbb9ef785df6e270eef3e4 Binary files /dev/null and b/crossword-app/backend-py/test-unit/__pycache__/test_crossword_generator.cpython-313-pytest-8.4.1.pyc differ diff --git a/crossword-app/backend-py/test-unit/__pycache__/test_crossword_generator_fixed.cpython-313-pytest-8.4.1.pyc b/crossword-app/backend-py/test-unit/__pycache__/test_crossword_generator_fixed.cpython-313-pytest-8.4.1.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b6d00cbfc248b764b5d3457ff8c1a15ee68051fb Binary files /dev/null and b/crossword-app/backend-py/test-unit/__pycache__/test_crossword_generator_fixed.cpython-313-pytest-8.4.1.pyc differ diff --git a/crossword-app/backend-py/test-unit/__pycache__/test_crossword_generator_wrapper.cpython-313-pytest-8.4.1.pyc b/crossword-app/backend-py/test-unit/__pycache__/test_crossword_generator_wrapper.cpython-313-pytest-8.4.1.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1eb281edb97b38cf1c52dd5eecf3164b169fa9f2 Binary files /dev/null and b/crossword-app/backend-py/test-unit/__pycache__/test_crossword_generator_wrapper.cpython-313-pytest-8.4.1.pyc differ diff --git a/crossword-app/backend-py/test-unit/__pycache__/test_index_bug_fix.cpython-313-pytest-8.4.1.pyc b/crossword-app/backend-py/test-unit/__pycache__/test_index_bug_fix.cpython-313-pytest-8.4.1.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e7d78e33d13bec9e110d9fb15d990cea9353f18b Binary files /dev/null and b/crossword-app/backend-py/test-unit/__pycache__/test_index_bug_fix.cpython-313-pytest-8.4.1.pyc differ diff --git a/crossword-app/backend-py/test-unit/__pycache__/test_vector_search.cpython-313-pytest-8.4.1.pyc b/crossword-app/backend-py/test-unit/__pycache__/test_vector_search.cpython-313-pytest-8.4.1.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e1bccb3c7d1ae5133897b9b36799d5a73d72b30f Binary files /dev/null and b/crossword-app/backend-py/test-unit/__pycache__/test_vector_search.cpython-313-pytest-8.4.1.pyc differ diff --git a/crossword-app/backend-py/test-unit/test_api_routes.py b/crossword-app/backend-py/test-unit/test_api_routes.py new file mode 100644 index 0000000000000000000000000000000000000000..7fa38e97837e2f2b43b3b175cc4fa6b41145cac4 --- /dev/null +++ b/crossword-app/backend-py/test-unit/test_api_routes.py @@ -0,0 +1,314 @@ +""" +Unit tests for API routes. 
+""" + +import pytest +import asyncio +from unittest.mock import Mock, patch, AsyncMock +from fastapi.testclient import TestClient +from fastapi import FastAPI +import sys +from pathlib import Path + +# Add project root to path for imports +project_root = Path(__file__).parent.parent +sys.path.insert(0, str(project_root)) + +from src.routes.api import router, get_crossword_generator +from src.services.crossword_generator_wrapper import CrosswordGenerator + + +@pytest.fixture +def mock_vector_service(): + """Mock vector search service.""" + mock_service = Mock() + mock_service.is_initialized = True + mock_service.find_similar_words = AsyncMock(return_value=[ + {"word": "ELEPHANT", "clue": "Large mammal", "similarity": 0.8, "source": "vector_search"}, + {"word": "TIGER", "clue": "Striped cat", "similarity": 0.7, "source": "vector_search"}, + ]) + return mock_service + + +@pytest.fixture +def mock_crossword_generator(): + """Mock crossword generator.""" + mock_gen = Mock(spec=CrosswordGenerator) + mock_gen.generate_puzzle = AsyncMock(return_value={ + "grid": [["T", "E", "S", "T"], [".", ".", ".", "."]], + "clues": [ + { + "number": 1, + "word": "TEST", + "text": "A test word", + "direction": "across", + "position": {"row": 0, "col": 0} + } + ], + "metadata": { + "topics": ["Animals"], + "difficulty": "medium", + "wordCount": 1, + "size": 2, + "aiGenerated": True + } + }) + mock_gen.generate_words_for_topics = AsyncMock(return_value=[ + {"word": "ELEPHANT", "clue": "Large mammal", "similarity": 0.8, "source": "vector_search"}, + {"word": "TIGER", "clue": "Striped cat", "similarity": 0.7, "source": "vector_search"}, + ]) + return mock_gen + + +@pytest.fixture +def test_app(mock_vector_service): + """Create test FastAPI app.""" + app = FastAPI() + app.include_router(router, prefix="/api") + + # Mock the app state + app.state.vector_service = mock_vector_service + + return app + + +@pytest.fixture +def client(test_app): + """Create test client.""" + return TestClient(test_app) + + +class TestAPIRoutes: + """Test cases for API routes.""" + + def test_get_topics(self, client): + """Test GET /api/topics endpoint.""" + response = client.get("/api/topics") + + assert response.status_code == 200 + topics = response.json() + + assert len(topics) == 4 + assert all("id" in topic and "name" in topic for topic in topics) + + # Check specific topics + topic_ids = [topic["id"] for topic in topics] + assert "animals" in topic_ids + assert "geography" in topic_ids + assert "science" in topic_ids + assert "technology" in topic_ids + + def test_generate_puzzle_success(self, client, mock_crossword_generator): + """Test successful puzzle generation.""" + with patch('src.routes.api.generator', mock_crossword_generator): + response = client.post("/api/generate", json={ + "topics": ["Animals"], + "difficulty": "medium", + "useAI": True + }) + + assert response.status_code == 200 + puzzle = response.json() + + assert "grid" in puzzle + assert "clues" in puzzle + assert "metadata" in puzzle + assert puzzle["metadata"]["topics"] == ["Animals"] + assert puzzle["metadata"]["difficulty"] == "medium" + assert puzzle["metadata"]["aiGenerated"] is True + + def test_generate_puzzle_no_topics(self, client): + """Test puzzle generation with no topics.""" + response = client.post("/api/generate", json={ + "topics": [], + "difficulty": "medium", + "useAI": False + }) + + assert response.status_code == 400 + assert "At least one topic is required" in response.json()["detail"] + + def test_generate_puzzle_invalid_difficulty(self, 
client): + """Test puzzle generation with invalid difficulty.""" + response = client.post("/api/generate", json={ + "topics": ["Animals"], + "difficulty": "impossible", + "useAI": False + }) + + assert response.status_code == 400 + assert "Invalid difficulty" in response.json()["detail"] + + def test_generate_puzzle_generator_failure(self, client): + """Test puzzle generation when generator fails.""" + mock_gen = Mock(spec=CrosswordGenerator) + mock_gen.generate_puzzle = AsyncMock(return_value=None) + + with patch('src.routes.api.generator', mock_gen): + response = client.post("/api/generate", json={ + "topics": ["Animals"], + "difficulty": "medium", + "useAI": False + }) + + assert response.status_code == 500 + assert "Failed to generate puzzle" in response.json()["detail"] + + def test_generate_puzzle_generator_exception(self, client): + """Test puzzle generation when generator raises exception.""" + mock_gen = Mock(spec=CrosswordGenerator) + mock_gen.generate_puzzle = AsyncMock(side_effect=Exception("Test error")) + + with patch('src.routes.api.generator', mock_gen): + response = client.post("/api/generate", json={ + "topics": ["Animals"], + "difficulty": "medium", + "useAI": False + }) + + assert response.status_code == 500 + assert "Test error" in response.json()["detail"] + + def test_generate_words_success(self, client, mock_crossword_generator): + """Test successful word generation.""" + with patch('src.routes.api.generator', mock_crossword_generator): + response = client.post("/api/words", json={ + "topics": ["Animals"], + "difficulty": "medium", + "useAI": True + }) + + assert response.status_code == 200 + result = response.json() + + assert result["topics"] == ["Animals"] + assert result["difficulty"] == "medium" + assert result["useAI"] is True + assert "wordCount" in result + assert "words" in result + assert len(result["words"]) > 0 + + def test_generate_words_failure(self, client): + """Test word generation failure.""" + mock_gen = Mock(spec=CrosswordGenerator) + mock_gen.generate_words_for_topics = AsyncMock(side_effect=Exception("Word generation failed")) + + with patch('src.routes.api.generator', mock_gen): + response = client.post("/api/words", json={ + "topics": ["Animals"], + "difficulty": "medium", + "useAI": True + }) + + assert response.status_code == 500 + assert "Word generation failed" in response.json()["detail"] + + def test_api_health(self, client): + """Test API health check.""" + response = client.get("/api/health") + + assert response.status_code == 200 + health = response.json() + + assert health["status"] == "healthy" + assert health["backend"] == "python" + assert health["version"] == "2.0.0" + assert "timestamp" in health + + def test_debug_vector_search_success(self, client, mock_vector_service): + """Test debug vector search endpoint.""" + response = client.get("/api/debug/vector-search?topic=Animals&difficulty=medium&max_words=5") + + assert response.status_code == 200 + result = response.json() + + assert result["topic"] == "Animals" + assert result["difficulty"] == "medium" + assert result["max_words"] == 5 + assert "found_words" in result + assert "words" in result + + def test_debug_vector_search_service_unavailable(self, client): + """Test debug vector search when service unavailable.""" + # Create app without vector service + app = FastAPI() + app.include_router(router, prefix="/api") + app.state.vector_service = None + + with TestClient(app) as test_client: + response = test_client.get("/api/debug/vector-search?topic=Animals") + + assert 
response.status_code == 503 + assert "Vector search service not available" in response.json()["detail"] + + def test_debug_vector_search_not_initialized(self, client): + """Test debug vector search when service not initialized.""" + mock_service = Mock() + mock_service.is_initialized = False + + # Create app with uninitialized service + app = FastAPI() + app.include_router(router, prefix="/api") + app.state.vector_service = mock_service + + with TestClient(app) as test_client: + response = test_client.get("/api/debug/vector-search?topic=Animals") + + assert response.status_code == 503 + assert "Vector search service not available" in response.json()["detail"] + + def test_debug_vector_search_failure(self, client, mock_vector_service): + """Test debug vector search when search fails.""" + mock_vector_service.find_similar_words = AsyncMock(side_effect=Exception("Search failed")) + + response = client.get("/api/debug/vector-search?topic=Animals") + + assert response.status_code == 500 + assert "Search failed" in response.json()["detail"] + + def test_get_crossword_generator_dependency(self, mock_vector_service): + """Test the crossword generator dependency.""" + # Mock request with vector service + mock_request = Mock() + mock_request.app.state.vector_service = mock_vector_service + + with patch('src.routes.api.generator', None): # Reset global generator + generator = get_crossword_generator(mock_request) + + assert isinstance(generator, CrosswordGenerator) + assert generator.vector_service == mock_vector_service + + def test_request_validation(self, client): + """Test request model validation.""" + # Test missing required fields + response = client.post("/api/generate", json={}) + assert response.status_code == 422 # Validation error + + # Test invalid field types + response = client.post("/api/generate", json={ + "topics": "not_a_list", + "difficulty": "medium", + "useAI": "not_a_boolean" + }) + assert response.status_code == 422 # Validation error + + def test_default_values(self, client, mock_crossword_generator): + """Test request model default values.""" + with patch('src.routes.api.generator', mock_crossword_generator): + response = client.post("/api/generate", json={ + "topics": ["Animals"] + # difficulty and useAI should use defaults + }) + + assert response.status_code == 200 + + # Check that defaults were used + mock_crossword_generator.generate_puzzle.assert_called_once_with( + topics=["Animals"], + difficulty="medium", # Default value + use_ai=False # Default value + ) + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) \ No newline at end of file diff --git a/crossword-app/backend-py/test-unit/test_crossword_generator.py b/crossword-app/backend-py/test-unit/test_crossword_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..de7f0ad9afb46be33c5f74bb436f40a65c5e038d --- /dev/null +++ b/crossword-app/backend-py/test-unit/test_crossword_generator.py @@ -0,0 +1,326 @@ +""" +Unit tests for CrosswordGenerator to ensure robust crossword generation. 
+""" + +import pytest +import asyncio +from unittest.mock import Mock, patch +import sys +from pathlib import Path + +# Add project root to path for imports +project_root = Path(__file__).parent.parent +sys.path.insert(0, str(project_root)) + +from src.services.crossword_generator import CrosswordGenerator + + +@pytest.fixture +def sample_words(): + """Sample word data for testing.""" + return [ + {"word": "DOG", "clue": "Man's best friend", "similarity": 0.8, "source": "test"}, + {"word": "ELEPHANT", "clue": "Large mammal with trunk", "similarity": 0.7, "source": "test"}, + {"word": "CAT", "clue": "Feline pet", "similarity": 0.9, "source": "test"}, + {"word": "BUTTERFLY", "clue": "Colorful flying insect", "similarity": 0.6, "source": "test"}, + {"word": "TIGER", "clue": "Striped big cat", "similarity": 0.75, "source": "test"}, + {"word": "WHALE", "clue": "Largest marine mammal", "similarity": 0.65, "source": "test"}, + ] + + +@pytest.fixture +def mock_vector_service(): + """Mock vector search service for testing.""" + mock_service = Mock() + mock_service.is_initialized = True + return mock_service + + +class TestCrosswordGenerator: + """Test cases for CrosswordGenerator.""" + + def test_init(self): + """Test generator initialization.""" + generator = CrosswordGenerator() + assert generator.max_attempts == 100 + assert generator.min_words == 6 + assert generator.max_words == 10 + assert generator.vector_service is None + + def test_init_with_vector_service(self, mock_vector_service): + """Test generator initialization with vector service.""" + generator = CrosswordGenerator(vector_service=mock_vector_service) + assert generator.vector_service == mock_vector_service + + def test_sort_words_for_crossword(self, sample_words): + """Test word sorting by crossword suitability.""" + generator = CrosswordGenerator() + sorted_words = generator._sort_words_for_crossword(sample_words) + + # Should return list of dicts with crossword_score + assert len(sorted_words) == len(sample_words) + assert all(isinstance(w, dict) for w in sorted_words) + assert all("crossword_score" in w for w in sorted_words) + + # Scores should be in descending order (with some randomization tolerance) + scores = [w["crossword_score"] for w in sorted_words] + # Allow for some randomization but generally descending + assert len(scores) > 0 + + def test_filter_by_difficulty(self, sample_words): + """Test difficulty filtering.""" + generator = CrosswordGenerator() + + # Test easy difficulty (3-8 chars) + easy_words = generator._filter_by_difficulty(sample_words, "easy") + easy_lengths = [len(w["word"]) for w in easy_words] + assert all(3 <= length <= 8 for length in easy_lengths) + + # Test medium difficulty (4-10 chars) + medium_words = generator._filter_by_difficulty(sample_words, "medium") + medium_lengths = [len(w["word"]) for w in medium_words] + assert all(4 <= length <= 10 for length in medium_lengths) + + # Test hard difficulty (5-15 chars) + hard_words = generator._filter_by_difficulty(sample_words, "hard") + hard_lengths = [len(w["word"]) for w in hard_words] + assert all(5 <= length <= 15 for length in hard_lengths) + + def test_calculate_grid_size(self): + """Test grid size calculation.""" + generator = CrosswordGenerator() + + # Test with short words + short_words = ["DOG", "CAT", "BAT"] + size = generator._calculate_grid_size(short_words) + assert size >= 8 # Minimum size + assert size >= 3 # Longest word length + + # Test with longer words + long_words = ["ELEPHANT", "BUTTERFLY", "HIPPOPOTAMUS"] + size = 
generator._calculate_grid_size(long_words) + assert size >= 12 # Longest word (HIPPOPOTAMUS) + + def test_create_grid_word_processing(self, sample_words): + """Test the critical word processing logic that was causing index errors.""" + generator = CrosswordGenerator() + + # This tests the fix for the list index out of range error + result = generator._create_grid(sample_words) + + # Should not crash and should return a result or None + assert result is None or isinstance(result, dict) + + # If result exists, it should have the correct structure + if result: + assert "grid" in result + assert "clues" in result + assert "placed_words" in result + + def test_create_grid_empty_words(self): + """Test grid creation with empty word list.""" + generator = CrosswordGenerator() + result = generator._create_grid([]) + assert result is None + + def test_create_grid_malformed_words(self): + """Test grid creation with malformed word data.""" + generator = CrosswordGenerator() + + # Test with various malformed inputs + malformed_words = [ + "just_string", # String instead of dict + {"no_word_key": "value"}, # Dict without 'word' key + {"word": ""}, # Empty word + None, # None value + 123, # Number + ] + + # Should not crash, might return None + result = generator._create_grid(malformed_words) + assert result is None or isinstance(result, dict) + + def test_can_place_word_horizontal(self): + """Test horizontal word placement validation.""" + generator = CrosswordGenerator() + grid = [["." for _ in range(10)] for _ in range(10)] + + # Test valid placement + assert generator._can_place_word(grid, "TEST", 5, 3, "horizontal") + + # Test boundary violations + assert not generator._can_place_word(grid, "TOOLONG", 5, 7, "horizontal") # Too long + assert not generator._can_place_word(grid, "TEST", 5, -1, "horizontal") # Negative col + assert not generator._can_place_word(grid, "TEST", -1, 3, "horizontal") # Negative row + + def test_can_place_word_vertical(self): + """Test vertical word placement validation.""" + generator = CrosswordGenerator() + grid = [["." for _ in range(10)] for _ in range(10)] + + # Test valid placement + assert generator._can_place_word(grid, "TEST", 3, 5, "vertical") + + # Test boundary violations + assert not generator._can_place_word(grid, "TOOLONG", 7, 5, "vertical") # Too long + assert not generator._can_place_word(grid, "TEST", -1, 5, "vertical") # Negative row + assert not generator._can_place_word(grid, "TEST", 3, -1, "vertical") # Negative col + + def test_place_and_remove_word(self): + """Test word placement and removal.""" + generator = CrosswordGenerator() + grid = [["." for _ in range(10)] for _ in range(10)] + + # Place word horizontally + original_state = generator._place_word(grid, "TEST", 5, 3, "horizontal") + + # Check placement + assert grid[5][3] == "T" + assert grid[5][4] == "E" + assert grid[5][5] == "S" + assert grid[5][6] == "T" + + # Remove word + generator._remove_word(grid, original_state) + + # Check removal + assert grid[5][3] == "." + assert grid[5][4] == "." + assert grid[5][5] == "." + assert grid[5][6] == "." 
+ + def test_find_word_intersections(self): + """Test finding intersections between words.""" + generator = CrosswordGenerator() + + # Test words with common letters + intersections = generator._find_word_intersections("CAT", "DOG") + assert len(intersections) == 0 # No common letters + + intersections = generator._find_word_intersections("CAT", "ACE") + assert len(intersections) >= 1 # Common 'A' and 'C' + + # Verify intersection format + for intersection in intersections: + assert "word_pos" in intersection + assert "placed_pos" in intersection + assert isinstance(intersection["word_pos"], int) + assert isinstance(intersection["placed_pos"], int) + + def test_create_simple_cross(self, sample_words): + """Test simple cross creation as fallback.""" + generator = CrosswordGenerator() + + # Use words that have intersections + words_with_intersection = [ + {"word": "CAT", "clue": "Feline"}, + {"word": "ACE", "clue": "Playing card"}, + ] + + word_list = ["CAT", "ACE"] + result = generator._create_simple_cross(word_list, words_with_intersection) + + if result: # If intersection found + assert "grid" in result + assert "clues" in result + assert "placed_words" in result + assert len(result["placed_words"]) == 2 + + def test_generate_clues(self, sample_words): + """Test clue generation for placed words.""" + generator = CrosswordGenerator() + + placed_words = [ + {"word": "DOG", "row": 0, "col": 0, "direction": "horizontal", "number": 1}, + {"word": "CAT", "row": 0, "col": 0, "direction": "vertical", "number": 2}, + ] + + clues = generator._generate_clues(sample_words, placed_words) + + assert len(clues) == 2 + for clue in clues: + assert "number" in clue + assert "word" in clue + assert "text" in clue + assert "direction" in clue + assert clue["direction"] in ["across", "down"] + assert "position" in clue + + @pytest.mark.asyncio + async def test_select_words_with_vector_service(self, mock_vector_service, sample_words): + """Test word selection with vector service.""" + # Mock vector service methods + mock_vector_service.find_similar_words.return_value = sample_words + + generator = CrosswordGenerator(vector_service=mock_vector_service) + + words = await generator._select_words(["Animals"], "medium", True) + + assert len(words) <= generator.max_words + assert all(isinstance(w, dict) for w in words) + mock_vector_service.find_similar_words.assert_called_once() + + @pytest.mark.asyncio + async def test_select_words_without_vector_service(self): + """Test word selection without vector service.""" + generator = CrosswordGenerator() + + # Should fallback to empty/static words + words = await generator._select_words(["Animals"], "medium", True) + + # Without vector service and no static files, should return empty or minimal + assert isinstance(words, list) + + @pytest.mark.asyncio + async def test_generate_puzzle_success(self, mock_vector_service, sample_words): + """Test successful puzzle generation.""" + mock_vector_service.find_similar_words.return_value = sample_words + + generator = CrosswordGenerator(vector_service=mock_vector_service) + + # Mock the grid creation to return a simple result + with patch.object(generator, '_create_grid') as mock_create_grid: + mock_create_grid.return_value = { + "grid": [["T", "E", "S", "T"], [".", ".", ".", "."]], + "placed_words": [{"word": "TEST", "row": 0, "col": 0, "direction": "horizontal", "number": 1}], + "clues": [{"number": 1, "word": "TEST", "text": "A test", "direction": "across", "position": {"row": 0, "col": 0}}] + } + + result = await 
generator.generate_puzzle(["Animals"], "medium", True) + + assert result is not None + assert "grid" in result + assert "clues" in result + assert "metadata" in result + assert result["metadata"]["topics"] == ["Animals"] + assert result["metadata"]["difficulty"] == "medium" + assert result["metadata"]["aiGenerated"] is True + + @pytest.mark.asyncio + async def test_generate_puzzle_insufficient_words(self, mock_vector_service): + """Test puzzle generation with insufficient words.""" + # Return too few words + mock_vector_service.find_similar_words.return_value = [ + {"word": "CAT", "clue": "Feline", "similarity": 0.8, "source": "test"} + ] + + generator = CrosswordGenerator(vector_service=mock_vector_service) + + with pytest.raises(Exception, match="Not enough words generated"): + await generator.generate_puzzle(["Animals"], "medium", True) + + @pytest.mark.asyncio + async def test_generate_puzzle_grid_creation_fails(self, mock_vector_service, sample_words): + """Test puzzle generation when grid creation fails.""" + mock_vector_service.find_similar_words.return_value = sample_words + + generator = CrosswordGenerator(vector_service=mock_vector_service) + + # Mock grid creation to fail + with patch.object(generator, '_create_grid', return_value=None): + with pytest.raises(Exception, match="Could not create crossword grid"): + await generator.generate_puzzle(["Animals"], "medium", True) + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) \ No newline at end of file diff --git a/crossword-app/backend-py/test-unit/test_crossword_generator_wrapper.py b/crossword-app/backend-py/test-unit/test_crossword_generator_wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..56168780418ed38625f241f9ffd19d3d556875ea --- /dev/null +++ b/crossword-app/backend-py/test-unit/test_crossword_generator_wrapper.py @@ -0,0 +1,305 @@ +""" +Unit tests for CrosswordGenerator wrapper. 
+""" + +import pytest +import asyncio +from unittest.mock import Mock, patch, AsyncMock +import sys +from pathlib import Path + +# Add project root to path for imports +project_root = Path(__file__).parent.parent +sys.path.insert(0, str(project_root)) + +from src.services.crossword_generator_wrapper import CrosswordGenerator + + +@pytest.fixture +def mock_vector_service(): + """Mock vector search service.""" + mock_service = Mock() + mock_service.is_initialized = True + mock_service.find_similar_words = AsyncMock(return_value=[ + {"word": "ELEPHANT", "clue": "Large mammal", "similarity": 0.8, "source": "vector_search"}, + {"word": "TIGER", "clue": "Striped cat", "similarity": 0.7, "source": "vector_search"}, + {"word": "LION", "clue": "King of jungle", "similarity": 0.75, "source": "vector_search"}, + ]) + return mock_service + + +@pytest.fixture +def sample_puzzle_result(): + """Sample puzzle result from fixed generator.""" + return { + "grid": [ + ["T", "I", "G", "E", "R"], + [".", ".", ".", ".", "."], + ["L", "I", "O", "N", "."], + ], + "clues": [ + {"number": 1, "word": "TIGER", "text": "Striped cat", "direction": "across", "position": {"row": 0, "col": 0}}, + {"number": 2, "word": "LION", "text": "King of jungle", "direction": "across", "position": {"row": 2, "col": 0}}, + ], + "metadata": { + "topics": ["Animals"], + "difficulty": "medium", + "wordCount": 2, + "size": 3, + "aiGenerated": True + } + } + + +class TestCrosswordGeneratorWrapper: + """Test cases for CrosswordGenerator wrapper.""" + + def test_init_without_vector_service(self): + """Test wrapper initialization without vector service.""" + generator = CrosswordGenerator() + assert generator.vector_service is None + assert generator.min_words == 8 + assert generator.max_words == 15 + + def test_init_with_vector_service(self, mock_vector_service): + """Test wrapper initialization with vector service.""" + generator = CrosswordGenerator(vector_service=mock_vector_service) + assert generator.vector_service == mock_vector_service + assert generator.min_words == 8 + assert generator.max_words == 15 + + @pytest.mark.asyncio + async def test_generate_puzzle_success(self, mock_vector_service, sample_puzzle_result): + """Test successful puzzle generation through wrapper.""" + generator = CrosswordGenerator(vector_service=mock_vector_service) + + # Mock the fixed generator + with patch('src.services.crossword_generator_wrapper.CrosswordGenerator') as mock_fixed_class: + mock_fixed_instance = Mock() + mock_fixed_instance.generate_puzzle = AsyncMock(return_value=sample_puzzle_result) + mock_fixed_class.return_value = mock_fixed_instance + + result = await generator.generate_puzzle( + topics=["Animals"], + difficulty="medium", + use_ai=True + ) + + assert result == sample_puzzle_result + assert result["metadata"]["topics"] == ["Animals"] + assert result["metadata"]["difficulty"] == "medium" + assert result["metadata"]["aiGenerated"] is True + + # Verify fixed generator was called correctly + mock_fixed_class.assert_called_once_with(vector_service=mock_vector_service) + mock_fixed_instance.generate_puzzle.assert_called_once_with( + ["Animals"], "medium", True + ) + + @pytest.mark.asyncio + async def test_generate_puzzle_without_vector_service(self, sample_puzzle_result): + """Test puzzle generation without vector service.""" + generator = CrosswordGenerator() + + with patch('src.services.crossword_generator_wrapper.CrosswordGenerator') as mock_fixed_class: + mock_fixed_instance = Mock() + mock_fixed_instance.generate_puzzle = 
AsyncMock(return_value=sample_puzzle_result) + mock_fixed_class.return_value = mock_fixed_instance + + result = await generator.generate_puzzle( + topics=["Animals"], + difficulty="hard", + use_ai=False + ) + + assert result == sample_puzzle_result + + # Verify fixed generator was called with None vector service + mock_fixed_class.assert_called_once_with(vector_service=None) + mock_fixed_instance.generate_puzzle.assert_called_once_with( + ["Animals"], "hard", False + ) + + @pytest.mark.asyncio + async def test_generate_puzzle_failure(self, mock_vector_service): + """Test puzzle generation failure.""" + generator = CrosswordGenerator(vector_service=mock_vector_service) + + with patch('src.services.crossword_generator_wrapper.CrosswordGenerator') as mock_fixed_class: + mock_fixed_instance = Mock() + mock_fixed_instance.generate_puzzle = AsyncMock(side_effect=Exception("Generation failed")) + mock_fixed_class.return_value = mock_fixed_instance + + with pytest.raises(Exception, match="Generation failed"): + await generator.generate_puzzle( + topics=["Animals"], + difficulty="medium", + use_ai=True + ) + + @pytest.mark.asyncio + async def test_generate_puzzle_multiple_topics(self, mock_vector_service, sample_puzzle_result): + """Test puzzle generation with multiple topics.""" + generator = CrosswordGenerator(vector_service=mock_vector_service) + + with patch('src.services.crossword_generator_wrapper.CrosswordGenerator') as mock_fixed_class: + mock_fixed_instance = Mock() + mock_fixed_instance.generate_puzzle = AsyncMock(return_value=sample_puzzle_result) + mock_fixed_class.return_value = mock_fixed_instance + + await generator.generate_puzzle( + topics=["Animals", "Geography"], + difficulty="easy", + use_ai=True + ) + + mock_fixed_instance.generate_puzzle.assert_called_once_with( + ["Animals", "Geography"], "easy", True + ) + + @pytest.mark.asyncio + async def test_generate_words_for_topics_success(self, mock_vector_service): + """Test word generation for topics.""" + generator = CrosswordGenerator(vector_service=mock_vector_service) + + sample_words = [ + {"word": "ELEPHANT", "clue": "Large mammal", "similarity": 0.8}, + {"word": "TIGER", "clue": "Striped cat", "similarity": 0.7}, + ] + + with patch('src.services.crossword_generator_wrapper.CrosswordGenerator') as mock_fixed_class: + mock_fixed_instance = Mock() + mock_fixed_instance._select_words = AsyncMock(return_value=sample_words) + mock_fixed_class.return_value = mock_fixed_instance + + result = await generator.generate_words_for_topics( + topics=["Animals"], + difficulty="medium", + use_ai=True + ) + + assert result == sample_words + assert len(result) == 2 + assert all("word" in word and "clue" in word for word in result) + + # Verify fixed generator was called correctly + mock_fixed_class.assert_called_once_with() # No vector service passed for this method + mock_fixed_instance._select_words.assert_called_once_with( + ["Animals"], "medium", True + ) + + @pytest.mark.asyncio + async def test_generate_words_for_topics_failure(self): + """Test word generation failure.""" + generator = CrosswordGenerator() + + with patch('src.services.crossword_generator_wrapper.CrosswordGenerator') as mock_fixed_class: + mock_fixed_instance = Mock() + mock_fixed_instance._select_words = AsyncMock(side_effect=Exception("Word selection failed")) + mock_fixed_class.return_value = mock_fixed_instance + + with pytest.raises(Exception, match="Word selection failed"): + await generator.generate_words_for_topics( + topics=["Animals"], + difficulty="medium", + 
use_ai=False + ) + + @pytest.mark.asyncio + async def test_generate_words_for_topics_empty_result(self): + """Test word generation with empty result.""" + generator = CrosswordGenerator() + + with patch('src.services.crossword_generator_wrapper.CrosswordGenerator') as mock_fixed_class: + mock_fixed_instance = Mock() + mock_fixed_instance._select_words = AsyncMock(return_value=[]) + mock_fixed_class.return_value = mock_fixed_instance + + result = await generator.generate_words_for_topics( + topics=["NonExistent"], + difficulty="medium", + use_ai=False + ) + + assert result == [] + + @pytest.mark.asyncio + async def test_difficulty_levels(self, mock_vector_service, sample_puzzle_result): + """Test all difficulty levels.""" + generator = CrosswordGenerator(vector_service=mock_vector_service) + + with patch('src.services.crossword_generator_wrapper.CrosswordGenerator') as mock_fixed_class: + mock_fixed_instance = Mock() + mock_fixed_instance.generate_puzzle = AsyncMock(return_value=sample_puzzle_result) + mock_fixed_class.return_value = mock_fixed_instance + + # Test all difficulty levels + for difficulty in ["easy", "medium", "hard"]: + await generator.generate_puzzle( + topics=["Animals"], + difficulty=difficulty, + use_ai=True + ) + + # Verify the difficulty was passed correctly + calls = mock_fixed_instance.generate_puzzle.call_args_list + assert any(call[0][1] == difficulty for call in calls) + + @pytest.mark.asyncio + async def test_ai_flag_variations(self, mock_vector_service, sample_puzzle_result): + """Test AI flag variations.""" + generator = CrosswordGenerator(vector_service=mock_vector_service) + + with patch('src.services.crossword_generator_wrapper.CrosswordGenerator') as mock_fixed_class: + mock_fixed_instance = Mock() + mock_fixed_instance.generate_puzzle = AsyncMock(return_value=sample_puzzle_result) + mock_fixed_class.return_value = mock_fixed_instance + + # Test with AI enabled + await generator.generate_puzzle( + topics=["Animals"], + difficulty="medium", + use_ai=True + ) + + # Test with AI disabled + await generator.generate_puzzle( + topics=["Animals"], + difficulty="medium", + use_ai=False + ) + + # Verify both calls were made + assert mock_fixed_instance.generate_puzzle.call_count == 2 + calls = mock_fixed_instance.generate_puzzle.call_args_list + assert calls[0][0][2] is True # First call with use_ai=True + assert calls[1][0][2] is False # Second call with use_ai=False + + @pytest.mark.asyncio + async def test_logging_integration(self, mock_vector_service, sample_puzzle_result): + """Test that logging messages are generated.""" + generator = CrosswordGenerator(vector_service=mock_vector_service) + + with patch('src.services.crossword_generator_wrapper.CrosswordGenerator') as mock_fixed_class: + mock_fixed_instance = Mock() + mock_fixed_instance.generate_puzzle = AsyncMock(return_value=sample_puzzle_result) + mock_fixed_class.return_value = mock_fixed_instance + + with patch('src.services.crossword_generator_wrapper.logger') as mock_logger: + await generator.generate_puzzle( + topics=["Animals"], + difficulty="medium", + use_ai=True + ) + + # Verify logging calls were made + assert mock_logger.info.call_count >= 2 + + # Check for specific log messages + log_calls = [call[0][0] for call in mock_logger.info.call_args_list] + assert any("Using fixed crossword generator" in msg for msg in log_calls) + assert any("Generated crossword with fixed algorithm" in msg for msg in log_calls) + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) \ No newline at end of file 
diff --git a/crossword-app/backend-py/test-unit/test_index_bug_fix.py b/crossword-app/backend-py/test-unit/test_index_bug_fix.py new file mode 100644 index 0000000000000000000000000000000000000000..29eea70c50062bdb67dba78ba924026e7fd276e8 --- /dev/null +++ b/crossword-app/backend-py/test-unit/test_index_bug_fix.py @@ -0,0 +1,264 @@ +""" +Specific unit tests to verify the list index out of range bug is completely fixed. +These tests reproduce the exact conditions that were causing the crash. +""" + +import pytest +import asyncio +import sys +from pathlib import Path +from unittest.mock import Mock, patch + +# Add project root to path +project_root = Path(__file__).parent.parent +sys.path.insert(0, str(project_root)) + +from src.services.crossword_generator import CrosswordGenerator + + +class TestIndexBugFix: + """Test cases specifically for the index out of range bug.""" + + @pytest.fixture + def real_vector_words(self): + """Real word data that was causing the crash - from the actual logs.""" + return [ + {'word': 'ZOOLOGY', 'clue': 'zoology (animal)', 'similarity': 0.6106429100036621, 'source': 'vector_search', 'crossword_score': 16}, + {'word': 'NATURE', 'clue': 'nature (animal)', 'similarity': 0.5933953523635864, 'source': 'vector_search', 'crossword_score': 18}, + {'word': 'VETERINARY', 'clue': 'veterinary (animal)', 'similarity': 0.7589661479, 'source': 'vector_search', 'crossword_score': 25}, + {'word': 'ZOOLOGICAL', 'clue': 'zoological (animal)', 'similarity': 0.668032, 'source': 'vector_search', 'crossword_score': 22}, + {'word': 'MAMMALIAN', 'clue': 'mammalian (animal)', 'similarity': 0.6375998, 'source': 'vector_search', 'crossword_score': 20}, + {'word': 'CHILDREN', 'clue': 'children (animal)', 'similarity': 0.6281173, 'source': 'vector_search', 'crossword_score': 19}, + {'word': 'ELEPHANT', 'clue': 'elephant (animal)', 'similarity': 0.6157694, 'source': 'vector_search', 'crossword_score': 18}, + {'word': 'FAUNA', 'clue': 'fauna (animal)', 'similarity': 0.5890194177627563, 'source': 'vector_search', 'crossword_score': 16}, + {'word': 'ORGANISM', 'clue': 'organism (animal)', 'similarity': 0.58123, 'source': 'vector_search', 'crossword_score': 19}, + {'word': 'MAMMAL', 'clue': 'mammal (animal)', 'similarity': 0.57892, 'source': 'vector_search', 'crossword_score': 17}, + {'word': 'CREATURE', 'clue': 'creature (animal)', 'similarity': 0.57654, 'source': 'vector_search', 'crossword_score': 18}, + {'word': 'SPECIES', 'clue': 'species (animal)', 'similarity': 0.57432, 'source': 'vector_search', 'crossword_score': 16} + ] + + def test_calculate_placement_score_bounds_checking(self): + """Test that _calculate_placement_score handles out-of-bounds access correctly.""" + generator = CrosswordGenerator() + + # Create a small 5x5 grid + grid = [["." 
for _ in range(5)] for _ in range(5)] + + # Test cases that should NOT crash + test_cases = [ + # Horizontal placement that would go out of bounds + {"row": 2, "col": 3, "direction": "horizontal", "word": "ELEPHANT"}, # 8 letters, would go to col 10 + {"row": 4, "col": 0, "direction": "horizontal", "word": "VETERINARY"}, # 10 letters, would go to col 9 + + # Vertical placement that would go out of bounds + {"row": 3, "col": 2, "direction": "vertical", "word": "ZOOLOGICAL"}, # 10 letters, would go to row 12 + {"row": 1, "col": 4, "direction": "vertical", "word": "MAMMALIAN"}, # 9 letters, would go to row 9 + + # Edge cases + {"row": 0, "col": 0, "direction": "horizontal", "word": "SUPERLONGWORD"}, + {"row": 0, "col": 0, "direction": "vertical", "word": "SUPERLONGWORD"}, + {"row": 4, "col": 4, "direction": "horizontal", "word": "TEST"}, + {"row": 4, "col": 4, "direction": "vertical", "word": "TEST"}, + ] + + for i, test_case in enumerate(test_cases): + placement = { + "row": test_case["row"], + "col": test_case["col"], + "direction": test_case["direction"] + } + word = test_case["word"] + + try: + # This should NOT raise IndexError + score = generator._calculate_placement_score(grid, word, placement, []) + print(f"✅ Test case {i+1}: {word} at ({test_case['row']},{test_case['col']}) {test_case['direction']} -> score: {score}") + assert isinstance(score, int), f"Score should be integer, got {type(score)}" + + except IndexError as e: + pytest.fail(f"❌ IndexError in test case {i+1}: {word} at ({test_case['row']},{test_case['col']}) {test_case['direction']} - {e}") + except Exception as e: + pytest.fail(f"❌ Unexpected error in test case {i+1}: {e}") + + def test_word_sorting_alignment(self, real_vector_words): + """Test that word sorting maintains alignment between word_list and word_objs.""" + generator = CrosswordGenerator() + + # This is the exact code path that was causing the index error + word_pairs = [] + for i, w in enumerate(real_vector_words): + if isinstance(w, dict) and "word" in w: + word_pairs.append((w["word"].upper(), w)) + else: + pytest.fail(f"Invalid word format at index {i}: {w}") + + # Sort pairs by word length (longest first) + word_pairs.sort(key=lambda pair: len(pair[0]), reverse=True) + + # Extract sorted lists + word_list = [pair[0] for pair in word_pairs] + sorted_word_objs = [pair[1] for pair in word_pairs] + + # Verify alignment + assert len(word_list) == len(sorted_word_objs), "Array lengths must match" + + for i, (word, word_obj) in enumerate(zip(word_list, sorted_word_objs)): + assert word == word_obj["word"].upper(), f"Mismatch at index {i}: {word} != {word_obj['word'].upper()}" + + print(f"✅ Word sorting alignment verified for {len(word_list)} words") + + def test_grid_creation_with_real_data(self, real_vector_words): + """Test grid creation with the exact data that was causing crashes.""" + generator = CrosswordGenerator() + + try: + # This should NOT crash + result = generator._create_grid(real_vector_words) + + if result is None: + print("⚠️ Grid creation returned None (no successful placement)") + else: + print(f"✅ Grid creation succeeded with {len(result['placed_words'])} placed words") + assert "grid" in result + assert "clues" in result + assert "placed_words" in result + + except IndexError as e: + pytest.fail(f"❌ IndexError in grid creation: {e}") + except Exception as e: + # Other exceptions are okay (e.g., timeout, no intersections found) + print(f"ℹ️ Grid creation failed with non-index error: {e}") + + def test_backtrack_placement_bounds(self, 
real_vector_words): + """Test that backtracking placement handles bounds correctly.""" + generator = CrosswordGenerator() + + # Create grid + grid = [["." for _ in range(15)] for _ in range(15)] + placed_words = [] + + # Extract word list + word_list = [w["word"].upper() for w in real_vector_words] + word_list.sort(key=len, reverse=True) + + try: + # Test backtracking - should not crash even if no solution found + result = generator._backtrack_placement( + grid, word_list, real_vector_words, 0, placed_words, + start_time=0, timeout=1.0 # Short timeout + ) + + print(f"✅ Backtrack placement completed without IndexError, result: {result}") + + except IndexError as e: + pytest.fail(f"❌ IndexError in backtrack placement: {e}") + except Exception as e: + # Other exceptions are okay (timeout, etc.) + print(f"ℹ️ Backtrack placement failed with non-index error: {e}") + + def test_intersection_placement_edge_cases(self): + """Test intersection placement calculations with edge cases.""" + generator = CrosswordGenerator() + + # Create grid with a word already placed + grid = [["." for _ in range(10)] for _ in range(10)] + + # Place "TEST" horizontally at (5, 2) + for i, letter in enumerate("TEST"): + grid[5][2 + i] = letter + + placed_words = [{ + "word": "TEST", + "row": 5, + "col": 2, + "direction": "horizontal", + "number": 1 + }] + + # Test words that might cause out-of-bounds access + test_words = ["VETERINARY", "ZOOLOGICAL", "ELEPHANT", "T", "AT", "STRESS"] + + for word in test_words: + try: + placements = generator._find_all_intersection_placements(grid, word, placed_words) + print(f"✅ Found {len(placements)} intersection placements for '{word}'") + + # Test each placement + for placement in placements: + try: + score = generator._calculate_placement_score(grid, word, placement, placed_words) + print(f" - Placement at ({placement['row']},{placement['col']}) {placement['direction']}: score {score}") + except IndexError as e: + pytest.fail(f"❌ IndexError calculating score for {word}: {e}") + + except IndexError as e: + pytest.fail(f"❌ IndexError finding intersections for {word}: {e}") + + @pytest.mark.asyncio + async def test_full_puzzle_generation_stress(self, real_vector_words): + """Stress test full puzzle generation with problematic data.""" + generator = CrosswordGenerator() + + # Mock vector service + mock_vector_service = Mock() + mock_vector_service.find_similar_words = Mock(return_value=real_vector_words) + generator.vector_service = mock_vector_service + + try: + # This should complete without IndexError + result = await generator.generate_puzzle(["Animals"], "medium", True) + + if result is None: + print("⚠️ Puzzle generation returned None") + else: + print(f"✅ Full puzzle generation succeeded!") + assert "grid" in result + assert "clues" in result + assert "metadata" in result + + except IndexError as e: + pytest.fail(f"❌ IndexError in full puzzle generation: {e}") + except Exception as e: + # Other exceptions might be okay + print(f"ℹ️ Puzzle generation failed with non-index error: {e}") + + def test_edge_case_grids(self): + """Test edge cases with different grid sizes and word combinations.""" + generator = CrosswordGenerator() + + edge_cases = [ + # Very small grid + {"grid_size": 3, "words": ["CAT", "DOG"]}, + # Single cell grid + {"grid_size": 1, "words": ["A"]}, + # Large grid with short words + {"grid_size": 20, "words": ["A", "I", "IT", "AT"]}, + # Small grid with long words + {"grid_size": 5, "words": ["SUPERCALIFRAGILISTICEXPIALIDOCIOUS"]}, + ] + + for case in edge_cases: + 
grid = [["." for _ in range(case["grid_size"])] for _ in range(case["grid_size"])] + placed_words = [] + + for word in case["words"]: + try: + # Test various placement attempts + for row in range(case["grid_size"]): + for col in range(case["grid_size"]): + for direction in ["horizontal", "vertical"]: + placement = {"row": row, "col": col, "direction": direction} + + # These should not crash + can_place = generator._can_place_word(grid, word, row, col, direction) + score = generator._calculate_placement_score(grid, word, placement, placed_words) + + assert isinstance(can_place, bool) + assert isinstance(score, int) + + except IndexError as e: + pytest.fail(f"❌ IndexError with grid_size={case['grid_size']}, word='{word}': {e}") + +if __name__ == "__main__": + # Run just these specific tests + pytest.main([__file__, "-v", "--tb=short"]) \ No newline at end of file diff --git a/crossword-app/backend-py/test-unit/test_vector_search.py b/crossword-app/backend-py/test-unit/test_vector_search.py new file mode 100644 index 0000000000000000000000000000000000000000..7ee24b74c5726dd30bab2f1e36c46c539bfffc24 --- /dev/null +++ b/crossword-app/backend-py/test-unit/test_vector_search.py @@ -0,0 +1,354 @@ +""" +Unit tests for VectorSearchService. +""" + +import pytest +import asyncio +import os +import tempfile +import json +from unittest.mock import Mock, patch, MagicMock +import sys +from pathlib import Path +import numpy as np + +# Add project root to path for imports +project_root = Path(__file__).parent.parent +sys.path.insert(0, str(project_root)) + +from src.services.vector_search import VectorSearchService + + +@pytest.fixture +def mock_sentence_transformer(): + """Mock SentenceTransformer for testing.""" + mock_model = Mock() + mock_model.encode.return_value = np.random.rand(5, 384) # 5 words, 384 dimensions + + # Mock tokenizer + mock_tokenizer = Mock() + mock_tokenizer.get_vocab.return_value = { + "dog": 1, "cat": 2, "elephant": 3, "tiger": 4, "whale": 5, + "bird": 6, "fish": 7, "lion": 8, "bear": 9, "rabbit": 10, + "horse": 11, "sheep": 12, "goat": 13, "duck": 14, "chicken": 15 + } + mock_model.tokenizer = mock_tokenizer + + return mock_model + + +@pytest.fixture +def sample_static_words(): + """Sample static words for testing.""" + return { + "Animals": [ + {"word": "DOG", "clue": "Man's best friend"}, + {"word": "CAT", "clue": "Feline pet"}, + {"word": "ELEPHANT", "clue": "Large mammal with trunk"}, + ], + "Technology": [ + {"word": "COMPUTER", "clue": "Electronic device"}, + {"word": "ROBOT", "clue": "Automated machine"}, + ] + } + + +@pytest.fixture +def temp_static_words_dir(sample_static_words): + """Create temporary directory with static word files.""" + with tempfile.TemporaryDirectory() as temp_dir: + word_lists_dir = Path(temp_dir) / "word-lists" + word_lists_dir.mkdir() + + for topic, words in sample_static_words.items(): + file_path = word_lists_dir / f"{topic.lower()}.json" + with open(file_path, 'w') as f: + json.dump(words, f) + + yield word_lists_dir + + +class TestVectorSearchService: + """Test cases for VectorSearchService.""" + + def test_init(self): + """Test service initialization.""" + service = VectorSearchService() + assert service.model is None + assert service.vocab is None + assert service.word_embeddings is None + assert service.faiss_index is None + assert service.is_initialized is False + assert service.static_words == {} + + # Check default configuration + assert "all-mpnet-base-v2" in service.model_name + assert service.similarity_threshold == 0.3 + assert 
service.max_results == 20 + + def test_filter_vocabulary(self): + """Test vocabulary filtering.""" + service = VectorSearchService() + + vocab_dict = { + "dog": 1, "cat": 2, "elephant": 3, # Good words + "the": 4, "and": 5, "##ing": 6, # Should be filtered + "dogs": 7, "cats": 8, # Plurals - should be filtered + "a": 9, "ab": 10, # Too short + "supercalifragilisticexpialidocious": 11, # Too long + "[CLS]": 12, "": 13, # Special tokens + } + + filtered = service._filter_vocabulary(vocab_dict) + + # Should keep good words + assert "DOG" in filtered + assert "CAT" in filtered + assert "ELEPHANT" in filtered + + # Should filter out bad words + assert "THE" not in filtered + assert "AND" not in filtered + assert "DOGS" not in filtered + assert "CATS" not in filtered + assert "A" not in filtered + assert "[CLS]" not in filtered + + def test_is_plural(self): + """Test plural detection.""" + service = VectorSearchService() + + # Test plurals + assert service._is_plural("DOGS") is True + assert service._is_plural("CATS") is True + assert service._is_plural("BIRDS") is True + + # Test non-plurals + assert service._is_plural("DOG") is False + assert service._is_plural("CLASS") is False # Ends in SS + assert service._is_plural("BUS") is False # Ends in US + assert service._is_plural("THIS") is False # Ends in IS + assert service._is_plural("CAT") is False + + def test_is_boring_word(self): + """Test boring word detection.""" + service = VectorSearchService() + + # Test boring words + assert service._is_boring_word("RUNNING") is True # ING ending + assert service._is_boring_word("EDUCATION") is True # TION ending + assert service._is_boring_word("HAPPINESS") is True # NESS ending + assert service._is_boring_word("GET") is True # Common short word + + # Test interesting words + assert service._is_boring_word("DOG") is False + assert service._is_boring_word("ELEPHANT") is False + assert service._is_boring_word("COMPUTER") is False + + def test_matches_difficulty(self): + """Test difficulty matching.""" + service = VectorSearchService() + + # Easy: 3-8 chars + assert service._matches_difficulty("DOG", "easy") is True # 3 chars + assert service._matches_difficulty("ELEPHANT", "easy") is True # 8 chars + assert service._matches_difficulty("AB", "easy") is False # Too short + assert service._matches_difficulty("SUPERLONGSWORD", "easy") is False # Too long + + # Medium: 4-10 chars + assert service._matches_difficulty("CATS", "medium") is True # 4 chars + assert service._matches_difficulty("BUTTERFLIES", "medium") is False # 11 chars + + # Hard: 5-15 chars + assert service._matches_difficulty("TIGER", "hard") is True # 5 chars + assert service._matches_difficulty("DOG", "hard") is False # Too short + + def test_generate_clue(self): + """Test clue generation.""" + service = VectorSearchService() + + # Test topic-specific clues + clue = service._generate_clue("ELEPHANT", "Animals") + assert "elephant" in clue.lower() + assert "animal" in clue.lower() + + clue = service._generate_clue("COMPUTER", "Technology") + assert "computer" in clue.lower() + assert "tech" in clue.lower() + + # Test generic clue + clue = service._generate_clue("WORD", "Unknown") + assert "word" in clue.lower() + assert "unknown" in clue.lower() + + def test_is_interesting_word(self): + """Test interesting word detection.""" + service = VectorSearchService() + + # Test word matching topic (should be filtered out) + assert service._is_interesting_word("ANIMAL", "Animals") is False + assert service._is_interesting_word("ANIMALS", "Animals") is 
False + + # Test obvious animal words + assert service._is_interesting_word("MAMMAL", "Animals") is False + assert service._is_interesting_word("WILDLIFE", "Animals") is False + + # Test abstract words + assert service._is_interesting_word("EDUCATION", "School") is False # -tion ending + assert service._is_interesting_word("HAPPINESS", "Emotions") is False # -ness ending + + # Test good words + assert service._is_interesting_word("ELEPHANT", "Animals") is True + assert service._is_interesting_word("COMPUTER", "Technology") is True + + @pytest.mark.asyncio + async def test_load_static_words_success(self, temp_static_words_dir): + """Test successful static words loading.""" + service = VectorSearchService() + + # Make every Path.exists() check return True so the loader proceeds to read our temp files + with patch.object(Path, 'exists', return_value=True): + with patch('pathlib.Path.glob') as mock_glob: + # Mock glob to return our test files + mock_files = [ + temp_static_words_dir / "animals.json", + temp_static_words_dir / "technology.json" + ] + mock_glob.return_value = mock_files + + await service._load_static_words() + + # Should have loaded both topics + assert "Animals" in service.static_words + assert "Technology" in service.static_words + assert len(service.static_words["Animals"]) == 3 + assert len(service.static_words["Technology"]) == 2 + + @pytest.mark.asyncio + async def test_load_static_words_no_files(self): + """Test static words loading when no files exist.""" + service = VectorSearchService() + + with patch.object(Path, 'exists', return_value=False): + await service._load_static_words() + assert service.static_words == {} + + @pytest.mark.asyncio + async def test_get_static_fallback(self, sample_static_words): + """Test static fallback word retrieval.""" + service = VectorSearchService() + service.static_words = sample_static_words + + # Test exact topic match + words = await service._get_static_fallback("Animals", "medium", 5) + assert len(words) <= 5 + assert all(w["source"] == "static_fallback" for w in words) + assert all("word" in w and "clue" in w for w in words) + + # Test case variations + words = await service._get_static_fallback("animals", "medium", 5) + assert len(words) > 0 # Should find Animals via case variation + + # Test non-existent topic + words = await service._get_static_fallback("NonExistent", "medium", 5) + assert len(words) == 0 + + @pytest.mark.asyncio + @patch('src.services.vector_search.SentenceTransformer') + @patch('src.services.vector_search.faiss') + async def test_initialize_success(self, mock_faiss, mock_transformer_class, mock_sentence_transformer): + """Test successful service initialization.""" + # Setup mocks + mock_transformer_class.return_value = mock_sentence_transformer + mock_index = Mock() + mock_faiss.IndexFlatIP.return_value = mock_index + mock_faiss.normalize_L2 = Mock() + + service = VectorSearchService() + + with patch.object(service, '_load_static_words') as mock_load_static: + await service.initialize() + + assert service.is_initialized is True + assert service.model == mock_sentence_transformer + assert service.vocab is not None + assert service.faiss_index == mock_index + mock_load_static.assert_called_once() + + @pytest.mark.asyncio + @patch('src.services.vector_search.SentenceTransformer') + async def test_initialize_failure(self, mock_transformer_class): + """Test service 
initialization failure.""" + # Make SentenceTransformer raise an exception + mock_transformer_class.side_effect = Exception("Model load failed") + + service = VectorSearchService() + + with pytest.raises(Exception, match="Model load failed"): + await service.initialize() + + assert service.is_initialized is False + + @pytest.mark.asyncio + async def test_find_similar_words_not_initialized(self, sample_static_words): + """Test word search when service not initialized.""" + service = VectorSearchService() + service.static_words = sample_static_words + + words = await service.find_similar_words("Animals", "medium", 5) + + # Should fallback to static words + assert len(words) > 0 + assert all(w["source"] == "static_fallback" for w in words) + + @pytest.mark.asyncio + @patch('src.services.vector_search.faiss') + async def test_find_similar_words_initialized(self, mock_faiss, mock_sentence_transformer): + """Test word search when service is initialized.""" + # Setup service as initialized + service = VectorSearchService() + service.is_initialized = True + service.model = mock_sentence_transformer + service.vocab = ["ELEPHANT", "TIGER", "LION", "BEAR", "WHALE"] + + # Mock FAISS search results + mock_index = Mock() + mock_index.search.return_value = ( + np.array([[0.8, 0.7, 0.6, 0.5, 0.4]]), # Scores + np.array([[0, 1, 2, 3, 4]]) # Indices + ) + service.faiss_index = mock_index + + # Mock embedding generation + mock_sentence_transformer.encode.return_value = np.array([[0.1, 0.2, 0.3]]) + mock_faiss.normalize_L2 = Mock() + + words = await service.find_similar_words("Animals", "medium", 5) + + assert len(words) > 0 + assert all(w["source"] == "vector_search" for w in words) + assert all("similarity" in w for w in words) + mock_index.search.assert_called_once() + + @pytest.mark.asyncio + async def test_cleanup(self): + """Test service cleanup.""" + service = VectorSearchService() + service.model = Mock() + service.word_embeddings = Mock() + service.faiss_index = Mock() + service.is_initialized = True + + await service.cleanup() + + assert service.is_initialized is False + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) \ No newline at end of file diff --git a/mise.toml b/mise.toml new file mode 100644 index 0000000000000000000000000000000000000000..6f3b64ab764f3d3ecc0784a2b5c68863b27bd0b1 --- /dev/null +++ b/mise.toml @@ -0,0 +1,2 @@ +[tools] +python = "3.10"