Add complete Python backend with AI-powered crossword generation
- Implement FastAPI backend with vector search and machine learning capabilities
- Add comprehensive crossword generation algorithm with bounds checking fixes
- Include multi-layer word caching system with graceful fallback to static words
- Add extensive test suite (unit tests, integration tests, boundary condition tests)
- Update Docker configuration for Python backend deployment
- Add comprehensive documentation and development setup guides
- Integrate sentence-transformers and FAISS for semantic word discovery
- Maintain API compatibility with existing Node.js backend
Signed-off-by: Vimal Kumar <[email protected]>
This view is limited to 50 files because it contains too many changes.
- .gitignore +6 -1
- CLAUDE.md +217 -0
- Dockerfile +64 -15
- crossword-app/Dockerfile +63 -15
- crossword-app/backend-py/.coverage +0 -0
- crossword-app/backend-py/.env.example +20 -0
- crossword-app/backend-py/README-local-setup.md +78 -0
- crossword-app/backend-py/README.md +332 -0
- crossword-app/backend-py/__pycache__/test_bounds_comprehensive.cpython-313-pytest-8.4.1.pyc +0 -0
- crossword-app/backend-py/app.py +146 -0
- crossword-app/backend-py/data/data +1 -0
- crossword-app/backend-py/data/word-lists/animals.json +165 -0
- crossword-app/backend-py/data/word-lists/geography.json +161 -0
- crossword-app/backend-py/data/word-lists/science.json +170 -0
- crossword-app/backend-py/data/word-lists/technology.json +221 -0
- crossword-app/backend-py/debug_full_generation.py +316 -0
- crossword-app/backend-py/debug_grid_direct.py +293 -0
- crossword-app/backend-py/debug_index_error.py +307 -0
- crossword-app/backend-py/debug_simple.py +142 -0
- crossword-app/backend-py/pytest.ini +16 -0
- crossword-app/backend-py/requirements-dev.txt +18 -0
- crossword-app/backend-py/requirements.txt +48 -0
- crossword-app/backend-py/run_tests.py +89 -0
- crossword-app/backend-py/src/__init__.py +1 -0
- crossword-app/backend-py/src/__pycache__/__init__.cpython-313.pyc +0 -0
- crossword-app/backend-py/src/routes/__init__.py +1 -0
- crossword-app/backend-py/src/routes/__pycache__/__init__.cpython-313.pyc +0 -0
- crossword-app/backend-py/src/routes/__pycache__/api.cpython-313.pyc +0 -0
- crossword-app/backend-py/src/routes/api.py +186 -0
- crossword-app/backend-py/src/services/__init__.py +1 -0
- crossword-app/backend-py/src/services/__pycache__/__init__.cpython-313.pyc +0 -0
- crossword-app/backend-py/src/services/__pycache__/crossword_generator.cpython-313.pyc +0 -0
- crossword-app/backend-py/src/services/__pycache__/crossword_generator_fixed.cpython-313.pyc +0 -0
- crossword-app/backend-py/src/services/__pycache__/crossword_generator_wrapper.cpython-313.pyc +0 -0
- crossword-app/backend-py/src/services/__pycache__/vector_search.cpython-313.pyc +0 -0
- crossword-app/backend-py/src/services/__pycache__/word_cache.cpython-313.pyc +0 -0
- crossword-app/backend-py/src/services/crossword_generator.py +722 -0
- crossword-app/backend-py/src/services/crossword_generator_wrapper.py +58 -0
- crossword-app/backend-py/src/services/vector_search.py +587 -0
- crossword-app/backend-py/src/services/word_cache.py +347 -0
- crossword-app/backend-py/test-integration/test_boundary_fix.py +147 -0
- crossword-app/backend-py/test-integration/test_bounds_comprehensive.py +266 -0
- crossword-app/backend-py/test-integration/test_bounds_fix.py +90 -0
- crossword-app/backend-py/test-integration/test_cache_permissions.py +88 -0
- crossword-app/backend-py/test-integration/test_cache_system.py +127 -0
- crossword-app/backend-py/test-integration/test_crossword_display.py +85 -0
- crossword-app/backend-py/test-integration/test_final_crossword_validation.py +239 -0
- crossword-app/backend-py/test-integration/test_final_validation.py +133 -0
- crossword-app/backend-py/test-integration/test_intersection_issues.py +247 -0
- crossword-app/backend-py/test-integration/test_local.py +98 -0
.gitignore
CHANGED
```diff
@@ -47,4 +47,9 @@ pids
 .Spotlight-V100
 .Trashes
 ehthumbs.db
-Thumbs.db
+Thumbs.db
+
+hack
+issues/
+samples/
+venv/
```
CLAUDE.md
ADDED
@@ -0,0 +1,217 @@
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Project Structure

This is a full-stack crossword puzzle generator with two backend implementations:
- **Node.js Backend** (`backend/`) - Original implementation with static word lists
- **Python Backend** (`backend-py/`) - New implementation with AI-powered vector search
- **React Frontend** (`frontend/`) - Modern React app with Vite

Current deployment uses the Python backend with Docker containerization.

## Development Commands

### Frontend Development
```bash
cd frontend
npm install
npm run dev        # Start development server on http://localhost:5173
npm run build      # Build for production
npm run preview    # Preview production build
```

### Backend Development (Python - Primary)
```bash
cd backend-py

# Testing
python run_tests.py                              # Run all tests
python run_tests.py crossword_generator_fixed    # Run specific test
pytest tests/ -v                                 # Direct pytest
pytest tests/test_index_bug_fix.py -v            # Core functionality tests
python test_local.py                             # Quick test without ML deps

# Development server
python app.py                                    # Start FastAPI server on port 7860

# Debug/development tools
python test_simple_generation.py                 # Test crossword generation
python debug_grid_direct.py                      # Debug grid placement
```

### Backend Development (Node.js - Legacy)
```bash
cd backend
npm install
npm run dev    # Start Express server on http://localhost:3000
npm test       # Run tests
```

### Docker Deployment
```bash
# Build and run locally
docker build -t crossword-app .
docker run -p 7860:7860 -e NODE_ENV=production crossword-app

# Test deployment
curl http://localhost:7860/api/topics
curl http://localhost:7860/health
```

### Linting and Type Checking
```bash
# Python backend
cd backend-py
mypy src/       # Type checking (if mypy installed)
ruff src/       # Linting (if ruff installed)

# Frontend
cd frontend
npm run lint    # ESLint (if configured)
```

## Architecture Overview

### Full-Stack Components

**Frontend** (`frontend/`)
- React 18 with hooks and functional components
- Key components: `TopicSelector.jsx`, `PuzzleGrid.jsx`, `ClueList.jsx`
- Custom hook: `useCrossword.js` manages puzzle state
- Grid rendering using CSS Grid with interactive cell filling

**Python Backend** (`backend-py/` - Primary)
- FastAPI web framework serving both API and static frontend files
- AI-powered word generation using vector similarity search
- Comprehensive bounds checking fixes for crossword generation
- Multi-layer caching system with graceful fallback to static words

**Node.js Backend** (`backend/` - Legacy)
- Express.js with file-based word storage
- Original crossword generation algorithm
- Static word lists organized by topic (animals.json, science.json, etc.)

### Core Python Backend Components

**CrosswordGeneratorFixed** (`backend-py/src/services/crossword_generator_fixed.py`)
- Main crossword generation algorithm using backtracking
- Handles grid placement, bounds checking, and word intersections
- Contains fixes for "list index out of range" errors with comprehensive bounds validation
- Key methods: `_create_grid()`, `_backtrack_placement()`, `_can_place_word()`, `_place_word()`
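To make the bounds-validation pattern concrete, here is a minimal sketch of a `_can_place_word()`-style check. It illustrates the approach described above rather than reproducing the code in `crossword_generator_fixed.py`, and the grid representation (a square list of lists with empty strings for unused cells) is an assumption.

```python
# Illustrative sketch only - not the actual crossword_generator_fixed.py method.
def can_place_word(grid, word, row, col, direction):
    """Return True if `word` fits at (row, col) without indexing outside the grid."""
    size = len(grid)
    dr, dc = (0, 1) if direction == "across" else (1, 0)

    # Reject placements whose last letter would fall outside the grid.
    end_row = row + dr * (len(word) - 1)
    end_col = col + dc * (len(word) - 1)
    if row < 0 or col < 0 or end_row >= size or end_col >= size:
        return False

    for i, letter in enumerate(word):
        r, c = row + dr * i, col + dc * i
        # Defensive per-cell bounds check before any grid[r][c] access.
        if not (0 <= r < size and 0 <= c < len(grid[r])):
            return False
        cell = grid[r][c]
        # An occupied cell is acceptable only if it already holds the same
        # letter (a legitimate intersection with a crossing word).
        if cell and cell != letter:
            return False
    return True
```

A placement that passes this kind of check can then be written and later removed using the same coordinate arithmetic, which is what eliminates the "list index out of range" failures.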
**VectorSearchService** (`backend-py/src/services/vector_search.py`)
- AI-powered word discovery using sentence-transformers + FAISS
- Extracts 30K+ words from model vocabulary vs static word lists
- Implements semantic similarity search with caching and fallback systems
- Requires torch/sentence-transformers dependencies (optional for core functionality)

**WordCache** (`backend-py/src/services/word_cache.py`)
- Multi-layer caching system for vector-discovered words
- Handles permission issues with fallback mechanisms
- Reduces dependency on static word files
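As a rough illustration of the multi-layer idea (not the actual `WordCache` API, whose layer order, file layout, and method names may differ), a cache like this checks memory first, then an on-disk JSON cache, and finally falls back to the static word lists when the disk layer is unavailable or unwritable:

```python
# Minimal sketch of a memory -> disk -> static-files lookup chain; all paths
# and method names here are assumptions for illustration.
import json
from pathlib import Path

class SimpleWordCache:
    def __init__(self, cache_dir="cache", static_dir="data/word-lists"):
        self.memory = {}                      # layer 1: in-process dict
        self.cache_dir = Path(cache_dir)      # layer 2: on-disk JSON cache
        self.static_dir = Path(static_dir)    # layer 3: static word lists

    def get(self, topic):
        if topic in self.memory:
            return self.memory[topic]
        cache_file = self.cache_dir / f"{topic}.json"
        try:
            if cache_file.exists():
                words = json.loads(cache_file.read_text())
                self.memory[topic] = words
                return words
        except (OSError, PermissionError, json.JSONDecodeError):
            pass  # e.g. read-only filesystem: fall through to static lists
        static_file = self.static_dir / f"{topic.lower()}.json"
        if static_file.exists():
            return json.loads(static_file.read_text())
        return []

    def put(self, topic, words):
        self.memory[topic] = words
        try:
            self.cache_dir.mkdir(parents=True, exist_ok=True)
            (self.cache_dir / f"{topic}.json").write_text(json.dumps(words))
        except (OSError, PermissionError):
            pass  # the disk layer is best-effort; the memory layer still works
```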
### Data Flow

1. **User Interaction** → React frontend (TopicSelector, PuzzleGrid)
2. **API Request** → FastAPI backend (`backend-py/routes/api.py`)
3. **Word Selection** → VectorSearchService (AI) or static word fallback
4. **Grid Generation** → CrosswordGeneratorFixed backtracking algorithm
5. **Response** → JSON with grid, clues, and metadata
6. **Frontend Rendering** → Interactive crossword grid with clues

### Critical Dependencies

**Frontend:**
- React 18, Vite (development/build)
- Node.js 18+ and npm 9+

**Python Backend (Primary):**
- FastAPI, uvicorn, pydantic (web framework)
- pytest, pytest-asyncio (testing)

**Optional AI Features:**
- torch, sentence-transformers, faiss-cpu (vector search)
- httpx (for API testing)

**Node.js Backend (Legacy):**
- Express.js, cors, helmet
- JSON file-based word storage

The Python backend gracefully degrades to static word lists when AI dependencies are missing.

### API Endpoints

Both backends provide compatible REST APIs:
- `GET /api/topics` - Get available topics
- `POST /api/generate` - Generate crossword puzzle
- `POST /api/validate` - Validate user answers
- `GET /api/health` - Health check
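Once either backend is running on port 7860, the shared API surface can be exercised with a short httpx script. The `/api/generate` payload below is an assumed shape for illustration; the authoritative schema is whatever `src/routes/api.py` validates.

```python
# Quick manual smoke test of the API, assuming a backend on localhost:7860.
import httpx

BASE = "http://localhost:7860"

print(httpx.get(f"{BASE}/api/topics").json())
print(httpx.get(f"{BASE}/api/health").json())

resp = httpx.post(
    f"{BASE}/api/generate",
    json={"topics": ["animals"], "difficulty": "medium"},  # assumed field names
    timeout=60,
)
print(resp.status_code)
print(resp.json())
```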
### Testing Strategy

**Python Backend Tests:**
- `test_crossword_generator_fixed.py` - Grid generation logic
- `test_index_bug_fix.py` - Bounds checking and index error fixes (CRITICAL)
- `test_vector_search.py` - AI word generation (needs torch)
- `test_api_routes.py` - FastAPI endpoints (needs httpx)

**Frontend Tests:**
- Component testing with React Testing Library (if configured)
- E2E testing with Playwright/Cypress (if configured)
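The bounds-checking work is the kind of behavior these tests pin down. A hypothetical regression test (not taken from `test_index_bug_fix.py`, which will differ) looks like this: invalid placements should be rejected cleanly instead of surfacing as an `IndexError` from raw grid access.

```python
# Toy regression test for the bounds-checking behavior described above;
# the helper and assertions are illustrative, not the project's real tests.
import pytest

def place_across(grid, word, row, col):
    """Toy placement helper used only by this sketch."""
    if row < 0 or col < 0 or row >= len(grid) or col + len(word) > len(grid[row]):
        raise ValueError("placement out of bounds")
    for i, letter in enumerate(word):
        grid[row][col + i] = letter

def test_out_of_bounds_placement_is_rejected_cleanly():
    grid = [["" for _ in range(5)] for _ in range(5)]
    # 6 letters starting at column 2 overrun a 5-wide grid.
    with pytest.raises(ValueError):
        place_across(grid, "PYTHON", 0, 2)

def test_in_bounds_placement_succeeds():
    grid = [["" for _ in range(5)] for _ in range(5)]
    place_across(grid, "CAT", 4, 2)
    assert grid[4][2:5] == ["C", "A", "T"]
```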
### Key Fixes Applied

**Index Error Resolution:**
- Added comprehensive bounds checking in `_can_place_word()`, `_place_word()`, `_remove_word()`
- Fixed `_calculate_placement_score()` to validate grid coordinates before access
- All grid access operations now validate row/col bounds

**Word Boundary Issues:**
- 2-letter sequences at crossword intersections are normal behavior, not bugs
- Removed overly strict validation that was rejecting valid crossword patterns
- Grid placement logic maintains compatibility with JavaScript backend quality

### Environment Configuration

**Python Backend (Production):**
```bash
NODE_ENV=production
PORT=7860
EMBEDDING_MODEL=sentence-transformers/all-mpnet-base-v2
WORD_SIMILARITY_THRESHOLD=0.65
PYTHONPATH=/app/backend-py
PYTHONUNBUFFERED=1
```

**Frontend Development:**
```bash
VITE_API_BASE_URL=http://localhost:7860    # Points to Python backend
```

**Node.js Backend (Legacy):**
```bash
NODE_ENV=development
PORT=3000
DATABASE_URL=postgresql://user:pass@host:port/db    # Optional
```

### Performance Notes

**Python Backend:**
- **Startup**: ~30-60 seconds with AI (model download), ~2 seconds without
- **Memory**: ~500MB-1GB with AI, ~100MB without
- **Response Time**: ~200-500ms with vector search, ~100ms with static words
- FAISS index building is the main startup bottleneck

**Frontend:**
- **Development**: Hot reload with Vite (~200ms)
- **Build Time**: ~10-30 seconds for production build
- **Bundle Size**: Optimized with Vite tree-shaking

**Deployment:**
- Docker build time: ~5-10 minutes (includes frontend build + Python deps)
- Container size: ~1.5GB (includes ML models and dependencies)
- Hugging Face Spaces deployment: Automatic on git push
- Run unit tests after fixing a bug
Dockerfile
CHANGED
@@ -1,36 +1,85 @@
```dockerfile
# Multi-stage build to optimize performance and security
# Stage 1: Builder - Install dependencies and build as root
FROM python:3.11-slim as builder

# Set working directory
WORKDIR /app

# Install system dependencies for sentence-transformers and FAISS
RUN apt-get update && apt-get install -y \
    gcc \
    g++ \
    wget \
    curl \
    git \
    && rm -rf /var/lib/apt/lists/*

# Install Node.js for frontend build
RUN curl -fsSL https://deb.nodesource.com/setup_18.x | bash - && \
    apt-get install -y nodejs

# Copy frontend package files and install dependencies first (for better caching)
COPY crossword-app/frontend/package*.json ./frontend/
RUN cd frontend && npm ci

# Copy Python backend requirements and install dependencies
COPY crossword-app/backend-py/requirements.txt ./backend-py/
COPY crossword-app/backend-py/requirements-dev.txt ./backend-py/
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r backend-py/requirements-dev.txt

# Copy all source code
COPY crossword-app/frontend/ ./frontend/
COPY crossword-app/backend/ ./backend/
COPY crossword-app/backend-py/ ./backend-py/

# Build the React frontend
RUN cd frontend && npm run build

# Copy built frontend files to Python backend public directory
RUN mkdir -p backend-py/public && cp -r frontend/dist/* backend-py/public/

# Create symlink for shared data (word lists)
RUN cd backend-py && ln -sf ../backend/data data

# Stage 2: Runtime - Copy only necessary files as non-root user
FROM python:3.11-slim as runtime

# Copy Python packages from builder stage
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Install minimal runtime dependencies
RUN apt-get update && apt-get install -y \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Create non-root user
RUN useradd -m -u 1000 appuser

# Set working directory
WORKDIR /app/backend-py

# Copy built application files with correct ownership
COPY --from=builder --chown=appuser:appuser /app/backend-py ./
COPY --from=builder --chown=appuser:appuser /app/backend/data ./data

# Switch to non-root user
USER appuser

# Expose port 7860 (Hugging Face Spaces standard)
EXPOSE 7860

# Set environment variables for production
ENV NODE_ENV=production
ENV PORT=7860
ENV PYTHONPATH=/app/backend-py
ENV PYTHONUNBUFFERED=1
ENV PIP_NO_CACHE_DIR=1

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Start the Python backend server with uvicorn for better production performance
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
```
crossword-app/Dockerfile
CHANGED
@@ -1,36 +1,84 @@
```dockerfile
# Multi-stage build to optimize performance and security
# Stage 1: Builder - Install dependencies and build as root
FROM python:3.11-slim as builder

# Set working directory
WORKDIR /app

# Install system dependencies for sentence-transformers and FAISS
RUN apt-get update && apt-get install -y \
    gcc \
    g++ \
    wget \
    curl \
    git \
    && rm -rf /var/lib/apt/lists/*

# Install Node.js for frontend build
RUN curl -fsSL https://deb.nodesource.com/setup_18.x | bash - && \
    apt-get install -y nodejs

# Copy frontend package files and install dependencies first (for better caching)
COPY frontend/package*.json ./frontend/
RUN cd frontend && npm ci

# Copy Python backend requirements and install dependencies
COPY backend-py/requirements.txt ./backend-py/
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r backend-py/requirements.txt

# Copy all source code
COPY frontend/ ./frontend/
COPY backend/ ./backend/
COPY backend-py/ ./backend-py/

# Build the React frontend
RUN cd frontend && npm run build

# Copy built frontend files to Python backend public directory
RUN mkdir -p backend-py/public && cp -r frontend/dist/* backend-py/public/

# Create symlink for shared data (word lists)
RUN cd backend-py && ln -sf ../backend/data data

# Stage 2: Runtime - Copy only necessary files as non-root user
FROM python:3.11-slim as runtime

# Copy Python packages from builder stage
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Install minimal runtime dependencies
RUN apt-get update && apt-get install -y \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Create non-root user
RUN useradd -m -u 1000 appuser

# Set working directory
WORKDIR /app/backend-py

# Copy built application files with correct ownership
COPY --from=builder --chown=appuser:appuser /app/backend-py ./
COPY --from=builder --chown=appuser:appuser /app/backend/data ./data

# Switch to non-root user
USER appuser

# Expose port 7860 (Hugging Face Spaces standard)
EXPOSE 7860

# Set environment variables for production
ENV NODE_ENV=production
ENV PORT=7860
ENV PYTHONPATH=/app/backend-py
ENV PYTHONUNBUFFERED=1
ENV PIP_NO_CACHE_DIR=1

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Start the Python backend server with uvicorn for better production performance
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
```
crossword-app/backend-py/.coverage
ADDED
Binary file (53.2 kB)
crossword-app/backend-py/.env.example
ADDED
@@ -0,0 +1,20 @@
```
# Python Backend Environment Configuration

# Server Configuration
PORT=7860
HOST=0.0.0.0
NODE_ENV=production

# AI/ML Configuration
EMBEDDING_MODEL=sentence-transformers/all-mpnet-base-v2
WORD_SIMILARITY_THRESHOLD=0.65
MAX_VOCAB_SIZE=30000

# HuggingFace Configuration (if needed for cloud inference)
HUGGINGFACE_API_KEY=your_huggingface_api_key_here

# Logging
LOG_LEVEL=INFO

# Development settings
RELOAD=false
```
crossword-app/backend-py/README-local-setup.md
ADDED
@@ -0,0 +1,78 @@
# Local Development Setup

## Quick Start

```bash
# Install all dependencies (same as production)
pip install -r requirements.txt
```

## Python Version Support
- **Recommended**: Python 3.10-3.12
- **Minimum**: Python 3.10 (matches Docker)

## Installation Troubleshooting

### If you get PyTorch installation errors:
```bash
# Install PyTorch first with CPU support
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

# Then install remaining dependencies
pip install -r requirements-local.txt --no-deps
pip install fastapi uvicorn[standard] python-dotenv python-multipart
```

### For M1/M2 Macs:
```bash
# Use conda for better compatibility
conda install pytorch::pytorch torchvision torchaudio -c pytorch
pip install -r requirements-local.txt --no-deps
pip install sentence-transformers faiss-cpu transformers huggingface-hub
```

## Running Locally

```bash
cd crossword-app/backend-py
python app.py
```

The server will start on http://localhost:7860

## Features Available

- ✅ AI word generation via vector search
- ✅ 30K+ vocabulary from sentence-transformers
- ✅ Static word fallback
- ✅ All crossword features
- ✅ Same as production environment

## Environment Variables

Create a `.env` file:
```bash
# Optional - defaults to sentence-transformers/all-mpnet-base-v2
EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2

# Optional - similarity threshold for AI words
WORD_SIMILARITY_THRESHOLD=0.65

# Optional - logging level
LOG_LEVEL=INFO
```

## Testing

```bash
# Test basic components
python test_local.py

# Test with pytest
pytest
```

## Docker vs Local Development

Both use the same `requirements.txt` with modern, compatible versions that work across Python 3.9-3.12 and different platforms.
crossword-app/backend-py/README.md
ADDED
@@ -0,0 +1,332 @@
# Python Backend with Vector Similarity Search

This is the Python implementation of the crossword generator backend, featuring true AI word generation via vector similarity search.

## Features

- **True Vector Search**: Uses sentence-transformers + FAISS for semantic word discovery
- **30K+ Vocabulary**: Searches through full model vocabulary instead of limited static lists
- **FastAPI**: Modern, fast Python web framework
- **Same API**: Compatible with existing React frontend
- **Hybrid Approach**: AI vector search with static word fallback

## Differences from JavaScript Backend

| Feature | JavaScript Backend | Python Backend |
|---------|-------------------|----------------|
| **Word Generation** | Embedding filtering of static lists | True vector similarity search |
| **Vocabulary Size** | ~100 words per topic | 30K+ words from model |
| **AI Approach** | Semantic similarity filtering | Nearest neighbor search |
| **Performance** | Fast but limited | Slower startup, better results |
| **Dependencies** | Node.js + HuggingFace API | Python + ML libraries |

## Setup & Installation

### Prerequisites
- Python 3.11+ (3.11 recommended for Docker compatibility)
- pip (Python package manager)

### Basic Setup (Core Functionality)
```bash
# Clone and navigate to backend directory
cd crossword-app/backend-py

# Create virtual environment (recommended)
python -m venv venv
source venv/bin/activate    # On Windows: venv\Scripts\activate

# Install core dependencies
pip install -r requirements.txt

# Start the server
python app.py
```

### Full Development Setup (with AI features)
```bash
# Install development dependencies including AI/ML libraries
pip install -r requirements-dev.txt

# This includes:
# - All core dependencies
# - AI/ML libraries (torch, sentence-transformers, etc.)
# - Development tools (pytest, coverage, etc.)
```

### Requirements Files
- **`requirements.txt`**: Core dependencies for basic functionality
- **`requirements-dev.txt`**: Full development environment with AI features

> **Note**: The AI/ML dependencies are large (~2GB). For basic testing without AI features, use `requirements.txt` only.

> **Python Version**: Both local development and Docker use Python 3.11+ for optimal performance and latest package compatibility.

## Structure

```
backend-py/
├── app.py                     # FastAPI application entry point
├── requirements.txt           # Core Python dependencies
├── requirements-dev.txt       # Full development dependencies
├── src/
│   ├── services/
│   │   ├── vector_search.py          # Core vector similarity search
│   │   └── crossword_generator.py    # Puzzle generation logic
│   └── routes/
│       └── api.py                    # API endpoints (matches JS backend)
├── test-unit/                 # Unit tests (pytest framework) - 5 files
│   ├── test_crossword_generator.py
│   ├── test_api_routes.py
│   └── test_vector_search.py
├── test-integration/          # Integration tests (standalone scripts) - 16 files
│   ├── test_simple_generation.py
│   ├── test_boundary_fix.py
│   └── test_local.py          # (+ 13 more test files)
├── data/ -> ../backend/data/  # Symlink to shared word data
└── public/                    # Frontend static files (copied during build)
```

## Dependencies

### Core ML Stack
- `sentence-transformers`: Local model loading and embeddings
- `faiss-cpu`: Fast vector similarity search
- `torch`: PyTorch for model inference
- `numpy`: Vector operations

### Web Framework
- `fastapi`: Modern Python web framework
- `uvicorn`: ASGI server
- `pydantic`: Data validation

### Testing
- `pytest`: Testing framework
- `pytest-asyncio`: Async test support

## Testing

### Test Organization (Reorganized for Clarity)

**We've reorganized the test structure for better developer experience:**

| Test Type | Location | Purpose | Framework | Count |
|-----------|----------|---------|-----------|-------|
| **Unit Tests** | `test-unit/` | Test individual components in isolation | pytest | 5 files |
| **Integration Tests** | `test-integration/` | Test complete workflows end-to-end | Standalone scripts | 16 files |

**Benefits of this structure:**
- ✅ **Clear separation** between unit and integration testing
- ✅ **Intuitive naming** - developers immediately understand test types
- ✅ **Better tooling** - can run different test types independently
- ✅ **Easier maintenance** - organized by testing strategy

> **Note**: Previously tests were mixed in the `tests/` folder and root-level `test_*.py` files. The new structure provides much better organization.

### Unit Tests Details (`test-unit/`)

**What they test:** Individual components with mocking and isolation
- `test_crossword_generator.py` - Core crossword generation logic
- `test_api_routes.py` - FastAPI endpoint handlers
- `test_crossword_generator_wrapper.py` - Service wrapper layer
- `test_index_bug_fix.py` - Specific bug fix validations
- `test_vector_search.py` - AI vector search functionality (requires torch)

### Run Unit Tests (Formal Test Suite)
```bash
# Run all unit tests
python run_tests.py

# Run specific test modules
python run_tests.py crossword_generator
pytest test-unit/test_crossword_generator.py -v

# Run core tests (excluding AI dependencies)
pytest test-unit/ -v --ignore=test-unit/test_vector_search.py

# Run individual unit test classes
pytest test-unit/test_crossword_generator.py::TestCrosswordGenerator::test_init -v
```

### Integration Tests Details (`test-integration/`)

**What they test:** Complete workflows without mocking - real functionality
- `test_simple_generation.py` - End-to-end crossword generation
- `test_boundary_fix.py` - Word boundary validation (our major fix!)
- `test_local.py` - Local environment and dependencies
- `test_word_boundaries.py` - Comprehensive boundary testing
- `test_bounds_comprehensive.py` - Advanced bounds checking
- `test_final_validation.py` - API integration testing
- And 10 more specialized feature tests...

### Run Integration Tests (End-to-End Scripts)
```bash
# Test core functionality
python test-integration/test_simple_generation.py
python test-integration/test_boundary_fix.py
python test-integration/test_local.py

# Test specific features
python test-integration/test_word_boundaries.py
python test-integration/test_bounds_comprehensive.py

# Test API integration
python test-integration/test_final_validation.py
```

### Test Coverage
```bash
# Run core tests with coverage (requires requirements-dev.txt)
pytest test-unit/test_crossword_generator.py --cov=src --cov-report=html
pytest test-unit/test_crossword_generator.py --cov=src --cov-report=term

# Full coverage report (may fail without AI dependencies)
pytest test-unit/ --cov=src --cov-report=html --ignore=test-unit/test_vector_search.py
```

### Test Status
- ✅ **Core crossword generation**: 15/19 unit tests passing
- ✅ **Boundary validation**: All integration tests passing
- ⚠️ **AI/Vector search**: Requires torch dependencies
- ⚠️ **Some async mocking**: Minor test infrastructure issues

### Migration Guide (For Existing Developers)

**If you had previous commands, update them:**

| Old Command | New Command |
|-------------|-------------|
| `pytest tests/` | `pytest test-unit/` |
| `python test_simple_generation.py` | `python test-integration/test_simple_generation.py` |
| `pytest tests/ --cov=src` | `pytest test-unit/ --cov=src` |

**All functionality is preserved** - just organized better!

## Configuration

Environment variables (set in HuggingFace Spaces):

```bash
# Core settings
PORT=7860
NODE_ENV=production

# AI Configuration
EMBEDDING_MODEL=sentence-transformers/all-mpnet-base-v2
WORD_SIMILARITY_THRESHOLD=0.65

# Optional
LOG_LEVEL=INFO
```

## Vector Search Process

1. **Initialization**:
   - Load sentence-transformers model locally
   - Extract 30K+ vocabulary from model tokenizer
   - Pre-compute embeddings for all vocabulary words
   - Build FAISS index for fast similarity search

2. **Word Generation**:
   - Get topic embedding: `"Animals" → [768-dim vector]`
   - Search FAISS index for nearest neighbors
   - Filter by similarity threshold (0.65+)
   - Filter by difficulty (word length)
   - Return top matches with generated clues

3. **Fallback**:
   - If vector search fails → use static word lists
   - If insufficient AI words → supplement with static words
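For orientation, the core of steps 1-2 can be reproduced in a few lines with sentence-transformers and FAISS. This is a minimal sketch of the nearest-neighbour idea only; the real `vector_search.py` adds vocabulary extraction from the tokenizer, difficulty filtering, clue generation, and caching, and its internals may differ.

```python
# Minimal vector-search sketch: embed a small stand-in vocabulary, index it,
# and query it with a topic embedding. The tiny word list is illustrative only.
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

vocab = ["dog", "cat", "lion", "sparrow", "volcano", "laptop", "python"]
vocab_emb = np.asarray(model.encode(vocab, normalize_embeddings=True), dtype="float32")

# Inner product over normalized vectors is cosine similarity.
index = faiss.IndexFlatIP(vocab_emb.shape[1])
index.add(vocab_emb)

topic_emb = np.asarray(model.encode(["Animals"], normalize_embeddings=True), dtype="float32")
scores, ids = index.search(topic_emb, k=5)

threshold = 0.65  # mirrors WORD_SIMILARITY_THRESHOLD
matches = [(vocab[i], float(s)) for i, s in zip(ids[0], scores[0]) if s >= threshold]
print(matches)
```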
## Testing

```bash
# Local testing (without full vector search)
cd backend-py
python test_local.py

# Start development server
python app.py
```

## Docker Deployment

The Dockerfile has been updated to use the Python backend:

```dockerfile
FROM python:3.9-slim
# ... install dependencies
# ... build frontend (same as before)
# ... copy to backend-py/public/
CMD ["python", "app.py"]
```

## Testing

### Quick Test
```bash
# Basic functionality test (no model download)
python test_local.py
```

### Comprehensive Unit Tests
```bash
# Run all unit tests
python run_tests.py

# Or use pytest directly
pytest tests/ -v

# Run specific test file
python run_tests.py crossword_generator_fixed
pytest tests/test_crossword_generator_fixed.py -v

# Run with coverage
pytest tests/ --cov=src --cov-report=html
```

### Test Structure
- `tests/test_crossword_generator_fixed.py` - Core grid generation logic
- `tests/test_vector_search.py` - Vector similarity search
- `tests/test_crossword_generator_wrapper.py` - Service wrapper
- `tests/test_api_routes.py` - FastAPI endpoints

### Key Test Features
- ✅ **Index alignment fix**: Tests the list index out of range bug fix
- ✅ **Mocked vector search**: Tests without downloading models
- ✅ **API validation**: Tests all endpoints and error cases
- ✅ **Async support**: Full pytest-asyncio integration
- ✅ **Error handling**: Tests malformed inputs and edge cases

## Performance Comparison

**Startup Time**:
- JavaScript: ~2 seconds
- Python: ~30-60 seconds (model download + index building)

**Word Quality**:
- JavaScript: Limited by static word lists
- Python: Access to full model vocabulary with semantic understanding

**Memory Usage**:
- JavaScript: ~100MB
- Python: ~500MB-1GB (model + embeddings + FAISS index)

**API Response Time**:
- JavaScript: ~100ms (after cache warm-up)
- Python: ~200-500ms (vector search + filtering)

## Migration Strategy

1. **Phase 1** ✅: Basic Python backend structure
2. **Phase 2**: Test vector search functionality
3. **Phase 3**: Docker deployment and production testing
4. **Phase 4**: Compare with JavaScript backend
5. **Phase 5**: Production switch with rollback capability

## Next Steps

- [ ] Test vector search with real model
- [ ] Optimize FAISS index performance
- [ ] Add more sophisticated crossword grid generation
- [ ] Implement LLM-based clue generation
- [ ] Add caching for frequently requested topics
crossword-app/backend-py/__pycache__/test_bounds_comprehensive.cpython-313-pytest-8.4.1.pyc
ADDED
Binary file (39.4 kB)
crossword-app/backend-py/app.py
ADDED
@@ -0,0 +1,146 @@
```python
"""
FastAPI backend for crossword puzzle generator with vector similarity search.
"""

import os
import logging
import time
from datetime import datetime
from contextlib import asynccontextmanager
from pathlib import Path

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
import uvicorn
from dotenv import load_dotenv

from src.routes.api import router as api_router
from src.services.vector_search import VectorSearchService

# Load environment variables
load_dotenv()

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def log_with_timestamp(message):
    """Helper to log with precise timestamp."""
    timestamp = datetime.now().strftime("%H:%M:%S.%f")[:-3]
    logger.info(f"[{timestamp}] {message}")

# Global vector search service instance
vector_service = None

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Initialize and cleanup application resources."""
    global vector_service

    # Startup
    startup_time = time.time()
    log_with_timestamp("Initializing Python backend with vector search...")

    # Initialize vector search service
    try:
        service_start = time.time()
        log_with_timestamp("Creating VectorSearchService instance...")
        vector_service = VectorSearchService()

        log_with_timestamp("Starting vector search initialization...")
        await vector_service.initialize()

        init_time = time.time() - service_start
        log_with_timestamp(f"Vector search service initialized in {init_time:.2f}s")
    except Exception as e:
        logger.error(f"Failed to initialize vector search service: {e}")
        # Continue without vector search (will fallback to static words)

    # Make vector service available to routes
    app.state.vector_service = vector_service

    yield

    # Shutdown
    logger.info("Shutting down Python backend...")
    if vector_service:
        await vector_service.cleanup()

# Create FastAPI app
app = FastAPI(
    title="Crossword Puzzle Generator API",
    description="Python backend with AI-powered vector similarity search",
    version="2.0.0",
    lifespan=lifespan
)

# CORS configuration
cors_origins = []
if os.getenv("NODE_ENV") == "production":
    # Production: same origin
    cors_origins = ["*"]  # HuggingFace Spaces
else:
    # Development: allow dev servers
    cors_origins = [
        "http://localhost:5173",  # Vite dev server
        "http://localhost:3000",  # Alternative dev server
        "http://localhost:7860",  # Local production test
    ]

app.add_middleware(
    CORSMiddleware,
    allow_origins=cors_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include API routes
app.include_router(api_router, prefix="/api")

# Serve static files (frontend)
static_path = Path(__file__).parent / "public"
if static_path.exists():
    app.mount("/assets", StaticFiles(directory=static_path / "assets"), name="assets")

@app.get("/")
async def serve_frontend():
    """Serve the React frontend."""
    index_path = static_path / "index.html"
    if index_path.exists():
        return FileResponse(index_path)
    else:
        raise HTTPException(status_code=404, detail="Frontend not found")

@app.get("/{full_path:path}")
async def serve_spa_routes(full_path: str):
    """Serve React SPA routes."""
    # For any non-API route, serve the React app
    if not full_path.startswith("api/"):
        index_path = static_path / "index.html"
        if index_path.exists():
            return FileResponse(index_path)
    raise HTTPException(status_code=404, detail="Not found")

@app.get("/health")
async def health_check():
    """Health check endpoint."""
    return {
        "status": "healthy",
        "backend": "python",
        "vector_search": vector_service.is_initialized if vector_service else False
    }

if __name__ == "__main__":
    port = int(os.getenv("PORT", 7860))
    host = "0.0.0.0" if os.getenv("NODE_ENV") == "production" else "127.0.0.1"

    logger.info(f"Starting Python backend on {host}:{port}")
    uvicorn.run(
        "app:app",
        host=host,
        port=port,
        reload=os.getenv("NODE_ENV") != "production"
    )
```
crossword-app/backend-py/data/data
ADDED
@@ -0,0 +1 @@
../backend/data
crossword-app/backend-py/data/word-lists/animals.json
ADDED
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{ "word": "DOG", "clue": "Man's best friend" },
|
3 |
+
{ "word": "CAT", "clue": "Feline pet that purrs" },
|
4 |
+
{ "word": "ELEPHANT", "clue": "Large mammal with a trunk" },
|
5 |
+
{ "word": "TIGER", "clue": "Striped big cat" },
|
6 |
+
{ "word": "WHALE", "clue": "Largest marine mammal" },
|
7 |
+
{ "word": "BUTTERFLY", "clue": "Colorful flying insect" },
|
8 |
+
{ "word": "BIRD", "clue": "Flying creature with feathers" },
|
9 |
+
{ "word": "FISH", "clue": "Aquatic animal with gills" },
|
10 |
+
{ "word": "LION", "clue": "King of the jungle" },
|
11 |
+
{ "word": "BEAR", "clue": "Large mammal that hibernates" },
|
12 |
+
{ "word": "RABBIT", "clue": "Hopping mammal with long ears" },
|
13 |
+
{ "word": "HORSE", "clue": "Riding animal with hooves" },
|
14 |
+
{ "word": "SHEEP", "clue": "Woolly farm animal" },
|
15 |
+
{ "word": "GOAT", "clue": "Horned farm animal" },
|
16 |
+
{ "word": "DUCK", "clue": "Water bird that quacks" },
|
17 |
+
{ "word": "CHICKEN", "clue": "Farm bird that lays eggs" },
|
18 |
+
{ "word": "SNAKE", "clue": "Slithering reptile" },
|
19 |
+
{ "word": "TURTLE", "clue": "Shelled reptile" },
|
20 |
+
{ "word": "FROG", "clue": "Amphibian that croaks" },
|
21 |
+
{ "word": "SHARK", "clue": "Predatory ocean fish" },
|
22 |
+
{ "word": "DOLPHIN", "clue": "Intelligent marine mammal" },
|
23 |
+
{ "word": "PENGUIN", "clue": "Flightless Antarctic bird" },
|
24 |
+
{ "word": "MONKEY", "clue": "Primate that swings in trees" },
|
25 |
+
{ "word": "ZEBRA", "clue": "Striped African animal" },
|
26 |
+
{ "word": "GIRAFFE", "clue": "Tallest land animal" },
|
27 |
+
{ "word": "WOLF", "clue": "Wild canine that howls" },
|
28 |
+
{ "word": "FOX", "clue": "Cunning red-furred animal" },
|
29 |
+
{ "word": "DEER", "clue": "Graceful forest animal with antlers" },
|
30 |
+
{ "word": "MOOSE", "clue": "Large antlered animal" },
|
31 |
+
{ "word": "SQUIRREL", "clue": "Tree-climbing nut gatherer" },
|
32 |
+
{ "word": "RACCOON", "clue": "Masked nocturnal animal" },
|
33 |
+
{ "word": "BEAVER", "clue": "Dam-building rodent" },
|
34 |
+
{ "word": "OTTER", "clue": "Playful water mammal" },
|
35 |
+
{ "word": "SEAL", "clue": "Marine mammal with flippers" },
|
36 |
+
{ "word": "WALRUS", "clue": "Tusked Arctic marine mammal" },
|
37 |
+
{ "word": "RHINO", "clue": "Horned thick-skinned mammal" },
|
38 |
+
{ "word": "HIPPO", "clue": "Large African river mammal" },
|
39 |
+
{ "word": "CHEETAH", "clue": "Fastest land animal" },
|
40 |
+
{ "word": "LEOPARD", "clue": "Spotted big cat" },
|
41 |
+
{ "word": "JAGUAR", "clue": "South American big cat" },
|
42 |
+
{ "word": "PUMA", "clue": "Mountain lion" },
|
43 |
+
{ "word": "LYNX", "clue": "Wild cat with tufted ears" },
|
44 |
+
{ "word": "KANGAROO", "clue": "Hopping Australian marsupial" },
|
45 |
+
{ "word": "KOALA", "clue": "Eucalyptus-eating marsupial" },
|
46 |
+
{ "word": "PANDA", "clue": "Black and white bamboo eater" },
|
47 |
+
{ "word": "SLOTH", "clue": "Slow-moving tree dweller" },
|
48 |
+
{ "word": "ARMADILLO", "clue": "Armored mammal" },
|
49 |
+
{ "word": "ANTEATER", "clue": "Long-snouted insect eater" },
|
50 |
+
{ "word": "PLATYPUS", "clue": "Egg-laying mammal with a bill" },
|
51 |
+
{ "word": "BAT", "clue": "Flying mammal" },
|
52 |
+
{ "word": "MOLE", "clue": "Underground tunnel digger" },
|
53 |
+
{ "word": "HEDGEHOG", "clue": "Spiny small mammal" },
|
54 |
+
{ "word": "PORCUPINE", "clue": "Quill-covered rodent" },
|
55 |
+
{ "word": "SKUNK", "clue": "Black and white scent-spraying mammal" },
|
56 |
+
{ "word": "WEASEL", "clue": "Small carnivorous mammal" },
|
57 |
+
{ "word": "BADGER", "clue": "Burrowing black and white mammal" },
|
58 |
+
{ "word": "FERRET", "clue": "Domesticated hunting animal" },
|
59 |
+
{ "word": "MINK", "clue": "Valuable fur-bearing animal" },
|
60 |
+
{ "word": "EAGLE", "clue": "Majestic bird of prey" },
|
61 |
+
{ "word": "HAWK", "clue": "Sharp-eyed hunting bird" },
|
62 |
+
{ "word": "OWL", "clue": "Nocturnal bird with large eyes" },
|
63 |
+
{ "word": "FALCON", "clue": "Fast diving bird of prey" },
|
64 |
+
{ "word": "VULTURE", "clue": "Scavenging bird" },
|
65 |
+
{ "word": "CROW", "clue": "Black intelligent bird" },
|
66 |
+
{ "word": "RAVEN", "clue": "Large black corvid" },
|
67 |
+
{ "word": "ROBIN", "clue": "Red-breasted songbird" },
|
68 |
+
{ "word": "SPARROW", "clue": "Small brown songbird" },
|
69 |
+
{ "word": "CARDINAL", "clue": "Bright red songbird" },
|
70 |
+
{ "word": "BLUEJAY", "clue": "Blue crested bird" },
|
71 |
+
{ "word": "WOODPECKER", "clue": "Tree-pecking bird" },
|
72 |
+
{ "word": "HUMMINGBIRD", "clue": "Tiny fast-flying bird" },
|
73 |
+
{ "word": "PELICAN", "clue": "Large-billed water bird" },
|
 74 +   { "word": "FLAMINGO", "clue": "Pink wading bird" },
 75 +   { "word": "STORK", "clue": "Long-legged wading bird" },
 76 +   { "word": "HERON", "clue": "Tall fishing bird" },
 77 +   { "word": "CRANE", "clue": "Large wading bird" },
 78 +   { "word": "SWAN", "clue": "Elegant white water bird" },
 79 +   { "word": "GOOSE", "clue": "Large waterfowl" },
 80 +   { "word": "TURKEY", "clue": "Large ground bird" },
 81 +   { "word": "PHEASANT", "clue": "Colorful game bird" },
 82 +   { "word": "QUAIL", "clue": "Small ground bird" },
 83 +   { "word": "PEACOCK", "clue": "Bird with spectacular tail feathers" },
 84 +   { "word": "OSTRICH", "clue": "Largest flightless bird" },
 85 +   { "word": "EMU", "clue": "Australian flightless bird" },
 86 +   { "word": "KIWI", "clue": "Small flightless New Zealand bird" },
 87 +   { "word": "PARROT", "clue": "Colorful talking bird" },
 88 +   { "word": "TOUCAN", "clue": "Large-billed tropical bird" },
 89 +   { "word": "MACAW", "clue": "Large colorful parrot" },
 90 +   { "word": "COCKATOO", "clue": "Crested parrot" },
 91 +   { "word": "CANARY", "clue": "Yellow singing bird" },
 92 +   { "word": "FINCH", "clue": "Small seed-eating bird" },
 93 +   { "word": "PIGEON", "clue": "Common city bird" },
 94 +   { "word": "DOVE", "clue": "Symbol of peace" },
 95 +   { "word": "SEAGULL", "clue": "Coastal scavenging bird" },
 96 +   { "word": "ALBATROSS", "clue": "Large ocean bird" },
 97 +   { "word": "PUFFIN", "clue": "Colorful-billed seabird" },
 98 +   { "word": "LIZARD", "clue": "Small scaly reptile" },
 99 +   { "word": "IGUANA", "clue": "Large tropical lizard" },
100 +   { "word": "GECKO", "clue": "Wall-climbing lizard" },
101 +   { "word": "CHAMELEON", "clue": "Color-changing reptile" },
102 +   { "word": "ALLIGATOR", "clue": "Large American crocodilian" },
103 +   { "word": "CROCODILE", "clue": "Large aquatic reptile" },
104 +   { "word": "PYTHON", "clue": "Large constricting snake" },
105 +   { "word": "COBRA", "clue": "Venomous hooded snake" },
106 +   { "word": "VIPER", "clue": "Poisonous snake" },
107 +   { "word": "RATTLESNAKE", "clue": "Snake with warning tail" },
108 +   { "word": "SALAMANDER", "clue": "Amphibian that can regrow limbs" },
109 +   { "word": "NEWT", "clue": "Small aquatic salamander" },
110 +   { "word": "TOAD", "clue": "Warty amphibian" },
111 +   { "word": "TADPOLE", "clue": "Frog larva" },
112 +   { "word": "SALMON", "clue": "Fish that swims upstream" },
113 +   { "word": "TROUT", "clue": "Freshwater game fish" },
114 +   { "word": "BASS", "clue": "Popular sport fish" },
115 +   { "word": "TUNA", "clue": "Large ocean fish" },
116 +   { "word": "SWORDFISH", "clue": "Fish with long pointed bill" },
117 +   { "word": "MARLIN", "clue": "Large billfish" },
118 +   { "word": "MANTA", "clue": "Large ray fish" },
119 +   { "word": "STINGRAY", "clue": "Flat fish with barbed tail" },
120 +   { "word": "EEL", "clue": "Snake-like fish" },
121 +   { "word": "SEAHORSE", "clue": "Horse-shaped fish" },
122 +   { "word": "ANGELFISH", "clue": "Colorful tropical fish" },
123 +   { "word": "GOLDFISH", "clue": "Common pet fish" },
124 +   { "word": "CLOWNFISH", "clue": "Orange and white anemone fish" },
125 +   { "word": "JELLYFISH", "clue": "Transparent stinging sea creature" },
126 +   { "word": "OCTOPUS", "clue": "Eight-armed sea creature" },
127 +   { "word": "SQUID", "clue": "Ten-armed cephalopod" },
128 +   { "word": "CRAB", "clue": "Sideways-walking crustacean" },
129 +   { "word": "LOBSTER", "clue": "Large marine crustacean" },
130 +   { "word": "SHRIMP", "clue": "Small crustacean" },
131 +   { "word": "STARFISH", "clue": "Five-armed sea creature" },
132 +   { "word": "URCHIN", "clue": "Spiny sea creature" },
133 +   { "word": "CORAL", "clue": "Marine organism that builds reefs" },
134 +   { "word": "SPONGE", "clue": "Simple marine animal" },
135 +   { "word": "OYSTER", "clue": "Pearl-producing mollusk" },
136 +   { "word": "CLAM", "clue": "Burrowing shellfish" },
137 +   { "word": "MUSSEL", "clue": "Dark-shelled mollusk" },
138 +   { "word": "SNAIL", "clue": "Spiral-shelled gastropod" },
139 +   { "word": "SLUG", "clue": "Shell-less gastropod" },
140 +   { "word": "WORM", "clue": "Segmented invertebrate" },
141 +   { "word": "SPIDER", "clue": "Eight-legged web spinner" },
142 +   { "word": "SCORPION", "clue": "Arachnid with stinging tail" },
143 +   { "word": "ANT", "clue": "Social insect worker" },
144 +   { "word": "BEE", "clue": "Honey-making insect" },
145 +   { "word": "WASP", "clue": "Stinging flying insect" },
146 +   { "word": "HORNET", "clue": "Large aggressive wasp" },
147 +   { "word": "FLY", "clue": "Common buzzing insect" },
148 +   { "word": "MOSQUITO", "clue": "Blood-sucking insect" },
149 +   { "word": "BEETLE", "clue": "Hard-shelled insect" },
150 +   { "word": "LADYBUG", "clue": "Red spotted beneficial insect" },
151 +   { "word": "DRAGONFLY", "clue": "Large-winged flying insect" },
152 +   { "word": "GRASSHOPPER", "clue": "Jumping green insect" },
153 +   { "word": "CRICKET", "clue": "Chirping insect" },
154 +   { "word": "MANTIS", "clue": "Praying insect predator" },
155 +   { "word": "MOTH", "clue": "Nocturnal butterfly relative" },
156 +   { "word": "CATERPILLAR", "clue": "Butterfly larva" },
157 +   { "word": "COCOON", "clue": "Insect transformation casing" },
158 +   { "word": "TERMITE", "clue": "Wood-eating social insect" },
159 +   { "word": "TICK", "clue": "Blood-sucking parasite" },
160 +   { "word": "FLEA", "clue": "Jumping parasite" },
161 +   { "word": "LOUSE", "clue": "Small parasitic insect" },
162 +   { "word": "APHID", "clue": "Plant-sucking insect" },
163 +   { "word": "MAGGOT", "clue": "Fly larva" },
164 +   { "word": "GRUB", "clue": "Beetle larva" }
165 + ]
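Each of these word-list files is a flat JSON array of `{"word", "clue"}` objects, so a topic's static vocabulary can be read with nothing but the standard library. The helper below is a minimal sketch of that loading step, assuming the lists live under `data/word-lists/`; the function name and the length filter are illustrative and are not the actual `word_cache.py` API.

```python
import json
from pathlib import Path

WORD_LIST_DIR = Path("data/word-lists")  # assumed location, relative to backend-py

def load_topic_words(topic: str, min_len: int = 3, max_len: int = 15) -> list[dict]:
    """Hypothetical helper: load a topic's static list and keep grid-friendly lengths."""
    with open(WORD_LIST_DIR / f"{topic}.json", encoding="utf-8") as fh:
        entries = json.load(fh)
    # Drop answers too short or too long to place comfortably in a crossword grid.
    return [e for e in entries if min_len <= len(e["word"]) <= max_len]

# Example: load_topic_words("animals") yields dicts such as
# {"word": "FLAMINGO", "clue": "Pink wading bird"}
```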
crossword-app/backend-py/data/word-lists/geography.json
ADDED
@@ -0,0 +1,161 @@
  1 + [
  2 +   { "word": "MOUNTAIN", "clue": "High elevation landform" },
  3 +   { "word": "OCEAN", "clue": "Large body of salt water" },
  4 +   { "word": "DESERT", "clue": "Dry, arid region" },
  5 +   { "word": "CONTINENT", "clue": "Large landmass" },
  6 +   { "word": "RIVER", "clue": "Flowing body of water" },
  7 +   { "word": "ISLAND", "clue": "Land surrounded by water" },
  8 +   { "word": "FOREST", "clue": "Dense area of trees" },
  9 +   { "word": "VALLEY", "clue": "Low area between hills" },
 10 +   { "word": "LAKE", "clue": "Body of freshwater" },
 11 +   { "word": "BEACH", "clue": "Sandy shore by water" },
 12 +   { "word": "CLIFF", "clue": "Steep rock face" },
 13 +   { "word": "PLATEAU", "clue": "Elevated flat area" },
 14 +   { "word": "CANYON", "clue": "Deep gorge with steep sides" },
 15 +   { "word": "GLACIER", "clue": "Moving mass of ice" },
 16 +   { "word": "VOLCANO", "clue": "Mountain that erupts" },
 17 +   { "word": "PENINSULA", "clue": "Land surrounded by water on three sides" },
 18 +   { "word": "ARCHIPELAGO", "clue": "Group of islands" },
 19 +   { "word": "PRAIRIE", "clue": "Grassland plain" },
 20 +   { "word": "TUNDRA", "clue": "Cold, treeless region" },
 21 +   { "word": "SAVANNA", "clue": "Tropical grassland" },
 22 +   { "word": "EQUATOR", "clue": "Earth's middle line" },
 23 +   { "word": "LATITUDE", "clue": "Distance from equator" },
 24 +   { "word": "LONGITUDE", "clue": "Distance from prime meridian" },
 25 +   { "word": "CLIMATE", "clue": "Long-term weather pattern" },
 26 +   { "word": "MONSOON", "clue": "Seasonal wind pattern" },
 27 +   { "word": "CAPITAL", "clue": "Main city of country" },
 28 +   { "word": "BORDER", "clue": "Boundary between countries" },
 29 +   { "word": "COAST", "clue": "Land meeting the sea" },
 30 +   { "word": "STRAIT", "clue": "Narrow water passage" },
 31 +   { "word": "DELTA", "clue": "River mouth formation" },
 32 +   { "word": "FJORD", "clue": "Narrow inlet between cliffs" },
 33 +   { "word": "ATOLL", "clue": "Ring-shaped coral island" },
 34 +   { "word": "MESA", "clue": "Flat-topped hill" },
 35 +   { "word": "BUTTE", "clue": "Isolated hill with steep sides" },
 36 +   { "word": "GORGE", "clue": "Deep narrow valley" },
 37 +   { "word": "RAVINE", "clue": "Small narrow gorge" },
 38 +   { "word": "RIDGE", "clue": "Long narrow hilltop" },
 39 +   { "word": "PEAK", "clue": "Mountain summit" },
 40 +   { "word": "SUMMIT", "clue": "Highest point" },
 41 +   { "word": "FOOTHILLS", "clue": "Hills at base of mountains" },
 42 +   { "word": "RANGE", "clue": "Chain of mountains" },
 43 +   { "word": "BASIN", "clue": "Low-lying area" },
 44 +   { "word": "WATERSHED", "clue": "Drainage area" },
 45 +   { "word": "ESTUARY", "clue": "Where river meets sea" },
 46 +   { "word": "BAY", "clue": "Curved inlet of water" },
 47 +   { "word": "GULF", "clue": "Large bay" },
 48 +   { "word": "CAPE", "clue": "Point of land into water" },
 49 +   { "word": "HEADLAND", "clue": "High point of land" },
 50 +   { "word": "LAGOON", "clue": "Shallow coastal body of water" },
 51 +   { "word": "REEF", "clue": "Underwater rock formation" },
 52 +   { "word": "SHOAL", "clue": "Shallow area in water" },
 53 +   { "word": "CHANNEL", "clue": "Deep water passage" },
 54 +   { "word": "SOUND", "clue": "Large sea inlet" },
 55 +   { "word": "HARBOR", "clue": "Sheltered port area" },
 56 +   { "word": "INLET", "clue": "Small bay" },
 57 +   { "word": "COVE", "clue": "Small sheltered bay" },
 58 +   { "word": "MARSH", "clue": "Wetland area" },
 59 +   { "word": "SWAMP", "clue": "Forested wetland" },
 60 +   { "word": "BOG", "clue": "Acidic wetland" },
 61 +   { "word": "OASIS", "clue": "Fertile spot in desert" },
 62 +   { "word": "DUNE", "clue": "Sand hill" },
 63 +   { "word": "PLAIN", "clue": "Flat grassland" },
 64 +   { "word": "STEPPE", "clue": "Dry grassland" },
 65 +   { "word": "TAIGA", "clue": "Northern coniferous forest" },
 66 +   { "word": "RAINFOREST", "clue": "Dense tropical forest" },
 67 +   { "word": "JUNGLE", "clue": "Dense tropical vegetation" },
 68 +   { "word": "WOODLAND", "clue": "Area with scattered trees" },
 69 +   { "word": "GROVE", "clue": "Small group of trees" },
 70 +   { "word": "MEADOW", "clue": "Grassy field" },
 71 +   { "word": "PASTURE", "clue": "Grazing land" },
 72 +   { "word": "FIELD", "clue": "Open area of land" },
 73 +   { "word": "MOOR", "clue": "Open uncultivated land" },
 74 +   { "word": "HEATH", "clue": "Shrubland area" },
 75 +   { "word": "ARCTIC", "clue": "Cold northern region" },
 76 +   { "word": "ANTARCTIC", "clue": "Cold southern region" },
 77 +   { "word": "POLAR", "clue": "Of the poles" },
 78 +   { "word": "TROPICAL", "clue": "Hot humid climate zone" },
 79 +   { "word": "TEMPERATE", "clue": "Moderate climate zone" },
 80 +   { "word": "ARID", "clue": "Very dry" },
 81 +   { "word": "HUMID", "clue": "Moist air" },
 82 +   { "word": "ALTITUDE", "clue": "Height above sea level" },
 83 +   { "word": "ELEVATION", "clue": "Height of land" },
 84 +   { "word": "TERRAIN", "clue": "Physical features of land" },
 85 +   { "word": "TOPOGRAPHY", "clue": "Surface features of area" },
 86 +   { "word": "GEOGRAPHY", "clue": "Study of Earth's features" },
 87 +   { "word": "CARTOGRAPHY", "clue": "Map making" },
 88 +   { "word": "MERIDIAN", "clue": "Longitude line" },
 89 +   { "word": "PARALLEL", "clue": "Latitude line" },
 90 +   { "word": "HEMISPHERE", "clue": "Half of Earth" },
 91 +   { "word": "TROPICS", "clue": "Hot climate zone" },
 92 +   { "word": "POLES", "clue": "Earth's endpoints" },
 93 +   { "word": "AXIS", "clue": "Earth's rotation line" },
 94 +   { "word": "ORBIT", "clue": "Path around sun" },
 95 +   { "word": "SEASON", "clue": "Time of year" },
 96 +   { "word": "SOLSTICE", "clue": "Longest or shortest day" },
 97 +   { "word": "EQUINOX", "clue": "Equal day and night" },
 98 +   { "word": "COMPASS", "clue": "Direction-finding tool" },
 99 +   { "word": "NAVIGATION", "clue": "Finding your way" },
100 +   { "word": "BEARING", "clue": "Direction or course" },
101 +   { "word": "AZIMUTH", "clue": "Compass direction" },
102 +   { "word": "SCALE", "clue": "Map size ratio" },
103 +   { "word": "LEGEND", "clue": "Map symbol key" },
104 +   { "word": "CONTOUR", "clue": "Elevation line on map" },
105 +   { "word": "GRID", "clue": "Map reference system" },
106 +   { "word": "PROJECTION", "clue": "Map flattening method" },
107 +   { "word": "SURVEY", "clue": "Land measurement" },
108 +   { "word": "BOUNDARY", "clue": "Dividing line" },
109 +   { "word": "FRONTIER", "clue": "Border region" },
110 +   { "word": "TERRITORY", "clue": "Area of land" },
111 +   { "word": "REGION", "clue": "Geographic area" },
112 +   { "word": "ZONE", "clue": "Designated area" },
113 +   { "word": "DISTRICT", "clue": "Administrative area" },
114 +   { "word": "PROVINCE", "clue": "Political subdivision" },
115 +   { "word": "STATE", "clue": "Political entity" },
116 +   { "word": "COUNTY", "clue": "Local government area" },
117 +   { "word": "CITY", "clue": "Large urban area" },
118 +   { "word": "TOWN", "clue": "Small urban area" },
119 +   { "word": "VILLAGE", "clue": "Small rural community" },
120 +   { "word": "HAMLET", "clue": "Very small village" },
121 +   { "word": "SUBURB", "clue": "Residential area outside city" },
122 +   { "word": "URBAN", "clue": "City-like" },
123 +   { "word": "RURAL", "clue": "Countryside" },
124 +   { "word": "METROPOLITAN", "clue": "Large city area" },
125 +   { "word": "POPULATION", "clue": "Number of people" },
126 +   { "word": "DENSITY", "clue": "Crowdedness" },
127 +   { "word": "SETTLEMENT", "clue": "Place where people live" },
128 +   { "word": "COLONY", "clue": "Overseas territory" },
129 +   { "word": "NATION", "clue": "Country" },
130 +   { "word": "REPUBLIC", "clue": "Democratic state" },
131 +   { "word": "KINGDOM", "clue": "Monarchy" },
132 +   { "word": "EMPIRE", "clue": "Large political entity" },
133 +   { "word": "FEDERATION", "clue": "Union of states" },
134 +   { "word": "ALLIANCE", "clue": "Partnership of nations" },
135 +   { "word": "TREATY", "clue": "International agreement" },
136 +   { "word": "TRADE", "clue": "Commercial exchange" },
137 +   { "word": "EXPORT", "clue": "Goods sent abroad" },
138 +   { "word": "IMPORT", "clue": "Goods brought in" },
139 +   { "word": "COMMERCE", "clue": "Business activity" },
140 +   { "word": "INDUSTRY", "clue": "Manufacturing" },
141 +   { "word": "AGRICULTURE", "clue": "Farming" },
142 +   { "word": "MINING", "clue": "Extracting minerals" },
143 +   { "word": "FORESTRY", "clue": "Tree management" },
144 +   { "word": "FISHING", "clue": "Catching fish" },
145 +   { "word": "TOURISM", "clue": "Travel industry" },
146 +   { "word": "TRANSPORTATION", "clue": "Moving people and goods" },
147 +   { "word": "INFRASTRUCTURE", "clue": "Basic facilities" },
148 +   { "word": "COMMUNICATION", "clue": "Information exchange" },
149 +   { "word": "CULTURE", "clue": "Way of life" },
150 +   { "word": "LANGUAGE", "clue": "Communication system" },
151 +   { "word": "RELIGION", "clue": "Belief system" },
152 +   { "word": "ETHNICITY", "clue": "Cultural group" },
153 +   { "word": "MIGRATION", "clue": "Movement of people" },
154 +   { "word": "IMMIGRATION", "clue": "Moving into country" },
155 +   { "word": "EMIGRATION", "clue": "Moving out of country" },
156 +   { "word": "DIASPORA", "clue": "Scattered population" },
157 +   { "word": "NOMAD", "clue": "Wandering person" },
158 +   { "word": "REFUGEE", "clue": "Displaced person" },
159 +   { "word": "CENSUS", "clue": "Population count" },
160 +   { "word": "DEMOGRAPHIC", "clue": "Population characteristic" }
161 + ]
crossword-app/backend-py/data/word-lists/science.json
ADDED
@@ -0,0 +1,170 @@
  1 + [
  2 +   { "word": "ATOM", "clue": "Smallest unit of matter" },
  3 +   { "word": "GRAVITY", "clue": "Force that pulls objects down" },
  4 +   { "word": "MOLECULE", "clue": "Group of atoms bonded together" },
  5 +   { "word": "PHOTON", "clue": "Particle of light" },
  6 +   { "word": "CHEMISTRY", "clue": "Study of matter and reactions" },
  7 +   { "word": "PHYSICS", "clue": "Study of matter and energy" },
  8 +   { "word": "BIOLOGY", "clue": "Study of living organisms" },
  9 +   { "word": "ELEMENT", "clue": "Pure chemical substance" },
 10 +   { "word": "OXYGEN", "clue": "Gas essential for breathing" },
 11 +   { "word": "CARBON", "clue": "Element found in all life" },
 12 +   { "word": "HYDROGEN", "clue": "Lightest chemical element" },
 13 +   { "word": "ENERGY", "clue": "Capacity to do work" },
 14 +   { "word": "FORCE", "clue": "Push or pull on an object" },
 15 +   { "word": "VELOCITY", "clue": "Speed with direction" },
 16 +   { "word": "MASS", "clue": "Amount of matter in object" },
 17 +   { "word": "VOLUME", "clue": "Amount of space occupied" },
 18 +   { "word": "DENSITY", "clue": "Mass per unit volume" },
 19 +   { "word": "PRESSURE", "clue": "Force per unit area" },
 20 +   { "word": "TEMPERATURE", "clue": "Measure of heat" },
 21 +   { "word": "ELECTRON", "clue": "Negatively charged particle" },
 22 +   { "word": "PROTON", "clue": "Positively charged particle" },
 23 +   { "word": "NEUTRON", "clue": "Neutral atomic particle" },
 24 +   { "word": "NUCLEUS", "clue": "Center of an atom" },
 25 +   { "word": "CELL", "clue": "Basic unit of life" },
 26 +   { "word": "DNA", "clue": "Genetic blueprint molecule" },
 27 +   { "word": "PROTEIN", "clue": "Complex biological molecule" },
 28 +   { "word": "ENZYME", "clue": "Biological catalyst" },
 29 +   { "word": "VIRUS", "clue": "Infectious agent" },
 30 +   { "word": "BACTERIA", "clue": "Single-celled organisms" },
 31 +   { "word": "EVOLUTION", "clue": "Change in species over time" },
 32 +   { "word": "ISOTOPE", "clue": "Atom variant with different neutrons" },
 33 +   { "word": "ION", "clue": "Charged atom or molecule" },
 34 +   { "word": "COMPOUND", "clue": "Chemical combination of elements" },
 35 +   { "word": "MIXTURE", "clue": "Combined substances retaining properties" },
 36 +   { "word": "SOLUTION", "clue": "Dissolved mixture" },
 37 +   { "word": "ACID", "clue": "Sour chemical with low pH" },
 38 +   { "word": "BASE", "clue": "Alkaline substance with high pH" },
 39 +   { "word": "SALT", "clue": "Ionic compound from acid-base reaction" },
 40 +   { "word": "CATALYST", "clue": "Substance that speeds reactions" },
 41 +   { "word": "RNA", "clue": "Genetic messenger molecule" },
 42 +   { "word": "GENE", "clue": "Heredity unit on chromosome" },
 43 +   { "word": "CHROMOSOME", "clue": "Gene-carrying structure" },
 44 +   { "word": "TISSUE", "clue": "Group of similar cells" },
 45 +   { "word": "ORGAN", "clue": "Body part with specific function" },
 46 +   { "word": "SYSTEM", "clue": "Group of organs working together" },
 47 +   { "word": "ORGANISM", "clue": "Living individual entity" },
 48 +   { "word": "SPECIES", "clue": "Group of similar organisms" },
 49 +   { "word": "ADAPTATION", "clue": "Survival-enhancing change" },
 50 +   { "word": "MUTATION", "clue": "Genetic change in DNA" },
 51 +   { "word": "HEREDITY", "clue": "Passing traits to offspring" },
 52 +   { "word": "ECOSYSTEM", "clue": "Community and environment" },
 53 +   { "word": "HABITAT", "clue": "Natural living environment" },
 54 +   { "word": "BIODIVERSITY", "clue": "Variety of life forms" },
 55 +   { "word": "PHOTOSYNTHESIS", "clue": "Plant energy-making process" },
 56 +   { "word": "RESPIRATION", "clue": "Cellular breathing process" },
 57 +   { "word": "METABOLISM", "clue": "Chemical processes in body" },
 58 +   { "word": "HOMEOSTASIS", "clue": "Body's internal balance" },
 59 +   { "word": "MITOSIS", "clue": "Cell division for growth" },
 60 +   { "word": "MEIOSIS", "clue": "Cell division for reproduction" },
 61 +   { "word": "EMBRYO", "clue": "Early development stage" },
 62 +   { "word": "FOSSIL", "clue": "Preserved ancient remains" },
 63 +   { "word": "GEOLOGY", "clue": "Study of Earth's structure" },
 64 +   { "word": "MINERAL", "clue": "Natural inorganic crystal" },
 65 +   { "word": "ROCK", "clue": "Solid earth material" },
 66 +   { "word": "SEDIMENT", "clue": "Settled particles" },
 67 +   { "word": "EROSION", "clue": "Gradual wearing away" },
 68 +   { "word": "VOLCANO", "clue": "Earth opening spewing lava" },
 69 +   { "word": "EARTHQUAKE", "clue": "Ground shaking from plate movement" },
 70 +   { "word": "PLATE", "clue": "Earth's crust section" },
 71 +   { "word": "MAGMA", "clue": "Molten rock beneath surface" },
 72 +   { "word": "LAVA", "clue": "Molten rock on surface" },
 73 +   { "word": "CRYSTAL", "clue": "Ordered atomic structure" },
 74 +   { "word": "ATMOSPHERE", "clue": "Layer of gases around Earth" },
 75 +   { "word": "CLIMATE", "clue": "Long-term weather pattern" },
 76 +   { "word": "WEATHER", "clue": "Short-term atmospheric conditions" },
 77 +   { "word": "PRECIPITATION", "clue": "Water falling from clouds" },
 78 +   { "word": "HUMIDITY", "clue": "Moisture in air" },
 79 +   { "word": "WIND", "clue": "Moving air mass" },
 80 +   { "word": "STORM", "clue": "Violent weather event" },
 81 +   { "word": "HURRICANE", "clue": "Powerful tropical cyclone" },
 82 +   { "word": "TORNADO", "clue": "Rotating column of air" },
 83 +   { "word": "LIGHTNING", "clue": "Electrical discharge in sky" },
 84 +   { "word": "THUNDER", "clue": "Sound of lightning" },
 85 +   { "word": "RAINBOW", "clue": "Spectrum of light in sky" },
 86 +   { "word": "ASTRONOMY", "clue": "Study of celestial objects" },
 87 +   { "word": "GALAXY", "clue": "Collection of stars and planets" },
 88 +   { "word": "PLANET", "clue": "Large orbiting celestial body" },
 89 +   { "word": "STAR", "clue": "Self-luminous celestial body" },
 90 +   { "word": "MOON", "clue": "Natural satellite of planet" },
 91 +   { "word": "COMET", "clue": "Icy body with tail" },
 92 +   { "word": "ASTEROID", "clue": "Rocky space object" },
 93 +   { "word": "METEOR", "clue": "Space rock entering atmosphere" },
 94 +   { "word": "ORBIT", "clue": "Curved path around object" },
 95 +   { "word": "LIGHT", "clue": "Electromagnetic radiation" },
 96 +   { "word": "SPECTRUM", "clue": "Range of electromagnetic radiation" },
 97 +   { "word": "WAVELENGTH", "clue": "Distance between wave peaks" },
 98 +   { "word": "FREQUENCY", "clue": "Waves per unit time" },
 99 +   { "word": "AMPLITUDE", "clue": "Wave height or intensity" },
100 +   { "word": "SOUND", "clue": "Vibrations in air" },
101 +   { "word": "ECHO", "clue": "Reflected sound" },
102 +   { "word": "RESONANCE", "clue": "Vibration amplification" },
103 +   { "word": "DOPPLER", "clue": "Wave frequency shift effect" },
104 +   { "word": "MOTION", "clue": "Change in position" },
105 +   { "word": "ACCELERATION", "clue": "Change in velocity" },
106 +   { "word": "MOMENTUM", "clue": "Mass times velocity" },
107 +   { "word": "INERTIA", "clue": "Resistance to motion change" },
108 +   { "word": "FRICTION", "clue": "Resistance to sliding" },
109 +   { "word": "HEAT", "clue": "Thermal energy transfer" },
110 +   { "word": "COMBUSTION", "clue": "Burning chemical reaction" },
111 +   { "word": "OXIDATION", "clue": "Reaction with oxygen" },
112 +   { "word": "REDUCTION", "clue": "Gain of electrons" },
113 +   { "word": "ELECTROLYSIS", "clue": "Chemical breakdown by electricity" },
114 +   { "word": "CONDUCTIVITY", "clue": "Ability to transfer energy" },
115 +   { "word": "INSULATOR", "clue": "Material blocking energy flow" },
116 +   { "word": "SEMICONDUCTOR", "clue": "Partial electrical conductor" },
117 +   { "word": "MAGNETISM", "clue": "Force of magnetic attraction" },
118 +   { "word": "FIELD", "clue": "Region of force influence" },
119 +   { "word": "CIRCUIT", "clue": "Closed electrical path" },
120 +   { "word": "CURRENT", "clue": "Flow of electric charge" },
121 +   { "word": "VOLTAGE", "clue": "Electric potential difference" },
122 +   { "word": "RESISTANCE", "clue": "Opposition to current flow" },
123 +   { "word": "CAPACITOR", "clue": "Device storing electric charge" },
124 +   { "word": "INDUCTOR", "clue": "Device storing magnetic energy" },
125 +   { "word": "TRANSISTOR", "clue": "Electronic switching device" },
126 +   { "word": "LASER", "clue": "Focused beam of light" },
127 +   { "word": "RADAR", "clue": "Radio detection system" },
128 +   { "word": "SONAR", "clue": "Sound detection system" },
129 +   { "word": "TELESCOPE", "clue": "Instrument for viewing distant objects" },
130 +   { "word": "MICROSCOPE", "clue": "Instrument for viewing small objects" },
131 +   { "word": "HYPOTHESIS", "clue": "Testable scientific prediction" },
132 +   { "word": "THEORY", "clue": "Well-tested scientific explanation" },
133 +   { "word": "LAW", "clue": "Consistently observed scientific rule" },
134 +   { "word": "EXPERIMENT", "clue": "Controlled scientific test" },
135 +   { "word": "OBSERVATION", "clue": "Careful scientific watching" },
136 +   { "word": "MEASUREMENT", "clue": "Quantified observation" },
137 +   { "word": "ANALYSIS", "clue": "Detailed examination of data" },
138 +   { "word": "SYNTHESIS", "clue": "Combining elements into whole" },
139 +   { "word": "VARIABLE", "clue": "Factor that can change" },
140 +   { "word": "CONTROL", "clue": "Unchanged comparison group" },
141 +   { "word": "DATA", "clue": "Information collected from tests" },
142 +   { "word": "STATISTICS", "clue": "Mathematical analysis of data" },
143 +   { "word": "PROBABILITY", "clue": "Likelihood of occurrence" },
144 +   { "word": "PRECISION", "clue": "Exactness of measurement" },
145 +   { "word": "ACCURACY", "clue": "Correctness of measurement" },
146 +   { "word": "ERROR", "clue": "Difference from true value" },
147 +   { "word": "UNCERTAINTY", "clue": "Range of doubt in measurement" },
148 +   { "word": "CALIBRATION", "clue": "Adjusting instrument accuracy" },
149 +   { "word": "STANDARD", "clue": "Reference for measurement" },
150 +   { "word": "UNIT", "clue": "Base measure of quantity" },
151 +   { "word": "METRIC", "clue": "Decimal measurement system" },
152 +   { "word": "WEIGHT", "clue": "Force of gravity on mass" },
153 +   { "word": "CONCENTRATION", "clue": "Amount of substance per volume" },
154 +   { "word": "MOLARITY", "clue": "Moles of solute per liter" },
155 +   { "word": "EQUILIBRIUM", "clue": "State of balanced forces" },
156 +   { "word": "STABILITY", "clue": "Resistance to change" },
157 +   { "word": "DECAY", "clue": "Gradual breakdown process" },
158 +   { "word": "RADIATION", "clue": "Energy emitted from source" },
159 +   { "word": "RADIOACTIVE", "clue": "Emitting nuclear radiation" },
160 +   { "word": "HALFLIFE", "clue": "Time for half to decay" },
161 +   { "word": "FUSION", "clue": "Nuclear combining reaction" },
162 +   { "word": "FISSION", "clue": "Nuclear splitting reaction" },
163 +   { "word": "QUANTUM", "clue": "Discrete packet of energy" },
164 +   { "word": "PARTICLE", "clue": "Tiny piece of matter" },
165 +   { "word": "WAVE", "clue": "Energy transfer disturbance" },
166 +   { "word": "INTERFERENCE", "clue": "Wave interaction effect" },
167 +   { "word": "DIFFRACTION", "clue": "Wave bending around obstacle" },
168 +   { "word": "REFLECTION", "clue": "Bouncing back of waves" },
169 +   { "word": "REFRACTION", "clue": "Bending of waves through medium" }
170 + ]
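The generator is called with a difficulty level (the debug scripts later in this commit pass "medium" to `generate_puzzle`). One simple way a static list like this could be narrowed by difficulty is by answer length; the bands and the helper below are purely an illustrative assumption and are not the selection logic in `crossword_generator.py`.

```python
import random

# Assumed length bands per difficulty level; the real generator may instead
# weight candidates by semantic similarity scores from the vector search.
LENGTH_BANDS = {"easy": (3, 6), "medium": (4, 9), "hard": (6, 15)}

def sample_by_difficulty(entries: list[dict], difficulty: str, count: int = 10) -> list[dict]:
    """Sample up to `count` entries whose answers fit the difficulty's length band."""
    lo, hi = LENGTH_BANDS.get(difficulty, (3, 15))
    pool = [e for e in entries if lo <= len(e["word"]) <= hi]
    return random.sample(pool, min(count, len(pool)))
```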
crossword-app/backend-py/data/word-lists/technology.json
ADDED
@@ -0,0 +1,221 @@
  1 + [
  2 +   { "word": "COMPUTER", "clue": "Electronic processing device" },
  3 +   { "word": "INTERNET", "clue": "Global computer network" },
  4 +   { "word": "ALGORITHM", "clue": "Set of rules for solving problems" },
  5 +   { "word": "DATABASE", "clue": "Organized collection of data" },
  6 +   { "word": "SOFTWARE", "clue": "Computer programs" },
  7 +   { "word": "HARDWARE", "clue": "Physical computer components" },
  8 +   { "word": "NETWORK", "clue": "Connected system of computers" },
  9 +   { "word": "CODE", "clue": "Programming instructions" },
 10 +   { "word": "ROBOT", "clue": "Automated machine" },
 11 +   { "word": "ARTIFICIAL", "clue": "Made by humans, not natural" },
 12 +   { "word": "DIGITAL", "clue": "Using binary data" },
 13 +   { "word": "BINARY", "clue": "Base-2 number system" },
 14 +   { "word": "PROCESSOR", "clue": "Computer's brain" },
 15 +   { "word": "MEMORY", "clue": "Data storage component" },
 16 +   { "word": "KEYBOARD", "clue": "Input device with keys" },
 17 +   { "word": "MONITOR", "clue": "Computer display screen" },
 18 +   { "word": "MOUSE", "clue": "Pointing input device" },
 19 +   { "word": "PRINTER", "clue": "Device that prints documents" },
 20 +   { "word": "SCANNER", "clue": "Device that digitizes images" },
 21 +   { "word": "CAMERA", "clue": "Device that captures images" },
 22 +   { "word": "SMARTPHONE", "clue": "Portable computing device" },
 23 +   { "word": "TABLET", "clue": "Touchscreen computing device" },
 24 +   { "word": "LAPTOP", "clue": "Portable computer" },
 25 +   { "word": "SERVER", "clue": "Computer that serves data" },
 26 +   { "word": "CLOUD", "clue": "Internet-based computing" },
 27 +   { "word": "WEBSITE", "clue": "Collection of web pages" },
 28 +   { "word": "EMAIL", "clue": "Electronic mail" },
 29 +   { "word": "BROWSER", "clue": "Web navigation software" },
 30 +   { "word": "SEARCH", "clue": "Look for information" },
 31 +   { "word": "DOWNLOAD", "clue": "Transfer data to device" },
 32 +   { "word": "UPLOAD", "clue": "Transfer data from device" },
 33 +   { "word": "BANDWIDTH", "clue": "Data transfer capacity" },
 34 +   { "word": "PROTOCOL", "clue": "Communication rules" },
 35 +   { "word": "FIREWALL", "clue": "Network security barrier" },
 36 +   { "word": "ENCRYPTION", "clue": "Data scrambling for security" },
 37 +   { "word": "PASSWORD", "clue": "Secret access code" },
 38 +   { "word": "SECURITY", "clue": "Protection from threats" },
 39 +   { "word": "VIRUS", "clue": "Malicious computer program" },
 40 +   { "word": "MALWARE", "clue": "Harmful software" },
 41 +   { "word": "ANTIVIRUS", "clue": "Protection software" },
 42 +   { "word": "BACKUP", "clue": "Data safety copy" },
 43 +   { "word": "RECOVERY", "clue": "Data restoration process" },
 44 +   { "word": "STORAGE", "clue": "Data keeping capacity" },
 45 +   { "word": "HARDDRIVE", "clue": "Magnetic storage device" },
 46 +   { "word": "FLASH", "clue": "Solid state storage" },
 47 +   { "word": "RAM", "clue": "Random access memory" },
 48 +   { "word": "ROM", "clue": "Read-only memory" },
 49 +   { "word": "CPU", "clue": "Central processing unit" },
 50 +   { "word": "GPU", "clue": "Graphics processing unit" },
 51 +   { "word": "MOTHERBOARD", "clue": "Main circuit board" },
 52 +   { "word": "CHIP", "clue": "Integrated circuit" },
 53 +   { "word": "CIRCUIT", "clue": "Electronic pathway" },
 54 +   { "word": "TRANSISTOR", "clue": "Electronic switch" },
 55 +   { "word": "SILICON", "clue": "Semiconductor material" },
 56 +   { "word": "NANOTECHNOLOGY", "clue": "Extremely small scale tech" },
 57 +   { "word": "AUTOMATION", "clue": "Self-operating technology" },
 58 +   { "word": "MACHINE", "clue": "Mechanical device" },
 59 +   { "word": "SENSOR", "clue": "Detection device" },
 60 +   { "word": "ACTUATOR", "clue": "Movement device" },
 61 +   { "word": "FEEDBACK", "clue": "System response information" },
 62 +   { "word": "PROGRAMMING", "clue": "Writing computer instructions" },
 63 +   { "word": "FUNCTION", "clue": "Reusable code block" },
 64 +   { "word": "VARIABLE", "clue": "Data storage container" },
 65 +   { "word": "LOOP", "clue": "Repeating code structure" },
 66 +   { "word": "CONDITION", "clue": "Decision-making logic" },
 67 +   { "word": "DEBUG", "clue": "Find and fix errors" },
 68 +   { "word": "COMPILE", "clue": "Convert code to executable" },
 69 +   { "word": "RUNTIME", "clue": "Program execution time" },
 70 +   { "word": "API", "clue": "Application programming interface" },
 71 +   { "word": "FRAMEWORK", "clue": "Code structure foundation" },
 72 +   { "word": "LIBRARY", "clue": "Reusable code collection" },
 73 +   { "word": "MODULE", "clue": "Self-contained code unit" },
 74 +   { "word": "OBJECT", "clue": "Data and methods container" },
 75 +   { "word": "CLASS", "clue": "Object blueprint" },
 76 +   { "word": "INHERITANCE", "clue": "Code reuse mechanism" },
 77 +   { "word": "INTERFACE", "clue": "System interaction boundary" },
 78 +   { "word": "PROTOCOL", "clue": "Communication standard" },
 79 +   { "word": "FORMAT", "clue": "Data structure standard" },
 80 +   { "word": "SYNTAX", "clue": "Language rules" },
 81 +   { "word": "SEMANTIC", "clue": "Meaning in code" },
 82 +   { "word": "PARSING", "clue": "Analyzing code structure" },
 83 +   { "word": "COMPILER", "clue": "Code translation program" },
 84 +   { "word": "INTERPRETER", "clue": "Code execution program" },
 85 +   { "word": "VIRTUAL", "clue": "Simulated environment" },
 86 +   { "word": "SIMULATION", "clue": "Computer modeling" },
 87 +   { "word": "EMULATION", "clue": "System imitation" },
 88 +   { "word": "OPTIMIZATION", "clue": "Performance improvement" },
 89 +   { "word": "EFFICIENCY", "clue": "Resource usage effectiveness" },
 90 +   { "word": "PERFORMANCE", "clue": "System speed and quality" },
 91 +   { "word": "BENCHMARK", "clue": "Performance measurement" },
 92 +   { "word": "TESTING", "clue": "Quality verification process" },
 93 +   { "word": "VALIDATION", "clue": "Correctness checking" },
 94 +   { "word": "VERIFICATION", "clue": "Accuracy confirmation" },
 95 +   { "word": "QUALITY", "clue": "Standard of excellence" },
 96 +   { "word": "MAINTENANCE", "clue": "System upkeep" },
 97 +   { "word": "UPDATE", "clue": "Software improvement" },
 98 +   { "word": "PATCH", "clue": "Software fix" },
 99 +   { "word": "VERSION", "clue": "Software release number" },
100 +   { "word": "RELEASE", "clue": "Software distribution" },
101 +   { "word": "DEPLOYMENT", "clue": "Software installation" },
102 +   { "word": "CONFIGURATION", "clue": "System setup" },
103 +   { "word": "INSTALLATION", "clue": "Software setup process" },
104 +   { "word": "MIGRATION", "clue": "System transition" },
105 +   { "word": "INTEGRATION", "clue": "System combination" },
106 +   { "word": "COMPATIBILITY", "clue": "System cooperation ability" },
107 +   { "word": "INTEROPERABILITY", "clue": "Cross-system communication" },
108 +   { "word": "SCALABILITY", "clue": "Growth accommodation ability" },
109 +   { "word": "RELIABILITY", "clue": "Consistent performance" },
110 +   { "word": "AVAILABILITY", "clue": "System accessibility" },
111 +   { "word": "REDUNDANCY", "clue": "Backup system duplication" },
112 +   { "word": "FAULT", "clue": "System error condition" },
113 +   { "word": "TOLERANCE", "clue": "Error handling ability" },
114 +   { "word": "RECOVERY", "clue": "System restoration" },
115 +   { "word": "MONITORING", "clue": "System observation" },
116 +   { "word": "LOGGING", "clue": "Event recording" },
117 +   { "word": "ANALYTICS", "clue": "Data analysis" },
118 +   { "word": "METRICS", "clue": "Measurement data" },
119 +   { "word": "DASHBOARD", "clue": "Information display panel" },
120 +   { "word": "INTERFACE", "clue": "User interaction design" },
121 +   { "word": "EXPERIENCE", "clue": "User interaction quality" },
122 +   { "word": "USABILITY", "clue": "Ease of use" },
123 +   { "word": "ACCESSIBILITY", "clue": "Universal design principle" },
124 +   { "word": "RESPONSIVE", "clue": "Adaptive design" },
125 +   { "word": "MOBILE", "clue": "Portable device category" },
126 +   { "word": "TOUCHSCREEN", "clue": "Touch-sensitive display" },
127 +   { "word": "GESTURE", "clue": "Touch movement command" },
128 +   { "word": "VOICE", "clue": "Speech interaction" },
129 +   { "word": "RECOGNITION", "clue": "Pattern identification" },
130 +   { "word": "LEARNING", "clue": "Adaptive improvement" },
131 +   { "word": "INTELLIGENCE", "clue": "Artificial reasoning" },
132 +   { "word": "NEURAL", "clue": "Brain-inspired network" },
133 +   { "word": "DEEP", "clue": "Multi-layered learning" },
134 +   { "word": "MACHINE", "clue": "Automated learning system" },
135 +   { "word": "DATA", "clue": "Information collection" },
136 +   { "word": "BIG", "clue": "Large scale data" },
137 +   { "word": "MINING", "clue": "Data pattern extraction" },
138 +   { "word": "ANALYSIS", "clue": "Data examination" },
139 +   { "word": "VISUALIZATION", "clue": "Data graphic representation" },
140 +   { "word": "DASHBOARD", "clue": "Data monitoring panel" },
141 +   { "word": "REPORT", "clue": "Data summary document" },
142 +   { "word": "QUERY", "clue": "Data search request" },
143 +   { "word": "INDEX", "clue": "Data location reference" },
144 +   { "word": "SCHEMA", "clue": "Data structure blueprint" },
145 +   { "word": "TABLE", "clue": "Data organization structure" },
146 +   { "word": "RECORD", "clue": "Data entry" },
147 +   { "word": "FIELD", "clue": "Data element" },
148 +   { "word": "PRIMARY", "clue": "Main identifier key" },
149 +   { "word": "FOREIGN", "clue": "Reference relationship key" },
150 +   { "word": "RELATION", "clue": "Data connection" },
151 +   { "word": "JOIN", "clue": "Data combination operation" },
152 +   { "word": "TRANSACTION", "clue": "Data operation sequence" },
153 +   { "word": "COMMIT", "clue": "Data change confirmation" },
154 +   { "word": "ROLLBACK", "clue": "Data change reversal" },
155 +   { "word": "CONCURRENCY", "clue": "Simultaneous access handling" },
156 +   { "word": "LOCK", "clue": "Data access control" },
157 +   { "word": "SYNCHRONIZATION", "clue": "Timing coordination" },
158 +   { "word": "THREAD", "clue": "Execution sequence" },
159 +   { "word": "PROCESS", "clue": "Running program instance" },
160 +   { "word": "MULTITASKING", "clue": "Multiple process handling" },
161 +   { "word": "PARALLEL", "clue": "Simultaneous execution" },
162 +   { "word": "DISTRIBUTED", "clue": "Spread across multiple systems" },
163 +   { "word": "CLUSTER", "clue": "Group of connected computers" },
164 +   { "word": "GRID", "clue": "Distributed computing network" },
165 +   { "word": "PEER", "clue": "Equal network participant" },
166 +   { "word": "CLIENT", "clue": "Service requesting system" },
167 +   { "word": "SERVICE", "clue": "System functionality provider" },
168 +   { "word": "MICROSERVICE", "clue": "Small independent service" },
169 +   { "word": "CONTAINER", "clue": "Isolated application environment" },
170 +   { "word": "DOCKER", "clue": "Containerization platform" },
171 +   { "word": "KUBERNETES", "clue": "Container orchestration" },
172 +   { "word": "DEVOPS", "clue": "Development operations practice" },
173 +   { "word": "AGILE", "clue": "Flexible development method" },
174 +   { "word": "SCRUM", "clue": "Iterative development framework" },
175 +   { "word": "SPRINT", "clue": "Short development cycle" },
176 +   { "word": "KANBAN", "clue": "Visual workflow management" },
177 +   { "word": "CONTINUOUS", "clue": "Ongoing integration practice" },
178 +   { "word": "PIPELINE", "clue": "Automated workflow" },
179 +   { "word": "BUILD", "clue": "Software compilation process" },
180 +   { "word": "TESTING", "clue": "Quality assurance process" },
181 +   { "word": "AUTOMATION", "clue": "Manual task elimination" },
182 +   { "word": "SCRIPT", "clue": "Automated task sequence" },
183 +   { "word": "BATCH", "clue": "Group processing" },
184 +   { "word": "STREAMING", "clue": "Continuous data flow" },
185 +   { "word": "REALTIME", "clue": "Immediate processing" },
186 +   { "word": "LATENCY", "clue": "Response delay time" },
187 +   { "word": "THROUGHPUT", "clue": "Processing capacity" },
188 +   { "word": "BOTTLENECK", "clue": "Performance limitation point" },
189 +   { "word": "CACHE", "clue": "Fast temporary storage" },
190 +   { "word": "BUFFER", "clue": "Temporary data holder" },
191 +   { "word": "QUEUE", "clue": "Ordered waiting line" },
192 +   { "word": "STACK", "clue": "Last-in-first-out structure" },
193 +   { "word": "HEAP", "clue": "Dynamic memory area" },
194 +   { "word": "POINTER", "clue": "Memory address reference" },
195 +   { "word": "REFERENCE", "clue": "Object location indicator" },
196 +   { "word": "GARBAGE", "clue": "Unused memory collection" },
197 +   { "word": "ALLOCATION", "clue": "Memory assignment" },
198 +   { "word": "DEALLOCATION", "clue": "Memory release" },
199 +   { "word": "LEAK", "clue": "Memory usage error" },
200 +   { "word": "OVERFLOW", "clue": "Capacity exceeding error" },
201 +   { "word": "UNDERFLOW", "clue": "Insufficient data error" },
202 +   { "word": "EXCEPTION", "clue": "Error handling mechanism" },
203 +   { "word": "INTERRUPT", "clue": "Process suspension signal" },
204 +   { "word": "SIGNAL", "clue": "Process communication" },
205 +   { "word": "EVENT", "clue": "System occurrence" },
206 +   { "word": "HANDLER", "clue": "Event processing function" },
207 +   { "word": "CALLBACK", "clue": "Function reference" },
208 +   { "word": "PROMISE", "clue": "Future value placeholder" },
209 +   { "word": "ASYNC", "clue": "Non-blocking operation" },
210 +   { "word": "AWAIT", "clue": "Pause for completion" },
211 +   { "word": "YIELD", "clue": "Temporary function pause" },
212 +   { "word": "GENERATOR", "clue": "Value sequence producer" },
213 +   { "word": "ITERATOR", "clue": "Sequential access pattern" },
214 +   { "word": "RECURSION", "clue": "Self-calling function" },
215 +   { "word": "CLOSURE", "clue": "Function scope retention" },
216 +   { "word": "LAMBDA", "clue": "Anonymous function" },
217 +   { "word": "FUNCTIONAL", "clue": "Function-based programming" },
218 +   { "word": "PROCEDURAL", "clue": "Step-by-step programming" },
219 +   { "word": "DECLARATIVE", "clue": "What-not-how programming" },
220 +   { "word": "IMPERATIVE", "clue": "Command-based programming" }
221 + ]
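technology.json reuses a few answers under different clues (PROTOCOL at lines 34 and 78, INTERFACE at 77 and 120, DASHBOARD at 119 and 140, among others). If that is unintentional, a small validation pass over the word lists catches it; the script below is only a sketch and is not part of the committed test suite.

```python
import json
from collections import Counter
from pathlib import Path

def report_duplicate_answers(word_list_dir: str = "data/word-lists") -> None:
    """Print any answer that appears more than once within a single topic file."""
    for path in sorted(Path(word_list_dir).glob("*.json")):
        with open(path, encoding="utf-8") as fh:
            answers = [entry["word"] for entry in json.load(fh)]
        duplicates = {w: n for w, n in Counter(answers).items() if n > 1}
        if duplicates:
            print(f"{path.name}: {duplicates}")

if __name__ == "__main__":
    report_duplicate_answers()
```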
crossword-app/backend-py/debug_full_generation.py
ADDED
@@ -0,0 +1,316 @@
  1 + #!/usr/bin/env python3
  2 + """
  3 + Debug the complete crossword generation process to identify display/numbering issues.
  4 + """
  5 +
  6 + import asyncio
  7 + import sys
  8 + import json
  9 + from pathlib import Path
 10 +
 11 + # Add project root to path
 12 + project_root = Path(__file__).parent
 13 + sys.path.insert(0, str(project_root))
 14 +
 15 + from src.services.crossword_generator_fixed import CrosswordGeneratorFixed
 16 +
 17 + async def debug_complete_generation():
 18 +     """Debug the complete crossword generation process."""
 19 +
 20 +     print("🔍 Debugging Complete Crossword Generation Process\n")
 21 +
 22 +     # Create generator with no vector service to use static words
 23 +     generator = CrosswordGeneratorFixed(vector_service=None)
 24 +
 25 +     # Override the word selection to use controlled test words
 26 +     test_words = [
 27 +         {"word": "MACHINE", "clue": "Device with moving parts"},
 28 +         {"word": "COMPUTER", "clue": "Electronic device"},
 29 +         {"word": "EXPERT", "clue": "Person with specialized knowledge"},
 30 +         {"word": "SCIENCE", "clue": "Systematic study"},
 31 +         {"word": "TECHNOLOGY", "clue": "Applied science"},
 32 +         {"word": "RESEARCH", "clue": "Systematic investigation"},
 33 +         {"word": "ANALYSIS", "clue": "Detailed examination"},
 34 +         {"word": "METHOD", "clue": "Systematic approach"}
 35 +     ]
 36 +
 37 +     # Mock the word selection method
 38 +     async def mock_select_words(topics, difficulty, use_ai):
 39 +         return test_words
 40 +     generator._select_words = mock_select_words
 41 +
 42 +     print("=" * 70)
 43 +     print("GENERATING COMPLETE CROSSWORD")
 44 +     print("=" * 70)
 45 +
 46 +     try:
 47 +         result = await generator.generate_puzzle(["technology"], "medium", use_ai=False)
 48 +
 49 +         if result:
 50 +             print("✅ Crossword generation successful!")
 51 +
 52 +             # Analyze the complete result
 53 +             analyze_crossword_result(result)
 54 +         else:
 55 +             print("❌ Crossword generation failed - returned None")
 56 +
 57 +     except Exception as e:
 58 +         print(f"❌ Crossword generation failed with error: {e}")
 59 +         import traceback
 60 +         traceback.print_exc()
 61 +
 62 + def analyze_crossword_result(result):
 63 +     """Analyze the complete crossword result for potential issues."""
 64 +
 65 +     print("\n" + "=" * 70)
 66 +     print("CROSSWORD RESULT ANALYSIS")
 67 +     print("=" * 70)
 68 +
 69 +     # Print basic metadata
 70 +     metadata = result.get("metadata", {})
 71 +     print("Metadata:")
 72 +     for key, value in metadata.items():
 73 +         print(f"  {key}: {value}")
 74 +
 75 +     # Analyze the grid
 76 +     grid = result.get("grid", [])
 77 +     print(f"\nGrid dimensions: {len(grid)}x{len(grid[0]) if grid else 0}")
 78 +
 79 +     print("\nGrid layout:")
 80 +     print_numbered_grid(grid)
 81 +
 82 +     # Analyze placed words vs clues
 83 +     clues = result.get("clues", [])
 84 +     print(f"\nNumber of clues generated: {len(clues)}")
 85 +
 86 +     print("\nClue analysis:")
 87 +     for i, clue in enumerate(clues):
 88 +         print(f"  Clue {i+1}:")
 89 +         print(f"    Number: {clue.get('number', 'MISSING')}")
 90 +         print(f"    Word: {clue.get('word', 'MISSING')}")
 91 +         print(f"    Direction: {clue.get('direction', 'MISSING')}")
 92 +         print(f"    Position: {clue.get('position', 'MISSING')}")
 93 +         print(f"    Text: {clue.get('text', 'MISSING')}")
 94 +
 95 +     # Check for potential issues
 96 +     print("\n" + "=" * 70)
 97 +     print("ISSUE DETECTION")
 98 +     print("=" * 70)
 99 +
100 +     check_word_boundary_consistency(grid, clues)
101 +     check_numbering_consistency(clues)
102 +     check_grid_word_alignment(grid, clues)
103 +
104 + def print_numbered_grid(grid):
105 +     """Print grid with coordinates for analysis."""
106 +     if not grid:
107 +         print("  Empty grid")
108 +         return
109 +
110 +     # Print column headers
111 +     print("     ", end="")
112 +     for c in range(len(grid[0])):
113 +         print(f"{c:2d}", end="")
114 +     print()
115 +
116 +     # Print rows with row numbers
117 +     for r in range(len(grid)):
118 +         print(f" {r:2d}: ", end="")
119 +         for c in range(len(grid[0])):
120 +             cell = grid[r][c]
121 +             if cell == ".":
122 +                 print(" .", end="")
123 +             else:
124 +                 print(f" {cell}", end="")
125 +         print()
126 +
127 + def check_word_boundary_consistency(grid, clues):
128 +     """Check if words in clues match what's actually in the grid."""
129 +
130 +     print("Checking word boundary consistency:")
131 +
132 +     issues_found = []
133 +
134 +     for clue in clues:
135 +         word = clue.get("word", "")
136 +         position = clue.get("position", {})
137 +         direction = clue.get("direction", "")
138 +
139 +         if not all([word, position, direction]):
140 +             issues_found.append(f"Incomplete clue data: {clue}")
141 +             continue
142 +
143 +         row = position.get("row", -1)
144 +         col = position.get("col", -1)
145 +
146 +         if row < 0 or col < 0:
147 +             issues_found.append(f"Invalid position for word '{word}': {position}")
148 +             continue
149 +
150 +         # Extract the actual word from the grid
151 +         grid_word = extract_word_from_grid(grid, row, col, direction, len(word))
152 +
153 +         if grid_word != word:
154 +             issues_found.append(f"Mismatch for '{word}' at ({row}, {col}) {direction}: grid shows '{grid_word}'")
155 +
156 +     if issues_found:
157 +         print("  ❌ Issues found:")
158 +         for issue in issues_found:
159 +             print(f"    {issue}")
160 +     else:
161 +         print("  ✅ All words match grid positions")
162 +
163 + def extract_word_from_grid(grid, row, col, direction, expected_length):
164 +     """Extract a word from the grid at the given position and direction."""
165 +
166 +     if row >= len(grid) or col >= len(grid[0]):
167 +         return "OUT_OF_BOUNDS"
168 +
169 +     word = ""
170 +
171 +     if direction == "across":  # horizontal
172 +         for i in range(expected_length):
173 +             if col + i >= len(grid[0]):
174 +                 return word + "TRUNCATED"
175 +             word += grid[row][col + i]
176 +
177 +     elif direction == "down":  # vertical
178 +         for i in range(expected_length):
179 +             if row + i >= len(grid):
180 +                 return word + "TRUNCATED"
181 +             word += grid[row + i][col]
182 +
183 +     return word
184 +
185 + def check_numbering_consistency(clues):
186 +     """Check if clue numbering is consistent and logical."""
187 +
188 +     print("\nChecking numbering consistency:")
189 +
190 +     numbers = [clue.get("number", -1) for clue in clues]
191 +     issues = []
192 +
193 +     # Check for duplicate numbers
194 +     if len(numbers) != len(set(numbers)):
195 +         issues.append("Duplicate clue numbers found")
196 +
197 +     # Check for missing numbers in sequence
198 +     if numbers:
199 +         min_num = min(numbers)
200 +         max_num = max(numbers)
201 +         expected = set(range(min_num, max_num + 1))
202 +         actual = set(numbers)
203 +
204 +         if expected != actual:
205 +             missing = expected - actual
206 +             extra = actual - expected
207 +             if missing:
208 +                 issues.append(f"Missing numbers: {sorted(missing)}")
209 +             if extra:
210 +                 issues.append(f"Extra numbers: {sorted(extra)}")
211 +
212 +     if issues:
213 +         print("  ❌ Numbering issues:")
214 +         for issue in issues:
215 +             print(f"    {issue}")
216 +     else:
217 +         print("  ✅ Numbering is consistent")
218 +
219 + def check_grid_word_alignment(grid, clues):
220 +     """Check if all words are properly aligned and don't create unintended extensions."""
221 +
222 +     print("\nChecking grid word alignment:")
223 +
224 +     # Find all letter sequences in the grid
225 +     horizontal_sequences = find_horizontal_sequences(grid)
226 +     vertical_sequences = find_vertical_sequences(grid)
227 +
228 +     print(f"  Found {len(horizontal_sequences)} horizontal sequences")
229 +     print(f"  Found {len(vertical_sequences)} vertical sequences")
230 +
231 +     # Check if each sequence corresponds to a clue
232 +     clue_words = {}
233 +     for clue in clues:
234 +         pos = clue.get("position", {})
235 +         key = (pos.get("row"), pos.get("col"), clue.get("direction"))
236 +         clue_words[key] = clue.get("word", "")
237 +
238 +     issues = []
239 +
240 +     # Check horizontal sequences
241 +     for seq in horizontal_sequences:
242 +         row, start_col, word = seq
243 +         key = (row, start_col, "across")
244 +         if key not in clue_words:
245 +             issues.append(f"Unaccounted horizontal sequence: '{word}' at ({row}, {start_col})")
246 +         elif clue_words[key] != word:
247 +             issues.append(f"Mismatch: clue says '{clue_words[key]}' but grid shows '{word}' at ({row}, {start_col})")
248 +
249 +     # Check vertical sequences
250 +     for seq in vertical_sequences:
251 +         col, start_row, word = seq
252 +         key = (start_row, col, "down")
253 +         if key not in clue_words:
254 +             issues.append(f"Unaccounted vertical sequence: '{word}' at ({start_row}, {col})")
255 +         elif clue_words[key] != word:
256 +             issues.append(f"Mismatch: clue says '{clue_words[key]}' but grid shows '{word}' at ({start_row}, {col})")
257 +
258 +     if issues:
259 +         print("  ❌ Alignment issues found:")
260 +         for issue in issues:
261 +             print(f"    {issue}")
262 +     else:
263 +         print("  ✅ All words are properly aligned")
264 +
265 + def find_horizontal_sequences(grid):
266 +     """Find all horizontal letter sequences of length > 1."""
267 +     sequences = []
268 +
269 +     for r in range(len(grid)):
270 +         current_word = ""
271 +         start_col = None
272 +
273 +         for c in range(len(grid[0])):
274 +             if grid[r][c] != ".":
275 +                 if start_col is None:
276 +                     start_col = c
277 +                 current_word += grid[r][c]
278 +             else:
279 +                 if current_word and len(current_word) > 1:
280 +                     sequences.append((r, start_col, current_word))
281 +                 current_word = ""
282 +                 start_col = None
283 +
284 +         # Handle word at end of row
285 +         if current_word and len(current_word) > 1:
286 +             sequences.append((r, start_col, current_word))
287 +
288 +     return sequences
289 +
290 + def find_vertical_sequences(grid):
291 +     """Find all vertical letter sequences of length > 1."""
292 +     sequences = []
293 +
294 +     for c in range(len(grid[0])):
295 +         current_word = ""
296 +         start_row = None
297 +
298 +         for r in range(len(grid)):
299 +             if grid[r][c] != ".":
300 +                 if start_row is None:
301 +                     start_row = r
302 +                 current_word += grid[r][c]
303 +             else:
304 +                 if current_word and len(current_word) > 1:
305 +                     sequences.append((c, start_row, current_word))
306 +                 current_word = ""
307 +                 start_row = None
308 +
309 +         # Handle word at end of column
310 +         if current_word and len(current_word) > 1:
311 +             sequences.append((c, start_row, current_word))
312 +
313 +     return sequences
314 +
315 + if __name__ == "__main__":
316 +     asyncio.run(debug_complete_generation())
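The script runs end to end with `python debug_full_generation.py` thanks to its `__main__` guard, and its helpers are plain module-level functions that can also be exercised on a hand-built grid. The snippet below is an illustrative check only, assuming it is executed from the backend-py directory so that importing the module (and its own import of `CrosswordGeneratorFixed`) resolves; it is not part of the committed tests.

```python
# Illustrative check of the debug helpers against a tiny hand-built grid:
# MACHINE placed across at (0, 0), crossing HAT placed down at (0, 3).
from debug_full_generation import extract_word_from_grid, find_horizontal_sequences

grid = [
    list("MACHINE"),
    list("...A..."),
    list("...T..."),
]

assert extract_word_from_grid(grid, 0, 0, "across", 7) == "MACHINE"
assert extract_word_from_grid(grid, 0, 3, "down", 3) == "HAT"
assert find_horizontal_sequences(grid) == [(0, 0, "MACHINE")]
```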
crossword-app/backend-py/debug_grid_direct.py
ADDED
@@ -0,0 +1,293 @@
  1 + #!/usr/bin/env python3
  2 + """
  3 + Direct grid generation test to identify word boundary/display issues.
  4 + """
  5 +
  6 + import sys
  7 + from pathlib import Path
  8 +
  9 + # Add project root to path
 10 + project_root = Path(__file__).parent
 11 + sys.path.insert(0, str(project_root))
 12 +
 13 + from src.services.crossword_generator_fixed import CrosswordGeneratorFixed
 14 +
 15 + def test_direct_grid_generation():
 16 +     """Test grid generation directly with controlled words."""
 17 +
 18 +     print("🔍 Direct Grid Generation Test\n")
 19 +
 20 +     generator = CrosswordGeneratorFixed(vector_service=None)
 21 +
 22 +     # Test words that might cause the issues seen in the images
 23 +     test_words = [
 24 +         {"word": "MACHINE", "clue": "Device with moving parts"},
 25 +         {"word": "COMPUTER", "clue": "Electronic device"},
 26 +         {"word": "EXPERT", "clue": "Person with specialized knowledge"},
 27 +         {"word": "SCIENCE", "clue": "Systematic study"},
 28 +         {"word": "CAMERA", "clue": "Device for taking photos"},
 29 +         {"word": "METHOD", "clue": "Systematic approach"}
 30 +     ]
 31 +
 32 +     print("=" * 60)
 33 +     print("TEST 1: Direct grid creation")
 34 +     print("=" * 60)
 35 +
 36 +     # Test the _create_grid method directly
 37 +     result = generator._create_grid(test_words)
 38 +
 39 +     if result:
 40 +         print("✅ Grid generation successful!")
 41 +
 42 +         grid = result["grid"]
 43 +         placed_words = result["placed_words"]
 44 +         clues = result["clues"]
 45 +
 46 +         print(f"Grid size: {len(grid)}x{len(grid[0])}")
 47 +         print(f"Words placed: {len(placed_words)}")
 48 +         print(f"Clues generated: {len(clues)}")
 49 +
 50 +         # Print the grid
 51 +         print("\nGenerated Grid:")
 52 +         print_grid_with_coordinates(grid)
 53 +
 54 +         # Print placed words details
 55 +         print("\nPlaced Words:")
 56 +         for i, word_info in enumerate(placed_words):
 57 +             print(f"  {i+1}. {word_info['word']} at ({word_info['row']}, {word_info['col']}) {word_info['direction']}")
 58 +
 59 +         # Print clues
 60 +         print("\nGenerated Clues:")
 61 +         for clue in clues:
 62 +             print(f"  {clue['number']}. {clue['direction']}: {clue['word']} - {clue['text']}")
 63 +
 64 +         # Analyze for potential issues
 65 +         print("\n" + "=" * 60)
 66 +         print("ANALYSIS")
 67 +         print("=" * 60)
 68 +
 69 +         analyze_grid_issues(grid, placed_words, clues)
 70 +
 71 +     else:
 72 +         print("❌ Grid generation failed")
 73 +
 74 +     # Test another scenario that might reproduce the image issues
 75 +     print("\n" + "=" * 60)
 76 +     print("TEST 2: Scenario with potential extension words")
 77 +     print("=" * 60)
 78 +
 79 +     # Words that might create the "MACHINERY" type issue
 80 +     extension_words = [
 81 +         {"word": "MACHINE", "clue": "Device with moving parts"},
 82 +         {"word": "MACHINERY", "clue": "Mechanical equipment"},  # Might cause confusion
 83 +         {"word": "EXPERT", "clue": "Specialist"},
 84 +         {"word": "TECHNOLOGY", "clue": "Applied science"},
 85 +     ]
 86 +
 87 +     result2 = generator._create_grid(extension_words)
 88 +
 89 +     if result2:
 90 +         print("✅ Extension test grid generated!")
 91 +
 92 +         grid2 = result2["grid"]
 93 +         placed_words2 = result2["placed_words"]
 94 +
 95 +         print("\nExtension Test Grid:")
 96 +         print_grid_with_coordinates(grid2)
 97 +
 98 +         print("\nPlaced Words:")
 99 +         for i, word_info in enumerate(placed_words2):
100 +             print(f"  {i+1}. {word_info['word']} at ({word_info['row']}, {word_info['col']}) {word_info['direction']}")
101 +
102 +         # Check specifically for MACHINE vs MACHINERY issues
103 +         check_machine_machinery_issue(grid2, placed_words2)
104 +
105 +     else:
106 +         print("❌ Extension test grid generation failed")
107 +
108 + def print_grid_with_coordinates(grid):
109 +     """Print grid with row and column coordinates."""
110 +     if not grid:
111 +         print("  Empty grid")
112 +         return
113 +
114 +     # Print column headers
115 +     print("     ", end="")
116 +     for c in range(len(grid[0])):
117 +         print(f"{c:2d}", end="")
118 +     print()
119 +
120 +     # Print rows
121 +     for r in range(len(grid)):
122 +         print(f" {r:2d}: ", end="")
123 +         for c in range(len(grid[0])):
124 +             cell = grid[r][c]
125 +             if cell == ".":
126 +                 print(" .", end="")
127 +             else:
128 +                 print(f" {cell}", end="")
129 +         print()
130 +
131 + def analyze_grid_issues(grid, placed_words, clues):
132 +     """Analyze the grid for potential boundary/display issues."""
133 +
134 +     print("Checking for potential issues...")
135 +
136 +     issues = []
137 +
138 +     # Check 1: Verify each placed word actually exists in the grid
139 +     for word_info in placed_words:
140 +         word = word_info["word"]
141 +         row = word_info["row"]
142 +         col = word_info["col"]
143 +         direction = word_info["direction"]
144 +
145 +         grid_word = extract_word_from_grid(grid, row, col, direction, len(word))
146 +
147 +         if grid_word != word:
|
148 |
+
issues.append(f"Word mismatch: '{word}' expected at ({row},{col}) {direction}, but grid shows '{grid_word}'")
|
149 |
+
|
150 |
+
# Check 2: Look for unintended letter sequences
|
151 |
+
all_sequences = find_all_letter_sequences(grid)
|
152 |
+
intended_words = {(w["row"], w["col"], w["direction"]): w["word"] for w in placed_words}
|
153 |
+
|
154 |
+
for seq_info in all_sequences:
|
155 |
+
row, col, direction, seq_word = seq_info
|
156 |
+
key = (row, col, direction)
|
157 |
+
|
158 |
+
if key not in intended_words:
|
159 |
+
if len(seq_word) > 1: # Only care about multi-letter sequences
|
160 |
+
issues.append(f"Unintended sequence: '{seq_word}' at ({row},{col}) {direction}")
|
161 |
+
elif intended_words[key] != seq_word:
|
162 |
+
issues.append(f"Sequence mismatch: expected '{intended_words[key]}' but found '{seq_word}' at ({row},{col}) {direction}")
|
163 |
+
|
164 |
+
# Check 3: Verify clue consistency
|
165 |
+
for clue in clues:
|
166 |
+
clue_word = clue["word"]
|
167 |
+
pos = clue["position"]
|
168 |
+
clue_row = pos["row"]
|
169 |
+
clue_col = pos["col"]
|
170 |
+
clue_direction = clue["direction"]
|
171 |
+
|
172 |
+
# Convert direction format if needed
|
173 |
+
direction_map = {"across": "horizontal", "down": "vertical"}
|
174 |
+
normalized_direction = direction_map.get(clue_direction, clue_direction)
|
175 |
+
|
176 |
+
grid_word = extract_word_from_grid(grid, clue_row, clue_col, normalized_direction, len(clue_word))
|
177 |
+
|
178 |
+
if grid_word != clue_word:
|
179 |
+
issues.append(f"Clue mismatch: clue says '{clue_word}' at ({clue_row},{clue_col}) {clue_direction}, but grid shows '{grid_word}'")
|
180 |
+
|
181 |
+
# Report results
|
182 |
+
if issues:
|
183 |
+
print("β Issues found:")
|
184 |
+
for issue in issues:
|
185 |
+
print(f" {issue}")
|
186 |
+
else:
|
187 |
+
print("β
No issues detected - grid appears consistent")
|
188 |
+
|
189 |
+
def extract_word_from_grid(grid, row, col, direction, expected_length):
|
190 |
+
"""Extract word from grid at given position and direction."""
|
191 |
+
if row >= len(grid) or col >= len(grid[0]) or row < 0 or col < 0:
|
192 |
+
return "OUT_OF_BOUNDS"
|
193 |
+
|
194 |
+
word = ""
|
195 |
+
|
196 |
+
if direction in ["horizontal", "across"]:
|
197 |
+
for i in range(expected_length):
|
198 |
+
if col + i >= len(grid[0]):
|
199 |
+
return word + "[TRUNCATED]"
|
200 |
+
word += grid[row][col + i]
|
201 |
+
elif direction in ["vertical", "down"]:
|
202 |
+
for i in range(expected_length):
|
203 |
+
if row + i >= len(grid):
|
204 |
+
return word + "[TRUNCATED]"
|
205 |
+
word += grid[row + i][col]
|
206 |
+
|
207 |
+
return word
|
208 |
+
|
209 |
+
def find_all_letter_sequences(grid):
|
210 |
+
"""Find all letter sequences (horizontal and vertical) in the grid."""
|
211 |
+
sequences = []
|
212 |
+
|
213 |
+
# Horizontal sequences
|
214 |
+
for r in range(len(grid)):
|
215 |
+
current_word = ""
|
216 |
+
start_col = None
|
217 |
+
|
218 |
+
for c in range(len(grid[0])):
|
219 |
+
if grid[r][c] != ".":
|
220 |
+
if start_col is None:
|
221 |
+
start_col = c
|
222 |
+
current_word += grid[r][c]
|
223 |
+
else:
|
224 |
+
if current_word and len(current_word) > 1:
|
225 |
+
sequences.append((r, start_col, "horizontal", current_word))
|
226 |
+
current_word = ""
|
227 |
+
start_col = None
|
228 |
+
|
229 |
+
# Handle end of row
|
230 |
+
if current_word and len(current_word) > 1:
|
231 |
+
sequences.append((r, start_col, "horizontal", current_word))
|
232 |
+
|
233 |
+
# Vertical sequences
|
234 |
+
for c in range(len(grid[0])):
|
235 |
+
current_word = ""
|
236 |
+
start_row = None
|
237 |
+
|
238 |
+
for r in range(len(grid)):
|
239 |
+
if grid[r][c] != ".":
|
240 |
+
if start_row is None:
|
241 |
+
start_row = r
|
242 |
+
current_word += grid[r][c]
|
243 |
+
else:
|
244 |
+
if current_word and len(current_word) > 1:
|
245 |
+
sequences.append((start_row, c, "vertical", current_word))
|
246 |
+
current_word = ""
|
247 |
+
start_row = None
|
248 |
+
|
249 |
+
# Handle end of column
|
250 |
+
if current_word and len(current_word) > 1:
|
251 |
+
sequences.append((start_row, c, "vertical", current_word))
|
252 |
+
|
253 |
+
return sequences
|
254 |
+
|
255 |
+
def check_machine_machinery_issue(grid, placed_words):
|
256 |
+
"""Specifically check for MACHINE vs MACHINERY confusion."""
|
257 |
+
|
258 |
+
print("\nChecking for MACHINE/MACHINERY issue:")
|
259 |
+
|
260 |
+
machine_words = [w for w in placed_words if "MACHINE" in w["word"]]
|
261 |
+
|
262 |
+
if not machine_words:
|
263 |
+
print(" No MACHINE-related words found")
|
264 |
+
return
|
265 |
+
|
266 |
+
for word_info in machine_words:
|
267 |
+
word = word_info["word"]
|
268 |
+
row = word_info["row"]
|
269 |
+
col = word_info["col"]
|
270 |
+
direction = word_info["direction"]
|
271 |
+
|
272 |
+
print(f" Found: '{word}' at ({row},{col}) {direction}")
|
273 |
+
|
274 |
+
# Check what's actually in the grid at this location
|
275 |
+
grid_word = extract_word_from_grid(grid, row, col, direction, len(word))
|
276 |
+
print(f" Grid shows: '{grid_word}'")
|
277 |
+
|
278 |
+
# Check if there are extra letters that might create confusion
|
279 |
+
if direction == "horizontal":
|
280 |
+
# Check for letters after the word
|
281 |
+
end_col = col + len(word)
|
282 |
+
if end_col < len(grid[0]) and grid[row][end_col] != ".":
|
283 |
+
extra_letters = ""
|
284 |
+
check_col = end_col
|
285 |
+
while check_col < len(grid[0]) and grid[row][check_col] != ".":
|
286 |
+
extra_letters += grid[row][check_col]
|
287 |
+
check_col += 1
|
288 |
+
if extra_letters:
|
289 |
+
print(f" β οΈ Extra letters after word: '{extra_letters}'")
|
290 |
+
print(f" This might make '{word}' appear as '{word + extra_letters}'")
|
291 |
+
|
292 |
+
if __name__ == "__main__":
|
293 |
+
test_direct_grid_generation()
|
crossword-app/backend-py/debug_index_error.py
ADDED
@@ -0,0 +1,307 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
"""
|
3 |
+
Debug the recurring index error by adding comprehensive bounds checking.
|
4 |
+
"""
|
5 |
+
|
6 |
+
import asyncio
|
7 |
+
import sys
|
8 |
+
import logging
|
9 |
+
from pathlib import Path
|
10 |
+
|
11 |
+
# Add project root to path
|
12 |
+
project_root = Path(__file__).parent
|
13 |
+
sys.path.insert(0, str(project_root))
|
14 |
+
|
15 |
+
from src.services.crossword_generator_fixed import CrosswordGeneratorFixed
|
16 |
+
from src.services.vector_search import VectorSearchService
|
17 |
+
|
18 |
+
# Enable debug logging
|
19 |
+
logging.basicConfig(level=logging.DEBUG)
|
20 |
+
logger = logging.getLogger(__name__)
|
21 |
+
|
22 |
+
class DebugCrosswordGenerator(CrosswordGeneratorFixed):
|
23 |
+
"""Debug version with comprehensive bounds checking."""
|
24 |
+
|
25 |
+
def _can_place_word(self, grid, word, row, col, direction):
|
26 |
+
"""Enhanced _can_place_word with comprehensive bounds checking."""
|
27 |
+
try:
|
28 |
+
size = len(grid)
|
29 |
+
logger.debug(f"_can_place_word: word={word}, row={row}, col={col}, direction={direction}, grid_size={size}")
|
30 |
+
|
31 |
+
# Check initial boundaries
|
32 |
+
if row < 0 or col < 0 or row >= size or col >= size:
|
33 |
+
logger.debug(f"Initial bounds check failed: row={row}, col={col}, size={size}")
|
34 |
+
return False
|
35 |
+
|
36 |
+
if direction == "horizontal":
|
37 |
+
if col + len(word) > size:
|
38 |
+
logger.debug(f"Horizontal bounds check failed: col+len(word)={col + len(word)} > size={size}")
|
39 |
+
return False
|
40 |
+
|
41 |
+
# Check word boundaries (no adjacent letters) - with bounds check
|
42 |
+
if col > 0:
|
43 |
+
if row >= size or col - 1 >= size or row < 0 or col - 1 < 0:
|
44 |
+
logger.debug(f"Horizontal left boundary check failed: row={row}, col-1={col-1}, size={size}")
|
45 |
+
return False
|
46 |
+
if grid[row][col - 1] != ".":
|
47 |
+
logger.debug(f"Horizontal left boundary has adjacent letter")
|
48 |
+
return False
|
49 |
+
|
50 |
+
if col + len(word) < size:
|
51 |
+
if row >= size or col + len(word) >= size or row < 0 or col + len(word) < 0:
|
52 |
+
logger.debug(f"Horizontal right boundary check failed: row={row}, col+len={col + len(word)}, size={size}")
|
53 |
+
return False
|
54 |
+
if grid[row][col + len(word)] != ".":
|
55 |
+
logger.debug(f"Horizontal right boundary has adjacent letter")
|
56 |
+
return False
|
57 |
+
|
58 |
+
# Check each letter position
|
59 |
+
for i, letter in enumerate(word):
|
60 |
+
check_row = row
|
61 |
+
check_col = col + i
|
62 |
+
if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
|
63 |
+
logger.debug(f"Horizontal letter position check failed: letter {i}, row={check_row}, col={check_col}, size={size}")
|
64 |
+
return False
|
65 |
+
current_cell = grid[check_row][check_col]
|
66 |
+
if current_cell != "." and current_cell != letter:
|
67 |
+
logger.debug(f"Horizontal letter conflict: expected {letter}, found {current_cell}")
|
68 |
+
return False
|
69 |
+
|
70 |
+
else: # vertical
|
71 |
+
if row + len(word) > size:
|
72 |
+
logger.debug(f"Vertical bounds check failed: row+len(word)={row + len(word)} > size={size}")
|
73 |
+
return False
|
74 |
+
|
75 |
+
# Check word boundaries - with bounds check
|
76 |
+
if row > 0:
|
77 |
+
if row - 1 >= size or col >= size or row - 1 < 0 or col < 0:
|
78 |
+
logger.debug(f"Vertical top boundary check failed: row-1={row-1}, col={col}, size={size}")
|
79 |
+
return False
|
80 |
+
if grid[row - 1][col] != ".":
|
81 |
+
logger.debug(f"Vertical top boundary has adjacent letter")
|
82 |
+
return False
|
83 |
+
|
84 |
+
if row + len(word) < size:
|
85 |
+
if row + len(word) >= size or col >= size or row + len(word) < 0 or col < 0:
|
86 |
+
logger.debug(f"Vertical bottom boundary check failed: row+len={row + len(word)}, col={col}, size={size}")
|
87 |
+
return False
|
88 |
+
if grid[row + len(word)][col] != ".":
|
89 |
+
logger.debug(f"Vertical bottom boundary has adjacent letter")
|
90 |
+
return False
|
91 |
+
|
92 |
+
# Check each letter position
|
93 |
+
for i, letter in enumerate(word):
|
94 |
+
check_row = row + i
|
95 |
+
check_col = col
|
96 |
+
if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
|
97 |
+
logger.debug(f"Vertical letter position check failed: letter {i}, row={check_row}, col={check_col}, size={size}")
|
98 |
+
return False
|
99 |
+
current_cell = grid[check_row][check_col]
|
100 |
+
if current_cell != "." and current_cell != letter:
|
101 |
+
logger.debug(f"Vertical letter conflict: expected {letter}, found {current_cell}")
|
102 |
+
return False
|
103 |
+
|
104 |
+
logger.debug(f"_can_place_word: SUCCESS for word={word}")
|
105 |
+
return True
|
106 |
+
|
107 |
+
except Exception as e:
|
108 |
+
logger.error(f"β ERROR in _can_place_word: {e}")
|
109 |
+
logger.error(f" word={word}, row={row}, col={col}, direction={direction}")
|
110 |
+
logger.error(f" grid_size={len(grid) if grid else 'None'}")
|
111 |
+
import traceback
|
112 |
+
traceback.print_exc()
|
113 |
+
return False
|
114 |
+
|
115 |
+
def _place_word(self, grid, word, row, col, direction):
|
116 |
+
"""Enhanced _place_word with comprehensive bounds checking."""
|
117 |
+
try:
|
118 |
+
size = len(grid)
|
119 |
+
logger.debug(f"_place_word: word={word}, row={row}, col={col}, direction={direction}, grid_size={size}")
|
120 |
+
|
121 |
+
original_state = []
|
122 |
+
|
123 |
+
if direction == "horizontal":
|
124 |
+
for i, letter in enumerate(word):
|
125 |
+
check_row = row
|
126 |
+
check_col = col + i
|
127 |
+
if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
|
128 |
+
logger.error(f"β _place_word horizontal bounds error: row={check_row}, col={check_col}, size={size}")
|
129 |
+
raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}")
|
130 |
+
|
131 |
+
original_state.append({
|
132 |
+
"row": check_row,
|
133 |
+
"col": check_col,
|
134 |
+
"value": grid[check_row][check_col]
|
135 |
+
})
|
136 |
+
grid[check_row][check_col] = letter
|
137 |
+
else:
|
138 |
+
for i, letter in enumerate(word):
|
139 |
+
check_row = row + i
|
140 |
+
check_col = col
|
141 |
+
if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
|
142 |
+
logger.error(f"β _place_word vertical bounds error: row={check_row}, col={check_col}, size={size}")
|
143 |
+
raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}")
|
144 |
+
|
145 |
+
original_state.append({
|
146 |
+
"row": check_row,
|
147 |
+
"col": check_col,
|
148 |
+
"value": grid[check_row][check_col]
|
149 |
+
})
|
150 |
+
grid[check_row][check_col] = letter
|
151 |
+
|
152 |
+
logger.debug(f"_place_word: SUCCESS for word={word}")
|
153 |
+
return original_state
|
154 |
+
|
155 |
+
except Exception as e:
|
156 |
+
logger.error(f"β ERROR in _place_word: {e}")
|
157 |
+
logger.error(f" word={word}, row={row}, col={col}, direction={direction}")
|
158 |
+
logger.error(f" grid_size={len(grid) if grid else 'None'}")
|
159 |
+
import traceback
|
160 |
+
traceback.print_exc()
|
161 |
+
raise
|
162 |
+
|
163 |
+
def _remove_word(self, grid, original_state):
|
164 |
+
"""Enhanced _remove_word with comprehensive bounds checking."""
|
165 |
+
try:
|
166 |
+
size = len(grid)
|
167 |
+
logger.debug(f"_remove_word: restoring {len(original_state)} positions, grid_size={size}")
|
168 |
+
|
169 |
+
for state in original_state:
|
170 |
+
check_row = state["row"]
|
171 |
+
check_col = state["col"]
|
172 |
+
if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
|
173 |
+
logger.error(f"β _remove_word bounds error: row={check_row}, col={check_col}, size={size}")
|
174 |
+
raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}")
|
175 |
+
|
176 |
+
grid[check_row][check_col] = state["value"]
|
177 |
+
|
178 |
+
logger.debug(f"_remove_word: SUCCESS")
|
179 |
+
|
180 |
+
except Exception as e:
|
181 |
+
logger.error(f"β ERROR in _remove_word: {e}")
|
182 |
+
logger.error(f" grid_size={len(grid) if grid else 'None'}")
|
183 |
+
logger.error(f" original_state={original_state}")
|
184 |
+
import traceback
|
185 |
+
traceback.print_exc()
|
186 |
+
raise
|
187 |
+
|
188 |
+
def _create_simple_cross(self, word_list, word_objs):
|
189 |
+
"""Enhanced _create_simple_cross with comprehensive bounds checking."""
|
190 |
+
try:
|
191 |
+
logger.debug(f"_create_simple_cross: words={word_list}")
|
192 |
+
|
193 |
+
if len(word_list) < 2:
|
194 |
+
logger.debug("Not enough words for simple cross")
|
195 |
+
return None
|
196 |
+
|
197 |
+
word1, word2 = word_list[0], word_list[1]
|
198 |
+
intersections = self._find_word_intersections(word1, word2)
|
199 |
+
|
200 |
+
if not intersections:
|
201 |
+
logger.debug("No intersections found")
|
202 |
+
return None
|
203 |
+
|
204 |
+
# Use first intersection
|
205 |
+
intersection = intersections[0]
|
206 |
+
size = max(len(word1), len(word2)) + 4
|
207 |
+
logger.debug(f"Creating grid of size {size} for simple cross")
|
208 |
+
|
209 |
+
grid = [["." for _ in range(size)] for _ in range(size)]
|
210 |
+
|
211 |
+
# Place first word horizontally in center
|
212 |
+
center_row = size // 2
|
213 |
+
center_col = (size - len(word1)) // 2
|
214 |
+
|
215 |
+
logger.debug(f"Placing word1 '{word1}' at row={center_row}, col={center_col}")
|
216 |
+
|
217 |
+
for i, letter in enumerate(word1):
|
218 |
+
check_row = center_row
|
219 |
+
check_col = center_col + i
|
220 |
+
if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
|
221 |
+
logger.error(f"β _create_simple_cross word1 bounds error: row={check_row}, col={check_col}, size={size}")
|
222 |
+
raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}")
|
223 |
+
grid[check_row][check_col] = letter
|
224 |
+
|
225 |
+
# Place second word vertically at intersection
|
226 |
+
intersection_col = center_col + intersection["word_pos"]
|
227 |
+
word2_start_row = center_row - intersection["placed_pos"]
|
228 |
+
|
229 |
+
logger.debug(f"Placing word2 '{word2}' at row={word2_start_row}, col={intersection_col}")
|
230 |
+
|
231 |
+
for i, letter in enumerate(word2):
|
232 |
+
check_row = word2_start_row + i
|
233 |
+
check_col = intersection_col
|
234 |
+
if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
|
235 |
+
logger.error(f"β _create_simple_cross word2 bounds error: row={check_row}, col={check_col}, size={size}")
|
236 |
+
raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}")
|
237 |
+
grid[check_row][check_col] = letter
|
238 |
+
|
239 |
+
placed_words = [
|
240 |
+
{"word": word1, "row": center_row, "col": center_col, "direction": "horizontal", "number": 1},
|
241 |
+
{"word": word2, "row": word2_start_row, "col": intersection_col, "direction": "vertical", "number": 2}
|
242 |
+
]
|
243 |
+
|
244 |
+
logger.debug(f"_create_simple_cross: SUCCESS")
|
245 |
+
|
246 |
+
trimmed = self._trim_grid(grid, placed_words)
|
247 |
+
clues = self._generate_clues(word_objs[:2], trimmed["placed_words"])
|
248 |
+
|
249 |
+
return {
|
250 |
+
"grid": trimmed["grid"],
|
251 |
+
"placed_words": trimmed["placed_words"],
|
252 |
+
"clues": clues
|
253 |
+
}
|
254 |
+
|
255 |
+
except Exception as e:
|
256 |
+
logger.error(f"β ERROR in _create_simple_cross: {e}")
|
257 |
+
import traceback
|
258 |
+
traceback.print_exc()
|
259 |
+
raise
|
260 |
+
|
261 |
+
async def test_debug_generator():
|
262 |
+
"""Test the debug generator to catch index errors."""
|
263 |
+
try:
|
264 |
+
print("π§ͺ Testing debug crossword generator...")
|
265 |
+
|
266 |
+
# Create mock vector service
|
267 |
+
vector_service = VectorSearchService()
|
268 |
+
|
269 |
+
# Create debug generator
|
270 |
+
generator = DebugCrosswordGenerator(vector_service)
|
271 |
+
|
272 |
+
# Test with various topics and difficulties
|
273 |
+
test_cases = [
|
274 |
+
(["animals"], "medium"),
|
275 |
+
(["science"], "hard"),
|
276 |
+
(["technology"], "easy"),
|
277 |
+
(["animals", "science"], "medium"),
|
278 |
+
]
|
279 |
+
|
280 |
+
for i, (topics, difficulty) in enumerate(test_cases):
|
281 |
+
print(f"\n㪠Test {i+1}: topics={topics}, difficulty={difficulty}")
|
282 |
+
try:
|
283 |
+
result = await generator.generate_puzzle(topics, difficulty, use_ai=False)
|
284 |
+
if result:
|
285 |
+
print(f"β
Test {i+1} succeeded")
|
286 |
+
grid_size = len(result['grid'])
|
287 |
+
word_count = len(result['clues'])
|
288 |
+
print(f" Grid: {grid_size}x{grid_size}, Words: {word_count}")
|
289 |
+
else:
|
290 |
+
print(f"β οΈ Test {i+1} returned None")
|
291 |
+
except Exception as e:
|
292 |
+
print(f"β Test {i+1} failed: {e}")
|
293 |
+
import traceback
|
294 |
+
traceback.print_exc()
|
295 |
+
return False
|
296 |
+
|
297 |
+
print(f"\nβ
All debug tests completed!")
|
298 |
+
return True
|
299 |
+
|
300 |
+
except Exception as e:
|
301 |
+
print(f"β Debug test setup failed: {e}")
|
302 |
+
import traceback
|
303 |
+
traceback.print_exc()
|
304 |
+
return False
|
305 |
+
|
306 |
+
if __name__ == "__main__":
|
307 |
+
asyncio.run(test_debug_generator())
|
crossword-app/backend-py/debug_simple.py
ADDED
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
"""
|
3 |
+
Simple debug test for crossword generator index errors.
|
4 |
+
"""
|
5 |
+
|
6 |
+
import asyncio
|
7 |
+
import sys
|
8 |
+
import logging
|
9 |
+
from pathlib import Path
|
10 |
+
|
11 |
+
# Add project root to path
|
12 |
+
project_root = Path(__file__).parent
|
13 |
+
sys.path.insert(0, str(project_root))
|
14 |
+
|
15 |
+
from src.services.crossword_generator_fixed import CrosswordGeneratorFixed
|
16 |
+
|
17 |
+
# Enable debug logging
|
18 |
+
logging.basicConfig(level=logging.DEBUG)
|
19 |
+
logger = logging.getLogger(__name__)
|
20 |
+
|
21 |
+
async def test_with_static_words():
|
22 |
+
"""Test generator with static word lists."""
|
23 |
+
|
24 |
+
# Create generator without vector service
|
25 |
+
generator = CrosswordGeneratorFixed(vector_service=None)
|
26 |
+
|
27 |
+
# Create test words
|
28 |
+
test_words = [
|
29 |
+
{"word": "CAT", "clue": "Feline pet"},
|
30 |
+
{"word": "DOG", "clue": "Man's best friend"},
|
31 |
+
{"word": "BIRD", "clue": "Flying animal"},
|
32 |
+
{"word": "FISH", "clue": "Aquatic animal"},
|
33 |
+
{"word": "ELEPHANT", "clue": "Large mammal"},
|
34 |
+
{"word": "TIGER", "clue": "Striped cat"},
|
35 |
+
{"word": "HORSE", "clue": "Riding animal"},
|
36 |
+
{"word": "BEAR", "clue": "Large carnivore"}
|
37 |
+
]
|
38 |
+
|
39 |
+
print(f"π§ͺ Testing crossword generation with {len(test_words)} words...")
|
40 |
+
|
41 |
+
try:
|
42 |
+
# Test multiple times to catch intermittent errors
|
43 |
+
for attempt in range(10):
|
44 |
+
print(f"\n㪠Attempt {attempt + 1}/10")
|
45 |
+
|
46 |
+
# Shuffle words to create different scenarios
|
47 |
+
import random
|
48 |
+
random.shuffle(test_words)
|
49 |
+
|
50 |
+
# Override the word selection to use our test words
|
51 |
+
generator._select_words = lambda topics, difficulty, use_ai: test_words
|
52 |
+
|
53 |
+
result = await generator.generate_puzzle(["animals"], "medium", use_ai=False)
|
54 |
+
|
55 |
+
if result:
|
56 |
+
grid_size = len(result['grid'])
|
57 |
+
word_count = len(result['clues'])
|
58 |
+
print(f"β
Attempt {attempt + 1} succeeded: {grid_size}x{grid_size} grid, {word_count} words")
|
59 |
+
else:
|
60 |
+
print(f"β οΈ Attempt {attempt + 1} returned None")
|
61 |
+
|
62 |
+
except IndexError as e:
|
63 |
+
print(f"β INDEX ERROR caught on attempt {attempt + 1}: {e}")
|
64 |
+
import traceback
|
65 |
+
traceback.print_exc()
|
66 |
+
return False
|
67 |
+
except Exception as e:
|
68 |
+
print(f"β Other error on attempt {attempt + 1}: {e}")
|
69 |
+
import traceback
|
70 |
+
traceback.print_exc()
|
71 |
+
return False
|
72 |
+
|
73 |
+
print(f"\nβ
All 10 attempts completed successfully!")
|
74 |
+
return True
|
75 |
+
|
76 |
+
async def test_grid_placement_directly():
|
77 |
+
"""Test grid placement functions directly with problematic data."""
|
78 |
+
|
79 |
+
generator = CrosswordGeneratorFixed(vector_service=None)
|
80 |
+
|
81 |
+
# Test data that might cause issues
|
82 |
+
test_cases = [
|
83 |
+
{
|
84 |
+
"words": ["A", "I"], # Very short words
|
85 |
+
"description": "Very short words"
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"words": ["VERYLONGWORDTHATMIGHTCAUSEISSUES", "SHORT"],
|
89 |
+
"description": "Very long word with short word"
|
90 |
+
},
|
91 |
+
{
|
92 |
+
"words": ["ABCDEFGHIJKLMNOP", "QRSTUVWXYZ"], # Long words
|
93 |
+
"description": "Two long words"
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"words": ["TEST", "SETS", "NETS", "PETS"], # Multiple similar words
|
97 |
+
"description": "Similar words with same endings"
|
98 |
+
}
|
99 |
+
]
|
100 |
+
|
101 |
+
for i, test_case in enumerate(test_cases):
|
102 |
+
print(f"\n㪠Grid test {i+1}: {test_case['description']}")
|
103 |
+
|
104 |
+
try:
|
105 |
+
word_list = test_case["words"]
|
106 |
+
word_objs = [{"word": w, "clue": f"Clue for {w}"} for w in word_list]
|
107 |
+
|
108 |
+
result = generator._create_grid(word_objs)
|
109 |
+
|
110 |
+
if result:
|
111 |
+
grid_size = len(result['grid'])
|
112 |
+
word_count = len(result['placed_words'])
|
113 |
+
print(f"β
Grid test {i+1} succeeded: {grid_size}x{grid_size} grid, {word_count} words")
|
114 |
+
else:
|
115 |
+
print(f"β οΈ Grid test {i+1} returned None")
|
116 |
+
|
117 |
+
except IndexError as e:
|
118 |
+
print(f"β INDEX ERROR in grid test {i+1}: {e}")
|
119 |
+
import traceback
|
120 |
+
traceback.print_exc()
|
121 |
+
return False
|
122 |
+
except Exception as e:
|
123 |
+
print(f"β Other error in grid test {i+1}: {e}")
|
124 |
+
import traceback
|
125 |
+
traceback.print_exc()
|
126 |
+
return False
|
127 |
+
|
128 |
+
return True
|
129 |
+
|
130 |
+
if __name__ == "__main__":
|
131 |
+
print("π§ͺ Starting debug tests for crossword generator...")
|
132 |
+
|
133 |
+
async def run_tests():
|
134 |
+
success1 = await test_with_static_words()
|
135 |
+
success2 = await test_grid_placement_directly()
|
136 |
+
|
137 |
+
if success1 and success2:
|
138 |
+
print("\nπ All debug tests passed! No index errors detected.")
|
139 |
+
else:
|
140 |
+
print("\nβ Some debug tests failed.")
|
141 |
+
|
142 |
+
asyncio.run(run_tests())
|
crossword-app/backend-py/pytest.ini
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[tool:pytest]
|
2 |
+
testpaths = test-unit
|
3 |
+
python_files = test_*.py
|
4 |
+
python_classes = Test*
|
5 |
+
python_functions = test_*
|
6 |
+
addopts =
|
7 |
+
-v
|
8 |
+
--tb=short
|
9 |
+
--strict-markers
|
10 |
+
--disable-warnings
|
11 |
+
--color=yes
|
12 |
+
markers =
|
13 |
+
slow: marks tests as slow (deselect with '-m "not slow"')
|
14 |
+
integration: marks tests as integration tests
|
15 |
+
unit: marks tests as unit tests
|
16 |
+
asyncio_mode = auto
|
crossword-app/backend-py/requirements-dev.txt
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Development requirements with AI/ML dependencies
|
2 |
+
# This file includes ALL dependencies for full development environment
|
3 |
+
|
4 |
+
# Include base requirements
|
5 |
+
-r requirements.txt
|
6 |
+
|
7 |
+
# AI/ML dependencies for vector-powered word generation
|
8 |
+
sentence-transformers==3.3.0
|
9 |
+
torch==2.5.1
|
10 |
+
transformers==4.47.1
|
11 |
+
scikit-learn==1.5.2
|
12 |
+
huggingface-hub==0.26.2
|
13 |
+
faiss-cpu==1.9.0
|
14 |
+
|
15 |
+
# Additional development tools
|
16 |
+
pytest-cov==6.0.0 # For test coverage reports
|
17 |
+
black==24.8.0 # Code formatter (optional)
|
18 |
+
flake8==7.1.1 # Linting (optional)
|
crossword-app/backend-py/requirements.txt
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Core FastAPI and web server dependencies
|
2 |
+
fastapi==0.116.1
|
3 |
+
uvicorn[standard]==0.32.1
|
4 |
+
starlette==0.47.2
|
5 |
+
python-dotenv==1.0.1
|
6 |
+
python-multipart==0.0.12
|
7 |
+
|
8 |
+
# Data validation and serialization
|
9 |
+
pydantic==2.11.7
|
10 |
+
pydantic-core==2.33.2
|
11 |
+
typing-extensions==4.14.1
|
12 |
+
typing-inspection==0.4.1
|
13 |
+
|
14 |
+
# HTTP client dependencies
|
15 |
+
httpx==0.28.1
|
16 |
+
httpcore==1.0.9
|
17 |
+
h11==0.16.0
|
18 |
+
anyio==4.10.0
|
19 |
+
requests==2.32.4
|
20 |
+
certifi==2025.8.3
|
21 |
+
idna==3.10
|
22 |
+
|
23 |
+
# Core data processing
|
24 |
+
numpy==2.3.2
|
25 |
+
|
26 |
+
# Logging and monitoring
|
27 |
+
structlog==25.4.0
|
28 |
+
|
29 |
+
# Development and testing dependencies
|
30 |
+
pytest==8.4.1
|
31 |
+
pytest-asyncio==1.1.0
|
32 |
+
iniconfig==2.1.0
|
33 |
+
packaging==25.0
|
34 |
+
pluggy==1.6.0
|
35 |
+
pygments==2.19.2
|
36 |
+
|
37 |
+
# AI/ML dependencies (optional - install separately if needed)
|
38 |
+
# Uncomment these lines if you want AI-powered word generation:
|
39 |
+
# sentence-transformers==3.3.0
|
40 |
+
# torch==2.5.1
|
41 |
+
# transformers==4.47.1
|
42 |
+
# scikit-learn==1.5.2
|
43 |
+
# huggingface-hub==0.26.2
|
44 |
+
# faiss-cpu==1.9.0
|
45 |
+
|
46 |
+
# Additional utility dependencies
|
47 |
+
annotated-types==0.7.0
|
48 |
+
sniffio==1.3.1
|
crossword-app/backend-py/run_tests.py
ADDED
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
"""
|
3 |
+
Test runner script for the backend-py project.
|
4 |
+
Run this script to execute all unit tests.
|
5 |
+
"""
|
6 |
+
|
7 |
+
import sys
|
8 |
+
import subprocess
|
9 |
+
from pathlib import Path
|
10 |
+
|
11 |
+
def run_tests():
|
12 |
+
"""Run all tests using pytest."""
|
13 |
+
print("π§ͺ Running Python Backend Unit Tests\n")
|
14 |
+
|
15 |
+
# Change to project directory
|
16 |
+
project_root = Path(__file__).parent
|
17 |
+
|
18 |
+
try:
|
19 |
+
# Run pytest with coverage if available
|
20 |
+
cmd = [
|
21 |
+
sys.executable, "-m", "pytest",
|
22 |
+
"test-unit/",
|
23 |
+
"-v",
|
24 |
+
"--tb=short",
|
25 |
+
"--color=yes"
|
26 |
+
]
|
27 |
+
|
28 |
+
# Try to add coverage if pytest-cov is available
|
29 |
+
try:
|
30 |
+
import pytest_cov
|
31 |
+
cmd.extend([
|
32 |
+
"--cov=src",
|
33 |
+
"--cov-report=term-missing",
|
34 |
+
"--cov-report=html:htmlcov"
|
35 |
+
])
|
36 |
+
print("π Running tests with coverage analysis")
|
37 |
+
except ImportError:
|
38 |
+
print("π Running tests without coverage (install pytest-cov for coverage)")
|
39 |
+
|
40 |
+
print(f"π Command: {' '.join(cmd)}\n")
|
41 |
+
|
42 |
+
result = subprocess.run(cmd, cwd=project_root)
|
43 |
+
|
44 |
+
if result.returncode == 0:
|
45 |
+
print("\nβ
All tests passed!")
|
46 |
+
if 'pytest_cov' in locals():
|
47 |
+
print("π Coverage report generated in htmlcov/index.html")
|
48 |
+
else:
|
49 |
+
print(f"\nβ Tests failed with exit code {result.returncode}")
|
50 |
+
|
51 |
+
return result.returncode
|
52 |
+
|
53 |
+
except FileNotFoundError:
|
54 |
+
print("β pytest not found. Install it with: pip install pytest pytest-asyncio")
|
55 |
+
return 1
|
56 |
+
except Exception as e:
|
57 |
+
print(f"β Error running tests: {e}")
|
58 |
+
return 1
|
59 |
+
|
60 |
+
def run_specific_test(test_file):
|
61 |
+
"""Run a specific test file."""
|
62 |
+
print(f"π― Running specific test: {test_file}\n")
|
63 |
+
|
64 |
+
try:
|
65 |
+
cmd = [sys.executable, "-m", "pytest", f"test-unit/{test_file}", "-v"]
|
66 |
+
result = subprocess.run(cmd, cwd=Path(__file__).parent)
|
67 |
+
return result.returncode
|
68 |
+
except Exception as e:
|
69 |
+
print(f"β Error running test {test_file}: {e}")
|
70 |
+
return 1
|
71 |
+
|
72 |
+
def main():
|
73 |
+
"""Main entry point."""
|
74 |
+
if len(sys.argv) > 1:
|
75 |
+
# Run specific test file
|
76 |
+
test_file = sys.argv[1]
|
77 |
+
if not test_file.startswith("test_"):
|
78 |
+
test_file = f"test_{test_file}"
|
79 |
+
if not test_file.endswith(".py"):
|
80 |
+
test_file = f"{test_file}.py"
|
81 |
+
|
82 |
+
return run_specific_test(test_file)
|
83 |
+
else:
|
84 |
+
# Run all tests
|
85 |
+
return run_tests()
|
86 |
+
|
87 |
+
if __name__ == "__main__":
|
88 |
+
exit_code = main()
|
89 |
+
sys.exit(exit_code)
|
crossword-app/backend-py/src/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Python backend package
|
crossword-app/backend-py/src/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (179 Bytes). View file
|
|
crossword-app/backend-py/src/routes/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Routes package
|
crossword-app/backend-py/src/routes/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (186 Bytes). View file
|
|
crossword-app/backend-py/src/routes/__pycache__/api.cpython-313.pyc
ADDED
Binary file (8.43 kB). View file
|
|
crossword-app/backend-py/src/routes/api.py
ADDED
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
API routes for crossword puzzle generator.
|
3 |
+
Matches the existing JavaScript API for frontend compatibility.
|
4 |
+
"""
|
5 |
+
|
6 |
+
import logging
|
7 |
+
from typing import List, Dict, Any, Optional
|
8 |
+
from datetime import datetime
|
9 |
+
|
10 |
+
from fastapi import APIRouter, HTTPException, Request, Depends
|
11 |
+
from pydantic import BaseModel, Field
|
12 |
+
|
13 |
+
from ..services.crossword_generator_wrapper import CrosswordGenerator
|
14 |
+
|
15 |
+
logger = logging.getLogger(__name__)
|
16 |
+
|
17 |
+
router = APIRouter()
|
18 |
+
|
19 |
+
# Request/Response models
|
20 |
+
class GeneratePuzzleRequest(BaseModel):
|
21 |
+
topics: List[str] = Field(..., description="List of topics for the puzzle")
|
22 |
+
difficulty: str = Field(default="medium", description="Difficulty level: easy, medium, hard")
|
23 |
+
useAI: bool = Field(default=False, description="Use AI vector search for word generation")
|
24 |
+
|
25 |
+
class WordInfo(BaseModel):
|
26 |
+
word: str
|
27 |
+
clue: str
|
28 |
+
similarity: Optional[float] = None
|
29 |
+
source: Optional[str] = None
|
30 |
+
|
31 |
+
class ClueInfo(BaseModel):
|
32 |
+
number: int
|
33 |
+
word: str
|
34 |
+
text: str
|
35 |
+
direction: str # "across" or "down"
|
36 |
+
position: Dict[str, int] # {"row": int, "col": int}
|
37 |
+
|
38 |
+
class PuzzleMetadata(BaseModel):
|
39 |
+
topics: List[str]
|
40 |
+
difficulty: str
|
41 |
+
wordCount: int
|
42 |
+
size: int
|
43 |
+
aiGenerated: bool
|
44 |
+
|
45 |
+
class PuzzleResponse(BaseModel):
|
46 |
+
grid: List[List[str]]
|
47 |
+
clues: List[ClueInfo]
|
48 |
+
metadata: PuzzleMetadata
|
49 |
+
|
50 |
+
class TopicInfo(BaseModel):
|
51 |
+
id: str
|
52 |
+
name: str
|
53 |
+
|
54 |
+
# Global crossword generator instance (will be initialized in lifespan)
|
55 |
+
generator = None
|
56 |
+
|
57 |
+
def get_crossword_generator(request: Request) -> CrosswordGenerator:
|
58 |
+
"""Dependency to get the crossword generator with vector search service."""
|
59 |
+
global generator
|
60 |
+
if generator is None:
|
61 |
+
vector_service = getattr(request.app.state, 'vector_service', None)
|
62 |
+
generator = CrosswordGenerator(vector_service)
|
63 |
+
return generator
|
64 |
+
|
65 |
+
@router.get("/topics", response_model=List[TopicInfo])
|
66 |
+
async def get_topics():
|
67 |
+
"""Get available topics for puzzle generation."""
|
68 |
+
# Return the same topics as JavaScript backend for consistency
|
69 |
+
topics = [
|
70 |
+
{"id": "animals", "name": "Animals"},
|
71 |
+
{"id": "geography", "name": "Geography"},
|
72 |
+
{"id": "science", "name": "Science"},
|
73 |
+
{"id": "technology", "name": "Technology"}
|
74 |
+
]
|
75 |
+
return topics
|
76 |
+
|
77 |
+
@router.post("/generate", response_model=PuzzleResponse)
|
78 |
+
async def generate_puzzle(
|
79 |
+
request: GeneratePuzzleRequest,
|
80 |
+
crossword_gen: CrosswordGenerator = Depends(get_crossword_generator)
|
81 |
+
):
|
82 |
+
"""
|
83 |
+
Generate a crossword puzzle with optional AI vector search.
|
84 |
+
|
85 |
+
This endpoint matches the JavaScript API exactly for frontend compatibility.
|
86 |
+
"""
|
87 |
+
try:
|
88 |
+
logger.info(f"π― Generating puzzle for topics: {request.topics}, difficulty: {request.difficulty}, useAI: {request.useAI}")
|
89 |
+
|
90 |
+
# Validate topics
|
91 |
+
if not request.topics:
|
92 |
+
raise HTTPException(status_code=400, detail="At least one topic is required")
|
93 |
+
|
94 |
+
valid_difficulties = ["easy", "medium", "hard"]
|
95 |
+
if request.difficulty not in valid_difficulties:
|
96 |
+
raise HTTPException(
|
97 |
+
status_code=400,
|
98 |
+
detail=f"Invalid difficulty. Must be one of: {valid_difficulties}"
|
99 |
+
)
|
100 |
+
|
101 |
+
# Generate puzzle
|
102 |
+
puzzle_data = await crossword_gen.generate_puzzle(
|
103 |
+
topics=request.topics,
|
104 |
+
difficulty=request.difficulty,
|
105 |
+
use_ai=request.useAI
|
106 |
+
)
|
107 |
+
|
108 |
+
if not puzzle_data:
|
109 |
+
raise HTTPException(status_code=500, detail="Failed to generate puzzle")
|
110 |
+
|
111 |
+
logger.info(f"β
Generated puzzle with {puzzle_data['metadata']['wordCount']} words")
|
112 |
+
return puzzle_data
|
113 |
+
|
114 |
+
except HTTPException:
|
115 |
+
raise
|
116 |
+
except Exception as e:
|
117 |
+
logger.error(f"β Error generating puzzle: {e}")
|
118 |
+
raise HTTPException(status_code=500, detail=str(e))
|
119 |
+
|
120 |
+
@router.post("/words")
|
121 |
+
async def generate_words(
|
122 |
+
request: GeneratePuzzleRequest,
|
123 |
+
crossword_gen: CrosswordGenerator = Depends(get_crossword_generator)
|
124 |
+
):
|
125 |
+
"""
|
126 |
+
Generate words for given topics (debug endpoint).
|
127 |
+
|
128 |
+
This endpoint allows testing word generation without full puzzle creation.
|
129 |
+
"""
|
130 |
+
try:
|
131 |
+
words = await crossword_gen.generate_words_for_topics(
|
132 |
+
topics=request.topics,
|
133 |
+
difficulty=request.difficulty,
|
134 |
+
use_ai=request.useAI
|
135 |
+
)
|
136 |
+
|
137 |
+
return {
|
138 |
+
"topics": request.topics,
|
139 |
+
"difficulty": request.difficulty,
|
140 |
+
"useAI": request.useAI,
|
141 |
+
"wordCount": len(words),
|
142 |
+
"words": words
|
143 |
+
}
|
144 |
+
|
145 |
+
except Exception as e:
|
146 |
+
logger.error(f"β Error generating words: {e}")
|
147 |
+
raise HTTPException(status_code=500, detail=str(e))
|
148 |
+
|
149 |
+
@router.get("/health")
|
150 |
+
async def api_health():
|
151 |
+
"""API health check."""
|
152 |
+
return {
|
153 |
+
"status": "healthy",
|
154 |
+
"timestamp": datetime.utcnow().isoformat(),
|
155 |
+
"backend": "python",
|
156 |
+
"version": "2.0.0"
|
157 |
+
}
|
158 |
+
|
159 |
+
@router.get("/debug/vector-search")
|
160 |
+
async def debug_vector_search(
|
161 |
+
topic: str,
|
162 |
+
difficulty: str = "medium",
|
163 |
+
max_words: int = 10,
|
164 |
+
request: Request = None
|
165 |
+
):
|
166 |
+
"""
|
167 |
+
Debug endpoint to test vector search directly.
|
168 |
+
"""
|
169 |
+
try:
|
170 |
+
vector_service = getattr(request.app.state, 'vector_service', None)
|
171 |
+
if not vector_service or not vector_service.is_initialized:
|
172 |
+
raise HTTPException(status_code=503, detail="Vector search service not available")
|
173 |
+
|
174 |
+
words = await vector_service.find_similar_words(topic, difficulty, max_words)
|
175 |
+
|
176 |
+
return {
|
177 |
+
"topic": topic,
|
178 |
+
"difficulty": difficulty,
|
179 |
+
"max_words": max_words,
|
180 |
+
"found_words": len(words),
|
181 |
+
"words": words
|
182 |
+
}
|
183 |
+
|
184 |
+
except Exception as e:
|
185 |
+
logger.error(f"β Vector search debug failed: {e}")
|
186 |
+
raise HTTPException(status_code=500, detail=str(e))
|
crossword-app/backend-py/src/services/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Services package
|
crossword-app/backend-py/src/services/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (188 Bytes). View file
|
|
crossword-app/backend-py/src/services/__pycache__/crossword_generator.cpython-313.pyc
ADDED
Binary file (33.3 kB). View file
|
|
crossword-app/backend-py/src/services/__pycache__/crossword_generator_fixed.cpython-313.pyc
ADDED
Binary file (33.4 kB). View file
|
|
crossword-app/backend-py/src/services/__pycache__/crossword_generator_wrapper.cpython-313.pyc
ADDED
Binary file (2.91 kB). View file
|
|
crossword-app/backend-py/src/services/__pycache__/vector_search.cpython-313.pyc
ADDED
Binary file (28.5 kB). View file
|
|
crossword-app/backend-py/src/services/__pycache__/word_cache.cpython-313.pyc
ADDED
Binary file (17.3 kB). View file
|
|
crossword-app/backend-py/src/services/crossword_generator.py
ADDED
@@ -0,0 +1,722 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Fixed Crossword Generator - Ported from working JavaScript implementation.
|
3 |
+
"""
|
4 |
+
|
5 |
+
import asyncio
|
6 |
+
import json
|
7 |
+
import random
|
8 |
+
import time
|
9 |
+
from pathlib import Path
|
10 |
+
from typing import Dict, List, Optional, Any, Tuple
|
11 |
+
import structlog
|
12 |
+
|
13 |
+
logger = structlog.get_logger(__name__)
|
14 |
+
|
15 |
+
class CrosswordGenerator:
|
16 |
+
def __init__(self, vector_service=None):
|
17 |
+
self.max_attempts = 100
|
18 |
+
self.min_words = 6
|
19 |
+
self.max_words = 10 # Reduced from 12 to 10 for better success rate
|
20 |
+
self.vector_service = vector_service
|
21 |
+
|
22 |
+
async def generate_puzzle(self, topics: List[str], difficulty: str = "medium", use_ai: bool = False) -> Optional[Dict[str, Any]]:
|
23 |
+
"""
|
24 |
+
Generate a complete crossword puzzle.
|
25 |
+
"""
|
26 |
+
try:
|
27 |
+
# Import here to avoid circular imports - with fallback
|
28 |
+
try:
|
29 |
+
from .vector_search import VectorSearchService
|
30 |
+
except ImportError as import_error:
|
31 |
+
logger.warning(f"β οΈ Could not import VectorSearchService: {import_error}. Using static words only.")
|
32 |
+
# Continue without vector service
|
33 |
+
|
34 |
+
logger.info(f"π― Generating puzzle for topics: {topics}, difficulty: {difficulty}, AI: {use_ai}")
|
35 |
+
|
36 |
+
# Get words (from AI or static)
|
37 |
+
words = await self._select_words(topics, difficulty, use_ai)
|
38 |
+
|
39 |
+
if len(words) < self.min_words:
|
40 |
+
logger.error(f"β Not enough words: {len(words)} < {self.min_words}")
|
41 |
+
raise Exception(f"Not enough words generated: {len(words)} < {self.min_words}")
|
42 |
+
|
43 |
+
# Create grid
|
44 |
+
grid_result = self._create_grid(words)
|
45 |
+
|
46 |
+
if not grid_result:
|
47 |
+
logger.error("β Grid creation failed")
|
48 |
+
raise Exception("Could not create crossword grid")
|
49 |
+
|
50 |
+
logger.info(f"β
Generated crossword with {len(grid_result['placed_words'])} words")
|
51 |
+
|
52 |
+
return {
|
53 |
+
"grid": grid_result["grid"],
|
54 |
+
"clues": grid_result["clues"],
|
55 |
+
"metadata": {
|
56 |
+
"topics": topics,
|
57 |
+
"difficulty": difficulty,
|
58 |
+
"wordCount": len(grid_result["placed_words"]),
|
59 |
+
"size": len(grid_result["grid"]),
|
60 |
+
"aiGenerated": use_ai
|
61 |
+
}
|
62 |
+
}
|
63 |
+
|
64 |
+
except Exception as e:
|
65 |
+
logger.error(f"β Error generating puzzle: {e}")
|
66 |
+
raise
|
67 |
+
|
68 |
+
async def _select_words(self, topics: List[str], difficulty: str, use_ai: bool) -> List[Dict[str, Any]]:
|
69 |
+
"""Select words for the crossword."""
|
70 |
+
all_words = []
|
71 |
+
|
72 |
+
if use_ai and self.vector_service:
|
73 |
+
# Use the initialized vector service
|
74 |
+
logger.info(f"π€ Using initialized vector service for AI word generation")
|
75 |
+
for topic in topics:
|
76 |
+
ai_words = await self.vector_service.find_similar_words(topic, difficulty, self.max_words // len(topics))
|
77 |
+
all_words.extend(ai_words)
|
78 |
+
|
79 |
+
if len(all_words) >= self.min_words:
|
80 |
+
logger.info(f"β
AI generated {len(all_words)} words")
|
81 |
+
return self._sort_words_for_crossword(all_words[:self.max_words])
|
82 |
+
else:
|
83 |
+
logger.warning(f"β οΈ AI only generated {len(all_words)} words, falling back to static")
|
84 |
+
|
85 |
+
# Fallback to cached words
|
86 |
+
if self.vector_service:
|
87 |
+
# Use the cached words from the initialized service
|
88 |
+
logger.info(f"π¦ Using cached words from initialized vector service")
|
89 |
+
for topic in topics:
|
90 |
+
cached_words = await self.vector_service._get_cached_fallback(topic, difficulty, self.max_words // len(topics))
|
91 |
+
all_words.extend(cached_words)
|
92 |
+
else:
|
93 |
+
# Last resort: load static words directly
|
94 |
+
logger.warning(f"β οΈ No vector service available, loading static words directly")
|
95 |
+
all_words = await self._get_static_words(topics, difficulty)
|
96 |
+
|
97 |
+
return self._sort_words_for_crossword(all_words[:self.max_words])
|
98 |
+
|
99 |
+
async def _get_static_words(self, topics: List[str], difficulty: str) -> List[Dict[str, Any]]:
|
100 |
+
"""Get static words from JSON files."""
|
101 |
+
all_words = []
|
102 |
+
|
103 |
+
for topic in topics:
|
104 |
+
# Try multiple case variations
|
105 |
+
for topic_variation in [topic, topic.capitalize(), topic.lower()]:
|
106 |
+
word_file = Path(__file__).parent.parent.parent / "data" / "word-lists" / f"{topic_variation.lower()}.json"
|
107 |
+
|
108 |
+
if word_file.exists():
|
109 |
+
with open(word_file, 'r') as f:
|
110 |
+
words = json.load(f)
|
111 |
+
# Filter by difficulty
|
112 |
+
filtered = self._filter_by_difficulty(words, difficulty)
|
113 |
+
all_words.extend(filtered)
|
114 |
+
break
|
115 |
+
|
116 |
+
return all_words
|
117 |
+
|
118 |
+
def _filter_by_difficulty(self, words: List[Dict[str, Any]], difficulty: str) -> List[Dict[str, Any]]:
|
119 |
+
"""Filter words by difficulty (length)."""
|
120 |
+
difficulty_map = {
|
121 |
+
"easy": {"min_len": 3, "max_len": 8},
|
122 |
+
"medium": {"min_len": 4, "max_len": 10},
|
123 |
+
"hard": {"min_len": 5, "max_len": 15}
|
124 |
+
}
|
125 |
+
|
126 |
+
criteria = difficulty_map.get(difficulty, difficulty_map["medium"])
|
127 |
+
return [w for w in words if criteria["min_len"] <= len(w["word"]) <= criteria["max_len"]]
|
128 |
+
|
129 |
+
def _sort_words_for_crossword(self, words: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
130 |
+
"""Sort words by crossword suitability."""
|
131 |
+
scored_words = []
|
132 |
+
|
133 |
+
        for word_obj in words:
            word = word_obj["word"].upper()
            score = 0

            # Strongly prefer shorter words for crossword viability
            if 3 <= len(word) <= 5:
                score += 20  # Short words get highest priority
            elif 6 <= len(word) <= 7:
                score += 15  # Medium words get good priority
            elif len(word) == 8:
                score += 8  # Long words get lower priority
            elif len(word) == 9:
                score += 4  # Very long words get much lower priority
            elif len(word) >= 10:
                score += 1  # Extremely long words get minimal priority

            # Bonus for common letters
            common_letters = ['E', 'A', 'R', 'I', 'O', 'T', 'N', 'S']
            for letter in word:
                if letter in common_letters:
                    score += 1

            # Vowel distribution bonus
            vowels = ['A', 'E', 'I', 'O', 'U']
            vowel_count = sum(1 for letter in word if letter in vowels)
            score += vowel_count

            # Penalty for very long words to discourage their selection
            if len(word) >= 9:
                score -= 5

            scored_words.append({**word_obj, "crossword_score": score})

        # Sort by score with some randomization
        scored_words.sort(key=lambda w: w["crossword_score"] + random.randint(-2, 2), reverse=True)
        return scored_words

    def _create_grid(self, words: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
        """Create crossword grid using backtracking algorithm."""
        if not words:
            logger.error(f"❌ No words provided to grid generator")
            return None

        logger.info(f"🎯 Creating crossword grid with {len(words)} words")

        # Debug: log the structure of words
        logger.info(f"Word structures: {[type(w) for w in words[:3]]}")
        if words:
            logger.info(f"First word sample: {words[0]}")

        # Sort words by length (longest first) - keeping objects aligned
        try:
            # Create paired list of (word_string, word_object)
            word_pairs = []
            for i, w in enumerate(words):
                if isinstance(w, dict) and "word" in w:
                    word_pairs.append((w["word"].upper(), w))
                elif isinstance(w, str):
                    # Create dict for string-only words
                    word_obj = {"word": w.upper(), "clue": f"Clue for {w.upper()}"}
                    word_pairs.append((w.upper(), word_obj))
                else:
                    logger.warning(f"⚠️ Unexpected word format at index {i}: {w}")

            # Sort pairs by word length (longest first)
            word_pairs.sort(key=lambda pair: len(pair[0]), reverse=True)

            # Extract sorted lists
            word_list = [pair[0] for pair in word_pairs]
            sorted_word_objs = [pair[1] for pair in word_pairs]

            logger.info(f"🎯 Processed {len(word_list)} words for grid: {word_list[:5]}")
        except Exception as e:
            logger.error(f"❌ Error processing words: {e}")
            return None

        size = self._calculate_grid_size(word_list)

        # Try multiple attempts
        for attempt in range(3):
            current_size = size + attempt

            try:
                logger.info(f"🔧 Attempt {attempt + 1}: word_list length={len(word_list)}, sorted_word_objs length={len(sorted_word_objs)}")
                result = self._place_words_in_grid(word_list, sorted_word_objs, current_size)
                if result:
                    return result
            except Exception as e:
                logger.error(f"❌ Grid placement attempt {attempt + 1} failed: {e}")
                import traceback
                traceback.print_exc()

            # Try with fewer words
            if len(word_list) > 7:
                reduced_words = word_list[:len(word_list) - 1]
                reduced_word_objs = sorted_word_objs[:len(reduced_words)]
                try:
                    logger.info(f"🔧 Reduced attempt {attempt + 1}: reduced_words length={len(reduced_words)}, reduced_word_objs length={len(reduced_word_objs)}")
                    result = self._place_words_in_grid(reduced_words, reduced_word_objs, current_size)
                    if result:
                        return result
                except Exception as e:
                    logger.error(f"❌ Reduced grid placement attempt {attempt + 1} failed: {e}")
                    import traceback
                    traceback.print_exc()

        # Last resort: simple cross with 2 words
        if len(word_list) >= 2:
            return self._create_simple_cross(word_list[:2], sorted_word_objs[:2])

        return None

    def _calculate_grid_size(self, words: List[str]) -> int:
        """Calculate appropriate grid size with more generous spacing."""
        total_chars = sum(len(word) for word in words)
        longest_word = max(len(word) for word in words) if words else 8

        # More generous grid size calculation
        base_size = int((total_chars * 2.0) ** 0.5)  # Increased multiplier from 1.5 to 2.0

        return max(
            base_size,
            longest_word + 4,  # Add padding to longest word
            12  # Minimum grid size increased from 8 to 12
        )

    def _place_words_in_grid(self, word_list: List[str], word_objs: List[Dict[str, Any]], size: int) -> Optional[Dict[str, Any]]:
        """Place words in grid using backtracking."""
        logger.info(f"🔧 _place_words_in_grid: word_list={len(word_list)}, word_objs={len(word_objs)}, size={size}")

        grid = [["." for _ in range(size)] for _ in range(size)]
        placed_words = []

        start_time = time.time()
        timeout = 5.0  # 5 second timeout

        try:
            if self._backtrack_placement(grid, word_list, word_objs, 0, placed_words, start_time, timeout):
                logger.info(f"🔧 Backtrack successful, trimming grid...")
                trimmed = self._trim_grid(grid, placed_words)
                logger.info(f"🔧 Grid trimmed, generating clues...")
                clues = self._generate_clues(word_objs, trimmed["placed_words"])

                return {
                    "grid": trimmed["grid"],
                    "placed_words": trimmed["placed_words"],
                    "clues": clues
                }
            else:
                logger.info(f"🔧 Backtrack failed")
                return None
        except Exception as e:
            logger.error(f"❌ Error in _place_words_in_grid: {e}")
            import traceback
            traceback.print_exc()
            return None

    def _backtrack_placement(self, grid: List[List[str]], word_list: List[str], word_objs: List[Dict[str, Any]],
                             word_index: int, placed_words: List[Dict[str, Any]], start_time: float,
                             timeout: float, call_count: int = 0) -> bool:
        """Backtracking algorithm for word placement."""
        # Timeout check
        if call_count % 50 == 0 and time.time() - start_time > timeout:
            return False

        if word_index >= len(word_list):
            return True

        word = word_list[word_index]
        size = len(grid)

        # First word: place horizontally in center
        if word_index == 0:
            center_row = size // 2
            center_col = (size - len(word)) // 2

            if self._can_place_word(grid, word, center_row, center_col, "horizontal"):
                original_state = self._place_word(grid, word, center_row, center_col, "horizontal")
                placed_words.append({
                    "word": word,
                    "row": center_row,
                    "col": center_col,
                    "direction": "horizontal",
                    "number": 1
                })

                if self._backtrack_placement(grid, word_list, word_objs, word_index + 1, placed_words, start_time, timeout, call_count + 1):
                    return True

                self._remove_word(grid, original_state)
                placed_words.pop()

            return False

        # Subsequent words: find intersections
        all_placements = self._find_all_intersection_placements(grid, word, placed_words)
        all_placements.sort(key=lambda p: p["score"], reverse=True)

        for placement in all_placements:
            row, col, direction = placement["row"], placement["col"], placement["direction"]

            if self._can_place_word(grid, word, row, col, direction):
                original_state = self._place_word(grid, word, row, col, direction)
                placed_words.append({
                    "word": word,
                    "row": row,
                    "col": col,
                    "direction": direction,
                    "number": word_index + 1
                })

                if self._backtrack_placement(grid, word_list, word_objs, word_index + 1, placed_words, start_time, timeout, call_count + 1):
                    return True

                self._remove_word(grid, original_state)
                placed_words.pop()

        return False

    def _can_place_word(self, grid: List[List[str]], word: str, row: int, col: int, direction: str) -> bool:
        """Check if word can be placed at position."""
        size = len(grid)

        # Check boundaries
        if row < 0 or col < 0 or row >= size or col >= size:
            return False

        if direction == "horizontal":
            if col + len(word) > size:
                return False

            # CRITICAL: Check word boundaries - no letters immediately before/after
            if col > 0 and grid[row][col - 1] != ".":
                return False  # Word would have a preceding letter
            if col + len(word) < size and grid[row][col + len(word)] != ".":
                return False  # Word would have a trailing letter

            # Check each letter position
            for i, letter in enumerate(word):
                check_row = row
                check_col = col + i
                if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
                    return False
                current_cell = grid[check_row][check_col]
                if current_cell != "." and current_cell != letter:
                    return False

                # For empty cells, check perpendicular validity
                if current_cell == ".":
                    if not self._is_valid_perpendicular_placement(grid, letter, check_row, check_col, "vertical"):
                        return False

        else:  # vertical
            if row + len(word) > size:
                return False

            # CRITICAL: Check word boundaries - no letters immediately before/after
            if row > 0 and grid[row - 1][col] != ".":
                return False  # Word would have a preceding letter
            if row + len(word) < size and grid[row + len(word)][col] != ".":
                return False  # Word would have a trailing letter

            # Check each letter position
            for i, letter in enumerate(word):
                check_row = row + i
                check_col = col
                if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
                    return False
                current_cell = grid[check_row][check_col]
                if current_cell != "." and current_cell != letter:
                    return False

                # For empty cells, check perpendicular validity
                if current_cell == ".":
                    if not self._is_valid_perpendicular_placement(grid, letter, check_row, check_col, "horizontal"):
                        return False

        return True

    def _is_valid_perpendicular_placement(self, grid: List[List[str]], letter: str, row: int, col: int, check_direction: str) -> bool:
        """Check if placing a letter would create valid perpendicular word boundaries."""
        size = len(grid)

        if check_direction == "vertical":
            # Check if placing this letter would create an invalid vertical sequence
            has_above = row > 0 and grid[row - 1][col] != "."
            has_below = row < size - 1 and grid[row + 1][col] != "."

            # Don't allow this letter to extend an existing vertical word
            # unless it's exactly at an intersection point with matching letters
            if has_above or has_below:
                return grid[row][col] == letter
        else:  # horizontal
            # Check if placing this letter would create an invalid horizontal sequence
            has_left = col > 0 and grid[row][col - 1] != "."
            has_right = col < size - 1 and grid[row][col + 1] != "."

            # Don't allow this letter to extend an existing horizontal word
            # unless it's exactly at an intersection point with matching letters
            if has_left or has_right:
                return grid[row][col] == letter

        return True

    def _place_word(self, grid: List[List[str]], word: str, row: int, col: int, direction: str) -> List[Dict[str, Any]]:
        """Place word in grid and return original state."""
        original_state = []
        size = len(grid)

        if direction == "horizontal":
            for i, letter in enumerate(word):
                check_row = row
                check_col = col + i
                if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
                    raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}")
                original_state.append({
                    "row": check_row,
                    "col": check_col,
                    "value": grid[check_row][check_col]
                })
                grid[check_row][check_col] = letter
        else:
            for i, letter in enumerate(word):
                check_row = row + i
                check_col = col
                if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
                    raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}")
                original_state.append({
                    "row": check_row,
                    "col": check_col,
                    "value": grid[check_row][check_col]
                })
                grid[check_row][check_col] = letter

        return original_state

    def _remove_word(self, grid: List[List[str]], original_state: List[Dict[str, Any]]):
        """Remove word from grid."""
        size = len(grid)
        for state in original_state:
            check_row = state["row"]
            check_col = state["col"]
            if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
                raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}")
            grid[check_row][check_col] = state["value"]

    def _find_all_intersection_placements(self, grid: List[List[str]], word: str, placed_words: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Find all possible intersection placements for a word."""
        placements = []

        for placed_word in placed_words:
            intersections = self._find_word_intersections(word, placed_word["word"])

            for intersection in intersections:
                word_pos, placed_pos = intersection["word_pos"], intersection["placed_pos"]

                placement_info = self._calculate_intersection_placement(word, word_pos, placed_word, placed_pos)

                if placement_info:
                    score = self._calculate_placement_score(grid, word, placement_info, placed_words)
                    placements.append({
                        **placement_info,
                        "score": score
                    })

        return placements

    def _find_word_intersections(self, word1: str, word2: str) -> List[Dict[str, int]]:
        """Find letter intersections between two words."""
        intersections = []

        for i, letter1 in enumerate(word1):
            for j, letter2 in enumerate(word2):
                if letter1 == letter2:
                    intersections.append({
                        "word_pos": i,
                        "placed_pos": j
                    })

        return intersections

    def _calculate_intersection_placement(self, new_word: str, new_word_pos: int,
                                          placed_word: Dict[str, Any], placed_word_pos: int) -> Optional[Dict[str, Any]]:
        """Calculate where new word should be placed for intersection."""
        placed_row, placed_col = placed_word["row"], placed_word["col"]
        placed_direction = placed_word["direction"]

        # Find intersection point in grid
        if placed_direction == "horizontal":
            intersection_row = placed_row
            intersection_col = placed_col + placed_word_pos
        else:
            intersection_row = placed_row + placed_word_pos
            intersection_col = placed_col

        # Calculate new word position
        new_direction = "vertical" if placed_direction == "horizontal" else "horizontal"

        if new_direction == "horizontal":
            new_row = intersection_row
            new_col = intersection_col - new_word_pos
        else:
            new_row = intersection_row - new_word_pos
            new_col = intersection_col

        return {
            "row": new_row,
            "col": new_col,
            "direction": new_direction
        }

    def _calculate_placement_score(self, grid: List[List[str]], word: str, placement: Dict[str, Any],
                                   placed_words: List[Dict[str, Any]]) -> int:
        """Score a placement for quality."""
        row, col, direction = placement["row"], placement["col"], placement["direction"]
        grid_size = len(grid)
        score = 100  # Base score for intersection

        # Count intersections - with bounds checking
        intersection_count = 0
        if direction == "horizontal":
            for i, letter in enumerate(word):
                target_row = row
                target_col = col + i
                # Check bounds before accessing grid
                if (0 <= target_row < grid_size and
                        0 <= target_col < grid_size and
                        grid[target_row][target_col] == letter):
                    intersection_count += 1
        else:  # vertical
            for i, letter in enumerate(word):
                target_row = row + i
                target_col = col
                # Check bounds before accessing grid
                if (0 <= target_row < grid_size and
                        0 <= target_col < grid_size and
                        grid[target_row][target_col] == letter):
                    intersection_count += 1

        score += intersection_count * 200

        # Bonus for central placement
        center = grid_size // 2
        distance_from_center = abs(row - center) + abs(col - center)
        score -= distance_from_center * 5

        return score

    def _trim_grid(self, grid: List[List[str]], placed_words: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Trim grid to remove excess empty space."""
        if not placed_words:
            return {"grid": grid, "placed_words": placed_words}

        # Find bounds
        min_row = min_col = len(grid)
        max_row = max_col = -1

        for word in placed_words:
            row, col, direction, word_text = word["row"], word["col"], word["direction"], word["word"]

            min_row = min(min_row, row)
            min_col = min(min_col, col)

            if direction == "horizontal":
                max_row = max(max_row, row)
                max_col = max(max_col, col + len(word_text) - 1)
            else:
                max_row = max(max_row, row + len(word_text) - 1)
                max_col = max(max_col, col)

        # Add padding with proper bounds checking
        min_row = max(0, min_row - 1)
        min_col = max(0, min_col - 1)
        max_row = min(len(grid) - 1, max_row + 1)
        max_col = min(len(grid[0]) - 1, max_col + 1)

        # Ensure bounds are valid
        max_row = min(max_row, len(grid) - 1)
        max_col = min(max_col, len(grid[0]) - 1)

        # Create trimmed grid
        trimmed_grid = []
        for r in range(min_row, max_row + 1):
            row = []
            for c in range(min_col, max_col + 1):
                # Double-check bounds before accessing
                if r < 0 or r >= len(grid) or c < 0 or c >= len(grid[0]):
                    logger.error(f"Invalid bounds: r={r}, c={c}, grid_size={len(grid)}x{len(grid[0])}")
                    continue
                row.append(grid[r][c])
            trimmed_grid.append(row)

        # Update word positions
        updated_words = []
        for word in placed_words:
            updated_words.append({
                **word,
                "row": word["row"] - min_row,
                "col": word["col"] - min_col
            })

        return {"grid": trimmed_grid, "placed_words": updated_words}

    def _create_simple_cross(self, word_list: List[str], word_objs: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
        """Create simple cross with two words."""
        if len(word_list) < 2:
            return None

        word1, word2 = word_list[0], word_list[1]
        intersections = self._find_word_intersections(word1, word2)

        if not intersections:
            return None

        # Use first intersection
        intersection = intersections[0]
        size = max(len(word1), len(word2)) + 4
        grid = [["." for _ in range(size)] for _ in range(size)]

        # Place first word horizontally in center
        center_row = size // 2
        center_col = (size - len(word1)) // 2

        for i, letter in enumerate(word1):
            check_row = center_row
            check_col = center_col + i
            if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
                raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}")
            grid[check_row][check_col] = letter

        # Place second word vertically at intersection
        intersection_col = center_col + intersection["word_pos"]
        word2_start_row = center_row - intersection["placed_pos"]

        for i, letter in enumerate(word2):
            check_row = word2_start_row + i
            check_col = intersection_col
            if check_row >= size or check_col >= size or check_row < 0 or check_col < 0:
                raise IndexError(f"Grid index out of range: [{check_row}][{check_col}] in grid of size {size}")
            grid[check_row][check_col] = letter

        placed_words = [
            {"word": word1, "row": center_row, "col": center_col, "direction": "horizontal", "number": 1},
            {"word": word2, "row": word2_start_row, "col": intersection_col, "direction": "vertical", "number": 2}
        ]

        trimmed = self._trim_grid(grid, placed_words)
        clues = self._generate_clues(word_objs[:2], trimmed["placed_words"])

        return {
            "grid": trimmed["grid"],
            "placed_words": trimmed["placed_words"],
            "clues": clues
        }

    def _generate_clues(self, word_objs: List[Dict[str, Any]], placed_words: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Generate clues for placed words."""
        logger.info(f"🔧 _generate_clues: word_objs={len(word_objs)}, placed_words={len(placed_words)}")
        clues = []

        try:
            for i, placed_word in enumerate(placed_words):
                logger.info(f"🔧 Processing placed word {i}: {placed_word.get('word', 'UNKNOWN')}")

                # Find matching word object
                word_obj = next((w for w in word_objs if w["word"].upper() == placed_word["word"]), None)

                if word_obj:
                    logger.info(f"🔧 Found matching word_obj: {word_obj.get('word', 'UNKNOWN')}")
                    clue_text = word_obj["clue"] if "clue" in word_obj else f"Clue for {placed_word['word']}"
                else:
                    logger.warning(f"⚠️ No matching word_obj found for {placed_word['word']}")
                    clue_text = f"Clue for {placed_word['word']}"

                clues.append({
                    "number": placed_word["number"],
                    "word": placed_word["word"],
                    "text": clue_text,
                    "direction": "across" if placed_word["direction"] == "horizontal" else "down",
                    "position": {"row": placed_word["row"], "col": placed_word["col"]}
                })

            logger.info(f"🔧 Generated {len(clues)} clues")
            return clues
        except Exception as e:
            logger.error(f"❌ Error in _generate_clues: {e}")
            import traceback
            traceback.print_exc()
            raise
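The word-boundary test inside `_can_place_word` is the heart of the bounds-checking fix mentioned in the commit message: a word may not be placed so that an existing letter sits immediately before or after it. The snippet below is a minimal, standalone sketch of just the horizontal half of that rule, useful for reasoning about it outside the service; the helper name and demo grid are illustrative and are not part of the committed file.

    # Illustrative sketch only (not part of the commit): the horizontal boundary rule.
    def can_place_horizontal(grid, word, row, col):
        """Reject placements that would touch an existing letter right before or after the word."""
        size = len(grid)
        if row < 0 or col < 0 or row >= size or col + len(word) > size:
            return False
        if col > 0 and grid[row][col - 1] != ".":
            return False  # a letter immediately before would merge two words
        if col + len(word) < size and grid[row][col + len(word)] != ".":
            return False  # a letter immediately after would extend the word
        # each cell must be empty or already hold the matching letter (an intersection)
        return all(grid[row][col + i] in (".", ch) for i, ch in enumerate(word))

    if __name__ == "__main__":
        grid = [["." for _ in range(6)] for _ in range(6)]
        grid[2][0] = "A"  # pretend an existing word ends here
        print(can_place_horizontal(grid, "CAT", 2, 1))  # False: touches the "A" in column 0
        print(can_place_horizontal(grid, "CAT", 4, 1))  # True: no neighbouring letters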
crossword-app/backend-py/src/services/crossword_generator_wrapper.py
ADDED
@@ -0,0 +1,58 @@
"""
Crossword Generator - Simple wrapper for the fixed implementation
"""

import logging
from typing import List, Dict, Any

logger = logging.getLogger(__name__)

class CrosswordGenerator:
    """
    Wrapper that uses the fixed crossword generator implementation.
    """

    def __init__(self, vector_service=None):
        self.vector_service = vector_service
        self.min_words = 8
        self.max_words = 15

    async def generate_puzzle(
        self,
        topics: List[str],
        difficulty: str = "medium",
        use_ai: bool = False
    ) -> Dict[str, Any]:
        """
        Generate a complete crossword puzzle using the fixed generator.

        Args:
            topics: List of topic strings
            difficulty: "easy", "medium", or "hard"
            use_ai: Whether to use vector search for word generation

        Returns:
            Dictionary containing grid, clues, and metadata
        """
        try:
            logger.info(f"🎯 Using fixed crossword generator for topics: {topics}")

            # Use the fixed generator implementation with the initialized vector service
            from .crossword_generator import CrosswordGenerator as ActualGenerator
            actual_generator = ActualGenerator(vector_service=self.vector_service)

            puzzle = await actual_generator.generate_puzzle(topics, difficulty, use_ai)

            logger.info(f"✅ Generated crossword with fixed algorithm")
            return puzzle

        except Exception as e:
            logger.error(f"❌ Failed to generate puzzle: {e}")
            raise

    async def generate_words_for_topics(self, topics: List[str], difficulty: str, use_ai: bool) -> List[Dict[str, Any]]:
        """Backward compatibility method."""
        # This method is kept for compatibility but delegates to the fixed generator
        from .crossword_generator import CrosswordGenerator as ActualGenerator
        actual_generator = ActualGenerator()
        return await actual_generator._select_words(topics, difficulty, use_ai)
crossword-app/backend-py/src/services/vector_search.py
ADDED
@@ -0,0 +1,587 @@
"""
Vector similarity search service using sentence-transformers and FAISS.
This implements true AI word generation via vector space nearest neighbor search.
"""

import os
import logging
import asyncio
import time
from datetime import datetime
from typing import List, Dict, Any, Optional, Tuple
import json

import numpy as np
import torch
from sentence_transformers import SentenceTransformer
import faiss
from pathlib import Path

logger = logging.getLogger(__name__)

def log_with_timestamp(message):
    """Helper to log with precise timestamp."""
    timestamp = datetime.now().strftime("%H:%M:%S.%f")[:-3]
    logger.info(f"[{timestamp}] {message}")

class VectorSearchService:
    """
    Service for finding semantically similar words using vector similarity search.

    This replaces the old approach of filtering static word lists with true
    vector space search through the model's full vocabulary.
    """

    def __init__(self):
        self.model = None
        self.vocab = None
        self.word_embeddings = None
        self.faiss_index = None
        self.is_initialized = False

        # Configuration
        self.model_name = os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-mpnet-base-v2")
        self.similarity_threshold = float(os.getenv("WORD_SIMILARITY_THRESHOLD", "0.3"))
        self.max_results = 20

        # Cache manager for word fallback
        self.cache_manager = None

    async def initialize(self):
        """Initialize the vector search service."""
        try:
            start_time = time.time()
            log_with_timestamp(f"🔧 Loading model: {self.model_name}")

            # Load sentence transformer model
            model_start = time.time()
            self.model = SentenceTransformer(self.model_name)
            model_time = time.time() - model_start
            log_with_timestamp(f"✅ Model loaded in {model_time:.2f}s: {self.model_name}")

            # Get model vocabulary from tokenizer
            vocab_start = time.time()
            tokenizer = self.model.tokenizer
            vocab_dict = tokenizer.get_vocab()

            # Filter vocabulary for crossword-suitable words
            self.vocab = self._filter_vocabulary(vocab_dict)
            vocab_time = time.time() - vocab_start
            log_with_timestamp(f"Filtered vocabulary in {vocab_time:.2f}s: {len(self.vocab)} words")

            # Pre-compute embeddings for all vocabulary words
            embedding_start = time.time()
            log_with_timestamp("Starting embedding generation...")
            await self._build_embeddings_index()
            embedding_time = time.time() - embedding_start
            log_with_timestamp(f"Embeddings built in {embedding_time:.2f}s")

            # Initialize cache manager
            cache_start = time.time()
            log_with_timestamp("📦 Initializing word cache manager...")
            try:
                from .word_cache import WordCacheManager
                self.cache_manager = WordCacheManager()
                await self.cache_manager.initialize()
                cache_time = time.time() - cache_start
                log_with_timestamp(f"📦 Cache manager initialized in {cache_time:.2f}s")
            except Exception as e:
                cache_time = time.time() - cache_start
                log_with_timestamp(f"⚠️ Cache manager initialization failed in {cache_time:.2f}s: {e}")
                log_with_timestamp("Continuing without persistent caching (in-memory only)")
                self.cache_manager = None

            self.is_initialized = True
            total_time = time.time() - start_time
            log_with_timestamp(f"✅ Vector search service fully initialized in {total_time:.2f}s")

        except Exception as e:
            logger.error(f"❌ Failed to initialize vector search: {e}")
            self.is_initialized = False
            raise

    def _filter_vocabulary(self, vocab_dict: Dict[str, int]) -> List[str]:
        """Filter vocabulary to keep only crossword-suitable words."""
        filtered = []

        # Words to exclude - boring, generic, or problematic for crosswords
        excluded_words = {
            # Generic/boring words
            'THE', 'AND', 'FOR', 'ARE', 'BUT', 'NOT', 'YOU', 'ALL', 'THIS', 'THAT', 'WITH', 'FROM', 'THEY', 'WERE', 'BEEN', 'HAVE', 'THEIR', 'SAID', 'EACH', 'WHICH', 'WHAT', 'THERE', 'WILL', 'MORE', 'WHEN', 'SOME', 'LIKE', 'INTO', 'TIME', 'VERY', 'ONLY', 'HAS', 'HAD', 'WHO', 'OIL', 'ITS', 'NOW', 'FIND', 'LONG', 'DOWN', 'DAY', 'DID', 'GET', 'COME', 'MADE', 'MAY', 'PART',
            # Topic words that are too obvious
            'ANIMAL', 'ANIMALS', 'CREATURE', 'CREATURES', 'BEAST', 'BEASTS', 'THING', 'THINGS'
        }

        for word, _ in vocab_dict.items():
            # Clean word (remove special tokens)
            clean_word = word.strip("##").upper()

            # Filter criteria for crossword words
            if (
                len(clean_word) >= 3 and  # Minimum length
                len(clean_word) <= 12 and  # Reasonable max length
                clean_word.isalpha() and  # Only letters
                not clean_word.startswith('[') and  # No special tokens
                not clean_word.startswith('<') and  # No special tokens
                clean_word not in excluded_words and  # Avoid boring words
                not self._is_plural(clean_word) and  # No plurals
                not self._is_boring_word(clean_word)  # No boring patterns
            ):
                filtered.append(clean_word)

        # Remove duplicates and sort
        return sorted(list(set(filtered)))

    def _is_plural(self, word: str) -> bool:
        """Check if word is likely a plural."""
        # Simple plural detection
        if len(word) < 4:
            return False
        return (
            word.endswith('S') and not word.endswith('SS') and
            not word.endswith('US') and not word.endswith('IS')
        )

    def _is_boring_word(self, word: str) -> bool:
        """Check if word is boring or too generic for crosswords."""
        boring_patterns = [
            # Words ending in common suffixes that are often generic
            word.endswith('ING') and len(word) > 6,
            word.endswith('TION') and len(word) > 7,
            word.endswith('NESS') and len(word) > 6,
            # Very common short words
            word in ['GET', 'GOT', 'PUT', 'SET', 'LET', 'RUN', 'CUT', 'HIT', 'SIT', 'WIN', 'BIG', 'NEW', 'OLD', 'BAD', 'GOOD', 'BEST', 'LAST', 'NEXT', 'REAL']
        ]
        return any(boring_patterns)

    async def _build_embeddings_index(self):
        """Build FAISS index with pre-computed embeddings for all vocabulary."""
        logger.info("Building embeddings index...")

        # Compute embeddings in batches to avoid memory issues
        batch_size = 100
        embeddings_list = []

        for i in range(0, len(self.vocab), batch_size):
            batch = self.vocab[i:i + batch_size]
            batch_embeddings = self.model.encode(batch, convert_to_numpy=True)
            embeddings_list.append(batch_embeddings)

            if i % 1000 == 0:
                logger.info(f"Processed {i}/{len(self.vocab)} words")

        # Combine all embeddings
        self.word_embeddings = np.vstack(embeddings_list)
        logger.info(f"Generated embeddings shape: {self.word_embeddings.shape}")

        # Build FAISS index for fast similarity search
        dimension = self.word_embeddings.shape[1]
        self.faiss_index = faiss.IndexFlatIP(dimension)  # Inner product similarity

        # Normalize embeddings for cosine similarity
        faiss.normalize_L2(self.word_embeddings)
        self.faiss_index.add(self.word_embeddings)

        logger.info(f"FAISS index built with {self.faiss_index.ntotal} vectors")

    async def find_similar_words(
        self,
        topic: str,
        difficulty: str = "medium",
        max_words: int = 15
    ) -> List[Dict[str, Any]]:
        """
        Find words similar to the given topic using vector similarity search.

        This is the core function that replaces embedding filtering with true
        vector space nearest neighbor search.
        """
        logger.info(f"Starting word search for topic: '{topic}', difficulty: '{difficulty}', max_words: {max_words}")
        logger.info(f"Vector search initialized: {self.is_initialized}")

        if not self.is_initialized:
            logger.warning("Vector search not initialized, using cached fallback")
            return await self._get_cached_fallback(topic, difficulty, max_words)

        try:
            # Get topic embedding
            topic_embedding = self.model.encode([topic], convert_to_numpy=True)

            # Add small amount of noise to create variety in search results (with fallback)
            import numpy as np
            noise_factor = float(os.getenv("SEARCH_RANDOMNESS", "0.02"))  # 2% noise by default
            if noise_factor > 0:
                try:
                    noise = np.random.normal(0, noise_factor, topic_embedding.shape)
                    topic_embedding_noisy = topic_embedding + noise
                    # Ensure the array is contiguous and correct type for FAISS
                    topic_embedding = np.ascontiguousarray(topic_embedding_noisy, dtype=np.float32)
                except Exception as noise_error:
                    logger.warning(f"⚠️ Failed to add search noise: {noise_error}, using original embedding")
                    topic_embedding = np.ascontiguousarray(topic_embedding, dtype=np.float32)
            else:
                topic_embedding = np.ascontiguousarray(topic_embedding, dtype=np.float32)

            # Normalize for cosine similarity with error handling
            try:
                faiss.normalize_L2(topic_embedding)
            except Exception as norm_error:
                logger.warning(f"⚠️ FAISS normalization failed: {norm_error}, trying without noise")
                # Fallback: use original embedding without noise
                topic_embedding = self.model.encode([topic], convert_to_numpy=True)
                topic_embedding = np.ascontiguousarray(topic_embedding, dtype=np.float32)
                faiss.normalize_L2(topic_embedding)

            # Search for similar words using FAISS (get more results for diversity)
            search_size = min(self.max_results * 6, 150)  # Get many more candidates for variety
            scores, indices = self.faiss_index.search(topic_embedding, search_size)

            # Debug: log search results
            logger.info(f"FAISS search returned {len(scores[0])} results")
            logger.info(f"Top 5 scores: {scores[0][:5]}")
            logger.info(f"Similarity threshold: {self.similarity_threshold}")

            # Collect candidates with scores
            candidates = []
            above_threshold = 0
            difficulty_passed = 0
            interesting_passed = 0

            for score, idx in zip(scores[0], indices[0]):
                if score < self.similarity_threshold:
                    continue
                above_threshold += 1

                word = self.vocab[idx]

                # Filter by difficulty and quality
                if self._matches_difficulty(word, difficulty):
                    difficulty_passed += 1
                    if self._is_interesting_word(word, topic):
                        interesting_passed += 1
                        candidates.append({
                            "word": word,
                            "clue": self._generate_clue(word, topic),
                            "similarity": float(score),
                            "source": "vector_search"
                        })

            logger.info(f"Filtering results: {len(scores[0])} total → {above_threshold} above threshold → {difficulty_passed} difficulty OK → {interesting_passed} interesting → {len(candidates)} final")

            # Smart randomization: favor good words but add variety
            import random

            if len(candidates) > max_words * 2:
                # Weighted random selection favoring higher similarity scores
                similar_words = self._weighted_random_selection(candidates, max_words)
            else:
                # If not many candidates, use all but in random order
                random.shuffle(candidates)
                similar_words = candidates[:max_words]

            logger.info(f"🎯 Found {len(similar_words)} similar words for '{topic}' via vector search")

            # Cache successful results for future use
            if similar_words:
                await self._cache_successful_search(topic, difficulty, similar_words)

            # If not enough words found, supplement with cached words
            if len(similar_words) < max_words // 2:
                cached_supplement = await self._get_cached_fallback(
                    topic, difficulty, max_words - len(similar_words)
                )
                similar_words.extend(cached_supplement)
                logger.info(f"Supplemented with {len(cached_supplement)} cached words")

            return similar_words[:max_words]

        except Exception as e:
            logger.error(f"❌ Vector search failed for '{topic}': {e}")
            # Try cached fallback first
            cached_words = await self._get_cached_fallback(topic, difficulty, max_words)
            if cached_words:
                return cached_words

            # Last resort: bootstrap with simple topic-related words
            logger.warning(f"⚠️ No cached words available, using emergency bootstrap for '{topic}'")
            return self._get_emergency_bootstrap(topic, difficulty, max_words)

    def _matches_difficulty(self, word: str, difficulty: str) -> bool:
        """Check if word matches difficulty criteria."""
        difficulty_map = {
            "easy": {"min_len": 3, "max_len": 8},
            "medium": {"min_len": 4, "max_len": 10},
            "hard": {"min_len": 5, "max_len": 15}
        }

        criteria = difficulty_map.get(difficulty, difficulty_map["medium"])
        return criteria["min_len"] <= len(word) <= criteria["max_len"]

    def _generate_clue(self, word: str, topic: str) -> str:
        """Generate a simple clue for the word."""
        # Basic clue templates - can be enhanced with LLM generation later
        clue_templates = {
            "Animals": f"{word.lower()} (animal)",
            "Technology": f"{word.lower()} (tech term)",
            "Science": f"{word.lower()} (scientific term)",
            "Geography": f"{word.lower()} (geographic feature)"
        }

        return clue_templates.get(topic, f"{word.lower()} (related to {topic.lower()})")

    def _is_interesting_word(self, word: str, topic: str) -> bool:
        """Check if word is interesting enough for crosswords."""
        # Exclude words that are too obvious for the topic
        topic_lower = topic.lower()
        word_lower = word.lower()

        # Don't include the topic itself or obvious variations
        if word_lower == topic_lower or word_lower in topic_lower:
            return False

        # Topic-specific filtering
        if topic_lower == 'animals':
            obvious_animals = ['mammal', 'mammals', 'wildlife', 'organism', 'organisms', 'livestock']
            if word_lower in obvious_animals:
                return False

        # Prefer concrete nouns over abstract concepts
        abstract_endings = ['tion', 'ness', 'ment', 'ity', 'ism']
        if any(word_lower.endswith(ending) for ending in abstract_endings) and len(word) > 8:
            return False

        return True

    def _weighted_random_selection(self, candidates: List[Dict[str, Any]], max_words: int) -> List[Dict[str, Any]]:
        """
        Weighted random selection that favors higher similarity scores but adds variety.

        This ensures we don't always get the exact same words, while still preferring
        high-quality matches.
        """
        import random

        if len(candidates) <= max_words:
            return candidates

        # Create tiers based on similarity scores
        candidates_sorted = sorted(candidates, key=lambda w: w["similarity"], reverse=True)

        # Tier 1: Top 25% - very high probability
        tier1_size = max(1, len(candidates_sorted) // 4)
        tier1 = candidates_sorted[:tier1_size]

        # Tier 2: Next 25% - high probability
        tier2_size = max(1, len(candidates_sorted) // 4)
        tier2 = candidates_sorted[tier1_size:tier1_size + tier2_size]

        # Tier 3: Next 35% - medium probability
        tier3_size = max(1, len(candidates_sorted) * 35 // 100)
        tier3 = candidates_sorted[tier1_size + tier2_size:tier1_size + tier2_size + tier3_size]

        # Tier 4: Remaining - low probability
        tier4 = candidates_sorted[tier1_size + tier2_size + tier3_size:]

        selected = []

        # Always include some from tier 1 (but not all)
        tier1_count = min(max_words // 3, len(tier1))
        selected.extend(random.sample(tier1, tier1_count))

        # Fill remaining slots with weighted random selection
        remaining_slots = max_words - len(selected)

        if remaining_slots > 0:
            # Create weighted pool
            weighted_pool = []
            weighted_pool.extend([(w, 3) for w in tier2])  # 3x weight
            weighted_pool.extend([(w, 2) for w in tier3])  # 2x weight
            weighted_pool.extend([(w, 1) for w in tier4])  # 1x weight

            # Also add remaining tier1 words with high weight
            remaining_tier1 = [w for w in tier1 if w not in selected]
            weighted_pool.extend([(w, 4) for w in remaining_tier1])  # 4x weight

            # Weighted random selection
            for _ in range(remaining_slots):
                if not weighted_pool:
                    break

                # Create weighted list
                weighted_words = []
                for word, weight in weighted_pool:
                    weighted_words.extend([word] * weight)

                if weighted_words:
                    chosen = random.choice(weighted_words)
                    selected.append(chosen)

                    # Remove chosen word from pool
                    weighted_pool = [(w, wt) for w, wt in weighted_pool if w != chosen]

        # Final shuffle to mix up the order
        random.shuffle(selected)

        logger.info(f"🎲 Weighted selection: {len(selected)} words from {len(candidates)} candidates")
        return selected[:max_words]

    async def _get_cached_fallback(
        self,
        topic: str,
        difficulty: str,
        max_words: int
    ) -> List[Dict[str, Any]]:
        """Fallback to cached words when vector search fails."""
        if not self.cache_manager:
            logger.warning(f"No cache manager available for fallback")
            return []

        logger.info(f"Looking for cached words for topic: '{topic}', difficulty: '{difficulty}'")

        try:
            cached_words = await self.cache_manager.get_cached_words(topic, difficulty, max_words)

            if cached_words:
                logger.info(f"📦 Found {len(cached_words)} cached words for '{topic}/{difficulty}'")
                return cached_words
            else:
                logger.info(f"No cached words available for '{topic}/{difficulty}'")
                return []

        except Exception as e:
            logger.error(f"❌ Failed to get cached fallback for '{topic}': {e}")
            return []

    async def _cache_successful_search(
        self,
        topic: str,
        difficulty: str,
        words: List[Dict[str, Any]]
    ):
        """Cache successful vector search results for future use."""
        if not self.cache_manager:
            return

        try:
            # Filter out any existing cached words to avoid duplicates
            vector_words = [w for w in words if w.get("source") == "vector_search"]

            if vector_words:
                success = await self.cache_manager.cache_words(topic, difficulty, vector_words)
                if success:
                    logger.info(f"💾 Successfully cached {len(vector_words)} words for {topic}/{difficulty}")

        except Exception as e:
            logger.error(f"❌ Failed to cache search results: {e}")

    def _get_emergency_bootstrap(self, topic: str, difficulty: str, max_words: int) -> List[Dict[str, Any]]:
        """
        Emergency bootstrap words when vector search and cache both fail.
        This prevents complete failure by providing basic topic-related words.
        """
        bootstrap_words = {
            "animals": [
                {"word": "DOG", "clue": "Man's best friend"},
                {"word": "CAT", "clue": "Feline pet"},
                {"word": "ELEPHANT", "clue": "Large mammal with trunk"},
                {"word": "TIGER", "clue": "Striped big cat"},
                {"word": "BIRD", "clue": "Flying creature"},
                {"word": "FISH", "clue": "Aquatic animal"},
                {"word": "HORSE", "clue": "Riding animal"},
                {"word": "BEAR", "clue": "Large mammal"},
                {"word": "WHALE", "clue": "Marine mammal"},
                {"word": "LION", "clue": "King of jungle"},
                {"word": "RABBIT", "clue": "Hopping mammal"},
                {"word": "SNAKE", "clue": "Slithering reptile"}
            ],
            "science": [
                {"word": "ATOM", "clue": "Basic unit of matter"},
                {"word": "CELL", "clue": "Basic unit of life"},
                {"word": "DNA", "clue": "Genetic material"},
                {"word": "ENERGY", "clue": "Capacity to do work"},
                {"word": "FORCE", "clue": "Push or pull"},
                {"word": "GRAVITY", "clue": "Force of attraction"},
                {"word": "LIGHT", "clue": "Electromagnetic radiation"},
                {"word": "MATTER", "clue": "Physical substance"},
                {"word": "MOTION", "clue": "Change in position"},
                {"word": "OXYGEN", "clue": "Essential gas"},
                {"word": "PHYSICS", "clue": "Study of matter and energy"},
                {"word": "THEORY", "clue": "Scientific explanation"}
            ],
            "technology": [
                {"word": "COMPUTER", "clue": "Electronic device"},
                {"word": "INTERNET", "clue": "Global network"},
                {"word": "SOFTWARE", "clue": "Computer programs"},
                {"word": "ROBOT", "clue": "Automated machine"},
                {"word": "DATA", "clue": "Information"},
                {"word": "CODE", "clue": "Programming instructions"},
                {"word": "DIGITAL", "clue": "Electronic format"},
                {"word": "NETWORK", "clue": "Connected systems"},
                {"word": "SYSTEM", "clue": "Organized whole"},
                {"word": "DEVICE", "clue": "Technical apparatus"},
                {"word": "MOBILE", "clue": "Portable technology"},
                {"word": "SCREEN", "clue": "Display surface"}
            ],
            "geography": [
                {"word": "MOUNTAIN", "clue": "High landform"},
                {"word": "RIVER", "clue": "Flowing water"},
                {"word": "OCEAN", "clue": "Large body of water"},
                {"word": "DESERT", "clue": "Arid region"},
                {"word": "FOREST", "clue": "Dense trees"},
                {"word": "ISLAND", "clue": "Land surrounded by water"},
                {"word": "VALLEY", "clue": "Low area between hills"},
                {"word": "LAKE", "clue": "Inland water body"},
                {"word": "COAST", "clue": "Land by the sea"},
                {"word": "PLAIN", "clue": "Flat land"},
                {"word": "HILL", "clue": "Small elevation"},
                {"word": "CLIFF", "clue": "Steep rock face"}
            ]
        }

        topic_lower = topic.lower()
        words = bootstrap_words.get(topic_lower, [])

        if not words:
            # Generic fallback for unknown topics
            words = [
                {"word": "WORD", "clue": "Unit of language"},
                {"word": "PUZZLE", "clue": "Brain teaser"},
                {"word": "GAME", "clue": "Form of play"},
                {"word": "CROSS", "clue": "Intersecting lines"},
                {"word": "GRID", "clue": "Pattern of squares"},
                {"word": "CLUE", "clue": "Helpful hint"}
            ]

        # Filter by difficulty and format
        filtered_words = []
        for word_obj in words:
            word = word_obj["word"]
            if self._matches_difficulty(word, difficulty):
                filtered_words.append({
                    "word": word,
                    "clue": word_obj["clue"],
                    "similarity": 0.7,  # Moderate relevance
                    "source": "emergency_bootstrap"
                })

        # Shuffle and limit
        import random
        random.shuffle(filtered_words)
        result = filtered_words[:max_words]

        logger.info(f"Emergency bootstrap provided {len(result)} words for '{topic}'")
        return result

    async def cleanup(self):
        """Cleanup resources."""
        logger.info("🧹 Cleaning up vector search service")
        if hasattr(self, 'model'):
            del self.model
        if hasattr(self, 'word_embeddings'):
            del self.word_embeddings
        if hasattr(self, 'faiss_index'):
            del self.faiss_index
        if self.cache_manager:
            await self.cache_manager.cleanup_expired_caches()
        self.is_initialized = False
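The service above gets cosine similarity out of FAISS by L2-normalizing both the vocabulary embeddings and the query vector and then searching an inner-product index, so the returned scores are cosine similarities. Below is a minimal standalone sketch of that pattern; random vectors stand in for sentence-transformer embeddings, and the sizes are illustrative only.

    # Illustrative sketch (not part of the commit): cosine similarity via normalize_L2 + IndexFlatIP.
    import numpy as np
    import faiss

    rng = np.random.default_rng(0)
    vocab_vectors = rng.standard_normal((1000, 768)).astype(np.float32)  # stand-in vocabulary embeddings
    query = rng.standard_normal((1, 768)).astype(np.float32)             # stand-in topic embedding

    faiss.normalize_L2(vocab_vectors)  # unit-length rows ...
    faiss.normalize_L2(query)          # ... so inner product equals cosine similarity

    index = faiss.IndexFlatIP(vocab_vectors.shape[1])
    index.add(vocab_vectors)

    scores, indices = index.search(query, 5)  # top-5 nearest neighbours for the query
    print(scores[0], indices[0])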
crossword-app/backend-py/src/services/word_cache.py
ADDED
@@ -0,0 +1,347 @@
"""
Word Cache Manager - Replaces static word file dependencies with intelligent caching.
Caches vector-discovered words with quality clues for fast retrieval.
"""

import os
import json
import logging
import time
from datetime import datetime, timedelta
from typing import List, Dict, Any, Optional
from pathlib import Path
import asyncio

logger = logging.getLogger(__name__)

class WordCacheManager:
    """
    Manages cached word data to replace static word file dependencies.

    Features:
    - Caches vector-discovered words with quality clues
    - Supports cache expiration and refresh
    - Fallback for when vector search fails
    - Progressive cache building from successful searches
    """

    def __init__(self, cache_dir: str = None):
        # Use appropriate default cache directory for the environment
        if cache_dir is None:
            # Check if we're in a Docker container or HuggingFace Spaces
            if os.path.exists("/.dockerenv") or os.getenv("SPACE_ID"):
                # Use /tmp for containers/spaces where write permissions are limited
                cache_dir = os.getenv("WORD_CACHE_DIR", "/tmp/crossword_cache")
            else:
                # Use local cache directory for development
                cache_dir = os.getenv("WORD_CACHE_DIR", "cache")

        self.cache_dir = Path(cache_dir)

        # Try to create cache directory with fallback
        try:
            self.cache_dir.mkdir(parents=True, exist_ok=True)
            logger.info(f"Cache directory created: {self.cache_dir}")
        except (PermissionError, OSError) as e:
            # Fallback to temp directory
            try:
                import tempfile
                temp_cache = Path(tempfile.gettempdir()) / "crossword_cache"
                temp_cache.mkdir(exist_ok=True)
                self.cache_dir = temp_cache
                logger.warning(f"Permission denied for '{cache_dir}', using temp: {self.cache_dir}")
            except Exception as temp_error:
                # Last resort: use in-memory only
                logger.error(f"Failed to create temp cache directory: {temp_error}")
                logger.warning("Using in-memory cache only (no persistence)")
                self.cache_dir = None
        except Exception as e:
            # Last resort: use in-memory only
            logger.error(f"Failed to create cache directory: {e}")
            logger.warning("Using in-memory cache only (no persistence)")
            self.cache_dir = None

        # Cache configuration
        self.cache_expiry_hours = int(os.getenv("CACHE_EXPIRY_HOURS", "24"))
        self.max_cached_words_per_topic = int(os.getenv("MAX_CACHED_WORDS", "100"))
        self.cache_version = "1.0"

        # In-memory cache for fast access
        self.memory_cache: Dict[str, List[Dict[str, Any]]] = {}
        self.cache_metadata: Dict[str, Dict[str, Any]] = {}

        logger.info(f"WordCacheManager initialized with cache_dir: {self.cache_dir}")

    async def initialize(self):
        """Initialize cache manager by loading existing cache files."""
        try:
            logger.info("Loading existing cache files...")

            # Skip file loading if no cache directory (in-memory only)
            if self.cache_dir is None:
                logger.info("In-memory cache mode - no file loading")
                return

            # Load all cache files into memory
            cache_files = list(self.cache_dir.glob("*.json"))
            loaded_count = 0

            for cache_file in cache_files:
                if cache_file.stem.endswith("_meta"):
                    continue  # Skip metadata files

                try:
                    cache_key = cache_file.stem
                    with open(cache_file, 'r') as f:
                        cached_data = json.load(f)

                    # Validate cache structure
                    if self._validate_cache_data(cached_data):
                        self.memory_cache[cache_key] = cached_data["words"]
                        self.cache_metadata[cache_key] = cached_data["metadata"]
                        loaded_count += 1
                        logger.info(f"Loaded cache: {cache_key} ({len(cached_data['words'])} words)")
                    else:
                        logger.warning(f"Invalid cache file: {cache_file}")

                except Exception as e:
                    logger.error(f"Failed to load cache file {cache_file}: {e}")

            logger.info(f"Cache manager initialized with {loaded_count} cached topics")

        except Exception as e:
            logger.error(f"Failed to initialize cache manager: {e}")

    def _validate_cache_data(self, data: Dict[str, Any]) -> bool:
        """Validate cache data structure."""
        required_keys = ["words", "metadata", "version"]
        if not all(key in data for key in required_keys):
            return False

        # Check metadata structure
        metadata = data["metadata"]
        required_meta_keys = ["created_at", "topic", "difficulty", "word_count"]
        if not all(key in metadata for key in required_meta_keys):
            return False

        # Check words structure
        words = data["words"]
        if not isinstance(words, list) or not words:
            return True  # Empty cache is valid

        # Validate first word structure
        sample_word = words[0]
        required_word_keys = ["word", "clue", "similarity", "source"]
        return all(key in sample_word for key in required_word_keys)

    async def get_cached_words(
        self,
        topic: str,
        difficulty: str = "medium",
        max_words: int = 15
    ) -> List[Dict[str, Any]]:
        """
        Get cached words for a topic and difficulty.

        Returns cached words if available and fresh, empty list otherwise.
        """
        cache_key = self._get_cache_key(topic, difficulty)

        # Check memory cache first
        if cache_key in self.memory_cache:
            # Check if cache is still fresh
            if self._is_cache_fresh(cache_key):
                cached_words = self.memory_cache[cache_key]
                logger.info(f"Using cached words for {cache_key}: {len(cached_words)} words")

                # Return requested number of words
                return cached_words[:max_words]
            else:
                logger.info(f"Cache expired for {cache_key}")
                await self._remove_expired_cache(cache_key)

        logger.info(f"No fresh cache available for {cache_key}")
        return []

    async def cache_words(
        self,
        topic: str,
        difficulty: str,
        words: List[Dict[str, Any]],
        source: str = "vector_search"
    ) -> bool:
        """
        Cache words for future use.

        Args:
            topic: Topic name
            difficulty: Difficulty level
            words: List of word objects with clues
            source: Source of the words (e.g., "vector_search")
        """
        try:
            cache_key = self._get_cache_key(topic, difficulty)

            # Enhance words with caching metadata
            enhanced_words = []
            for word in words[:self.max_cached_words_per_topic]:
                enhanced_word = {
                    **word,
                    "cached_at": datetime.utcnow().isoformat(),
                    "cache_source": source
                }
                enhanced_words.append(enhanced_word)

            # Create cache data structure
            cache_data = {
                "version": self.cache_version,
                "words": enhanced_words,
                "metadata": {
                    "topic": topic,
                    "difficulty": difficulty,
                    "word_count": len(enhanced_words),
                    "created_at": datetime.utcnow().isoformat(),
                    "source": source,
                    "expiry_hours": self.cache_expiry_hours
                }
            }

            # Save to file (if cache directory available)
            if self.cache_dir is not None:
                cache_file = self.cache_dir / f"{cache_key}.json"
                with open(cache_file, 'w') as f:
                    json.dump(cache_data, f, indent=2)

            # Update memory cache
            self.memory_cache[cache_key] = enhanced_words
            self.cache_metadata[cache_key] = cache_data["metadata"]

            logger.info(f"Cached {len(enhanced_words)} words for {cache_key}")
            return True

        except Exception as e:
            logger.error(f"Failed to cache words for {topic}/{difficulty}: {e}")
            return False

    def _get_cache_key(self, topic: str, difficulty: str) -> str:
        """Generate cache key from topic and difficulty."""
        return f"{topic.lower()}_{difficulty.lower()}"

    def _is_cache_fresh(self, cache_key: str) -> bool:
        """Check if cache is still fresh (not expired)."""
        if cache_key not in self.cache_metadata:
            return False

        metadata = self.cache_metadata[cache_key]
        created_at = datetime.fromisoformat(metadata["created_at"])
        expiry_hours = metadata.get("expiry_hours", self.cache_expiry_hours)

        expiry_time = created_at + timedelta(hours=expiry_hours)
        return datetime.utcnow() < expiry_time

    async def _remove_expired_cache(self, cache_key: str):
        """Remove expired cache from memory and disk."""
        try:
            # Remove from memory
            if cache_key in self.memory_cache:
                del self.memory_cache[cache_key]
            if cache_key in self.cache_metadata:
                del self.cache_metadata[cache_key]

            # Remove from disk (if cache directory available)
            if self.cache_dir is not None:
                cache_file = self.cache_dir / f"{cache_key}.json"
                if cache_file.exists():
                    cache_file.unlink()

            logger.info(f"Removed expired cache: {cache_key}")

        except Exception as e:
            logger.error(f"Failed to remove expired cache {cache_key}: {e}")

    async def warm_cache_from_static(self, static_words: Dict[str, List[Dict[str, Any]]]):
        """
        Warm cache with high-quality static words as bootstrap data.
        This converts the existing static words to cache format.
        """
        try:
            logger.info("Warming cache with bootstrap data from static words...")

            cached_count = 0
            for topic, words in static_words.items():
                if not words:
                    continue

                # Convert static words to cache format
                cache_words = []
                for word_obj in words:
                    cache_word = {
                        "word": word_obj["word"].upper(),
                        "clue": word_obj.get("clue", f"Related to {topic.lower()}"),
                        "similarity": 0.9,  # Mark as high quality
                        "source": "bootstrap_static",
                        "quality_score": 100  # High quality bootstrap data
                    }
                    cache_words.append(cache_word)

                # Cache for different difficulties
                for difficulty in ["easy", "medium", "hard"]:
                    # Filter by difficulty
                    filtered_words = self._filter_words_by_difficulty(cache_words, difficulty)

                    if filtered_words:
                        success = await self.cache_words(topic, difficulty, filtered_words, "bootstrap")
                        if success:
                            cached_count += 1

            logger.info(f"Cache warming completed: {cached_count} topic/difficulty combinations cached")

        except Exception as e:
            logger.error(f"Failed to warm cache: {e}")

    def _filter_words_by_difficulty(self, words: List[Dict[str, Any]], difficulty: str) -> List[Dict[str, Any]]:
        """Filter words by difficulty level."""
        difficulty_map = {
            "easy": {"min_len": 3, "max_len": 8},
            "medium": {"min_len": 4, "max_len": 10},
            "hard": {"min_len": 5, "max_len": 15}
        }

        criteria = difficulty_map.get(difficulty, difficulty_map["medium"])

        filtered = []
        for word_obj in words:
            word_len = len(word_obj["word"])
            if criteria["min_len"] <= word_len <= criteria["max_len"]:
                filtered.append(word_obj)

        return filtered

    def get_cache_stats(self) -> Dict[str, Any]:
        """Get cache statistics for monitoring."""
        total_words = sum(len(words) for words in self.memory_cache.values())

        # Count fresh vs expired caches
        fresh_caches = sum(1 for key in self.memory_cache.keys() if self._is_cache_fresh(key))
        total_caches = len(self.memory_cache)

        return {
            "total_cached_topics": total_caches,
            "fresh_caches": fresh_caches,
            "expired_caches": total_caches - fresh_caches,
            "total_cached_words": total_words,
            "cache_directory": str(self.cache_dir),
            "cache_expiry_hours": self.cache_expiry_hours
        }

    async def cleanup_expired_caches(self):
        """Clean up all expired caches."""
        expired_keys = [
            key for key in self.memory_cache.keys()
            if not self._is_cache_fresh(key)
        ]

        for key in expired_keys:
            await self._remove_expired_cache(key)

        logger.info(f"Cleaned up {len(expired_keys)} expired caches")

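A minimal usage sketch of WordCacheManager as defined above, assuming the backend package is importable as src.services.word_cache (the same path the integration tests use); the topic and word data are illustrative placeholders, not project data.

import asyncio

from src.services.word_cache import WordCacheManager

async def demo():
    # Point the manager at a scratch directory; it falls back to /tmp or
    # in-memory mode if the directory cannot be created.
    cache = WordCacheManager(cache_dir="/tmp/crossword_cache_demo")
    await cache.initialize()

    # Store a few discovered words for a topic/difficulty pair.
    await cache.cache_words("animals", "medium", [
        {"word": "OTTER", "clue": "Playful river mammal", "similarity": 0.82, "source": "vector_search"},
    ])

    # Later lookups hit the in-memory cache first; persisted JSON survives restarts.
    words = await cache.get_cached_words("animals", "medium", max_words=10)
    print(words)
    print(cache.get_cache_stats())

if __name__ == "__main__":
    asyncio.run(demo())
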
crossword-app/backend-py/test-integration/test_boundary_fix.py
ADDED
@@ -0,0 +1,147 @@
#!/usr/bin/env python3

import sys
import asyncio
from pathlib import Path

# Add project root to path
project_root = Path(__file__).parent.parent  # Go up from test-integration to backend-py
sys.path.insert(0, str(project_root))

from src.services.crossword_generator import CrosswordGenerator

async def test_boundary_fix():
    """Test that the boundary fix works correctly."""

    # Sample words that are known to cause boundary issues
    test_words = [
        {"word": "COMPUTER", "clue": "Electronic device"},
        {"word": "MACHINE", "clue": "Device with moving parts"},
        {"word": "SCIENCE", "clue": "Systematic study"},
        {"word": "EXPERT", "clue": "Specialist"},
        {"word": "CODE", "clue": "Programming text"},
        {"word": "DATA", "clue": "Information"}
    ]

    generator = CrosswordGenerator()

    print("Testing Boundary Fix")
    print("=" * 50)

    # Generate a crossword
    result = generator._create_grid(test_words)

    if not result:
        print("Grid generation failed")
        return False

    grid = result["grid"]
    placed_words = result["placed_words"]

    print(f"Generated grid with {len(placed_words)} words")
    print(f"Grid size: {len(grid)}x{len(grid[0])}")

    # Display the grid
    print("\nGenerated Grid:")
    for i, row in enumerate(grid):
        row_str = " ".join(cell if cell != "." else " " for cell in row)
        print(f"{i:2d} | {row_str}")

    print(f"\nPlaced Words:")
    for word in placed_words:
        print(f"  {word['word']} at ({word['row']},{word['col']}) {word['direction']}")

    # Analyze for boundary violations
    print(f"\nAnalyzing for boundary violations...")

    violations = []

    # Check horizontal words
    for r in range(len(grid)):
        current_word = ""
        word_start = -1

        for c in range(len(grid[r])):
            if grid[r][c] != ".":
                if current_word == "":
                    word_start = c
                current_word += grid[r][c]
            else:
                if current_word:
                    # Word ended - check if it's a valid placed word
                    is_valid_word = any(
                        placed['word'] == current_word and
                        placed['row'] == r and
                        placed['col'] == word_start and
                        placed['direction'] == 'horizontal'
                        for placed in placed_words
                    )
                    if not is_valid_word and len(current_word) > 1:
                        violations.append(f"Invalid horizontal word '{current_word}' at ({r},{word_start})")
                current_word = ""

        # Check word at end of row
        if current_word:
            is_valid_word = any(
                placed['word'] == current_word and
                placed['row'] == r and
                placed['col'] == word_start and
                placed['direction'] == 'horizontal'
                for placed in placed_words
            )
            if not is_valid_word and len(current_word) > 1:
                violations.append(f"Invalid horizontal word '{current_word}' at ({r},{word_start})")

    # Check vertical words
    for c in range(len(grid[0])):
        current_word = ""
        word_start = -1

        for r in range(len(grid)):
            if grid[r][c] != ".":
                if current_word == "":
                    word_start = r
                current_word += grid[r][c]
            else:
                if current_word:
                    # Word ended - check if it's a valid placed word
                    is_valid_word = any(
                        placed['word'] == current_word and
                        placed['row'] == word_start and
                        placed['col'] == c and
                        placed['direction'] == 'vertical'
                        for placed in placed_words
                    )
                    if not is_valid_word and len(current_word) > 1:
                        violations.append(f"Invalid vertical word '{current_word}' at ({word_start},{c})")
                current_word = ""

        # Check word at end of column
        if current_word:
            is_valid_word = any(
                placed['word'] == current_word and
                placed['row'] == word_start and
                placed['col'] == c and
                placed['direction'] == 'vertical'
                for placed in placed_words
            )
            if not is_valid_word and len(current_word) > 1:
                violations.append(f"Invalid vertical word '{current_word}' at ({word_start},{c})")

    # Report results
    if violations:
        print(f"Found {len(violations)} boundary violations:")
        for violation in violations:
            print(f"  - {violation}")
        return False
    else:
        print(f"No boundary violations found!")
        print(f"All words in grid are properly placed and bounded")
        return True

if __name__ == "__main__":
    success = asyncio.run(test_boundary_fix())
    if success:
        print(f"\nBoundary fix is working correctly!")
    else:
        print(f"\nBoundary fix needs more work!")

crossword-app/backend-py/test-integration/test_bounds_comprehensive.py
ADDED
@@ -0,0 +1,266 @@
#!/usr/bin/env python3
"""
Comprehensive test for bounds checking fixes in crossword generator.
"""

import asyncio
import sys
import pytest
from pathlib import Path

# Add project root to path
project_root = Path(__file__).parent.parent  # Go up from test-integration to backend-py
sys.path.insert(0, str(project_root))

from src.services.crossword_generator_fixed import CrosswordGeneratorFixed

class TestBoundsChecking:
    """Test all bounds checking in crossword generator."""

    def setup_method(self):
        """Setup test instance."""
        self.generator = CrosswordGeneratorFixed(vector_service=None)

    def test_can_place_word_bounds_horizontal(self):
        """Test _can_place_word bounds checking for horizontal placement."""
        # Create small grid
        grid = [["." for _ in range(5)] for _ in range(5)]

        # Test cases that should fail bounds checking
        assert not self.generator._can_place_word(grid, "TOOLONG", 2, 1, "horizontal")  # Word too long
        assert not self.generator._can_place_word(grid, "TEST", -1, 1, "horizontal")  # Negative row
        assert not self.generator._can_place_word(grid, "TEST", 1, -1, "horizontal")  # Negative col
        assert not self.generator._can_place_word(grid, "TEST", 5, 1, "horizontal")  # Row >= size
        assert not self.generator._can_place_word(grid, "TEST", 1, 5, "horizontal")  # Col >= size
        assert not self.generator._can_place_word(grid, "TEST", 1, 3, "horizontal")  # Word extends beyond grid

        # Test cases that should pass
        assert self.generator._can_place_word(grid, "TEST", 2, 1, "horizontal")  # Valid placement
        assert self.generator._can_place_word(grid, "A", 0, 0, "horizontal")  # Single letter

    def test_can_place_word_bounds_vertical(self):
        """Test _can_place_word bounds checking for vertical placement."""
        # Create small grid
        grid = [["." for _ in range(5)] for _ in range(5)]

        # Test cases that should fail bounds checking
        assert not self.generator._can_place_word(grid, "TOOLONG", 1, 2, "vertical")  # Word too long
        assert not self.generator._can_place_word(grid, "TEST", -1, 1, "vertical")  # Negative row
        assert not self.generator._can_place_word(grid, "TEST", 1, -1, "vertical")  # Negative col
        assert not self.generator._can_place_word(grid, "TEST", 5, 1, "vertical")  # Row >= size
        assert not self.generator._can_place_word(grid, "TEST", 1, 5, "vertical")  # Col >= size
        assert not self.generator._can_place_word(grid, "TEST", 3, 1, "vertical")  # Word extends beyond grid

        # Test cases that should pass
        assert self.generator._can_place_word(grid, "TEST", 1, 2, "vertical")  # Valid placement
        assert self.generator._can_place_word(grid, "A", 0, 0, "vertical")  # Single letter

    def test_place_word_bounds_horizontal(self):
        """Test _place_word bounds checking for horizontal placement."""
        grid = [["." for _ in range(5)] for _ in range(5)]

        # Valid placement should work
        original_state = self.generator._place_word(grid, "TEST", 2, 1, "horizontal")
        assert len(original_state) == 4
        assert grid[2][1] == "T"
        assert grid[2][4] == "T"

        # Test out-of-bounds placement should raise IndexError
        with pytest.raises(IndexError):
            self.generator._place_word(grid, "TOOLONG", 2, 1, "horizontal")

        with pytest.raises(IndexError):
            self.generator._place_word(grid, "TEST", -1, 1, "horizontal")

        with pytest.raises(IndexError):
            self.generator._place_word(grid, "TEST", 5, 1, "horizontal")

        with pytest.raises(IndexError):
            self.generator._place_word(grid, "TEST", 1, 5, "horizontal")

    def test_place_word_bounds_vertical(self):
        """Test _place_word bounds checking for vertical placement."""
        grid = [["." for _ in range(5)] for _ in range(5)]

        # Valid placement should work
        original_state = self.generator._place_word(grid, "TEST", 1, 2, "vertical")
        assert len(original_state) == 4
        assert grid[1][2] == "T"
        assert grid[4][2] == "T"

        # Test out-of-bounds placement should raise IndexError
        with pytest.raises(IndexError):
            self.generator._place_word(grid, "TOOLONG", 1, 2, "vertical")

        with pytest.raises(IndexError):
            self.generator._place_word(grid, "TEST", -1, 2, "vertical")

        with pytest.raises(IndexError):
            self.generator._place_word(grid, "TEST", 5, 2, "vertical")

        with pytest.raises(IndexError):
            self.generator._place_word(grid, "TEST", 2, 5, "vertical")

    def test_remove_word_bounds(self):
        """Test _remove_word bounds checking."""
        grid = [["." for _ in range(5)] for _ in range(5)]

        # Place a word first
        original_state = self.generator._place_word(grid, "TEST", 2, 1, "horizontal")

        # Normal removal should work
        self.generator._remove_word(grid, original_state)
        assert grid[2][1] == "."

        # Test invalid original state should raise IndexError
        bad_state = [{"row": -1, "col": 1, "value": "."}]
        with pytest.raises(IndexError):
            self.generator._remove_word(grid, bad_state)

        bad_state = [{"row": 5, "col": 1, "value": "."}]
        with pytest.raises(IndexError):
            self.generator._remove_word(grid, bad_state)

        bad_state = [{"row": 1, "col": -1, "value": "."}]
        with pytest.raises(IndexError):
            self.generator._remove_word(grid, bad_state)

        bad_state = [{"row": 1, "col": 5, "value": "."}]
        with pytest.raises(IndexError):
            self.generator._remove_word(grid, bad_state)

    def test_create_simple_cross_bounds(self):
        """Test _create_simple_cross bounds checking."""
        # Test with words that have intersections
        word_list = ["CAT", "TOY"]  # 'T' intersection
        word_objs = [{"word": w, "clue": f"Clue for {w}"} for w in word_list]

        # This should work without bounds errors
        result = self.generator._create_simple_cross(word_list, word_objs)
        assert result is not None
        assert len(result["placed_words"]) == 2

        # Test with words that might cause issues
        word_list = ["A", "A"]  # Same single letter
        word_objs = [{"word": w, "clue": f"Clue for {w}"} for w in word_list]

        # This should not crash with bounds errors
        result = self.generator._create_simple_cross(word_list, word_objs)
        # May return None due to placement issues, but should not crash

    def test_trim_grid_bounds(self):
        """Test _trim_grid bounds checking."""
        # Create a grid with words placed
        grid = [["." for _ in range(10)] for _ in range(10)]

        # Place some letters
        grid[5][3] = "T"
        grid[5][4] = "E"
        grid[5][5] = "S"
        grid[5][6] = "T"

        placed_words = [{
            "word": "TEST",
            "row": 5,
            "col": 3,
            "direction": "horizontal",
            "number": 1
        }]

        # This should work without bounds errors
        result = self.generator._trim_grid(grid, placed_words)
        assert result is not None
        assert "grid" in result
        assert "placed_words" in result

        # Test with edge case placements
        placed_words = [{
            "word": "A",
            "row": 0,
            "col": 0,
            "direction": "horizontal",
            "number": 1
        }]

        grid[0][0] = "A"
        result = self.generator._trim_grid(grid, placed_words)
        assert result is not None

    def test_calculation_placement_score_bounds(self):
        """Test _calculate_placement_score bounds checking."""
        grid = [["." for _ in range(5)] for _ in range(5)]

        # Place some letters for intersection testing
        grid[2][2] = "T"
        grid[2][3] = "E"

        placement = {"row": 2, "col": 2, "direction": "horizontal"}
        placed_words = []

        # This should work without bounds errors
        score = self.generator._calculate_placement_score(grid, "TEST", placement, placed_words)
        assert isinstance(score, int)

        # Test with out-of-bounds placement (should handle gracefully)
        placement = {"row": 4, "col": 3, "direction": "horizontal"}  # Would extend beyond grid
        score = self.generator._calculate_placement_score(grid, "TEST", placement, placed_words)
        assert isinstance(score, int)

        # Test with negative placement (should handle gracefully)
        placement = {"row": -1, "col": 0, "direction": "horizontal"}
        score = self.generator._calculate_placement_score(grid, "TEST", placement, placed_words)
        assert isinstance(score, int)

async def test_full_generation_stress():
    """Stress test full generation to catch index errors."""
    generator = CrosswordGeneratorFixed(vector_service=None)

    # Mock word selection to return test words
    test_words = [
        {"word": "CAT", "clue": "Feline pet"},
        {"word": "DOG", "clue": "Man's best friend"},
        {"word": "BIRD", "clue": "Flying animal"},
        {"word": "FISH", "clue": "Aquatic animal"},
        {"word": "ELEPHANT", "clue": "Large mammal"},
        {"word": "TIGER", "clue": "Striped cat"},
        {"word": "HORSE", "clue": "Riding animal"},
        {"word": "BEAR", "clue": "Large carnivore"},
        {"word": "WOLF", "clue": "Pack animal"},
        {"word": "LION", "clue": "King of jungle"}
    ]

    generator._select_words = lambda topics, difficulty, use_ai: test_words

    # Run multiple generation attempts
    for i in range(20):
        try:
            result = await generator.generate_puzzle(["animals"], "medium", use_ai=False)
            if result:
                print(f"Generation {i+1} succeeded")
            else:
                print(f"Generation {i+1} returned None")
        except IndexError as e:
            print(f"Index error in generation {i+1}: {e}")
            raise
        except Exception as e:
            print(f"Other error in generation {i+1}: {e}")
            # Don't raise for other errors, just continue

    print("All stress test generations completed without index errors!")

if __name__ == "__main__":
    # Run tests
    print("Running comprehensive bounds checking tests...")

    # Run pytest on this file
    import subprocess
    result = subprocess.run([sys.executable, "-m", "pytest", __file__, "-v"],
                            capture_output=True, text=True)

    print("STDOUT:", result.stdout)
    if result.stderr:
        print("STDERR:", result.stderr)

    # Run stress test
    print("\nRunning stress test...")
    asyncio.run(test_full_generation_stress())

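The cases above reduce to a start-position check plus an end-position check per direction. The sketch below is a simplified, standalone illustration of that guard, not the project's _can_place_word (which also rejects letter conflicts with words already on the grid); the function name is hypothetical.

def fits_in_grid(grid, word, row, col, direction):
    # Illustrative bounds guard only: reject placements whose first or last
    # letter would fall outside the grid, matching the cases the tests exercise.
    rows, cols = len(grid), len(grid[0])
    if row < 0 or col < 0 or row >= rows or col >= cols:
        return False  # start cell outside the grid
    if direction == "horizontal":
        return col + len(word) <= cols  # last letter must stay inside the row
    return row + len(word) <= rows  # vertical: last letter must stay inside the column
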
crossword-app/backend-py/test-integration/test_bounds_fix.py
ADDED
@@ -0,0 +1,90 @@
#!/usr/bin/env python3
"""
Quick test to verify the bounds checking fix.
"""

import sys
from pathlib import Path

# Add project root to path
project_root = Path(__file__).parent.parent  # Go up from test-integration to backend-py
sys.path.insert(0, str(project_root))

from src.services.crossword_generator_fixed import CrosswordGeneratorFixed

def test_bounds_checking():
    """Test that placement score calculation doesn't crash with out-of-bounds access."""
    print("Testing bounds checking fix...")

    generator = CrosswordGeneratorFixed()

    # Create a small grid
    grid = [["." for _ in range(5)] for _ in range(5)]

    # Test placement that would go out of bounds
    placement = {
        "row": 3,  # Starting at row 3
        "col": 2,  # Starting at col 2
        "direction": "vertical"
    }

    # Word that would extend beyond grid (3+8=11 > 5)
    word = "ELEPHANT"  # 8 letters, would go from row 3 to row 10 (out of bounds)

    try:
        # This should NOT crash with bounds checking
        score = generator._calculate_placement_score(grid, word, placement, [])
        print(f"Success! Placement score calculated: {score}")
        print("Bounds checking is working correctly")
        return True
    except IndexError as e:
        print(f"IndexError still occurs: {e}")
        return False
    except Exception as e:
        print(f"Other error: {e}")
        return False

def test_valid_placement():
    """Test that valid placements still work correctly."""
    print("\nTesting valid placement scoring...")

    generator = CrosswordGeneratorFixed()

    # Create a grid with some letters
    grid = [["." for _ in range(8)] for _ in range(8)]
    grid[2][2] = "A"  # Place an 'A' at position (2,2)

    # Test placement that intersects properly
    placement = {
        "row": 2,
        "col": 1,
        "direction": "horizontal"
    }

    word = "CAT"  # Should intersect at the 'A'

    try:
        score = generator._calculate_placement_score(grid, word, placement, [])
        print(f"Valid placement score: {score}")

        # Should have intersection bonus (score > 100)
        if score > 300:  # Base 100 + intersection 200
            print("Intersection detection working")
        else:
            print(f"Expected intersection bonus, got score {score}")

        return True
    except Exception as e:
        print(f"Error with valid placement: {e}")
        return False

if __name__ == "__main__":
    print("Testing crossword generator bounds fix\n")

    test1_pass = test_bounds_checking()
    test2_pass = test_valid_placement()

    if test1_pass and test2_pass:
        print("\nAll tests passed! The bounds checking fix is working.")
    else:
        print("\nSome tests failed. More work needed.")

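The thresholds in the test above come from the comment "Base 100 + intersection 200"; the real scorer evidently adds further bonuses, since the test expects strictly more than 300 for a single intersection. A toy scorer with that general shape, for illustration only (the function name is hypothetical, not the project's _calculate_placement_score):

def toy_placement_score(grid, word, row, col, direction):
    # Base score of 100, plus 200 per cell that reuses a letter already on the
    # grid; out-of-bounds placements return 0 instead of raising IndexError.
    rows, cols = len(grid), len(grid[0])
    score = 100
    for i, letter in enumerate(word):
        r = row + (i if direction == "vertical" else 0)
        c = col + (i if direction == "horizontal" else 0)
        if not (0 <= r < rows and 0 <= c < cols):
            return 0  # reject gracefully rather than crash
        if grid[r][c] == letter:
            score += 200  # intersection with an existing letter
    return score
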
crossword-app/backend-py/test-integration/test_cache_permissions.py
ADDED
@@ -0,0 +1,88 @@
#!/usr/bin/env python3
"""
Test cache permission handling.
"""

import asyncio
import sys
import tempfile
import os
from pathlib import Path
from unittest.mock import patch

# Add project root to path
project_root = Path(__file__).parent.parent  # Go up from test-integration to backend-py
sys.path.insert(0, str(project_root))

from src.services.word_cache import WordCacheManager

async def test_permission_denied():
    """Test cache handling when permissions are denied."""
    print("Testing permission denied scenario...")

    # Mock Path.mkdir to raise PermissionError
    with patch.object(Path, 'mkdir', side_effect=PermissionError("Permission denied")):
        try:
            cache_manager = WordCacheManager(cache_dir="/some/protected/path")
            await cache_manager.initialize()

            print(f"Cache manager created with fallback: {cache_manager.cache_dir}")

            # Test caching still works (in-memory or temp dir)
            test_words = [
                {"word": "TEST", "clue": "A test word", "similarity": 0.8, "source": "test"}
            ]

            success = await cache_manager.cache_words("TestTopic", "medium", test_words)
            print(f"Caching {'succeeded' if success else 'failed'}")

            cached_words = await cache_manager.get_cached_words("TestTopic", "medium", 5)
            print(f"Retrieved {len(cached_words)} cached words")

            return True

        except Exception as e:
            print(f"Permission handling failed: {e}")
            return False

async def test_in_memory_mode():
    """Test pure in-memory cache mode."""
    print("\nTesting in-memory only mode...")

    # Force in-memory mode by setting cache_dir to None
    cache_manager = WordCacheManager()
    cache_manager.cache_dir = None  # Force in-memory mode

    await cache_manager.initialize()

    # Test that caching still works in memory
    test_words = [
        {"word": "MEMORY", "clue": "Stored in RAM", "similarity": 0.9, "source": "test"}
    ]

    success = await cache_manager.cache_words("Memory", "medium", test_words)
    print(f"In-memory caching {'succeeded' if success else 'failed'}")

    cached_words = await cache_manager.get_cached_words("Memory", "medium", 5)
    print(f"Retrieved {len(cached_words)} words from memory")

    stats = cache_manager.get_cache_stats()
    print(f"Cache stats: {stats}")

    return len(cached_words) > 0

async def main():
    """Run permission tests."""
    print("Testing Cache Permission Handling\n")

    test1 = await test_permission_denied()
    test2 = await test_in_memory_mode()

    if test1 and test2:
        print("\nAll permission tests passed!")
        print("Cache system gracefully handles permission issues")
    else:
        print("\nSome permission tests failed")

if __name__ == "__main__":
    asyncio.run(main())

crossword-app/backend-py/test-integration/test_cache_system.py
ADDED
@@ -0,0 +1,127 @@
#!/usr/bin/env python3
"""
Test the new cache system to verify it works correctly.
"""

import asyncio
import sys
import tempfile
import shutil
from pathlib import Path

# Add project root to path
project_root = Path(__file__).parent.parent  # Go up from test-integration to backend-py
sys.path.insert(0, str(project_root))

from src.services.word_cache import WordCacheManager

async def test_cache_system():
    """Test the cache system functionality."""
    print("Testing Word Cache System\n")

    # Create temporary cache directory
    temp_dir = tempfile.mkdtemp()
    print(f"Using temporary cache directory: {temp_dir}")

    try:
        # Initialize cache manager
        cache_manager = WordCacheManager(cache_dir=temp_dir)
        await cache_manager.initialize()

        # Test 1: Cache some words
        print("\nTest 1: Caching words")
        test_words = [
            {"word": "ELEPHANT", "clue": "Large mammal with trunk", "similarity": 0.8, "source": "vector_search"},
            {"word": "TIGER", "clue": "Striped big cat", "similarity": 0.7, "source": "vector_search"},
            {"word": "LION", "clue": "King of jungle", "similarity": 0.75, "source": "vector_search"},
        ]

        success = await cache_manager.cache_words("Animals", "medium", test_words)
        print(f"Cache operation {'succeeded' if success else 'failed'}")

        # Test 2: Retrieve cached words
        print("\nTest 2: Retrieving cached words")
        cached_words = await cache_manager.get_cached_words("Animals", "medium", 5)
        print(f"Retrieved {len(cached_words)} cached words")

        if cached_words:
            print("Cached words:")
            for word in cached_words:
                print(f"  - {word['word']}: {word['clue']}")

        # Test 3: Cache statistics
        print("\nTest 3: Cache statistics")
        stats = cache_manager.get_cache_stats()
        print(f"Cache stats: {stats}")

        # Test 4: Test non-existent topic
        print("\nTest 4: Non-existent topic")
        empty_words = await cache_manager.get_cached_words("NonExistent", "medium", 5)
        print(f"Non-existent topic returned {len(empty_words)} words (expected 0)")

        # Test 5: Test bootstrap warming (if static data exists)
        print("\nTest 5: Bootstrap warming simulation")
        static_data = {
            "Technology": [
                {"word": "COMPUTER", "clue": "Electronic device"},
                {"word": "ROBOT", "clue": "Automated machine"},
            ]
        }
        await cache_manager.warm_cache_from_static(static_data)

        tech_words = await cache_manager.get_cached_words("Technology", "medium", 5)
        print(f"Bootstrap warming: Retrieved {len(tech_words)} tech words")

        print("\nAll cache system tests completed!")
        return True

    except Exception as e:
        print(f"\nCache system test failed: {e}")
        import traceback
        traceback.print_exc()
        return False

    finally:
        # Cleanup temporary directory
        shutil.rmtree(temp_dir)
        print(f"Cleaned up temporary directory")

async def test_vector_integration():
    """Test integration with vector search service."""
    print("\nTesting Vector Search Integration\n")

    try:
        from src.services.vector_search import VectorSearchService

        # Create vector service (won't initialize model, just test cache integration)
        vector_service = VectorSearchService()

        # Test cache fallback without initialization
        print("Testing cache fallback when vector search not initialized")
        fallback_words = await vector_service._get_cached_fallback("Animals", "medium", 5)
        print(f"Fallback returned {len(fallback_words)} words")

        print("Vector integration test completed!")
        return True

    except Exception as e:
        print(f"Vector integration test failed: {e}")
        import traceback
        traceback.print_exc()
        return False

async def main():
    """Run all tests."""
    print("Testing Cache System Replacement\n")

    cache_test = await test_cache_system()
    integration_test = await test_vector_integration()

    if cache_test and integration_test:
        print("\nAll tests passed! Cache system is working correctly.")
        print("Static word dependencies have been successfully replaced with caching.")
    else:
        print("\nSome tests failed. Check the output above.")

if __name__ == "__main__":
    asyncio.run(main())

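For reference, the on-disk JSON that cache_words() writes and _validate_cache_data() accepts has the shape below (one file per topic/difficulty key, e.g. animals_medium.json); the concrete values and timestamps here are illustrative.

# Illustrative shape of a cached topic file; field names match what
# cache_words() writes and _validate_cache_data() requires.
example_cache_file = {
    "version": "1.0",
    "words": [
        {
            "word": "TIGER",
            "clue": "Striped big cat",
            "similarity": 0.7,
            "source": "vector_search",
            "cached_at": "2024-01-01T00:00:00",      # added by cache_words()
            "cache_source": "vector_search",          # added by cache_words()
        }
    ],
    "metadata": {
        "topic": "Animals",
        "difficulty": "medium",
        "word_count": 1,
        "created_at": "2024-01-01T00:00:00",
        "source": "vector_search",
        "expiry_hours": 24,
    },
}
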
crossword-app/backend-py/test-integration/test_crossword_display.py
ADDED
@@ -0,0 +1,85 @@
#!/usr/bin/env python3

import json

# The crossword grid from the API response
grid = [
    [".", ".", ".", ".", ".", ".", ".", ".", "D", ".", "."],
    [".", ".", ".", ".", ".", "M", "I", "C", "E", ".", "."],
    [".", ".", ".", "H", "U", "M", "A", "N", "E", ".", "."],
    [".", ".", ".", "W", "H", "A", "L", "E", "R", ".", "."],
    [".", "P", "Z", ".", ".", ".", "L", ".", ".", ".", "."],
    ["Z", "O", "O", "L", "O", "G", "I", "C", "A", "L", "."],
    [".", "U", "O", ".", ".", ".", "G", "E", "E", "S", "E"],
    [".", "L", "L", "H", "U", "M", "A", "N", "I", "T", "Y"],
    [".", "T", "O", ".", ".", ".", "T", "I", "G", "E", "R"],
    [".", "R", "G", ".", "B", "I", "O", "L", "O", "G", "Y"],
    [".", "Y", "Y", ".", ".", ".", "R", ".", ".", ".", "."]
]

print("Generated Crossword Grid:")
print("=" * 50)

for i, row in enumerate(grid):
    row_str = ""
    for j, cell in enumerate(row):
        if cell == ".":
            row_str += "  "  # Empty space
        else:
            row_str += f"{cell} "
    print(f"{i:2d} | {row_str}")

print("=" * 50)

# Check for word boundaries
def check_word_boundaries(grid):
    issues = []

    # Horizontal words
    for r in range(len(grid)):
        in_word = False
        word_start = -1
        for c in range(len(grid[r])):
            if grid[r][c] != ".":
                if not in_word:
                    in_word = True
                    word_start = c
            else:
                if in_word:
                    # Word ended
                    word_length = c - word_start
                    word = "".join(grid[r][word_start:c])
                    print(f"Horizontal word at ({r},{word_start}): {word} (length {word_length})")
                    in_word = False

        # Check if word extends to end of row
        if in_word:
            word_length = len(grid[r]) - word_start
            word = "".join(grid[r][word_start:])
            print(f"Horizontal word at ({r},{word_start}): {word} (length {word_length})")

    # Vertical words
    for c in range(len(grid[0])):
        in_word = False
        word_start = -1
        for r in range(len(grid)):
            if grid[r][c] != ".":
                if not in_word:
                    in_word = True
                    word_start = r
            else:
                if in_word:
                    # Word ended
                    word_length = r - word_start
                    word = "".join([grid[i][c] for i in range(word_start, r)])
                    print(f"Vertical word at ({word_start},{c}): {word} (length {word_length})")
                    in_word = False

        # Check if word extends to end of column
        if in_word:
            word_length = len(grid) - word_start
            word = "".join([grid[i][c] for i in range(word_start, len(grid))])
            print(f"Vertical word at ({word_start},{c}): {word} (length {word_length})")

print("\nWord boundary analysis:")
check_word_boundaries(grid)

crossword-app/backend-py/test-integration/test_final_crossword_validation.py
ADDED
@@ -0,0 +1,239 @@
#!/usr/bin/env python3
"""
Final test to validate that the crossword generator produces clean grids
without unwanted prefixes, suffixes, or unintended letter sequences.
"""

import sys
from pathlib import Path

# Add project root to path
project_root = Path(__file__).parent.parent  # Go up from test-integration to backend-py
sys.path.insert(0, str(project_root))

from src.services.crossword_generator_fixed import CrosswordGeneratorFixed

def test_clean_crossword_generation():
    """Test that crossword generation produces clean grids without unwanted sequences."""

    print("Final Crossword Validation Test\n")

    generator = CrosswordGeneratorFixed(vector_service=None)

    # Test multiple scenarios that previously caused issues
    test_scenarios = [
        {
            "name": "Basic Technology Words",
            "words": [
                {"word": "COMPUTER", "clue": "Electronic device"},
                {"word": "MACHINE", "clue": "Device with moving parts"},
                {"word": "SCIENCE", "clue": "Systematic study"},
                {"word": "EXPERT", "clue": "Specialist"},
            ]
        },
        {
            "name": "Similar Words (MACHINE/MACHINERY)",
            "words": [
                {"word": "MACHINE", "clue": "Device with moving parts"},
                {"word": "MACHINERY", "clue": "Mechanical equipment"},
                {"word": "TECHNOLOGY", "clue": "Applied science"},
                {"word": "RESEARCH", "clue": "Investigation"},
            ]
        },
        {
            "name": "Animal Words",
            "words": [
                {"word": "ELEPHANT", "clue": "Large mammal"},
                {"word": "TIGER", "clue": "Striped cat"},
                {"word": "BEAR", "clue": "Large carnivore"},
                {"word": "HORSE", "clue": "Riding animal"},
                {"word": "BIRD", "clue": "Flying creature"},
            ]
        },
        {
            "name": "Mixed Length Words",
            "words": [
                {"word": "CAT", "clue": "Feline pet"},
                {"word": "COMPUTER", "clue": "Electronic device"},
                {"word": "A", "clue": "First letter"},  # Edge case
                {"word": "TECHNOLOGY", "clue": "Applied science"},
            ]
        }
    ]

    all_passed = True

    for i, scenario in enumerate(test_scenarios):
        print(f"=" * 60)
        print(f"TEST {i+1}: {scenario['name']}")
        print(f"=" * 60)

        words = scenario["words"]
        print(f"Testing with {len(words)} words: {[w['word'] for w in words]}")

        try:
            result = generator._create_grid(words)

            if result:
                grid = result["grid"]
                placed_words = result["placed_words"]
                clues = result["clues"]

                print(f"Grid generated successfully")
                print(f"  Grid size: {len(grid)}x{len(grid[0])}")
                print(f"  Words placed: {len(placed_words)}")
                print(f"  Clues generated: {len(clues)}")

                # Print the grid
                print("\nGenerated Grid:")
                print_clean_grid(grid)

                # Validate the grid
                validation_result = validate_grid_cleanliness(grid, placed_words)

                if validation_result["is_clean"]:
                    print("Grid validation: CLEAN - No unwanted sequences")
                else:
                    print("Grid validation: ISSUES FOUND")
                    for issue in validation_result["issues"]:
                        print(f"  - {issue}")
                    all_passed = False

                # Print word placements
                print("\nWord Placements:")
                for j, word_info in enumerate(placed_words):
                    print(f"  {j+1}. {word_info['word']} at ({word_info['row']}, {word_info['col']}) {word_info['direction']}")

            else:
                print("Grid generation returned None - algorithm may be too strict")
                # This might happen if validation is too restrictive

        except Exception as e:
            print(f"Grid generation failed: {e}")
            all_passed = False

        print()

    # Summary
    print("=" * 60)
    print("FINAL SUMMARY")
    print("=" * 60)

    if all_passed:
        print("ALL TESTS PASSED!")
        print("Crossword generator produces clean grids without unwanted sequences")
        print("No more issues with unwanted prefixes, suffixes, or letter combinations")
    else:
        print("Some tests failed - additional improvements needed")

    return all_passed

def print_clean_grid(grid):
    """Print grid in a clean, readable format."""
    if not grid:
        print("  Empty grid")
        return

    # Print column headers
    print("     ", end="")
    for c in range(len(grid[0])):
        print(f"{c:2d}", end="")
    print()

    # Print rows
    for r in range(len(grid)):
        print(f"  {r:2d}: ", end="")
        for c in range(len(grid[0])):
            cell = grid[r][c]
            if cell == ".":
                print(" .", end="")
            else:
                print(f" {cell}", end="")
        print()

def validate_grid_cleanliness(grid, placed_words):
    """Validate that grid contains only intended words without unwanted sequences."""

    issues = []

    # Find all letter sequences in the grid
    all_sequences = []

    # Horizontal sequences
    for r in range(len(grid)):
        current_seq = ""
        start_col = None

        for c in range(len(grid[0])):
            if grid[r][c] != ".":
                if start_col is None:
                    start_col = c
                current_seq += grid[r][c]
            else:
                if current_seq and len(current_seq) > 1:
                    all_sequences.append((r, start_col, "horizontal", current_seq))
                current_seq = ""
                start_col = None

        # Handle end of row
        if current_seq and len(current_seq) > 1:
            all_sequences.append((r, start_col, "horizontal", current_seq))

    # Vertical sequences
    for c in range(len(grid[0])):
        current_seq = ""
        start_row = None

        for r in range(len(grid)):
            if grid[r][c] != ".":
                if start_row is None:
                    start_row = r
                current_seq += grid[r][c]
            else:
                if current_seq and len(current_seq) > 1:
                    all_sequences.append((start_row, c, "vertical", current_seq))
                current_seq = ""
                start_row = None

        # Handle end of column
        if current_seq and len(current_seq) > 1:
            all_sequences.append((start_row, c, "vertical", current_seq))

    # Check if all sequences correspond to intended words
    intended_words = set()
    for word_info in placed_words:
        key = (word_info["row"], word_info["col"], word_info["direction"], word_info["word"])
        intended_words.add(key)

    # Check each sequence
    for row, col, direction, sequence in all_sequences:
        key = (row, col, direction, sequence)
        if key not in intended_words:
            issues.append(f"Unintended sequence: '{sequence}' at ({row}, {col}) {direction}")

    # Check for specific problematic patterns
    for row, col, direction, sequence in all_sequences:
        # Check for 2-letter sequences (should not exist)
        if len(sequence) == 2:
            issues.append(f"Unwanted 2-letter sequence: '{sequence}' at ({row}, {col}) {direction}")

        # Check for words that appear to extend beyond their intended boundaries
        # But exclude cases where both the shorter and longer words are intentionally placed
        placed_word_set = {w["word"] for w in placed_words}
        for word_info in placed_words:
            word = word_info["word"]
            if word in sequence and sequence != word:
                if sequence.startswith(word) or sequence.endswith(word):
                    # Check if the sequence itself is also an intended word
                    if sequence not in placed_word_set:
                        issues.append(f"Word '{word}' appears extended as '{sequence}' at ({row}, {col}) {direction}")

    return {
        "is_clean": len(issues) == 0,
        "issues": issues,
        "total_sequences": len(all_sequences),
        "intended_sequences": len(intended_words)
    }

if __name__ == "__main__":
    test_clean_crossword_generation()

crossword-app/backend-py/test-integration/test_final_validation.py
ADDED
@@ -0,0 +1,133 @@
  1 + #!/usr/bin/env python3
  2 +
  3 + import requests
  4 + import json
  5 +
  6 + def test_api_crossword():
  7 +     """Test that the API generates valid crosswords without boundary issues."""
  8 +
  9 +     url = "http://localhost:7860/api/generate"
 10 +     data = {
 11 +         "topics": ["animals"],
 12 +         "difficulty": "medium",
 13 +         "useAI": True
 14 +     }
 15 +
 16 +     print("🧪 Testing API Crossword Generation")
 17 +     print("=" * 50)
 18 +
 19 +     try:
 20 +         response = requests.post(url, json=data, timeout=30)
 21 +
 22 +         if response.status_code != 200:
 23 +             print(f"❌ API Error: {response.status_code}")
 24 +             print(response.text)
 25 +             return False
 26 +
 27 +         result = response.json()
 28 +
 29 +         if 'detail' in result:
 30 +             print(f"❌ Error: {result['detail']}")
 31 +             return False
 32 +
 33 +         grid = result['grid']
 34 +         clues = result['clues']
 35 +         metadata = result['metadata']
 36 +
 37 +         print(f"✅ Generated crossword with {metadata['wordCount']} words")
 38 +         print(f"Grid size: {len(grid)}x{len(grid[0])}")
 39 +         print(f"AI Generated: {metadata['aiGenerated']}")
 40 +
 41 +         # Validate boundary issues
 42 +         violations = validate_word_boundaries(grid, clues)
 43 +
 44 +         if violations:
 45 +             print(f"\n❌ Found {len(violations)} boundary violations:")
 46 +             for violation in violations:
 47 +                 print(f"  - {violation}")
 48 +             return False
 49 +         else:
 50 +             print(f"\n✅ No boundary violations found!")
 51 +             print(f"✅ All words are properly bounded")
 52 +
 53 +         # Display sample of the grid
 54 +         print(f"\nSample Grid (first 8 rows):")
 55 +         for i, row in enumerate(grid[:8]):
 56 +             row_str = " ".join(cell if cell != "." else " " for cell in row)
 57 +             print(f"{i:2d} | {row_str}")
 58 +
 59 +         return True
 60 +
 61 +     except Exception as e:
 62 +         print(f"❌ Test failed: {e}")
 63 +         return False
 64 +
 65 + def validate_word_boundaries(grid, clues):
 66 +     """Validate that all words in the grid have proper boundaries."""
 67 +     violations = []
 68 +
 69 +     # Create a set of valid word placements from clues
 70 +     valid_words = set()
 71 +     for clue in clues:
 72 +         word = clue['word']
 73 +         pos = clue['position']
 74 +         direction = clue['direction']
 75 +         row, col = pos['row'], pos['col']
 76 +
 77 +         if direction == 'across':
 78 +             valid_words.add((word, row, col, 'horizontal'))
 79 +         else:
 80 +             valid_words.add((word, row, col, 'vertical'))
 81 +
 82 +     # Check all horizontal sequences in grid
 83 +     for r in range(len(grid)):
 84 +         current_word = ""
 85 +         word_start = -1
 86 +
 87 +         for c in range(len(grid[r])):
 88 +             if grid[r][c] != ".":
 89 +                 if current_word == "":
 90 +                     word_start = c
 91 +                 current_word += grid[r][c]
 92 +             else:
 93 +                 if current_word and len(current_word) > 1:
 94 +                     # Check if this is a valid placed word
 95 +                     if (current_word, r, word_start, 'horizontal') not in valid_words:
 96 +                         violations.append(f"Invalid horizontal word '{current_word}' at ({r},{word_start})")
 97 +                 current_word = ""
 98 +
 99 +         # Check word at end of row
100 +         if current_word and len(current_word) > 1:
101 +             if (current_word, r, word_start, 'horizontal') not in valid_words:
102 +                 violations.append(f"Invalid horizontal word '{current_word}' at ({r},{word_start})")
103 +
104 +     # Check all vertical sequences in grid
105 +     for c in range(len(grid[0])):
106 +         current_word = ""
107 +         word_start = -1
108 +
109 +         for r in range(len(grid)):
110 +             if grid[r][c] != ".":
111 +                 if current_word == "":
112 +                     word_start = r
113 +                 current_word += grid[r][c]
114 +             else:
115 +                 if current_word and len(current_word) > 1:
116 +                     # Check if this is a valid placed word
117 +                     if (current_word, word_start, c, 'vertical') not in valid_words:
118 +                         violations.append(f"Invalid vertical word '{current_word}' at ({word_start},{c})")
119 +                 current_word = ""
120 +
121 +         # Check word at end of column
122 +         if current_word and len(current_word) > 1:
123 +             if (current_word, word_start, c, 'vertical') not in valid_words:
124 +                 violations.append(f"Invalid vertical word '{current_word}' at ({word_start},{c})")
125 +
126 +     return violations
127 +
128 + if __name__ == "__main__":
129 +     success = test_api_crossword()
130 +     if success:
131 +         print(f"\n🎉 All tests passed! The boundary fix is working correctly.")
132 +     else:
133 +         print(f"\n💥 Tests failed! The boundary issue still exists.")
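Note: test_final_validation.py needs the backend reachable on http://localhost:7860 before it can do anything, but the validator itself can be exercised offline. Below is a small illustrative snippet, assuming the script's directory is on sys.path so validate_word_boundaries is importable; the grid/clue data is hand-built in the shape the API test above expects.

# Illustrative only: run validate_word_boundaries() against hand-built data.
# Assumes this is executed from the test-integration directory.
from test_final_validation import validate_word_boundaries

grid = [
    ["C", "A", "T"],
    [".", ".", "."],
    [".", ".", "."],
]
clues = [
    {"word": "CAT", "position": {"row": 0, "col": 0}, "direction": "across"},
]

print(validate_word_boundaries(grid, clues))  # [] -> no violations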
crossword-app/backend-py/test-integration/test_intersection_issues.py
ADDED
@@ -0,0 +1,247 @@
  1 + #!/usr/bin/env python3
  2 + """
  3 + Test to reproduce the exact intersection and boundary issues seen in the crossword images.
  4 + """
  5 +
  6 + import sys
  7 + from pathlib import Path
  8 +
  9 + # Add project root to path
 10 + project_root = Path(__file__).parent.parent  # Go up from test-integration to backend-py
 11 + sys.path.insert(0, str(project_root))
 12 +
 13 + from src.services.crossword_generator_fixed import CrosswordGeneratorFixed
 14 +
 15 + def reproduce_image_issues():
 16 +     """Try to reproduce the specific issues seen in the crossword images."""
 17 +
 18 +     print("🔍 Reproducing crossword boundary issues from images...\n")
 19 +
 20 +     generator = CrosswordGeneratorFixed(vector_service=None)
 21 +
 22 +     # Test Case 1: Try to reproduce the "MACHINERY" extension issue
 23 +     print("=" * 60)
 24 +     print("TEST 1: Reproducing MACHINERY extension issue")
 25 +     print("=" * 60)
 26 +
 27 +     grid = [["." for _ in range(15)] for _ in range(15)]
 28 +     placed_words = []
 29 +
 30 +     # Place MACHINE first
 31 +     if generator._can_place_word(grid, "MACHINE", 6, 3, "horizontal"):
 32 +         generator._place_word(grid, "MACHINE", 6, 3, "horizontal")
 33 +         placed_words.append({
 34 +             "word": "MACHINE", "row": 6, "col": 3, "direction": "horizontal", "number": 1
 35 +         })
 36 +         print("✅ Placed MACHINE")
 37 +         print_grid(grid, 4, 10, 0, 12)
 38 +
 39 +     # Now try to place words that might create the extension
 40 +     test_placements = [
 41 +         ("VERY", 4, 8, "vertical"),         # V-E-R-Y going down, might intersect with E in MACHINE
 42 +         ("EXPERT", 5, 8, "horizontal"),     # Horizontal word that might extend MACHINE
 43 +         ("PROTOTYPE", 6, 9, "horizontal"),  # Direct extension after MACHINE
 44 +     ]
 45 +
 46 +     for word, row, col, direction in test_placements:
 47 +         print(f"\n🔍 Testing: '{word}' at ({row}, {col}) {direction}")
 48 +
 49 +         can_place = generator._can_place_word(grid, word, row, col, direction)
 50 +         print(f"Can place: {can_place}")
 51 +
 52 +         if can_place:
 53 +             # Make a copy and test the placement
 54 +             test_grid = [r[:] for r in grid]
 55 +             generator._place_word(test_grid, word, row, col, direction)
 56 +             print("After placement:")
 57 +             print_grid(test_grid, 4, 10, 0, 15)
 58 +
 59 +             # Check if MACHINE now appears to be extended
 60 +             machine_row = 6
 61 +             extended_word = ""
 62 +             for c in range(15):
 63 +                 if test_grid[machine_row][c] != ".":
 64 +                     extended_word += test_grid[machine_row][c]
 65 +                 elif extended_word:
 66 +                     break
 67 +
 68 +             if extended_word != "MACHINE":
 69 +                 print(f"⚠️ MACHINE appears extended to: '{extended_word}'")
 70 +
 71 +         print("-" * 40)
 72 +
 73 +     # Test Case 2: Check intersection logic specifically
 74 +     print("\n" + "=" * 60)
 75 +     print("TEST 2: Checking intersection calculation logic")
 76 +     print("=" * 60)
 77 +
 78 +     # Test the intersection finding logic
 79 +     word1 = "MACHINE"
 80 +     word2 = "EXPERT"
 81 +
 82 +     intersections = generator._find_word_intersections(word1, word2)
 83 +     print(f"Intersections between '{word1}' and '{word2}': {intersections}")
 84 +
 85 +     for intersection in intersections:
 86 +         word_pos = intersection["word_pos"]
 87 +         placed_pos = intersection["placed_pos"]
 88 +         print(f"  Letter '{word1[word_pos]}' at pos {word_pos} in '{word1}' matches")
 89 +         print(f"  Letter '{word2[placed_pos]}' at pos {placed_pos} in '{word2}'")
 90 +
 91 +         # Calculate where EXPERT would be placed to intersect with MACHINE
 92 +         machine_placement = {"word": "MACHINE", "row": 6, "col": 3, "direction": "horizontal"}
 93 +         placement = generator._calculate_intersection_placement(
 94 +             word2, placed_pos, machine_placement, word_pos
 95 +         )
 96 +
 97 +         if placement:
 98 +             print(f"  EXPERT would be placed at: row={placement['row']}, col={placement['col']}, dir={placement['direction']}")
 99 +
100 +             # Check if this would be valid
101 +             can_place = generator._can_place_word(grid, word2, placement['row'], placement['col'], placement['direction'])
102 +             print(f"  Valid placement: {can_place}")
103 +
104 +     # Test Case 3: Multi-word intersection scenario
105 +     print("\n" + "=" * 60)
106 +     print("TEST 3: Multi-word intersection scenario")
107 +     print("=" * 60)
108 +
109 +     # Create a more complex scenario like in the images
110 +     complex_grid = [["." for _ in range(15)] for _ in range(15)]
111 +     complex_words = []
112 +
113 +     # Place several words to create intersection opportunities
114 +     word_placements = [
115 +         ("MACHINE", 7, 4, "horizontal"),
116 +         ("EXPERT", 5, 6, "vertical"),   # Try to intersect at 'E'
117 +         ("SMART", 6, 8, "vertical"),    # Try to intersect at another letter
118 +     ]
119 +
120 +     for word, row, col, direction in word_placements:
121 +         print(f"\nPlacing '{word}' at ({row}, {col}) {direction}")
122 +
123 +         if generator._can_place_word(complex_grid, word, row, col, direction):
124 +             generator._place_word(complex_grid, word, row, col, direction)
125 +             complex_words.append({
126 +                 "word": word, "row": row, "col": col, "direction": direction, "number": len(complex_words) + 1
127 +             })
128 +             print(f"✅ Placed '{word}'")
129 +         else:
130 +             print(f"❌ Cannot place '{word}'")
131 +
132 +     print_grid(complex_grid, 4, 11, 2, 13)
133 +
134 +     # Check for any unintended word formations
135 +     print("\nChecking for unintended word formations:")
136 +     check_unintended_words(complex_grid, complex_words)
137 +
138 + def print_grid(grid, start_row, end_row, start_col, end_col):
139 +     """Print a section of the grid."""
140 +     print("Grid:")
141 +     for r in range(max(0, start_row), min(end_row, len(grid))):
142 +         row_str = f"R{r:2d}: "
143 +         for c in range(max(0, start_col), min(end_col, len(grid[0]))):
144 +             if grid[r][c] == ".":
145 +                 row_str += ". "
146 +             else:
147 +                 row_str += f"{grid[r][c]} "
148 +         print(row_str)
149 +     print()
150 +
151 + def check_unintended_words(grid, placed_words):
152 +     """Check for unintended word formations in the grid."""
153 +     unintended = []
154 +
155 +     # Check all horizontal sequences
156 +     for r in range(len(grid)):
157 +         current_word = ""
158 +         start_col = None
159 +
160 +         for c in range(len(grid[0])):
161 +             if grid[r][c] != ".":
162 +                 if start_col is None:
163 +                     start_col = c
164 +                 current_word += grid[r][c]
165 +             else:
166 +                 if current_word and len(current_word) > 1:
167 +                     # Check if this is an intended word
168 +                     intended = False
169 +                     for word_info in placed_words:
170 +                         if (word_info["direction"] == "horizontal" and
171 +                             word_info["row"] == r and
172 +                             word_info["col"] == start_col and
173 +                             word_info["word"] == current_word):
174 +                             intended = True
175 +                             break
176 +
177 +                     if not intended:
178 +                         unintended.append(f"Horizontal '{current_word}' at row {r}, col {start_col}")
179 +
180 +                 current_word = ""
181 +                 start_col = None
182 +
183 +         # Check final word if row ends with letters
184 +         if current_word and len(current_word) > 1:
185 +             intended = False
186 +             for word_info in placed_words:
187 +                 if (word_info["direction"] == "horizontal" and
188 +                     word_info["row"] == r and
189 +                     word_info["col"] == start_col and
190 +                     word_info["word"] == current_word):
191 +                     intended = True
192 +                     break
193 +
194 +             if not intended:
195 +                 unintended.append(f"Horizontal '{current_word}' at row {r}, col {start_col}")
196 +
197 +     # Check all vertical sequences
198 +     for c in range(len(grid[0])):
199 +         current_word = ""
200 +         start_row = None
201 +
202 +         for r in range(len(grid)):
203 +             if grid[r][c] != ".":
204 +                 if start_row is None:
205 +                     start_row = r
206 +                 current_word += grid[r][c]
207 +             else:
208 +                 if current_word and len(current_word) > 1:
209 +                     # Check if this is an intended word
210 +                     intended = False
211 +                     for word_info in placed_words:
212 +                         if (word_info["direction"] == "vertical" and
213 +                             word_info["col"] == c and
214 +                             word_info["row"] == start_row and
215 +                             word_info["word"] == current_word):
216 +                             intended = True
217 +                             break
218 +
219 +                     if not intended:
220 +                         unintended.append(f"Vertical '{current_word}' at row {start_row}, col {c}")
221 +
222 +                 current_word = ""
223 +                 start_row = None
224 +
225 +         # Check final word if column ends with letters
226 +         if current_word and len(current_word) > 1:
227 +             intended = False
228 +             for word_info in placed_words:
229 +                 if (word_info["direction"] == "vertical" and
230 +                     word_info["col"] == c and
231 +                     word_info["row"] == start_row and
232 +                     word_info["word"] == current_word):
233 +                     intended = True
234 +                     break
235 +
236 +             if not intended:
237 +                 unintended.append(f"Vertical '{current_word}' at row {start_row}, col {c}")
238 +
239 +     if unintended:
240 +         print("❌ Unintended words found:")
241 +         for word in unintended:
242 +             print(f"  {word}")
243 +     else:
244 +         print("✅ No unintended words detected")
245 +
246 + if __name__ == "__main__":
247 +     reproduce_image_issues()
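Note: the MACHINE/MACHINERY check in TEST 1 above simply reads off the contiguous letters on the row where a word was placed and compares the result with the word that was intended. Below is a compact standalone version of that read-off step, using hypothetical names not taken from the commit.

# Illustrative helper mirroring the extension check in TEST 1 above.
def read_horizontal_word(grid, row, start_col):
    """Read contiguous letters in `row` starting at `start_col` until a '.' or the edge."""
    letters = []
    c = start_col
    while c < len(grid[row]) and grid[row][c] != ".":
        letters.append(grid[row][c])
        c += 1
    return "".join(letters)

if __name__ == "__main__":
    grid = [["." for _ in range(15)] for _ in range(3)]
    for i, ch in enumerate("MACHINE"):
        grid[1][3 + i] = ch
    grid[1][10] = "R"  # simulate an unwanted trailing letter right after MACHINE
    word = read_horizontal_word(grid, 1, 3)
    print(word, "extended!" if word != "MACHINE" else "ok")  # MACHINER extended!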
crossword-app/backend-py/test-integration/test_local.py
ADDED
@@ -0,0 +1,98 @@
  1 + #!/usr/bin/env python3
  2 + """
  3 + Simple test script to verify Python backend works locally.
  4 + """
  5 +
  6 + import asyncio
  7 + import sys
  8 + import os
  9 + from pathlib import Path
 10 +
 11 + # Add project root to path
 12 + project_root = Path(__file__).parent.parent  # Go up from test-integration to backend-py
 13 + sys.path.insert(0, str(project_root))
 14 +
 15 + async def test_vector_search():
 16 +     """Test vector search service initialization."""
 17 +     try:
 18 +         from src.services.vector_search import VectorSearchService
 19 +
 20 +         print("🔧 Testing Vector Search Service...")
 21 +
 22 +         # Set minimal configuration for testing
 23 +         os.environ["EMBEDDING_MODEL"] = "sentence-transformers/all-MiniLM-L6-v2"  # Smaller model for testing
 24 +         os.environ["WORD_SIMILARITY_THRESHOLD"] = "0.6"
 25 +
 26 +         service = VectorSearchService()
 27 +
 28 +         print("📦 Initializing service (this may take a moment)...")
 29 +         await service.initialize()
 30 +
 31 +         if service.is_initialized:
 32 +             print("✅ Vector search service initialized successfully!")
 33 +
 34 +             # Test word generation
 35 +             print("\n🧪 Testing word generation for 'Animals'...")
 36 +             words = await service.find_similar_words("Animals", "medium", 5)
 37 +
 38 +             print(f"Found {len(words)} words:")
 39 +             for i, word_obj in enumerate(words, 1):
 40 +                 word = word_obj["word"]
 41 +                 similarity = word_obj.get("similarity", 0)
 42 +                 source = word_obj.get("source", "unknown")
 43 +                 print(f"  {i}. {word} (similarity: {similarity:.3f}, source: {source})")
 44 +         else:
 45 +             print("❌ Service initialization failed")
 46 +
 47 +         await service.cleanup()
 48 +
 49 +     except Exception as e:
 50 +         print(f"❌ Test failed: {e}")
 51 +         import traceback
 52 +         traceback.print_exc()
 53 +
 54 + async def test_crossword_generator():
 55 +     """Test crossword generator."""
 56 +     try:
 57 +         from src.services.crossword_generator_wrapper import CrosswordGenerator
 58 +
 59 +         print("\n🎯 Testing Crossword Generator...")
 60 +
 61 +         generator = CrosswordGenerator()
 62 +
 63 +         # Test static word generation
 64 +         words = await generator.generate_words_for_topics(
 65 +             topics=["Animals"],
 66 +             difficulty="medium",
 67 +             use_ai=False
 68 +         )
 69 +
 70 +         print(f"✅ Generated {len(words)} static words for Animals:")
 71 +         for word_obj in words[:3]:  # Show first 3
 72 +             print(f"  - {word_obj['word']}: {word_obj['clue']}")
 73 +
 74 +     except Exception as e:
 75 +         print(f"❌ Crossword generator test failed: {e}")
 76 +         import traceback
 77 +         traceback.print_exc()
 78 +
 79 + async def main():
 80 +     """Run all tests."""
 81 +     print("🚀 Testing Python Backend Components\n")
 82 +
 83 +     # Test individual components
 84 +     await test_crossword_generator()
 85 +
 86 +     # Test vector search (commented out as it requires large download)
 87 +     print("\n⚠️ Skipping vector search test (requires model download)")
 88 +     print("💡 To test vector search, uncomment the line below:")
 89 +     print("# await test_vector_search()")
 90 +
 91 +     print("\n✅ Basic tests completed!")
 92 +     print("🚀 Ready to test with FastAPI server")
 93 +     print("\n🧪 For comprehensive unit tests, run:")
 94 +     print("   python run_tests.py")
 95 +     print("   or: pytest tests/ -v")
 96 +
 97 + if __name__ == "__main__":
 98 +     asyncio.run(main())
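Note: the same smoke test can also be phrased as a regular pytest case. Below is a minimal sketch, assuming pytest-asyncio (or an equivalent async test plugin) is available in the dev environment and reusing the wrapper API shown in the script above; it is illustrative, not part of this commit.

# Minimal pytest sketch mirroring test_crossword_generator() above.
# Assumes pytest-asyncio is installed and that this file lives in test-integration/.
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))  # put backend-py on sys.path, as the scripts above do

import pytest
from src.services.crossword_generator_wrapper import CrosswordGenerator

@pytest.mark.asyncio
async def test_static_words_for_animals():
    generator = CrosswordGenerator()
    words = await generator.generate_words_for_topics(
        topics=["Animals"], difficulty="medium", use_ai=False
    )
    assert words, "expected at least one static word for the Animals topic"
    assert all("word" in w and "clue" in w for w in words)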