# Manga_OCR / Dockerfile
# Drag2121's picture
# tesseract
# a91e387
# Base image: Python 3.9, slim variant
FROM python:3.9-slim

# HF Spaces requires the app user to have UID 1000; create it with a home directory
RUN useradd --create-home --uid 1000 user

# Put the user's local bin directory at the front of PATH
ENV PATH="/home/user/.local/bin:${PATH}"
# Install system dependencies: OpenGL/GLib runtime libraries, Poppler PDF
# tools, a C/C++ build toolchain, Tesseract OCR with its development headers
# and the Japanese / Simplified Chinese / Korean / English language packs,
# plus Noto CJK fonts.
#
# - libgl1 replaces libgl1-mesa-glx, a transitional package that newer Debian
#   releases no longer ship; with the floating python:3.9-slim tag the old
#   name can fail with "no installation candidate".
# - --no-install-recommends keeps optional packages out of the image.
#   poppler-data is listed explicitly because it would otherwise arrive only
#   as a recommended package.
#   NOTE(review): presumably needed for CJK text in PDFs - confirm.
# - The package list is sorted alphabetically for easier diffs.
# - The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    fonts-noto-cjk \
    libgl1 \
    libglib2.0-0 \
    libtesseract-dev \
    poppler-data \
    poppler-utils \
    tesseract-ocr \
    tesseract-ocr-chi-sim \
    tesseract-ocr-eng \
    tesseract-ocr-jpn \
    tesseract-ocr-kor \
    && rm -rf /var/lib/apt/lists/*
# Build-time sanity check: print the Tesseract version, then the languages it lists
RUN tesseract --version \
 && tesseract --list-langs
# Set working directory (WORKDIR creates it if it does not exist)
WORKDIR /app

# Copy only requirements.txt first so the dependency layer is reused until
# that file changes. This step still runs as root (USER is switched later in
# the file), so the packages are installed system-wide, not per-user.
COPY --chown=user requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Verify PyTesseract installation. Placed right after the install so a
# source-only change does not invalidate and re-run this check.
RUN python -c "import pytesseract; print('PyTesseract version:', pytesseract.__version__)"

# Copy rest of the app with correct permissions
COPY --chown=user . .

# Ensure static directories exist and are writable by the runtime user.
# This RUN executes as root, so without the chown the created directories
# (and /app itself, created by WORKDIR) would be root-owned and the non-root
# user could not write into them at runtime.
# NOTE(review): assumes the app writes output under static/translated - confirm.
RUN mkdir -p static/translated \
    && chown user:user /app static static/translated
# Drop root privileges: everything from here on runs as the non-root user (required by HF Spaces)
USER user

# Document the port the app listens on (7860, as required by HF Spaces)
EXPOSE 7860/tcp

# Launch the FastAPI app (object `app` in module `app`) with uvicorn on all interfaces
CMD ["uvicorn", "app:app", \
     "--host", "0.0.0.0", \
     "--port", "7860"]