# Base image: official Python 3.11 "slim" variant.
FROM python:3.11-slim

# Python / pip / apt behaviour for the build and for the running container:
#   PYTHONUNBUFFERED              - write stdout/stderr immediately (container logs)
#   PYTHONDONTWRITEBYTECODE       - do not write .pyc files
#   PIP_NO_CACHE_DIR              - do not keep pip's download cache in the image
#   PIP_DISABLE_PIP_VERSION_CHECK - skip pip's self-update check
#   DEBIAN_FRONTEND               - keep apt from prompting during the build
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1
ENV PIP_NO_CACHE_DIR=1
ENV PIP_DISABLE_PIP_VERSION_CHECK=1
ENV DEBIAN_FRONTEND=noninteractive

# Create an unprivileged "app" user. The container itself still runs as root
# (Hugging Face Spaces compatibility); this user only owns the app files.
RUN useradd --create-home --shell /bin/bash app

# Set the working directory inside the container
WORKDIR /app

# Install system dependencies required by the application.
# The unversioned libtiff-dev / libfreetype-dev names are used instead of
# libtiff5-dev / libfreetype6-dev so the install does not depend on which
# Debian release the moving python:3.11-slim tag currently tracks.
RUN apt-get update && apt-get install -y --no-install-recommends \
    # Build tools
    build-essential \
    gcc \
    g++ \
    make \
    cmake \
    pkg-config \
    # Network and download tools
    curl \
    wget \
    git \
    # Development libraries
    libffi-dev \
    libssl-dev \
    # Image processing libraries
    libjpeg-dev \
    libpng-dev \
    libfreetype-dev \
    libtiff-dev \
    libopenjp2-7-dev \
    # Document processing libraries
    libxml2-dev \
    libxslt1-dev \
    zlib1g-dev \
    # OCR and PDF processing
    tesseract-ocr \
    tesseract-ocr-eng \
    poppler-utils \
    # SQLite for session storage
    sqlite3 \
    libsqlite3-dev \
    # Cleanup: drop apt caches and package lists to keep the layer small
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    && rm -rf /var/cache/apt/*

# Upgrade pip and the packaging toolchain
RUN python -m pip install --upgrade pip setuptools wheel

# Persist pip settings in pip's config file.
# NOTE: no "trusted-host" entry is written on purpose. Marking pypi.org /
# files.pythonhosted.org as trusted hosts disables TLS certificate
# verification for every later pip install, at build time and at runtime.
RUN pip config set global.no-cache-dir true \
    && pip config set global.disable-pip-version-check true

# Copy the requirements file first to leverage Docker's build cache:
# the dependency layers below are rebuilt only when requirements.txt changes.
COPY requirements.txt .

# Install the project's Python dependencies, then the extra server and
# monitoring packages, and finally print outdated packages to the build log.
RUN pip install --no-cache-dir --upgrade -r requirements.txt \
    && pip install --no-cache-dir \
    # Additional packages for multi-user support
    gunicorn \
    uvloop \
    # Performance monitoring
    psutil \
    && pip list --outdated

# Copy the rest of the application code
COPY . .
# Create the directory structure used by the application.
# Relative paths resolve against the current WORKDIR.
RUN mkdir -p \
    # Core application directories
    temp logs uploads downloads cache \
    # Multi-user session directories (will be created dynamically)
    /tmp/data_extractor_temp \
    # WebSocket and terminal directories
    static \
    # Database directory for session storage
    data \
    && chmod -R 755 /app

# Permissions for concurrent access.
# The writable working directories are opened up recursively. /tmp itself is
# set to 1777 rather than recursively to 777: a plain 777 drops the sticky
# bit, which would let any user delete or rename other users' files in /tmp.
RUN chmod -R 777 temp logs uploads downloads cache /tmp/data_extractor_temp \
    && chmod 1777 /tmp \
    && chmod -R 755 static \
    && chmod 755 app.py \
    && chmod -R 755 config utils workflow models

# Hand ownership of the app tree and its temp directory to the "app" user.
# The container still runs as root (see USER below).
RUN chown -R app:app /app \
    && chown -R app:app /tmp/data_extractor_temp

# Application environment variables
ENV PYTHONPATH=/app
ENV GRADIO_SERVER_NAME=0.0.0.0
ENV GRADIO_SERVER_PORT=7860
ENV GRADIO_SHARE=False
ENV GRADIO_DEBUG=False

# Matplotlib configuration for headless operation
ENV MPLBACKEND=Agg
ENV MPLCONFIGDIR=/tmp/mpl_cache

# Concurrency settings
# NOTE(review): presumably read by the application or Gradio - confirm.
ENV GRADIO_QUEUE_DEFAULT_CONCURRENCY=10
ENV GRADIO_MAX_THREADS=20

# WebSocket and networking configuration
ENV WEBSOCKET_HOST=0.0.0.0
ENV WEBSOCKET_PORT=8765

# Session and temporary file configuration
ENV TEMP_DIR=/tmp/data_extractor_temp
ENV SESSION_TIMEOUT=1800
ENV MAX_FILE_SIZE_MB=50

# AI model configuration (defaults; can be overridden by user env vars)
ENV COORDINATOR_MODEL=gemini-2.5-pro
ENV DATA_EXTRACTOR_MODEL=gemini-2.5-pro
ENV DATA_ARRANGER_MODEL=gemini-2.5-pro
ENV CODE_GENERATOR_MODEL=gemini-2.5-pro

# Security and performance settings
ENV PYTHONSAFEPATH=1
ENV PYTHONHASHSEED=random

# Ports: 7860 matches GRADIO_SERVER_PORT, 8765 matches WEBSOCKET_PORT
EXPOSE 7860
EXPOSE 8765

# Health check for container monitoring: probe the HTTP port with curl
HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:7860/ || exit 1

# Run as root for Hugging Face Spaces compatibility
USER root

# Create startup script for better error handling and logging
# Generate /app/start.sh, the container entry script.
# printf '%s\n' writes one script line per quoted argument, so the result
# does not depend on the build shell's echo interpreting "\n" escapes.
# Each argument starts with a quote character, so the Dockerfile parser does
# not mistake the script's own "#" lines for Dockerfile comments and drop them.
# The single quotes keep $TEMP_DIR and $(python --version) unexpanded at build
# time; they are evaluated when the container starts.
RUN printf '%s\n' \
    '#!/bin/bash' \
    'set -e' \
    'echo "🚀 Starting Data Extractor Multi-User Application..."' \
    'echo "📊 Python version: $(python --version)"' \
    'echo "🌐 Server: 0.0.0.0:7860"' \
    'echo "👥 Multi-user concurrency: Enabled"' \
    'echo "🔒 Session isolation: Active"' \
    'echo "💾 Temp directory: $TEMP_DIR"' \
    '' \
    '# Create runtime directories' \
    'mkdir -p "$TEMP_DIR"' \
    'mkdir -p /tmp/mpl_cache' \
    'chmod 777 "$TEMP_DIR" /tmp/mpl_cache' \
    '' \
    '# Start the application' \
    'exec python app.py' \
    > /app/start.sh \
    && chmod +x /app/start.sh

# The command to run when the container starts
CMD ["/app/start.sh"]