# methunraj
# refactor(settings): update google api key environment variable name
# 17e3d1d
# Base image: the official Python 3.11 "slim" image.
# NOTE(review): the tag is not pinned to a patch version or digest, so rebuilds
# may pick up a newer base image — confirm this is acceptable.
FROM python:3.11-slim
# Python and pip behaviour, kept in the image at run time:
#   PYTHONUNBUFFERED              - stdout/stderr are unbuffered, so logs appear immediately
#   PYTHONDONTWRITEBYTECODE       - do not write .pyc files
#   PIP_NO_CACHE_DIR              - pip keeps no download/wheel cache
#   PIP_DISABLE_PIP_VERSION_CHECK - pip skips its "new version available" check
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1
# Build-time only: keeps apt/debconf from prompting during `RUN apt-get ...`.
# Declared as ARG rather than ENV so it is not persisted into the runtime
# environment of the container.
ARG DEBIAN_FRONTEND=noninteractive
# Add an "app" account with a home directory and bash as its login shell.
# The image still runs as root (see the USER instruction further down); this
# account is used as the owner of the application files.
RUN useradd -m -s /bin/bash app
# Working directory inside the container: relative paths in the following
# COPY/RUN instructions, and the process started by CMD, resolve against /app.
WORKDIR /app
# Install OS-level dependencies in a single layer (update + install + cleanup
# together so no stale package index is cached or shipped in the image).
# Packages are listed alphabetically, one per line, to keep diffs readable:
#   build tools          - build-essential (already depends on gcc, g++ and make),
#                          cmake, pkg-config
#   network/download     - curl (also used by HEALTHCHECK), git, wget
#   development headers  - libffi-dev, libssl-dev
#   image processing     - libfreetype6-dev, libjpeg-dev, libopenjp2-7-dev,
#                          libpng-dev, libtiff-dev
#   document processing  - libxml2-dev, libxslt1-dev, zlib1g-dev
#   OCR / PDF            - poppler-utils, tesseract-ocr, tesseract-ocr-eng
#   SQLite               - libsqlite3-dev, sqlite3
# libtiff-dev replaces the legacy transitional name libtiff5-dev so the install
# does not depend on that alias being present in the base image's Debian release.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        git \
        libffi-dev \
        libfreetype6-dev \
        libjpeg-dev \
        libopenjp2-7-dev \
        libpng-dev \
        libsqlite3-dev \
        libssl-dev \
        libtiff-dev \
        libxml2-dev \
        libxslt1-dev \
        pkg-config \
        poppler-utils \
        sqlite3 \
        tesseract-ocr \
        tesseract-ocr-eng \
        wget \
        zlib1g-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/*
# Bring the packaging toolchain (pip, setuptools, wheel) up to date before
# installing project dependencies.
RUN python -m pip install -U pip setuptools wheel
# Persist pip defaults in pip's configuration file: no download cache and no
# self-version check.
# The previous `global.trusted-host` entry for the PyPI hosts was removed on
# purpose: marking a host as trusted tells pip to accept it even without valid
# HTTPS, which removes certificate verification for package downloads.
RUN pip config set global.no-cache-dir true \
    && pip config set global.disable-pip-version-check true
# Copy only the dependency manifest first, so the dependency-install layer that
# follows stays cached until requirements.txt itself changes.
COPY requirements.txt ./
# Install the project's Python dependencies, then the extra packages that are
# added on top of requirements.txt (gunicorn, psutil, uvloop).
# The former trailing `pip list --outdated` was dropped: it queries the package
# index on every build and changes nothing in the image.
# NOTE(review): the extra packages are unpinned; consider moving them into
# requirements.txt with explicit versions for reproducible builds.
RUN pip install --no-cache-dir --upgrade -r requirements.txt \
    && pip install --no-cache-dir \
        gunicorn \
        psutil \
        uvloop
# Copy the rest of the build context into the working directory. Doing this
# after the dependency install means source edits do not invalidate that layer.
# NOTE(review): the entire context is copied — confirm a .dockerignore excludes
# VCS data, local caches and any secret files.
COPY . .
# Pre-create the directories the application writes to, then normalise
# permissions on the whole application tree.
#   relative paths (under the working directory):
#     cache, data, downloads, logs, static, temp, uploads
#   absolute path:
#     /tmp/data_extractor_temp
RUN mkdir -p \
        cache data downloads logs static temp uploads \
        /tmp/data_extractor_temp \
    && chmod -R 755 /app
# Open up the writable working directories and keep code/static assets at 755.
# Only the application's own temp directory under /tmp is made world-writable;
# /tmp itself is set to 1777 so its sticky bit stays in place (a recursive
# `chmod 777 /tmp` would clear it and let any user delete other users' files).
# NOTE(review): 777 on the working directories is very permissive; tighten it
# if the runtime user is known.
RUN chmod -R 777 temp logs uploads downloads cache /tmp/data_extractor_temp \
    && chmod 1777 /tmp \
    && chmod -R 755 static \
    && chmod 755 app.py \
    && chmod -R 755 config utils workflow models
# Hand ownership of the application tree and its temp directory to the "app"
# account. The container process itself still runs as root.
RUN chown -R app:app /app /tmp/data_extractor_temp
# Runtime configuration, grouped by concern.

# Module search path and Gradio bind address/port.
ENV PYTHONPATH=/app \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860
# ENV GRADIO_SHARE=false
# ENV GRADIO_DEBUG=false

# Matplotlib: non-interactive backend for headless use, config/cache under /tmp.
ENV MPLBACKEND=Agg \
    MPLCONFIGDIR=/tmp/mpl_cache

# Concurrency settings for multi-user access.
ENV GRADIO_QUEUE_DEFAULT_CONCURRENCY=10 \
    GRADIO_MAX_THREADS=20

# WebSocket bind address and port.
ENV WEBSOCKET_HOST=0.0.0.0 \
    WEBSOCKET_PORT=8765

# Temp directory, session timeout and upload size limit.
# NOTE(review): SESSION_TIMEOUT is presumably in seconds — confirm against the app.
ENV TEMP_DIR=/tmp/data_extractor_temp \
    SESSION_TIMEOUT=1800 \
    MAX_FILE_SIZE_MB=50

# Default model per role; intended to be overridden by user-supplied env vars.
ENV COORDINATOR_MODEL=gemini-2.5-pro \
    DATA_EXTRACTOR_MODEL=gemini-2.5-pro \
    DATA_ARRANGER_MODEL=gemini-2.5-pro \
    CODE_GENERATOR_MODEL=gemini-2.5-pro

# Interpreter hardening: safe import path and randomised hash seed.
ENV PYTHONSAFEPATH=1 \
    PYTHONHASHSEED=random
# Document the listening ports: 7860 (GRADIO_SERVER_PORT) and 8765 (WEBSOCKET_PORT).
EXPOSE 7860 8765
# Container health probe: request the root page on port 7860 every 30s
# (30s timeout, 60s start-up grace period, unhealthy after 3 failures).
# curl flags: -f fail on HTTP errors, -s hide the progress meter so it does not
# fill the recorded health-check output, -S still print real errors.
HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
    CMD curl -fsS http://localhost:7860/ || exit 1
# Run the container process as root, stated to be for Hugging Face Spaces
# compatibility.
# NOTE(review): the "app" account created earlier is never switched to, so the
# application keeps full root privileges — confirm root is really required.
USER root
# Generate /app/start.sh, the container entry script.
# It prints a start-up banner, makes sure the runtime directories exist and are
# writable, then replaces itself with the Python process via `exec`.
# printf '%s\n' writes each argument as one line of the script. Unlike
# `echo '...\n...'` this does not depend on the shell's echo interpreting
# backslash escapes, and lines of the script that start with '#' are not
# swallowed as Dockerfile comments because each one begins with a quote here.
# The single quotes are intentional: $TEMP_DIR and $(python --version) must be
# expanded when the script runs, not at build time.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'set -e' \
        'echo "🚀 Starting Data Extractor Multi-User Application..."' \
        'echo "📊 Python version: $(python --version)"' \
        'echo "🌐 Server: 0.0.0.0:7860"' \
        'echo "👥 Multi-user concurrency: Enabled"' \
        'echo "🔒 Session isolation: Active"' \
        'echo "💾 Temp directory: $TEMP_DIR"' \
        '' \
        '# Create runtime directories' \
        'mkdir -p "$TEMP_DIR"' \
        'mkdir -p /tmp/mpl_cache' \
        'chmod 777 "$TEMP_DIR" /tmp/mpl_cache' \
        '' \
        '# Start the application' \
        'exec python app.py' \
        > /app/start.sh \
    && chmod +x /app/start.sh
# Default container command, in exec (JSON array) form: run the startup script,
# which ends by exec-ing `python app.py`.
CMD ["/app/start.sh"]