# Container image for the Data Extractor multi-user application.
# Base image: slim Debian build of CPython 3.11.
FROM python:3.11-slim

# Build-time only: stops apt/debconf from prompting during package installs.
# Declared as ARG rather than ENV so it is visible to the RUN steps of this
# build but is not baked into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Python / pip behaviour shared by the build and the running container:
#   PYTHONUNBUFFERED              - unbuffered stdout/stderr, so output reaches `docker logs` immediately
#   PYTHONDONTWRITEBYTECODE       - do not write .pyc files
#   PIP_NO_CACHE_DIR              - keep pip's download/wheel cache out of the image layers
#   PIP_DISABLE_PIP_VERSION_CHECK - skip pip's periodic self-update check
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# Every relative path used by later instructions resolves against /app.
WORKDIR /app

# Dedicated account named "app" with its own home directory and bash as its
# login shell.
RUN useradd -m -s /bin/bash app

# System packages, installed and cleaned up in one layer so the apt package
# index never persists in the image. The list is sorted alphabetically to keep
# diffs small. Roughly:
#   - build toolchain: build-essential, cmake, g++, gcc, make, pkg-config
#   - fetch tools:     curl (also used by the HEALTHCHECK), git, wget
#   - dev headers:     libffi, libssl, libjpeg, libpng, libfreetype, libtiff,
#                      libopenjp2, libxml2, libxslt, zlib, libsqlite3
#   - runtime tools:   tesseract-ocr (+ English data), poppler-utils, sqlite3
#
# libtiff-dev / libfreetype-dev are the canonical package names. The versioned
# aliases used previously (libtiff5-dev, libfreetype6-dev) are transitional
# packages that are not guaranteed to exist on every Debian release the
# floating python:3.11-slim tag can point at; the canonical names resolve on
# all of them.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        g++ \
        gcc \
        git \
        libffi-dev \
        libfreetype-dev \
        libjpeg-dev \
        libopenjp2-7-dev \
        libpng-dev \
        libsqlite3-dev \
        libssl-dev \
        libtiff-dev \
        libxml2-dev \
        libxslt1-dev \
        make \
        pkg-config \
        poppler-utils \
        sqlite3 \
        tesseract-ocr \
        tesseract-ocr-eng \
        wget \
        zlib1g-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/*

# Bring the packaging toolchain up to date, then persist pip defaults in the
# building user's pip configuration file.
#
# The previous `global.trusted-host` setting for pypi.org,
# files.pythonhosted.org and pypi.python.org is intentionally not carried
# over: marking a host as trusted makes pip accept it without a valid HTTPS
# certificate, which removes the transport protection for every package
# download. If the build must run behind a TLS-intercepting proxy, install the
# proxy's CA certificate in the image instead.
RUN python -m pip install --upgrade pip setuptools wheel \
    && pip config set global.no-cache-dir true \
    && pip config set global.disable-pip-version-check true

# Copy only the dependency manifest first, so the (slow) install layer below
# is rebuilt only when requirements.txt changes, not on every source edit.
COPY requirements.txt .

# Project requirements, followed by extra packages installed on top of them
# (gunicorn, psutil, uvloop).
# NOTE(review): these three are unpinned; consider moving them into
# requirements.txt with explicit versions for reproducible builds.
#
# The former trailing `pip list --outdated` was dropped: it only printed a
# report, changed nothing in the image, needed an extra round of network
# requests, and could fail the whole layer when the index was unreachable.
RUN pip install --no-cache-dir --upgrade -r requirements.txt \
    && pip install --no-cache-dir \
        gunicorn \
        psutil \
        uvloop

# Application source, owned by the "app" account (created earlier in this
# file) from the moment it is copied. This replaces a later recursive
# `chown -R`, which rewrote every file into an additional layer.
COPY --chown=app:app . .

# Working directories the application expects to exist, plus the scratch
# directory under /tmp. They are handed to "app" instead of being made
# world-writable:
#   - the former `chmod -R 777 ... /tmp` also reset /tmp itself to mode 0777,
#     dropping its sticky bit, so any user could delete other users' files;
#   - the former `chmod -R 755 /app` marked every source file executable.
# File modes now come from the build context unchanged.
RUN mkdir -p \
        cache \
        data \
        downloads \
        logs \
        static \
        temp \
        uploads \
        /tmp/data_extractor_temp \
    && chown app:app \
        /app \
        cache \
        data \
        downloads \
        logs \
        static \
        temp \
        uploads \
        /tmp/data_extractor_temp

# Import root and web-server bind address/port.
ENV PYTHONPATH=/app \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860

# Matplotlib: Agg backend, with its config/cache directory under /tmp.
ENV MPLBACKEND=Agg \
    MPLCONFIGDIR=/tmp/mpl_cache

# Concurrency limits.
ENV GRADIO_QUEUE_DEFAULT_CONCURRENCY=10 \
    GRADIO_MAX_THREADS=20

# WebSocket bind address/port.
ENV WEBSOCKET_HOST=0.0.0.0 \
    WEBSOCKET_PORT=8765

# Application settings.
# NOTE(review): units are inferred from the names/values (seconds, megabytes);
# confirm against the application code.
ENV TEMP_DIR=/tmp/data_extractor_temp \
    SESSION_TIMEOUT=1800 \
    MAX_FILE_SIZE_MB=50

# Model identifier for each stage; presumably read by the application at
# start-up (verify against the code that consumes them).
ENV COORDINATOR_MODEL=gemini-2.5-pro \
    DATA_EXTRACTOR_MODEL=gemini-2.5-pro \
    DATA_ARRANGER_MODEL=gemini-2.5-pro \
    CODE_GENERATOR_MODEL=gemini-2.5-pro

# Interpreter hardening: do not prepend the script directory to sys.path
# (PYTHONPATH above still exposes /app) and randomise string hashing.
ENV PYTHONSAFEPATH=1 \
    PYTHONHASHSEED=random

# Documented ports: 7860 (web UI) and 8765 (WebSocket).
EXPOSE 7860 8765

# Liveness probe against the web UI. The port follows GRADIO_SERVER_PORT so
# the check keeps working when the port is overridden at `docker run` time;
# it falls back to 7860 when the variable is unset. The shell form is required
# here: the variable is expanded by /bin/sh inside the container at probe
# time, not by the Dockerfile parser. -sS hides the progress meter but still
# prints errors.
HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
    CMD curl -fsS "http://localhost:${GRADIO_SERVER_PORT:-7860}/" || exit 1

# Generate the start-up script while still running as root (the default build
# user), one printf argument per script line.
#   - The script now starts with a shebang. Without one, the exec-form CMD
#     below cannot execute the file directly ("exec format error").
#   - printf '%s\n' writes each line literally, so the result no longer depends
#     on whether the build shell's `echo` expands "\n" escapes.
#   - Single quotes keep $(python --version) and $TEMP_DIR unexpanded at build
#     time; they are evaluated when the container starts.
#   - The banner prefixes were mis-encoded emoji; the messages are now plain
#     ASCII.
#   - The script no longer chmods the temp directories to 777: directories it
#     creates are already writable by the user it runs as.
#   - `exec` replaces the shell with the Python process so it receives the
#     signals sent by `docker stop`.
RUN printf '%s\n' \
        '#!/bin/sh' \
        'set -e' \
        'echo "Starting Data Extractor Multi-User Application..."' \
        'echo "Python version: $(python --version)"' \
        'echo "Server: 0.0.0.0:7860"' \
        'echo "Multi-user concurrency: Enabled"' \
        'echo "Session isolation: Active"' \
        'echo "Temp directory: $TEMP_DIR"' \
        '' \
        'mkdir -p "$TEMP_DIR" /tmp/mpl_cache' \
        '' \
        'exec python app.py' \
        > /app/start.sh \
    && chmod 755 /app/start.sh

# Run the application as the unprivileged "app" account created earlier in
# this file (it owns /app and /tmp/data_extractor_temp) instead of root.
USER app

CMD ["/app/start.sh"]