# syntax=docker/dockerfile:1
# (The heredoc RUN blocks below need BuildKit's Dockerfile frontend, v1.4+.)

# ---- Base image ----
FROM python:3.10-slim

ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    HF_HUB_DISABLE_TELEMETRY=1 \
    PORT=7860 \
    # ✅ Writable + persistent on Spaces
    HF_HOME=/data/hf_cache \
    SENTENCE_TRANSFORMERS_HOME=/data/hf_cache \
    NLTK_DATA=/data/nltk_data \
    TLDEXTRACT_CACHE=/data/tld_cache \
    HOME=/data

# Handy tools
RUN apt-get update && apt-get install -y --no-install-recommends curl git && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

# ---- Python deps ----
COPY requirements.txt ./
RUN python -m pip install --upgrade pip && \
    # CPU-only PyTorch first, so requirements.txt doesn't pull in the larger CUDA build
    pip install torch --index-url https://download.pytorch.org/whl/cpu && \
    pip install -r requirements.txt && \
    pip install sentencepiece

# ---- App code ----
COPY . .

# ✅ Make caches writable for the runtime user
RUN mkdir -p /data/hf_cache /data/nltk_data /data/tld_cache && chmod -R 777 /data

# ---- Warm caches into the image layer ----
# 1) Cache SBERT
RUN python - <<'PY'
from sentence_transformers import SentenceTransformer
SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
print("✅ SBERT cached")
PY

# 2) Cache NLTK VADER
RUN python - <<'PY'
import os, nltk
os.makedirs(os.getenv("NLTK_DATA", "/data/nltk_data"), exist_ok=True)
nltk.download("vader_lexicon", download_dir=os.getenv("NLTK_DATA", "/data/nltk_data"))
print("✅ VADER cached")
PY

# 3) (Recommended) Pre-warm the tweet-topic model so the first request doesn't pay the download cost
RUN python - <<'PY'
from transformers import pipeline
p = pipeline("text-classification", model="cardiffnlp/tweet-topic-21-multi", top_k=1)
p("warmup")
print("✅ Topic model cached")
PY

# Ensure everything under /data is still writable after the warm-up steps
RUN chmod -R 777 /data

EXPOSE 7860

# ---- Run ----
CMD ["sh", "-c", "uvicorn main:app --host 0.0.0.0 --port ${PORT:-7860}"]
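
# Local build/run sketch. The module path `main:app` mirrors the CMD above;
# the image tag `sentiment-space` is just an illustrative name, not part of this repo:
#   docker build -t sentiment-space .
#   docker run -p 7860:7860 -e PORT=7860 sentiment-space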