#!/bin/bash
#
# Container entrypoint for the AdvisorAI data pipeline.
#
# Steps:
#   1. Resolve a writable data directory (project config first, then a fixed
#      fallback list) and create an `nltk_data` directory inside it.
#   2. Export that directory as NLTK_DATA.
#   3. Best-effort download of the NLTK packages punkt, stopwords and
#      vader_lexicon; failures are logged and never abort startup.
#   4. exec the command given as arguments to this script.
#
# Usage: <this script> COMMAND [ARGS...]
#
# Deliberately no `set -e`: every preparation step is best-effort.
set -u

echo "Starting AdvisorAI Data Pipeline with Gradio..."

# Prefer `python`; fall back to `python3` when only that name is on PATH.
python_bin=python
if ! command -v python >/dev/null 2>&1 && command -v python3 >/dev/null 2>&1; then
  python_bin=python3
fi

# Determine writable data dir via existing Python config logic.
# The helper prints the resolved nltk_data path on stdout.
NLTK_DIR=$(
  "$python_bin" - <<'PY'
import contextlib
import os
import sys

try:
    # Send anything the config module may print at import time to stderr so
    # it cannot end up inside the captured path.
    with contextlib.redirect_stdout(sys.stderr):
        from src.config import DATA_DIR
except Exception:
    # Fallback order: first directory that can be created and written to.
    for p in ['/data', '/app/data', '/tmp']:
        try:
            os.makedirs(p, exist_ok=True)
            test = os.path.join(p, '.wtest')
            open(test, 'w').close()
            os.remove(test)
            DATA_DIR = p
            break
        except Exception:
            continue
    else:
        DATA_DIR = '/tmp'

nl = os.path.join(DATA_DIR, 'nltk_data')
os.makedirs(nl, exist_ok=True)
print(nl)
PY
) || NLTK_DIR=""

# If the helper failed or printed nothing (interpreter missing, configured
# DATA_DIR not writable, ...), do not export an empty NLTK_DATA.
if [[ -z "$NLTK_DIR" ]]; then
  echo "Could not resolve NLTK data dir; falling back to /tmp/nltk_data" >&2
  NLTK_DIR=/tmp/nltk_data
  mkdir -p -- "$NLTK_DIR" || echo "Could not create $NLTK_DIR" >&2
fi

export NLTK_DATA="$NLTK_DIR"
echo "NLTK_DATA set to: $NLTK_DATA"

# Best-effort NLTK downloads (do not fail on errors)
"$python_bin" - <<'PY' || echo "NLTK preparation step failed; continuing" >&2
import os

print('Preparing NLTK into', os.environ.get('NLTK_DATA'))
try:
    import nltk
    for pkg in ['punkt', 'stopwords', 'vader_lexicon']:
        try:
            # nltk.download() signals most failures by returning False
            # rather than raising, so check the result before reporting.
            ok = nltk.download(pkg, download_dir=os.environ.get('NLTK_DATA'), quiet=True)
            if ok:
                print('Downloaded', pkg)
            else:
                print('NLTK download failed for', pkg, '(downloader reported an error)')
        except Exception as e:
            print('NLTK download failed for', pkg, e)
except Exception as e:
    print('NLTK import failed:', e)
PY

echo "Starting services..."

# Replace this shell with the requested command.
exec "$@"