advisorai-data-enhanced / deployment /gradio_entrypoint.sh
Maaroufabousaleh
Refactor Gradio entrypoint script to improve DATA_DIR determination and enhance NLTK download error handling
010dba3
#!/bin/bash
# Container entrypoint: resolves a writable data directory, optionally
# pre-fetches historical data, prepares NLTK corpora, then hands control to
# the command passed as arguments.
#
# Exports:
#   DATA_DIR   - base directory for application data
#   NLTK_DATA  - directory NLTK packages are downloaded into

# Fail on expansion of unset variables. Note that -e is NOT enabled: the
# optional steps later in this script must be able to fail without aborting
# startup.
set -u

echo "Starting AdvisorAI Data Pipeline with Gradio..."

# Determine a writable DATA_DIR using the app's own logic (src.config) when
# available, otherwise the first writable directory of a fixed fallback list.
# The helper prints exactly two lines on stdout: DATA_DIR, then the NLTK dir.
# One path per line keeps paths containing spaces intact.
resolved="$(python - <<'PY'
import contextlib
import os
import sys


def writable(path):
    """Return True if `path` exists (or can be created) and accepts a file."""
    try:
        os.makedirs(path, exist_ok=True)
        probe = os.path.join(path, '.wtest')
        open(probe, 'w').close()
        os.remove(probe)
        return True
    except Exception:
        return False


candidates = []
try:
    # stdout is parsed by the calling shell, so anything the import prints
    # is diverted to stderr.
    with contextlib.redirect_stdout(sys.stderr):
        from src.config import DATA_DIR as configured
    if configured:
        candidates.append(os.fspath(configured))
except Exception:
    pass
# fallback order
candidates += ['/data', '/app/data', '/tmp']

base = next((p for p in candidates if writable(p)), '/tmp')
nl = os.path.join(base, 'nltk_data')
try:
    os.makedirs(nl, exist_ok=True)
except Exception as exc:
    print('Could not create', nl, '-', exc, file=sys.stderr)
print(base)
print(nl)
PY
)" || {
  echo "[WARN] DATA_DIR detection via python failed; falling back to /tmp." >&2
  resolved=""
}

DATA_DIR=""
NLTK_DIR=""
{
  IFS= read -r DATA_DIR
  IFS= read -r NLTK_DIR
} <<< "$resolved" || true

# Never continue with empty paths: if the helper produced nothing usable,
# fall back to /tmp and derive the NLTK directory from DATA_DIR.
DATA_DIR="${DATA_DIR:-/tmp}"
NLTK_DIR="${NLTK_DIR:-${DATA_DIR}/nltk_data}"
if ! mkdir -p -- "$NLTK_DIR" 2>/dev/null; then
  echo "[WARN] Could not create NLTK directory: $NLTK_DIR" >&2
fi

export DATA_DIR
export NLTK_DATA="$NLTK_DIR"
echo "DATA_DIR set to: $DATA_DIR"
echo "NLTK_DATA set to: $NLTK_DATA"
# Pre-start historical data fetch from Filebase/S3 (optional).
#   SKIP_FILEBASE_FETCH=1  -> the fetch is skipped unconditionally.
#   FILEBASE_BUCKET, FILEBASE_ACCESS_KEY, FILEBASE_SECRET_KEY must all be
#   non-empty for the fetch to be attempted.
# A failed fetch only logs a warning; startup continues regardless.
if [[ "${SKIP_FILEBASE_FETCH:-0}" == "1" ]]; then
  echo "[INFO] SKIP_FILEBASE_FETCH=1 -> skipping Filebase fetch."
elif [[ -z "${FILEBASE_BUCKET:-}" || -z "${FILEBASE_ACCESS_KEY:-}" || -z "${FILEBASE_SECRET_KEY:-}" ]]; then
  echo "[INFO] Skipping Filebase fetch: credentials or bucket not configured."
else
  echo "Fetching historical data from Filebase into $DATA_DIR ..."
  if ! python /app/deployment/fetch_filebase.py --base-dir "$DATA_DIR"; then
    echo "[WARN] Filebase fetch failed (continuing startup)."
  fi
fi
# Best-effort NLTK downloads (do not fail on errors).
# Packages are downloaded into $NLTK_DATA; when that variable is unset or
# empty, NLTK's own default download location is used instead.
python - <<'PY' || echo "[WARN] NLTK preparation step failed (continuing startup)." >&2
import os

# Treat an empty NLTK_DATA like an unset one so NLTK picks its default dir.
target = os.environ.get('NLTK_DATA') or None
print('Preparing NLTK into', target)
try:
    import nltk
except Exception as e:
    print('NLTK import failed:', e)
else:
    for pkg in ['punkt', 'stopwords', 'vader_lexicon']:
        try:
            # nltk.download() reports failure through its return value unless
            # raise_on_error is set; raising keeps the reason, which quiet
            # mode would otherwise hide.
            ok = nltk.download(pkg, download_dir=target, quiet=True,
                               raise_on_error=True)
        except Exception as e:
            print('NLTK download failed for', pkg, e)
            continue
        if ok:
            print('Downloaded', pkg)
        else:
            print('NLTK download failed for', pkg)
PY
# Hand control to the command supplied as this script's arguments. exec
# replaces the shell process with that command instead of running it as a
# child. With no arguments, exec is a no-op and the script simply ends.
printf '%s\n' 'Starting services...'
exec "$@"