Said Lfagrouche committed on
Commit
ed78072
·
1 Parent(s): 9edf83b

Fix NLTK data permission issues by pre-downloading during build

Browse files
Files changed (3) hide show
  1. Dockerfile +7 -2
  2. api_mental_health.py +3 -6
  3. run_api.sh +12 -1
Dockerfile CHANGED
@@ -15,10 +15,15 @@ COPY requirements.txt .
15
  RUN pip install --no-cache-dir --upgrade pip && \
16
  pip install --no-cache-dir --upgrade -r requirements.txt
17
 
 
 
 
 
 
 
18
  # Create necessary directories
19
  RUN mkdir -p data/users data/sessions data/conversations data/feedback && \
20
- mkdir -p mental_health_model_artifacts/chroma_db && \
21
- mkdir -p nltk_data
22
 
23
  # Copy application files
24
  COPY app.py .
 
15
  RUN pip install --no-cache-dir --upgrade pip && \
16
  pip install --no-cache-dir --upgrade -r requirements.txt
17
 
18
+ # Pre-download NLTK data during build time
19
+ RUN python -c "import nltk; nltk.download('punkt'); nltk.download('wordnet'); nltk.download('stopwords')" && \
20
+ mkdir -p /app/nltk_data && \
21
+ cp -r /root/nltk_data/* /app/nltk_data/ && \
22
+ chmod -R 755 /app/nltk_data
23
+
24
  # Create necessary directories
25
  RUN mkdir -p data/users data/sessions data/conversations data/feedback && \
26
+ mkdir -p mental_health_model_artifacts/chroma_db
 
27
 
28
  # Copy application files
29
  COPY app.py .
api_mental_health.py CHANGED
@@ -37,16 +37,13 @@ logger = logging.getLogger(__name__)
37
  # Load environment variables
38
  load_dotenv()
39
 
40
- # Set NLTK data path to a directory where we have write permissions
41
  nltk_data_path = os.path.join(os.path.dirname(__file__), "nltk_data")
42
  os.makedirs(nltk_data_path, exist_ok=True)
43
  nltk.data.path.append(nltk_data_path)
44
 
45
- # Download required NLTK data to our custom directory
46
- logger.info(f"Downloading NLTK data to {nltk_data_path}")
47
- nltk.download('punkt', download_dir=nltk_data_path)
48
- nltk.download('wordnet', download_dir=nltk_data_path)
49
- nltk.download('stopwords', download_dir=nltk_data_path)
50
 
51
  # Initialize FastAPI app
52
  app = FastAPI(title="Mental Health Counselor API")
 
37
  # Load environment variables
38
  load_dotenv()
39
 
40
+ # Set NLTK data path to a directory where we want to look for data
41
  nltk_data_path = os.path.join(os.path.dirname(__file__), "nltk_data")
42
  os.makedirs(nltk_data_path, exist_ok=True)
43
  nltk.data.path.append(nltk_data_path)
44
 
45
+ # Skip downloading NLTK data to avoid permission errors
46
+ logger.info(f"Using NLTK data from {nltk_data_path} if available")
 
 
 
47
 
48
  # Initialize FastAPI app
49
  app = FastAPI(title="Mental Health Counselor API")
run_api.sh CHANGED
@@ -7,7 +7,18 @@ echo "===== Application Startup at $(date -u '+%Y-%m-%d %H:%M:%S') ====="
7
  echo "Setting up directories..."
8
  mkdir -p data/users data/sessions data/conversations data/feedback
9
  mkdir -p mental_health_model_artifacts/chroma_db
10
- mkdir -p nltk_data
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  # Create .env file if it doesn't exist
13
  if [ ! -f .env ]; then
 
7
  echo "Setting up directories..."
8
  mkdir -p data/users data/sessions data/conversations data/feedback
9
  mkdir -p mental_health_model_artifacts/chroma_db
10
+
11
+ # Verify NLTK data exists and is accessible
12
+ echo "Checking NLTK data..."
13
+ if [ -d "/app/nltk_data" ]; then
14
+ echo "NLTK data directory exists at /app/nltk_data"
15
+ # List contents to verify
16
+ ls -la /app/nltk_data
17
+ else
18
+ echo "WARNING: NLTK data directory not found at /app/nltk_data"
19
+ # Create the directory anyway
20
+ mkdir -p /app/nltk_data
21
+ fi
22
 
23
  # Create .env file if it doesn't exist
24
  if [ ! -f .env ]; then