Vela committed on
Commit
4cb5342
·
1 Parent(s): 819d093

updated docker file

Browse files
Dockerfile CHANGED
@@ -13,13 +13,15 @@ RUN pip install --no-cache-dir --upgrade pip \
13
  COPY . /app
14
 
15
  # Create logs directory with proper permissions
16
- RUN mkdir -p /app/logs && chmod -R 777 /app/logs
 
17
 
18
  # Install additional dependencies
19
  RUN apt-get update && apt-get install -y tmux curl
20
 
21
  # Ensure the Hugging Face cache is set correctly
22
  ENV TRANSFORMERS_CACHE="/app/.cache/huggingface"
 
23
 
24
  # Set Python path
25
  ENV PYTHONPATH="/app/src"
 
13
  COPY . /app
14
 
15
  # Create logs directory with proper permissions
16
+ RUN mkdir -p /app/logs /app/.cache/huggingface && chmod -R 777 /app/logs /app/.cache/huggingface
17
+
18
 
19
  # Install additional dependencies
20
  RUN apt-get update && apt-get install -y tmux curl
21
 
22
  # Ensure the Hugging Face cache is set correctly
23
  ENV TRANSFORMERS_CACHE="/app/.cache/huggingface"
24
+ ENV HF_HOME="/app/.cache/huggingface"
25
 
26
  # Set Python path
27
  ENV PYTHONPATH="/app/src"
requirements.txt CHANGED
@@ -7,5 +7,6 @@ pinecone
7
  torch
8
  torchvision
9
  torchaudio
 
10
  sentence_transformers
11
  groq
 
7
  torch
8
  torchvision
9
  torchaudio
10
+ transformers
11
  sentence_transformers
12
  groq
src/backend/data/pinecone_db.py CHANGED
@@ -136,7 +136,7 @@ def upsert_data_in_db(df: pd.DataFrame):
136
 
137
  vectors = []
138
  for idx, (embedding, (_, row_data)) in enumerate(zip(batch["embedding"], batch.iterrows())):
139
- vector_id = f"q_{i + idx}" # Ensures IDs remain unique across batches
140
  metadata = {
141
  "question": row_data.get("input"),
142
  "answer": row_data.get("output")
 
136
 
137
  vectors = []
138
  for idx, (embedding, (_, row_data)) in enumerate(zip(batch["embedding"], batch.iterrows())):
139
+ vector_id = f"question_{i + idx}" # Ensures IDs remain unique across batches
140
  metadata = {
141
  "question": row_data.get("input"),
142
  "answer": row_data.get("output")