Spaces:
Sleeping
Sleeping
Vela
committed on
Commit
·
4cb5342
1
Parent(s):
819d093
updated docker file
Browse files
- Dockerfile +3 -1
- requirements.txt +1 -0
- src/backend/data/pinecone_db.py +1 -1
Dockerfile
CHANGED
@@ -13,13 +13,15 @@ RUN pip install --no-cache-dir --upgrade pip \
|
|
13 |
COPY . /app
|
14 |
|
15 |
# Create logs directory with proper permissions
|
16 |
-
RUN mkdir -p /app/logs && chmod -R 777 /app/logs
|
|
|
17 |
|
18 |
# Install additional dependencies
|
19 |
RUN apt-get update && apt-get install -y tmux curl
|
20 |
|
21 |
# Ensure the Hugging Face cache is set correctly
|
22 |
ENV TRANSFORMERS_CACHE="/app/.cache/huggingface"
|
|
|
23 |
|
24 |
# Set Python path
|
25 |
ENV PYTHONPATH="/app/src"
|
|
|
13 |
COPY . /app
|
14 |
|
15 |
# Create logs directory with proper permissions
|
16 |
+
RUN mkdir -p /app/logs /app/.cache/huggingface && chmod -R 777 /app/logs /app/.cache/huggingface
|
17 |
+
|
18 |
|
19 |
# Install additional dependencies
|
20 |
RUN apt-get update && apt-get install -y tmux curl
|
21 |
|
22 |
# Ensure the Hugging Face cache is set correctly
|
23 |
ENV TRANSFORMERS_CACHE="/app/.cache/huggingface"
|
24 |
+
ENV HF_HOME="/app/.cache/huggingface"
|
25 |
|
26 |
# Set Python path
|
27 |
ENV PYTHONPATH="/app/src"
|
requirements.txt
CHANGED
@@ -7,5 +7,6 @@ pinecone
|
|
7 |
torch
|
8 |
torchvision
|
9 |
torchaudio
|
|
|
10 |
sentence_transformers
|
11 |
groq
|
|
|
7 |
torch
|
8 |
torchvision
|
9 |
torchaudio
|
10 |
+
transformers
|
11 |
sentence_transformers
|
12 |
groq
|
src/backend/data/pinecone_db.py
CHANGED
@@ -136,7 +136,7 @@ def upsert_data_in_db(df: pd.DataFrame):
|
|
136 |
|
137 |
vectors = []
|
138 |
for idx, (embedding, (_, row_data)) in enumerate(zip(batch["embedding"], batch.iterrows())):
|
139 |
-
vector_id = f"
|
140 |
metadata = {
|
141 |
"question": row_data.get("input"),
|
142 |
"answer": row_data.get("output")
|
|
|
136 |
|
137 |
vectors = []
|
138 |
for idx, (embedding, (_, row_data)) in enumerate(zip(batch["embedding"], batch.iterrows())):
|
139 |
+
vector_id = f"question_{i + idx}" # Ensures IDs remain unique across batches
|
140 |
metadata = {
|
141 |
"question": row_data.get("input"),
|
142 |
"answer": row_data.get("output")
|