Vela committed on

Commit c899329 · 1 Parent(s): 43f57f4

create a branch for chat feature

Files changed (32)
  1. README.md +17 -0
  2. requirements.txt +6 -3
  3. src/backend/__pycache__/chatbot.cpython-313.pyc +0 -0
  4. src/backend/__pycache__/main.cpython-313.pyc +0 -0
  5. src/backend/{routes → api_routes}/__pycache__/__init__.cpython-313.pyc +0 -0
  6. src/backend/api_routes/__pycache__/chat_api.cpython-313.pyc +0 -0
  7. src/backend/{routes → api_routes}/__pycache__/knowledge_base_api.cpython-313.pyc +0 -0
  8. src/backend/{routes → api_routes}/__pycache__/test_api.cpython-313.pyc +0 -0
  9. src/backend/{routes → api_routes}/__pycache__/upsert_data.cpython-313.pyc +0 -0
  10. src/backend/api_routes/chat_api.py +33 -0
  11. src/backend/{routes → api_routes}/knowledge_base_api.py +1 -1
  12. src/backend/chatbot.py +21 -0
  13. src/backend/data/__pycache__/pinecone_db.cpython-313.pyc +0 -0
  14. src/backend/data/pinecone_db.py +1 -1
  15. src/backend/main.py +39 -26
  16. src/backend/models/__pycache__/embedding_model.cpython-313.pyc +0 -0
  17. src/backend/models/__pycache__/schemas.cpython-313.pyc +0 -0
  18. src/backend/models/schemas.py +10 -1
  19. src/backend/routes/__pycache__/chat_api.cpython-313.pyc +0 -0
  20. src/backend/services/__pycache__/embedding_service.cpython-313.pyc +0 -0
  21. src/backend/services/__pycache__/llm_model_service.cpython-313.pyc +0 -0
  22. src/backend/services/__pycache__/pinecone_service.cpython-313.pyc +0 -0
  23. src/backend/services/__pycache__/schemas.cpython-313.pyc +0 -0
  24. src/backend/services/__pycache__/supabase_service.cpython-313.pyc +0 -0
  25. src/backend/services/embedding_service.py +18 -0
  26. src/backend/services/llm_model_service.py +33 -0
  27. src/backend/services/pinecone_service.py +251 -0
  28. src/backend/services/schemas.py +11 -0
  29. src/backend/services/supabase_service.py +46 -0
  30. src/backend/utils/__pycache__/logger.cpython-313.pyc +0 -0
  31. src/frontend/app/__pycache__/common_fuctions.cpython-313.pyc +0 -0
  32. src/frontend/app/__pycache__/pinecone_data_handler.cpython-313.pyc +0 -0
README.md CHANGED
@@ -100,3 +100,20 @@ Once launched, interact with Yuvabe Care Companion AI as follows:
 ## Contributions
 
 If you'd like to contribute to Yuvabe Care Companion AI, please fork the repository and create a pull request with your features or fixes.
+
+📋 Recommended Tech Stack (Enhanced)
+| Library/Framework | Purpose |
+| --- | --- |
+| streamlit | For building the chatbot UI with a simple and interactive interface. |
+| fastapi[standard] | For creating scalable APIs to manage backend logic and endpoints. |
+| uvicorn | Fast ASGI server for running FastAPI apps. |
+| requests | For making HTTP requests (e.g., fetching data from APIs). |
+| Pillow | For handling and processing images in chat responses (if needed). |
+| pandas | For data manipulation and analysis. |
+| torch, torchvision, torchaudio | For model inference and custom model development (if required). |
+| transformers | For powerful NLP models such as GPT, LLaMA, or Mistral. |
+| sentence-transformers | For efficient text embeddings, additional embedding options, and fine-tuning. |
+| groq | For ultra-fast model inference. |
+| pinecone-client | For vector search and storage (ideal for RAG architecture). |
+| supabase | For efficient, scalable chat history storage. |
+| langchain | For implementing text chunking, prompt chaining, and retrieval pipelines. |
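For orientation, here is a minimal sketch of how the streamlit and requests entries could combine into a chat frontend against the backend added in this commit. This file is hypothetical and not part of the commit; the endpoint URL and port are assumptions.

```python
# chat_ui.py — hypothetical Streamlit frontend; the backend URL is an assumption.
import requests
import streamlit as st

st.title("Yuvabe Care Companion AI")
query = st.text_input("Ask a health question")

if st.button("Send") and query:
    # POST to the /chat/query route registered in src/backend/main.py.
    reply = requests.post("http://localhost:8000/chat/query", json={"query": query})
    st.write(reply.json())
```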
requirements.txt CHANGED
@@ -3,10 +3,13 @@ requests
 Pillow
 pandas
 fastapi[standard]
-pinecone
 torch
 torchvision
 torchaudio
 transformers
-sentence_transformers
-groq
+groq
+sentence-transformers
+pinecone
+supabase
+langchain
+uvicorn
src/backend/__pycache__/chatbot.cpython-313.pyc ADDED
Binary file (1.5 kB)
 
src/backend/__pycache__/main.cpython-313.pyc CHANGED
Binary files a/src/backend/__pycache__/main.cpython-313.pyc and b/src/backend/__pycache__/main.cpython-313.pyc differ
 
src/backend/{routes → api_routes}/__pycache__/__init__.cpython-313.pyc RENAMED
File without changes
src/backend/api_routes/__pycache__/chat_api.cpython-313.pyc ADDED
Binary file (1.55 kB)
 
src/backend/{routes → api_routes}/__pycache__/knowledge_base_api.cpython-313.pyc RENAMED
Binary files a/src/backend/routes/__pycache__/knowledge_base_api.cpython-313.pyc and b/src/backend/api_routes/__pycache__/knowledge_base_api.cpython-313.pyc differ
 
src/backend/{routes → api_routes}/__pycache__/test_api.cpython-313.pyc RENAMED
File without changes
src/backend/{routes → api_routes}/__pycache__/upsert_data.cpython-313.pyc RENAMED
File without changes
src/backend/api_routes/chat_api.py ADDED
@@ -0,0 +1,33 @@
+from fastapi import APIRouter, HTTPException
+from chatbot import chatbot_response
+from services import pinecone_service, llm_model_service
+from services.supabase_service import get_chat_history
+from utils import logger
+from services.schemas import ChatRequest, ChatResponse
+
+
+logger = logger.get_logger()
+
+router = APIRouter(prefix="/chat", tags=["Chat"])
+
+
+@router.post("/query")
+async def chat_query(request: ChatRequest):
+    try:
+        logger.info("Trying to fetch response")
+        query = request.query
+        context = pinecone_service.retrieve_context_from_pinecone(query)
+        response = llm_model_service.generate_response_with_context(query, context)
+        logger.info("Fetched response")
+        return response
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+# @router.get("/history/{date}", response_model=list)
+# async def get_history(date: str):
+#     try:
+#         history = get_chat_history(date)
+#         return history
+#     except Exception as e:
+#         raise HTTPException(status_code=500, detail=str(e))
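A minimal client-side sketch of this new endpoint, assuming the app is served locally on port 8000. The URL and sample question are invented; the payload shape follows the `ChatRequest` schema used above.

```python
import requests

# Hypothetical local deployment; adjust host/port to your setup.
API_URL = "http://localhost:8000/chat/query"

payload = {"query": "What are common symptoms of dehydration?"}
resp = requests.post(API_URL, json=payload, timeout=30)
resp.raise_for_status()
print(resp.json())  # the raw answer string returned by generate_response_with_context
```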
src/backend/{routes → api_routes}/knowledge_base_api.py RENAMED
@@ -4,7 +4,7 @@ from models.schemas import UpsertRequest,DeleteRequest,MetadataRequest
 from data import pinecone_db
 import pandas as pd
 
-router = APIRouter()
+router = APIRouter(prefix="/knowledge-base", tags=['Knowledge Base Operations'])
 
 @router.post("/upsert-data")
 def upsert_data(request: UpsertRequest):
src/backend/chatbot.py ADDED
@@ -0,0 +1,21 @@
+from services.pinecone_service import retrieve_relevant_metadata
+from services.supabase_service import store_chat_history
+from sentence_transformers import CrossEncoder
+
+reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
+
+def rerank_results(query, results):
+    pairs = [(query, result["metadata"]["text"]) for result in results]
+    scores = reranker.predict(pairs)
+    return [x for _, x in sorted(zip(scores, results), key=lambda pair: pair[0], reverse=True)]
+
+def chatbot_response(query):
+    results = retrieve_relevant_metadata(query)
+    if results:
+        reranked_results = rerank_results(query, results)
+        best_answer = reranked_results[0]["metadata"]["question"]
+    else:
+        best_answer = "I'm sorry, I couldn't find an answer for your query."
+
+    # store_chat_history(query, best_answer)
+    return best_answer
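A quick smoke-test sketch for this module; it assumes PINECONE_API_KEY is set in the environment so that `retrieve_relevant_metadata` can reach the index, and the sample question is invented.

```python
# Hypothetical quick check; requires a populated Pinecone index and
# PINECONE_API_KEY in the environment (see pinecone_service.py).
from chatbot import chatbot_response

answer = chatbot_response("How can I manage seasonal allergies?")
print(answer)
```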
src/backend/data/__pycache__/pinecone_db.cpython-313.pyc CHANGED
Binary files a/src/backend/data/__pycache__/pinecone_db.cpython-313.pyc and b/src/backend/data/__pycache__/pinecone_db.cpython-313.pyc differ
 
src/backend/data/pinecone_db.py CHANGED
@@ -4,7 +4,7 @@ import os
 # sys.path.append(src_directory)
 from pinecone import Pinecone, ServerlessSpec
 import time
-from tqdm import tqdm  # Progress bar for large datasets
+from tqdm import tqdm
 from dotenv import load_dotenv
 from utils import logger
 import pandas as pd
src/backend/main.py CHANGED
@@ -1,33 +1,46 @@
-from fastapi import FastAPI
-from routes import knowledge_base_api
-from fastapi.middleware.cors import CORSMiddleware
-from utils import logger
-
-# Initialize FastAPI app
-app = FastAPI(
-    title="HealthCare VectorDB API",
-    description="API for managing Pinecone VectorDB operations for healthcare data.",
-    version="1.0.0"
-)
-
-# Logger setup
-logger = logger.get_logger()
-
-# CORS Middleware (for better cross-origin request handling)
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],  # Adjust for security if needed
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-# Include API routes
-app.include_router(knowledge_base_api.router, prefix="/knowledge-base", tags=['Knowledge Base Operations'])
-
-# Health Check Endpoint
-@app.get("/health", tags=["Health Check"])
-async def health_check():
-    return {"status": "API is healthy and running."}
+# from fastapi import FastAPI
+# from routes import knowledge_base_api
+# from fastapi.middleware.cors import CORSMiddleware
+# from utils import logger
+
+# # Initialize FastAPI app
+# app = FastAPI(
+#     title="HealthCare VectorDB API",
+#     description="API for managing Pinecone VectorDB operations for healthcare data.",
+#     version="1.0.0"
+# )
+
+# # Logger setup
+# logger = logger.get_logger()
+
+# # CORS Middleware (for better cross-origin request handling)
+# app.add_middleware(
+#     CORSMiddleware,
+#     allow_origins=["*"],  # Adjust for security if needed
+#     allow_credentials=True,
+#     allow_methods=["*"],
+#     allow_headers=["*"],
+# )
+
+# # Include API routes
+# app.include_router(knowledge_base_api.router, prefix="/knowledge-base", tags=['Knowledge Base Operations'])
+
+# # Health Check Endpoint
+# @app.get("/health", tags=["Health Check"])
+# async def health_check():
+#     return {"status": "API is healthy and running."}
+
+from fastapi import FastAPI
+from api_routes.chat_api import router as chat_router
+from api_routes.knowledge_base_api import router as knowledge_base_router
+
+app = FastAPI(
+    title="Yuvabe Care Companion AI",
+    description="A chatbot for health-related queries",
+    version="1.0.0"
+)
+
+# Register Routes
+app.include_router(chat_router)
+app.include_router(knowledge_base_router)
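To serve this app with the uvicorn dependency added in requirements.txt, a typical launcher might look like the sketch below. The file name, working directory (src/backend), and port are assumptions, not part of this commit.

```python
# run.py — hypothetical launcher; run from src/backend so the imports resolve.
import uvicorn

if __name__ == "__main__":
    # "main:app" points at the FastAPI instance created in src/backend/main.py.
    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
```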
src/backend/models/__pycache__/embedding_model.cpython-313.pyc CHANGED
Binary files a/src/backend/models/__pycache__/embedding_model.cpython-313.pyc and b/src/backend/models/__pycache__/embedding_model.cpython-313.pyc differ
 
src/backend/models/__pycache__/schemas.cpython-313.pyc CHANGED
Binary files a/src/backend/models/__pycache__/schemas.cpython-313.pyc and b/src/backend/models/__pycache__/schemas.cpython-313.pyc differ
 
src/backend/models/schemas.py CHANGED
@@ -14,4 +14,13 @@ class DeleteRequest(BaseModel):
 class MetadataRequest(BaseModel):
     prompt: str
     n_result: int = 3
-    score_threshold: float = 0.45
+    score_threshold: float = 0.45
+
+class ChatRequest(BaseModel):
+    query: str
+
+class ChatResponse(BaseModel):
+    response: str
+
+class ChatHistoryResponse(BaseModel):
+    date: str
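For illustration, the new request/response models validate like any other pydantic models. A sketch; the sample strings are invented:

```python
from models.schemas import ChatRequest, ChatResponse

req = ChatRequest(query="What foods are rich in iron?")        # `query` must be a str
res = ChatResponse(response="Leafy greens, legumes, red meat.")
print(req.model_dump(), res.model_dump())                      # pydantic v2; use .dict() under v1
```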
src/backend/routes/__pycache__/chat_api.cpython-313.pyc DELETED
Binary file (3.17 kB)
 
src/backend/services/__pycache__/embedding_service.cpython-313.pyc ADDED
Binary file (1.3 kB)
 
src/backend/services/__pycache__/llm_model_service.cpython-313.pyc ADDED
Binary file (1.17 kB)
 
src/backend/services/__pycache__/pinecone_service.cpython-313.pyc ADDED
Binary file (11.2 kB)
 
src/backend/services/__pycache__/schemas.cpython-313.pyc ADDED
Binary file (1.27 kB)
 
src/backend/services/__pycache__/supabase_service.cpython-313.pyc ADDED
Binary file (2.82 kB)
 
src/backend/services/embedding_service.py ADDED
@@ -0,0 +1,18 @@
+from sentence_transformers import SentenceTransformer
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from utils import logger
+
+logger = logger.get_logger()
+
+model = SentenceTransformer("all-MiniLM-L6-v2")
+
+def get_text_embedding(text):
+    try:
+        return model.encode(text, convert_to_tensor=True).cpu().numpy().tolist()
+    except Exception as e:
+        logger.error(f"Error generating embedding: {e}")
+        raise
+
+def chunk_text(text, chunk_size=500, chunk_overlap=100):
+    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+    return splitter.split_text(text)
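A small usage sketch of these helpers; the sample text is invented, and the model download happens on first run.

```python
from services.embedding_service import chunk_text, get_text_embedding

text = "Hydration matters. " * 100                 # toy document
chunks = chunk_text(text, chunk_size=200, chunk_overlap=40)
vectors = [get_text_embedding(c) for c in chunks]
print(len(chunks), len(vectors[0]))                # all-MiniLM-L6-v2 yields 384-dim vectors
```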
src/backend/services/llm_model_service.py ADDED
@@ -0,0 +1,33 @@
+import os
+from groq import Groq
+from dotenv import load_dotenv
+
+load_dotenv()
+
+LLM_MODEL_NAME = "llama-3.3-70b-versatile"
+GROQ_KEY = os.environ.get("GROQ_API")
+client = Groq(api_key=GROQ_KEY)
+
+
+def generate_response_with_context(prompt, context):
+    # Construct the final prompt for the LLaMA model
+    final_prompt = (
+        f"Context: {context}\n\n"
+        f"Question: {prompt}\n"
+        "Answer:"
+    )
+
+    # Send the prompt to the Groq API
+    chat_completion = client.chat.completions.create(
+        messages=[
+            {"role": "system", "content": final_prompt},
+            {"role": "user", "content": prompt},
+        ],
+        model=LLM_MODEL_NAME,
+    )
+
+    # Extract the response text
+    assistant_response = chat_completion.choices[0].message.content
+
+    return assistant_response
+
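A direct-call sketch of this service; it assumes GROQ_API is set in .env, and the question and context strings are invented.

```python
from services.llm_model_service import generate_response_with_context

context = "Dehydration symptoms include thirst, dark urine, and fatigue."
answer = generate_response_with_context("How do I know if I'm dehydrated?", context)
print(answer)
```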
src/backend/services/pinecone_service.py ADDED
@@ -0,0 +1,251 @@
+import os
+# import sys
+# src_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "backend"))
+# sys.path.append(src_directory)
+from pinecone import Pinecone, ServerlessSpec
+import time
+from tqdm import tqdm
+from dotenv import load_dotenv
+from utils import logger
+import pandas as pd
+from services.embedding_service import get_text_embedding
+from sentence_transformers import CrossEncoder
+
+reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
+
+load_dotenv()
+PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
+logger = logger.get_logger()
+NAMESPACE = "health-care-dataset"
+INDEX_NAME = "health-care-index"
+PINECONE = Pinecone(api_key=PINECONE_API_KEY)
+
+def rerank_results(query, results, score_threshold=0.5):
+    pairs = [(query, result["metadata"]["question"]) for result in results]
+    scores = reranker.predict(pairs)
+
+    # Filter based on score threshold
+    filtered_results = [
+        result for score, result in zip(scores, results) if score >= score_threshold
+    ]
+
+    # Sort remaining results by score in descending order
+    return sorted(filtered_results, key=lambda x: x['score'], reverse=True)
+
+def initialize_pinecone_index(pinecone, index_name, dimension=384, metric="cosine", cloud="aws", region="us-east-1"):
+    """
+    Retrieves an existing Pinecone index or creates a new one if it does not exist.
+
+    This method checks for the presence of the specified index. If the index does not exist,
+    it initiates the creation process, waits until the index is ready, and then returns the index.
+
+    Args:
+        pinecone (Pinecone): Pinecone client instance.
+        index_name (str): Name of the index to retrieve or create.
+        dimension (int, optional): Vector dimension for the index. Default is 384.
+        metric (str, optional): Distance metric for the index. Default is "cosine".
+        cloud (str, optional): Cloud provider for hosting the index. Default is "aws".
+        region (str, optional): Region where the index will be hosted. Default is "us-east-1".
+
+    Returns:
+        pinecone.Index: The Pinecone index instance.
+
+    Raises:
+        Exception: If an error occurs during index creation or retrieval.
+
+    Example:
+        >>> index = initialize_pinecone_index(PINECONE, "sample_index")
+        Logs: "Index 'sample_index' is ready and accessible."
+    """
+    try:
+        logger.info(f"Checking if the index '{index_name}' exists...")
+
+        # Check if index already exists
+        if not pinecone.has_index(index_name):
+            logger.info(f"Index '{index_name}' does not exist. Creating a new index...")
+
+            # Create a new index
+            pinecone.create_index(
+                name=index_name,
+                dimension=dimension,
+                metric=metric,
+                spec=ServerlessSpec(cloud=cloud, region=region)
+            )
+            logger.info(f"Index '{index_name}' creation initiated. Waiting for it to be ready...")
+
+            # Wait until index is ready
+            while True:
+                index_status = pinecone.describe_index(index_name)
+                if index_status.status.get("ready", False):
+                    index = pinecone.Index(index_name)
+                    logger.info(f"Index '{index_name}' is ready and accessible.")
+                    return index
+                else:
+                    logger.debug(f"Index '{index_name}' is not ready yet. Checking again in 1 second.")
+                    time.sleep(1)
+        else:
+            # Return the existing index
+            index = pinecone.Index(index_name)
+            logger.info(f"Index '{index_name}' already exists. Returning the existing index.")
+            return index
+
+    except Exception as e:
+        logger.error(f"Error occurred while getting or creating the Pinecone index: {str(e)}", exc_info=True)
+        return None
+
+def delete_records_by_ids(ids_to_delete):
+    """
+    Deletes specified IDs from the database index.
+
+    This method interacts with the index to delete entries based on the provided list of IDs.
+    It logs a success message if the deletion is successful or returns an error message if it fails.
+
+    Args:
+        ids_to_delete (list):
+            A list of unique identifiers (IDs) to be deleted from the database.
+
+    Returns:
+        str: A success message is logged upon successful deletion.
+             If an error occurs, a string describing the failure is returned.
+
+    Raises:
+        Exception: Logs an error if the deletion process encounters an issue.
+
+    Example:
+        >>> delete_records_by_ids(['id_123', 'id_456'])
+        Logs: "IDs deleted successfully."
+
+    Notes:
+        - The method assumes `initialize_pinecone_index()` initializes the index object.
+        - Deletion occurs within the specified `NAMESPACE`.
+    """
+    try:
+        index = initialize_pinecone_index(PINECONE, INDEX_NAME)
+        index.delete(ids=ids_to_delete, namespace=NAMESPACE)
+        logger.info("IDs deleted successfully.")
+    except Exception as e:
+        return f"Failed to delete the IDs: {e}"
+
+def retrieve_relevant_metadata(prompt, n_result=3, score_threshold=0.47):
+    """
+    Retrieves and reranks relevant context data based on a given prompt.
+    """
+    try:
+        index = initialize_pinecone_index(PINECONE, INDEX_NAME)
+        prompt = prompt[-1] if isinstance(prompt, list) else prompt
+
+        # Generate embedding for the provided prompt
+        embedding = get_text_embedding(prompt)
+        response = index.query(
+            top_k=n_result,
+            vector=embedding,
+            namespace=NAMESPACE,
+            include_metadata=True
+        )
+
+        # Extract metadata and filter by score threshold
+        filtered_results = [
+            {
+                "question": entry.get('metadata', {}).get('question', 'N/A'),
+                "answer": entry.get('metadata', {}).get('answer', 'N/A'),
+                "instruction": entry.get('metadata', {}).get('instruction', 'N/A'),
+                "score": f"{entry.get('score', 0)}",
+                "id": f"{entry.get('id', 'N/A')}"
+            }
+            for entry in response.get('matches', [])
+            if entry.get('score', 0) >= score_threshold
+        ]
+
+        # Rerank the filtered results using a reranker model
+        if filtered_results:
+            pairs = [(prompt, item["question"]) for item in filtered_results]
+            scores = reranker.predict(pairs)  # Predict relevance scores
+
+            # Attach reranker scores and sort by relevance
+            for item, score in zip(filtered_results, scores):
+                item["reranker_score"] = score
+
+            filtered_results = sorted(
+                filtered_results,
+                key=lambda x: x["reranker_score"],
+                reverse=True
+            )
+
+        # Return metadata or a fallback message
+        return filtered_results if filtered_results else [{"response": "No relevant data found."}]
+
+    except Exception as e:
+        logger.error(f"Failed to fetch context for '{prompt[:20]}'. Error: {e}")
+        return [{"response": "Failed to fetch data due to an error."}]
+
+
+def upsert_vector_data(df: pd.DataFrame):
+    """
+    Generates embeddings for the given DataFrame and uploads data to Pinecone in batches.
+
+    Parameters:
+    - df (pd.DataFrame): DataFrame containing 'input', 'output', and 'instruction' columns.
+
+    Returns:
+    - None
+    """
+    try:
+        index = initialize_pinecone_index(PINECONE, INDEX_NAME)
+        df["embedding"] = [
+            get_text_embedding([q])[0]
+            for q in tqdm(df["input"], desc="Generating Embeddings")
+        ]
+    except Exception as e:
+        logger.error(f"Error generating embeddings: {e}")
+        return
+
+    # Upload data to Pinecone in batches
+    BATCH_SIZE = 500
+
+    for i in tqdm(range(0, len(df), BATCH_SIZE), desc="Uploading Data to Pinecone"):
+        batch = df.iloc[i : i + BATCH_SIZE]
+
+        vectors = []
+        for idx, (embedding, (_, row_data)) in enumerate(zip(batch["embedding"], batch.iterrows())):
+            question = row_data.get("input")
+            vector_id = f"{question[:50]}:{i + idx}"  # Ensures IDs remain unique across batches
+            metadata = {
+                "question": row_data.get("input"),
+                "answer": row_data.get("output"),
+                "instruction": row_data.get("instruction"),
+            }
+            vectors.append((vector_id, embedding, metadata))
+
+        try:
+            index.upsert(vectors=vectors, namespace=NAMESPACE)
+        except Exception as e:
+            logger.error(f"Error uploading batch starting at index {i}: {e}")
+
+    logger.info("All question-answer pairs stored successfully!")
+
+def retrieve_context_from_pinecone(prompt, n_result=3, score_threshold=0.5):
+    index = initialize_pinecone_index(PINECONE, INDEX_NAME)
+    # Generate embedding for the provided prompt
+    embedding = get_text_embedding(prompt)
+    # Query Pinecone for relevant context
+    response = index.query(
+        top_k=n_result,
+        vector=embedding,
+        namespace=NAMESPACE,
+        include_metadata=True
+    )
+
+    # Extract metadata and filter results
+    filtered_results = [
+        entry['metadata'].get('question', 'N/A')
+        for entry in response.get('matches', [])
+        if entry.get('score', 0) >= score_threshold
+    ]
+
+    # Combine the context into a single string
+    context = "\n".join(filtered_results) if filtered_results else "No relevant context found."
+
+    return context
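End to end, the retrieval service plugs into the LLM service as sketched below; this assumes the index already holds upserted Q&A data and the relevant API keys are set, and the query string is invented.

```python
from services.pinecone_service import retrieve_context_from_pinecone
from services.llm_model_service import generate_response_with_context

query = "What helps with a sore throat?"
context = retrieve_context_from_pinecone(query, n_result=3, score_threshold=0.5)
print(generate_response_with_context(query, context))
```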
src/backend/services/schemas.py ADDED
@@ -0,0 +1,11 @@
+from pydantic import BaseModel, Field
+from typing import Optional
+
+class ChatRequest(BaseModel):
+    query: str = Field(..., description="User's query for the chatbot")
+
+class ChatResponse(BaseModel):
+    response: str = Field(..., description="Response generated by the chatbot")
+
+class ChatHistoryResponse(BaseModel):
+    date: str = Field(..., description="Date of the chat history in 'YYYY-MM-DD' format")
src/backend/services/supabase_service.py ADDED
@@ -0,0 +1,46 @@
+import json
+import os
+from supabase import create_client, Client
+from datetime import datetime
+from utils import logger
+
+SUPABASE_URL = os.getenv('SUPABASE_URL')
+SUPABASE_KEY = os.getenv('SUPABASE_KEY')
+SUPABASE_BUCKET = os.getenv('SUPABASE_BUCKET')
+
+logger = logger.get_logger()
+
+supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
+
+def store_chat_history(user_query, bot_response):
+    today = datetime.now().strftime("%Y-%m-%d")
+    file_path = f"{today}/{datetime.now().isoformat()}.json"
+
+    chat_data = {
+        "timestamp": datetime.now().isoformat(),
+        "user_query": user_query,
+        "bot_response": bot_response
+    }
+
+    try:
+        supabase.storage.from_(SUPABASE_BUCKET).upload(file_path, json.dumps(chat_data).encode('utf-8'))
+        logger.info(f"Chat history stored successfully: {file_path}")
+    except Exception as e:
+        logger.error(f"Error storing chat history: {e}")
+
+def get_chat_history(date):
+    try:
+        prefix = f"{date}/"
+        files = supabase.storage.from_(SUPABASE_BUCKET).list(prefix)
+        chat_history = []
+
+        for file in files:
+            file_path = f"{prefix}{file['name']}"  # list() returns names relative to the prefix
+            response = supabase.storage.from_(SUPABASE_BUCKET).download(file_path)
+            chat_data = json.loads(response)
+            chat_history.append(chat_data)
+
+        return chat_history
+    except Exception as e:
+        logger.error(f"Error retrieving chat history: {e}")
+        return []
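A round-trip sketch for this service; it assumes SUPABASE_URL, SUPABASE_KEY, and SUPABASE_BUCKET are set in the environment, and the sample strings are invented.

```python
from datetime import datetime
from services.supabase_service import store_chat_history, get_chat_history

store_chat_history("What helps with headaches?", "Rest, hydration, and OTC pain relief.")
today = datetime.now().strftime("%Y-%m-%d")
print(get_chat_history(today))  # list of {"timestamp", "user_query", "bot_response"} dicts
```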
src/backend/utils/__pycache__/logger.cpython-313.pyc CHANGED
Binary files a/src/backend/utils/__pycache__/logger.cpython-313.pyc and b/src/backend/utils/__pycache__/logger.cpython-313.pyc differ
 
src/frontend/app/__pycache__/common_fuctions.cpython-313.pyc CHANGED
Binary files a/src/frontend/app/__pycache__/common_fuctions.cpython-313.pyc and b/src/frontend/app/__pycache__/common_fuctions.cpython-313.pyc differ
 
src/frontend/app/__pycache__/pinecone_data_handler.cpython-313.pyc CHANGED
Binary files a/src/frontend/app/__pycache__/pinecone_data_handler.cpython-313.pyc and b/src/frontend/app/__pycache__/pinecone_data_handler.cpython-313.pyc differ