DrishtiSharma committed on
Commit
ae9100f
·
verified ·
1 Parent(s): afdff8e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -5
app.py CHANGED
@@ -14,7 +14,17 @@ from langchain_community.document_loaders import (
14
  from datetime import datetime
15
  import pytz
16
 
17
- # DocumentRAG class with environment variable support for API Key
 
 
 
 
 
 
 
 
 
 
18
  class DocumentRAG:
19
  def __init__(self):
20
  self.document_store = None
@@ -28,6 +38,10 @@ class DocumentRAG:
28
  if not self.api_key:
29
  raise ValueError("API Key not found. Make sure to set the 'OPENAI_API_KEY' environment variable.")
30
 
 
 
 
 
31
  def process_documents(self, uploaded_files):
32
  """Process uploaded files by saving them temporarily and extracting content."""
33
  if not self.api_key:
@@ -51,14 +65,13 @@ class DocumentRAG:
51
  elif temp_file_path.endswith('.csv'):
52
  loader = CSVLoader(temp_file_path)
53
  else:
54
- continue
55
 
56
  # Load the documents
57
  try:
58
  documents.extend(loader.load())
59
  except Exception as e:
60
- print(f"Error loading {temp_file_path}: {str(e)}")
61
- continue
62
 
63
  if not documents:
64
  return "No valid documents were processed. Please check your files."
@@ -77,7 +90,12 @@ class DocumentRAG:
77
 
78
  # Create embeddings and initialize retrieval chain
79
  embeddings = OpenAIEmbeddings(api_key=self.api_key)
80
- self.document_store = Chroma.from_documents(documents, embeddings)
 
 
 
 
 
81
  self.qa_chain = ConversationalRetrievalChain.from_llm(
82
  ChatOpenAI(temperature=0, model_name='gpt-4', api_key=self.api_key),
83
  self.document_store.as_retriever(search_kwargs={'k': 6}),
@@ -109,6 +127,7 @@ class DocumentRAG:
109
  return f"Error generating summary: {str(e)}"
110
 
111
  def handle_query(self, question, history):
 
112
  if not self.qa_chain:
113
  return history + [("System", "Please process the documents first.")]
114
  try:
 
14
  from datetime import datetime
15
  import pytz
16
 
17
+ from langchain.chains import ConversationalRetrievalChain
18
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
19
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
20
+ from langchain_community.vectorstores import Chroma
21
+ from langchain_community.document_loaders import PyPDFLoader, TextLoader, CSVLoader
22
+ import os
23
+ import tempfile
24
+ from datetime import datetime
25
+ import pytz
26
+
27
+
28
  class DocumentRAG:
29
  def __init__(self):
30
  self.document_store = None
 
38
  if not self.api_key:
39
  raise ValueError("API Key not found. Make sure to set the 'OPENAI_API_KEY' environment variable.")
40
 
41
+ # Persistent directory for Chroma to avoid tenant-related errors
42
+ self.chroma_persist_dir = "./chroma_storage"
43
+ os.makedirs(self.chroma_persist_dir, exist_ok=True)
44
+
45
  def process_documents(self, uploaded_files):
46
  """Process uploaded files by saving them temporarily and extracting content."""
47
  if not self.api_key:
 
65
  elif temp_file_path.endswith('.csv'):
66
  loader = CSVLoader(temp_file_path)
67
  else:
68
+ return f"Unsupported file type: {uploaded_file.name}"
69
 
70
  # Load the documents
71
  try:
72
  documents.extend(loader.load())
73
  except Exception as e:
74
+ return f"Error loading {uploaded_file.name}: {str(e)}"
 
75
 
76
  if not documents:
77
  return "No valid documents were processed. Please check your files."
 
90
 
91
  # Create embeddings and initialize retrieval chain
92
  embeddings = OpenAIEmbeddings(api_key=self.api_key)
93
+ self.document_store = Chroma.from_documents(
94
+ documents,
95
+ embeddings,
96
+ persist_directory=self.chroma_persist_dir # Persistent directory for Chroma
97
+ )
98
+
99
  self.qa_chain = ConversationalRetrievalChain.from_llm(
100
  ChatOpenAI(temperature=0, model_name='gpt-4', api_key=self.api_key),
101
  self.document_store.as_retriever(search_kwargs={'k': 6}),
 
127
  return f"Error generating summary: {str(e)}"
128
 
129
  def handle_query(self, question, history):
130
+ """Handle user queries."""
131
  if not self.qa_chain:
132
  return history + [("System", "Please process the documents first.")]
133
  try: