DrishtiSharma committed on
Commit
20a103e
·
verified ·
1 Parent(s): a6c6535

Update interim.py

Browse files
Files changed (1) hide show
  1. interim.py +36 -26
interim.py CHANGED
@@ -14,7 +14,17 @@ from langchain_community.document_loaders import (
14
  from datetime import datetime
15
  import pytz
16
 
17
- # DocumentRAG class with environment variable support for API Key
 
 
 
 
 
 
 
 
 
 
18
  class DocumentRAG:
19
  def __init__(self):
20
  self.document_store = None
@@ -28,6 +38,10 @@ class DocumentRAG:
28
  if not self.api_key:
29
  raise ValueError("API Key not found. Make sure to set the 'OPENAI_API_KEY' environment variable.")
30
 
 
 
 
 
31
  def process_documents(self, uploaded_files):
32
  """Process uploaded files by saving them temporarily and extracting content."""
33
  if not self.api_key:
@@ -51,14 +65,13 @@ class DocumentRAG:
51
  elif temp_file_path.endswith('.csv'):
52
  loader = CSVLoader(temp_file_path)
53
  else:
54
- continue
55
 
56
  # Load the documents
57
  try:
58
  documents.extend(loader.load())
59
  except Exception as e:
60
- print(f"Error loading {temp_file_path}: {str(e)}")
61
- continue
62
 
63
  if not documents:
64
  return "No valid documents were processed. Please check your files."
@@ -77,7 +90,12 @@ class DocumentRAG:
77
 
78
  # Create embeddings and initialize retrieval chain
79
  embeddings = OpenAIEmbeddings(api_key=self.api_key)
80
- self.document_store = Chroma.from_documents(documents, embeddings)
 
 
 
 
 
81
  self.qa_chain = ConversationalRetrievalChain.from_llm(
82
  ChatOpenAI(temperature=0, model_name='gpt-4', api_key=self.api_key),
83
  self.document_store.as_retriever(search_kwargs={'k': 6}),
@@ -109,6 +127,7 @@ class DocumentRAG:
109
  return f"Error generating summary: {str(e)}"
110
 
111
  def handle_query(self, question, history):
 
112
  if not self.qa_chain:
113
  return history + [("System", "Please process the documents first.")]
114
  try:
@@ -131,21 +150,16 @@ class DocumentRAG:
131
  except Exception as e:
132
  return history + [("System", f"Error: {str(e)}")]
133
 
 
 
 
 
134
  # Streamlit UI
135
  st.title("Document Analyzer and Podcast Generator")
136
 
137
  # Fetch the API key status
138
  if "OPENAI_API_KEY" not in os.environ or not os.getenv("OPENAI_API_KEY"):
139
  st.error("The 'OPENAI_API_KEY' environment variable is not set. Please configure it in your hosting environment.")
140
- else:
141
- st.success("API Key successfully loaded from environment variable.")
142
-
143
- # Initialize RAG system
144
- try:
145
- rag_system = DocumentRAG()
146
- except ValueError as e:
147
- st.error(str(e))
148
- st.stop()
149
 
150
  # File upload
151
  st.subheader("Step 1: Upload Documents")
@@ -154,28 +168,24 @@ uploaded_files = st.file_uploader("Upload files (PDF, TXT, CSV)", accept_multipl
154
  if st.button("Process Documents"):
155
  if uploaded_files:
156
  # Process the uploaded files
157
- result = rag_system.process_documents(uploaded_files)
158
-
159
- # Ensure that result is a string and display appropriately
160
- if isinstance(result, str):
161
- if "successfully" in result:
162
- st.success(result)
163
- else:
164
- st.error(result)
165
  else:
166
- st.error("An unexpected error occurred during document processing.")
167
  else:
168
  st.warning("No files uploaded.")
169
 
170
  # Document Q&A
171
  st.subheader("Step 2: Ask Questions")
172
- if rag_system.qa_chain:
173
  history = []
174
  user_question = st.text_input("Ask a question:")
175
  if st.button("Submit Question"):
176
- history = rag_system.handle_query(user_question, history)
 
177
  for question, answer in history:
178
  st.chat_message("user").write(question)
179
  st.chat_message("assistant").write(answer)
180
  else:
181
- st.info("Please process documents before asking questions.")
 
14
  from datetime import datetime
15
  import pytz
16
 
17
+ from langchain.chains import ConversationalRetrievalChain
18
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
19
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
20
+ from langchain_community.vectorstores import Chroma
21
+ from langchain_community.document_loaders import PyPDFLoader, TextLoader, CSVLoader
22
+ import os
23
+ import tempfile
24
+ from datetime import datetime
25
+ import pytz
26
+
27
+
28
  class DocumentRAG:
29
  def __init__(self):
30
  self.document_store = None
 
38
  if not self.api_key:
39
  raise ValueError("API Key not found. Make sure to set the 'OPENAI_API_KEY' environment variable.")
40
 
41
+ # Persistent directory for Chroma to avoid tenant-related errors
42
+ self.chroma_persist_dir = "./chroma_storage"
43
+ os.makedirs(self.chroma_persist_dir, exist_ok=True)
44
+
45
  def process_documents(self, uploaded_files):
46
  """Process uploaded files by saving them temporarily and extracting content."""
47
  if not self.api_key:
 
65
  elif temp_file_path.endswith('.csv'):
66
  loader = CSVLoader(temp_file_path)
67
  else:
68
+ return f"Unsupported file type: {uploaded_file.name}"
69
 
70
  # Load the documents
71
  try:
72
  documents.extend(loader.load())
73
  except Exception as e:
74
+ return f"Error loading {uploaded_file.name}: {str(e)}"
 
75
 
76
  if not documents:
77
  return "No valid documents were processed. Please check your files."
 
90
 
91
  # Create embeddings and initialize retrieval chain
92
  embeddings = OpenAIEmbeddings(api_key=self.api_key)
93
+ self.document_store = Chroma.from_documents(
94
+ documents,
95
+ embeddings,
96
+ persist_directory=self.chroma_persist_dir # Persistent directory for Chroma
97
+ )
98
+
99
  self.qa_chain = ConversationalRetrievalChain.from_llm(
100
  ChatOpenAI(temperature=0, model_name='gpt-4', api_key=self.api_key),
101
  self.document_store.as_retriever(search_kwargs={'k': 6}),
 
127
  return f"Error generating summary: {str(e)}"
128
 
129
  def handle_query(self, question, history):
130
+ """Handle user queries."""
131
  if not self.qa_chain:
132
  return history + [("System", "Please process the documents first.")]
133
  try:
 
150
  except Exception as e:
151
  return history + [("System", f"Error: {str(e)}")]
152
 
153
+ # Initialize RAG system in session state
154
+ if "rag_system" not in st.session_state:
155
+ st.session_state.rag_system = DocumentRAG()
156
+
157
  # Streamlit UI
158
  st.title("Document Analyzer and Podcast Generator")
159
 
160
  # Fetch the API key status
161
  if "OPENAI_API_KEY" not in os.environ or not os.getenv("OPENAI_API_KEY"):
162
  st.error("The 'OPENAI_API_KEY' environment variable is not set. Please configure it in your hosting environment.")
 
 
 
 
 
 
 
 
 
163
 
164
  # File upload
165
  st.subheader("Step 1: Upload Documents")
 
168
  if st.button("Process Documents"):
169
  if uploaded_files:
170
  # Process the uploaded files
171
+ result = st.session_state.rag_system.process_documents(uploaded_files)
172
+ if "successfully" in result:
173
+ st.success(result)
 
 
 
 
 
174
  else:
175
+ st.error(result)
176
  else:
177
  st.warning("No files uploaded.")
178
 
179
  # Document Q&A
180
  st.subheader("Step 2: Ask Questions")
181
+ if st.session_state.rag_system.qa_chain:
182
  history = []
183
  user_question = st.text_input("Ask a question:")
184
  if st.button("Submit Question"):
185
+ # Handle the user query
186
+ history = st.session_state.rag_system.handle_query(user_question, history)
187
  for question, answer in history:
188
  st.chat_message("user").write(question)
189
  st.chat_message("assistant").write(answer)
190
  else:
191
+ st.info("Please process documents before asking questions.")