version3

Running

App Files Community

DrishtiSharma committed on Dec 5, 2024

Commit

20a103e

verified ·

1 Parent(s): a6c6535

Update interim.py

Browse files

Files changed (1) hide show

interim.py +36 -26

interim.py CHANGED Viewed

@@ -14,7 +14,17 @@ from langchain_community.document_loaders import (
 from datetime import datetime
 import pytz
-# DocumentRAG class with environment variable support for API Key
 class DocumentRAG:
     def __init__(self):
         self.document_store = None
@@ -28,6 +38,10 @@ class DocumentRAG:
         if not self.api_key:
             raise ValueError("API Key not found. Make sure to set the 'OPENAI_API_KEY' environment variable.")
     def process_documents(self, uploaded_files):
         """Process uploaded files by saving them temporarily and extracting content."""
         if not self.api_key:
@@ -51,14 +65,13 @@ class DocumentRAG:
                 elif temp_file_path.endswith('.csv'):
                     loader = CSVLoader(temp_file_path)
                 else:
-                    continue
                 # Load the documents
                 try:
                     documents.extend(loader.load())
                 except Exception as e:
-                    print(f"Error loading {temp_file_path}: {str(e)}")
-                    continue
             if not documents:
                 return "No valid documents were processed. Please check your files."
@@ -77,7 +90,12 @@ class DocumentRAG:
             # Create embeddings and initialize retrieval chain
             embeddings = OpenAIEmbeddings(api_key=self.api_key)
-            self.document_store = Chroma.from_documents(documents, embeddings)
             self.qa_chain = ConversationalRetrievalChain.from_llm(
                 ChatOpenAI(temperature=0, model_name='gpt-4', api_key=self.api_key),
                 self.document_store.as_retriever(search_kwargs={'k': 6}),
@@ -109,6 +127,7 @@ class DocumentRAG:
             return f"Error generating summary: {str(e)}"
     def handle_query(self, question, history):
         if not self.qa_chain:
             return history + [("System", "Please process the documents first.")]
         try:
@@ -131,21 +150,16 @@ class DocumentRAG:
         except Exception as e:
             return history + [("System", f"Error: {str(e)}")]
 # Streamlit UI
 st.title("Document Analyzer and Podcast Generator")
 # Fetch the API key status
 if "OPENAI_API_KEY" not in os.environ or not os.getenv("OPENAI_API_KEY"):
     st.error("The 'OPENAI_API_KEY' environment variable is not set. Please configure it in your hosting environment.")
-else:
-    st.success("API Key successfully loaded from environment variable.")
-# Initialize RAG system
-try:
-    rag_system = DocumentRAG()
-except ValueError as e:
-    st.error(str(e))
-    st.stop()
 # File upload
 st.subheader("Step 1: Upload Documents")
@@ -154,28 +168,24 @@ uploaded_files = st.file_uploader("Upload files (PDF, TXT, CSV)", accept_multipl
 if st.button("Process Documents"):
     if uploaded_files:
         # Process the uploaded files
-        result = rag_system.process_documents(uploaded_files)
-        # Ensure that result is a string and display appropriately
-        if isinstance(result, str):
-            if "successfully" in result:
-                st.success(result)
-            else:
-                st.error(result)
         else:
-            st.error("An unexpected error occurred during document processing.")
     else:
         st.warning("No files uploaded.")
 # Document Q&A
 st.subheader("Step 2: Ask Questions")
-if rag_system.qa_chain:
     history = []
     user_question = st.text_input("Ask a question:")
     if st.button("Submit Question"):
-        history = rag_system.handle_query(user_question, history)
         for question, answer in history:
             st.chat_message("user").write(question)
             st.chat_message("assistant").write(answer)
 else:
-    st.info("Please process documents before asking questions.")

 from datetime import datetime
 import pytz
+from langchain.chains import ConversationalRetrievalChain
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+from langchain_community.vectorstores import Chroma
+from langchain_community.document_loaders import PyPDFLoader, TextLoader, CSVLoader
+import os
+import tempfile
+from datetime import datetime
+import pytz
 class DocumentRAG:
     def __init__(self):
         self.document_store = None
         if not self.api_key:
             raise ValueError("API Key not found. Make sure to set the 'OPENAI_API_KEY' environment variable.")
+        # Persistent directory for Chroma to avoid tenant-related errors
+        self.chroma_persist_dir = "./chroma_storage"
+        os.makedirs(self.chroma_persist_dir, exist_ok=True)
     def process_documents(self, uploaded_files):
         """Process uploaded files by saving them temporarily and extracting content."""
         if not self.api_key:
                 elif temp_file_path.endswith('.csv'):
                     loader = CSVLoader(temp_file_path)
                 else:
+                    return f"Unsupported file type: {uploaded_file.name}"
                 # Load the documents
                 try:
                     documents.extend(loader.load())
                 except Exception as e:
+                    return f"Error loading {uploaded_file.name}: {str(e)}"
             if not documents:
                 return "No valid documents were processed. Please check your files."
             # Create embeddings and initialize retrieval chain
             embeddings = OpenAIEmbeddings(api_key=self.api_key)
+            self.document_store = Chroma.from_documents(
+                documents,
+                embeddings,
+                persist_directory=self.chroma_persist_dir  # Persistent directory for Chroma
+            )
             self.qa_chain = ConversationalRetrievalChain.from_llm(
                 ChatOpenAI(temperature=0, model_name='gpt-4', api_key=self.api_key),
                 self.document_store.as_retriever(search_kwargs={'k': 6}),
             return f"Error generating summary: {str(e)}"
     def handle_query(self, question, history):
+        """Handle user queries."""
         if not self.qa_chain:
             return history + [("System", "Please process the documents first.")]
         try:
         except Exception as e:
             return history + [("System", f"Error: {str(e)}")]
+# Initialize RAG system in session state
+if "rag_system" not in st.session_state:
+    st.session_state.rag_system = DocumentRAG()
 # Streamlit UI
 st.title("Document Analyzer and Podcast Generator")
 # Fetch the API key status
 if "OPENAI_API_KEY" not in os.environ or not os.getenv("OPENAI_API_KEY"):
     st.error("The 'OPENAI_API_KEY' environment variable is not set. Please configure it in your hosting environment.")
 # File upload
 st.subheader("Step 1: Upload Documents")
 if st.button("Process Documents"):
     if uploaded_files:
         # Process the uploaded files
+        result = st.session_state.rag_system.process_documents(uploaded_files)
+        if "successfully" in result:
+            st.success(result)
         else:
+            st.error(result)
     else:
         st.warning("No files uploaded.")
 # Document Q&A
 st.subheader("Step 2: Ask Questions")
+if st.session_state.rag_system.qa_chain:
     history = []
     user_question = st.text_input("Ask a question:")
     if st.button("Submit Question"):
+        # Handle the user query
+        history = st.session_state.rag_system.handle_query(user_question, history)
         for question, answer in history:
             st.chat_message("user").write(question)
             st.chat_message("assistant").write(answer)
 else:
+    st.info("Please process documents before asking questions.")