DrishtiSharma committed on
Commit
ec7ddd1
·
verified ·
1 Parent(s): 8f91954

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +163 -0
app.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ from openai import OpenAI
4
+ import tempfile
5
+ from langchain.chains import ConversationalRetrievalChain
6
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+ from langchain_community.vectorstores import Chroma
9
+ from langchain_community.document_loaders import (
10
+ PyPDFLoader,
11
+ TextLoader,
12
+ CSVLoader
13
+ )
14
+ from datetime import datetime
15
+ import pytz
16
+
17
+ # DocumentRAG class with environment variable support for API Key
18
class DocumentRAG:
    """Retrieval-augmented question answering over user-supplied documents.

    Loads PDF, TXT and CSV files, splits them into overlapping chunks, indexes
    the chunks in a Chroma vector store and answers questions through a
    ``ConversationalRetrievalChain``. The OpenAI API key is read from the
    ``OPENAI_API_KEY`` environment variable.
    """

    def __init__(self):
        """Initialise empty state and read the API key from the environment.

        Raises:
            ValueError: If ``OPENAI_API_KEY`` is unset or empty.
        """
        self.document_store = None
        self.qa_chain = None
        self.document_summary = ""
        self.chat_history = []
        self.last_processed_time = None
        self.api_key = os.getenv("OPENAI_API_KEY")  # Fetch the API key from environment variable
        self.init_time = datetime.now(pytz.UTC)

        if not self.api_key:
            raise ValueError("API Key not found. Make sure to set the 'OPENAI_API_KEY' environment variable.")

    @staticmethod
    def _resolve_path(source):
        """Return the filesystem path for *source*.

        *source* is either a path (``str`` / ``os.PathLike``) or an object
        that exposes its path through a ``.name`` attribute.
        """
        if isinstance(source, (str, os.PathLike)):
            return os.fspath(source)
        return os.fspath(source.name)

    def process_documents(self, file_paths):
        """Load, split, summarise and index the given files.

        Args:
            file_paths: Files to ingest. Each item may be a path string /
                ``os.PathLike`` or an object whose ``.name`` attribute holds
                the path. Files whose extension is not ``.pdf``, ``.txt`` or
                ``.csv`` (case-insensitive) are skipped.

        Returns:
            A status message. ``"Documents processed successfully!"`` on
            success; otherwise a message describing what went wrong. Errors
            are reported through the return value rather than raised.
        """
        if not self.api_key:
            return "Please set the OpenAI API key in the environment variables."
        if not file_paths:
            return "Please upload documents first."

        try:
            documents = []
            for item in file_paths:
                path = self._resolve_path(item)
                extension = os.path.splitext(path)[1].lower()
                if extension == '.pdf':
                    loader = PyPDFLoader(path)
                elif extension == '.txt':
                    loader = TextLoader(path)
                elif extension == '.csv':
                    loader = CSVLoader(path)
                else:
                    continue

                # A single unreadable file must not abort the whole batch.
                try:
                    documents.extend(loader.load())
                except Exception as e:
                    print(f"Error loading {path}: {str(e)}")
                    continue

            if not documents:
                return "No valid documents were processed. Please check your files."

            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=1000,
                chunk_overlap=200,
                length_function=len
            )
            documents = text_splitter.split_documents(documents)

            combined_text = " ".join(doc.page_content for doc in documents)
            self.document_summary = self.generate_summary(combined_text)

            embeddings = OpenAIEmbeddings(api_key=self.api_key)
            self.document_store = Chroma.from_documents(documents, embeddings)
            self.qa_chain = ConversationalRetrievalChain.from_llm(
                ChatOpenAI(temperature=0, model_name='gpt-4', api_key=self.api_key),
                self.document_store.as_retriever(search_kwargs={'k': 6}),
                return_source_documents=True,
                verbose=False
            )

            self.last_processed_time = datetime.now(pytz.UTC)
            return "Documents processed successfully!"
        except Exception as e:
            return f"Error processing documents: {str(e)}"

    def generate_summary(self, text):
        """Generate a summary of the uploaded documents.

        Only the first 4000 characters of *text* are sent to the model.

        Returns:
            The model's summary, or an error message string if the API key is
            missing or the request fails.
        """
        if not self.api_key:
            return "API Key not set. Please set it in the environment variables."
        try:
            client = OpenAI(api_key=self.api_key)
            response = client.chat.completions.create(
                model="gpt-4",
                messages=[
                    {"role": "system", "content": "Summarize the document content concisely and provide 3-5 key points for discussion."},
                    {"role": "user", "content": text[:4000]}
                ],
                temperature=0.3
            )
            return response.choices[0].message.content
        except Exception as e:
            return f"Error generating summary: {str(e)}"

    def handle_query(self, question, history):
        """Answer *question* using the processed documents.

        Args:
            question: The user's question.
            history: List of ``(question, answer)`` tuples from earlier turns.
                Entries whose first element is ``"System"`` are status
                messages produced by this method and are not forwarded to the
                model.

        Returns:
            The conversation history including the new turn. On success the
            new pair is appended to *history* in place and that list is
            returned; on failure a new list ending in a ``("System", message)``
            entry is returned and *history* is left untouched.
        """
        if not self.qa_chain:
            return history + [("System", "Please process the documents first.")]
        try:
            preface = """
            Instruction: Respond in English. Be professional and concise, keeping the response under 300 words.
            If you cannot provide an answer, say: "I am not sure about this question. Please try asking something else."
            """
            query = f"{preface}\nQuery: {question}"

            # "System" rows are UI status messages, not real Q/A turns; passing
            # them on would feed error text to the model as prior conversation.
            prior_turns = [(q, a) for q, a in history if q != "System"]

            result = self.qa_chain({
                "question": query,
                "chat_history": prior_turns
            })

            if "answer" not in result:
                return history + [("System", "Sorry, an error occurred.")]

            history.append((question, result["answer"]))
            return history
        except Exception as e:
            return history + [("System", f"Error: {str(e)}")]
122
+
123
# Streamlit UI
st.title("Document Analyzer and Podcast Generator")

# Fetch the API key status
if not os.getenv("OPENAI_API_KEY"):
    st.error("The 'OPENAI_API_KEY' environment variable is not set. Please configure it in your hosting environment.")
else:
    st.success("API Key successfully loaded from environment variable.")

# Streamlit re-executes this script on every widget interaction, so the RAG
# system (with its processed documents) and the conversation are kept in
# session state; otherwise they would be rebuilt empty on each rerun.
if "rag_system" not in st.session_state:
    try:
        st.session_state.rag_system = DocumentRAG()
    except ValueError as e:
        st.error(str(e))
        st.stop()
rag_system = st.session_state.rag_system

if "qa_history" not in st.session_state:
    st.session_state.qa_history = []

# File upload
st.subheader("Step 1: Upload Documents")
uploaded_files = st.file_uploader("Upload files (PDF, TXT, CSV)", accept_multiple_files=True)
if st.button("Process Documents"):
    if uploaded_files:
        # The loaders read from disk, so each upload is copied to a temporary
        # file that keeps the original extension. The (closed) temp-file
        # objects are passed on; their ``.name`` attribute holds the path.
        temp_files = []
        try:
            for uploaded in uploaded_files:
                suffix = os.path.splitext(uploaded.name)[1]
                with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                    temp_files.append(tmp)
                    # getvalue() returns the full content regardless of the
                    # current read position of the upload buffer.
                    tmp.write(uploaded.getvalue())
            status = rag_system.process_documents(temp_files)
        finally:
            # Best-effort cleanup: the temporary copies are no longer needed
            # once the documents have been loaded.
            for tmp in temp_files:
                try:
                    os.unlink(tmp.name)
                except OSError:
                    pass

        # process_documents reports failures through its return value.
        if status == "Documents processed successfully!":
            st.success(status)
        else:
            st.error(status)
    else:
        st.warning("No files uploaded.")

# Document Q&A
st.subheader("Step 2: Ask Questions")
if rag_system.qa_chain:
    user_question = st.text_input("Ask a question:")
    if st.button("Submit Question"):
        if user_question.strip():
            st.session_state.qa_history = rag_system.handle_query(
                user_question, st.session_state.qa_history
            )
        else:
            st.warning("Please enter a question.")
    for question, answer in st.session_state.qa_history:
        st.chat_message("user").write(question)
        st.chat_message("assistant").write(answer)
else:
    st.info("Please process documents before asking questions.")