Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -87,7 +87,7 @@ class DocumentRAG:
|
|
87 |
|
88 |
# Combine text for summary
|
89 |
combined_text = " ".join([doc.page_content for doc in documents])
|
90 |
-
self.document_summary = combined_text
|
91 |
|
92 |
# Create embeddings and initialize retrieval chain
|
93 |
embeddings = OpenAIEmbeddings(api_key=self.api_key)
|
@@ -109,8 +109,8 @@ class DocumentRAG:
|
|
109 |
except Exception as e:
|
110 |
return f"Error processing documents: {str(e)}"
|
111 |
|
112 |
-
def generate_summary(self, text
|
113 |
-
"""Generate a summary of the provided text
|
114 |
if not self.api_key:
|
115 |
return "API Key not set. Please set it in the environment variables."
|
116 |
try:
|
@@ -118,7 +118,7 @@ class DocumentRAG:
|
|
118 |
response = client.chat.completions.create(
|
119 |
model="gpt-4",
|
120 |
messages=[
|
121 |
-
{"role": "system", "content":
|
122 |
{"role": "user", "content": text[:4000]}
|
123 |
],
|
124 |
temperature=0.3
|
@@ -127,8 +127,8 @@ class DocumentRAG:
|
|
127 |
except Exception as e:
|
128 |
return f"Error generating summary: {str(e)}"
|
129 |
|
130 |
-
def create_podcast(self
|
131 |
-
"""Generate a podcast script and audio
|
132 |
if not self.document_summary:
|
133 |
return "Please process documents before generating a podcast.", None
|
134 |
|
@@ -142,7 +142,7 @@ class DocumentRAG:
|
|
142 |
script_response = client.chat.completions.create(
|
143 |
model="gpt-4",
|
144 |
messages=[
|
145 |
-
{"role": "system", "content":
|
146 |
{"role": "user", "content": f"""Based on the following document summary, create a 1-2 minute podcast script:
|
147 |
1. Clearly label the dialogue as 'Host 1:' and 'Host 2:'
|
148 |
2. Keep the content engaging and insightful.
|
@@ -157,20 +157,76 @@ class DocumentRAG:
|
|
157 |
if not script:
|
158 |
return "Error: Failed to generate podcast script.", None
|
159 |
|
160 |
-
#
|
161 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
|
163 |
except Exception as e:
|
164 |
return f"Error generating podcast: {str(e)}", None
|
165 |
|
166 |
-
def
|
167 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
if not self.qa_chain:
|
169 |
return history + [("System", "Please process the documents first.")]
|
170 |
try:
|
171 |
-
preface =
|
172 |
-
Instruction: Respond in
|
173 |
-
If you cannot provide an answer, say:
|
174 |
"""
|
175 |
query = f"{preface}\nQuery: {question}"
|
176 |
|
@@ -187,7 +243,6 @@ class DocumentRAG:
|
|
187 |
except Exception as e:
|
188 |
return history + [("System", f"Error: {str(e)}")]
|
189 |
|
190 |
-
|
191 |
# Initialize RAG system in session state
|
192 |
if "rag_system" not in st.session_state:
|
193 |
st.session_state.rag_system = DocumentRAG()
|
@@ -307,4 +362,4 @@ if st.session_state.rag_system.document_summary:
|
|
307 |
else:
|
308 |
st.error(script)
|
309 |
else:
|
310 |
-
st.info("Please process documents and generate summaries before creating a podcast.")
|
|
|
87 |
|
88 |
# Combine text for summary
|
89 |
combined_text = " ".join([doc.page_content for doc in documents])
|
90 |
+
self.document_summary = self.generate_summary(combined_text)
|
91 |
|
92 |
# Create embeddings and initialize retrieval chain
|
93 |
embeddings = OpenAIEmbeddings(api_key=self.api_key)
|
|
|
109 |
except Exception as e:
|
110 |
return f"Error processing documents: {str(e)}"
|
111 |
|
112 |
+
def generate_summary(self, text):
|
113 |
+
"""Generate a summary of the provided text."""
|
114 |
if not self.api_key:
|
115 |
return "API Key not set. Please set it in the environment variables."
|
116 |
try:
|
|
|
118 |
response = client.chat.completions.create(
|
119 |
model="gpt-4",
|
120 |
messages=[
|
121 |
+
{"role": "system", "content": "Summarize the document content concisely and provide 3-5 key points for discussion."},
|
122 |
{"role": "user", "content": text[:4000]}
|
123 |
],
|
124 |
temperature=0.3
|
|
|
127 |
except Exception as e:
|
128 |
return f"Error generating summary: {str(e)}"
|
129 |
|
130 |
+
def create_podcast(self):
|
131 |
+
"""Generate a podcast script and audio based on the document summary."""
|
132 |
if not self.document_summary:
|
133 |
return "Please process documents before generating a podcast.", None
|
134 |
|
|
|
142 |
script_response = client.chat.completions.create(
|
143 |
model="gpt-4",
|
144 |
messages=[
|
145 |
+
{"role": "system", "content": "You are a professional podcast producer. Create a natural dialogue based on the provided document summary."},
|
146 |
{"role": "user", "content": f"""Based on the following document summary, create a 1-2 minute podcast script:
|
147 |
1. Clearly label the dialogue as 'Host 1:' and 'Host 2:'
|
148 |
2. Keep the content engaging and insightful.
|
|
|
157 |
if not script:
|
158 |
return "Error: Failed to generate podcast script.", None
|
159 |
|
160 |
+
# Convert script to audio
|
161 |
+
final_audio = AudioSegment.empty()
|
162 |
+
is_first_speaker = True
|
163 |
+
|
164 |
+
lines = [line.strip() for line in script.split("\n") if line.strip()]
|
165 |
+
for line in lines:
|
166 |
+
if ":" not in line:
|
167 |
+
continue
|
168 |
+
|
169 |
+
speaker, text = line.split(":", 1)
|
170 |
+
if not text.strip():
|
171 |
+
continue
|
172 |
+
|
173 |
+
try:
|
174 |
+
voice = "nova" if is_first_speaker else "onyx"
|
175 |
+
audio_response = client.audio.speech.create(
|
176 |
+
model="tts-1",
|
177 |
+
voice=voice,
|
178 |
+
input=text.strip()
|
179 |
+
)
|
180 |
+
|
181 |
+
temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
|
182 |
+
audio_response.stream_to_file(temp_audio_file.name)
|
183 |
+
|
184 |
+
segment = AudioSegment.from_file(temp_audio_file.name)
|
185 |
+
final_audio += segment
|
186 |
+
final_audio += AudioSegment.silent(duration=300)
|
187 |
+
|
188 |
+
is_first_speaker = not is_first_speaker
|
189 |
+
except Exception as e:
|
190 |
+
print(f"Error generating audio for line: {text}")
|
191 |
+
print(f"Details: {e}")
|
192 |
+
continue
|
193 |
+
|
194 |
+
if len(final_audio) == 0:
|
195 |
+
return "Error: No audio could be generated.", None
|
196 |
+
|
197 |
+
output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
|
198 |
+
final_audio.export(output_file, format="mp3")
|
199 |
+
return script, output_file
|
200 |
|
201 |
except Exception as e:
|
202 |
return f"Error generating podcast: {str(e)}", None
|
203 |
|
204 |
+
def generate_summary(self, text):
    """Generate a concise summary of ``text`` using the OpenAI chat API.

    Only the first 4000 characters of ``text`` are sent to the model.
    Failures are reported as message strings rather than raised, so the
    caller always receives a string.

    Args:
        text: Document text to summarize.

    Returns:
        The model's summary on success. Otherwise a human-readable
        message: API key missing, no text to summarize, empty model
        reply, or ``"Error generating summary: ..."`` carrying the text
        of whatever exception the request raised.
    """
    # NOTE(review): a method with this same name and signature is also
    # defined earlier in this class (new-file line 112). Python keeps only
    # the later definition, so this one is the copy that actually runs --
    # one of the two should be removed.
    if not self.api_key:
        return "API Key not set. Please set it in the environment variables."

    # Do not spend an API call on blank input: the model would answer with
    # an invented "summary" that would then be stored as if it were real.
    if not text or not text.strip():
        return "No document text available to summarize."

    try:
        client = OpenAI(api_key=self.api_key)
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "Summarize the document content concisely and provide 3-5 key points for discussion."},
                # Truncate so the prompt stays bounded for long documents.
                {"role": "user", "content": text[:4000]},
            ],
            temperature=0.3,
        )
        summary = response.choices[0].message.content
        # The SDK types ``content`` as optional; never hand back None.
        if not summary:
            return "Error generating summary: the model returned an empty response."
        return summary
    except Exception as e:
        return f"Error generating summary: {str(e)}"
|
221 |
+
|
222 |
+
def handle_query(self, question, history):
|
223 |
+
"""Handle user queries."""
|
224 |
if not self.qa_chain:
|
225 |
return history + [("System", "Please process the documents first.")]
|
226 |
try:
|
227 |
+
preface = """
|
228 |
+
Instruction: Respond in English. Be professional and concise, keeping the response under 300 words.
|
229 |
+
If you cannot provide an answer, say: "I am not sure about this question. Please try asking something else."
|
230 |
"""
|
231 |
query = f"{preface}\nQuery: {question}"
|
232 |
|
|
|
243 |
except Exception as e:
|
244 |
return history + [("System", f"Error: {str(e)}")]
|
245 |
|
|
|
246 |
# Initialize RAG system in session state
|
247 |
if "rag_system" not in st.session_state:
|
248 |
st.session_state.rag_system = DocumentRAG()
|
|
|
362 |
else:
|
363 |
st.error(script)
|
364 |
else:
|
365 |
+
st.info("Please process documents and generate summaries before creating a podcast.")
|