DrishtiSharma committed on
Commit
eccd11a
·
verified ·
1 Parent(s): cfa79bc

Create good_progress.py

Browse files
Files changed (1) hide show
  1. good_progress.py +601 -0
good_progress.py ADDED
@@ -0,0 +1,601 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ from openai import OpenAI
4
+ import tempfile
5
+ from langchain.chains import ConversationalRetrievalChain
6
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+ from langchain_community.vectorstores import Chroma
9
+ from langchain_community.document_loaders import (
10
+ PyPDFLoader,
11
+ TextLoader,
12
+ CSVLoader
13
+ )
14
+ from datetime import datetime
15
+ from pydub import AudioSegment
16
+ import pytz
17
+ import chromadb
18
+ from langchain.chains import ConversationalRetrievalChain
19
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
20
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
21
+ from langchain_community.vectorstores import Chroma
22
+ from langchain_community.document_loaders import PyPDFLoader, TextLoader, CSVLoader
23
+ import os
24
+ import tempfile
25
+ from datetime import datetime
26
+ import pytz
27
+ from langgraph.graph import StateGraph, START, END, add_messages
28
+ from langgraph.constants import Send
29
+
30
+ from langgraph.checkpoint.memory import MemorySaver
31
+ from langchain_core.messages import HumanMessage, SystemMessage, AnyMessage
32
+ from pydantic import BaseModel
33
+ from typing import List, Annotated, Any
34
+ import re, operator
35
+
36
+
37
+ chromadb.api.client.SharedSystemClient.clear_system_cache()
38
+
39
class MultiAgentState(BaseModel):
    """Shared state of the top-level topic/story LangGraph.

    Fields annotated with a reducer (``add_messages`` / ``operator.add``) are
    accumulated across node updates; the others are overwritten by the most
    recent write. Every field now has an empty default so the model can also
    be constructed directly (for example in tests) from only ``topic`` and
    ``context``.
    """

    state: List[str] = []
    # Running log of LLM messages produced by the topic nodes.
    messages: Annotated[list[AnyMessage], add_messages] = []
    # Main topic(s) and learning context(s) supplied by the caller.
    topic: List[str] = []
    context: List[str] = []
    # Subtopic titles parsed out of ``sub_topics`` by ``topic_extractor``.
    sub_topic_list: List[str] = []
    # Raw LLM answers listing subtopics at each difficulty level.
    sub_topics: Annotated[list[AnyMessage], add_messages] = []
    # Stories written by the story subgraph, one message per subtopic.
    stories: Annotated[list[AnyMessage], add_messages] = []
    stories_lst: Annotated[list, operator.add] = []
48
+
49
class StoryState(BaseModel):
    """State of the per-subtopic story subgraph (retrieve -> rerank -> generate).

    Reducer-annotated fields are accumulated across updates; all fields have
    empty defaults so an instance can be built from just ``story_topic``.
    """

    # Documents returned by the retriever for ``story_topic``.
    retrieved_docs: List[Any] = []
    # Text of the documents kept after reranking.
    reranked_docs: List[str] = []
    # Generated story messages.
    stories: Annotated[list[AnyMessage], add_messages] = []
    # Subtopic this subgraph run writes a story about.
    story_topic: str = ""
    stories_lst: Annotated[list, operator.add] = []
55
+
56
+ class DocumentRAG:
57
def __init__(self, embedding_choice="OpenAI"):
    """Set up empty document state, the chat LLM and the Chroma directory.

    Args:
        embedding_choice: ``"Cohere"`` selects the Cohere chat model and a
            Cohere client for reranking; any other value selects OpenAI.
    """
    self.document_store = None
    self.qa_chain = None
    # Initialised here so callers can test truthiness instead of hasattr();
    # process_documents() fills it with the concatenated chunk text.
    self.document_text = ""
    self.document_summary = ""
    self.chat_history = []
    self.last_processed_time = None
    self.api_key = os.getenv("OPENAI_API_KEY")
    self.init_time = datetime.now(pytz.UTC)
    self.embedding_choice = embedding_choice

    # Set up appropriate LLM
    if self.embedding_choice == "Cohere":
        # Imported lazily so the Cohere packages are only needed when used.
        from langchain_cohere import ChatCohere
        import cohere

        cohere_api_key = os.getenv("COHERE_API_KEY")
        self.llm = ChatCohere(
            model="command-r-plus-08-2024",
            temperature=0.7,
            cohere_api_key=cohere_api_key,
        )
        self.cohere_client = cohere.Client(cohere_api_key)
    else:
        self.llm = ChatOpenAI(
            model_name="gpt-4",
            temperature=0.7,
            api_key=self.api_key,
        )

    # Persistent directory for Chroma
    self.chroma_persist_dir = "./chroma_storage"
    os.makedirs(self.chroma_persist_dir, exist_ok=True)
87
+
88
+
89
+ def _get_embedding_model(self):
90
+ if not self.api_key:
91
+ raise ValueError("API Key not found. Make sure to set the 'OPENAI_API_KEY' environment variable.")
92
+
93
+ if self.embedding_choice == "OpenAI":
94
+ return OpenAIEmbeddings(api_key=self.api_key)
95
+ else:
96
+ from langchain.embeddings import CohereEmbeddings
97
+ return CohereEmbeddings(
98
+ model="embed-multilingual-light-v3.0",
99
+ cohere_api_key=os.getenv("COHERE_API_KEY")
100
+ )
101
+
102
+
103
+
104
+
105
def process_documents(self, uploaded_files):
    """Load, split and index uploaded files, then build the Q&A chain.

    Args:
        uploaded_files: iterable of upload objects exposing ``.name`` and
            ``.read()`` (such as Streamlit uploads).

    Returns:
        A status message. It contains "successfully" on success; every
        failure is reported as a message instead of being raised.
    """
    if not self.api_key:
        return "Please set the OpenAI API key in the environment variables."
    if not uploaded_files:
        return "Please upload documents first."

    try:
        documents = []
        for uploaded_file in uploaded_files:
            try:
                loaded = self._load_uploaded_file(uploaded_file)
            except Exception as e:
                return f"Error loading {uploaded_file.name}: {str(e)}"
            if loaded is None:
                return f"Unsupported file type: {uploaded_file.name}"
            documents.extend(loaded)

        if not documents:
            return "No valid documents were processed. Please check your files."

        # Split text for better processing
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )
        documents = text_splitter.split_documents(documents)

        # Combine text for later summary generation
        self.document_text = " ".join(doc.page_content for doc in documents)

        # Create embeddings and initialize retrieval chain
        embeddings = self._get_embedding_model()
        self.document_store = Chroma.from_documents(
            documents,
            embeddings,
            persist_directory=self.chroma_persist_dir,  # Persistent directory for Chroma
        )

        self.qa_chain = ConversationalRetrievalChain.from_llm(
            ChatOpenAI(temperature=0, model_name='gpt-4', api_key=self.api_key),
            self.document_store.as_retriever(search_kwargs={'k': 6}),
            return_source_documents=True,
            verbose=False,
        )

        self.last_processed_time = datetime.now(pytz.UTC)
        return "Documents processed successfully!"
    except Exception as e:
        return f"Error processing documents: {str(e)}"


def _load_uploaded_file(self, uploaded_file):
    """Load one upload via a temporary file; return None for unsupported types.

    The extension is matched case-insensitively, the temporary file handle is
    closed before the loader reads it, and the file is removed afterwards.
    """
    loaders = {".pdf": PyPDFLoader, ".txt": TextLoader, ".csv": CSVLoader}
    suffix = os.path.splitext(uploaded_file.name)[1].lower()
    loader_cls = loaders.get(suffix)
    if loader_cls is None:
        # Reject before anything is written to disk.
        return None

    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
        temp_file.write(uploaded_file.read())
        temp_file_path = temp_file.name

    try:
        return loader_cls(temp_file_path).load()
    finally:
        os.unlink(temp_file_path)
171
+
172
def generate_summary(self, text, language, max_chars=4000):
    """Summarize ``text`` with GPT-4, focusing on key research-paper sections.

    Args:
        text: document text to summarize.
        language: target language named in the prompt.
        max_chars: number of leading characters of ``text`` sent to the
            model (previously hard-coded to 4000).

    Returns:
        The summary, or a message starting with "API Key not set",
        "No document text" or "Error generating summary" on failure.
    """
    if not self.api_key:
        return "API Key not set. Please set it in the environment variables."
    if not text or not text.strip():
        # Avoid a paid API call that could only summarize nothing.
        return "No document text available to summarize."
    try:
        client = OpenAI(api_key=self.api_key)
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": f"""
                Summarize the following document focusing mainly on these sections:
                1. Abstract
                2. In the Introduction, specifically focus on the portion where the key contributions of the research paper are highlighted.
                3. Conclusion
                4. Limitations
                5. Future Work

                Ensure the summary is concise, logically ordered, and suitable for {language}.
                Provide 7-9 key points for discussion in a structured format."""},
                {"role": "user", "content": text[:max_chars]},
            ],
            temperature=0.3,
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error generating summary: {str(e)}"
198
+
199
def create_podcast(self, language):
    """Generate a two-host podcast script and mp3 from the document summary.

    Args:
        language: language the dialogue should be written in.

    Returns:
        ``(script, audio_path)`` on success, or ``(error_message, None)``
        on failure. The mp3 at ``audio_path`` is left on disk for the caller.
    """
    import io  # local import: only needed for in-memory audio decoding

    if not self.document_summary:
        return "Please process documents before generating a podcast.", None

    if not self.api_key:
        return "Please set the OpenAI API key in the environment variables.", None

    try:
        client = OpenAI(api_key=self.api_key)

        # Generate podcast script
        script_response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": f"""
                You are a professional podcast producer. Create a 1-2 minute structured podcast dialogue in {language}
                based on the provided document summary. Follow this flow:
                1. Brief Introduction of the Topic
                2. Highlight the limitations of existing methods, the key contributions of the research paper, and its advantages over the current state of the art.
                3. Discuss Limitations of the research work.
                4. Present the Conclusion
                5. Mention Future Work

                Clearly label the dialogue as 'Host 1:' and 'Host 2:'. Maintain a tone that is engaging, conversational,
                and insightful, while ensuring the flow remains logical and natural. Include a well-structured opening
                to introduce the topic and a clear, thoughtful closing that provides a smooth conclusion, avoiding any
                abrupt endings."""
                },
                {"role": "user", "content": f"""
                Document Summary: {self.document_summary}"""},
            ],
            temperature=0.7,
        )

        script = script_response.choices[0].message.content
        if not script:
            return "Error: Failed to generate podcast script.", None

        # Convert script to audio
        final_audio = AudioSegment.empty()
        voices = ("nova", "onyx")
        # Voice is tied to the speaker label, so a skipped or failed line
        # cannot swap the two hosts' voices.
        speaker_voices = {}

        lines = [line.strip() for line in script.split("\n") if line.strip()]
        for line in lines:
            if ":" not in line:
                continue

            speaker, text = line.split(":", 1)
            # Drop markdown emphasis such as "**Host 1:** Hello".
            text = text.strip().lstrip("*").strip()
            if not text:
                continue

            label = speaker.strip().strip("*_#> ").lower()
            voice = speaker_voices.setdefault(
                label, voices[len(speaker_voices) % len(voices)]
            )

            try:
                audio_response = client.audio.speech.create(
                    model="tts-1",
                    voice=voice,
                    input=text,
                )

                # Decode from memory instead of leaking one temp file per line.
                segment = AudioSegment.from_file(
                    io.BytesIO(audio_response.content), format="mp3"
                )
                final_audio += segment
                final_audio += AudioSegment.silent(duration=300)
            except Exception as e:
                print(f"Error generating audio for line: {text}")
                print(f"Details: {e}")
                continue

        if len(final_audio) == 0:
            return "Error: No audio could be generated.", None

        # Reserve a path and close the handle before pydub writes to it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as out_handle:
            output_file = out_handle.name
        final_audio.export(output_file, format="mp3")
        return script, output_file

    except Exception as e:
        return f"Error generating podcast: {str(e)}", None
281
+
282
def handle_query(self, question, history, language):
    """Answer ``question`` over the processed documents in ``language``.

    Args:
        question: the user's question.
        history: list of ``(question, answer)`` tuples; never mutated.
        language: language the answer should be written in.

    Returns:
        A new list: ``history`` plus either the ``(question, answer)`` pair
        or a ``("System", message)`` notice when the query cannot be answered.
    """
    if not self.qa_chain:
        return history + [("System", "Please process the documents first.")]
    if not question or not question.strip():
        return history + [("System", "Please enter a question.")]
    try:
        preface = (
            f"Instruction: Respond in {language}. Be professional and concise, "
            f"keeping the response under 300 words. If you cannot provide an answer, say: "
            f'"I am not sure about this question. Please try asking something else."'
        )
        query = f"{preface}\nQuery: {question}"

        # "System" entries are UI notices, not conversation turns, so they
        # are kept out of the chain's chat history.
        chat_history = [(q, a) for q, a in history if q != "System"]

        result = self.qa_chain.invoke({
            "question": query,
            "chat_history": chat_history,
        })

        if "answer" not in result:
            return history + [("System", "Sorry, an error occurred.")]

        # Return a new list on every path; the original appended in place
        # here but returned copies elsewhere.
        return history + [(question, result["answer"])]
    except Exception as e:
        return history + [("System", f"Error: {str(e)}")]
306
+
307
def extract_subtopics(self, messages):
    """Extract bold list-item titles from LLM messages.

    Recognises ``- **Title**`` as before, and additionally ``*``/``•`` bullets
    and numbered items (``1. **Title**``). Duplicates are dropped
    case-insensitively, keeping first-seen order, because every returned
    subtopic triggers its own story generation.

    Args:
        messages: iterable of objects with a ``.content`` attribute.

    Returns:
        List of unique subtopic titles in order of first appearance.
    """
    text = "\n".join(
        msg.content if isinstance(msg.content, str) else str(msg.content)
        for msg in messages
    )
    # The lookbehind stops the closing "**" of one bold span from being read
    # as a "*" bullet introducing the next one.
    pattern = r'(?:[-•]|(?<!\*)\*|\b\d+[.)])\s+\*\*(.*?)\*\*'

    seen = set()
    subtopics = []
    for raw in re.findall(pattern, text):
        title = raw.strip().rstrip(":").strip()
        key = title.casefold()
        if title and key not in seen:
            seen.add(key)
            subtopics.append(title)
    return subtopics
310
+
311
def beginner_topic(self, state: MultiAgentState):
    """Ask the LLM for beginner-level subtopics of the topic in its context.

    Returns a state update that adds the reply to both ``messages`` and
    ``sub_topics``. The original wrote to ``message``, which is not a field
    of ``MultiAgentState``.
    """
    prompt = f"What are the beginner-level topics you can learn about {', '.join(state.topic)} in {', '.join(state.context)}?"
    msg = self.llm.invoke([SystemMessage("Suppose you're a middle grader..."), HumanMessage(prompt)])
    return {"messages": [msg], "sub_topics": [msg]}
315
+
316
def middle_topic(self, state: MultiAgentState):
    """Ask the LLM for middle-level subtopics, avoiding earlier suggestions.

    The prompt asks to "Avoid previous", so the text of the suggestions
    already in ``state.sub_topics`` is appended to give the model something
    to avoid. Returns a state update adding the reply to ``messages`` and
    ``sub_topics`` (the original wrote to the non-existent ``message`` key).
    """
    prompt = f"What are the middle-level topics for {', '.join(state.topic)} in {', '.join(state.context)}? Avoid previous."
    previous = "\n".join(str(prior.content) for prior in state.sub_topics)
    if previous:
        prompt += f"\n\nPreviously suggested topics:\n{previous}"
    msg = self.llm.invoke([SystemMessage("Suppose you're a college student..."), HumanMessage(prompt)])
    return {"messages": [msg], "sub_topics": [msg]}
320
+
321
def advanced_topic(self, state: MultiAgentState):
    """Ask the LLM for advanced-level subtopics, avoiding earlier suggestions.

    The prompt asks to "Avoid previous", so the text of the suggestions
    already in ``state.sub_topics`` is appended to give the model something
    to avoid. Returns a state update adding the reply to ``messages`` and
    ``sub_topics`` (the original wrote to the non-existent ``message`` key).
    """
    prompt = f"What are the advanced-level topics for {', '.join(state.topic)} in {', '.join(state.context)}? Avoid previous."
    previous = "\n".join(str(prior.content) for prior in state.sub_topics)
    if previous:
        prompt += f"\n\nPreviously suggested topics:\n{previous}"
    msg = self.llm.invoke([SystemMessage("Suppose you're a teacher..."), HumanMessage(prompt)])
    return {"messages": [msg], "sub_topics": [msg]}
325
+
326
def topic_extractor(self, state: MultiAgentState):
    """Graph node: parse subtopic titles out of the accumulated suggestions."""
    suggestions = state.sub_topics
    extracted = self.extract_subtopics(suggestions)
    return {"sub_topic_list": extracted}
328
+
329
+
330
def retrieve_node(self, state: StoryState):
    """Graph node: fetch up to 20 chunks about ``state.story_topic``.

    Returns:
        A dict with ``retrieved_docs`` (empty when no documents have been
        processed yet) and ``question``, the query string that was used.
    """
    if not self.document_store:
        return {"retrieved_docs": [], "question": "No documents processed yet."}

    topic = state.story_topic
    query = f"information about {topic}"

    retriever = self.document_store.as_retriever(search_kwargs={"k": 20})
    # invoke() replaces the deprecated get_relevant_documents().
    docs = retriever.invoke(query)
    return {"retrieved_docs": docs, "question": query}
341
+
342
+
343
+
344
+
345
def rerank_node(self, state: StoryState):
    """Graph node: keep the five most useful retrieved chunks for the topic.

    With the Cohere backend the chunks are reranked by the Cohere rerank API.
    Otherwise the retriever's own ordering is kept. The original fallback
    sorted by text length, which replaced relevance with verbosity.
    Identical chunk texts are collapsed first.

    Returns:
        A dict with ``reranked_docs`` (list of chunk texts) and ``question``
        (the rerank query string).
    """
    top_n = 5
    topic = state.story_topic
    query = f"Rerank documents based on how well they explain the topic {topic}"

    # dict.fromkeys removes duplicates while preserving retrieval order.
    texts = list(dict.fromkeys(doc.page_content for doc in state.retrieved_docs))

    if not texts:
        return {"reranked_docs": [], "question": query}

    if self.embedding_choice == "Cohere" and hasattr(self, "cohere_client"):
        rerank_results = self.cohere_client.rerank(
            query=query,
            documents=texts,
            top_n=min(top_n, len(texts)),
            model="rerank-v3.5",
        )
        top_docs = [texts[result.index] for result in rerank_results.results]
    else:
        top_docs = texts[:top_n]

    return {"reranked_docs": top_docs, "question": query}
366
+
367
+
368
+
369
+
370
+
371
def generate_story_node(self, state: StoryState):
    """Graph node: have the LLM write a middle-school story about the topic.

    The reranked chunk texts are joined with blank lines and given to the
    model as context. Returns a state update with the story under ``stories``.
    """
    story_subject = state.story_topic
    background = "\n\n".join(state.reranked_docs)

    system_message = f"""
    Suppose you're a brilliant science storyteller.
    You write stories that help middle schoolers understand complex science topics with fun and clarity.
    Add subtle humor and make it engaging.
    """
    prompt = f"""
    Use the following context to write a fun and simple story explaining **{story_subject}** to a middle schooler:\n
    Context:\n{background}\n\n
    Story:
    """

    conversation = [SystemMessage(system_message), HumanMessage(prompt)]
    story = self.llm.invoke(conversation)
    return {"stories": story}
388
+
389
+
390
+
391
+
392
def run_multiagent_storygraph(self, topic: str, context: str):
    """Build and run the topic -> subtopics -> stories LangGraph pipeline.

    Three topic nodes suggest beginner, middle and advanced subtopics,
    ``topic_extractor`` parses them, and each subtopic is sent to a
    retrieve -> rerank -> generate subgraph that writes one story.

    Args:
        topic: main topic entered by the user.
        context: learning context entered by the user.

    Returns:
        The final graph state as returned by ``invoke``.
    """
    if self.embedding_choice == "OpenAI":
        self.llm = ChatOpenAI(model_name="gpt-4", temperature=0.7, api_key=self.api_key)
    elif self.embedding_choice == "Cohere":
        from langchain_cohere import ChatCohere
        self.llm = ChatCohere(
            model="command-r-plus-08-2024",
            temperature=0.7,
            cohere_api_key=os.getenv("COHERE_API_KEY"),
        )

    # Define the story subgraph with reranking
    story_graph = StateGraph(StoryState)
    story_graph.add_node("Retrieve", self.retrieve_node)
    story_graph.add_node("Rerank", self.rerank_node)
    story_graph.add_node("Generate", self.generate_story_node)
    story_graph.set_entry_point("Retrieve")
    story_graph.add_edge("Retrieve", "Rerank")
    story_graph.add_edge("Rerank", "Generate")
    story_graph.set_finish_point("Generate")
    story_subgraph = story_graph.compile()

    # Define the main graph
    graph = StateGraph(MultiAgentState)
    graph.add_node("beginner_topic", self.beginner_topic)
    graph.add_node("middle_topic", self.middle_topic)
    graph.add_node("advanced_topic", self.advanced_topic)
    graph.add_node("topic_extractor", self.topic_extractor)
    graph.add_node("story_generator", story_subgraph)

    graph.add_edge(START, "beginner_topic")
    graph.add_edge("beginner_topic", "middle_topic")
    graph.add_edge("middle_topic", "advanced_topic")
    graph.add_edge("advanced_topic", "topic_extractor")
    # Fan out: one story_generator run per extracted subtopic.
    graph.add_conditional_edges(
        "topic_extractor",
        lambda state: [Send("story_generator", {"story_topic": t}) for t in state.sub_topic_list],
        ["story_generator"],
    )
    graph.add_edge("story_generator", END)

    compiled = graph.compile(checkpointer=MemorySaver())
    thread = {"configurable": {"thread_id": "storygraph-session"}}

    # Initial invocation
    result = compiled.invoke({"topic": [topic], "context": [context]}, thread)

    # Fallback if no subtopics found: write a story about the user's own
    # topic. The original used fixed machine-learning subtopics, which were
    # unrelated to any other subject.
    if not result.get("sub_topic_list"):
        fallback_subs = [topic]
        compiled.update_state(thread, {"sub_topic_list": fallback_subs})
        result = compiled.invoke(None, thread, stream_mode="values")

    return result
446
+
447
+
448
+
449
+
450
# Sidebar: static "About" text, the workflow steps and image credits.
with st.sidebar:
    st.title("About")
    st.markdown(
        """
        This app is inspired by the [RAG_HW HuggingFace Space](https://huggingface.co/spaces/wint543/RAG_HW).
        It allows users to upload documents, generate summaries, ask questions, and create podcasts.
        """
    )

    st.markdown("### Steps:")
    workflow_steps = (
        "1. Upload documents.",
        "2. Generate summary.",
        "3. Ask questions.",
        "4. Create podcast.",
    )
    for step_text in workflow_steps:
        st.markdown(step_text)

    st.markdown("### Credits:")
    st.markdown("Image Source: [Geeksforgeeks](https://www.geeksforgeeks.org/how-to-convert-document-into-podcast/)")
467
+
468
# Main page header and cover image.
st.title("Document Analyzer & Podcast Generator")
st.image("./cover_image.png", use_container_width=True)

# Embedding model selector (main screen)
st.subheader("Embedding Model Selection")
embedding_choice = st.radio(
    "Choose the embedding model for document processing and story generation:",
    ["OpenAI", "Cohere"],
    horizontal=True,
    key="embedding_model",
)

# Create the RAG system on first run, and rebuild it whenever the selected
# backend differs from the one the stored instance was created with.
needs_new_rag = (
    "rag_system" not in st.session_state
    or st.session_state.rag_system.embedding_choice != embedding_choice
)
if needs_new_rag:
    st.session_state.rag_system = DocumentRAG(embedding_choice=embedding_choice)
485
+
486
+
487
# Step 1: Upload and Process Documents
st.subheader("Step 1: Upload and Process Documents")
# Restrict the picker to the formats process_documents() can load, so an
# unsupported file is rejected at upload time rather than after submission.
uploaded_files = st.file_uploader(
    "Upload files (PDF, TXT, CSV)",
    type=["pdf", "txt", "csv"],
    accept_multiple_files=True,
)

if st.button("Process Documents"):
    if uploaded_files:
        with st.spinner("Processing documents, please wait..."):
            result = st.session_state.rag_system.process_documents(uploaded_files)
        # process_documents() reports its outcome as text; only the success
        # message contains "successfully".
        if "successfully" in result:
            st.success(result)
        else:
            st.error(result)
    else:
        st.warning("No files uploaded.")
501
+
502
# Step 2: Generate Summary
st.subheader("Step 2: Generate Summary")
summary_language_options = ["English", "Hindi", "Spanish", "French", "German", "Chinese", "Japanese"]
# A real label replaces the empty one, which Streamlit warns about.
summary_language = st.radio(
    "Select Summary Language:",
    summary_language_options,
    horizontal=True,
    key="summary_language",
)

if st.button("Generate Summary"):
    document_text = getattr(st.session_state.rag_system, "document_text", "")
    if document_text:
        with st.spinner("Generating summary, please wait..."):
            summary = st.session_state.rag_system.generate_summary(document_text, summary_language)
        # generate_summary() returns failures as text with these prefixes;
        # they must not be stored or shown as a successful summary.
        failure_prefixes = ("Error generating summary", "API Key not set")
        if summary and not summary.startswith(failure_prefixes):
            st.session_state.rag_system.document_summary = summary
            st.text_area("Document Summary", summary, height=200)
            st.success("Summary generated successfully!")
        else:
            st.error(summary or "Failed to generate summary.")
    else:
        st.info("Please process documents first to generate summary.")
525
+
526
# Step 3: Ask Questions
st.subheader("Step 3: Ask Questions")
qa_language_options = ["English", "Hindi", "Spanish", "French", "German", "Chinese", "Japanese"]
# A real label replaces the empty one, which Streamlit warns about.
qa_language = st.radio(
    "Select Q&A Language:",
    qa_language_options,
    horizontal=True,
    key="qa_language",
)

if st.session_state.rag_system.qa_chain:
    # Keep the conversation in session state; a plain local list is reset on
    # every Streamlit rerun, so follow-up questions would lose their context.
    if "qa_history" not in st.session_state:
        st.session_state.qa_history = []

    user_question = st.text_input("Ask a question:")
    if st.button("Submit Question"):
        if user_question.strip():
            with st.spinner("Answering your question, please wait..."):
                st.session_state.qa_history = st.session_state.rag_system.handle_query(
                    user_question, list(st.session_state.qa_history), qa_language
                )
        else:
            st.warning("Please enter a question.")

    for question, answer in st.session_state.qa_history:
        if question == "System":
            # System notices carry no user question, only a message.
            st.chat_message("assistant").write(answer)
        else:
            st.chat_message("user").write(question)
            st.chat_message("assistant").write(answer)
else:
    st.info("Please process documents first to enable Q&A.")
548
+
549
+
550
# Step 5: Multi-Agent Story Explorer (rendered before the podcast section)
st.subheader("Step 5: Explore Subtopics via Multi-Agent Graph")
story_topic = st.text_input("Enter main topic:", value="Machine Learning")
story_context = st.text_input("Enter learning context:", value="Education")

if st.button("Run Story Graph"):
    if st.session_state.rag_system.document_store is None:
        st.warning("Please process documents first before running the story graph.")
    elif not story_topic.strip():
        # A blank topic would only produce meaningless prompts.
        st.warning("Please enter a main topic.")
    else:
        with st.spinner("Generating subtopics and stories..."):
            result = st.session_state.rag_system.run_multiagent_storygraph(topic=story_topic, context=story_context)

        subtopics = result.get("sub_topic_list", [])
        # Heading emoji restored from mis-decoded UTF-8.
        st.markdown("### 🧠 Extracted Subtopics")
        for sub in subtopics:
            st.markdown(f"- {sub}")

        stories = result.get("stories", [])
        if stories:
            st.markdown("### 📚 Generated Stories")
            for i, story in enumerate(stories):
                st.markdown(f"**Story {i+1}:**")
                st.markdown(story.content)
        else:
            st.warning("No stories were generated.")
575
+
576
+
577
# Step 4: Generate Podcast (rendered after the story explorer section)
st.subheader("Step 4: Generate Podcast")
podcast_language_options = ["English", "Hindi", "Spanish", "French", "German", "Chinese", "Japanese"]
# A real label replaces the empty one, which Streamlit warns about.
podcast_language = st.radio(
    "Select Podcast Language:",
    podcast_language_options,
    horizontal=True,
    key="podcast_language",
)

if st.session_state.rag_system.document_summary:
    if st.button("Generate Podcast"):
        with st.spinner("Generating podcast, please wait..."):
            script, audio_path = st.session_state.rag_system.create_podcast(podcast_language)
        # create_podcast() returns (error_message, None) on failure.
        if audio_path:
            st.text_area("Generated Podcast Script", script, height=200)
            st.audio(audio_path, format="audio/mp3")
            st.success("Podcast generated successfully! You can listen to it above.")
        else:
            st.error(script)
else:
    st.info("Please process documents and generate summary before creating a podcast.")
601
+