DrishtiSharma committed on
Commit
7f0132c
·
verified ·
1 Parent(s): cf28062

Create attempt2.py

Browse files
Files changed (1) hide show
  1. attempt2.py +628 -0
attempt2.py ADDED
@@ -0,0 +1,628 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ from openai import OpenAI
4
+ import tempfile
5
+ from langchain.chains import ConversationalRetrievalChain
6
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+ from langchain_community.vectorstores import Chroma
9
+ from langchain_community.document_loaders import (
10
+ PyPDFLoader,
11
+ TextLoader,
12
+ CSVLoader
13
+ )
14
+ from datetime import datetime
15
+ from pydub import AudioSegment
16
+ import pytz
17
+ import chromadb
18
+ from langchain.chains import ConversationalRetrievalChain
19
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
20
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
21
+ from langchain_community.vectorstores import Chroma
22
+ from langchain_community.document_loaders import PyPDFLoader, TextLoader, CSVLoader
23
+ import os
24
+ import tempfile
25
+ from datetime import datetime
26
+ import pytz
27
+ from langgraph.graph import StateGraph, START, END, add_messages
28
+ from langgraph.constants import Send
29
+
30
+ from langgraph.checkpoint.memory import MemorySaver
31
+ from langchain_core.messages import HumanMessage, SystemMessage, AnyMessage
32
+ from pydantic import BaseModel
33
+ from typing import List, Annotated, Any
34
+ import re, operator
35
+
36
+
37
# Clear chromadb's shared system-client cache when the script starts.
# NOTE(review): presumably this avoids stale cached clients across Streamlit reruns — confirm.
+ chromadb.api.client.SharedSystemClient.clear_system_cache()
38
+
39
class MultiAgentState(BaseModel):
    """State schema for the top-level topic -> subtopic -> story graph."""

    # Plain list fields: a write replaces the previous value.
    state: list[str] = []
    topic: list[str] = []           # seeded with [topic] by run_multiagent_storygraph
    context: list[str] = []         # seeded with [context] by run_multiagent_storygraph
    sub_topic_list: list[str] = []  # written by topic_extractor

    # Message channels merged through the add_messages reducer.
    messages: Annotated[list[AnyMessage], add_messages]
    sub_topics: Annotated[list[AnyMessage], add_messages]  # written by the *_topic nodes
    stories: Annotated[list[AnyMessage], add_messages]     # written by generate_story_node

    # Writes are concatenated with operator.add.
    stories_lst: Annotated[list, operator.add]
48
+
49
class StoryState(BaseModel):
    """State schema for the per-subtopic Retrieve -> Rerank -> Generate subgraph."""

    story_topic: str = ""            # subtopic this story is about
    retrieved_docs: List[Any] = []   # documents returned by retrieve_node
    reranked_docs: list[str] = []    # page contents kept by rerank_node

    # Generated stories, merged through the add_messages reducer.
    stories: Annotated[list[AnyMessage], add_messages]

    # Writes are concatenated with operator.add.
    stories_lst: Annotated[list, operator.add]
55
+
56
+ class DocumentRAG:
57
def __init__(self, embedding_choice="OpenAI"):
    """Initialise empty document state and the chat model for the provider.

    Args:
        embedding_choice: ``"Cohere"`` selects ``ChatCohere`` and also
            creates a Cohere client (used by ``rerank_node``); any other
            value selects ``ChatOpenAI``.
    """
    self.document_store = None
    self.qa_chain = None
    # Defined up front so readers of this attribute never hit an
    # AttributeError before process_documents() has populated it.
    self.document_text = ""
    self.document_summary = ""
    self.chat_history = []
    self.last_processed_time = None
    self.api_key = os.getenv("OPENAI_API_KEY")
    self.init_time = datetime.now(pytz.UTC)
    self.embedding_choice = embedding_choice

    # Set up appropriate LLM
    if self.embedding_choice == "Cohere":
        # Imported here so the Cohere packages are only touched when selected.
        from langchain_cohere import ChatCohere
        import cohere

        cohere_api_key = os.getenv("COHERE_API_KEY")
        self.llm = ChatCohere(
            model="command-r-plus-08-2024",
            temperature=0.7,
            cohere_api_key=cohere_api_key,
        )
        self.cohere_client = cohere.Client(cohere_api_key)
    else:
        self.llm = ChatOpenAI(
            model_name="gpt-4",
            temperature=0.7,
            api_key=self.api_key,
        )

    # Persistent directory for Chroma
    self.chroma_persist_dir = "./chroma_storage"
    os.makedirs(self.chroma_persist_dir, exist_ok=True)
87
+
88
+
89
+ def _get_embedding_model(self):
90
+ if not self.api_key:
91
+ raise ValueError("API Key not found. Make sure to set the 'OPENAI_API_KEY' environment variable.")
92
+
93
+ if self.embedding_choice == "OpenAI":
94
+ return OpenAIEmbeddings(api_key=self.api_key)
95
+ else:
96
+ from langchain.embeddings import CohereEmbeddings
97
+ return CohereEmbeddings(
98
+ model="embed-multilingual-light-v3.0",
99
+ cohere_api_key=os.getenv("COHERE_API_KEY")
100
+ )
101
+
102
+
103
+
104
+
105
def process_documents(self, uploaded_files):
    """Load uploaded files, index them in Chroma and build the QA chain.

    Args:
        uploaded_files: Uploaded file objects exposing ``name`` and ``read()``.

    Returns:
        A status string. It contains "successfully" only when every file
        was loaded, split and indexed; otherwise it describes the failure.
    """
    if not self.api_key:
        return "Please set the OpenAI API key in the environment variables."
    if not uploaded_files:
        return "Please upload documents first."

    try:
        loader_by_suffix = {
            ".pdf": PyPDFLoader,
            ".txt": TextLoader,
            ".csv": CSVLoader,
        }
        documents = []
        for uploaded_file in uploaded_files:
            # Case-insensitive so an upload named "REPORT.PDF" is accepted.
            suffix = os.path.splitext(uploaded_file.name)[1].lower()
            loader_cls = loader_by_suffix.get(suffix)
            if loader_cls is None:
                return f"Unsupported file type: {uploaded_file.name}"

            # The loaders are constructed from a path, so write the upload
            # to a temporary file first; the handle is closed before loading.
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
                temp_file.write(uploaded_file.read())
                temp_file_path = temp_file.name

            try:
                documents.extend(loader_cls(temp_file_path).load())
            except Exception as e:
                return f"Error loading {uploaded_file.name}: {str(e)}"
            finally:
                # The content is in memory now; do not leave the copy behind.
                try:
                    os.remove(temp_file_path)
                except OSError:
                    pass

        if not documents:
            return "No valid documents were processed. Please check your files."

        # Split text for better processing
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )
        documents = text_splitter.split_documents(documents)

        # Combined text is kept for later summary generation.
        self.document_text = " ".join(doc.page_content for doc in documents)

        # Create embeddings and initialize retrieval chain
        embeddings = self._get_embedding_model()
        self.document_store = Chroma.from_documents(
            documents,
            embeddings,
            persist_directory=self.chroma_persist_dir,
        )

        self.qa_chain = ConversationalRetrievalChain.from_llm(
            ChatOpenAI(temperature=0, model_name='gpt-4', api_key=self.api_key),
            self.document_store.as_retriever(search_kwargs={'k': 6}),
            return_source_documents=True,
            verbose=False,
        )

        self.last_processed_time = datetime.now(pytz.UTC)
        return "Documents processed successfully!"
    except Exception as e:
        return f"Error processing documents: {str(e)}"
171
+
172
def generate_summary(self, text, language):
    """Generate a structured summary from all chunks of the document.

    The text is cut into 3000-character chunks, each chunk is summarised
    separately, and the parts are joined under "### Part N" headings.

    Args:
        text: Full document text to summarise.
        language: Language name inserted into the system prompt.

    Returns:
        The combined summary, an empty string when there is nothing to
        summarise, or an error message string if the API key is missing or
        a request fails.
    """
    if not self.api_key:
        return "API Key not set. Please set it in the environment variables."

    try:
        client = OpenAI(api_key=self.api_key)

        # The prompt does not depend on the chunk, so build it once.
        system_prompt = f"""
        You are a scientific summarization assistant.
        Summarize the input below in {language} in a structured format, covering:
        - Abstract (if present)
        - Key Contributions
        - Results/Findings
        - Conclusion
        - Limitations
        - Future Work

        If any section is missing, just skip it. Keep the language clear and concise.
        """

        # Split into chunks; whitespace-only chunks carry nothing to summarise.
        chunks = [text[i:i + 3000] for i in range(0, len(text), 3000)]
        chunks = [chunk for chunk in chunks if chunk.strip()]
        summaries = []

        for i, chunk in enumerate(chunks):
            response = client.chat.completions.create(
                model="gpt-4",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": chunk},
                ],
                temperature=0.4,
            )

            # content may be None; treat that as an empty part instead of
            # failing the whole summary.
            content = (response.choices[0].message.content or "").strip()
            summaries.append(f"### Part {i+1}\n{content}")

        return "\n\n".join(summaries)

    except Exception as e:
        return f"Error generating summary: {str(e)}"
213
+
214
+
215
def create_podcast(self, language):
    """Generate a podcast script and audio based on doc summary in the specified language.

    Args:
        language: Language name inserted into the script prompt.

    Returns:
        ``(script, audio_path)`` on success, where ``audio_path`` is an MP3
        file the caller owns. On failure returns ``(error_message, None)``.
    """
    if not self.document_summary:
        return "Please process documents before generating a podcast.", None

    if not self.api_key:
        return "Please set the OpenAI API key in the environment variables.", None

    try:
        client = OpenAI(api_key=self.api_key)

        # Generate podcast script
        script_response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": f"""
                You are a professional podcast producer. Create a 1-2 minute structured podcast dialogue in {language}
                based on the provided document summary. Follow this flow:
                1. Brief Introduction of the Topic
                2. Highlight the limitations of existing methods, the key contributions of the research paper, and its advantages over the current state of the art.
                3. Discuss Limitations of the research work.
                4. Present the Conclusion
                5. Mention Future Work

                Clearly label the dialogue as 'Host 1:' and 'Host 2:'. Maintain a tone that is engaging, conversational,
                and insightful, while ensuring the flow remains logical and natural. Include a well-structured opening
                to introduce the topic and a clear, thoughtful closing that provides a smooth conclusion, avoiding any
                abrupt endings."""
                },
                {"role": "user", "content": f"""
                Document Summary: {self.document_summary}"""}
            ],
            temperature=0.7
        )

        script = script_response.choices[0].message.content
        if not script:
            return "Error: Failed to generate podcast script.", None

        # Convert script to audio
        final_audio = AudioSegment.empty()
        voice_cycle = ("nova", "onyx")
        # Voices are bound to speaker labels in order of first appearance,
        # so a speaker keeps one voice even across consecutive lines or
        # after a failed text-to-speech request.
        voice_by_speaker = {}

        for raw_line in script.split("\n"):
            speaker, colon, text = raw_line.strip().partition(":")
            # Drop markdown emphasis around labels such as "**Host 1:**".
            text = text.strip().strip("*").strip()
            if not colon or not text:
                continue

            speaker_key = speaker.strip().strip("*").strip().lower()
            if speaker_key not in voice_by_speaker:
                voice_by_speaker[speaker_key] = voice_cycle[len(voice_by_speaker) % len(voice_cycle)]
            voice = voice_by_speaker[speaker_key]

            segment_path = None
            try:
                audio_response = client.audio.speech.create(
                    model="tts-1",
                    voice=voice,
                    input=text
                )

                # Close the handle before the file is written and re-read.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as segment_file:
                    segment_path = segment_file.name
                audio_response.stream_to_file(segment_path)

                final_audio += AudioSegment.from_file(segment_path)
                final_audio += AudioSegment.silent(duration=300)
            except Exception as e:
                # Best effort: one bad line must not abort the whole podcast.
                print(f"Error generating audio for line: {text}")
                print(f"Details: {e}")
            finally:
                # The segment is already merged (or failed); remove the temp file.
                if segment_path is not None:
                    try:
                        os.remove(segment_path)
                    except OSError:
                        pass

        if len(final_audio) == 0:
            return "Error: No audio could be generated.", None

        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as output_handle:
            output_file = output_handle.name
        final_audio.export(output_file, format="mp3")
        return script, output_file

    except Exception as e:
        return f"Error generating podcast: {str(e)}", None
297
+
298
def handle_query(self, question, history, language):
    """Answer a question through the retrieval chain in the requested language.

    Args:
        question: The user's question.
        history: List of ``(question, answer)`` pairs from earlier turns.
        language: Language name inserted into the instruction preface.

    Returns:
        On success, ``history`` itself with the new pair appended in place.
        Otherwise a new list: ``history`` plus a ``("System", message)`` entry.
    """
    if not self.qa_chain:
        return history + [("System", "Please process the documents first.")]

    try:
        instruction = (
            f"Instruction: Respond in {language}. Be professional and concise, "
            f"keeping the response under 300 words. If you cannot provide an answer, say: "
            f'"I am not sure about this question. Please try asking something else."'
        )
        past_turns = [(asked, replied) for asked, replied in history]
        payload = {
            "question": f"{instruction}\nQuery: {question}",
            "chat_history": past_turns,
        }
        outcome = self.qa_chain(payload)

        if "answer" in outcome:
            history.append((question, outcome["answer"]))
            return history
        return history + [("System", "Sorry, an error occurred.")]
    except Exception as e:
        return history + [("System", f"Error: {str(e)}")]
322
+
323
def extract_subtopics(self, messages):
    """Return the bold bullet titles (``- **Title**``) found in the messages.

    Args:
        messages: Objects exposing a ``content`` string.

    Returns:
        The captured titles, in order of appearance.
    """
    combined = "\n".join(message.content for message in messages)
    bold_bullet = re.compile(r'- \*\*(.*?)\*\*')
    return bold_bullet.findall(combined)
326
+
327
def beginner_topic(self, state: MultiAgentState):
    """Ask the LLM for beginner-level topics on the state's topic and context.

    Returns:
        A state update that adds the reply to both ``messages`` and
        ``sub_topics``.
    """
    prompt = f"What are the beginner-level topics you can learn about {', '.join(state.topic)} in {', '.join(state.context)}?"
    msg = self.llm.invoke([SystemMessage("Suppose you're a middle grader..."), HumanMessage(prompt)])
    # The state field is "messages"; the former "message" key matched no
    # field, so the reply never reached the message log.
    return {"messages": msg, "sub_topics": msg}
331
+
332
def middle_topic(self, state: MultiAgentState):
    """Ask the LLM for middle-level topics on the state's topic and context.

    Returns:
        A state update that adds the reply to both ``messages`` and
        ``sub_topics``.
    """
    # NOTE(review): the prompt says "Avoid previous." but no earlier topics
    # are included in the request — confirm whether they should be.
    prompt = f"What are the middle-level topics for {', '.join(state.topic)} in {', '.join(state.context)}? Avoid previous."
    msg = self.llm.invoke([SystemMessage("Suppose you're a college student..."), HumanMessage(prompt)])
    # The state field is "messages"; the former "message" key matched no
    # field, so the reply never reached the message log.
    return {"messages": msg, "sub_topics": msg}
336
+
337
def advanced_topic(self, state: MultiAgentState):
    """Ask the LLM for advanced-level topics on the state's topic and context.

    Returns:
        A state update that adds the reply to both ``messages`` and
        ``sub_topics``.
    """
    # NOTE(review): the prompt says "Avoid previous." but no earlier topics
    # are included in the request — confirm whether they should be.
    prompt = f"What are the advanced-level topics for {', '.join(state.topic)} in {', '.join(state.context)}? Avoid previous."
    msg = self.llm.invoke([SystemMessage("Suppose you're a teacher..."), HumanMessage(prompt)])
    # The state field is "messages"; the former "message" key matched no
    # field, so the reply never reached the message log.
    return {"messages": msg, "sub_topics": msg}
341
+
342
def topic_extractor(self, state: MultiAgentState):
    """Collect subtopic titles from the accumulated ``sub_topics`` messages.

    Returns:
        A state update setting ``sub_topic_list`` to the extracted titles.
    """
    titles = self.extract_subtopics(state.sub_topics)
    return {"sub_topic_list": titles}
344
+
345
+
346
def retrieve_node(self, state: StoryState):
    """Fetch up to 20 documents from the vector store for the story topic.

    Returns:
        A state update with ``retrieved_docs`` and the ``question`` used;
        ``retrieved_docs`` is empty when no documents have been processed.
    """
    if not self.document_store:
        return {"retrieved_docs": [], "question": "No documents processed yet."}

    retriever = self.document_store.as_retriever(search_kwargs={"k": 20})
    query = f"information about {state.story_topic}"
    # invoke() replaces the deprecated get_relevant_documents().
    docs = retriever.invoke(query)
    return {"retrieved_docs": docs, "question": query}
357
+
358
+
359
+
360
+
361
def rerank_node(self, state: StoryState):
    """Keep the five most useful retrieved texts for the story topic.

    With the Cohere provider and an available client, the Cohere rerank
    endpoint picks the texts; otherwise the five longest texts are kept.

    Returns:
        A state update with ``reranked_docs`` (page-content strings) and
        the ``question`` used for reranking.
    """
    query = f"Rerank documents based on how well they explain the topic {state.story_topic}"
    texts = [document.page_content for document in state.retrieved_docs]

    if not texts:
        return {"reranked_docs": [], "question": query}

    cohere_available = self.embedding_choice == "Cohere" and hasattr(self, "cohere_client")
    if not cohere_available:
        # Fallback heuristic: longest texts first (ties keep retrieval order).
        longest_first = sorted(texts, key=len, reverse=True)
        return {"reranked_docs": longest_first[:5], "question": query}

    response = self.cohere_client.rerank(
        query=query,
        documents=texts,
        top_n=5,
        model="rerank-v3.5",
    )
    chosen = [texts[hit.index] for hit in response.results]
    return {"reranked_docs": chosen, "question": query}
382
+
383
+
384
+
385
def generate_story_node(self, state: StoryState, language="English"):
    """Write a story about the state's topic from the reranked context.

    Args:
        state: Story state providing ``reranked_docs`` and ``story_topic``.
        language: Language the story should be written in.

    Returns:
        A state update adding the LLM reply to ``stories``.
    """
    context = "\n\n".join(state.reranked_docs)
    topic = state.story_topic

    storyteller_brief = SystemMessage(f"""
        Suppose you're a brilliant science storyteller.
        You write stories that help middle schoolers understand complex science topics with fun and clarity.
        Add subtle humor and make it engaging.
        Write the story in {language}.
        """)
    story_request = HumanMessage(f"""
        Use the following context to write a fun and simple story explaining **{topic}** to a middle schooler:\n
        Context:\n{context}\n\n
        Story:
        """)

    story = self.llm.invoke([storyteller_brief, story_request])
    return {"stories": story}
403
+
404
+
405
+
406
def run_multiagent_storygraph(self, topic: str, context: str, language: str = "English"):
    """Build and run the topic -> subtopic -> story graph.

    Three LLM nodes propose beginner, middle and advanced topics, the
    subtopic titles are extracted, and one story subgraph run
    (Retrieve -> Rerank -> Generate) is dispatched per subtopic.

    Args:
        topic: Main topic to explore.
        context: Learning context for the topic prompts.
        language: Language the stories are written in.

    Returns:
        The final graph state as returned by ``invoke``.
    """
    if self.embedding_choice == "OpenAI":
        self.llm = ChatOpenAI(model_name="gpt-4", temperature=0.7, api_key=self.api_key)
    elif self.embedding_choice == "Cohere":
        from langchain_cohere import ChatCohere
        self.llm = ChatCohere(
            model="command-r-plus-08-2024",
            temperature=0.7,
            cohere_api_key=os.getenv("COHERE_API_KEY")
        )

    # Define the story subgraph with reranking
    story_graph = StateGraph(StoryState)
    story_graph.add_node("Retrieve", self.retrieve_node)
    story_graph.add_node("Rerank", self.rerank_node)
    # StoryState is a pydantic model with no dict-style .get() and no
    # "language" field, so the language comes from this call's argument
    # rather than from a lookup on the state.
    story_graph.add_node(
        "Generate",
        lambda state: self.generate_story_node(state, language=language),
    )
    story_graph.set_entry_point("Retrieve")
    story_graph.add_edge("Retrieve", "Rerank")
    story_graph.add_edge("Rerank", "Generate")
    story_graph.set_finish_point("Generate")
    story_subgraph = story_graph.compile()

    # Define the main graph
    graph = StateGraph(MultiAgentState)
    graph.add_node("beginner_topic", self.beginner_topic)
    graph.add_node("middle_topic", self.middle_topic)
    graph.add_node("advanced_topic", self.advanced_topic)
    graph.add_node("topic_extractor", self.topic_extractor)
    graph.add_node("story_generator", story_subgraph)

    graph.add_edge(START, "beginner_topic")
    graph.add_edge("beginner_topic", "middle_topic")
    graph.add_edge("middle_topic", "advanced_topic")
    graph.add_edge("advanced_topic", "topic_extractor")
    # Fan out: one story_generator run per extracted subtopic.
    graph.add_conditional_edges(
        "topic_extractor",
        lambda state: [Send("story_generator", {"story_topic": t, "language": language}) for t in state.sub_topic_list],
        ["story_generator"]
    )
    graph.add_edge("story_generator", END)

    compiled = graph.compile(checkpointer=MemorySaver())
    thread = {"configurable": {"thread_id": "storygraph-session"}}

    # Initial invocation
    result = compiled.invoke({"topic": [topic], "context": [context]}, thread)

    # Fallback if no subtopics found
    # NOTE(review): these fallback subtopics are fixed and do not depend on
    # the requested topic — confirm this is intended.
    if not result.get("sub_topic_list"):
        fallback_subs = ["Neural Networks", "Reinforcement Learning", "Supervised vs Unsupervised"]
        compiled.update_state(thread, {"sub_topic_list": fallback_subs})
        result = compiled.invoke(None, thread, stream_mode="values")

    return result
461
+
462
+
463
+
464
+
465
# Sidebar: static "about" text, usage steps and credits.
with st.sidebar:
    st.title("About")
    st.markdown(
        """
        This app is inspired by the [RAG_HW HuggingFace Space](https://huggingface.co/spaces/wint543/RAG_HW).
        It allows users to upload documents, generate summaries, ask questions, and create podcasts.
        """
    )

    # Heading followed by one markdown call per step, in display order.
    for sidebar_line in (
        "### Steps:",
        "1. Upload documents.",
        "2. Generate summary.",
        "3. Ask questions.",
        "4. Create podcast.",
    ):
        st.markdown(sidebar_line)

    st.markdown("### Credits:")
    st.markdown("Image Source: [Geeksforgeeks](https://www.geeksforgeeks.org/how-to-convert-document-into-podcast/)")
482
+
483
# Page header
st.title("Document Analyzer & Podcast Generator")
st.image("./cover_image.png", use_container_width=True)

# Embedding model selector (main screen)
st.subheader("Embedding Model Selection")
embedding_choice = st.radio(
    "Choose the embedding model for document processing and story generation:",
    ["OpenAI", "Cohere"],
    horizontal=True,
    key="embedding_model",
)

# Create the RAG system on first run, and replace it whenever the selected
# provider differs from the one the stored instance was built with.
rag_missing_or_stale = (
    "rag_system" not in st.session_state
    or st.session_state.rag_system.embedding_choice != embedding_choice
)
if rag_missing_or_stale:
    st.session_state.rag_system = DocumentRAG(embedding_choice=embedding_choice)
500
+
501
+
502
# Step 1: Upload and Process Documents
st.subheader("Step 1: Upload and Process Documents")
uploaded_files = st.file_uploader("Upload files (PDF, TXT, CSV)", accept_multiple_files=True)

process_clicked = st.button("Process Documents")
if process_clicked and not uploaded_files:
    st.warning("No files uploaded.")
elif process_clicked:
    with st.spinner("Processing documents, please wait..."):
        result = st.session_state.rag_system.process_documents(uploaded_files)
        # process_documents() reports its outcome as a string; only the
        # success message contains "successfully".
        show_result = st.success if "successfully" in result else st.error
        show_result(result)
516
+
517
# Step 2: Generate Summary
st.subheader("Step 2: Generate Summary")
st.write("Select Summary Language:")
summary_language_options = ["English", "Hindi", "Spanish", "French", "German", "Chinese", "Japanese"]
summary_language = st.radio(
    # A real label hidden via label_visibility instead of an empty one;
    # the visible caption is the st.write() above.
    "Summary language",
    summary_language_options,
    horizontal=True,
    key="summary_language",
    label_visibility="collapsed",
)

if st.button("Generate Summary"):
    rag_system = st.session_state.rag_system
    document_text = getattr(rag_system, "document_text", "")
    if document_text:
        with st.spinner("Generating summary, please wait..."):
            summary = rag_system.generate_summary(document_text, summary_language)

        # generate_summary() reports failures as ordinary strings; do not
        # store those as the document summary or show them as a success.
        summary_failed = not summary or summary.startswith(
            ("Error generating summary:", "API Key not set.")
        )
        if summary_failed:
            st.error(summary or "Failed to generate summary.")
        else:
            rag_system.document_summary = summary
            st.text_area("Document Summary", summary, height=200)
            st.success("Summary generated successfully!")
    else:
        st.info("Please process documents first to generate summary.")
540
+
541
# Step 3: Ask Questions
st.subheader("Step 3: Ask Questions")
st.write("Select Q&A Language:")
qa_language_options = ["English", "Hindi", "Spanish", "French", "German", "Chinese", "Japanese"]
qa_language = st.radio(
    # A real label hidden via label_visibility instead of an empty one.
    "Q&A language",
    qa_language_options,
    horizontal=True,
    key="qa_language",
    label_visibility="collapsed",
)

if st.session_state.rag_system.qa_chain:
    # The script reruns on every interaction, so the conversation must live
    # in session state; a local list would start empty on each rerun and
    # the chain would never see earlier turns.
    if "qa_history" not in st.session_state:
        st.session_state.qa_history = []

    user_question = st.text_input("Ask a question:")
    if st.button("Submit Question"):
        if user_question.strip():
            with st.spinner("Answering your question, please wait..."):
                st.session_state.qa_history = st.session_state.rag_system.handle_query(
                    user_question, st.session_state.qa_history, qa_language
                )
        else:
            st.warning("Please enter a question.")

    for question, answer in st.session_state.qa_history:
        st.chat_message("user").write(question)
        st.chat_message("assistant").write(answer)
else:
    st.info("Please process documents first to enable Q&A.")
563
+
564
+
565
# Multi-Agent Story Explorer
# NOTE(review): this section is titled "Step 5" but is rendered before the
# podcast section titled "Step 4" — confirm the intended numbering.
st.subheader("Step 5: Explore Subtopics via Multi-Agent Graph")
st.write("Select Story Language:")
story_language_options = ["English", "Hindi", "Spanish", "French", "German", "Chinese", "Japanese"]
story_language = st.radio(
    # A real label hidden via label_visibility instead of an empty one.
    "Story language",
    story_language_options,
    horizontal=True,
    key="story_language",
    label_visibility="collapsed",
)

story_topic = st.text_input("Enter main topic:", value="Machine Learning")
story_context = st.text_input("Enter learning context:", value="Pollution")

if st.button("Run Story Graph"):
    if st.session_state.rag_system.document_store is None:
        st.warning("Please process documents first before running the story graph.")
    else:
        with st.spinner("Generating subtopics and stories..."):
            # Pass the selected language; otherwise the radio above has no
            # effect and the method's "English" default is always used.
            result = st.session_state.rag_system.run_multiagent_storygraph(
                topic=story_topic,
                context=story_context,
                language=story_language,
            )

        subtopics = result.get("sub_topic_list", [])
        st.markdown("### 🧠 Extracted Subtopics")
        for sub in subtopics:
            st.markdown(f"- {sub}")

        stories = result.get("stories", [])
        if stories:
            st.markdown("### 📚 Generated Stories")

            # Present stories in tabs
            tabs = st.tabs([f"Story {i+1}" for i in range(len(stories))])
            for tab, story in zip(tabs, stories):
                with tab:
                    st.markdown(story.content)
        else:
            st.warning("No stories were generated.")
602
+
603
+
604
+
605
# Generate Podcast
# NOTE(review): this section is titled "Step 4" but is rendered after the
# story section titled "Step 5" — confirm the intended numbering.
st.subheader("Step 4: Generate Podcast")
st.write("Select Podcast Language:")
podcast_language_options = ["English", "Hindi", "Spanish", "French", "German", "Chinese", "Japanese"]
podcast_language = st.radio(
    # A real label hidden via label_visibility instead of an empty one.
    "Podcast language",
    podcast_language_options,
    horizontal=True,
    key="podcast_language",
    label_visibility="collapsed",
)

if st.session_state.rag_system.document_summary:
    if st.button("Generate Podcast"):
        with st.spinner("Generating podcast, please wait..."):
            script, audio_path = st.session_state.rag_system.create_podcast(podcast_language)
        if audio_path:
            st.text_area("Generated Podcast Script", script, height=200)
            st.audio(audio_path, format="audio/mp3")
            st.success("Podcast generated successfully! You can listen to it above.")
        else:
            # On failure create_podcast() returns (error_message, None).
            st.error(script)
else:
    st.info("Please process documents and generate summary before creating a podcast.")