vishalshelke commited on
Commit
1d95600
·
verified ·
1 Parent(s): 49322d8

Upload 15 files

Browse files
.env ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
# SECURITY: the original commit embedded live API keys (Google, Tavily,
# LangSmith, Mistral, OpenAI) directly in this file. Secrets must never be
# committed to version control — rotate the exposed keys immediately, add
# `.env` to .gitignore, and supply real values via deployment secrets.
GOOGLE_API_KEY=<rotate-and-set-via-secrets>
TAVILY_API_KEY=<rotate-and-set-via-secrets>
LANGSMITH_TRACING=true
LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
LANGSMITH_API_KEY=<rotate-and-set-via-secrets>
LANGSMITH_PROJECT="pr-proper-venue-61"
LANGCHAIN_TRACING_V2=true
LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
MISTRAL_API_KEY=<rotate-and-set-via-secrets>
OPENAI_API_KEY=<rotate-and-set-via-secrets>
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ generated-icon.png filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,14 +1,4 @@
1
- ---
2
- title: AI Tutor
3
- emoji: 📊
4
- colorFrom: red
5
- colorTo: green
6
- sdk: gradio
7
- sdk_version: 5.33.1
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- short_description: AI tutor
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
---
title: AI tutor
sdk: gradio
app_file: main.py
pinned: false
---
 
 
 
 
 
 
 
 
 
 
app/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
"""
AI Tutor Application
Converts PDFs into interactive lectures with voice narration and RAG chatbot functionality
"""

# Package version, bumped manually on release.
__version__ = "1.0.0"
app/chatbot.py ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import openai
import os
import logging
from typing import Dict, Any, List, Optional
from datetime import datetime
from app.models import ChatMessage, ChatSession
from app.rag_system import RAGSystem
from dotenv import load_dotenv

# Pull configuration (API keys, tracing settings) from a local .env file
# before any client below reads the environment.
load_dotenv()

logger = logging.getLogger(__name__)
14
+
15
class RAGChatbot:
    """RAG-powered chatbot with memory of PDF and lecture content.

    Sessions are held in memory (``self.sessions``) and keyed by a caller-supplied
    session id; the indexed document text lives in the companion ``RAGSystem``
    under the same key. All public methods catch, log, and swallow exceptions,
    reporting failure through their return value rather than raising.
    """

    def __init__(self):
        # API key comes from the environment; an empty string defers the
        # failure to the first actual API call instead of import time.
        self.client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY", ""))
        self.rag_system = RAGSystem()
        self.sessions: Dict[str, ChatSession] = {}
        self.max_context_length = 8000  # Token budget for retrieved context

    def create_session(self, session_id: str, pdf_content: Optional[str] = None,
                       lecture_content: Optional[str] = None) -> bool:
        """Create a new chat session with optional PDF and lecture content.

        Any supplied content is also indexed in the RAG store so later
        questions can retrieve it. Returns True on success, False on failure.
        """
        try:
            session = ChatSession(
                session_id=session_id,
                pdf_content=pdf_content,
                lecture_content=lecture_content
            )
            self.sessions[session_id] = session

            # Index whatever content was supplied so retrieval can find it.
            if pdf_content:
                self.rag_system.add_pdf_content(session_id, pdf_content)
            if lecture_content:
                self.rag_system.add_lecture_content(session_id, lecture_content)

            logger.info(f"Created chat session {session_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to create session {session_id}: {str(e)}")
            return False

    def add_message(self, session_id: str, role: str, content: str) -> bool:
        """Append a message to the session history.

        Returns False (without raising) if the session does not exist.
        """
        try:
            if session_id not in self.sessions:
                return False

            message = ChatMessage(role=role, content=content)
            self.sessions[session_id].messages.append(message)
            return True

        except Exception as e:
            logger.error(f"Failed to add message to session {session_id}: {str(e)}")
            return False

    def get_response(self, session_id: str, user_message: str) -> Dict[str, Any]:
        """Generate a response to ``user_message`` using RAG.

        Returns a dict with keys ``success``, ``response`` and, on success,
        ``sources_used`` and ``session_id``; on failure an ``error`` key.
        """
        try:
            if session_id not in self.sessions:
                return {
                    'success': False,
                    'error': 'Session not found',
                    'response': ''
                }

            session = self.sessions[session_id]

            # Record the user turn before generating, so history is complete
            # even if generation fails below.
            self.add_message(session_id, "user", user_message)

            # Retrieve relevant content; retrieval failure degrades to an
            # un-grounded answer rather than aborting the chat turn.
            retrieval_result = self.rag_system.retrieve_relevant_content(
                session_id, user_message, n_results=5
            )

            if not retrieval_result['success']:
                logger.warning(f"Content retrieval failed for session {session_id}")
                relevant_content = []
            else:
                relevant_content = retrieval_result['results']

            response = self._generate_response(session, user_message, relevant_content)

            # Record the assistant turn as well.
            self.add_message(session_id, "assistant", response)

            return {
                'success': True,
                'response': response,
                'sources_used': len(relevant_content),
                'session_id': session_id
            }

        except Exception as e:
            logger.error(f"Failed to generate response for session {session_id}: {str(e)}")
            return {
                'success': False,
                'error': str(e),
                'response': 'I apologize, but I encountered an error while processing your message. Please try again.'
            }

    def _generate_response(self, session: ChatSession, user_message: str,
                           relevant_content: List[Dict]) -> str:
        """Generate an answer with OpenAI, grounding it in retrieved context.

        Always returns a string; API errors yield a canned apology so callers
        never see an exception or ``None``.
        """
        try:
            # Build context from the top retrieved chunks.
            context_parts = []

            if relevant_content:
                context_parts.append("Relevant information from your documents:")
                for i, item in enumerate(relevant_content[:3], 1):  # Limit to top 3 results
                    source = "PDF" if item['source'] == 'pdf' else "Lecture"
                    # Truncate each chunk to keep the prompt within budget.
                    context_parts.append(f"{i}. [{source}] {item['content'][:500]}...")
                context_parts.append("")

            # Recent conversation history, excluding the current user turn
            # (which was already appended by get_response).
            conversation_history = []
            recent_messages = session.messages[-6:]  # Last 6 messages for context

            for msg in recent_messages[:-1]:  # Exclude the current user message
                conversation_history.append(f"{msg.role.title()}: {msg.content}")

            system_prompt = """You are a helpful AI assistant that can answer questions about uploaded PDF documents and generated lectures.

Key guidelines:
1. Use the provided relevant information to answer questions accurately
2. If you don't have enough information in the context, say so clearly
3. Maintain a conversational and educational tone
4. Reference the source (PDF or Lecture) when appropriate
5. Be concise but thorough in your explanations
6. If asked about something not in the documents, explain that your knowledge is limited to the uploaded content

Always strive to be helpful while being honest about the limitations of your knowledge."""

            messages = [{"role": "system", "content": system_prompt}]

            if context_parts:
                context_message = "\n".join(context_parts)
                messages.append({"role": "system", "content": context_message})

            if conversation_history:
                history_message = "Previous conversation:\n" + "\n".join(conversation_history)
                messages.append({"role": "system", "content": history_message})

            messages.append({"role": "user", "content": user_message})

            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=messages,
                temperature=0.7,
                max_tokens=1000
            )

            # message.content can be None (e.g. refusals); never return None
            # to callers that store/serialize the response as a string.
            return response.choices[0].message.content or ""

        except Exception as e:
            logger.error(f"Response generation failed: {str(e)}")
            return "I apologize, but I'm having trouble generating a response right now. Please try rephrasing your question."

    def get_session_history(self, session_id: str) -> List[Dict[str, Any]]:
        """Return chat history as serializable dicts; [] if session unknown."""
        try:
            if session_id not in self.sessions:
                return []

            session = self.sessions[session_id]
            return [
                {
                    'role': msg.role,
                    'content': msg.content,
                    'timestamp': msg.timestamp.isoformat()
                }
                for msg in session.messages
            ]

        except Exception as e:
            logger.error(f"Failed to get session history {session_id}: {str(e)}")
            return []

    def clear_session(self, session_id: str) -> bool:
        """Clear a chat session and its indexed data. Returns success flag."""
        try:
            # Clear the vector store first, then the in-memory session.
            self.rag_system.clear_session_data(session_id)

            if session_id in self.sessions:
                del self.sessions[session_id]

            logger.info(f"Cleared session {session_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to clear session {session_id}: {str(e)}")
            return False

    def get_session_stats(self, session_id: str) -> Dict[str, Any]:
        """Return statistics about a session, merged with RAG-store stats.

        ``{'exists': False}`` when the session is unknown or on error.
        """
        try:
            if session_id not in self.sessions:
                return {'exists': False}

            session = self.sessions[session_id]
            rag_stats = self.rag_system.get_session_stats(session_id)

            return {
                'exists': True,
                'message_count': len(session.messages),
                'created_at': session.created_at.isoformat(),
                'has_pdf': session.pdf_content is not None,
                'has_lecture': session.lecture_content is not None,
                **rag_stats
            }

        except Exception as e:
            logger.error(f"Failed to get session stats {session_id}: {str(e)}")
            return {'exists': False, 'error': str(e)}

    def update_session_content(self, session_id: str, pdf_content: Optional[str] = None,
                               lecture_content: Optional[str] = None) -> bool:
        """Attach new PDF and/or lecture content to an existing session.

        Content is both stored on the session object and (re-)indexed in the
        RAG store. Returns False if the session does not exist or on error.
        """
        try:
            if session_id not in self.sessions:
                return False

            session = self.sessions[session_id]

            if pdf_content:
                session.pdf_content = pdf_content
                self.rag_system.add_pdf_content(session_id, pdf_content)

            if lecture_content:
                session.lecture_content = lecture_content
                self.rag_system.add_lecture_content(session_id, lecture_content)

            logger.info(f"Updated content for session {session_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to update session content {session_id}: {str(e)}")
            return False
app/gradio_interface.py ADDED
@@ -0,0 +1,396 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import os
import uuid
import tempfile
from typing import Dict, Any, Optional, Tuple
import logging
from datetime import datetime

from app.pdf_processor import PDFProcessor
from app.lecture_generator import LectureGenerator
from app.voice_synthesizer import VoiceSynthesizer
from app.chatbot import RAGChatbot

logger = logging.getLogger(__name__)

# Module-level singletons shared by every Gradio event handler.
pdf_processor = PDFProcessor()
lecture_generator = LectureGenerator()
voice_synthesizer = VoiceSynthesizer()
chatbot = RAGChatbot()

# Global state for sessions
current_session = None
session_data = {}
25
+
26
def create_gradio_interface():
    """Create and configure the Gradio interface.

    Builds a three-tab Blocks app (PDF upload/processing, generated lecture,
    tutor chat), wires the event handlers to the module-level singletons, and
    returns the un-launched ``gr.Blocks`` interface.
    """

    # Custom CSS for better styling
    css = """
    .container {
        max-width: 1200px;
        margin: 0 auto;
    }
    .status-box {
        padding: 10px;
        border-radius: 5px;
        margin: 10px 0;
    }
    .success {
        background-color: #d4edda;
        border: 1px solid #c3e6cb;
        color: #155724;
    }
    .error {
        background-color: #f8d7da;
        border: 1px solid #f5c6cb;
        color: #721c24;
    }
    .processing {
        background-color: #d1ecf1;
        border: 1px solid #bee5eb;
        color: #0c5460;
    }
    """

    with gr.Blocks(css=css, title="AI Tutor") as interface:
        gr.Markdown("# 🎓 AI Tutor")
        gr.Markdown("Convert PDFs into interactive lectures with voice narration and chat with your AI tutor about any topic!")

        # Per-browser-session id, regenerated when the chat is cleared.
        session_id_state = gr.State(value=str(uuid.uuid4()))

        with gr.Tab("📄 PDF Upload & Processing"):
            with gr.Row():
                with gr.Column(scale=1):
                    pdf_upload = gr.File(
                        label="Upload PDF Document (Optional)",
                        file_types=[".pdf"],
                        type="binary"
                    )

                    lecture_style = gr.Dropdown(
                        choices=["academic", "casual", "detailed"],
                        value="academic",
                        label="Lecture Style"
                    )

                    include_examples = gr.Checkbox(
                        value=True,
                        label="Include Examples"
                    )

                    learning_objectives = gr.Textbox(
                        label="Learning Objectives & Topic",
                        placeholder="What do you want to learn? e.g., 'Machine Learning basics', 'Python programming fundamentals', 'Explain quantum physics concepts'",
                        lines=3,
                        max_lines=5
                    )

                    gr.Markdown("**Note:** You can generate a lecture with just learning objectives, or upload a PDF for content-based lectures.")

                    process_btn = gr.Button("🚀 Generate Lecture", variant="primary")

                with gr.Column(scale=2):
                    processing_status = gr.HTML()
                    pdf_info = gr.JSON(label="PDF Information")

        with gr.Tab("📚 Generated Lecture"):
            with gr.Row():
                with gr.Column():
                    lecture_title = gr.Textbox(label="Lecture Title", interactive=False)
                    lecture_content = gr.Textbox(
                        label="Lecture Content",
                        lines=20,
                        max_lines=30,
                        interactive=False
                    )

                    with gr.Row():
                        download_pdf_btn = gr.Button("📄 Download PDF")
                        download_audio_btn = gr.Button("🎤 Generate & Download Audio")

                    pdf_download = gr.File(label="Download Lecture PDF")
                    audio_download = gr.File(label="Download Audio Lecture")

        with gr.Tab("💬 Tutor Chat"):
            with gr.Row():
                with gr.Column(scale=3):
                    chatbot_interface = gr.Chatbot(
                        label="Chat with your AI Tutor about your content",
                        height=400,
                        type="messages"
                    )

                    with gr.Row():
                        msg_input = gr.Textbox(
                            label="Your Message",
                            placeholder="Ask your AI tutor about any topic, PDF content, or lecture...",
                            scale=4
                        )
                        send_btn = gr.Button("Send", scale=1)

                    clear_chat_btn = gr.Button("Clear Chat History")

                with gr.Column(scale=1):
                    chat_stats = gr.JSON(label="Session Statistics")
                    refresh_stats_btn = gr.Button("Refresh Stats")

        # Event handlers
        def process_pdf_handler(pdf_file, style, examples, learning_objectives, session_id):
            """Handle PDF processing or topic-based lecture generation.

            Returns (status_html, info_dict, session_id) for the outputs wired
            to the Generate button.
            """
            global session_data

            try:
                # Require at least one source of content.
                if pdf_file is None and not learning_objectives.strip():
                    return (
                        '<div class="status-box error">❌ Please either upload a PDF file or provide learning objectives</div>',
                        {},
                        session_id
                    )

                if pdf_file is not None:
                    # PDF path: validate, then extract text.
                    validation = pdf_processor.validate_pdf(pdf_file)
                    if not validation['valid']:
                        return (
                            f'<div class="status-box error">❌ {validation["error"]}</div>',
                            {},
                            session_id
                        )

                    extraction_result = pdf_processor.extract_text_from_pdf(pdf_file)
                    if not extraction_result['success']:
                        return (
                            f'<div class="status-box error">❌ {extraction_result["error"]}</div>',
                            {},
                            session_id
                        )

                    pdf_content = extraction_result['text']
                    pdf_data = extraction_result
                else:
                    # Objectives-only path: synthesize a placeholder pdf_data
                    # so downstream code has a uniform shape.
                    pdf_content = ""
                    pdf_data = {
                        'success': True,
                        'text': "",
                        'metadata': {'total_pages': 0, 'title': learning_objectives[:50], 'author': '', 'subject': ''},
                        'word_count': 0,
                        'character_count': 0
                    }

                # Generate lecture
                lecture_result = lecture_generator.generate_lecture(
                    pdf_content,
                    style=style,
                    include_examples=examples,
                    learning_objectives=learning_objectives
                )

                if not lecture_result['success']:
                    return (
                        f'<div class="status-box error">❌ Lecture generation failed: {lecture_result["error"]}</div>',
                        {},
                        session_id
                    )

                # Store session data for the lecture/download/chat tabs.
                session_data[session_id] = {
                    'pdf_data': pdf_data,
                    'lecture_data': lecture_result,
                    'processed_at': datetime.now().isoformat()
                }

                # Create chatbot session grounded in the new content.
                chatbot.create_session(
                    session_id,
                    pdf_content=pdf_content,
                    lecture_content=lecture_result['content']
                )

                if pdf_file is not None:
                    success_html = '<div class="status-box success">✅ PDF processed successfully!</div>'
                    info = {
                        'filename': getattr(pdf_file, 'name', 'uploaded_file.pdf'),
                        'pages': pdf_data['metadata']['total_pages'],
                        'word_count': pdf_data['word_count'],
                        'lecture_title': lecture_result['title'],
                        'estimated_duration': f"{lecture_result['estimated_duration']} minutes"
                    }
                else:
                    success_html = '<div class="status-box success">✅ Lecture generated from learning objectives!</div>'
                    info = {
                        'source': 'Learning Objectives',
                        'topic': learning_objectives[:100] + "..." if len(learning_objectives) > 100 else learning_objectives,
                        'lecture_title': lecture_result['title'],
                        'estimated_duration': f"{lecture_result['estimated_duration']} minutes"
                    }

                return success_html, info, session_id

            except Exception as e:
                logger.error(f"PDF processing error: {str(e)}")
                return (
                    f'<div class="status-box error">❌ Processing failed: {str(e)}</div>',
                    {},
                    session_id
                )

        def update_lecture_display(session_id):
            """Return (title, content) of the generated lecture, or empties."""
            global session_data

            if session_id not in session_data:
                return "", ""

            lecture_data = session_data[session_id]['lecture_data']
            return lecture_data['title'], lecture_data['content']

        def generate_pdf_download(session_id):
            """Render the lecture to a PDF file and return its path (or None)."""
            global session_data

            try:
                if session_id not in session_data:
                    return None

                lecture_data = session_data[session_id]['lecture_data']

                # Ensure the output directory exists before writing into it.
                os.makedirs("output", exist_ok=True)
                output_path = os.path.join("output", f"lecture_{session_id}.pdf")
                success = lecture_generator.generate_pdf(lecture_data, output_path)

                return output_path if success else None

            except Exception as e:
                logger.error(f"PDF generation error: {str(e)}")
                return None

        def generate_audio_download(session_id):
            """Synthesize lecture audio and return the file path (or None)."""
            global session_data

            try:
                if session_id not in session_data:
                    return None

                lecture_data = session_data[session_id]['lecture_data']

                # Ensure the output directory exists before writing into it.
                os.makedirs("output", exist_ok=True)
                output_path = os.path.join("output", f"lecture_audio_{session_id}.mp3")
                result = voice_synthesizer.synthesize_lecture(
                    lecture_data['content'],
                    voice="nova",
                    output_path=output_path
                )

                return result['file_path'] if result['success'] else None

            except Exception as e:
                logger.error(f"Audio generation error: {str(e)}")
                return None

        def chat_handler(message, history, session_id):
            """Handle a chat turn; returns (updated_history, cleared_input)."""
            if not message.strip():
                return history, ""

            try:
                response_result = chatbot.get_response(session_id, message)

                history.append({"role": "user", "content": message})
                if response_result['success']:
                    history.append({"role": "assistant", "content": response_result['response']})
                else:
                    history.append({"role": "assistant", "content": f"Error: {response_result['error']}"})

                return history, ""

            except Exception as e:
                logger.error(f"Chat error: {str(e)}")
                history.append({"role": "user", "content": message})
                history.append({"role": "assistant", "content": f"Sorry, I encountered an error: {str(e)}"})
                return history, ""

        def clear_chat_handler(session_id):
            """Clear chat history, rotating to a fresh session id.

            Existing PDF/lecture content (if any) is carried over into the new
            session so the user can keep asking about it.
            """
            chatbot.clear_session(session_id)
            new_session_id = str(uuid.uuid4())

            if session_id in session_data:
                pdf_content = session_data[session_id]['pdf_data']['text']
                lecture_content = session_data[session_id]['lecture_data']['content']
                chatbot.create_session(new_session_id, pdf_content, lecture_content)
                session_data[new_session_id] = session_data[session_id]
                del session_data[session_id]

            return [], new_session_id

        def get_chat_stats(session_id):
            """Get chat statistics for the stats panel."""
            return chatbot.get_session_stats(session_id)

        # Wire up event handlers
        process_btn.click(
            fn=process_pdf_handler,
            inputs=[pdf_upload, lecture_style, include_examples, learning_objectives, session_id_state],
            outputs=[processing_status, pdf_info, session_id_state]
        ).then(
            fn=update_lecture_display,
            inputs=[session_id_state],
            outputs=[lecture_title, lecture_content]
        )

        download_pdf_btn.click(
            fn=generate_pdf_download,
            inputs=[session_id_state],
            outputs=[pdf_download]
        )

        download_audio_btn.click(
            fn=generate_audio_download,
            inputs=[session_id_state],
            outputs=[audio_download]
        )

        send_btn.click(
            fn=chat_handler,
            inputs=[msg_input, chatbot_interface, session_id_state],
            outputs=[chatbot_interface, msg_input]
        )

        msg_input.submit(
            fn=chat_handler,
            inputs=[msg_input, chatbot_interface, session_id_state],
            outputs=[chatbot_interface, msg_input]
        )

        clear_chat_btn.click(
            fn=clear_chat_handler,
            inputs=[session_id_state],
            outputs=[chatbot_interface, session_id_state]
        )

        refresh_stats_btn.click(
            fn=get_chat_stats,
            inputs=[session_id_state],
            outputs=[chat_stats]
        )

    return interface
app/lecture_generator.py ADDED
@@ -0,0 +1,538 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langgraph.graph import Graph, START, END
from langgraph.graph.message import add_messages
from typing_extensions import TypedDict, Annotated
from typing import Dict, List, Any, Optional
import openai
import os
import logging
from datetime import datetime
from reportlab.lib.pagesizes import letter, A4
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.lib.colors import HexColor
from dotenv import load_dotenv

# Read API keys and tracing config from a local .env file.
load_dotenv()

logger = logging.getLogger(__name__)

# Module-level key for legacy openai usage; the class below also builds its
# own openai.OpenAI client from the same environment variable.
openai.api_key = os.getenv("OPENAI_API_KEY", "")
23
+
24
class LectureState(TypedDict):
    """Shared state dict threaded through the LangGraph lecture pipeline."""
    pdf_content: str                  # raw extracted PDF text ("" when objectives-only)
    style: str                        # lecture style, e.g. "academic" — see UI dropdown
    include_examples: bool
    learning_objectives: str          # free-text goals supplied by the user
    analysis: Dict[str, Any]          # filled by the analyze_content node
    outline: Dict[str, Any]           # filled by the create_outline node
    sections: List[Dict[str, Any]]    # filled by the generate_sections node
    lecture_content: str              # final compiled lecture text
    title: str
    metadata: Dict[str, Any]
    messages: Annotated[list, add_messages]  # LangGraph message accumulator
36
+
37
+ class LectureGenerator:
38
+ """AI agent for converting PDF content into structured lectures using LangGraph"""
39
+
40
+ def __init__(self):
41
+ self.client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY", ""))
42
+ self.graph = self._build_graph()
43
+
44
+ def _build_graph(self) -> Graph:
45
+ """Build the LangGraph workflow for lecture generation"""
46
+ workflow = Graph()
47
+
48
+ # Add nodes
49
+ workflow.add_node("analyze_content", self._analyze_content)
50
+ workflow.add_node("create_outline", self._create_outline)
51
+ workflow.add_node("generate_sections", self._generate_sections)
52
+ workflow.add_node("compile_lecture", self._compile_lecture)
53
+ workflow.add_node("finalize_output", self._finalize_output)
54
+
55
+ # Add edges
56
+ workflow.add_edge(START, "analyze_content")
57
+ workflow.add_edge("analyze_content", "create_outline")
58
+ workflow.add_edge("create_outline", "generate_sections")
59
+ workflow.add_edge("generate_sections", "compile_lecture")
60
+ workflow.add_edge("compile_lecture", "finalize_output")
61
+ workflow.add_edge("finalize_output", END)
62
+
63
+ return workflow.compile()
64
+
65
+ def generate_lecture(self, pdf_content: str, style: str = "academic", include_examples: bool = True, learning_objectives: str = "") -> Dict[str, Any]:
66
+ """Generate a structured lecture from PDF content"""
67
+ try:
68
+ initial_state = LectureState(
69
+ pdf_content=pdf_content,
70
+ style=style,
71
+ include_examples=include_examples,
72
+ learning_objectives=learning_objectives,
73
+ analysis={},
74
+ outline={},
75
+ sections=[],
76
+ lecture_content="",
77
+ title="",
78
+ metadata={},
79
+ messages=[]
80
+ )
81
+
82
+ # Run the graph
83
+ result = self.graph.invoke(initial_state)
84
+
85
+ return {
86
+ 'success': True,
87
+ 'title': result['title'],
88
+ 'content': result['lecture_content'],
89
+ 'sections': result['sections'],
90
+ 'metadata': result['metadata'],
91
+ 'word_count': len(result['lecture_content'].split()),
92
+ 'estimated_duration': self._estimate_duration(result['lecture_content'])
93
+ }
94
+
95
+ except Exception as e:
96
+ logger.error(f"Lecture generation failed: {str(e)}")
97
+ return {
98
+ 'success': False,
99
+ 'error': str(e),
100
+ 'title': '',
101
+ 'content': '',
102
+ 'sections': [],
103
+ 'metadata': {},
104
+ 'word_count': 0,
105
+ 'estimated_duration': 0
106
+ }
107
+
108
+ def _analyze_content(self, state: LectureState) -> LectureState:
109
+ """Analyze content or learning objectives to understand structure and main topics"""
110
+ try:
111
+ if state['pdf_content'].strip():
112
+ # PDF-based analysis
113
+ learning_context = f"\n\nUser Learning Objectives: {state['learning_objectives']}" if state['learning_objectives'].strip() else ""
114
+
115
+ prompt = f"""
116
+ Analyze the following document content and provide a structured analysis:
117
+
118
+ Content: {state['pdf_content'][:5000]}...{learning_context}
119
+
120
+ Please provide:
121
+ 1. Main topic/subject
122
+ 2. Key themes and concepts
123
+ 3. Document type (research paper, textbook, article, etc.)
124
+ 4. Complexity level (beginner, intermediate, advanced)
125
+ 5. Target audience
126
+ 6. Key learning objectives (consider user's stated objectives if provided)
127
+
128
+ Format your response as a JSON object.
129
+ """
130
+ else:
131
+ # Learning objectives-based analysis
132
+ prompt = f"""
133
+ Create a structured analysis for a lecture based on these learning objectives:
134
+
135
+ Learning Objectives: {state['learning_objectives']}
136
+
137
+ Please provide:
138
+ 1. Main topic/subject (extracted from learning objectives)
139
+ 2. Key themes and concepts that should be covered
140
+ 3. Document type: "educational lecture"
141
+ 4. Complexity level (beginner, intermediate, advanced) based on objectives
142
+ 5. Target audience (inferred from objectives)
143
+ 6. Detailed learning objectives breakdown
144
+
145
+ Format your response as a JSON object.
146
+ """
147
+
148
+ response = self.client.chat.completions.create(
149
+ model="gpt-4o-mini",
150
+ messages=[{"role": "user", "content": prompt}],
151
+ temperature=0.3
152
+ )
153
+
154
+ # Parse the analysis
155
+ import json
156
+ try:
157
+ analysis = json.loads(response.choices[0].message.content)
158
+ except:
159
+ # Fallback parsing if JSON parsing fails
160
+ if state['learning_objectives'].strip():
161
+ # Extract topic from learning objectives
162
+ topic = state['learning_objectives'].split('.')[0].split(',')[0][:50]
163
+ main_topic = topic if topic else "Educational Lecture"
164
+ else:
165
+ main_topic = "Document Analysis"
166
+
167
+ analysis = {
168
+ "main_topic": main_topic,
169
+ "key_themes": ["Content Summary"],
170
+ "document_type": "Document",
171
+ "complexity_level": "intermediate",
172
+ "target_audience": "General",
173
+ "learning_objectives": ["Understand main concepts"]
174
+ }
175
+
176
+ state['analysis'] = analysis
177
+ return state
178
+
179
+ except Exception as e:
180
+ logger.error(f"Content analysis failed: {str(e)}")
181
+ if state['learning_objectives'].strip():
182
+ # Extract topic from learning objectives for fallback
183
+ topic = state['learning_objectives'].split('.')[0].split(',')[0][:50]
184
+ main_topic = topic if topic else "Educational Lecture"
185
+ else:
186
+ main_topic = "Document Analysis"
187
+
188
+ state['analysis'] = {
189
+ "main_topic": main_topic,
190
+ "key_themes": ["Content Summary"],
191
+ "document_type": "Document",
192
+ "complexity_level": "intermediate",
193
+ "target_audience": "General",
194
+ "learning_objectives": ["Understand main concepts"]
195
+ }
196
+ return state
197
+
198
+ def _create_outline(self, state: LectureState) -> LectureState:
199
+ """Create a detailed lecture outline based on analysis"""
200
+ try:
201
+ analysis = state['analysis']
202
+ style = state['style']
203
+
204
+ learning_context = f"\n\nUser Learning Objectives: {state['learning_objectives']}" if state['learning_objectives'].strip() else ""
205
+
206
+ prompt = f"""
207
+ Based on this analysis, create a detailed lecture outline:
208
+
209
+ Analysis: {analysis}
210
+ Style: {style}
211
+ Include Examples: {state['include_examples']}{learning_context}
212
+
213
+ Create an outline with:
214
+ 1. Engaging title that reflects user's learning goals
215
+ 2. Introduction section
216
+ 3. 3-5 main sections with subsections (prioritize user's stated learning objectives)
217
+ 4. Conclusion section
218
+ 5. Estimated time for each section
219
+
220
+ Format as JSON with sections array containing title, subsections, and duration.
221
+ """
222
+
223
+ response = self.client.chat.completions.create(
224
+ model="gpt-4o-mini",
225
+ messages=[{"role": "user", "content": prompt}],
226
+ temperature=0.4
227
+ )
228
+
229
+ import json
230
+ try:
231
+ outline = json.loads(response.choices[0].message.content)
232
+ except:
233
+ # Fallback outline with proper title
234
+ title = analysis.get("main_topic", "Educational Lecture")
235
+ if state['learning_objectives'].strip() and not state['pdf_content'].strip():
236
+ # For learning objectives only, create a more descriptive title
237
+ objectives_words = state['learning_objectives'].split()[:5] # First 5 words
238
+ title = " ".join(objectives_words).title()
239
+
240
+ outline = {
241
+ "title": title,
242
+ "sections": [
243
+ {"title": "Introduction", "subsections": ["Overview"], "duration": 5},
244
+ {"title": "Core Concepts", "subsections": ["Key Points"], "duration": 15},
245
+ {"title": "Conclusion", "subsections": ["Summary"], "duration": 5}
246
+ ]
247
+ }
248
+
249
+ state['outline'] = outline
250
+ state['title'] = outline.get('title', 'Generated Lecture')
251
+ return state
252
+
253
+ except Exception as e:
254
+ logger.error(f"Outline creation failed: {str(e)}")
255
+ # Generate appropriate fallback title based on learning objectives
256
+ if state['learning_objectives'].strip():
257
+ objectives_words = state['learning_objectives'].split()[:5]
258
+ title = " ".join(objectives_words).title() if objectives_words else "Educational Lecture"
259
+ else:
260
+ title = "Generated Lecture"
261
+
262
+ state['outline'] = {
263
+ "title": title,
264
+ "sections": [
265
+ {"title": "Introduction", "subsections": ["Overview"], "duration": 5},
266
+ {"title": "Core Concepts", "subsections": ["Key Points"], "duration": 15},
267
+ {"title": "Conclusion", "subsections": ["Summary"], "duration": 5}
268
+ ]
269
+ }
270
+ state['title'] = title
271
+ return state
272
+
273
+ def _generate_sections(self, state: LectureState) -> LectureState:
274
+ """Generate detailed content for each section"""
275
+ try:
276
+ sections = []
277
+ outline = state['outline']
278
+ pdf_content = state['pdf_content']
279
+ style = state['style']
280
+
281
+ for section in outline.get('sections', []):
282
+ learning_context = f"\n\nUser Learning Objectives: {state['learning_objectives']}" if state['learning_objectives'].strip() else ""
283
+
284
+ if pdf_content.strip():
285
+ # PDF-based section generation
286
+ section_prompt = f"""
287
+ Generate detailed content for this lecture section:
288
+
289
+ Section Title: {section['title']}
290
+ Subsections: {section.get('subsections', [])}
291
+ Style: {style}
292
+ Include Examples: {state['include_examples']}{learning_context}
293
+
294
+ Source Material: {pdf_content[:3000]}...
295
+
296
+ Create engaging, educational content that:
297
+ 1. Explains concepts clearly (focus on user's learning objectives if provided)
298
+ 2. Includes relevant examples if requested
299
+ 3. Uses appropriate tone for {style} style
300
+ 4. Builds logically on previous sections
301
+ 5. Addresses the user's specific learning goals
302
+
303
+ Format with clear headings and structured paragraphs.
304
+ """
305
+ else:
306
+ # Learning objectives-based section generation
307
+ section_prompt = f"""
308
+ Generate comprehensive educational content for this lecture section:
309
+
310
+ Section Title: {section['title']}
311
+ Subsections: {section.get('subsections', [])}
312
+ Style: {style}
313
+ Include Examples: {state['include_examples']}
314
+ Learning Objectives: {state['learning_objectives']}
315
+
316
+ Create engaging, educational content that:
317
+ 1. Thoroughly explains concepts related to the learning objectives
318
+ 2. Includes practical examples and real-world applications if requested
319
+ 3. Uses appropriate {style} tone and language
320
+ 4. Builds logically on previous sections
321
+ 5. Directly addresses the stated learning objectives
322
+ 6. Provides comprehensive coverage of the topic
323
+
324
+ Format with clear headings and structured paragraphs.
325
+ """
326
+
327
+ response = self.client.chat.completions.create(
328
+ model="gpt-4o-mini",
329
+ messages=[{"role": "user", "content": section_prompt}],
330
+ temperature=0.5
331
+ )
332
+
333
+ section_content = response.choices[0].message.content
334
+
335
+ sections.append({
336
+ 'title': section['title'],
337
+ 'content': section_content,
338
+ 'duration': section.get('duration', 10),
339
+ 'subsections': section.get('subsections', [])
340
+ })
341
+
342
+ state['sections'] = sections
343
+ return state
344
+
345
+ except Exception as e:
346
+ logger.error(f"Section generation failed: {str(e)}")
347
+ # Create basic fallback sections
348
+ state['sections'] = [
349
+ {
350
+ 'title': 'Introduction',
351
+ 'content': 'Welcome to this lecture based on the provided document.',
352
+ 'duration': 5,
353
+ 'subsections': ['Overview']
354
+ },
355
+ {
356
+ 'title': 'Main Content',
357
+ 'content': 'Here are the key points from the document.',
358
+ 'duration': 15,
359
+ 'subsections': ['Key Points']
360
+ }
361
+ ]
362
+ return state
363
+
364
+ def _compile_lecture(self, state: LectureState) -> LectureState:
365
+ """Compile all sections into a cohesive lecture"""
366
+ try:
367
+ title = state['title']
368
+ sections = state['sections']
369
+
370
+ # Create dynamic introduction based on content type
371
+ if state['pdf_content'].strip():
372
+ # PDF-based lecture introduction
373
+ intro = f"""# {title}
374
+
375
+ Welcome to this comprehensive lecture on {title}. This presentation has been crafted from your uploaded document to provide you with a thorough understanding of the key concepts and insights.
376
+
377
+ ## Learning Objectives
378
+ By the end of this lecture, you will be able to:
379
+ - Understand the main concepts presented in the source material
380
+ - Apply the knowledge to practical situations
381
+ - Engage in meaningful discussions about the topic
382
+
383
+ ---
384
+ """
385
+ else:
386
+ # Topic-based lecture introduction
387
+ learning_goals = state['learning_objectives'][:200] + "..." if len(state['learning_objectives']) > 200 else state['learning_objectives']
388
+ intro = f"""# {title}
389
+
390
+ Welcome to this comprehensive lecture on {title}. This presentation has been crafted to address your specific learning objectives and provide you with a thorough understanding of the topic.
391
+
392
+ ## Your Learning Goals
393
+ {learning_goals}
394
+
395
+ ## What You'll Learn
396
+ By the end of this lecture, you will be able to:
397
+ - Master the concepts you've requested to learn
398
+ - Apply this knowledge to real-world scenarios
399
+ - Build upon these foundations for further learning
400
+
401
+ ---
402
+ """
403
+
404
+ # Compile all sections
405
+ compiled_content = intro
406
+ for i, section in enumerate(sections, 1):
407
+ compiled_content += f"\n## {i}. {section['title']}\n\n"
408
+ compiled_content += section['content']
409
+ compiled_content += "\n\n---\n"
410
+
411
+ # Add conclusion
412
+ compiled_content += """
413
+ ## Conclusion
414
+
415
+ Thank you for joining this lecture. We've covered the essential concepts and insights from the source material. Remember to review the key points and consider how they apply to your understanding of the subject.
416
+
417
+ ### Key Takeaways
418
+ - Review the main concepts discussed
419
+ - Consider practical applications
420
+ - Engage with additional resources for deeper learning
421
+
422
+ ---
423
+
424
+ *This lecture was generated using AI to transform written content into an engaging educational experience.*
425
+ """
426
+
427
+ state['lecture_content'] = compiled_content
428
+ return state
429
+
430
+ except Exception as e:
431
+ logger.error(f"Lecture compilation failed: {str(e)}")
432
+ state['lecture_content'] = f"# {state['title']}\n\nThis is a generated lecture based on the provided document."
433
+ return state
434
+
435
+ def _finalize_output(self, state: LectureState) -> LectureState:
436
+ """Finalize the output with metadata"""
437
+ try:
438
+ word_count = len(state['lecture_content'].split())
439
+
440
+ state['metadata'] = {
441
+ 'generated_at': datetime.now().isoformat(),
442
+ 'style': state['style'],
443
+ 'include_examples': state['include_examples'],
444
+ 'word_count': word_count,
445
+ 'estimated_duration': self._estimate_duration(state['lecture_content']),
446
+ 'sections_count': len(state['sections']),
447
+ 'analysis': state['analysis']
448
+ }
449
+
450
+ return state
451
+
452
+ except Exception as e:
453
+ logger.error(f"Output finalization failed: {str(e)}")
454
+ state['metadata'] = {
455
+ 'generated_at': datetime.now().isoformat(),
456
+ 'style': state['style'],
457
+ 'include_examples': state['include_examples'],
458
+ 'word_count': 0,
459
+ 'estimated_duration': 0,
460
+ 'sections_count': 0,
461
+ 'analysis': {}
462
+ }
463
+ return state
464
+
465
+ def _estimate_duration(self, content: str) -> int:
466
+ """Estimate lecture duration in minutes based on word count"""
467
+ word_count = len(content.split())
468
+ # Assume average speaking rate of 150 words per minute
469
+ return max(1, round(word_count / 150))
470
+
471
    def generate_pdf(self, lecture_data: Dict[str, Any], output_path: str) -> bool:
        """Generate PDF version of the lecture.

        Renders the title, a small metadata header, and the markdown-ish
        lecture body into a ReportLab document written to *output_path*.

        Args:
            lecture_data: dict with 'title', 'content' and a 'metadata'
                sub-dict ('generated_at', 'style', 'estimated_duration',
                'word_count') — as produced by the lecture pipeline.
            output_path: file path for the generated PDF.

        Returns:
            True on success; False (with the error logged) on any failure.
        """
        try:
            doc = SimpleDocTemplate(output_path, pagesize=A4)
            styles = getSampleStyleSheet()

            # Custom styles layered on top of the stock stylesheet.
            title_style = ParagraphStyle(
                'CustomTitle',
                parent=styles['Heading1'],
                fontSize=24,
                spaceAfter=30,
                textColor=HexColor('#2C3E50'),
                alignment=1  # Center alignment
            )

            heading_style = ParagraphStyle(
                'CustomHeading',
                parent=styles['Heading2'],
                fontSize=18,
                spaceAfter=20,
                spaceBefore=20,
                textColor=HexColor('#34495E')
            )

            content = []

            # Title
            content.append(Paragraph(lecture_data['title'], title_style))
            content.append(Spacer(1, 20))

            # Metadata rendered as one rich-text paragraph (ReportLab markup).
            metadata_text = f"""
            <b>Generated:</b> {lecture_data['metadata']['generated_at']}<br/>
            <b>Style:</b> {lecture_data['metadata']['style'].title()}<br/>
            <b>Duration:</b> ~{lecture_data['metadata']['estimated_duration']} minutes<br/>
            <b>Word Count:</b> {lecture_data['metadata']['word_count']}
            """
            content.append(Paragraph(metadata_text, styles['Normal']))
            content.append(Spacer(1, 30))

            # Lightweight line-by-line markdown handling of the lecture body.
            lecture_lines = lecture_data['content'].split('\n')

            for line in lecture_lines:
                line = line.strip()
                if not line:
                    content.append(Spacer(1, 12))
                elif line.startswith('# '):
                    # Main title (already added)
                    continue
                elif line.startswith('## '):
                    # Section heading
                    content.append(Paragraph(line[3:], heading_style))
                elif line.startswith('---'):
                    # Section separator
                    content.append(Spacer(1, 20))
                else:
                    # Regular content
                    content.append(Paragraph(line, styles['Normal']))
                    content.append(Spacer(1, 6))

            doc.build(content)
            return True

        except Exception as e:
            logger.error(f"PDF generation failed: {str(e)}")
            return False
app/models.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pydantic import BaseModel, Field
from typing import List, Optional, Dict, Any
from datetime import datetime


class ChatMessage(BaseModel):
    """A single message exchanged in a chat session."""

    role: str  # "user" or "assistant"
    content: str
    # Field(default_factory=...) evaluates datetime.now() per instance.
    # The original `= datetime.now()` default was evaluated once at module
    # import, so every message shared the same frozen timestamp.
    timestamp: datetime = Field(default_factory=datetime.now)


class ChatSession(BaseModel):
    """Per-user session state: chat history plus source material for RAG."""

    session_id: str
    messages: List[ChatMessage] = Field(default_factory=list)
    pdf_content: Optional[str] = None
    lecture_content: Optional[str] = None
    # Per-instance creation time (same import-time-default fix as above).
    created_at: datetime = Field(default_factory=datetime.now)


class LectureRequest(BaseModel):
    """Input payload for lecture generation."""

    pdf_content: str
    style: str = "academic"  # academic, casual, detailed
    include_examples: bool = True


class LectureResponse(BaseModel):
    """Generated lecture returned to the client."""

    title: str
    content: str
    sections: List[Dict[str, Any]]
    word_count: int
    estimated_duration: int  # in minutes


class ProcessingStatus(BaseModel):
    """Progress report for a long-running generation job."""

    status: str  # "processing", "completed", "error"
    progress: int  # 0-100
    message: str
    result: Optional[Dict[str, Any]] = None
app/pdf_processor.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import PyPDF2
2
+ import pdfplumber
3
+ from typing import Dict, List, Optional, Union, Any
4
+ import re
5
+ import logging
6
+ import io
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
class PDFProcessor:
    """Handles PDF file processing and text extraction.

    Accepts file-like objects or raw bytes (as supplied by Gradio),
    extracts text with pdfplumber, and falls back to PyPDF2 when that
    fails.
    """

    def __init__(self):
        # Only PDF input is supported.
        self.supported_formats = ['.pdf']

    def extract_text_from_pdf(self, pdf_file) -> Dict[str, Any]:
        """
        Extract text content from a PDF file.

        Returns a dict with keys: success, text, pages, metadata,
        word_count, character_count. On pdfplumber failure, delegates
        to the PyPDF2 fallback.
        """
        try:
            # Gradio may hand us raw bytes; wrap them for the parsers.
            if isinstance(pdf_file, bytes):
                pdf_file = io.BytesIO(pdf_file)

            # pdfplumber handles complex layouts better than PyPDF2.
            with pdfplumber.open(pdf_file) as pdf:
                text_content = []
                metadata = {
                    'total_pages': len(pdf.pages),
                    'title': '',
                    'author': '',
                    'subject': ''
                }

                if pdf.metadata:
                    # `or ''` guards against metadata keys present but None.
                    metadata.update({
                        'title': pdf.metadata.get('Title') or '',
                        'author': pdf.metadata.get('Author') or '',
                        'subject': pdf.metadata.get('Subject') or ''
                    })

                for page_num, page in enumerate(pdf.pages, 1):
                    page_text = page.extract_text()
                    if page_text:
                        text_content.append({
                            'page_number': page_num,
                            'text': self._clean_text(page_text)
                        })

            combined_text = '\n\n'.join(page['text'] for page in text_content)

            return {
                'success': True,
                'text': combined_text,
                'pages': text_content,
                'metadata': metadata,
                'word_count': len(combined_text.split()),
                'character_count': len(combined_text)
            }

        except Exception as e:
            logger.error(f"pdfplumber extraction failed: {str(e)}")
            # Fallback to PyPDF2
            return self._extract_with_pypdf2(pdf_file)

    def _extract_with_pypdf2(self, pdf_file) -> Dict[str, Any]:
        """Fallback extraction using PyPDF2; same return shape as above."""
        try:
            # Handle bytes input from Gradio
            if isinstance(pdf_file, bytes):
                pdf_file = io.BytesIO(pdf_file)
            else:
                pdf_file.seek(0)  # Reset file pointer after the failed first pass
            reader = PyPDF2.PdfReader(pdf_file)

            text_content = []
            metadata = {
                'total_pages': len(reader.pages),
                'title': '',
                'author': '',
                'subject': ''
            }

            if reader.metadata:
                metadata.update({
                    'title': reader.metadata.get('/Title') or '',
                    'author': reader.metadata.get('/Author') or '',
                    'subject': reader.metadata.get('/Subject') or ''
                })

            for page_num, page in enumerate(reader.pages, 1):
                page_text = page.extract_text()
                if page_text:
                    text_content.append({
                        'page_number': page_num,
                        'text': self._clean_text(page_text)
                    })

            combined_text = '\n\n'.join(page['text'] for page in text_content)

            return {
                'success': True,
                'text': combined_text,
                'pages': text_content,
                'metadata': metadata,
                'word_count': len(combined_text.split()),
                'character_count': len(combined_text)
            }

        except Exception as e:
            logger.error(f"PyPDF2 extraction failed: {str(e)}")
            return {
                'success': False,
                'error': f"Failed to extract text from PDF: {str(e)}",
                'text': '',
                'pages': [],
                'metadata': {},
                'word_count': 0,
                'character_count': 0
            }

    def _clean_text(self, text: str) -> str:
        """Clean and normalize extracted text.

        Bug fix: the original collapsed ALL whitespace (including
        newlines) as its first step, which turned every newline-based
        cleanup below it into dead code. Line-structure fixes now run
        first, and only horizontal whitespace is collapsed.
        """
        # Re-join words hyphenated across line breaks ("exam-\nple" -> "example").
        text = re.sub(r'(\w)-\n(\w)', r'\1\2', text)

        # Drop bare page numbers sitting on their own line.
        text = re.sub(r'\n\d+\n', '\n', text)

        # Split words concatenated by extraction ("wordNext" -> "word Next").
        text = re.sub(r'([a-z])([A-Z])', r'\1 \2', text)

        # Collapse runs of spaces/tabs while preserving line breaks.
        text = re.sub(r'[ \t]+', ' ', text)

        # Limit consecutive blank lines to a single paragraph break.
        text = re.sub(r'\n{3,}', '\n\n', text)

        return text.strip()

    def validate_pdf(self, pdf_file) -> Dict[str, Any]:
        """Validate a PDF before processing (size cap and format check)."""
        try:
            # Handle bytes input from Gradio
            if isinstance(pdf_file, bytes):
                file_size = len(pdf_file)
                pdf_file = io.BytesIO(pdf_file)
            else:
                # Measure size via seek to end, then rewind.
                pdf_file.seek(0, 2)
                file_size = pdf_file.tell()
                pdf_file.seek(0)

            if file_size > 50 * 1024 * 1024:  # 50MB limit
                return {
                    'valid': False,
                    'error': 'File size exceeds 50MB limit'
                }

            # Try to open the PDF to validate the format.
            try:
                reader = PyPDF2.PdfReader(pdf_file)
                if len(reader.pages) == 0:
                    return {
                        'valid': False,
                        'error': 'PDF contains no pages'
                    }
                pdf_file.seek(0)  # Reset so extraction can reread from the start

                return {
                    'valid': True,
                    'pages': len(reader.pages),
                    'size_mb': round(file_size / (1024 * 1024), 2)
                }

            except Exception as e:
                return {
                    'valid': False,
                    'error': f'Invalid PDF format: {str(e)}'
                }

        except Exception as e:
            return {
                'valid': False,
                'error': f'Error validating PDF: {str(e)}'
            }
app/rag_system.py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import chromadb
2
+ from chromadb.utils import embedding_functions
3
+ import openai
4
+ import os
5
+ import logging
6
+ from typing import List, Dict, Any, Optional
7
+ import uuid
8
+ from datetime import datetime
9
+ import numpy as np
10
+ from dotenv import load_dotenv
11
+
12
+ # Load environment variables
13
+ load_dotenv()
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
class RAGSystem:
    """Retrieval-Augmented Generation store for chatbot functionality.

    Stores PDF and lecture text as overlapping chunks in two ChromaDB
    collections (one per document type, both embedded with OpenAI
    text-embedding-ada-002) and retrieves the closest chunks for a
    query, scoped to a session.
    """

    def __init__(self, persist_directory: str = "chroma_db"):
        self.client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY", ""))

        # Persistent vector store on local disk.
        self.chroma_client = chromadb.PersistentClient(path=persist_directory)

        # One embedding function shared by both collections.
        self.embedding_function = embedding_functions.OpenAIEmbeddingFunction(
            api_key=os.getenv("OPENAI_API_KEY", ""),
            model_name="text-embedding-ada-002"
        )

        # Separate collections so source PDFs and generated lectures can
        # be queried (and cleared) independently.
        self.pdf_collection = self._get_or_create_collection("pdf_documents")
        self.lecture_collection = self._get_or_create_collection("lecture_content")

    def _get_or_create_collection(self, name: str):
        """Return the named collection, creating it on first use."""
        try:
            return self.chroma_client.get_collection(
                name=name,
                embedding_function=self.embedding_function
            )
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt /
            # SystemExit are no longer swallowed.
            return self.chroma_client.create_collection(
                name=name,
                embedding_function=self.embedding_function,
                metadata={"description": f"Collection for {name}"}
            )

    def _add_chunks(self, collection, session_id: str, content: str,
                    doc_type: str, metadata: Optional[Dict[str, Any]]) -> int:
        """Chunk *content* and insert it into *collection*; return chunk count.

        Shared implementation for add_pdf_content / add_lecture_content
        (the original duplicated this body in both methods).
        """
        chunks = self._split_text(content, chunk_size=1000, overlap=200)

        base_metadata = {
            "session_id": session_id,
            "document_type": doc_type,
            "added_at": datetime.now().isoformat(),
            **(metadata or {})
        }

        documents = []
        metadatas = []
        ids = []
        for i, chunk in enumerate(chunks):
            # Unique, session-scoped id per chunk.
            doc_id = f"{session_id}_{doc_type}_{i}_{uuid.uuid4().hex[:8]}"
            documents.append(chunk)
            metadatas.append({
                **base_metadata,
                "chunk_index": i,
                "chunk_id": doc_id
            })
            ids.append(doc_id)

        collection.add(documents=documents, metadatas=metadatas, ids=ids)
        return len(chunks)

    def add_pdf_content(self, session_id: str, pdf_content: str,
                        metadata: Optional[Dict[str, Any]] = None) -> bool:
        """Add PDF content to the vector database. Returns True on success."""
        try:
            count = self._add_chunks(self.pdf_collection, session_id,
                                     pdf_content, "pdf", metadata)
            logger.info(f"Added {count} PDF chunks for session {session_id}")
            return True
        except Exception as e:
            logger.error(f"Failed to add PDF content: {str(e)}")
            return False

    def add_lecture_content(self, session_id: str, lecture_content: str,
                            metadata: Optional[Dict[str, Any]] = None) -> bool:
        """Add lecture content to the vector database. Returns True on success."""
        try:
            count = self._add_chunks(self.lecture_collection, session_id,
                                     lecture_content, "lecture", metadata)
            logger.info(f"Added {count} lecture chunks for session {session_id}")
            return True
        except Exception as e:
            logger.error(f"Failed to add lecture content: {str(e)}")
            return False

    def retrieve_relevant_content(self, session_id: str, query: str,
                                  n_results: int = 5) -> Dict[str, Any]:
        """Retrieve the chunks most relevant to *query* for this session.

        Queries both collections, merges the hits, and returns the
        *n_results* closest by embedding distance (smaller = closer).
        """
        try:
            pdf_results = self.pdf_collection.query(
                query_texts=[query],
                n_results=n_results,
                where={"session_id": session_id}
            )
            lecture_results = self.lecture_collection.query(
                query_texts=[query],
                n_results=n_results,
                where={"session_id": session_id}
            )

            all_results = []
            for source, results in (("pdf", pdf_results),
                                    ("lecture", lecture_results)):
                # Chroma returns one inner list per query text; we send one.
                if results['documents'] and results['documents'][0]:
                    for i, doc in enumerate(results['documents'][0]):
                        all_results.append({
                            'content': doc,
                            'metadata': results['metadatas'][0][i],
                            'distance': results['distances'][0][i],
                            'source': source
                        })

            # Rank combined hits by distance (ascending = most relevant first).
            all_results.sort(key=lambda x: x['distance'])

            return {
                'success': True,
                'results': all_results[:n_results],
                'total_found': len(all_results)
            }

        except Exception as e:
            logger.error(f"Content retrieval failed: {str(e)}")
            return {
                'success': False,
                'results': [],
                'total_found': 0,
                'error': str(e)
            }

    def _split_text(self, text: str, chunk_size: int = 1000,
                    overlap: int = 200) -> List[str]:
        """Split *text* into chunks of at most *chunk_size* characters.

        Consecutive chunks overlap by roughly *overlap* characters, and
        chunk ends are snapped to a sentence boundary when one is found
        within the last 100 characters of the cut point.
        """
        if len(text) <= chunk_size:
            return [text]

        chunks = []
        start = 0

        while start < len(text):
            end = start + chunk_size

            if end < len(text):
                # Prefer ending on a sentence boundary near the cut point.
                search_start = max(end - 100, start)
                sentence_ends = []
                for punct in ['. ', '! ', '? ', '\n\n']:
                    pos = text.rfind(punct, search_start, end)
                    if pos > start:
                        sentence_ends.append(pos + len(punct))
                if sentence_ends:
                    end = max(sentence_ends)

            chunk = text[start:end].strip()
            if chunk:
                chunks.append(chunk)

            # The tail has been emitted — stop instead of looping around
            # and emitting a redundant chunk fully contained in this one.
            if end >= len(text):
                break

            # Step forward with overlap; max() guarantees progress even
            # when overlap >= chunk_size (the original could walk
            # backwards or loop forever in that configuration).
            start = max(end - overlap, start + 1)

        return chunks

    def get_session_stats(self, session_id: str) -> Dict[str, Any]:
        """Return chunk counts stored for *session_id* (zeros on error)."""
        try:
            pdf_count = len(self.pdf_collection.get(
                where={"session_id": session_id}
            )['ids'])
            lecture_count = len(self.lecture_collection.get(
                where={"session_id": session_id}
            )['ids'])

            return {
                'pdf_chunks': pdf_count,
                'lecture_chunks': lecture_count,
                'total_chunks': pdf_count + lecture_count
            }

        except Exception as e:
            logger.error(f"Failed to get session stats: {str(e)}")
            return {
                'pdf_chunks': 0,
                'lecture_chunks': 0,
                'total_chunks': 0
            }

    def clear_session_data(self, session_id: str) -> bool:
        """Delete every stored chunk belonging to *session_id*."""
        try:
            pdf_ids = self.pdf_collection.get(
                where={"session_id": session_id}
            )['ids']
            lecture_ids = self.lecture_collection.get(
                where={"session_id": session_id}
            )['ids']

            if pdf_ids:
                self.pdf_collection.delete(ids=pdf_ids)
            if lecture_ids:
                self.lecture_collection.delete(ids=lecture_ids)

            logger.info(f"Cleared data for session {session_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to clear session data: {str(e)}")
            return False
app/utils.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ import hashlib
4
+ from typing import Dict, Any, Optional
5
+ from datetime import datetime
6
+ import json
7
+ from dotenv import load_dotenv
8
+
9
def setup_logging():
    """Configure root logging: INFO level, console output plus app.log file."""
    log_handlers = [
        logging.StreamHandler(),
        logging.FileHandler('app.log'),
    ]
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=log_handlers,
    )
19
+
20
def ensure_directory(path: str) -> bool:
    """Create *path* (and any parents) if missing; return True on success."""
    try:
        os.makedirs(path, exist_ok=True)
    except Exception as exc:
        logging.error(f"Failed to create directory {path}: {str(exc)}")
        return False
    return True
28
+
29
def generate_file_hash(content: str) -> str:
    """Return the hexadecimal SHA-256 digest of *content* (UTF-8 encoded)."""
    digest = hashlib.sha256(content.encode())
    return digest.hexdigest()
32
+
33
def sanitize_filename(filename: str) -> str:
    """Make *filename* safe for file system use.

    Replaces characters invalid on common platforms with '_' and
    truncates the stem so the whole name fits in 255 characters,
    preserving the extension.
    """
    import re

    safe = re.sub(r'[<>:"/\\|?*]', '_', filename)
    if len(safe) <= 255:
        return safe
    stem, ext = os.path.splitext(safe)
    return stem[:255 - len(ext)] + ext
43
+
44
def format_file_size(size_bytes: int) -> str:
    """Render a byte count as a human-readable string (B/KB/MB/GB/TB)."""
    if size_bytes == 0:
        return "0B"

    import math

    units = ["B", "KB", "MB", "GB", "TB"]
    exponent = int(math.floor(math.log(size_bytes, 1024)))
    scaled = round(size_bytes / math.pow(1024, exponent), 2)
    return f"{scaled} {units[exponent]}"
55
+
56
def validate_environment():
    """Check required environment variables.

    Logs a warning listing any missing variables and returns False;
    returns True when all are present.
    """
    required_vars = ['OPENAI_API_KEY']
    missing_vars = [var for var in required_vars if not os.getenv(var)]

    if missing_vars:
        logging.warning(f"Missing environment variables: {', '.join(missing_vars)}")
        return False
    return True
70
+
71
def save_json_file(data: Dict[Any, Any], filepath: str) -> bool:
    """Serialize *data* as pretty-printed UTF-8 JSON at *filepath*.

    Creates parent directories as needed. Returns True on success,
    False (with the error logged) on failure.
    """
    try:
        parent = os.path.dirname(filepath)
        # os.makedirs('') raises, so only create a directory when the path
        # actually has one (the original unconditionally passed the dirname
        # to the helper and logged a spurious error for bare filenames).
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        return True
    except Exception as e:
        logging.error(f"Failed to save JSON file {filepath}: {str(e)}")
        return False
81
+
82
def load_json_file(filepath: str) -> Optional[Dict[Any, Any]]:
    """Read JSON from *filepath*.

    Returns the parsed object, or None when the file is missing or
    cannot be read/parsed (the error is logged).
    """
    try:
        if not os.path.exists(filepath):
            return None
        with open(filepath, 'r', encoding='utf-8') as handle:
            return json.load(handle)
    except Exception as e:
        logging.error(f"Failed to load JSON file {filepath}: {str(e)}")
        return None
93
+
94
def cleanup_old_files(directory: str, max_age_hours: int = 24) -> int:
    """Delete regular files in *directory* older than *max_age_hours*.

    Returns the number of files removed; a missing directory or any
    top-level failure yields 0 (errors are logged, never raised).
    """
    try:
        if not os.path.exists(directory):
            return 0

        cutoff_seconds = max_age_hours * 3600
        now = datetime.now()
        removed_count = 0

        for entry in os.listdir(directory):
            filepath = os.path.join(directory, entry)
            if not os.path.isfile(filepath):
                continue
            age = now - datetime.fromtimestamp(os.path.getmtime(filepath))
            if age.total_seconds() <= cutoff_seconds:
                continue
            try:
                os.remove(filepath)
            except Exception as e:
                logging.error(f"Failed to remove file {filepath}: {str(e)}")
            else:
                removed_count += 1
                logging.info(f"Removed old file: {filepath}")

        return removed_count

    except Exception as e:
        logging.error(f"Failed to cleanup directory {directory}: {str(e)}")
        return 0
121
+
122
def get_system_info() -> Dict[str, Any]:
    """Get basic system information.

    Returns a dict with the platform name, Python version, CPU count,
    total memory (GB) and disk usage for '/'; on failure inside the try
    block the dict is {'error': <message>} instead.

    NOTE(review): `import psutil` sits outside the try block, so a
    missing psutil package raises ImportError to the caller rather than
    being reported via the 'error' key — confirm that is intended.
    """
    import platform
    import psutil

    try:
        return {
            'platform': platform.system(),
            'python_version': platform.python_version(),
            'cpu_count': os.cpu_count(),
            'memory_gb': round(psutil.virtual_memory().total / (1024**3), 2),
            'disk_usage': {
                'total_gb': round(psutil.disk_usage('/').total / (1024**3), 2),
                'free_gb': round(psutil.disk_usage('/').free / (1024**3), 2)
            }
        }
    except Exception as e:
        logging.error(f"Failed to get system info: {str(e)}")
        return {'error': str(e)}
141
+
142
def measure_execution_time(func):
    """Decorator that logs how long each call to *func* takes (seconds)."""
    import functools
    import time

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        started = time.time()
        outcome = func(*args, **kwargs)
        elapsed = time.time() - started
        logging.info(f"{func.__name__} executed in {elapsed:.2f} seconds")
        return outcome

    return wrapper
158
+
159
# Load environment variables from a local .env file (no-op if absent),
# so the os.getenv checks below and elsewhere can see them.
load_dotenv()

# Initialize logging when module is imported
setup_logging()

# Validate environment on import; missing variables are logged as a
# warning rather than raising, so the app can still start degraded.
if not validate_environment():
    logging.warning("Environment validation failed. Some features may not work properly.")
app/voice_synthesizer.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+ import os
3
+ import logging
4
+ from typing import Dict, Any, Optional
5
+ from pathlib import Path
6
+ import tempfile
7
+ import io
8
+ from dotenv import load_dotenv
9
+
10
+ # Load environment variables
11
+ load_dotenv()
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
class VoiceSynthesizer:
    """Handles text-to-speech conversion for lecture content.

    Wraps the OpenAI TTS endpoint (model "tts-1"): lecture markdown is
    cleaned down to plain prose, split into API-sized chunks, synthesized
    chunk by chunk, and written out as a single MP3 file.
    """

    def __init__(self):
        # Key is read from the environment; an empty string defers the
        # failure to the first API call instead of raising at construction.
        self.client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY", ""))
        self.supported_voices = [
            "alloy", "echo", "fable", "onyx", "nova", "shimmer"
        ]
        self.default_voice = "nova"

    def synthesize_lecture(self, lecture_content: str, voice: Optional[str] = None, output_path: Optional[str] = None) -> Dict[str, Any]:
        """
        Convert lecture text to speech using OpenAI TTS.

        Args:
            lecture_content: The lecture text to convert
            voice: Voice to use (alloy, echo, fable, onyx, nova, shimmer);
                any unrecognized value silently falls back to the default
            output_path: Where to save the audio file; when omitted a name is
                derived from a stable digest of the content

        Returns:
            Dict with success status, file path, and metadata
        """
        try:
            if not lecture_content.strip():
                return {
                    'success': False,
                    'error': 'No content provided for synthesis',
                    'file_path': None,
                    'duration': 0
                }

            # Validate and set voice (fall back rather than fail)
            selected_voice = voice if voice in self.supported_voices else self.default_voice

            # Prepare content for TTS (remove markdown formatting)
            clean_content = self._clean_content_for_tts(lecture_content)

            # Split content into chunks if too long (OpenAI TTS has limits)
            chunks = self._split_content(clean_content, max_length=4000)

            if not output_path:
                # Derive the filename from a stable content digest. The
                # builtin hash() is salted per process (PYTHONHASHSEED), so it
                # would yield a different - and possibly negative - name on
                # every run, defeating any reuse of previously generated audio.
                import hashlib
                digest = hashlib.sha1(lecture_content.encode("utf-8")).hexdigest()[:16]
                output_path = os.path.join("output", f"lecture_audio_{digest}.mp3")

            # Ensure output directory exists. dirname() is "" when a bare
            # filename is passed, and makedirs("") raises - guard against it.
            out_dir = os.path.dirname(output_path)
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)

            if len(chunks) == 1:
                # Single chunk - direct synthesis
                response = self.client.audio.speech.create(
                    model="tts-1",
                    voice=selected_voice,
                    input=chunks[0],
                    response_format="mp3"
                )

                # Save the audio file
                with open(output_path, "wb") as f:
                    f.write(response.content)

            else:
                # Multiple chunks - synthesize and combine
                self._synthesize_multiple_chunks(chunks, selected_voice, output_path)

            # Get file size and estimate duration
            file_size = os.path.getsize(output_path)
            estimated_duration = self._estimate_audio_duration(clean_content)

            return {
                'success': True,
                'file_path': output_path,
                'voice': selected_voice,
                'duration': estimated_duration,
                'file_size': file_size,
                'chunks_count': len(chunks)
            }

        except Exception as e:
            logger.error(f"Voice synthesis failed: {str(e)}")
            return {
                'success': False,
                'error': str(e),
                'file_path': None,
                'duration': 0
            }

    def _clean_content_for_tts(self, content: str) -> str:
        """Strip markdown formatting so the TTS engine reads plain prose."""
        import re

        # Remove markdown headers (leading '#' runs)
        content = re.sub(r'^#{1,6}\s+', '', content, flags=re.MULTILINE)

        # Remove markdown emphasis, keeping the inner text
        content = re.sub(r'\*\*(.*?)\*\*', r'\1', content)  # Bold
        content = re.sub(r'\*(.*?)\*', r'\1', content)  # Italic

        # Remove markdown links, keeping only the link text
        content = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', content)

        # Remove horizontal rules
        content = re.sub(r'^---+$', '', content, flags=re.MULTILINE)

        # Clean up extra whitespace
        content = re.sub(r'\n{3,}', '\n\n', content)
        content = re.sub(r' {2,}', ' ', content)

        # Add pauses for better speech flow: "..." makes the engine pause
        content = re.sub(r'\n\n', '\n\n... \n\n', content)  # Longer pause between sections

        return content.strip()

    def _split_content(self, content: str, max_length: int = 4000) -> list:
        """Split content into chunks no longer than *max_length* characters.

        Splitting prefers sentence boundaries ('. '); a single over-long
        sentence falls back to word boundaries, and a single over-long word
        is truncated as a last resort.
        """
        if len(content) <= max_length:
            return [content]

        chunks = []
        sentences = content.split('. ')
        current_chunk = ""

        for sentence in sentences:
            # Check if adding this sentence would exceed the limit
            if len(current_chunk) + len(sentence) + 2 > max_length:
                if current_chunk:
                    chunks.append(current_chunk.strip())
                    current_chunk = sentence + ". "
                else:
                    # Single sentence is too long, split by words
                    words = sentence.split()
                    word_chunk = ""
                    for word in words:
                        if len(word_chunk) + len(word) + 1 > max_length:
                            if word_chunk:
                                chunks.append(word_chunk.strip())
                                word_chunk = word + " "
                            else:
                                # Single word is too long, truncate
                                chunks.append(word[:max_length])
                        else:
                            word_chunk += word + " "
                    if word_chunk:
                        current_chunk = word_chunk + ". "
            else:
                current_chunk += sentence + ". "

        if current_chunk:
            chunks.append(current_chunk.strip())

        return [chunk for chunk in chunks if chunk.strip()]

    def _synthesize_multiple_chunks(self, chunks: list, voice: str, output_path: str):
        """Synthesize each chunk to a temp MP3, then concatenate into one file.

        NOTE(review): raw byte concatenation of MP3 files generally plays but
        is not spec-clean (per-file headers remain embedded); consider a real
        audio muxer if playback issues are reported.
        """
        import tempfile
        import shutil

        temp_files = []

        try:
            # Synthesize each chunk to its own temporary file
            for i, chunk in enumerate(chunks):
                temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=f"_chunk_{i}.mp3")
                temp_files.append(temp_file.name)
                temp_file.close()

                response = self.client.audio.speech.create(
                    model="tts-1",
                    voice=voice,
                    input=chunk,
                    response_format="mp3"
                )

                with open(temp_file.name, "wb") as f:
                    f.write(response.content)

            # Combine audio files (simple concatenation for MP3)
            with open(output_path, "wb") as outfile:
                for temp_file in temp_files:
                    with open(temp_file, "rb") as infile:
                        shutil.copyfileobj(infile, outfile)

        finally:
            # Clean up temporary files; only swallow filesystem errors,
            # never KeyboardInterrupt/SystemExit (previous bare except did).
            for temp_file in temp_files:
                try:
                    os.unlink(temp_file)
                except OSError:
                    pass

    def _estimate_audio_duration(self, content: str) -> int:
        """Estimate audio duration in seconds from the word count."""
        # Average speaking rate: ~150 words per minute
        word_count = len(content.split())
        duration_minutes = word_count / 150
        return int(duration_minutes * 60)

    def get_available_voices(self) -> Dict[str, str]:
        """Get list of available voices with descriptions."""
        return {
            "alloy": "Neutral, balanced voice",
            "echo": "Crisp, clear voice",
            "fable": "Warm, engaging voice",
            "onyx": "Deep, authoritative voice",
            "nova": "Pleasant, professional voice (default)",
            "shimmer": "Bright, energetic voice"
        }

    def validate_voice(self, voice: str) -> bool:
        """Validate if the provided voice is supported."""
        return voice in self.supported_voices
generated-icon.png ADDED

Git LFS Details

  • SHA256: 7744464930e49597fd71be0dfe1cb9b017e9e6e89717cddbc15bf68a6ed47430
  • Pointer size: 131 Bytes
  • Size of remote file: 439 kB
main.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import uvicorn  # NOTE(review): unused here; kept for deployment tooling - confirm and drop
import gradio as gr  # NOTE(review): unused alias; gradio is used via create_gradio_interface
from app.gradio_interface import create_gradio_interface
import os

# Create output directory if it doesn't exist (generated audio/files go here)
os.makedirs("output", exist_ok=True)

# Create Gradio interface
gradio_app = create_gradio_interface()

if __name__ == "__main__":
    # Honour a platform-provided port when present (e.g. Hugging Face Spaces
    # sets GRADIO_SERVER_PORT, and an explicit server_port would override it);
    # fall back to the original 5000 for local runs.
    gradio_app.launch(
        server_name="0.0.0.0",  # bind all interfaces so containers can expose the app
        server_port=int(os.environ.get("GRADIO_SERVER_PORT", 5000)),
        share=False,
        show_error=True
    )
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ chromadb>=1.0.12
2
+ fastapi>=0.115.9
3
+ gradio>=5.33.0
4
+ langgraph>=0.4.8
5
+ numpy>=2.3.0
6
+ openai>=1.85.0
7
+ pdfplumber>=0.11.6
8
+ psutil>=7.0.0
9
+ pypdf2>=3.0.1
10
+ reportlab>=4.4.1
11
+ uvicorn>=0.34.3
12
+ pydantic
13
+ typing-extensions
14
+ python-dotenv
static/style.css ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* Custom styles for AI Lecture Generator */

/* Theme palette: all colors flow from these custom properties; the
   dark-mode media query at the bottom overrides a subset of them. */
:root {
    --primary-color: #2c3e50;
    --secondary-color: #3498db;
    --success-color: #27ae60;
    --warning-color: #f39c12;
    --error-color: #e74c3c;
    --background-color: #f8f9fa;
    --text-color: #2c3e50;
    --border-color: #ddd;
}

/* Global styles */
body {
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    background-color: var(--background-color);
    color: var(--text-color);
    line-height: 1.6;
}

/* Header styles */
.gradio-container h1 {
    color: var(--primary-color);
    text-align: center;
    margin-bottom: 2rem;
    font-size: 2.5rem;
    font-weight: bold;
}

/* Status boxes: one base rule plus a colored left border per state */
.status-box {
    padding: 15px;
    border-radius: 8px;
    margin: 15px 0;
    font-weight: 500;
    border-left: 4px solid;
}

.status-box.success {
    background-color: #d4edda;
    border-left-color: var(--success-color);
    color: #155724;
}

.status-box.error {
    background-color: #f8d7da;
    border-left-color: var(--error-color);
    color: #721c24;
}

.status-box.processing {
    background-color: #d1ecf1;
    border-left-color: var(--secondary-color);
    color: #0c5460;
}

.status-box.warning {
    background-color: #fff3cd;
    border-left-color: var(--warning-color);
    color: #856404;
}

/* Button styles */
.gradio-button {
    background: linear-gradient(135deg, var(--secondary-color), #2980b9);
    color: white;
    border: none;
    padding: 12px 24px;
    border-radius: 6px;
    font-weight: 500;
    transition: all 0.3s ease;
    cursor: pointer;
}

.gradio-button:hover {
    transform: translateY(-2px);
    box-shadow: 0 4px 12px rgba(52, 152, 219, 0.3);
}

.gradio-button.primary {
    background: linear-gradient(135deg, var(--success-color), #229954);
}

.gradio-button.secondary {
    background: linear-gradient(135deg, #95a5a6, #7f8c8d);
}

/* File upload area */
.file-upload {
    border: 2px dashed var(--border-color);
    border-radius: 8px;
    padding: 2rem;
    text-align: center;
    transition: all 0.3s ease;
}

.file-upload:hover {
    border-color: var(--secondary-color);
    background-color: #f0f8ff;
}

/* Input fields */
.gradio-textbox,
.gradio-dropdown {
    border: 1px solid var(--border-color);
    border-radius: 6px;
    padding: 10px;
    font-size: 14px;
    transition: border-color 0.3s ease;
}

.gradio-textbox:focus,
.gradio-dropdown:focus {
    border-color: var(--secondary-color);
    outline: none;
    box-shadow: 0 0 0 3px rgba(52, 152, 219, 0.1);
}

/* Chat interface */
.chatbot {
    border: 1px solid var(--border-color);
    border-radius: 8px;
    background: white;
    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
}

/* Chat bubbles: user messages align right, assistant messages align left */
.chat-message {
    padding: 12px;
    margin: 8px;
    border-radius: 8px;
    max-width: 80%;
}

.chat-message.user {
    background: var(--secondary-color);
    color: white;
    margin-left: auto;
}

.chat-message.assistant {
    background: #f1f3f4;
    color: var(--text-color);
    margin-right: auto;
}

/* Tab styles */
.gradio-tab {
    border-bottom: 3px solid transparent;
    padding: 12px 24px;
    font-weight: 500;
    transition: all 0.3s ease;
}

.gradio-tab.selected {
    border-bottom-color: var(--secondary-color);
    color: var(--secondary-color);
}

/* Card-like containers */
.gradio-column {
    background: white;
    border-radius: 8px;
    padding: 20px;
    margin: 10px;
    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
}

/* JSON display */
.gradio-json {
    background: #f8f9fa;
    border: 1px solid var(--border-color);
    border-radius: 6px;
    padding: 15px;
    font-family: 'Courier New', monospace;
    font-size: 13px;
}

/* Loading animation: spinner ring driven by the keyframes below */
.loading {
    display: inline-block;
    width: 20px;
    height: 20px;
    border: 3px solid #f3f3f3;
    border-top: 3px solid var(--secondary-color);
    border-radius: 50%;
    animation: spin 1s linear infinite;
}

@keyframes spin {
    0% { transform: rotate(0deg); }
    100% { transform: rotate(360deg); }
}

/* Progress bar */
.progress-bar {
    width: 100%;
    height: 8px;
    background-color: #f0f0f0;
    border-radius: 4px;
    overflow: hidden;
}

.progress-bar-fill {
    height: 100%;
    background: linear-gradient(90deg, var(--secondary-color), var(--success-color));
    transition: width 0.3s ease;
}

/* Responsive design */
@media (max-width: 768px) {
    .gradio-container {
        padding: 10px;
    }

    .gradio-container h1 {
        font-size: 2rem;
    }

    .gradio-column {
        margin: 5px;
        padding: 15px;
    }

    .chat-message {
        max-width: 95%;
    }
}

/* Accessibility improvements */
.gradio-button:focus,
.gradio-textbox:focus,
.gradio-dropdown:focus {
    outline: 2px solid var(--secondary-color);
    outline-offset: 2px;
}

/* Dark mode support: overrides the palette and card backgrounds */
@media (prefers-color-scheme: dark) {
    :root {
        --background-color: #1a1a1a;
        --text-color: #ffffff;
        --border-color: #404040;
    }

    .gradio-column {
        background: #2d2d2d;
        color: var(--text-color);
    }

    .gradio-json {
        background: #2d2d2d;
        color: var(--text-color);
    }

    .chatbot {
        background: #2d2d2d;
    }

    .chat-message.assistant {
        background: #404040;
        color: var(--text-color);
    }
}