Upload 15 files
- .env +10 -0
- .gitattributes +1 -0
- README.md +4 -14
- app/__init__.py +6 -0
- app/chatbot.py +255 -0
- app/gradio_interface.py +396 -0
- app/lecture_generator.py +538 -0
- app/models.py +33 -0
- app/pdf_processor.py +189 -0
- app/rag_system.py +279 -0
- app/utils.py +167 -0
- app/voice_synthesizer.py +223 -0
- generated-icon.png +3 -0
- main.py +18 -0
- requirements.txt +14 -0
- static/style.css +264 -0
.env
ADDED
@@ -0,0 +1,10 @@
GOOGLE_API_KEY=AIzaSyAsiDT8HYL-eQf19ePi0yoCg6H9gMdYMZk
TAVILY_API_KEY=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
LANGSMITH_TRACING=true
LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
LANGSMITH_API_KEY="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"
LANGSMITH_PROJECT="pr-proper-venue-61"
LANGCHAIN_TRACING_V2=true
LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
MISTRAL_API_KEY=CRYAdmnhV0rAQcf36jLcjxKDCd50NCOM
OPENAI_API_KEY=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
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+generated-icon.png filter=lfs diff=lfs merge=lfs -text
README.md
CHANGED
@@ -1,14 +1,4 @@
[old lines 1-4: YAML front-matter not captured in this view]
-colorTo: green
-sdk: gradio
-sdk_version: 5.33.1
-app_file: app.py
-pinned: false
-license: mit
-short_description: AI tutor
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+title: AI tutor
+sdk: gradio
+app_file: main.py
+pinned: false
app/__init__.py
ADDED
@@ -0,0 +1,6 @@
"""
AI Tutor Application
Converts PDFs into interactive lectures with voice narration and RAG chatbot functionality
"""

__version__ = "1.0.0"
app/chatbot.py
ADDED
@@ -0,0 +1,255 @@
import openai
import os
import logging
from typing import Dict, Any, List, Optional
from datetime import datetime
from app.models import ChatMessage, ChatSession
from app.rag_system import RAGSystem
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

logger = logging.getLogger(__name__)

class RAGChatbot:
    """RAG-powered chatbot with memory of PDF and lecture content"""

    def __init__(self):
        self.client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY", ""))
        self.rag_system = RAGSystem()
        self.sessions: Dict[str, ChatSession] = {}
        self.max_context_length = 8000  # Token limit for context

    def create_session(self, session_id: str, pdf_content: str = None, lecture_content: str = None) -> bool:
        """Create a new chat session with optional PDF and lecture content"""
        try:
            session = ChatSession(
                session_id=session_id,
                pdf_content=pdf_content,
                lecture_content=lecture_content
            )

            self.sessions[session_id] = session

            # Add content to RAG system if provided
            if pdf_content:
                self.rag_system.add_pdf_content(session_id, pdf_content)

            if lecture_content:
                self.rag_system.add_lecture_content(session_id, lecture_content)

            logger.info(f"Created chat session {session_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to create session {session_id}: {str(e)}")
            return False

    def add_message(self, session_id: str, role: str, content: str) -> bool:
        """Add a message to the session history"""
        try:
            if session_id not in self.sessions:
                return False

            message = ChatMessage(role=role, content=content)
            self.sessions[session_id].messages.append(message)
            return True

        except Exception as e:
            logger.error(f"Failed to add message to session {session_id}: {str(e)}")
            return False

    def get_response(self, session_id: str, user_message: str) -> Dict[str, Any]:
        """Generate a response to user message using RAG"""
        try:
            if session_id not in self.sessions:
                return {
                    'success': False,
                    'error': 'Session not found',
                    'response': ''
                }

            session = self.sessions[session_id]

            # Add user message to history
            self.add_message(session_id, "user", user_message)

            # Retrieve relevant content
            retrieval_result = self.rag_system.retrieve_relevant_content(
                session_id, user_message, n_results=5
            )

            if not retrieval_result['success']:
                logger.warning(f"Content retrieval failed for session {session_id}")
                relevant_content = []
            else:
                relevant_content = retrieval_result['results']

            # Generate response
            response = self._generate_response(session, user_message, relevant_content)

            # Add assistant response to history
            self.add_message(session_id, "assistant", response)

            return {
                'success': True,
                'response': response,
                'sources_used': len(relevant_content),
                'session_id': session_id
            }

        except Exception as e:
            logger.error(f"Failed to generate response for session {session_id}: {str(e)}")
            return {
                'success': False,
                'error': str(e),
                'response': 'I apologize, but I encountered an error while processing your message. Please try again.'
            }

    def _generate_response(self, session: ChatSession, user_message: str, relevant_content: List[Dict]) -> str:
        """Generate response using OpenAI with RAG context"""
        try:
            # Build context from relevant content
            context_parts = []

            if relevant_content:
                context_parts.append("Relevant information from your documents:")
                for i, item in enumerate(relevant_content[:3], 1):  # Limit to top 3 results
                    source = "PDF" if item['source'] == 'pdf' else "Lecture"
                    context_parts.append(f"{i}. [{source}] {item['content'][:500]}...")
                context_parts.append("")

            # Build conversation history (limited to recent messages)
            conversation_history = []
            recent_messages = session.messages[-6:]  # Last 6 messages for context

            for msg in recent_messages[:-1]:  # Exclude the current user message
                conversation_history.append(f"{msg.role.title()}: {msg.content}")

            # Create system prompt
            system_prompt = """You are a helpful AI assistant that can answer questions about uploaded PDF documents and generated lectures.

Key guidelines:
1. Use the provided relevant information to answer questions accurately
2. If you don't have enough information in the context, say so clearly
3. Maintain a conversational and educational tone
4. Reference the source (PDF or Lecture) when appropriate
5. Be concise but thorough in your explanations
6. If asked about something not in the documents, explain that your knowledge is limited to the uploaded content

Always strive to be helpful while being honest about the limitations of your knowledge."""

            # Build the full prompt
            messages = [{"role": "system", "content": system_prompt}]

            # Add context if available
            if context_parts:
                context_message = "\n".join(context_parts)
                messages.append({"role": "system", "content": context_message})

            # Add conversation history
            if conversation_history:
                history_message = "Previous conversation:\n" + "\n".join(conversation_history)
                messages.append({"role": "system", "content": history_message})

            # Add current user message
            messages.append({"role": "user", "content": user_message})

            # Generate response
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=messages,
                temperature=0.7,
                max_tokens=1000
            )

            return response.choices[0].message.content

        except Exception as e:
            logger.error(f"Response generation failed: {str(e)}")
            return "I apologize, but I'm having trouble generating a response right now. Please try rephrasing your question."

    def get_session_history(self, session_id: str) -> List[Dict[str, Any]]:
        """Get chat history for a session"""
        try:
            if session_id not in self.sessions:
                return []

            session = self.sessions[session_id]
            return [
                {
                    'role': msg.role,
                    'content': msg.content,
                    'timestamp': msg.timestamp.isoformat()
                }
                for msg in session.messages
            ]

        except Exception as e:
            logger.error(f"Failed to get session history {session_id}: {str(e)}")
            return []

    def clear_session(self, session_id: str) -> bool:
        """Clear a chat session and its data"""
        try:
            # Clear from RAG system
            self.rag_system.clear_session_data(session_id)

            # Remove from local sessions
            if session_id in self.sessions:
                del self.sessions[session_id]

            logger.info(f"Cleared session {session_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to clear session {session_id}: {str(e)}")
            return False

    def get_session_stats(self, session_id: str) -> Dict[str, Any]:
        """Get statistics about a session"""
        try:
            if session_id not in self.sessions:
                return {'exists': False}

            session = self.sessions[session_id]
            rag_stats = self.rag_system.get_session_stats(session_id)

            return {
                'exists': True,
                'message_count': len(session.messages),
                'created_at': session.created_at.isoformat(),
                'has_pdf': session.pdf_content is not None,
                'has_lecture': session.lecture_content is not None,
                **rag_stats
            }

        except Exception as e:
            logger.error(f"Failed to get session stats {session_id}: {str(e)}")
            return {'exists': False, 'error': str(e)}

    def update_session_content(self, session_id: str, pdf_content: str = None, lecture_content: str = None) -> bool:
        """Update session with new content"""
        try:
            if session_id not in self.sessions:
                return False

            session = self.sessions[session_id]

            # Update PDF content
            if pdf_content:
                session.pdf_content = pdf_content
                self.rag_system.add_pdf_content(session_id, pdf_content)

            # Update lecture content
            if lecture_content:
                session.lecture_content = lecture_content
                self.rag_system.add_lecture_content(session_id, lecture_content)

            logger.info(f"Updated content for session {session_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to update session content {session_id}: {str(e)}")
            return False
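For reference, a minimal sketch of driving RAGChatbot outside the Gradio UI. The session id and the two content strings are placeholders, and app.rag_system (listed in this upload but not shown here) is assumed to be importable with the methods chatbot.py calls:

```python
# Minimal sketch (assumed usage; not part of the committed files)
import uuid

from app.chatbot import RAGChatbot

bot = RAGChatbot()
session_id = str(uuid.uuid4())

# Placeholder strings stand in for real PDF text and a generated lecture
bot.create_session(
    session_id,
    pdf_content="Example PDF text...",
    lecture_content="Example lecture text...",
)

result = bot.get_response(session_id, "Summarize the main topic.")
if result['success']:
    print(result['response'], f"(sources used: {result['sources_used']})")
```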
app/gradio_interface.py
ADDED
@@ -0,0 +1,396 @@
import gradio as gr
import os
import uuid
import tempfile
from typing import Dict, Any, Optional, Tuple
import logging
from datetime import datetime

from app.pdf_processor import PDFProcessor
from app.lecture_generator import LectureGenerator
from app.voice_synthesizer import VoiceSynthesizer
from app.chatbot import RAGChatbot

logger = logging.getLogger(__name__)

# Initialize components
pdf_processor = PDFProcessor()
lecture_generator = LectureGenerator()
voice_synthesizer = VoiceSynthesizer()
chatbot = RAGChatbot()

# Global state for sessions
current_session = None
session_data = {}

def create_gradio_interface():
    """Create and configure the Gradio interface"""

    # Custom CSS for better styling
    css = """
    .container {
        max-width: 1200px;
        margin: 0 auto;
    }
    .status-box {
        padding: 10px;
        border-radius: 5px;
        margin: 10px 0;
    }
    .success {
        background-color: #d4edda;
        border: 1px solid #c3e6cb;
        color: #155724;
    }
    .error {
        background-color: #f8d7da;
        border: 1px solid #f5c6cb;
        color: #721c24;
    }
    .processing {
        background-color: #d1ecf1;
        border: 1px solid #bee5eb;
        color: #0c5460;
    }
    """

    with gr.Blocks(css=css, title="AI Tutor") as interface:
        gr.Markdown("# 🎓 AI Tutor")
        gr.Markdown("Convert PDFs into interactive lectures with voice narration and chat with your AI tutor about any topic!")

        # Session state
        session_id_state = gr.State(value=str(uuid.uuid4()))

        with gr.Tab("📄 PDF Upload & Processing"):
            with gr.Row():
                with gr.Column(scale=1):
                    pdf_upload = gr.File(
                        label="Upload PDF Document (Optional)",
                        file_types=[".pdf"],
                        type="binary"
                    )

                    lecture_style = gr.Dropdown(
                        choices=["academic", "casual", "detailed"],
                        value="academic",
                        label="Lecture Style"
                    )

                    include_examples = gr.Checkbox(
                        value=True,
                        label="Include Examples"
                    )

                    learning_objectives = gr.Textbox(
                        label="Learning Objectives & Topic",
                        placeholder="What do you want to learn? e.g., 'Machine Learning basics', 'Python programming fundamentals', 'Explain quantum physics concepts'",
                        lines=3,
                        max_lines=5
                    )

                    gr.Markdown("**Note:** You can generate a lecture with just learning objectives, or upload a PDF for content-based lectures.")

                    process_btn = gr.Button("🚀 Generate Lecture", variant="primary")

                with gr.Column(scale=2):
                    processing_status = gr.HTML()
                    pdf_info = gr.JSON(label="PDF Information")

        with gr.Tab("📚 Generated Lecture"):
            with gr.Row():
                with gr.Column():
                    lecture_title = gr.Textbox(label="Lecture Title", interactive=False)
                    lecture_content = gr.Textbox(
                        label="Lecture Content",
                        lines=20,
                        max_lines=30,
                        interactive=False
                    )

                    with gr.Row():
                        download_pdf_btn = gr.Button("📄 Download PDF")
                        download_audio_btn = gr.Button("🎤 Generate & Download Audio")

                    pdf_download = gr.File(label="Download Lecture PDF")
                    audio_download = gr.File(label="Download Audio Lecture")

        with gr.Tab("💬 Tutor Chat"):
            with gr.Row():
                with gr.Column(scale=3):
                    chatbot_interface = gr.Chatbot(
                        label="Chat with your AI Tutor about your content",
                        height=400,
                        type="messages"
                    )

                    with gr.Row():
                        msg_input = gr.Textbox(
                            label="Your Message",
                            placeholder="Ask your AI tutor about any topic, PDF content, or lecture...",
                            scale=4
                        )
                        send_btn = gr.Button("Send", scale=1)

                    clear_chat_btn = gr.Button("Clear Chat History")

                with gr.Column(scale=1):
                    chat_stats = gr.JSON(label="Session Statistics")
                    refresh_stats_btn = gr.Button("Refresh Stats")

        # Event handlers
        def process_pdf_handler(pdf_file, style, examples, learning_objectives, session_id):
            """Handle PDF processing or topic-based lecture generation"""
            global session_data

            try:
                # Check if we have either PDF or learning objectives
                if pdf_file is None and not learning_objectives.strip():
                    return (
                        '<div class="status-box error">❌ Please either upload a PDF file or provide learning objectives</div>',
                        {},
                        session_id
                    )

                # Update status based on input type
                if pdf_file is not None:
                    status_html = '<div class="status-box processing">🔄 Processing PDF...</div>'

                    # Validate PDF
                    validation = pdf_processor.validate_pdf(pdf_file)
                    if not validation['valid']:
                        return (
                            f'<div class="status-box error">❌ {validation["error"]}</div>',
                            {},
                            session_id
                        )

                    # Extract text
                    extraction_result = pdf_processor.extract_text_from_pdf(pdf_file)
                    if not extraction_result['success']:
                        return (
                            f'<div class="status-box error">❌ {extraction_result["error"]}</div>',
                            {},
                            session_id
                        )

                    pdf_content = extraction_result['text']
                    pdf_data = extraction_result
                else:
                    # Generate lecture from learning objectives only
                    status_html = '<div class="status-box processing">🔄 Generating lecture from learning objectives...</div>'
                    pdf_content = ""
                    pdf_data = {
                        'success': True,
                        'text': "",
                        'metadata': {'total_pages': 0, 'title': learning_objectives[:50], 'author': '', 'subject': ''},
                        'word_count': 0,
                        'character_count': 0
                    }

                # Generate lecture
                lecture_result = lecture_generator.generate_lecture(
                    pdf_content,
                    style=style,
                    include_examples=examples,
                    learning_objectives=learning_objectives
                )

                if not lecture_result['success']:
                    return (
                        f'<div class="status-box error">❌ Lecture generation failed: {lecture_result["error"]}</div>',
                        {},
                        session_id
                    )

                # Store session data
                session_data[session_id] = {
                    'pdf_data': pdf_data,
                    'lecture_data': lecture_result,
                    'processed_at': datetime.now().isoformat()
                }

                # Create chatbot session
                chatbot.create_session(
                    session_id,
                    pdf_content=pdf_content,
                    lecture_content=lecture_result['content']
                )

                if pdf_file is not None:
                    success_html = '<div class="status-box success">✅ PDF processed successfully!</div>'
                    info = {
                        'filename': getattr(pdf_file, 'name', 'uploaded_file.pdf'),
                        'pages': pdf_data['metadata']['total_pages'],
                        'word_count': pdf_data['word_count'],
                        'lecture_title': lecture_result['title'],
                        'estimated_duration': f"{lecture_result['estimated_duration']} minutes"
                    }
                else:
                    success_html = '<div class="status-box success">✅ Lecture generated from learning objectives!</div>'
                    info = {
                        'source': 'Learning Objectives',
                        'topic': learning_objectives[:100] + "..." if len(learning_objectives) > 100 else learning_objectives,
                        'lecture_title': lecture_result['title'],
                        'estimated_duration': f"{lecture_result['estimated_duration']} minutes"
                    }

                return success_html, info, session_id

            except Exception as e:
                logger.error(f"PDF processing error: {str(e)}")
                return (
                    f'<div class="status-box error">❌ Processing failed: {str(e)}</div>',
                    {},
                    session_id
                )

        def update_lecture_display(session_id):
            """Update lecture display with generated content"""
            global session_data

            if session_id not in session_data:
                return "", ""

            lecture_data = session_data[session_id]['lecture_data']
            return lecture_data['title'], lecture_data['content']

        def generate_pdf_download(session_id):
            """Generate PDF download"""
            global session_data

            try:
                if session_id not in session_data:
                    return None

                lecture_data = session_data[session_id]['lecture_data']

                # Generate PDF
                output_path = os.path.join("output", f"lecture_{session_id}.pdf")
                success = lecture_generator.generate_pdf(lecture_data, output_path)

                if success:
                    return output_path
                else:
                    return None

            except Exception as e:
                logger.error(f"PDF generation error: {str(e)}")
                return None

        def generate_audio_download(session_id):
            """Generate audio download"""
            global session_data

            try:
                if session_id not in session_data:
                    return None

                lecture_data = session_data[session_id]['lecture_data']

                # Generate audio
                output_path = os.path.join("output", f"lecture_audio_{session_id}.mp3")
                result = voice_synthesizer.synthesize_lecture(
                    lecture_data['content'],
                    voice="nova",
                    output_path=output_path
                )

                if result['success']:
                    return result['file_path']
                else:
                    return None

            except Exception as e:
                logger.error(f"Audio generation error: {str(e)}")
                return None

        def chat_handler(message, history, session_id):
            """Handle chat messages"""
            if not message.strip():
                return history, ""

            try:
                response_result = chatbot.get_response(session_id, message)

                if response_result['success']:
                    history.append({"role": "user", "content": message})
                    history.append({"role": "assistant", "content": response_result['response']})
                else:
                    history.append({"role": "user", "content": message})
                    history.append({"role": "assistant", "content": f"Error: {response_result['error']}"})

                return history, ""

            except Exception as e:
                logger.error(f"Chat error: {str(e)}")
                history.append({"role": "user", "content": message})
                history.append({"role": "assistant", "content": f"Sorry, I encountered an error: {str(e)}"})
                return history, ""

        def clear_chat_handler(session_id):
            """Clear chat history"""
            chatbot.clear_session(session_id)
            new_session_id = str(uuid.uuid4())

            # Recreate session with existing content if available
            if session_id in session_data:
                pdf_content = session_data[session_id]['pdf_data']['text']
                lecture_content = session_data[session_id]['lecture_data']['content']
                chatbot.create_session(new_session_id, pdf_content, lecture_content)
                session_data[new_session_id] = session_data[session_id]
                del session_data[session_id]

            return [], new_session_id

        def get_chat_stats(session_id):
            """Get chat statistics"""
            return chatbot.get_session_stats(session_id)

        # Wire up event handlers
        process_btn.click(
            fn=process_pdf_handler,
            inputs=[pdf_upload, lecture_style, include_examples, learning_objectives, session_id_state],
            outputs=[processing_status, pdf_info, session_id_state]
        ).then(
            fn=update_lecture_display,
            inputs=[session_id_state],
            outputs=[lecture_title, lecture_content]
        )

        download_pdf_btn.click(
            fn=generate_pdf_download,
            inputs=[session_id_state],
            outputs=[pdf_download]
        )

        download_audio_btn.click(
            fn=generate_audio_download,
            inputs=[session_id_state],
            outputs=[audio_download]
        )

        send_btn.click(
            fn=chat_handler,
            inputs=[msg_input, chatbot_interface, session_id_state],
            outputs=[chatbot_interface, msg_input]
        )

        msg_input.submit(
            fn=chat_handler,
            inputs=[msg_input, chatbot_interface, session_id_state],
            outputs=[chatbot_interface, msg_input]
        )

        clear_chat_btn.click(
            fn=clear_chat_handler,
            inputs=[session_id_state],
            outputs=[chatbot_interface, session_id_state]
        )

        refresh_stats_btn.click(
            fn=get_chat_stats,
            inputs=[session_id_state],
            outputs=[chat_stats]
        )

    return interface
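create_gradio_interface() only builds the Blocks app; launching it is left to main.py, which is listed in this upload but not shown here. A minimal sketch of an assumed entry point, with server settings typical for Hugging Face Spaces:

```python
# Minimal sketch (assumed; the real main.py is not shown in this view)
from app.gradio_interface import create_gradio_interface

if __name__ == "__main__":
    demo = create_gradio_interface()
    # 0.0.0.0/7860 are the usual settings for a Gradio Space
    demo.launch(server_name="0.0.0.0", server_port=7860)
```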
app/lecture_generator.py
ADDED
@@ -0,0 +1,538 @@
from langgraph.graph import Graph, START, END
from langgraph.graph.message import add_messages
from typing_extensions import TypedDict, Annotated
from typing import Dict, List, Any, Optional
import openai
import os
import logging
from datetime import datetime
from reportlab.lib.pagesizes import letter, A4
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.lib.colors import HexColor
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

logger = logging.getLogger(__name__)

# Set up OpenAI client
openai.api_key = os.getenv("OPENAI_API_KEY", "")

class LectureState(TypedDict):
    pdf_content: str
    style: str
    include_examples: bool
    learning_objectives: str
    analysis: Dict[str, Any]
    outline: Dict[str, Any]
    sections: List[Dict[str, Any]]
    lecture_content: str
    title: str
    metadata: Dict[str, Any]
    messages: Annotated[list, add_messages]

class LectureGenerator:
    """AI agent for converting PDF content into structured lectures using LangGraph"""

    def __init__(self):
        self.client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY", ""))
        self.graph = self._build_graph()

    def _build_graph(self) -> Graph:
        """Build the LangGraph workflow for lecture generation"""
        workflow = Graph()

        # Add nodes
        workflow.add_node("analyze_content", self._analyze_content)
        workflow.add_node("create_outline", self._create_outline)
        workflow.add_node("generate_sections", self._generate_sections)
        workflow.add_node("compile_lecture", self._compile_lecture)
        workflow.add_node("finalize_output", self._finalize_output)

        # Add edges
        workflow.add_edge(START, "analyze_content")
        workflow.add_edge("analyze_content", "create_outline")
        workflow.add_edge("create_outline", "generate_sections")
        workflow.add_edge("generate_sections", "compile_lecture")
        workflow.add_edge("compile_lecture", "finalize_output")
        workflow.add_edge("finalize_output", END)

        return workflow.compile()

    def generate_lecture(self, pdf_content: str, style: str = "academic", include_examples: bool = True, learning_objectives: str = "") -> Dict[str, Any]:
        """Generate a structured lecture from PDF content"""
        try:
            initial_state = LectureState(
                pdf_content=pdf_content,
                style=style,
                include_examples=include_examples,
                learning_objectives=learning_objectives,
                analysis={},
                outline={},
                sections=[],
                lecture_content="",
                title="",
                metadata={},
                messages=[]
            )

            # Run the graph
            result = self.graph.invoke(initial_state)

            return {
                'success': True,
                'title': result['title'],
                'content': result['lecture_content'],
                'sections': result['sections'],
                'metadata': result['metadata'],
                'word_count': len(result['lecture_content'].split()),
                'estimated_duration': self._estimate_duration(result['lecture_content'])
            }

        except Exception as e:
            logger.error(f"Lecture generation failed: {str(e)}")
            return {
                'success': False,
                'error': str(e),
                'title': '',
                'content': '',
                'sections': [],
                'metadata': {},
                'word_count': 0,
                'estimated_duration': 0
            }

    def _analyze_content(self, state: LectureState) -> LectureState:
        """Analyze content or learning objectives to understand structure and main topics"""
        try:
            if state['pdf_content'].strip():
                # PDF-based analysis
                learning_context = f"\n\nUser Learning Objectives: {state['learning_objectives']}" if state['learning_objectives'].strip() else ""

                prompt = f"""
                Analyze the following document content and provide a structured analysis:

                Content: {state['pdf_content'][:5000]}...{learning_context}

                Please provide:
                1. Main topic/subject
                2. Key themes and concepts
                3. Document type (research paper, textbook, article, etc.)
                4. Complexity level (beginner, intermediate, advanced)
                5. Target audience
                6. Key learning objectives (consider user's stated objectives if provided)

                Format your response as a JSON object.
                """
            else:
                # Learning objectives-based analysis
                prompt = f"""
                Create a structured analysis for a lecture based on these learning objectives:

                Learning Objectives: {state['learning_objectives']}

                Please provide:
                1. Main topic/subject (extracted from learning objectives)
                2. Key themes and concepts that should be covered
                3. Document type: "educational lecture"
                4. Complexity level (beginner, intermediate, advanced) based on objectives
                5. Target audience (inferred from objectives)
                6. Detailed learning objectives breakdown

                Format your response as a JSON object.
                """

            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.3
            )

            # Parse the analysis
            import json
            try:
                analysis = json.loads(response.choices[0].message.content)
            except:
                # Fallback parsing if JSON parsing fails
                if state['learning_objectives'].strip():
                    # Extract topic from learning objectives
                    topic = state['learning_objectives'].split('.')[0].split(',')[0][:50]
                    main_topic = topic if topic else "Educational Lecture"
                else:
                    main_topic = "Document Analysis"

                analysis = {
                    "main_topic": main_topic,
                    "key_themes": ["Content Summary"],
                    "document_type": "Document",
                    "complexity_level": "intermediate",
                    "target_audience": "General",
                    "learning_objectives": ["Understand main concepts"]
                }

            state['analysis'] = analysis
            return state

        except Exception as e:
            logger.error(f"Content analysis failed: {str(e)}")
            if state['learning_objectives'].strip():
                # Extract topic from learning objectives for fallback
                topic = state['learning_objectives'].split('.')[0].split(',')[0][:50]
                main_topic = topic if topic else "Educational Lecture"
            else:
                main_topic = "Document Analysis"

            state['analysis'] = {
                "main_topic": main_topic,
                "key_themes": ["Content Summary"],
                "document_type": "Document",
                "complexity_level": "intermediate",
                "target_audience": "General",
                "learning_objectives": ["Understand main concepts"]
            }
            return state

    def _create_outline(self, state: LectureState) -> LectureState:
        """Create a detailed lecture outline based on analysis"""
        try:
            analysis = state['analysis']
            style = state['style']

            learning_context = f"\n\nUser Learning Objectives: {state['learning_objectives']}" if state['learning_objectives'].strip() else ""

            prompt = f"""
            Based on this analysis, create a detailed lecture outline:

            Analysis: {analysis}
            Style: {style}
            Include Examples: {state['include_examples']}{learning_context}

            Create an outline with:
            1. Engaging title that reflects user's learning goals
            2. Introduction section
            3. 3-5 main sections with subsections (prioritize user's stated learning objectives)
            4. Conclusion section
            5. Estimated time for each section

            Format as JSON with sections array containing title, subsections, and duration.
            """

            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.4
            )

            import json
            try:
                outline = json.loads(response.choices[0].message.content)
            except:
                # Fallback outline with proper title
                title = analysis.get("main_topic", "Educational Lecture")
                if state['learning_objectives'].strip() and not state['pdf_content'].strip():
                    # For learning objectives only, create a more descriptive title
                    objectives_words = state['learning_objectives'].split()[:5]  # First 5 words
                    title = " ".join(objectives_words).title()

                outline = {
                    "title": title,
                    "sections": [
                        {"title": "Introduction", "subsections": ["Overview"], "duration": 5},
                        {"title": "Core Concepts", "subsections": ["Key Points"], "duration": 15},
                        {"title": "Conclusion", "subsections": ["Summary"], "duration": 5}
                    ]
                }

            state['outline'] = outline
            state['title'] = outline.get('title', 'Generated Lecture')
            return state

        except Exception as e:
            logger.error(f"Outline creation failed: {str(e)}")
            # Generate appropriate fallback title based on learning objectives
            if state['learning_objectives'].strip():
                objectives_words = state['learning_objectives'].split()[:5]
                title = " ".join(objectives_words).title() if objectives_words else "Educational Lecture"
            else:
                title = "Generated Lecture"

            state['outline'] = {
                "title": title,
                "sections": [
                    {"title": "Introduction", "subsections": ["Overview"], "duration": 5},
                    {"title": "Core Concepts", "subsections": ["Key Points"], "duration": 15},
                    {"title": "Conclusion", "subsections": ["Summary"], "duration": 5}
                ]
            }
            state['title'] = title
            return state

    def _generate_sections(self, state: LectureState) -> LectureState:
        """Generate detailed content for each section"""
        try:
            sections = []
            outline = state['outline']
            pdf_content = state['pdf_content']
            style = state['style']

            for section in outline.get('sections', []):
                learning_context = f"\n\nUser Learning Objectives: {state['learning_objectives']}" if state['learning_objectives'].strip() else ""

                if pdf_content.strip():
                    # PDF-based section generation
                    section_prompt = f"""
                    Generate detailed content for this lecture section:

                    Section Title: {section['title']}
                    Subsections: {section.get('subsections', [])}
                    Style: {style}
                    Include Examples: {state['include_examples']}{learning_context}

                    Source Material: {pdf_content[:3000]}...

                    Create engaging, educational content that:
                    1. Explains concepts clearly (focus on user's learning objectives if provided)
                    2. Includes relevant examples if requested
                    3. Uses appropriate tone for {style} style
                    4. Builds logically on previous sections
                    5. Addresses the user's specific learning goals

                    Format with clear headings and structured paragraphs.
                    """
                else:
                    # Learning objectives-based section generation
                    section_prompt = f"""
                    Generate comprehensive educational content for this lecture section:

                    Section Title: {section['title']}
                    Subsections: {section.get('subsections', [])}
                    Style: {style}
                    Include Examples: {state['include_examples']}
                    Learning Objectives: {state['learning_objectives']}

                    Create engaging, educational content that:
                    1. Thoroughly explains concepts related to the learning objectives
                    2. Includes practical examples and real-world applications if requested
                    3. Uses appropriate {style} tone and language
                    4. Builds logically on previous sections
                    5. Directly addresses the stated learning objectives
                    6. Provides comprehensive coverage of the topic

                    Format with clear headings and structured paragraphs.
                    """

                response = self.client.chat.completions.create(
                    model="gpt-4o-mini",
                    messages=[{"role": "user", "content": section_prompt}],
                    temperature=0.5
                )

                section_content = response.choices[0].message.content

                sections.append({
                    'title': section['title'],
                    'content': section_content,
                    'duration': section.get('duration', 10),
                    'subsections': section.get('subsections', [])
                })

            state['sections'] = sections
            return state

        except Exception as e:
            logger.error(f"Section generation failed: {str(e)}")
            # Create basic fallback sections
            state['sections'] = [
                {
                    'title': 'Introduction',
                    'content': 'Welcome to this lecture based on the provided document.',
                    'duration': 5,
                    'subsections': ['Overview']
                },
                {
                    'title': 'Main Content',
                    'content': 'Here are the key points from the document.',
                    'duration': 15,
                    'subsections': ['Key Points']
                }
            ]
            return state

    def _compile_lecture(self, state: LectureState) -> LectureState:
        """Compile all sections into a cohesive lecture"""
        try:
            title = state['title']
            sections = state['sections']

            # Create dynamic introduction based on content type
            if state['pdf_content'].strip():
                # PDF-based lecture introduction
                intro = f"""# {title}

Welcome to this comprehensive lecture on {title}. This presentation has been crafted from your uploaded document to provide you with a thorough understanding of the key concepts and insights.

## Learning Objectives
By the end of this lecture, you will be able to:
- Understand the main concepts presented in the source material
- Apply the knowledge to practical situations
- Engage in meaningful discussions about the topic

---
"""
            else:
                # Topic-based lecture introduction
                learning_goals = state['learning_objectives'][:200] + "..." if len(state['learning_objectives']) > 200 else state['learning_objectives']
                intro = f"""# {title}

Welcome to this comprehensive lecture on {title}. This presentation has been crafted to address your specific learning objectives and provide you with a thorough understanding of the topic.

## Your Learning Goals
{learning_goals}

## What You'll Learn
By the end of this lecture, you will be able to:
- Master the concepts you've requested to learn
- Apply this knowledge to real-world scenarios
- Build upon these foundations for further learning

---
"""

            # Compile all sections
            compiled_content = intro
            for i, section in enumerate(sections, 1):
                compiled_content += f"\n## {i}. {section['title']}\n\n"
                compiled_content += section['content']
                compiled_content += "\n\n---\n"

            # Add conclusion
            compiled_content += """
## Conclusion

Thank you for joining this lecture. We've covered the essential concepts and insights from the source material. Remember to review the key points and consider how they apply to your understanding of the subject.

### Key Takeaways
- Review the main concepts discussed
- Consider practical applications
- Engage with additional resources for deeper learning

---

*This lecture was generated using AI to transform written content into an engaging educational experience.*
"""

            state['lecture_content'] = compiled_content
            return state

        except Exception as e:
            logger.error(f"Lecture compilation failed: {str(e)}")
            state['lecture_content'] = f"# {state['title']}\n\nThis is a generated lecture based on the provided document."
            return state

    def _finalize_output(self, state: LectureState) -> LectureState:
        """Finalize the output with metadata"""
        try:
            word_count = len(state['lecture_content'].split())

            state['metadata'] = {
                'generated_at': datetime.now().isoformat(),
                'style': state['style'],
                'include_examples': state['include_examples'],
                'word_count': word_count,
                'estimated_duration': self._estimate_duration(state['lecture_content']),
                'sections_count': len(state['sections']),
                'analysis': state['analysis']
            }

            return state

        except Exception as e:
            logger.error(f"Output finalization failed: {str(e)}")
            state['metadata'] = {
                'generated_at': datetime.now().isoformat(),
                'style': state['style'],
                'include_examples': state['include_examples'],
                'word_count': 0,
                'estimated_duration': 0,
                'sections_count': 0,
                'analysis': {}
            }
            return state

    def _estimate_duration(self, content: str) -> int:
        """Estimate lecture duration in minutes based on word count"""
        word_count = len(content.split())
        # Assume average speaking rate of 150 words per minute
        return max(1, round(word_count / 150))

    def generate_pdf(self, lecture_data: Dict[str, Any], output_path: str) -> bool:
        """Generate PDF version of the lecture"""
        try:
            doc = SimpleDocTemplate(output_path, pagesize=A4)
            styles = getSampleStyleSheet()

            # Custom styles
            title_style = ParagraphStyle(
                'CustomTitle',
                parent=styles['Heading1'],
                fontSize=24,
                spaceAfter=30,
                textColor=HexColor('#2C3E50'),
                alignment=1  # Center alignment
            )

            heading_style = ParagraphStyle(
                'CustomHeading',
                parent=styles['Heading2'],
                fontSize=18,
                spaceAfter=20,
                spaceBefore=20,
                textColor=HexColor('#34495E')
            )

            content = []

            # Title
            content.append(Paragraph(lecture_data['title'], title_style))
            content.append(Spacer(1, 20))

            # Metadata
            metadata_text = f"""
            <b>Generated:</b> {lecture_data['metadata']['generated_at']}<br/>
            <b>Style:</b> {lecture_data['metadata']['style'].title()}<br/>
            <b>Duration:</b> ~{lecture_data['metadata']['estimated_duration']} minutes<br/>
            <b>Word Count:</b> {lecture_data['metadata']['word_count']}
            """
            content.append(Paragraph(metadata_text, styles['Normal']))
            content.append(Spacer(1, 30))

            # Process lecture content
            lecture_lines = lecture_data['content'].split('\n')

            for line in lecture_lines:
                line = line.strip()
                if not line:
                    content.append(Spacer(1, 12))
                elif line.startswith('# '):
                    # Main title (already added)
                    continue
                elif line.startswith('## '):
                    # Section heading
                    content.append(Paragraph(line[3:], heading_style))
                elif line.startswith('---'):
                    # Section separator
                    content.append(Spacer(1, 20))
                else:
                    # Regular content
                    content.append(Paragraph(line, styles['Normal']))
                    content.append(Spacer(1, 6))

            doc.build(content)
            return True

        except Exception as e:
            logger.error(f"PDF generation failed: {str(e)}")
            return False
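A minimal sketch of driving the LangGraph pipeline directly, without the UI. The topic string is a placeholder and the output directory is assumed to exist:

```python
# Minimal sketch (assumed usage; not part of the committed files)
from app.lecture_generator import LectureGenerator

generator = LectureGenerator()
result = generator.generate_lecture(
    pdf_content="",                    # empty string: generate from objectives only
    style="casual",
    include_examples=True,
    learning_objectives="Explain the basics of gradient descent",  # placeholder topic
)

if result['success']:
    print(result['title'], f"~{result['estimated_duration']} min")
    generator.generate_pdf(result, "output/lecture_example.pdf")  # assumes ./output exists
```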
app/models.py
ADDED
@@ -0,0 +1,33 @@
from pydantic import BaseModel
from typing import List, Optional, Dict, Any
from datetime import datetime

class ChatMessage(BaseModel):
    role: str  # "user" or "assistant"
    content: str
    timestamp: datetime = datetime.now()

class ChatSession(BaseModel):
    session_id: str
    messages: List[ChatMessage] = []
    pdf_content: Optional[str] = None
    lecture_content: Optional[str] = None
    created_at: datetime = datetime.now()

class LectureRequest(BaseModel):
    pdf_content: str
    style: str = "academic"  # academic, casual, detailed
    include_examples: bool = True

class LectureResponse(BaseModel):
    title: str
    content: str
    sections: List[Dict[str, Any]]
    word_count: int
    estimated_duration: int  # in minutes

class ProcessingStatus(BaseModel):
    status: str  # "processing", "completed", "error"
    progress: int  # 0-100
    message: str
    result: Optional[Dict[str, Any]] = None
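A short sketch of the Pydantic models in use. Note that `timestamp` and `created_at` default to `datetime.now()` evaluated once at class-definition time; `Field(default_factory=datetime.now)` would give per-instance timestamps if that is the intent:

```python
# Minimal usage sketch (assumed; not part of the committed files)
from app.models import ChatMessage, ChatSession

msg = ChatMessage(role="user", content="What is this document about?")
session = ChatSession(session_id="demo-session", messages=[msg])

# Pydantic v2 serialization; on v1 this would be session.json()
print(session.model_dump_json(indent=2))
```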
app/pdf_processor.py
ADDED
@@ -0,0 +1,189 @@
import PyPDF2
import pdfplumber
from typing import Dict, List, Optional, Union, Any
import re
import logging
import io

logger = logging.getLogger(__name__)

class PDFProcessor:
    """Handles PDF file processing and text extraction"""

    def __init__(self):
        self.supported_formats = ['.pdf']

    def extract_text_from_pdf(self, pdf_file) -> Dict[str, Any]:
        """
        Extract text content from PDF file
        Returns structured data with text, metadata, and page information
        """
        try:
            # Handle bytes input from Gradio
            if isinstance(pdf_file, bytes):
                pdf_file = io.BytesIO(pdf_file)

            # Try pdfplumber first (better for complex layouts)
            with pdfplumber.open(pdf_file) as pdf:
                text_content = []
                metadata = {
                    'total_pages': len(pdf.pages),
                    'title': '',
                    'author': '',
                    'subject': ''
                }

                # Extract metadata if available
                if pdf.metadata:
                    metadata.update({
                        'title': pdf.metadata.get('Title', ''),
                        'author': pdf.metadata.get('Author', ''),
                        'subject': pdf.metadata.get('Subject', '')
                    })

                # Extract text from each page
                for page_num, page in enumerate(pdf.pages, 1):
                    page_text = page.extract_text()
                    if page_text:
                        text_content.append({
                            'page_number': page_num,
                            'text': self._clean_text(page_text)
                        })

                combined_text = '\n\n'.join([page['text'] for page in text_content])

                return {
                    'success': True,
                    'text': combined_text,
                    'pages': text_content,
                    'metadata': metadata,
                    'word_count': len(combined_text.split()),
                    'character_count': len(combined_text)
                }

        except Exception as e:
            logger.error(f"pdfplumber extraction failed: {str(e)}")
            # Fallback to PyPDF2
            return self._extract_with_pypdf2(pdf_file)

    def _extract_with_pypdf2(self, pdf_file) -> Dict[str, Any]:
        """Fallback method using PyPDF2"""
        try:
            # Handle bytes input from Gradio
            if isinstance(pdf_file, bytes):
                pdf_file = io.BytesIO(pdf_file)
            else:
                pdf_file.seek(0)  # Reset file pointer
            reader = PyPDF2.PdfReader(pdf_file)

            text_content = []
            metadata = {
                'total_pages': len(reader.pages),
                'title': '',
                'author': '',
                'subject': ''
            }

            # Extract metadata
            if reader.metadata:
                metadata.update({
                    'title': reader.metadata.get('/Title', ''),
                    'author': reader.metadata.get('/Author', ''),
                    'subject': reader.metadata.get('/Subject', '')
                })

            # Extract text from each page
            for page_num, page in enumerate(reader.pages, 1):
                page_text = page.extract_text()
                if page_text:
                    text_content.append({
                        'page_number': page_num,
                        'text': self._clean_text(page_text)
                    })

            combined_text = '\n\n'.join([page['text'] for page in text_content])

            return {
                'success': True,
                'text': combined_text,
                'pages': text_content,
                'metadata': metadata,
                'word_count': len(combined_text.split()),
                'character_count': len(combined_text)
            }

        except Exception as e:
            logger.error(f"PyPDF2 extraction failed: {str(e)}")
            return {
                'success': False,
                'error': f"Failed to extract text from PDF: {str(e)}",
                'text': '',
                'pages': [],
                'metadata': {},
                'word_count': 0,
                'character_count': 0
            }

    def _clean_text(self, text: str) -> str:
        """Clean and normalize extracted text"""
        # Remove excessive whitespace
        text = re.sub(r'\s+', ' ', text)

        # Remove page numbers and headers/footers (common patterns)
        text = re.sub(r'\n\d+\n', '\n', text)

        # Fix common PDF extraction issues
        text = re.sub(r'([a-z])([A-Z])', r'\1 \2', text)  # Split concatenated words
        text = re.sub(r'(\w)-\n(\w)', r'\1\2', text)  # Fix hyphenated words across lines

        # Remove excessive line breaks
        text = re.sub(r'\n{3,}', '\n\n', text)

        return text.strip()

    def validate_pdf(self, pdf_file) -> Dict[str, Any]:
        """Validate PDF file before processing"""
        try:
            # Handle bytes input from Gradio
            if isinstance(pdf_file, bytes):
                file_size = len(pdf_file)
                pdf_file = io.BytesIO(pdf_file)
            else:
                # Check file size (limit to 50MB)
                pdf_file.seek(0, 2)  # Seek to end
                file_size = pdf_file.tell()
                pdf_file.seek(0)  # Reset to beginning

            if file_size > 50 * 1024 * 1024:  # 50MB limit
                return {
                    'valid': False,
                    'error': 'File size exceeds 50MB limit'
                }

            # Try to open the PDF to validate format
            try:
                reader = PyPDF2.PdfReader(pdf_file)
                if len(reader.pages) == 0:
                    return {
                        'valid': False,
                        'error': 'PDF contains no pages'
                    }
                pdf_file.seek(0)  # Reset file pointer

                return {
                    'valid': True,
                    'pages': len(reader.pages),
                    'size_mb': round(file_size / (1024 * 1024), 2)
                }

            except Exception as e:
                return {
                    'valid': False,
                    'error': f'Invalid PDF format: {str(e)}'
                }

        except Exception as e:
            return {
                'valid': False,
                'error': f'Error validating PDF: {str(e)}'
            }
app/rag_system.py
ADDED
@@ -0,0 +1,279 @@
import chromadb
from chromadb.utils import embedding_functions
import openai
import os
import logging
from typing import List, Dict, Any, Optional
import uuid
from datetime import datetime
import numpy as np
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

logger = logging.getLogger(__name__)

class RAGSystem:
    """Retrieval-Augmented Generation system for chatbot functionality"""

    def __init__(self, persist_directory: str = "chroma_db"):
        self.client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY", ""))

        # Initialize ChromaDB
        self.chroma_client = chromadb.PersistentClient(path=persist_directory)

        # Create embedding function
        self.embedding_function = embedding_functions.OpenAIEmbeddingFunction(
            api_key=os.getenv("OPENAI_API_KEY", ""),
            model_name="text-embedding-ada-002"
        )

        # Collections for different document types
        self.pdf_collection = self._get_or_create_collection("pdf_documents")
        self.lecture_collection = self._get_or_create_collection("lecture_content")

    def _get_or_create_collection(self, name: str):
        """Get existing collection or create new one"""
        try:
            return self.chroma_client.get_collection(
                name=name,
                embedding_function=self.embedding_function
            )
        except Exception:
            return self.chroma_client.create_collection(
                name=name,
                embedding_function=self.embedding_function,
                metadata={"description": f"Collection for {name}"}
            )

    def add_pdf_content(self, session_id: str, pdf_content: str, metadata: Optional[Dict[str, Any]] = None) -> bool:
        """Add PDF content to the vector database"""
        try:
            # Split content into chunks
            chunks = self._split_text(pdf_content, chunk_size=1000, overlap=200)

            # Prepare documents for insertion
            documents = []
            metadatas = []
            ids = []

            base_metadata = {
                "session_id": session_id,
                "document_type": "pdf",
                "added_at": datetime.now().isoformat(),
                **(metadata or {})
            }

            for i, chunk in enumerate(chunks):
                doc_id = f"{session_id}_pdf_{i}_{uuid.uuid4().hex[:8]}"

                documents.append(chunk)
                metadatas.append({
                    **base_metadata,
                    "chunk_index": i,
                    "chunk_id": doc_id
                })
                ids.append(doc_id)

            # Add to collection
            self.pdf_collection.add(
                documents=documents,
                metadatas=metadatas,
                ids=ids
            )

            logger.info(f"Added {len(chunks)} PDF chunks for session {session_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to add PDF content: {str(e)}")
            return False

    def add_lecture_content(self, session_id: str, lecture_content: str, metadata: Optional[Dict[str, Any]] = None) -> bool:
        """Add lecture content to the vector database"""
        try:
            # Split content into chunks
            chunks = self._split_text(lecture_content, chunk_size=1000, overlap=200)

            documents = []
            metadatas = []
            ids = []

            base_metadata = {
                "session_id": session_id,
                "document_type": "lecture",
                "added_at": datetime.now().isoformat(),
                **(metadata or {})
            }

            for i, chunk in enumerate(chunks):
                doc_id = f"{session_id}_lecture_{i}_{uuid.uuid4().hex[:8]}"

                documents.append(chunk)
                metadatas.append({
                    **base_metadata,
                    "chunk_index": i,
                    "chunk_id": doc_id
                })
                ids.append(doc_id)

            # Add to collection
            self.lecture_collection.add(
                documents=documents,
                metadatas=metadatas,
                ids=ids
            )

            logger.info(f"Added {len(chunks)} lecture chunks for session {session_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to add lecture content: {str(e)}")
            return False

    def retrieve_relevant_content(self, session_id: str, query: str, n_results: int = 5) -> Dict[str, Any]:
        """Retrieve relevant content for a query"""
        try:
            # Search in both collections
            pdf_results = self.pdf_collection.query(
                query_texts=[query],
                n_results=n_results,
                where={"session_id": session_id}
            )

            lecture_results = self.lecture_collection.query(
                query_texts=[query],
                n_results=n_results,
                where={"session_id": session_id}
            )

            # Combine and rank results
            all_results = []

            # Process PDF results
            if pdf_results['documents'] and pdf_results['documents'][0]:
                for i, doc in enumerate(pdf_results['documents'][0]):
                    all_results.append({
                        'content': doc,
                        'metadata': pdf_results['metadatas'][0][i],
                        'distance': pdf_results['distances'][0][i],
                        'source': 'pdf'
                    })

            # Process lecture results
            if lecture_results['documents'] and lecture_results['documents'][0]:
                for i, doc in enumerate(lecture_results['documents'][0]):
                    all_results.append({
                        'content': doc,
                        'metadata': lecture_results['metadatas'][0][i],
                        'distance': lecture_results['distances'][0][i],
                        'source': 'lecture'
                    })

            # Sort by relevance (lower distance = more relevant)
            all_results.sort(key=lambda x: x['distance'])

            return {
                'success': True,
                'results': all_results[:n_results],
                'total_found': len(all_results)
            }

        except Exception as e:
            logger.error(f"Content retrieval failed: {str(e)}")
            return {
                'success': False,
                'results': [],
                'total_found': 0,
                'error': str(e)
            }

    def _split_text(self, text: str, chunk_size: int = 1000, overlap: int = 200) -> List[str]:
        """Split text into overlapping chunks"""
        if len(text) <= chunk_size:
            return [text]

        chunks = []
        start = 0

        while start < len(text):
            end = start + chunk_size

            # Try to end at a sentence boundary
            if end < len(text):
                # Look for sentence endings within the last 100 characters
                search_start = max(end - 100, start)
                sentence_ends = []

                for punct in ['. ', '! ', '? ', '\n\n']:
                    pos = text.rfind(punct, search_start, end)
                    if pos > start:
                        sentence_ends.append(pos + len(punct))

                if sentence_ends:
                    end = max(sentence_ends)

            chunk = text[start:end].strip()
            if chunk:
                chunks.append(chunk)

            # Move start position with overlap
            start = end - overlap
            if start >= len(text):
                break

        return chunks

    def get_session_stats(self, session_id: str) -> Dict[str, Any]:
        """Get statistics about stored content for a session"""
        try:
            # Count PDF chunks
            pdf_count = len(self.pdf_collection.get(
                where={"session_id": session_id}
            )['ids'])

            # Count lecture chunks
            lecture_count = len(self.lecture_collection.get(
                where={"session_id": session_id}
            )['ids'])

            return {
                'pdf_chunks': pdf_count,
                'lecture_chunks': lecture_count,
                'total_chunks': pdf_count + lecture_count
            }

        except Exception as e:
            logger.error(f"Failed to get session stats: {str(e)}")
            return {
                'pdf_chunks': 0,
                'lecture_chunks': 0,
                'total_chunks': 0
            }

    def clear_session_data(self, session_id: str) -> bool:
        """Clear all data for a specific session"""
        try:
            # Get all document IDs for this session
            pdf_ids = self.pdf_collection.get(
                where={"session_id": session_id}
            )['ids']

            lecture_ids = self.lecture_collection.get(
                where={"session_id": session_id}
            )['ids']

            # Delete documents
            if pdf_ids:
                self.pdf_collection.delete(ids=pdf_ids)

            if lecture_ids:
                self.lecture_collection.delete(ids=lecture_ids)

            logger.info(f"Cleared data for session {session_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to clear session data: {str(e)}")
            return False
app/utils.py
ADDED
@@ -0,0 +1,167 @@
import os
import logging
import hashlib
from typing import Dict, Any, Optional
from datetime import datetime
import json
from dotenv import load_dotenv

def setup_logging():
    """Setup logging configuration"""
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[
            logging.StreamHandler(),
            logging.FileHandler('app.log')
        ]
    )

def ensure_directory(path: str) -> bool:
    """Ensure directory exists, create if it doesn't"""
    try:
        os.makedirs(path, exist_ok=True)
        return True
    except Exception as e:
        logging.error(f"Failed to create directory {path}: {str(e)}")
        return False

def generate_file_hash(content: str) -> str:
    """Generate SHA-256 hash for content"""
    return hashlib.sha256(content.encode()).hexdigest()

def sanitize_filename(filename: str) -> str:
    """Sanitize filename for safe file system operations"""
    import re
    # Remove invalid characters
    filename = re.sub(r'[<>:"/\\|?*]', '_', filename)
    # Limit length
    if len(filename) > 255:
        name, ext = os.path.splitext(filename)
        filename = name[:255 - len(ext)] + ext
    return filename

def format_file_size(size_bytes: int) -> str:
    """Format file size in human readable format"""
    if size_bytes == 0:
        return "0B"

    size_names = ["B", "KB", "MB", "GB", "TB"]
    import math
    i = int(math.floor(math.log(size_bytes, 1024)))
    p = math.pow(1024, i)
    s = round(size_bytes / p, 2)
    return f"{s} {size_names[i]}"

def validate_environment():
    """Validate required environment variables"""
    required_vars = ['OPENAI_API_KEY']
    missing_vars = []

    for var in required_vars:
        if not os.getenv(var):
            missing_vars.append(var)

    if missing_vars:
        logging.warning(f"Missing environment variables: {', '.join(missing_vars)}")
        return False

    return True

def save_json_file(data: Dict[Any, Any], filepath: str) -> bool:
    """Save data to JSON file"""
    try:
        ensure_directory(os.path.dirname(filepath))
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        return True
    except Exception as e:
        logging.error(f"Failed to save JSON file {filepath}: {str(e)}")
        return False

def load_json_file(filepath: str) -> Optional[Dict[Any, Any]]:
    """Load data from JSON file"""
    try:
        if not os.path.exists(filepath):
            return None

        with open(filepath, 'r', encoding='utf-8') as f:
            return json.load(f)
    except Exception as e:
        logging.error(f"Failed to load JSON file {filepath}: {str(e)}")
        return None

def cleanup_old_files(directory: str, max_age_hours: int = 24) -> int:
    """Clean up old files in directory"""
    try:
        if not os.path.exists(directory):
            return 0

        now = datetime.now()
        removed_count = 0

        for filename in os.listdir(directory):
            filepath = os.path.join(directory, filename)

            if os.path.isfile(filepath):
                file_age = now - datetime.fromtimestamp(os.path.getmtime(filepath))
                if file_age.total_seconds() > max_age_hours * 3600:
                    try:
                        os.remove(filepath)
                        removed_count += 1
                        logging.info(f"Removed old file: {filepath}")
                    except Exception as e:
                        logging.error(f"Failed to remove file {filepath}: {str(e)}")

        return removed_count

    except Exception as e:
        logging.error(f"Failed to cleanup directory {directory}: {str(e)}")
        return 0

def get_system_info() -> Dict[str, Any]:
    """Get basic system information"""
    import platform
    import psutil

    try:
        return {
            'platform': platform.system(),
            'python_version': platform.python_version(),
            'cpu_count': os.cpu_count(),
            'memory_gb': round(psutil.virtual_memory().total / (1024**3), 2),
            'disk_usage': {
                'total_gb': round(psutil.disk_usage('/').total / (1024**3), 2),
                'free_gb': round(psutil.disk_usage('/').free / (1024**3), 2)
            }
        }
    except Exception as e:
        logging.error(f"Failed to get system info: {str(e)}")
        return {'error': str(e)}

def measure_execution_time(func):
    """Decorator to measure function execution time"""
    import functools
    import time

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        execution_time = end_time - start_time

        logging.info(f"{func.__name__} executed in {execution_time:.2f} seconds")
        return result

    return wrapper

# Load environment variables
load_dotenv()

# Initialize logging when module is imported
setup_logging()

# Validate environment on import
if not validate_environment():
    logging.warning("Environment validation failed. Some features may not work properly.")
app/voice_synthesizer.py
ADDED
@@ -0,0 +1,223 @@
import openai
import os
import logging
from typing import Dict, Any, Optional
from pathlib import Path
import tempfile
import io
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

logger = logging.getLogger(__name__)

class VoiceSynthesizer:
    """Handles text-to-speech conversion for lecture content"""

    def __init__(self):
        self.client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY", ""))
        self.supported_voices = [
            "alloy", "echo", "fable", "onyx", "nova", "shimmer"
        ]
        self.default_voice = "nova"

    def synthesize_lecture(self, lecture_content: str, voice: Optional[str] = None, output_path: Optional[str] = None) -> Dict[str, Any]:
        """
        Convert lecture text to speech using OpenAI TTS

        Args:
            lecture_content: The lecture text to convert
            voice: Voice to use (alloy, echo, fable, onyx, nova, shimmer)
            output_path: Where to save the audio file

        Returns:
            Dict with success status, file path, and metadata
        """
        try:
            if not lecture_content.strip():
                return {
                    'success': False,
                    'error': 'No content provided for synthesis',
                    'file_path': None,
                    'duration': 0
                }

            # Validate and set voice
            selected_voice = voice if voice in self.supported_voices else self.default_voice

            # Prepare content for TTS (remove markdown formatting)
            clean_content = self._clean_content_for_tts(lecture_content)

            # Split content into chunks if too long (OpenAI TTS has limits)
            chunks = self._split_content(clean_content, max_length=4000)

            if not output_path:
                output_path = os.path.join("output", f"lecture_audio_{hash(lecture_content)}.mp3")

            # Ensure output directory exists
            os.makedirs(os.path.dirname(output_path), exist_ok=True)

            if len(chunks) == 1:
                # Single chunk - direct synthesis
                response = self.client.audio.speech.create(
                    model="tts-1",
                    voice=selected_voice,
                    input=chunks[0],
                    response_format="mp3"
                )

                # Save the audio file
                with open(output_path, "wb") as f:
                    f.write(response.content)

            else:
                # Multiple chunks - synthesize and combine
                self._synthesize_multiple_chunks(chunks, selected_voice, output_path)

            # Get file size and estimate duration
            file_size = os.path.getsize(output_path)
            estimated_duration = self._estimate_audio_duration(clean_content)

            return {
                'success': True,
                'file_path': output_path,
                'voice': selected_voice,
                'duration': estimated_duration,
                'file_size': file_size,
                'chunks_count': len(chunks)
            }

        except Exception as e:
            logger.error(f"Voice synthesis failed: {str(e)}")
            return {
                'success': False,
                'error': str(e),
                'file_path': None,
                'duration': 0
            }

    def _clean_content_for_tts(self, content: str) -> str:
        """Clean markdown and formatting for better TTS output"""
        import re

        # Remove markdown headers
        content = re.sub(r'^#{1,6}\s+', '', content, flags=re.MULTILINE)

        # Remove markdown emphasis
        content = re.sub(r'\*\*(.*?)\*\*', r'\1', content)  # Bold
        content = re.sub(r'\*(.*?)\*', r'\1', content)  # Italic

        # Remove markdown links
        content = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', content)

        # Remove horizontal rules
        content = re.sub(r'^---+$', '', content, flags=re.MULTILINE)

        # Clean up extra whitespace
        content = re.sub(r'\n{3,}', '\n\n', content)
        content = re.sub(r' {2,}', ' ', content)

        # Add pauses for better speech flow
        content = re.sub(r'\n\n', '\n\n... \n\n', content)  # Longer pause between sections

        return content.strip()

    def _split_content(self, content: str, max_length: int = 4000) -> list:
        """Split content into chunks suitable for TTS API"""
        if len(content) <= max_length:
            return [content]

        chunks = []
        sentences = content.split('. ')
        current_chunk = ""

        for sentence in sentences:
            # Check if adding this sentence would exceed the limit
            if len(current_chunk) + len(sentence) + 2 > max_length:
                if current_chunk:
                    chunks.append(current_chunk.strip())
                    current_chunk = sentence + ". "
                else:
                    # Single sentence is too long, split by words
                    words = sentence.split()
                    word_chunk = ""
                    for word in words:
                        if len(word_chunk) + len(word) + 1 > max_length:
                            if word_chunk:
                                chunks.append(word_chunk.strip())
                                word_chunk = word + " "
                            else:
                                # Single word is too long, truncate
                                chunks.append(word[:max_length])
                        else:
                            word_chunk += word + " "
                    if word_chunk:
                        current_chunk = word_chunk + ". "
            else:
                current_chunk += sentence + ". "

        if current_chunk:
            chunks.append(current_chunk.strip())

        return [chunk for chunk in chunks if chunk.strip()]

    def _synthesize_multiple_chunks(self, chunks: list, voice: str, output_path: str):
        """Synthesize multiple chunks and combine them"""
        import tempfile
        import shutil

        temp_files = []

        try:
            # Synthesize each chunk
            for i, chunk in enumerate(chunks):
                temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=f"_chunk_{i}.mp3")
                temp_files.append(temp_file.name)
                temp_file.close()

                response = self.client.audio.speech.create(
                    model="tts-1",
                    voice=voice,
                    input=chunk,
                    response_format="mp3"
                )

                with open(temp_file.name, "wb") as f:
                    f.write(response.content)

            # Combine audio files (simple concatenation for MP3)
            with open(output_path, "wb") as outfile:
                for temp_file in temp_files:
                    with open(temp_file, "rb") as infile:
                        shutil.copyfileobj(infile, outfile)

        finally:
            # Clean up temporary files
            for temp_file in temp_files:
                try:
                    os.unlink(temp_file)
                except OSError:
                    pass

    def _estimate_audio_duration(self, content: str) -> int:
        """Estimate audio duration in seconds based on content length"""
        # Average speaking rate: ~150 words per minute
        word_count = len(content.split())
        duration_minutes = word_count / 150
        return int(duration_minutes * 60)

    def get_available_voices(self) -> Dict[str, str]:
        """Get list of available voices with descriptions"""
        return {
            "alloy": "Neutral, balanced voice",
            "echo": "Crisp, clear voice",
            "fable": "Warm, engaging voice",
            "onyx": "Deep, authoritative voice",
            "nova": "Pleasant, professional voice (default)",
            "shimmer": "Bright, energetic voice"
        }

    def validate_voice(self, voice: str) -> bool:
        """Validate if the provided voice is supported"""
        return voice in self.supported_voices
generated-icon.png
ADDED
(binary image file, tracked with Git LFS)
main.py
ADDED
@@ -0,0 +1,18 @@
import uvicorn
import gradio as gr
from app.gradio_interface import create_gradio_interface
import os

# Create output directory if it doesn't exist
os.makedirs("output", exist_ok=True)

# Create Gradio interface
gradio_app = create_gradio_interface()

if __name__ == "__main__":
    gradio_app.launch(
        server_name="0.0.0.0",
        server_port=5000,
        share=False,
        show_error=True
    )
requirements.txt
ADDED
@@ -0,0 +1,14 @@
chromadb>=1.0.12
fastapi>=0.115.9
gradio>=5.33.0
langgraph>=0.4.8
numpy>=2.3.0
openai>=1.85.0
pdfplumber>=0.11.6
psutil>=7.0.0
pypdf2>=3.0.1
reportlab>=4.4.1
uvicorn>=0.34.3
pydantic
typing-extensions
python-dotenv
static/style.css
ADDED
@@ -0,0 +1,264 @@
/* Custom styles for AI Lecture Generator */

:root {
    --primary-color: #2c3e50;
    --secondary-color: #3498db;
    --success-color: #27ae60;
    --warning-color: #f39c12;
    --error-color: #e74c3c;
    --background-color: #f8f9fa;
    --text-color: #2c3e50;
    --border-color: #ddd;
}

/* Global styles */
body {
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    background-color: var(--background-color);
    color: var(--text-color);
    line-height: 1.6;
}

/* Header styles */
.gradio-container h1 {
    color: var(--primary-color);
    text-align: center;
    margin-bottom: 2rem;
    font-size: 2.5rem;
    font-weight: bold;
}

/* Status boxes */
.status-box {
    padding: 15px;
    border-radius: 8px;
    margin: 15px 0;
    font-weight: 500;
    border-left: 4px solid;
}

.status-box.success {
    background-color: #d4edda;
    border-left-color: var(--success-color);
    color: #155724;
}

.status-box.error {
    background-color: #f8d7da;
    border-left-color: var(--error-color);
    color: #721c24;
}

.status-box.processing {
    background-color: #d1ecf1;
    border-left-color: var(--secondary-color);
    color: #0c5460;
}

.status-box.warning {
    background-color: #fff3cd;
    border-left-color: var(--warning-color);
    color: #856404;
}

/* Button styles */
.gradio-button {
    background: linear-gradient(135deg, var(--secondary-color), #2980b9);
    color: white;
    border: none;
    padding: 12px 24px;
    border-radius: 6px;
    font-weight: 500;
    transition: all 0.3s ease;
    cursor: pointer;
}

.gradio-button:hover {
    transform: translateY(-2px);
    box-shadow: 0 4px 12px rgba(52, 152, 219, 0.3);
}

.gradio-button.primary {
    background: linear-gradient(135deg, var(--success-color), #229954);
}

.gradio-button.secondary {
    background: linear-gradient(135deg, #95a5a6, #7f8c8d);
}

/* File upload area */
.file-upload {
    border: 2px dashed var(--border-color);
    border-radius: 8px;
    padding: 2rem;
    text-align: center;
    transition: all 0.3s ease;
}

.file-upload:hover {
    border-color: var(--secondary-color);
    background-color: #f0f8ff;
}

/* Input fields */
.gradio-textbox,
.gradio-dropdown {
    border: 1px solid var(--border-color);
    border-radius: 6px;
    padding: 10px;
    font-size: 14px;
    transition: border-color 0.3s ease;
}

.gradio-textbox:focus,
.gradio-dropdown:focus {
    border-color: var(--secondary-color);
    outline: none;
    box-shadow: 0 0 0 3px rgba(52, 152, 219, 0.1);
}

/* Chat interface */
.chatbot {
    border: 1px solid var(--border-color);
    border-radius: 8px;
    background: white;
    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
}

.chat-message {
    padding: 12px;
    margin: 8px;
    border-radius: 8px;
    max-width: 80%;
}

.chat-message.user {
    background: var(--secondary-color);
    color: white;
    margin-left: auto;
}

.chat-message.assistant {
    background: #f1f3f4;
    color: var(--text-color);
    margin-right: auto;
}

/* Tab styles */
.gradio-tab {
    border-bottom: 3px solid transparent;
    padding: 12px 24px;
    font-weight: 500;
    transition: all 0.3s ease;
}

.gradio-tab.selected {
    border-bottom-color: var(--secondary-color);
    color: var(--secondary-color);
}

/* Card-like containers */
.gradio-column {
    background: white;
    border-radius: 8px;
    padding: 20px;
    margin: 10px;
    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
}

/* JSON display */
.gradio-json {
    background: #f8f9fa;
    border: 1px solid var(--border-color);
    border-radius: 6px;
    padding: 15px;
    font-family: 'Courier New', monospace;
    font-size: 13px;
}

/* Loading animation */
.loading {
    display: inline-block;
    width: 20px;
    height: 20px;
    border: 3px solid #f3f3f3;
    border-top: 3px solid var(--secondary-color);
    border-radius: 50%;
    animation: spin 1s linear infinite;
}

@keyframes spin {
    0% { transform: rotate(0deg); }
    100% { transform: rotate(360deg); }
}

/* Progress bar */
.progress-bar {
    width: 100%;
    height: 8px;
    background-color: #f0f0f0;
    border-radius: 4px;
    overflow: hidden;
}

.progress-bar-fill {
    height: 100%;
    background: linear-gradient(90deg, var(--secondary-color), var(--success-color));
    transition: width 0.3s ease;
}

/* Responsive design */
@media (max-width: 768px) {
    .gradio-container {
        padding: 10px;
    }

    .gradio-container h1 {
        font-size: 2rem;
    }

    .gradio-column {
        margin: 5px;
        padding: 15px;
    }

    .chat-message {
        max-width: 95%;
    }
}

/* Accessibility improvements */
.gradio-button:focus,
.gradio-textbox:focus,
.gradio-dropdown:focus {
    outline: 2px solid var(--secondary-color);
    outline-offset: 2px;
}

/* Dark mode support */
@media (prefers-color-scheme: dark) {
    :root {
        --background-color: #1a1a1a;
        --text-color: #ffffff;
        --border-color: #404040;
    }

    .gradio-column {
        background: #2d2d2d;
        color: var(--text-color);
    }

    .gradio-json {
        background: #2d2d2d;
        color: var(--text-color);
    }

    .chatbot {
        background: #2d2d2d;
    }

    .chat-message.assistant {
        background: #404040;
        color: var(--text-color);
    }
}