jzou19950715 committed on
Commit
16af053
·
verified ·
1 Parent(s): 793ffff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +611 -571
app.py CHANGED
@@ -1,602 +1,642 @@
1
- import json #1
2
- import logging #2
3
- from datetime import datetime #3
4
- from typing import Dict, List, Optional, Tuple, Any #4
5
- from dataclasses import dataclass #5
6
- from pathlib import Path #6
7
- #7
8
- # Third-party imports #8
9
- import gradio as gr #9
10
- from openai import OpenAI #10
11
- #11
12
- # Configure logging #12
13
- logging.basicConfig( #13
14
- level=logging.INFO, #14
15
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' #15
16
- ) #16
17
- logger = logging.getLogger(__name__) #17
18
- #18
19
- # System prompt for the AI #19
20
- SYSTEM_PROMPT = """ #20
21
- You are LOSS DOG (Life, Occupation & Student Story Digital Output Generator), an engaging and
22
- supportive information collector that focuses on understanding a person's complete educational
23
- and professional journey. Your approach is flexible, allowing both structured metrics and personal narratives.
24
-
25
- Core Traits:
26
- - Conversational and natural in gathering information
27
- - Equally values quantitative achievements and qualitative experiences
28
- - Adapts to each person's unique story
29
- - Encourages sharing of both metrics and personal growth
30
- - Maintains context throughout the conversation
31
- """
32
- #32
33
@dataclass
class ConversationState:
    """Track the state of the conversation and profile completion.

    The list fields default to ``None`` and are materialized per-instance in
    ``__post_init__`` so instances never share a mutable default.
    """
    sections_completed: Optional[List[str]] = None   # topics fully covered so far
    sections_partial: Optional[List[str]] = None     # topics touched but unfinished
    current_section: Optional[str] = None            # topic currently under discussion
    completion_percentage: float = 0.0               # rough 0-100 progress estimate
    last_error: Optional[str] = None                 # most recent processing error, if any

    def __post_init__(self):
        # BUG FIX: the original unconditionally reset both lists to [] here,
        # silently discarding any lists a caller passed to the constructor.
        # Only fill in defaults when the caller supplied nothing.
        if self.sections_completed is None:
            self.sections_completed = []
        if self.sections_partial is None:
            self.sections_partial = []
45
- #45
46
class ProfileAnalyzer:
    """Analyzes and structures conversation data flexibly."""

    @staticmethod
    def analyze_content(text: str) -> Dict[str, Any]:
        """Extract key information from text.

        Returns a skeleton result dict whose buckets are filled in by
        downstream processing; the input text is not inspected here.
        """
        return {
            "categories": [],
            "metrics": {},
            "experiences": [],
            "achievements": [],
            "skills": [],
        }

    @staticmethod
    def clean_data(data: Dict[str, Any]) -> Dict[str, Any]:
        """Clean and validate extracted data.

        Recursively drops dict entries and list elements whose value is
        ``None``, an empty string, an empty list, or an empty dict.
        """
        empties = (None, "", [], {})

        def _scrub(node):
            if isinstance(node, dict):
                return {key: _scrub(child)
                        for key, child in node.items()
                        if child not in empties}
            if isinstance(node, list):
                return [_scrub(entry) for entry in node if entry not in empties]
            return node

        return _scrub(data)
71
- #71
72
- class EducationCareerCollector: #72
73
- """Main collector class for handling career and education information.""" #73
74
- #74
75
- def __init__(self): #75
76
- self.conversation_history = [] #76
77
- self.client = None #77
78
- self.state = ConversationState() #78
79
- self.analyzer = ProfileAnalyzer() #79
80
- #80
81
    def process_message(self, message: str, api_key: str) -> Dict[str, Any]:
        """Process a user message and return AI response with enhanced error handling.

        Appends the user message to the history, calls the chat-completions API
        (with up to 3 retries), records the assistant reply, refreshes the
        conversation state, and returns a result dict with keys ``content``,
        ``type`` ("success"/"error"), ``completion_status`` and ``timestamp``.
        Errors are never raised to the caller: they are logged, stored on
        ``self.state.last_error``, and returned as a ``type == "error"`` dict.
        """
        try:
            if not message.strip():
                raise ValueError("Message cannot be empty")

            # NOTE(review): the prefix check is done on the stripped key but a
            # key with interior issues still reaches the API unvalidated.
            if not api_key.strip().startswith('sk-'):
                raise ValueError("Invalid API key format")

            # Lazily create the client so the key is only needed on first use.
            if not self.client:
                self.client = OpenAI(api_key=api_key)

            # Add message to conversation history
            self.conversation_history.append({
                "role": "user",
                "content": message,
                "timestamp": datetime.now().isoformat()
            })

            # Get AI response with retry mechanism
            max_retries = 3
            last_error = None

            for attempt in range(max_retries):
                try:
                    response = self.client.chat.completions.create(
                        model="gpt-4o-mini",
                        messages=[
                            {"role": "system", "content": SYSTEM_PROMPT},
                            # Strip the local "timestamp" field: the API only
                            # accepts role/content message dicts.
                            *[{
                                "role": msg["role"],
                                "content": msg["content"]
                            } for msg in self.conversation_history]
                        ],
                        temperature=0.7,
                        max_tokens=1000
                    )
                    break
                except Exception as e:
                    last_error = str(e)
                    # Re-raise only after the final attempt fails.
                    if attempt == max_retries - 1:
                        raise Exception(f"Failed after {max_retries} attempts: {last_error}")
                    logger.warning(f"Attempt {attempt + 1} failed: {last_error}")
                    continue

            # Process response
            ai_message = response.choices[0].message.content
            self.conversation_history.append({
                "role": "assistant",
                "content": ai_message,
                "timestamp": datetime.now().isoformat()
            })

            # Analyze response and update state
            self._update_conversation_state(ai_message)

            return {
                "content": ai_message,
                "type": "success",
                "completion_status": self.get_completion_status(),
                "timestamp": datetime.now().isoformat()
            }

        except Exception as e:
            error_msg = f"Error processing message: {str(e)}"
            logger.error(error_msg)
            self.state.last_error = error_msg
            return {
                "content": error_msg,
                "type": "error",
                "completion_status": self.get_completion_status(),
                "timestamp": datetime.now().isoformat()
            }
154
-
155
    def _update_conversation_state(self, ai_message: str) -> None:
        """Update the conversation state based on AI response.

        Issues a second, low-temperature API call asking the model to summarize
        progress as JSON, then copies the fields into ``self.state``.  All
        failures are swallowed (logged only) so a bad analysis never breaks the
        main chat flow; on a JSON parse failure a rough message-count-based
        completion estimate is used instead.

        NOTE(review): ``ai_message`` is not referenced directly — the analysis
        call re-sends the whole conversation history, which already contains it.
        """
        try:
            # Create analysis prompt
            analysis_prompt = """
            Review our conversation and identify:
            1. What topics or aspects of their journey were discussed?
            2. What areas need more exploration?
            3. What's the current focus of discussion?

            Response format:
            {
                "topics_discussed": [],
                "areas_needing_exploration": [],
                "current_focus": "",
                "completion_estimate": 0.0
            }
            """

            # Get analysis from AI (low temperature for more deterministic JSON)
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": SYSTEM_PROMPT},
                    *self.conversation_history,
                    {"role": "user", "content": analysis_prompt}
                ],
                temperature=0.3
            )

            # Process analysis.  NOTE(review): raw model output is parsed as
            # JSON without a response_format constraint, so this can fail.
            try:
                analysis = json.loads(response.choices[0].message.content)

                # Update state based on analysis
                self.state.sections_completed = analysis.get("topics_discussed", [])
                self.state.sections_partial = analysis.get("areas_needing_exploration", [])
                self.state.current_section = analysis.get("current_focus")
                self.state.completion_percentage = analysis.get("completion_estimate", 0.0)

            except json.JSONDecodeError as e:
                logger.error(f"Error parsing analysis JSON: {str(e)}")
                # Set default values on error: never let the estimate go
                # backwards; scale roughly with conversation length.
                self.state.completion_percentage = max(
                    self.state.completion_percentage,
                    len(self.conversation_history) * 5.0  # Rough estimate based on message count
                )

        except Exception as e:
            logger.error(f"Error updating conversation state: {str(e)}")
            # State remains unchanged on error
 
 
 
 
 
 
 
 
 
206
 
207
- def get_completion_status(self) -> Dict[str, Any]: #192
208
- """Get current completion status with rich context.""" #193
209
- status = { #194
210
- "completion_percentage": self.state.completion_percentage, #195
211
- "topics_covered": self.state.sections_completed, #196
212
- "topics_in_progress": self.state.sections_partial, #197
213
- "current_focus": self.state.current_section, #198
214
- "conversation_length": len(self.conversation_history), #199
215
- "last_update": datetime.now().isoformat(), #200
216
- "needs_attention": [ #201
217
- topic for topic in self.state.sections_partial #202
218
- if topic not in self.state.sections_completed #203
219
- ], #204
220
- "status_summary": self._generate_status_summary() #205
221
- } #206
222
-
223
- if self.state.last_error: #207
224
- status["last_error"] = self.state.last_error #208
225
 
226
- return status #209
227
-
228
- def _generate_status_summary(self) -> str: #210
229
- """Generate a human-readable summary of the conversation status.""" #211
230
- if not self.conversation_history: #212
231
- return "Ready to start the conversation." #213
 
 
 
 
 
232
 
233
- summary_parts = [] #214
234
-
235
- # Add completion status #215
236
- if self.state.completion_percentage > 0: #216
237
- summary_parts.append( #217
238
- f"Conversation is approximately {self.state.completion_percentage:.1f}% complete" #218
239
- ) #219
240
 
241
- # Add covered topics #220
242
- if self.state.sections_completed: #221
243
- topics = ", ".join(self.state.sections_completed) #222
244
- summary_parts.append(f"We've discussed: {topics}") #223
 
 
 
 
245
 
246
- # Add current focus #224
247
- if self.state.current_section: #225
248
- summary_parts.append( #226
249
- f"Currently focusing on: {self.state.current_section}" #227
250
- ) #228
251
 
252
- # Add next steps if any #229
253
- if self.state.sections_partial: #230
254
- topics = ", ".join(self.state.sections_partial) #231
255
- summary_parts.append(f"Topics to explore further: {topics}") #232
 
 
 
 
256
 
257
- return " | ".join(summary_parts) if summary_parts else "Conversation in progress." #233
258
-
259
    def generate_json(self, api_key: str) -> Tuple[Optional[str], str]:
        """Generate a JSON profile from the conversation history.

        Two chained API calls: the first asks the model to analyze the
        conversation, the second turns that analysis into a profile JSON.
        The cleaned profile (plus metadata) is written to a timestamped file.

        Returns:
            ``(filename, json_string)`` on success; ``(None, json_string)``
            when the file could not be saved or any step failed (in the
            failure case the string is an error JSON).
        """
        try:
            if not self.client:
                self.client = OpenAI(api_key=api_key)

            # Analysis prompt focused on understanding the conversation
            analysis_prompt = """
            Review our conversation and create a JSON structure that captures the person's journey.
            Focus on what was actually discussed, not fitting into predetermined categories.
            Include:
            1. Any experiences or achievements shared
            2. Skills or competencies demonstrated
            3. Timeline or progression points mentioned
            4. Notable metrics or outcomes
            5. Personal growth or learning moments

            Structure the JSON naturally around the topics they shared.
            """
            # Get initial analysis of conversation content
            analysis_response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": SYSTEM_PROMPT},
                    *self.conversation_history,
                    {"role": "user", "content": analysis_prompt}
                ],
                temperature=0.7
            )

            # Parse the initial analysis.
            # NOTE(review): raw model output is parsed without a
            # response_format constraint; a parse failure falls through to the
            # outer except and returns an error JSON.
            analysis = json.loads(analysis_response.choices[0].message.content)

            # Generate structured profile based on analysis
            profile_prompt = f"""
            Based on our conversation, create a detailed profile JSON.
            Use this analysis as a guide: {json.dumps(analysis, indent=2)}

            Important guidelines:
            - Create sections based on what was actually discussed
            - Include both quantitative and qualitative information
            - Preserve the context and significance of experiences
            - Maintain natural flow and connections between topics
            - Use descriptive section names that reflect the conversation
            """

            # Generate the profile JSON (lower temperature for more stable structure)
            profile_response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": SYSTEM_PROMPT},
                    *self.conversation_history,
                    {"role": "user", "content": profile_prompt}
                ],
                temperature=0.5
            )

            # Parse and clean the profile data
            profile_data = json.loads(profile_response.choices[0].message.content)

            # Clean the data: recursively drop empty / placeholder values.
            def clean_dict(d):
                if isinstance(d, dict):
                    return {k: clean_dict(v) for k, v in d.items()
                            if v not in (None, "", [], {}, "N/A", "None")}
                if isinstance(d, list):
                    return [clean_dict(item) for item in d
                            if item not in (None, "", [], {}, "N/A", "None")]
                return d

            profile_data = clean_dict(profile_data)

            # Add metadata
            profile_data["metadata"] = {
                "generated_at": datetime.now().isoformat(),
                "version": "2.0",
                "generation_metrics": {
                    "conversation_length": len(self.conversation_history),
                    "topics_covered": self.state.sections_completed,
                    "completion_percentage": self.state.completion_percentage
                }
            }

            # Save to file (timestamped name in the current working directory)
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"career_education_profile_{timestamp}.json"

            try:
                with open(filename, 'w', encoding='utf-8') as f:
                    json.dump(profile_data, f, indent=2, ensure_ascii=False)
                return (filename, json.dumps(profile_data, indent=2, ensure_ascii=False))
            except Exception as e:
                # Saving failed; still hand back the JSON string so the UI can
                # show a preview.
                logger.error(f"Error saving profile to file: {str(e)}")
                return (None, json.dumps(profile_data, indent=2, ensure_ascii=False))

        except Exception as e:
            error_msg = f"Error generating profile: {str(e)}"
            logger.error(error_msg)
            error_json = {
                "error": error_msg,
                "metadata": {
                    "generated_at": datetime.now().isoformat(),
                    "error_occurred": True
                }
            }
            return (None, json.dumps(error_json, indent=2))
365
-
366
def create_education_career_interface():
    """Create Gradio interface for the education and career collector.

    Builds a two-column chat UI: the left column holds the API-key field,
    status banner, chat window, message input and action buttons; the right
    column shows live progress, a JSON preview tab, a download tab and usage
    tips.  Returns the un-launched ``gr.Blocks`` app.

    NOTE(review): a single ``collector`` instance is captured by all the
    handlers below, so concurrent users of one server process share one
    conversation history.
    """
    collector = EducationCareerCollector()

    css = """
    .message { font-size: 16px; margin: 8px 0; }
    .system-message { color: #444; font-style: italic; }
    .user-message { color: #000; font-weight: 500; }
    .alert {
        padding: 12px;
        margin: 8px 0;
        border-radius: 4px;
    }
    .alert-info {
        background-color: #e8f4f8;
        border-left: 4px solid #4a90e2;
    }
    .alert-error {
        background-color: #fde8e8;
        border-left: 4px solid #f56565;
    }
    .alert-success {
        background-color: #e8f8e8;
        border-left: 4px solid #48bb78;
    }
    """

    with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
        gr.Markdown("""
        # 🐕 LOSS DOG - Profile Builder

        Share your career and education journey naturally.
        Tell your story in your own way - we'll capture what matters to you.
        """)

        with gr.Row():
            with gr.Column(scale=2):
                # API Key Input
                api_key = gr.Textbox(
                    label="OpenAI API Key",
                    type="password",
                    placeholder="Enter your OpenAI API key (sk-...)",
                    info="Your API key from platform.openai.com"
                )

                # Status Messages
                status_msg = gr.Markdown(
                    "Ready to start! Share your journey...",
                    elem_classes=["alert", "alert-info"]
                )

                # Chat Interface
                chatbot = gr.Chatbot(
                    height=400,
                    show_label=False,
                    elem_classes=["message"]
                )

                # Message Input
                with gr.Row():
                    msg = gr.Textbox(
                        label="Your message",
                        placeholder="Tell me about your journey...",
                        show_label=False,
                        scale=4
                    )
                    submit = gr.Button("Send", variant="primary", scale=1)

                # Action Buttons
                with gr.Row():
                    clear = gr.Button("🗑️ Clear Chat", scale=1)
                    generate = gr.Button("📄 Generate Profile", scale=2)
            with gr.Column(scale=1):
                # Progress Information
                progress_info = gr.Markdown(
                    "### Profile Progress\nStart sharing your story!",
                    elem_classes=["alert", "alert-info"]
                )

                # Profile Preview
                with gr.Tab("Preview"):
                    json_preview = gr.JSON(
                        label="Profile Preview",
                        show_label=True
                    )

                # Download Section
                with gr.Tab("Download"):
                    output_file = gr.File(
                        label="Download Profile"
                    )

                # Tips and Guidelines
                with gr.Accordion("💡 Tips", open=False):
                    gr.Markdown("""
                    ### Share Your Story Naturally

                    - Tell us about experiences that matter to you
                    - Include both achievements and challenges
                    - Share numbers when they're meaningful
                    - Describe your growth and learning
                    - Talk about what makes your journey unique
                    """)

        def process_message(message: str, history: list, key: str) -> tuple:
            """Process user message and update interface.

            Returns the updated chat history and a one-line status string.
            """
            if not message.strip():
                return history, "Please enter a message."

            try:
                # Process the message
                result = collector.process_message(message, key)

                # Update chat history (Gradio tuple format: (user, assistant))
                history.append((message, result["content"]))

                # Generate status message
                status = f"""Progress: {result['completion_status']['completion_percentage']:.1f}%
                | Topics covered: {len(result['completion_status']['topics_covered'])}"""

                return history, status

            except Exception as e:
                error_msg = f"Error: {str(e)}"
                logger.error(error_msg)
                return history, error_msg

        def generate_profile(key: str) -> tuple:
            """Generate and return profile JSON for the file, preview and status outputs."""
            try:
                filename, json_content = collector.generate_json(key)
                if filename:
                    return (
                        filename,
                        json.loads(json_content),
                        "Profile generated successfully! 🎉"
                    )
                # Profile built but file save failed: show preview anyway.
                return (
                    None,
                    json.loads(json_content),
                    "Profile generated but couldn't save file."
                )
            except Exception as e:
                error_msg = f"Error generating profile: {str(e)}"
                logger.error(error_msg)
                return None, {"error": error_msg}, error_msg

        def clear_interface() -> tuple:
            """Reset the interface state.

            NOTE(review): only the UI widgets are reset — the shared
            ``collector``'s conversation history is NOT cleared here.
            """
            return (
                [],  # Clear chat history
                "Ready to start! Share your journey...",  # Reset status
                "### Profile Progress\nStart sharing your story!",  # Reset progress
                None,  # Clear JSON preview
                None  # Clear file output
            )

        def update_progress(history: list) -> str:
            """Update progress information based on conversation."""
            if not history:
                return "### Profile Progress\nStart sharing your story!"

            # Get completion status
            status = collector.get_completion_status()

            # Format progress message
            progress_md = f"""### Profile Progress: {status['completion_percentage']:.1f}%\n\n"""

            if status['topics_covered']:
                progress_md += "✅ **Discussed:**\n"
                for topic in status['topics_covered']:
                    progress_md += f"- {topic}\n"

            if status['topics_in_progress']:
                progress_md += "\n📝 **Currently exploring:**\n"
                for topic in status['topics_in_progress']:
                    progress_md += f"- {topic}\n"

            if status.get('needs_attention'):
                progress_md += "\n❗ **Consider discussing:**\n"
                for topic in status['needs_attention']:
                    progress_md += f"- {topic}\n"

            return progress_md

        # Event Handlers: send on Enter -> refresh progress -> clear input box
        msg.submit(
            process_message,
            [msg, chatbot, api_key],
            [chatbot, status_msg]
        ).then(
            update_progress,
            chatbot,
            progress_info
        ).then(
            lambda: "",
            None,
            msg
        )

        # Same chain for the Send button
        submit.click(
            process_message,
            [msg, chatbot, api_key],
            [chatbot, status_msg]
        ).then(
            update_progress,
            chatbot,
            progress_info
        ).then(
            lambda: "",
            None,
            msg
        )

        generate.click(
            generate_profile,
            [api_key],
            [output_file, json_preview, status_msg]
        )

        clear.click(
            clear_interface,
            None,
            [chatbot, status_msg, progress_info, json_preview, output_file]
        )

    return demo
593
-
594
if __name__ == "__main__":
    demo = create_education_career_interface()
    # BUG FIX: `enable_queue` was removed from `launch()` in Gradio 4.x
    # (deprecated in 3.x in favour of `Blocks.queue()`); passing it raises
    # TypeError on current Gradio.  Enable queuing explicitly instead.
    demo.queue()
    demo.launch(
        server_name="0.0.0.0",   # listen on all interfaces (container-friendly)
        server_port=7860,
        share=True,              # also create a public gradio.live tunnel
        show_error=True          # surface handler exceptions in the UI
    )
- ) #530
 
1
+ import json #1
2
+ import logging #2
3
+ import os #3
4
+ from datetime import datetime #4
5
+ from typing import Dict, List, Optional, Any, Tuple #5
6
+ from dataclasses import dataclass, field #6
7
+ from pathlib import Path #7
8
+
9
+ # Third-party imports
10
+ import gradio as gr #8
11
+ from openai import OpenAI #9
12
+
13
+ # Configure logging
14
+ logging.basicConfig( #10
15
+ level=logging.INFO, #11
16
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', #12
17
+ handlers=[ #13
18
+ logging.StreamHandler(), #14
19
+ logging.FileHandler('app.log') #15
20
+ ] #16
21
+ ) #17
22
+ logger = logging.getLogger(__name__) #18
23
+
24
+ # System prompt for the AI assistant
25
+ SYSTEM_PROMPT = """ #19
26
+ You are an Information Extraction Assistant, designed to help extract and organize
27
+ important information from conversations in a natural and engaging way.
28
+
29
+ Core Capabilities:
30
+ - Natural conversation while gathering specific information
31
+ - Flexible information extraction based on context
32
+ - Progress tracking and completion estimation
33
+ - Structured data organization with context preservation
34
+
35
+ Please maintain a friendly and professional tone while ensuring accurate information extraction.
36
+ """ #20
37
+
38
@dataclass
class ExtractedInfo:
    """Structure for storing a single piece of extracted information.

    Instances are built by ``InformationExtractor._extract_information`` from
    the model's JSON output.
    """
    text: str          # verbatim text pulled from the conversation
    category: str      # presumably one of InformationExtractor.extraction_categories — not enforced here
    confidence: float  # model-reported confidence, expected in [0.0, 1.0]
    timestamp: datetime = field(default_factory=datetime.now)  # when this item was extracted
    metadata: Dict[str, Any] = field(default_factory=dict)     # free-form extra context from the model
46
+
47
@dataclass
class ConversationState:
    """Tracks the state and progress of the conversation."""
    extracted_items: List["ExtractedInfo"] = field(default_factory=list)  # everything extracted so far
    categories_covered: List[str] = field(default_factory=list)           # distinct categories seen, in order
    current_focus: Optional[str] = None          # category currently being explored
    completion_percentage: float = 0.0           # 0-100 progress estimate
    last_error: Optional[str] = None             # most recent processing error
    last_update: datetime = field(default_factory=datetime.now)  # time of last mutation

    def add_extracted_info(self, info: "ExtractedInfo") -> None:
        """Record a newly extracted item and refresh the derived state."""
        self.extracted_items.append(info)
        category = info.category
        # Track each category only once, preserving first-seen order.
        if category not in self.categories_covered:
            self.categories_covered.append(category)
        self.last_update = datetime.now()
63
+
64
+ class InformationExtractor: #44
65
+ """Core class for handling information extraction from conversations.""" #45
66
+
67
+ def __init__(self): #46
68
+ self.conversation_history: List[Dict[str, str]] = [] #47
69
+ self.state = ConversationState() #48
70
+ self.client: Optional[OpenAI] = None #49
71
+ self.extraction_categories = [ #50
72
+ "personal_info", #51
73
+ "education", #52
74
+ "work_experience", #53
75
+ "skills", #54
76
+ "achievements" #55
77
+ ] #56
78
+
79
+ def _validate_api_key(self, api_key: str) -> bool: #57
80
+ """Validate OpenAI API key format.""" #58
81
+ if not api_key.strip(): #59
82
+ raise ValueError("API key cannot be empty") #60
83
+ if not api_key.startswith('sk-'): #61
84
+ raise ValueError("Invalid API key format") #62
85
+ return True #63
86
+
87
    def _initialize_client(self, api_key: str) -> None:
        """Initialize the OpenAI client after validating the key.

        Raises:
            ValueError: From ``_validate_api_key`` on a malformed key.
            Exception: Any client-construction failure is logged and re-raised.
        """
        try:
            # _validate_api_key raises on bad input, so the True check is the
            # only path that constructs a client.
            if self._validate_api_key(api_key):
                self.client = OpenAI(api_key=api_key)
        except Exception as e:
            logger.error(f"Error initializing OpenAI client: {str(e)}")
            raise
95
+
96
+ def _add_to_history(self, role: str, content: str) -> None: #72
97
+ """Add a message to conversation history with timestamp.""" #73
98
+ self.conversation_history.append({ #74
99
+ "role": role, #75
100
+ "content": content, #76
101
+ "timestamp": datetime.now().isoformat() #77
102
+ }) #78
103
+
104
    def _get_ai_response(self, retries: int = 3) -> str:
        """Get a chat completion for the current history, retrying on failure.

        Args:
            retries: Maximum number of API attempts before giving up.

        Returns:
            The assistant message text from the first successful attempt.

        Raises:
            ValueError: If the OpenAI client has not been initialized.
            Exception: After all ``retries`` attempts have failed.
        """
        if not self.client:
            raise ValueError("OpenAI client not initialized")
        for attempt in range(retries):
            try:
                response = self.client.chat.completions.create(
                    model="gpt-4",
                    messages=[
                        {"role": "system", "content": SYSTEM_PROMPT},
                        # Strip the local "timestamp" field: the API accepts
                        # only role/content message dicts.
                        *[{
                            "role": msg["role"],
                            "content": msg["content"]
                        } for msg in self.conversation_history]
                    ],
                    temperature=0.7,
                    max_tokens=2000
                )
                return response.choices[0].message.content
            except Exception as e:
                logger.warning(f"Attempt {attempt + 1} failed: {str(e)}")
                # Re-raise only after the final attempt.
                if attempt == retries - 1:
                    raise Exception(f"Failed after {retries} attempts: {str(e)}")
                continue
130
+
131
    def _extract_information(self, text: str) -> List[ExtractedInfo]:
        """Extract structured information from text via a dedicated API call.

        Sends ``text`` to the model with a JSON-format extraction prompt and
        converts each returned item into an ``ExtractedInfo``.  All failures
        (bad JSON, API errors) are logged and collapsed to an empty list so
        extraction problems never break the chat flow.
        """
        try:
            # The doubled braces render as literal {} in the f-string so the
            # model sees a concrete JSON template.
            extraction_prompt = f"""
            Analyze the following text and extract relevant information.
            Categories to consider: {', '.join(self.extraction_categories)}

            For each piece of information extracted, provide:
            1. The exact text
            2. The category it belongs to
            3. Confidence level (0.0 to 1.0)
            4. Any relevant context or metadata

            Format as JSON:
            {{
                "extracted_items": [
                    {{
                        "text": "extracted text",
                        "category": "category name",
                        "confidence": 0.95,
                        "metadata": {{}}
                    }}
                ]
            }}

            Text to analyze: {text}
            """

            # Low temperature for more deterministic, parseable output.
            response = self.client.chat.completions.create(
                model="gpt-4",
                messages=[
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": extraction_prompt}
                ],
                temperature=0.3
            )

            # Parse response and create ExtractedInfo objects.
            # NOTE(review): raw model output is parsed without a
            # response_format constraint, so JSONDecodeError is expected
            # occasionally and handled below.
            analysis = json.loads(response.choices[0].message.content)
            extracted_items = []

            for item in analysis.get("extracted_items", []):
                extracted_info = ExtractedInfo(
                    text=item["text"],
                    category=item["category"],
                    confidence=item["confidence"],
                    metadata=item.get("metadata", {})
                )
                extracted_items.append(extracted_info)

            return extracted_items

        except json.JSONDecodeError as e:
            logger.error(f"Error parsing extraction response: {str(e)}")
            return []
        except Exception as e:
            logger.error(f"Error during information extraction: {str(e)}")
            return []
189
+
190
+ def _update_completion_status(self) -> None: #134
191
+ """Update completion status based on extracted information.""" #135
192
+ total_categories = len(self.extraction_categories) #136
193
+ covered_categories = len(self.state.categories_covered) #137
194
+
195
+ # Calculate base completion percentage #138
196
+ base_completion = (covered_categories / total_categories) * 100 #139
197
+
198
+ # Adjust based on confidence levels #140
199
+ if self.state.extracted_items: #141
200
+ avg_confidence = sum(item.confidence for item in self.state.extracted_items) / len(self.state.extracted_items) #142
201
+ adjusted_completion = base_completion * avg_confidence #143
202
+ else: #144
203
+ adjusted_completion = 0.0 #145
204
 
205
+ self.state.completion_percentage = min(adjusted_completion, 100.0) #146
206
+
207
    def process_message(self, message: str, api_key: str) -> Dict[str, Any]:
        """Process a user message and extract information.

        Pipeline: lazily initialize the client, log the user message, get the
        assistant reply, run extraction over both texts, fold new items into
        the state, and recompute completion.  Errors never propagate: they are
        logged, stored on ``self.state.last_error``, and returned as a dict
        with an ``error`` key (plus the current completion status).
        """
        try:
            # Initialize client if needed
            if not self.client:
                self._initialize_client(api_key)

            # Add user message to history
            self._add_to_history("user", message)

            # Get AI response
            ai_response = self._get_ai_response()
            self._add_to_history("assistant", ai_response)

            # Extract information from this exchange (user + assistant text)
            new_information = self._extract_information(message + "\n" + ai_response)

            # Update state with new information
            for info in new_information:
                self.state.add_extracted_info(info)

            # Update completion status
            self._update_completion_status()

            return {
                "response": ai_response,
                "extracted_info": [
                    {
                        "text": info.text,
                        "category": info.category,
                        "confidence": info.confidence
                    } for info in new_information
                ],
                "completion_status": {
                    "percentage": self.state.completion_percentage,
                    "categories_covered": self.state.categories_covered,
                    "current_focus": self.state.current_focus
                }
            }

        except Exception as e:
            error_msg = f"Error processing message: {str(e)}"
            logger.error(error_msg)
            self.state.last_error = error_msg
            return {
                "error": error_msg,
                "completion_status": {
                    "percentage": self.state.completion_percentage,
                    "categories_covered": self.state.categories_covered,
                    "current_focus": self.state.current_focus
                }
            }
259
+ def generate_output(self) -> Dict[str, Any]: #192
260
+ """Generate structured output from all extracted information.""" #193
261
+ try: #194
262
+ # Organize extracted information by category #195
263
+ categorized_info = {} #196
264
+ for category in self.extraction_categories: #197
265
+ category_items = [ #198
266
+ { #199
267
+ "text": item.text, #200
268
+ "confidence": item.confidence, #201
269
+ "timestamp": item.timestamp.isoformat(), #202
270
+ "metadata": item.metadata #203
271
+ } #204
272
+ for item in self.state.extracted_items #205
273
+ if item.category == category #206
274
+ ] #207
275
+ if category_items: #208
276
+ categorized_info[category] = category_items #209
277
+
278
+ # Create output structure #210
279
+ output = { #211
280
+ "extracted_information": categorized_info, #212
281
+ "analysis_summary": { #213
282
+ "total_items": len(self.state.extracted_items), #214
283
+ "categories_covered": self.state.categories_covered, #215
284
+ "completion_percentage": self.state.completion_percentage #216
285
+ }, #217
286
+ "metadata": { #218
287
+ "generated_at": datetime.now().isoformat(), #219
288
+ "conversation_length": len(self.conversation_history), #220
289
+ "version": "2.0" #221
290
+ } #222
291
+ } #223
292
+
293
+ # Save to file #224
294
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") #225
295
+ filename = f"extracted_info_{timestamp}.json" #226
296
 
297
+ with open(filename, 'w', encoding='utf-8') as f: #227
298
+ json.dump(output, f, indent=2, ensure_ascii=False) #228
299
+
300
+ return { #229
301
+ "filename": filename, #230
302
+ "content": output, #231
303
+ "status": "success" #232
304
+ } #233
305
+
306
+ except Exception as e: #234
307
+ error_msg = f"Error generating output: {str(e)}" #235
308
+ logger.error(error_msg) #236
309
+ return { #237
310
+ "error": error_msg, #238
311
+ "status": "error" #239
312
+ } #240
313
+
314
def create_gradio_interface():
    """Build the Gradio UI for the information-extraction chat.

    Wires a chat panel, live progress indicators, and report generation
    around a single InformationExtractor instance held in this closure.

    Returns the assembled ``gr.Blocks`` app (caller launches it).
    """
    extractor = InformationExtractor()

    # Custom CSS for better styling
    css = """
    .container { max-width: 900px; margin: auto; }
    .message { padding: 1rem; margin: 0.5rem 0; border-radius: 0.5rem; }
    .info-panel { background: #f5f5f5; padding: 1rem; border-radius: 0.5rem; }
    .status-badge {
        display: inline-block;
        padding: 0.25rem 0.5rem;
        border-radius: 0.25rem;
        margin: 0.25rem;
        background: #e0e0e0;
    }
    .extraction-highlight {
        background: #e8f4f8;
        border-left: 4px solid #4a90e2;
        padding: 0.5rem;
        margin: 0.5rem 0;
    }
    """

    with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # 🔍 Information Extraction Assistant

        Have a natural conversation while we extract and organize important information.
        The system will automatically identify and categorize relevant details.
        """)

        with gr.Row():
            with gr.Column(scale=2):
                # API Key input
                api_key = gr.Textbox(
                    label="OpenAI API Key",
                    type="password",
                    placeholder="Enter your OpenAI API key (sk-...)",
                    show_label=True
                )

                # Chat interface
                chatbot = gr.Chatbot(
                    value=[],
                    height=400,
                    type="messages",
                    show_label=False
                )

                # Message input
                with gr.Row():
                    msg = gr.Textbox(
                        label="Message",
                        placeholder="Type your message here...",
                        scale=4
                    )
                    submit = gr.Button(
                        "Send",
                        variant="primary",
                        scale=1
                    )

                # Action buttons
                with gr.Row():
                    clear = gr.Button("Clear Chat", scale=1)
                    generate = gr.Button(
                        "Generate Report",
                        variant="secondary",
                        scale=2
                    )

            with gr.Column(scale=1):
                # Extraction Status Panel
                with gr.Group(visible=True) as status_panel:
                    gr.Markdown("### Extraction Progress")

                    # Progress indicator
                    progress = gr.Slider(
                        label="Completion",
                        minimum=0,
                        maximum=100,
                        value=0,
                        interactive=False
                    )

                    # Categories covered
                    categories_covered = gr.JSON(
                        label="Categories Covered",
                        value={"categories": []}
                    )

                    # Current focus
                    current_focus = gr.Textbox(
                        label="Current Focus",
                        value="Not started",
                        interactive=False
                    )

                # Extraction Results
                with gr.Tabs() as result_tabs:
                    with gr.Tab("Extracted Information"):
                        extracted_info = gr.JSON(
                            label="Extracted Details",
                            value={}
                        )

                    with gr.Tab("Download"):
                        file_output = gr.File(
                            label="Download Report"
                        )

                    with gr.Tab("Analysis"):
                        analysis_text = gr.Markdown(
                            "Analysis will appear here after processing."
                        )

        # Helper Functions
        def format_extraction_summary(extracted_items: List[Dict]) -> str:
            """Render a list of extracted items as a markdown bullet list."""
            if not extracted_items:
                return "No information extracted yet."

            summary = ["### Recently Extracted Information"]
            for item in extracted_items:
                summary.append(
                    f"- **{item['category']}** ({item['confidence']*100:.1f}% confidence)\n"
                    f"  {item['text']}"
                )
            return "\n".join(summary)

        # Event Handlers
        def process_message(message: str, history: list, key: str) -> tuple:
            """Handle one chat turn and refresh every status component.

            Always returns 5 values, matching the outputs binding:
            (chat history, progress, categories JSON, focus text, analysis md).
            """
            if not message.strip():
                # BUG FIX: the original returned only 4 values here although
                # the event binding declares 5 outputs.
                return (
                    history,
                    0,
                    {"categories": []},
                    "Please enter a message",
                    "No message to process."
                )

            try:
                # Process message
                result = extractor.process_message(message, key)

                if "error" in result:
                    # BUG FIX: padded to 5 values to match the outputs binding.
                    return (
                        history,
                        0,
                        {"categories": []},
                        f"Error: {result['error']}",
                        "An error occurred during processing."
                    )

                # Update chat history (messages format: role/content dicts)
                history.append({
                    "role": "user",
                    "content": message
                })
                history.append({
                    "role": "assistant",
                    "content": result["response"]
                })

                # Update status components
                progress_value = result["completion_status"]["percentage"]
                categories = {
                    "categories": result["completion_status"]["categories_covered"]
                }
                focus = result["completion_status"]["current_focus"] or "Processing..."

                # Update extraction display (local name chosen so it does not
                # shadow the `analysis_text` Markdown component above)
                if result.get("extracted_info"):
                    summary_md = format_extraction_summary(result["extracted_info"])
                else:
                    summary_md = "No new information extracted."

                return (
                    history,
                    progress_value,
                    categories,
                    focus,
                    summary_md
                )

            except Exception as e:
                logger.error(f"Error in process_message: {str(e)}")
                return (
                    history,
                    0,
                    {"categories": []},
                    f"Error: {str(e)}",
                    "An error occurred during processing."
                )

        def generate_report() -> tuple:
            """Generate the JSON report and refresh download/preview panes."""
            try:
                result = extractor.generate_output()

                if result["status"] == "success":
                    summary = result["content"]["analysis_summary"]
                    content_preview = {
                        "summary": summary,
                        "categories": list(result["content"]["extracted_information"].keys()),
                        # BUG FIX: the original counted top-level categories
                        # here, not extracted items.
                        "total_items": summary["total_items"]
                    }

                    # BUG FIX: re-attach the category key when flattening —
                    # the per-category item dicts don't carry it, and
                    # format_extraction_summary indexes item['category'].
                    all_items = [
                        {**item, "category": category}
                        for category, items in result["content"]["extracted_information"].items()
                        for item in items
                    ]

                    return (
                        result["filename"],
                        content_preview,
                        "Report generated successfully! 🎉",
                        gr.update(value=format_extraction_summary(all_items))
                    )
                else:
                    return (
                        None,
                        {"error": result["error"]},
                        f"Error generating report: {result['error']}",
                        "Failed to generate analysis."
                    )

            except Exception as e:
                logger.error(f"Error in generate_report: {str(e)}")
                return (
                    None,
                    {"error": str(e)},
                    f"Error: {str(e)}",
                    "An error occurred during report generation."
                )

        def clear_interface() -> tuple:
            """Reset the extractor and every interface component."""
            # BUG FIX: the original declared `global extractor`, which rebound
            # a module-level name while the other handlers kept closing over
            # the local `extractor` — so "Clear Chat" never actually reset the
            # conversation state. `nonlocal` rebinds the shared closure cell.
            nonlocal extractor
            extractor = InformationExtractor()

            return (
                [],                                 # Clear chat history
                0.0,                                # Reset progress
                {"categories": []},                 # Clear categories
                "Not started",                      # Reset focus
                {},                                 # Clear extracted info
                None,                               # Clear file output
                "Ready to start new extraction.",   # Reset analysis
                gr.update(value="")                 # Clear message input
            )

        # Event Bindings
        msg.submit(
            process_message,
            inputs=[msg, chatbot, api_key],
            outputs=[
                chatbot,
                progress,
                categories_covered,
                current_focus,
                analysis_text
            ]
        ).then(
            lambda: "",
            None,
            msg
        )

        submit.click(
            process_message,
            inputs=[msg, chatbot, api_key],
            outputs=[
                chatbot,
                progress,
                categories_covered,
                current_focus,
                analysis_text
            ]
        ).then(
            lambda: "",
            None,
            msg
        )

        generate.click(
            generate_report,
            outputs=[
                file_output,
                extracted_info,
                current_focus,
                analysis_text
            ]
        )

        clear.click(
            clear_interface,
            outputs=[
                chatbot,
                progress,
                categories_covered,
                current_focus,
                extracted_info,
                file_output,
                analysis_text,
                msg
            ]
        )

    return demo
624
+
625
if __name__ == "__main__":
    # Configure logging for standalone execution (no-op if the module-level
    # basicConfig already ran).
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )

    try:
        app = create_gradio_interface()
        app.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=True,
            show_api=False
        )
    except Exception as exc:
        # Surface startup failures in the log, then re-raise so the process
        # exits non-zero.
        logger.error(f"Application failed to start: {str(exc)}")
        raise