jzou19950715 committed on
Commit
c734c14
·
verified ·
1 Parent(s): 16af053

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +563 -559
app.py CHANGED
@@ -1,28 +1,28 @@
1
- import json #1
2
- import logging #2
3
- import os #3
4
- from datetime import datetime #4
5
- from typing import Dict, List, Optional, Any, Tuple #5
6
- from dataclasses import dataclass, field #6
7
- from pathlib import Path #7
8
 
9
  # Third-party imports
10
- import gradio as gr #8
11
- from openai import OpenAI #9
12
 
13
  # Configure logging
14
- logging.basicConfig( #10
15
- level=logging.INFO, #11
16
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', #12
17
- handlers=[ #13
18
- logging.StreamHandler(), #14
19
- logging.FileHandler('app.log') #15
20
- ] #16
21
- ) #17
22
- logger = logging.getLogger(__name__) #18
23
 
24
  # System prompt for the AI assistant
25
- SYSTEM_PROMPT = """ #19
26
  You are an Information Extraction Assistant, designed to help extract and organize
27
  important information from conversations in a natural and engaging way.
28
 
@@ -33,105 +33,106 @@ Core Capabilities:
33
  - Structured data organization with context preservation
34
 
35
  Please maintain a friendly and professional tone while ensuring accurate information extraction.
36
- """ #20
37
-
38
- @dataclass #21
39
- class ExtractedInfo: #22
40
- """Structure for storing extracted information.""" #23
41
- text: str #24
42
- category: str #25
43
- confidence: float #26
44
- timestamp: datetime = field(default_factory=datetime.now) #27
45
- metadata: Dict[str, Any] = field(default_factory=dict) #28
46
-
47
- @dataclass #29
48
- class ConversationState: #30
49
- """Tracks the state and progress of the conversation.""" #31
50
- extracted_items: List[ExtractedInfo] = field(default_factory=list) #32
51
- categories_covered: List[str] = field(default_factory=list) #33
52
- current_focus: Optional[str] = None #34
53
- completion_percentage: float = 0.0 #35
54
- last_error: Optional[str] = None #36
55
- last_update: datetime = field(default_factory=datetime.now) #37
56
-
57
- def add_extracted_info(self, info: ExtractedInfo) -> None: #38
58
- """Add new extracted information and update state.""" #39
59
- self.extracted_items.append(info) #40
60
- if info.category not in self.categories_covered: #41
61
- self.categories_covered.append(info.category) #42
62
- self.last_update = datetime.now() #43
63
-
64
- class InformationExtractor: #44
65
- """Core class for handling information extraction from conversations.""" #45
66
 
67
- def __init__(self): #46
68
- self.conversation_history: List[Dict[str, str]] = [] #47
69
- self.state = ConversationState() #48
70
- self.client: Optional[OpenAI] = None #49
71
- self.extraction_categories = [ #50
72
- "personal_info", #51
73
- "education", #52
74
- "work_experience", #53
75
- "skills", #54
76
- "achievements" #55
77
- ] #56
78
 
79
- def _validate_api_key(self, api_key: str) -> bool: #57
80
- """Validate OpenAI API key format.""" #58
81
- if not api_key.strip(): #59
82
- raise ValueError("API key cannot be empty") #60
83
- if not api_key.startswith('sk-'): #61
84
- raise ValueError("Invalid API key format") #62
85
- return True #63
86
 
87
- def _initialize_client(self, api_key: str) -> None: #64
88
- """Initialize OpenAI client with error handling.""" #65
89
- try: #66
90
- if self._validate_api_key(api_key): #67
91
- self.client = OpenAI(api_key=api_key) #68
92
- except Exception as e: #69
93
- logger.error(f"Error initializing OpenAI client: {str(e)}") #70
94
- raise #71
95
-
96
- def _add_to_history(self, role: str, content: str) -> None: #72
97
- """Add a message to conversation history with timestamp.""" #73
98
- self.conversation_history.append({ #74
99
- "role": role, #75
100
- "content": content, #76
101
- "timestamp": datetime.now().isoformat() #77
102
- }) #78
103
-
104
- def _get_ai_response(self, retries: int = 3) -> str: #79
105
- """Get response from OpenAI with retry mechanism.""" #80
106
- if not self.client: #81
107
- raise ValueError("OpenAI client not initialized") #82
108
- for attempt in range(retries): #83
109
- try: #84
110
- response = self.client.chat.completions.create( #85
111
- model="gpt-4", #86
112
- messages=[ #87
113
- {"role": "system", "content": SYSTEM_PROMPT}, #88
114
- *[{ #89
115
- "role": msg["role"], #90
116
- "content": msg["content"] #91
117
- } for msg in self.conversation_history] #92
118
- ], #93
119
- temperature=0.7, #94
120
- max_tokens=2000 #95
121
- ) #96
 
122
 
123
- return response.choices[0].message.content #97
124
 
125
- except Exception as e: #98
126
- logger.warning(f"Attempt {attempt + 1} failed: {str(e)}") #99
127
- if attempt == retries - 1: #100
128
- raise Exception(f"Failed after {retries} attempts: {str(e)}") #101
129
- continue #102
130
-
131
- def _extract_information(self, text: str) -> List[ExtractedInfo]: #103
132
- """Extract structured information from text.""" #104
133
- try: #105
134
- extraction_prompt = f""" #106
135
  Analyze the following text and extract relevant information.
136
  Categories to consider: {', '.join(self.extraction_categories)}
137
 
@@ -154,489 +155,492 @@ for attempt in range(retries): #83
154
  }}
155
 
156
  Text to analyze: {text}
157
- """ #107
158
 
159
- response = self.client.chat.completions.create( #108
160
- model="gpt-4", #109
161
- messages=[ #110
162
- {"role": "system", "content": SYSTEM_PROMPT}, #111
163
- {"role": "user", "content": extraction_prompt} #112
164
- ], #113
165
- temperature=0.3 #114
166
- ) #115
167
 
168
- # Parse response and create ExtractedInfo objects #116
169
- analysis = json.loads(response.choices[0].message.content) #117
170
- extracted_items = [] #118
171
 
172
- for item in analysis.get("extracted_items", []): #119
173
- extracted_info = ExtractedInfo( #120
174
- text=item["text"], #121
175
- category=item["category"], #122
176
- confidence=item["confidence"], #123
177
- metadata=item.get("metadata", {}) #124
178
- ) #125
179
- extracted_items.append(extracted_info) #126
180
 
181
- return extracted_items #127
182
 
183
- except json.JSONDecodeError as e: #128
184
- logger.error(f"Error parsing extraction response: {str(e)}") #129
185
- return [] #130
186
- except Exception as e: #131
187
- logger.error(f"Error during information extraction: {str(e)}") #132
188
- return [] #133
189
-
190
- def _update_completion_status(self) -> None: #134
191
- """Update completion status based on extracted information.""" #135
192
- total_categories = len(self.extraction_categories) #136
193
- covered_categories = len(self.state.categories_covered) #137
194
 
195
- # Calculate base completion percentage #138
196
- base_completion = (covered_categories / total_categories) * 100 #139
197
 
198
- # Adjust based on confidence levels #140
199
- if self.state.extracted_items: #141
200
- avg_confidence = sum(item.confidence for item in self.state.extracted_items) / len(self.state.extracted_items) #142
201
- adjusted_completion = base_completion * avg_confidence #143
202
- else: #144
203
- adjusted_completion = 0.0 #145
204
 
205
- self.state.completion_percentage = min(adjusted_completion, 100.0) #146
206
 
207
- def process_message(self, message: str, api_key: str) -> Dict[str, Any]: #147
208
- """Process a user message and extract information.""" #148
209
- try: #149
210
- # Initialize client if needed #150
211
- if not self.client: #151
212
- self._initialize_client(api_key) #152
213
 
214
- # Add user message to history #153
215
- self._add_to_history("user", message) #154
216
 
217
- # Get AI response #155
218
- ai_response = self._get_ai_response() #156
219
- self._add_to_history("assistant", ai_response) #157
220
 
221
- # Extract information from the entire conversation #158
222
- new_information = self._extract_information(message + "\n" + ai_response) #159
223
 
224
- # Update state with new information #160
225
- for info in new_information: #161
226
- self.state.add_extracted_info(info) #162
227
 
228
- # Update completion status #163
229
- self._update_completion_status() #164
230
 
231
- return { #165
232
- "response": ai_response, #166
233
- "extracted_info": [ #167
234
- { #168
235
- "text": info.text, #169
236
- "category": info.category, #170
237
- "confidence": info.confidence #171
238
- } for info in new_information #172
239
- ], #173
240
- "completion_status": { #174
241
- "percentage": self.state.completion_percentage, #175
242
- "categories_covered": self.state.categories_covered, #176
243
- "current_focus": self.state.current_focus #177
244
- } #178
245
- } #179
246
 
247
- except Exception as e: #180
248
- error_msg = f"Error processing message: {str(e)}" #181
249
- logger.error(error_msg) #182
250
- self.state.last_error = error_msg #183
251
- return { #184
252
- "error": error_msg, #185
253
- "completion_status": { #186
254
- "percentage": self.state.completion_percentage, #187
255
- "categories_covered": self.state.categories_covered, #188
256
- "current_focus": self.state.current_focus #189
257
- } #190
258
- } #191
259
- def generate_output(self) -> Dict[str, Any]: #192
260
- """Generate structured output from all extracted information.""" #193
261
- try: #194
262
- # Organize extracted information by category #195
263
- categorized_info = {} #196
264
- for category in self.extraction_categories: #197
265
- category_items = [ #198
266
- { #199
267
- "text": item.text, #200
268
- "confidence": item.confidence, #201
269
- "timestamp": item.timestamp.isoformat(), #202
270
- "metadata": item.metadata #203
271
- } #204
272
- for item in self.state.extracted_items #205
273
- if item.category == category #206
274
- ] #207
275
- if category_items: #208
276
- categorized_info[category] = category_items #209
277
-
278
- # Create output structure #210
279
- output = { #211
280
- "extracted_information": categorized_info, #212
281
- "analysis_summary": { #213
282
- "total_items": len(self.state.extracted_items), #214
283
- "categories_covered": self.state.categories_covered, #215
284
- "completion_percentage": self.state.completion_percentage #216
285
- }, #217
286
- "metadata": { #218
287
- "generated_at": datetime.now().isoformat(), #219
288
- "conversation_length": len(self.conversation_history), #220
289
- "version": "2.0" #221
290
- } #222
291
- } #223
292
-
293
- # Save to file #224
294
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") #225
295
- filename = f"extracted_info_{timestamp}.json" #226
 
296
 
297
- with open(filename, 'w', encoding='utf-8') as f: #227
298
- json.dump(output, f, indent=2, ensure_ascii=False) #228
299
-
300
- return { #229
301
- "filename": filename, #230
302
- "content": output, #231
303
- "status": "success" #232
304
- } #233
305
-
306
- except Exception as e: #234
307
- error_msg = f"Error generating output: {str(e)}" #235
308
- logger.error(error_msg) #236
309
- return { #237
310
- "error": error_msg, #238
311
- "status": "error" #239
312
- } #240
313
-
314
def create_gradio_interface():
    """Build the Gradio Blocks UI for the information extraction assistant.

    The layout has a chat column (API key, chatbot, message box, action
    buttons) and a status column (progress slider, covered categories,
    current focus, and result tabs). Event handlers are defined as closures
    over a single ``InformationExtractor`` instance.

    NOTE(review): that one extractor is shared by every handler call, so all
    browser sessions served by this process share conversation state and the
    first API key that initialised the client -- consider per-session state.

    Returns:
        The configured ``gr.Blocks`` application (not yet launched).
    """
    extractor = InformationExtractor()

    # Custom CSS for better styling
    css = """
    .container { max-width: 900px; margin: auto; }
    .message { padding: 1rem; margin: 0.5rem 0; border-radius: 0.5rem; }
    .info-panel { background: #f5f5f5; padding: 1rem; border-radius: 0.5rem; }
    .status-badge {
        display: inline-block;
        padding: 0.25rem 0.5rem;
        border-radius: 0.25rem;
        margin: 0.25rem;
        background: #e0e0e0;
    }
    .extraction-highlight {
        background: #e8f4f8;
        border-left: 4px solid #4a90e2;
        padding: 0.5rem;
        margin: 0.5rem 0;
    }
    """

    with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
        gr.Markdown(
            "# 🔍 Information Extraction Assistant\n"
            "\n"
            "Have a natural conversation while we extract and organize important information.\n"
            "The system will automatically identify and categorize relevant details.\n"
        )

        with gr.Row():
            with gr.Column(scale=2):
                # API Key input
                api_key = gr.Textbox(
                    label="OpenAI API Key",
                    type="password",
                    placeholder="Enter your OpenAI API key (sk-...)",
                    show_label=True
                )

                # Chat interface
                chatbot = gr.Chatbot(
                    value=[],
                    height=400,
                    type="messages",
                    show_label=False
                )

                # Message input
                with gr.Row():
                    msg = gr.Textbox(
                        label="Message",
                        placeholder="Type your message here...",
                        scale=4
                    )
                    submit = gr.Button(
                        "Send",
                        variant="primary",
                        scale=1
                    )

                # Action buttons
                with gr.Row():
                    clear = gr.Button("Clear Chat", scale=1)
                    generate = gr.Button(
                        "Generate Report",
                        variant="secondary",
                        scale=2
                    )

            with gr.Column(scale=1):
                # Extraction Status Panel
                with gr.Group(visible=True) as status_panel:
                    gr.Markdown("### Extraction Progress")

                    # Progress indicator
                    progress = gr.Slider(
                        label="Completion",
                        minimum=0,
                        maximum=100,
                        value=0,
                        interactive=False
                    )

                    # Categories covered
                    categories_covered = gr.JSON(
                        label="Categories Covered",
                        value={"categories": []}
                    )

                    # Current focus
                    current_focus = gr.Textbox(
                        label="Current Focus",
                        value="Not started",
                        interactive=False
                    )

                # Extraction Results
                with gr.Tabs() as result_tabs:
                    with gr.Tab("Extracted Information"):
                        extracted_info = gr.JSON(
                            label="Extracted Details",
                            value={}
                        )

                    with gr.Tab("Download"):
                        file_output = gr.File(
                            label="Download Report"
                        )

                    with gr.Tab("Analysis"):
                        analysis_text = gr.Markdown(
                            "Analysis will appear here after processing."
                        )

        # Helper Functions
        def format_extraction_summary(extracted_items: List[Dict]) -> str:
            """Render items (dicts with category/confidence/text) as Markdown."""
            if not extracted_items:
                return "No information extracted yet."

            summary = ["### Recently Extracted Information"]
            for item in extracted_items:
                summary.append(
                    f"- **{item['category']}** ({item['confidence']*100:.1f}% confidence)\n"
                    f"  {item['text']}"
                )
            return "\n".join(summary)

        def update_interface_state(state: Dict[str, Any]) -> tuple:
            """Map a result dict's completion status to status-panel values."""
            status = state['completion_status']
            return (
                status['percentage'],
                {"categories": status['categories_covered']},
                status['current_focus']
            )

        # Event Handlers
        # Every return below yields exactly five values, one per bound output
        # (chatbot, progress, categories_covered, current_focus, analysis_text).
        def process_message(message: str, history: list, key: str) -> tuple:
            """Send one chat message through the extractor and refresh the UI."""
            if not message.strip():
                # Nothing to process: leave progress, categories and analysis as they are.
                return (
                    history,
                    gr.update(),
                    gr.update(),
                    "Please enter a message",
                    gr.update()
                )

            try:
                result = extractor.process_message(message, key)
                status = result["completion_status"]

                if "error" in result:
                    # Keep showing the progress accumulated so far.
                    return (
                        history,
                        status["percentage"],
                        {"categories": status["categories_covered"]},
                        f"Error: {result['error']}",
                        "An error occurred during processing."
                    )

                # Update chat history
                history.append({
                    "role": "user",
                    "content": message
                })
                history.append({
                    "role": "assistant",
                    "content": result["response"]
                })

                # Local names deliberately differ from the component variables
                # so the widgets are not shadowed inside this handler.
                focus_text = status["current_focus"] or "Processing..."
                if result.get("extracted_info"):
                    summary_md = format_extraction_summary(result["extracted_info"])
                else:
                    summary_md = "No new information extracted."

                return (
                    history,
                    status["percentage"],
                    {"categories": status["categories_covered"]},
                    focus_text,
                    summary_md
                )

            except Exception as e:
                logger.error(f"Error in process_message: {str(e)}")
                return (
                    history,
                    0,
                    {"categories": []},
                    f"Error: {str(e)}",
                    "An error occurred during processing."
                )

        def generate_report() -> tuple:
            """Write the report file and return values for the result widgets."""
            try:
                result = extractor.generate_output()

                if result["status"] == "success":
                    content = result["content"]
                    # Report entries are grouped under their category and do
                    # not carry it themselves; add it back so the summary
                    # formatter can read item['category'].
                    flattened = [
                        {**item, "category": category}
                        for category, items in content["extracted_information"].items()
                        for item in items
                    ]

                    # Update JSON preview
                    content_preview = {
                        "summary": content["analysis_summary"],
                        "categories": list(content["extracted_information"].keys()),
                        "total_items": len(flattened)
                    }

                    return (
                        result["filename"],
                        content_preview,
                        "Report generated successfully! 🎉",
                        gr.update(value=format_extraction_summary(flattened))
                    )

                return (
                    None,
                    {"error": result["error"]},
                    f"Error generating report: {result['error']}",
                    "Failed to generate analysis."
                )

            except Exception as e:
                logger.error(f"Error in generate_report: {str(e)}")
                return (
                    None,
                    {"error": str(e)},
                    f"Error: {str(e)}",
                    "An error occurred during report generation."
                )

        def clear_interface() -> tuple:
            """Reset the extractor and every interface component."""
            # ``extractor`` is a local of the enclosing function, so it must be
            # rebound with ``nonlocal`` for the other handlers to see the new
            # instance.
            nonlocal extractor
            extractor = InformationExtractor()

            return (
                [],                                # Clear chat history
                0.0,                               # Reset progress
                {"categories": []},                # Clear categories
                "Not started",                     # Reset focus
                {},                                # Clear extracted info
                None,                              # Clear file output
                "Ready to start new extraction.",  # Reset analysis
                gr.update(value="")                # Clear message input
            )

        # Event Bindings
        # Pressing Enter in the textbox and clicking "Send" behave identically;
        # both clear the message box once the handler has finished.
        for trigger in (msg.submit, submit.click):
            trigger(
                process_message,
                inputs=[msg, chatbot, api_key],
                outputs=[
                    chatbot,
                    progress,
                    categories_covered,
                    current_focus,
                    analysis_text
                ]
            ).then(
                lambda: "",
                None,
                msg
            )

        generate.click(
            generate_report,
            outputs=[
                file_output,
                extracted_info,
                current_focus,
                analysis_text
            ]
        )

        clear.click(
            clear_interface,
            outputs=[
                chatbot,
                progress,
                categories_covered,
                current_focus,
                extracted_info,
                file_output,
                analysis_text,
                msg
            ]
        )

    return demo
624
-
625
if __name__ == "__main__":
    # Logging configuration for running the module as a script.
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO
    )

    try:
        demo = create_gradio_interface()
        # Listen on all interfaces, port 7860, with a share link and no API page.
        demo.launch(
            show_api=False,
            share=True,
            server_port=7860,
            server_name="0.0.0.0"
        )
    except Exception as e:
        # Record the startup failure, then let it propagate.
        logger.error(f"Application failed to start: {str(e)}")
        raise
 
1
+ import json
2
+ import logging
3
+ import os
4
+ from datetime import datetime
5
+ from typing import Dict, List, Optional, Any, Tuple
6
+ from dataclasses import dataclass, field
7
+ from pathlib import Path
8
 
9
  # Third-party imports
10
+ import gradio as gr
11
+ from openai import OpenAI
12
 
13
  # Configure logging
14
# Log to the console and to app.log using one shared record format.
logging.basicConfig(
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler('app.log'),
    ],
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Module-level logger used throughout this file.
logger = logging.getLogger(__name__)
23
 
24
  # System prompt for the AI assistant
25
+ SYSTEM_PROMPT = """
26
  You are an Information Extraction Assistant, designed to help extract and organize
27
  important information from conversations in a natural and engaging way.
28
 
 
33
  - Structured data organization with context preservation
34
 
35
  Please maintain a friendly and professional tone while ensuring accurate information extraction.
36
+ """
37
+
38
@dataclass
class ExtractedInfo:
    """One piece of information pulled out of the conversation.

    ``timestamp`` and ``metadata`` are created per instance (the current time
    and a new empty dict) when the caller does not pass them.
    """

    # The extracted statement itself.
    text: str
    # Category label assigned to the statement.
    category: str
    # Confidence score reported for this extraction.
    confidence: float
    # Creation time of the record.
    timestamp: datetime = field(default_factory=datetime.now)
    # Optional extra details attached to the record.
    metadata: Dict[str, Any] = field(default_factory=dict)
46
+
47
@dataclass
class ConversationState:
    """Accumulated extraction results and progress for one conversation."""

    # Every item extracted so far, in arrival order.
    extracted_items: List[ExtractedInfo] = field(default_factory=list)
    # Distinct categories seen so far, in first-seen order.
    categories_covered: List[str] = field(default_factory=list)
    current_focus: Optional[str] = None
    completion_percentage: float = 0.0
    last_error: Optional[str] = None
    # Time of the most recent add_extracted_info call (or of creation).
    last_update: datetime = field(default_factory=datetime.now)

    def add_extracted_info(self, info: ExtractedInfo) -> None:
        """Record *info*, note its category if unseen, and stamp the update time."""
        category = info.category
        self.extracted_items.append(info)
        already_seen = category in self.categories_covered
        if not already_seen:
            self.categories_covered.append(category)
        self.last_update = datetime.now()
63
+
64
+ class InformationExtractor:
65
+ """Core class for handling information extraction from conversations."""
66
 
67
def __init__(self):
    """Start with an empty history, a fresh state and no API client."""
    # Category names offered to the model in the extraction prompt.
    self.extraction_categories = [
        "personal_info", "education", "work_experience", "skills", "achievements"
    ]
    # No client yet; it is set by _initialize_client.
    self.client: Optional[OpenAI] = None
    self.state = ConversationState()
    self.conversation_history: List[Dict[str, str]] = []
78
 
79
def _validate_api_key(self, api_key: str) -> bool:
    """Check that *api_key* looks like an OpenAI API key.

    Only the format is checked: the key must be a non-blank string that
    starts with ``sk-``. No request is made to verify it.

    Args:
        api_key: The key entered by the user.

    Returns:
        True when the key passes the format check.

    Raises:
        ValueError: If the key is missing, blank, not a string, or does not
            start with ``sk-``.
    """
    # A missing or non-string value is reported as ValueError like any other
    # bad key, rather than failing with AttributeError on .strip().
    if not isinstance(api_key, str) or not api_key.strip():
        raise ValueError("API key cannot be empty")
    if not api_key.startswith('sk-'):
        raise ValueError("Invalid API key format")
    return True
86
 
87
def _initialize_client(self, api_key: str) -> None:
    """Validate *api_key* and create the OpenAI client from it.

    Any failure is logged and then re-raised unchanged.
    """
    try:
        key_ok = self._validate_api_key(api_key)
        if key_ok:
            self.client = OpenAI(api_key=api_key)
    except Exception as exc:
        logger.error(f"Error initializing OpenAI client: {str(exc)}")
        raise
95
+
96
def _add_to_history(self, role: str, content: str) -> None:
    """Append one message, stamped with the current local time, to the history."""
    entry = {
        "role": role,
        "content": content,
        "timestamp": datetime.now().isoformat(),
    }
    self.conversation_history.append(entry)
103
+
104
def _get_ai_response(self, retries: int = 3) -> str:
    """Ask the chat model for a reply to the conversation so far.

    The system prompt is sent first, followed by every history entry reduced
    to its ``role`` and ``content``. The request is attempted up to
    *retries* times; each failure is logged as a warning.

    Args:
        retries: Maximum number of attempts; must be at least 1.

    Returns:
        The reply text, or an empty string if the response has no content.

    Raises:
        ValueError: If the client has not been initialised or *retries* is
            less than 1.
        RuntimeError: If every attempt fails; the last underlying error is
            attached as ``__cause__``.
    """
    if not self.client:
        raise ValueError("OpenAI client not initialized")
    if retries < 1:
        # Without this the loop would not run and None would be returned.
        raise ValueError("retries must be at least 1")

    # History entries also hold a local "timestamp"; only role/content are sent.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    messages.extend(
        {"role": msg["role"], "content": msg["content"]}
        for msg in self.conversation_history
    )

    last_error = None
    for attempt in range(1, retries + 1):
        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=messages,
                temperature=0.7,
                max_tokens=2000
            )
            content = response.choices[0].message.content
            # NOTE(review): the SDK types content as optional; normalise a
            # missing value so callers always receive a str -- confirm.
            return content if content is not None else ""
        except Exception as e:
            logger.warning("Attempt %d failed: %s", attempt, e)
            last_error = e

    raise RuntimeError(
        f"Failed after {retries} attempts: {str(last_error)}"
    ) from last_error
131
+
132
+ def _extract_information(self, text: str) -> List[ExtractedInfo]:
133
+ """Extract structured information from text."""
134
+ try:
135
+ extraction_prompt = f"""
136
  Analyze the following text and extract relevant information.
137
  Categories to consider: {', '.join(self.extraction_categories)}
138
 
 
155
  }}
156
 
157
  Text to analyze: {text}
158
+ """
159
 
160
+ response = self.client.chat.completions.create(
161
+ model="gpt-4o-mini", # Changed from "gpt-4" to "gpt-4o-mini"
162
+ messages=[
163
+ {"role": "system", "content": SYSTEM_PROMPT},
164
+ {"role": "user", "content": extraction_prompt}
165
+ ],
166
+ temperature=0.3
167
+ )
168
 
169
+ # Parse response and create ExtractedInfo objects
170
+ analysis = json.loads(response.choices[0].message.content)
171
+ extracted_items = []
172
 
173
+ for item in analysis.get("extracted_items", []):
174
+ extracted_info = ExtractedInfo(
175
+ text=item["text"],
176
+ category=item["category"],
177
+ confidence=item["confidence"],
178
+ metadata=item.get("metadata", {})
179
+ )
180
+ extracted_items.append(extracted_info)
181
 
182
+ return extracted_items
183
 
184
+ except json.JSONDecodeError as e:
185
+ logger.error(f"Error parsing extraction response: {str(e)}")
186
+ return []
187
+ except Exception as e:
188
+ logger.error(f"Error during information extraction: {str(e)}")
189
+ return []
190
+
191
def _update_completion_status(self) -> None:
    """Recompute ``state.completion_percentage`` from the extracted items.

    The percentage is the share of configured extraction categories that have
    at least one item, scaled by the mean confidence of all extracted items,
    and clamped to the range 0-100. It is 0.0 when no categories are
    configured or nothing has been extracted yet.
    """
    categories = self.extraction_categories
    items = self.state.extracted_items

    # Guard both divisions: no categories or no items means no progress.
    if not categories or not items:
        self.state.completion_percentage = 0.0
        return

    # Count only configured categories, so labels outside the configured
    # list cannot push coverage past the real share.
    covered = sum(
        1 for category in categories
        if category in self.state.categories_covered
    )
    base_completion = (covered / len(categories)) * 100

    avg_confidence = sum(item.confidence for item in items) / len(items)
    adjusted_completion = base_completion * avg_confidence

    self.state.completion_percentage = max(0.0, min(adjusted_completion, 100.0))
207
 
208
def process_message(self, message: str, api_key: str) -> Dict[str, Any]:
    """Handle one user message: get a reply and extract information from it.

    The client is created from *api_key* on first use. The user message and
    the model reply are appended to the history, the latest exchange
    (message plus reply) is analysed for extractable information, and the
    state and completion percentage are updated.

    Args:
        message: The user's chat message.
        api_key: API key used only when no client exists yet.

    Returns:
        On success, a dict with ``response``, ``extracted_info`` (the items
        found in this exchange) and ``completion_status``. On failure, a
        dict with ``error`` and ``completion_status``; no exception escapes.
    """
    def completion_status() -> Dict[str, Any]:
        """Snapshot of the progress fields shared by both result shapes."""
        return {
            "percentage": self.state.completion_percentage,
            "categories_covered": self.state.categories_covered,
            "current_focus": self.state.current_focus
        }

    # Remember where this turn starts so a failed turn can be undone.
    history_mark = len(self.conversation_history)

    try:
        # Initialize client if needed
        if not self.client:
            self._initialize_client(api_key)

        self._add_to_history("user", message)
        ai_response = self._get_ai_response()
        self._add_to_history("assistant", ai_response)

        # Only the latest exchange is analysed, not the whole conversation.
        new_information = self._extract_information(message + "\n" + ai_response)

        for info in new_information:
            self.state.add_extracted_info(info)

        self._update_completion_status()

        return {
            "response": ai_response,
            "extracted_info": [
                {
                    "text": info.text,
                    "category": info.category,
                    "confidence": info.confidence
                } for info in new_information
            ],
            "completion_status": completion_status()
        }

    except Exception as e:
        # Drop whatever this turn appended to the history; otherwise a retry
        # would send the same user message to the model twice.
        del self.conversation_history[history_mark:]

        error_msg = f"Error processing message: {str(e)}"
        logger.error(error_msg)
        self.state.last_error = error_msg
        return {
            "error": error_msg,
            "completion_status": completion_status()
        }
260
+
261
def generate_output(self) -> Dict[str, Any]:
    """Build the structured report and save it as a JSON file.

    Extracted items are grouped by category: configured categories come
    first in their configured order, followed by any other category found on
    an item, so every counted item also appears in the report. Categories
    without items are omitted. The file is written to the current working
    directory as ``extracted_info_<timestamp>.json``.

    Returns:
        On success, a dict with ``filename``, ``content`` (the report) and
        ``status`` set to ``"success"``. On failure, a dict with ``error``
        and ``status`` set to ``"error"``; no exception escapes.
    """
    try:
        # Seed with the configured categories to fix their order, then let
        # unexpected categories append themselves as they are encountered.
        grouped: Dict[str, list] = {
            category: [] for category in self.extraction_categories
        }
        for item in self.state.extracted_items:
            # JSON object keys must be strings; coerce unusual labels.
            key = item.category if isinstance(item.category, str) else str(item.category)
            grouped.setdefault(key, []).append({
                "text": item.text,
                "confidence": item.confidence,
                "timestamp": item.timestamp.isoformat(),
                "metadata": item.metadata
            })
        categorized_info = {
            category: entries for category, entries in grouped.items() if entries
        }

        # Create output structure
        output = {
            "extracted_information": categorized_info,
            "analysis_summary": {
                "total_items": len(self.state.extracted_items),
                "categories_covered": self.state.categories_covered,
                "completion_percentage": self.state.completion_percentage
            },
            "metadata": {
                "generated_at": datetime.now().isoformat(),
                "conversation_length": len(self.conversation_history),
                "version": "2.0"
            }
        }

        # Microseconds in the name keep two reports generated within the
        # same second from overwriting each other.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        filename = f"extracted_info_{timestamp}.json"

        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(output, f, indent=2, ensure_ascii=False)

        return {
            "filename": filename,
            "content": output,
            "status": "success"
        }

    except Exception as e:
        error_msg = f"Error generating output: {str(e)}"
        logger.error(error_msg)
        return {
            "error": error_msg,
            "status": "error"
        }
315
+
316
def create_gradio_interface():
    """Create the Gradio interface for information extraction.

    Builds a ``gr.Blocks`` application with a chat column (API key, chat
    history, message box, action buttons) and a status/results column
    (progress, categories, current focus, extracted data, report download,
    analysis text), then wires the event handlers defined below to it.

    A single ``InformationExtractor`` is created when the interface is built
    and is closed over by every handler; "Clear Chat" replaces it with a
    fresh instance.

    Returns:
        The configured ``gr.Blocks`` instance. The caller is responsible for
        launching it.
    """
    extractor = InformationExtractor()

    # Custom CSS for better styling
    css = """
    .container { max-width: 900px; margin: auto; }
    .message { padding: 1rem; margin: 0.5rem 0; border-radius: 0.5rem; }
    .info-panel { background: #f5f5f5; padding: 1rem; border-radius: 0.5rem; }
    .status-badge {
        display: inline-block;
        padding: 0.25rem 0.5rem;
        border-radius: 0.25rem;
        margin: 0.25rem;
        background: #e0e0e0;
    }
    .extraction-highlight {
        background: #e8f4f8;
        border-left: 4px solid #4a90e2;
        padding: 0.5rem;
        margin: 0.5rem 0;
    }
    """

    with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # 🔍 Information Extraction Assistant

        Have a natural conversation while we extract and organize important information.
        The system will automatically identify and categorize relevant details.
        """)

        with gr.Row():
            with gr.Column(scale=2):
                # API Key input
                api_key = gr.Textbox(
                    label="OpenAI API Key",
                    type="password",
                    placeholder="Enter your OpenAI API key (sk-...)",
                    show_label=True
                )

                # Chat interface
                chatbot = gr.Chatbot(
                    value=[],
                    height=400,
                    type="messages",
                    show_label=False
                )

                # Message input
                with gr.Row():
                    msg = gr.Textbox(
                        label="Message",
                        placeholder="Type your message here...",
                        scale=4
                    )
                    submit = gr.Button(
                        "Send",
                        variant="primary",
                        scale=1
                    )

                # Action buttons
                with gr.Row():
                    clear = gr.Button("Clear Chat", scale=1)
                    generate = gr.Button(
                        "Generate Report",
                        variant="secondary",
                        scale=2
                    )

            with gr.Column(scale=1):
                # Extraction Status Panel
                with gr.Group(visible=True):
                    gr.Markdown("### Extraction Progress")

                    # Progress indicator
                    progress = gr.Slider(
                        label="Completion",
                        minimum=0,
                        maximum=100,
                        value=0,
                        interactive=False
                    )

                    # Categories covered
                    categories_covered = gr.JSON(
                        label="Categories Covered",
                        value={"categories": []}
                    )

                    # Current focus
                    current_focus = gr.Textbox(
                        label="Current Focus",
                        value="Not started",
                        interactive=False
                    )

                # Extraction Results
                with gr.Tabs():
                    with gr.Tab("Extracted Information"):
                        extracted_info = gr.JSON(
                            label="Extracted Details",
                            value={}
                        )

                    with gr.Tab("Download"):
                        file_output = gr.File(
                            label="Download Report"
                        )

                    with gr.Tab("Analysis"):
                        analysis_text = gr.Markdown(
                            "Analysis will appear here after processing."
                        )

        # Helper Functions
        def format_extraction_summary(extracted_items: List[Dict]) -> str:
            """Format extracted information as a Markdown bullet list.

            Args:
                extracted_items: Dicts carrying ``category``, ``confidence``
                    (a fraction; it is multiplied by 100 for display) and
                    ``text`` keys.

            Returns:
                Markdown text, or a placeholder sentence when the list is
                empty.
            """
            if not extracted_items:
                return "No information extracted yet."

            summary = ["### Recently Extracted Information"]
            summary.extend(
                f"- **{item['category']}** ({item['confidence']*100:.1f}% confidence)\n"
                f"  {item['text']}"
                for item in extracted_items
            )
            return "\n".join(summary)

        def update_interface_state(state: Dict[str, Any]) -> tuple:
            """Pull the status-panel values out of an extractor result.

            Args:
                state: Dict with a ``completion_status`` mapping that holds
                    ``percentage``, ``categories_covered`` and
                    ``current_focus``.

            Returns:
                ``(percentage, {"categories": [...]}, current_focus)``.
            """
            status = state['completion_status']
            return (
                status['percentage'],
                {"categories": status['categories_covered']},
                status['current_focus']
            )

        # Event Handlers
        def process_message(message: str, history: list, key: str) -> tuple:
            """Send one chat message to the extractor and refresh the UI.

            Every return path yields exactly five values, one per bound
            output component, in this order: chat history, progress,
            categories, current-focus text, analysis Markdown.

            Args:
                message: Text typed by the user.
                history: Current chat history in "messages" format.
                key: API key forwarded to ``extractor.process_message``.
            """
            if not message or not message.strip():
                # Nothing was processed, so leave progress, categories and
                # analysis as they are (``gr.update()`` is a no-op).
                return (
                    history,
                    gr.update(),
                    gr.update(),
                    "Please enter a message",
                    gr.update()
                )

            try:
                # Process message
                result = extractor.process_message(message, key)

                if "error" in result:
                    return (
                        history,
                        0,
                        {"categories": []},
                        f"Error: {result['error']}",
                        "An error occurred during processing."
                    )

                # Update chat history
                history.extend([
                    {"role": "user", "content": message},
                    {"role": "assistant", "content": result["response"]},
                ])

                # Update status components
                progress_value, categories, focus = update_interface_state(result)
                focus_text = focus or "Processing..."

                # Update extraction display
                if result.get("extracted_info"):
                    analysis_markdown = format_extraction_summary(result["extracted_info"])
                else:
                    analysis_markdown = "No new information extracted."

                return (
                    history,
                    progress_value,
                    categories,
                    focus_text,
                    analysis_markdown
                )

            except Exception as e:
                # UI boundary: log with traceback and surface the message
                # instead of letting the handler crash.
                logger.exception("Error in process_message: %s", e)
                return (
                    history,
                    0,
                    {"categories": []},
                    f"Error: {str(e)}",
                    "An error occurred during processing."
                )

        def generate_report() -> tuple:
            """Generate the report file and a preview of its content.

            Returns:
                Four values, one per bound output: report file path (or
                ``None``), JSON preview, status text shown in the
                current-focus box, and the analysis Markdown.
            """
            try:
                result = extractor.generate_output()

                if result["status"] != "success":
                    return (
                        None,
                        {"error": result["error"]},
                        f"Error generating report: {result['error']}",
                        "Failed to generate analysis."
                    )

                # ``extracted_information`` maps category -> list of items;
                # flatten it once for both the count and the summary.
                extracted = result["content"]["extracted_information"]
                all_items = [
                    item
                    for items in extracted.values()
                    for item in items
                ]

                # Update JSON preview
                content_preview = {
                    "summary": result["content"]["analysis_summary"],
                    "categories": list(extracted.keys()),
                    # Count items, not categories.
                    "total_items": len(all_items)
                }

                return (
                    result["filename"],
                    content_preview,
                    "Report generated successfully! 🎉",
                    gr.update(value=format_extraction_summary(all_items))
                )

            except Exception as e:
                logger.exception("Error in generate_report: %s", e)
                return (
                    None,
                    {"error": str(e)},
                    f"Error: {str(e)}",
                    "An error occurred during report generation."
                )

        def clear_interface() -> tuple:
            """Reset the extractor and every interface component.

            Returns:
                Eight values, one per bound output component.
            """
            # ``extractor`` is a local of create_gradio_interface, so it must
            # be rebound with ``nonlocal``; ``global`` would create an
            # unrelated module-level name and leave the handlers' extractor
            # (and its accumulated state) untouched.
            nonlocal extractor
            extractor = InformationExtractor()

            return (
                [],                                # Clear chat history
                0.0,                               # Reset progress
                {"categories": []},                # Clear categories
                "Not started",                     # Reset focus
                {},                                # Clear extracted info
                None,                              # Clear file output
                "Ready to start new extraction.",  # Reset analysis
                gr.update(value="")                # Clear message input
            )

        # Event Bindings
        chat_outputs = [
            chatbot,
            progress,
            categories_covered,
            current_focus,
            analysis_text
        ]

        # Pressing Enter in the textbox and clicking "Send" behave the same:
        # process the message, then empty the input box.
        for trigger in (msg.submit, submit.click):
            trigger(
                process_message,
                inputs=[msg, chatbot, api_key],
                outputs=chat_outputs
            ).then(
                lambda: "",
                None,
                msg
            )

        generate.click(
            generate_report,
            outputs=[
                file_output,
                extracted_info,
                current_focus,
                analysis_text
            ]
        )

        clear.click(
            clear_interface,
            outputs=[
                chatbot,
                progress,
                categories_covered,
                current_focus,
                extracted_info,
                file_output,
                analysis_text,
                msg
            ]
        )

    return demo
628
+
629
if __name__ == "__main__":
    # Configure root logging before the app is built and launched.
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO,
    )

    # Listen on all interfaces on port 7860, request a public share link,
    # and hide the API docs page.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
        "show_api": False,
    }

    try:
        demo = create_gradio_interface()
        demo.launch(**launch_options)
    except Exception as e:
        # Record the failure, then re-raise so the process still exits
        # with the original traceback.
        logger.error(f"Application failed to start: {str(e)}")
        raise