Spaces:

jzou19950715
/

Newco_information_extraction_agent

Sleeping

App Files Community

jzou19950715 committed on Feb 1

Commit

c734c14

verified ·

1 Parent(s): 16af053

Update app.py

Browse files

Files changed (1) hide show

app.py +563 -559

app.py CHANGED Viewed

@@ -1,28 +1,28 @@
-import json #1
-import logging #2
-import os #3
-from datetime import datetime #4
-from typing import Dict, List, Optional, Any, Tuple #5
-from dataclasses import dataclass, field #6
-from pathlib import Path #7
 # Third-party imports
-import gradio as gr #8
-from openai import OpenAI #9
 # Configure logging
-logging.basicConfig( #10
-    level=logging.INFO, #11
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', #12
-    handlers=[ #13
-        logging.StreamHandler(), #14
-        logging.FileHandler('app.log') #15
-    ] #16
-) #17
-logger = logging.getLogger(__name__) #18
 # System prompt for the AI assistant
-SYSTEM_PROMPT = """ #19
 You are an Information Extraction Assistant, designed to help extract and organize
 important information from conversations in a natural and engaging way.
@@ -33,105 +33,106 @@ Core Capabilities:
 - Structured data organization with context preservation
 Please maintain a friendly and professional tone while ensuring accurate information extraction.
-""" #20
-@dataclass #21
-class ExtractedInfo: #22
-    """Structure for storing extracted information.""" #23
-    text: str #24
-    category: str #25
-    confidence: float #26
-    timestamp: datetime = field(default_factory=datetime.now) #27
-    metadata: Dict[str, Any] = field(default_factory=dict) #28
-@dataclass #29
-class ConversationState: #30
-    """Tracks the state and progress of the conversation.""" #31
-    extracted_items: List[ExtractedInfo] = field(default_factory=list) #32
-    categories_covered: List[str] = field(default_factory=list) #33
-    current_focus: Optional[str] = None #34
-    completion_percentage: float = 0.0 #35
-    last_error: Optional[str] = None #36
-    last_update: datetime = field(default_factory=datetime.now) #37
-    def add_extracted_info(self, info: ExtractedInfo) -> None: #38
-        """Add new extracted information and update state.""" #39
-        self.extracted_items.append(info) #40
-        if info.category not in self.categories_covered: #41
-            self.categories_covered.append(info.category) #42
-        self.last_update = datetime.now() #43
-class InformationExtractor: #44
-    """Core class for handling information extraction from conversations.""" #45
-    def __init__(self): #46
-        self.conversation_history: List[Dict[str, str]] = [] #47
-        self.state = ConversationState() #48
-        self.client: Optional[OpenAI] = None #49
-        self.extraction_categories = [ #50
-            "personal_info", #51
-            "education", #52
-            "work_experience", #53
-            "skills", #54
-            "achievements" #55
-        ] #56
-    def _validate_api_key(self, api_key: str) -> bool: #57
-        """Validate OpenAI API key format.""" #58
-        if not api_key.strip(): #59
-            raise ValueError("API key cannot be empty") #60
-        if not api_key.startswith('sk-'): #61
-            raise ValueError("Invalid API key format") #62
-        return True #63
-    def _initialize_client(self, api_key: str) -> None: #64
-        """Initialize OpenAI client with error handling.""" #65
-        try: #66
-            if self._validate_api_key(api_key): #67
-                self.client = OpenAI(api_key=api_key) #68
-        except Exception as e: #69
-            logger.error(f"Error initializing OpenAI client: {str(e)}") #70
-            raise #71
-    def _add_to_history(self, role: str, content: str) -> None: #72
-        """Add a message to conversation history with timestamp.""" #73
-        self.conversation_history.append({ #74
-            "role": role, #75
-            "content": content, #76
-            "timestamp": datetime.now().isoformat() #77
-        }) #78
-    def _get_ai_response(self, retries: int = 3) -> str: #79
-        """Get response from OpenAI with retry mechanism.""" #80
-        if not self.client: #81
-            raise ValueError("OpenAI client not initialized") #82
-for attempt in range(retries): #83
-            try: #84
-                response = self.client.chat.completions.create( #85
-                    model="gpt-4", #86
-                    messages=[ #87
-                        {"role": "system", "content": SYSTEM_PROMPT}, #88
-                        *[{ #89
-                            "role": msg["role"], #90
-                            "content": msg["content"] #91
-                        } for msg in self.conversation_history] #92
-                    ], #93
-                    temperature=0.7, #94
-                    max_tokens=2000 #95
-                ) #96
-                return response.choices[0].message.content #97
-            except Exception as e: #98
-                logger.warning(f"Attempt {attempt + 1} failed: {str(e)}") #99
-                if attempt == retries - 1: #100
-                    raise Exception(f"Failed after {retries} attempts: {str(e)}") #101
-                continue #102
-    def _extract_information(self, text: str) -> List[ExtractedInfo]: #103
-        """Extract structured information from text.""" #104
-        try: #105
-            extraction_prompt = f""" #106
             Analyze the following text and extract relevant information.
             Categories to consider: {', '.join(self.extraction_categories)}
@@ -154,489 +155,492 @@ for attempt in range(retries): #83
             }}
             Text to analyze: {text}
-            """ #107
-            response = self.client.chat.completions.create( #108
-                model="gpt-4", #109
-                messages=[ #110
-                    {"role": "system", "content": SYSTEM_PROMPT}, #111
-                    {"role": "user", "content": extraction_prompt} #112
-                ], #113
-                temperature=0.3 #114
-            ) #115
-            # Parse response and create ExtractedInfo objects #116
-            analysis = json.loads(response.choices[0].message.content) #117
-            extracted_items = [] #118
-            for item in analysis.get("extracted_items", []): #119
-                extracted_info = ExtractedInfo( #120
-                    text=item["text"], #121
-                    category=item["category"], #122
-                    confidence=item["confidence"], #123
-                    metadata=item.get("metadata", {}) #124
-                ) #125
-                extracted_items.append(extracted_info) #126
-            return extracted_items #127
-        except json.JSONDecodeError as e: #128
-            logger.error(f"Error parsing extraction response: {str(e)}") #129
-            return [] #130
-        except Exception as e: #131
-            logger.error(f"Error during information extraction: {str(e)}") #132
-            return [] #133
-    def _update_completion_status(self) -> None: #134
-        """Update completion status based on extracted information.""" #135
-        total_categories = len(self.extraction_categories) #136
-        covered_categories = len(self.state.categories_covered) #137
-        # Calculate base completion percentage #138
-        base_completion = (covered_categories / total_categories) * 100 #139
-        # Adjust based on confidence levels #140
-        if self.state.extracted_items: #141
-            avg_confidence = sum(item.confidence for item in self.state.extracted_items) / len(self.state.extracted_items) #142
-            adjusted_completion = base_completion * avg_confidence #143
-        else: #144
-            adjusted_completion = 0.0 #145
-        self.state.completion_percentage = min(adjusted_completion, 100.0) #146
-    def process_message(self, message: str, api_key: str) -> Dict[str, Any]: #147
-        """Process a user message and extract information.""" #148
-        try: #149
-            # Initialize client if needed #150
-            if not self.client: #151
-                self._initialize_client(api_key) #152
-            # Add user message to history #153
-            self._add_to_history("user", message) #154
-            # Get AI response #155
-            ai_response = self._get_ai_response() #156
-            self._add_to_history("assistant", ai_response) #157
-            # Extract information from the entire conversation #158
-            new_information = self._extract_information(message + "\n" + ai_response) #159
-            # Update state with new information #160
-            for info in new_information: #161
-                self.state.add_extracted_info(info) #162
-            # Update completion status #163
-            self._update_completion_status() #164
-            return { #165
-                "response": ai_response, #166
-                "extracted_info": [ #167
-                    { #168
-                        "text": info.text, #169
-                        "category": info.category, #170
-                        "confidence": info.confidence #171
-                    } for info in new_information #172
-                ], #173
-                "completion_status": { #174
-                    "percentage": self.state.completion_percentage, #175
-                    "categories_covered": self.state.categories_covered, #176
-                    "current_focus": self.state.current_focus #177
-                } #178
-            } #179
-        except Exception as e: #180
-            error_msg = f"Error processing message: {str(e)}" #181
-            logger.error(error_msg) #182
-            self.state.last_error = error_msg #183
-            return { #184
-                "error": error_msg, #185
-                "completion_status": { #186
-                    "percentage": self.state.completion_percentage, #187
-                    "categories_covered": self.state.categories_covered, #188
-                    "current_focus": self.state.current_focus #189
-                } #190
-            } #191
-def generate_output(self) -> Dict[str, Any]: #192
-        """Generate structured output from all extracted information.""" #193
-        try: #194
-            # Organize extracted information by category #195
-            categorized_info = {} #196
-            for category in self.extraction_categories: #197
-                category_items = [ #198
-                    { #199
-                        "text": item.text, #200
-                        "confidence": item.confidence, #201
-                        "timestamp": item.timestamp.isoformat(), #202
-                        "metadata": item.metadata #203
-                    } #204
-                    for item in self.state.extracted_items #205
-                    if item.category == category #206
-                ] #207
-                if category_items: #208
-                    categorized_info[category] = category_items #209
-            # Create output structure #210
-            output = { #211
-                "extracted_information": categorized_info, #212
-                "analysis_summary": { #213
-                    "total_items": len(self.state.extracted_items), #214
-                    "categories_covered": self.state.categories_covered, #215
-                    "completion_percentage": self.state.completion_percentage #216
-                }, #217
-                "metadata": { #218
-                    "generated_at": datetime.now().isoformat(), #219
-                    "conversation_length": len(self.conversation_history), #220
-                    "version": "2.0" #221
-                } #222
-            } #223
-            # Save to file #224
-            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") #225
-            filename = f"extracted_info_{timestamp}.json" #226
-            with open(filename, 'w', encoding='utf-8') as f: #227
-                json.dump(output, f, indent=2, ensure_ascii=False) #228
-            return { #229
-                "filename": filename, #230
-                "content": output, #231
-                "status": "success" #232
-            } #233
-        except Exception as e: #234
-            error_msg = f"Error generating output: {str(e)}" #235
-            logger.error(error_msg) #236
-            return { #237
-                "error": error_msg, #238
-                "status": "error" #239
-            } #240
-def create_gradio_interface(): #241
-    """Create the Gradio interface for information extraction.""" #242
-    extractor = InformationExtractor() #243
-    # Custom CSS for better styling #244
-    css = """ #245
-    .container { max-width: 900px; margin: auto; } #246
-    .message { padding: 1rem; margin: 0.5rem 0; border-radius: 0.5rem; } #247
-    .info-panel { background: #f5f5f5; padding: 1rem; border-radius: 0.5rem; } #248
-    .status-badge { #249
-        display: inline-block; #250
-        padding: 0.25rem 0.5rem; #251
-        border-radius: 0.25rem; #252
-        margin: 0.25rem; #253
-        background: #e0e0e0; #254
-    } #255
-    .extraction-highlight { #256
-        background: #e8f4f8; #257
-        border-left: 4px solid #4a90e2; #258
-        padding: 0.5rem; #259
-        margin: 0.5rem 0; #260
-    } #261
-    """ #262
-    with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo: #263
-        gr.Markdown(""" #264
         # 🔍 Information Extraction Assistant
         Have a natural conversation while we extract and organize important information.
         The system will automatically identify and categorize relevant details.
-        """) #265
-        with gr.Row(): #266
-            with gr.Column(scale=2): #267
-                # API Key input #268
-                api_key = gr.Textbox( #269
-                    label="OpenAI API Key", #270
-                    type="password", #271
-                    placeholder="Enter your OpenAI API key (sk-...)", #272
-                    show_label=True #273
-                ) #274
-                # Chat interface #275
-                chatbot = gr.Chatbot( #276
-                    value=[], #277
-                    height=400, #278
-                    type="messages", #279
-                    show_label=False #280
-                ) #281
-                # Message input #282
-                with gr.Row(): #283
-                    msg = gr.Textbox( #284
-                        label="Message", #285
-                        placeholder="Type your message here...", #286
-                        scale=4 #287
-                    ) #288
-                    submit = gr.Button( #289
-                        "Send", #290
-                        variant="primary", #291
-                        scale=1 #292
-                    ) #293
-                # Action buttons #294
-                with gr.Row(): #295
-                    clear = gr.Button("Clear Chat", scale=1) #296
-                    generate = gr.Button( #297
-                        "Generate Report", #298
-                        variant="secondary", #299
-                        scale=2 #300
-                    ) #301
-with gr.Column(scale=1): #302
-                # Extraction Status Panel #303
-                with gr.Group(visible=True) as status_panel: #304
-                    gr.Markdown("### Extraction Progress") #305
-                    # Progress indicator #306
-                    progress = gr.Slider( #307
-                        label="Completion", #308
-                        minimum=0, #309
-                        maximum=100, #310
-                        value=0, #311
-                        interactive=False #312
-                    ) #313
-                    # Categories covered #314
-                    categories_covered = gr.JSON( #315
-                        label="Categories Covered", #316
-                        value={"categories": []} #317
-                    ) #318
-                    # Current focus #319
-                    current_focus = gr.Textbox( #320
-                        label="Current Focus", #321
-                        value="Not started", #322
-                        interactive=False #323
-                    ) #324
-                # Extraction Results #325
-                with gr.Tabs() as result_tabs: #326
-                    with gr.Tab("Extracted Information"): #327
-                        extracted_info = gr.JSON( #328
-                            label="Extracted Details", #329
-                            value={} #330
-                        ) #331
-                    with gr.Tab("Download"): #332
-                        file_output = gr.File( #333
-                            label="Download Report" #334
-                        ) #335
-                    with gr.Tab("Analysis"): #336
-                        analysis_text = gr.Markdown( #337
-                            "Analysis will appear here after processing." #338
-                        ) #339
-        # Helper Functions #340
-        def format_extraction_summary(extracted_items: List[Dict]) -> str: #341
-            """Format extracted information for display.""" #342
-            if not extracted_items: #343
-                return "No information extracted yet." #344
-            summary = ["### Recently Extracted Information"] #345
-            for item in extracted_items: #346
-                summary.append( #347
-                    f"- **{item['category']}** ({item['confidence']*100:.1f}% confidence)\n" #348
-                    f"  {item['text']}" #349
-                ) #350
-            return "\n".join(summary) #351
-        def update_interface_state(state: Dict[str, Any]) -> tuple: #352
-            """Update all interface components based on current state.""" #353
-            return ( #354
-                state['completion_status']['percentage'], #355
-                {"categories": state['completion_status']['categories_covered']}, #356
-                state['completion_status']['current_focus'] #357
-            ) #358
-        # Event Handlers #359
-        def process_message(message: str, history: list, key: str) -> tuple: #360
-            """Handle message processing and update interface.""" #361
-            if not message.strip(): #362
-                return history, 0, {}, "Please enter a message" #363
-            try: #364
-                # Process message #365
-                result = extractor.process_message(message, key) #366
-                if "error" in result: #367
-                    return ( #368
-                        history, #369
-                        0, #370
-                        {"categories": []}, #371
-                        f"Error: {result['error']}" #372
-                    ) #373
-                # Update chat history #374
-                history.append({ #375
-                    "role": "user", #376
-                    "content": message #377
-                }) #378
-                history.append({ #379
-                    "role": "assistant", #380
-                    "content": result["response"] #381
-                }) #382
-                # Update status components #383
-                progress_value = result["completion_status"]["percentage"] #384
-                categories = { #385
-                    "categories": result["completion_status"]["categories_covered"] #386
-                } #387
-                current_focus = result["completion_status"]["current_focus"] or "Processing..." #388
-                # Update extraction display #389
-                if result.get("extracted_info"): #390
-                    analysis_text = format_extraction_summary(result["extracted_info"]) #391
-                else: #392
-                    analysis_text = "No new information extracted." #393
-                return ( #394
-                    history, #395
-                    progress_value, #396
-                    categories, #397
-                    current_focus, #398
-                    analysis_text #399
-                ) #400
-            except Exception as e: #401
-                logger.error(f"Error in process_message: {str(e)}") #402
-                return ( #403
-                    history, #404
-                    0, #405
-                    {"categories": []}, #406
-                    f"Error: {str(e)}", #407
-                    "An error occurred during processing." #408
-                ) #409
-def generate_report() -> tuple: #410
-            """Generate and return report file.""" #411
-            try: #412
-                result = extractor.generate_output() #413
-                if result["status"] == "success": #414
-                    # Update JSON preview #415
-                    content_preview = { #416
-                        "summary": result["content"]["analysis_summary"], #417
-                        "categories": list(result["content"]["extracted_information"].keys()), #418
-                        "total_items": len(result["content"]["extracted_information"]) #419
-                    } #420
-                    return ( #421
-                        result["filename"], #422
-                        content_preview, #423
-                        "Report generated successfully! 🎉", #424
-                        gr.update(value=format_extraction_summary( #425
-                            [item for items in result["content"]["extracted_information"].values() #426
-                             for item in items] #427
-                        )) #428
-                    ) #429
-                else: #430
-                    return ( #431
-                        None, #432
-                        {"error": result["error"]}, #433
-                        f"Error generating report: {result['error']}", #434
-                        "Failed to generate analysis." #435
-                    ) #436
-            except Exception as e: #437
-                logger.error(f"Error in generate_report: {str(e)}") #438
-                return ( #439
-                    None, #440
-                    {"error": str(e)}, #441
-                    f"Error: {str(e)}", #442
-                    "An error occurred during report generation." #443
-                ) #444
-        def clear_interface() -> tuple: #445
-            """Reset all interface components.""" #446
-            # Reset extractor state #447
-            global extractor #448
-            extractor = InformationExtractor() #449
-            return ( #450
-                [], # Clear chat history #451
-                0.0, # Reset progress #452
-                {"categories": []}, # Clear categories #453
-                "Not started", # Reset focus #454
-                {}, # Clear extracted info #455
-                None, # Clear file output #456
-                "Ready to start new extraction.", # Reset analysis #457
-                gr.update(value="") # Clear message input #458
-            ) #459
-        # Event Bindings #460
-        msg.submit( #461
-            process_message, #462
-            inputs=[msg, chatbot, api_key], #463
-            outputs=[ #464
-                chatbot, #465
-                progress, #466
-                categories_covered, #467
-                current_focus, #468
-                analysis_text #469
-            ] #470
-        ).then( #471
-            lambda: "", #472
-            None, #473
-            msg #474
-        ) #475
-        submit.click( #476
-            process_message, #477
-            inputs=[msg, chatbot, api_key], #478
-            outputs=[ #479
-                chatbot, #480
-                progress, #481
-                categories_covered, #482
-                current_focus, #483
-                analysis_text #484
-            ] #485
-        ).then( #486
-            lambda: "", #487
-            None, #488
-            msg #489
-        ) #490
-        generate.click( #491
-            generate_report, #492
-            outputs=[ #493
-                file_output, #494
-                extracted_info, #495
-                current_focus, #496
-                analysis_text #497
-            ] #498
-        ) #499
-        clear.click( #500
-            clear_interface, #501
-            outputs=[ #502
-                chatbot, #503
-                progress, #504
-                categories_covered, #505
-                current_focus, #506
-                extracted_info, #507
-                file_output, #508
-                analysis_text, #509
-                msg #510
-            ] #511
-        ) #512
-    return demo #513
-if __name__ == "__main__": #514
-    # Set up logging for the main application #515
-    logging.basicConfig( #516
-        level=logging.INFO, #517
-        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' #518
-    ) #519
-    try: #520
-        demo = create_gradio_interface() #521
-        demo.launch( #522
-            server_name="0.0.0.0", #523
-            server_port=7860, #524
-            share=True, #525
-            show_api=False #526
-        ) #527
-    except Exception as e: #528
-        logger.error(f"Application failed to start: {str(e)}") #529
-        raise #530

+import json
+import logging
+import os
+from datetime import datetime
+from typing import Dict, List, Optional, Any, Tuple
+from dataclasses import dataclass, field
+from pathlib import Path
 # Third-party imports
+import gradio as gr
+from openai import OpenAI
 # Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.StreamHandler(),
+        logging.FileHandler('app.log')
+    ]
+)
+logger = logging.getLogger(__name__)
 # System prompt for the AI assistant
+SYSTEM_PROMPT = """
 You are an Information Extraction Assistant, designed to help extract and organize
 important information from conversations in a natural and engaging way.
 - Structured data organization with context preservation
 Please maintain a friendly and professional tone while ensuring accurate information extraction.
+"""
+@dataclass
+class ExtractedInfo:
+    """Structure for storing extracted information."""
+    text: str
+    category: str
+    confidence: float
+    timestamp: datetime = field(default_factory=datetime.now)
+    metadata: Dict[str, Any] = field(default_factory=dict)
+@dataclass
+class ConversationState:
+    """Tracks the state and progress of the conversation."""
+    extracted_items: List[ExtractedInfo] = field(default_factory=list)
+    categories_covered: List[str] = field(default_factory=list)
+    current_focus: Optional[str] = None
+    completion_percentage: float = 0.0
+    last_error: Optional[str] = None
+    last_update: datetime = field(default_factory=datetime.now)
+    def add_extracted_info(self, info: ExtractedInfo) -> None:
+        """Add new extracted information and update state."""
+        self.extracted_items.append(info)
+        if info.category not in self.categories_covered:
+            self.categories_covered.append(info.category)
+        self.last_update = datetime.now()
+class InformationExtractor:
+    """Core class for handling information extraction from conversations."""
+    def __init__(self):
+        self.conversation_history: List[Dict[str, str]] = []
+        self.state = ConversationState()
+        self.client: Optional[OpenAI] = None
+        self.extraction_categories = [
+            "personal_info",
+            "education",
+            "work_experience",
+            "skills",
+            "achievements"
+        ]
+    def _validate_api_key(self, api_key: str) -> bool:
+        """Validate OpenAI API key format."""
+        if not api_key.strip():
+            raise ValueError("API key cannot be empty")
+        if not api_key.startswith('sk-'):
+            raise ValueError("Invalid API key format")
+        return True
+    def _initialize_client(self, api_key: str) -> None:
+        """Initialize OpenAI client with error handling."""
+        try:
+            if self._validate_api_key(api_key):
+                self.client = OpenAI(api_key=api_key)
+        except Exception as e:
+            logger.error(f"Error initializing OpenAI client: {str(e)}")
+            raise
+    def _add_to_history(self, role: str, content: str) -> None:
+        """Add a message to conversation history with timestamp."""
+        self.conversation_history.append({
+            "role": role,
+            "content": content,
+            "timestamp": datetime.now().isoformat()
+        })
+    def _get_ai_response(self, retries: int = 3) -> str:
+        """Get response from OpenAI with retry mechanism."""
+        if not self.client:
+            raise ValueError("OpenAI client not initialized")
+        for attempt in range(retries):
+            try:
+                response = self.client.chat.completions.create(
+                    model="gpt-4o-mini",  # Changed from "gpt-4" to "gpt-4o-mini"
+                    messages=[
+                        {"role": "system", "content": SYSTEM_PROMPT},
+                        *[{
+                            "role": msg["role"],
+                            "content": msg["content"]
+                        } for msg in self.conversation_history]
+                    ],
+                    temperature=0.7,
+                    max_tokens=2000
+                )
+                return response.choices[0].message.content
+            except Exception as e:
+                logger.warning(f"Attempt {attempt + 1} failed: {str(e)}")
+                if attempt == retries - 1:
+                    raise Exception(f"Failed after {retries} attempts: {str(e)}")
+                continue
+    def _extract_information(self, text: str) -> List[ExtractedInfo]:
+        """Extract structured information from text."""
+        try:
+            extraction_prompt = f"""
             Analyze the following text and extract relevant information.
             Categories to consider: {', '.join(self.extraction_categories)}
             }}
             Text to analyze: {text}
+            """
+            response = self.client.chat.completions.create(
+                model="gpt-4o-mini",  # Changed from "gpt-4" to "gpt-4o-mini"
+                messages=[
+                    {"role": "system", "content": SYSTEM_PROMPT},
+                    {"role": "user", "content": extraction_prompt}
+                ],
+                temperature=0.3
+            )
+            # Parse response and create ExtractedInfo objects
+            analysis = json.loads(response.choices[0].message.content)
+            extracted_items = []
+            for item in analysis.get("extracted_items", []):
+                extracted_info = ExtractedInfo(
+                    text=item["text"],
+                    category=item["category"],
+                    confidence=item["confidence"],
+                    metadata=item.get("metadata", {})
+                )
+                extracted_items.append(extracted_info)
+            return extracted_items
+        except json.JSONDecodeError as e:
+            logger.error(f"Error parsing extraction response: {str(e)}")
+            return []
+        except Exception as e:
+            logger.error(f"Error during information extraction: {str(e)}")
+            return []
+    def _update_completion_status(self) -> None:
+        """Recompute ``self.state.completion_percentage``.
+
+        The value is the share of extraction categories covered (as a
+        percentage) multiplied by the mean confidence of all extracted
+        items, clamped to the range 0-100.  It is 0 when nothing has been
+        extracted yet or when no extraction categories are configured.
+        """
+        total_categories = len(self.extraction_categories)
+        items = self.state.extracted_items
+        if not total_categories or not items:
+            # No categories would mean dividing by zero below; no items
+            # means there is no confidence to weight the coverage with.
+            self.state.completion_percentage = 0.0
+            return
+        # Share of categories that have at least one extracted item
+        coverage = (len(self.state.categories_covered) / total_categories) * 100
+        # Weight coverage by how confident the extractions are on average
+        avg_confidence = sum(item.confidence for item in items) / len(items)
+        # Clamp so out-of-range confidences cannot push the value outside 0-100
+        self.state.completion_percentage = max(0.0, min(coverage * avg_confidence, 100.0))
+    def process_message(self, message: str, api_key: str) -> Dict[str, Any]:
+        """Run one chat turn and extract information from it.
+
+        The client is initialized with ``api_key`` only if none exists yet.
+        The user message and the assistant reply are appended to the
+        history, extraction runs on the message plus the reply, and the
+        results are merged into ``self.state`` before the completion
+        status is refreshed.
+
+        Returns:
+            On success, a dict with ``response``, ``extracted_info`` (the
+            items found in this turn) and ``completion_status``.  On any
+            exception, a dict with ``error`` and ``completion_status``;
+            the message is also logged and stored in
+            ``self.state.last_error``.
+        """
+        def status_snapshot() -> Dict[str, Any]:
+            # Read the progress fields at call time so both return paths
+            # report the state as it is when they finish.
+            tracker = self.state
+            return {
+                "percentage": tracker.completion_percentage,
+                "categories_covered": tracker.categories_covered,
+                "current_focus": tracker.current_focus
+            }
+        try:
+            # Lazily create the API client on first use
+            if not self.client:
+                self._initialize_client(api_key)
+            # Record the user's turn, then fetch and record the reply
+            self._add_to_history("user", message)
+            reply = self._get_ai_response()
+            self._add_to_history("assistant", reply)
+            # Extract from the latest exchange (message plus reply)
+            harvested = self._extract_information(message + "\n" + reply)
+            for entry in harvested:
+                self.state.add_extracted_info(entry)
+            self._update_completion_status()
+            # Only the items found in this turn are echoed back
+            turn_items = [
+                {
+                    "text": entry.text,
+                    "category": entry.category,
+                    "confidence": entry.confidence
+                }
+                for entry in harvested
+            ]
+            return {
+                "response": reply,
+                "extracted_info": turn_items,
+                "completion_status": status_snapshot()
+            }
+        except Exception as e:
+            error_msg = f"Error processing message: {e!s}"
+            logger.error(error_msg)
+            self.state.last_error = error_msg
+            return {
+                "error": error_msg,
+                "completion_status": status_snapshot()
+            }
+    def generate_output(self) -> Dict[str, Any]:
+        """Build the structured report and write it to a JSON file.
+
+        Extracted items are grouped under each category listed in
+        ``self.extraction_categories`` (categories without items are
+        omitted).  The report is written to
+        ``extracted_info_<YYYYmmdd_HHMMSS>.json`` in the working directory.
+
+        Returns:
+            ``{"filename", "content", "status": "success"}`` on success, or
+            ``{"error", "status": "error"}`` if anything raises.
+        """
+        def serialize(entry) -> Dict[str, Any]:
+            # JSON-friendly view of one extracted item
+            return {
+                "text": entry.text,
+                "confidence": entry.confidence,
+                "timestamp": entry.timestamp.isoformat(),
+                "metadata": entry.metadata
+            }
+        try:
+            # Group items per configured category, keeping category order
+            grouped = {
+                category: [
+                    serialize(entry)
+                    for entry in self.state.extracted_items
+                    if entry.category == category
+                ]
+                for category in self.extraction_categories
+            }
+            # Drop categories that ended up with no items
+            categorized_info = {
+                category: entries
+                for category, entries in grouped.items()
+                if entries
+            }
+            summary = {
+                "total_items": len(self.state.extracted_items),
+                "categories_covered": self.state.categories_covered,
+                "completion_percentage": self.state.completion_percentage
+            }
+            report_meta = {
+                "generated_at": datetime.now().isoformat(),
+                "conversation_length": len(self.conversation_history),
+                "version": "2.0"
+            }
+            output = {
+                "extracted_information": categorized_info,
+                "analysis_summary": summary,
+                "metadata": report_meta
+            }
+            # Persist the report under a timestamped name
+            stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            filename = f"extracted_info_{stamp}.json"
+            with open(filename, 'w', encoding='utf-8') as f:
+                json.dump(output, f, indent=2, ensure_ascii=False)
+            return {
+                "filename": filename,
+                "content": output,
+                "status": "success"
+            }
+        except Exception as e:
+            error_msg = f"Error generating output: {e!s}"
+            logger.error(error_msg)
+            return {
+                "error": error_msg,
+                "status": "error"
+            }
+def create_gradio_interface():
+    """Build the Gradio Blocks UI for the information extraction assistant.
+
+    The left column holds the API key field, the chat window, the message
+    box and the action buttons.  The right column shows extraction
+    progress (slider, covered categories, current focus) and tabs for the
+    extracted details, the downloadable report and an analysis summary.
+    All event handlers are closures over one ``InformationExtractor``
+    instance, which "Clear Chat" replaces with a fresh one.
+
+    Returns:
+        The assembled ``gr.Blocks`` app (not yet launched).
+    """
+    extractor = InformationExtractor()
+    # Custom CSS for better styling
+    css = """
+    .container { max-width: 900px; margin: auto; }
+    .message { padding: 1rem; margin: 0.5rem 0; border-radius: 0.5rem; }
+    .info-panel { background: #f5f5f5; padding: 1rem; border-radius: 0.5rem; }
+    .status-badge {
+        display: inline-block;
+        padding: 0.25rem 0.5rem;
+        border-radius: 0.25rem;
+        margin: 0.25rem;
+        background: #e0e0e0;
+    }
+    .extraction-highlight {
+        background: #e8f4f8;
+        border-left: 4px solid #4a90e2;
+        padding: 0.5rem;
+        margin: 0.5rem 0;
+    }
+    """
+    with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
+        gr.Markdown("""
         # 🔍 Information Extraction Assistant
         Have a natural conversation while we extract and organize important information.
         The system will automatically identify and categorize relevant details.
+        """)
+        with gr.Row():
+            with gr.Column(scale=2):
+                # API Key input
+                api_key = gr.Textbox(
+                    label="OpenAI API Key",
+                    type="password",
+                    placeholder="Enter your OpenAI API key (sk-...)",
+                    show_label=True
+                )
+                # Chat interface
+                chatbot = gr.Chatbot(
+                    value=[],
+                    height=400,
+                    type="messages",
+                    show_label=False
+                )
+                # Message input
+                with gr.Row():
+                    msg = gr.Textbox(
+                        label="Message",
+                        placeholder="Type your message here...",
+                        scale=4
+                    )
+                    submit = gr.Button(
+                        "Send",
+                        variant="primary",
+                        scale=1
+                    )
+                # Action buttons
+                with gr.Row():
+                    clear = gr.Button("Clear Chat", scale=1)
+                    generate = gr.Button(
+                        "Generate Report",
+                        variant="secondary",
+                        scale=2
+                    )
+            with gr.Column(scale=1):
+                # Extraction Status Panel
+                with gr.Group(visible=True) as status_panel:
+                    gr.Markdown("### Extraction Progress")
+                    # Progress indicator
+                    progress = gr.Slider(
+                        label="Completion",
+                        minimum=0,
+                        maximum=100,
+                        value=0,
+                        interactive=False
+                    )
+                    # Categories covered
+                    categories_covered = gr.JSON(
+                        label="Categories Covered",
+                        value={"categories": []}
+                    )
+                    # Current focus
+                    current_focus = gr.Textbox(
+                        label="Current Focus",
+                        value="Not started",
+                        interactive=False
+                    )
+                # Extraction Results
+                with gr.Tabs() as result_tabs:
+                    with gr.Tab("Extracted Information"):
+                        extracted_info = gr.JSON(
+                            label="Extracted Details",
+                            value={}
+                        )
+                    with gr.Tab("Download"):
+                        file_output = gr.File(
+                            label="Download Report"
+                        )
+                    with gr.Tab("Analysis"):
+                        analysis_text = gr.Markdown(
+                            "Analysis will appear here after processing."
+                        )
+        # Helper Functions
+        def format_extraction_summary(extracted_items: List[Dict]) -> str:
+            """Render extracted items as a Markdown bullet list.
+
+            Each item must provide ``category``, ``confidence`` (shown
+            multiplied by 100, as a percentage) and ``text``.
+            """
+            if not extracted_items:
+                return "No information extracted yet."
+            summary = ["### Recently Extracted Information"]
+            summary.extend(
+                f"- **{item['category']}** ({item['confidence']*100:.1f}% confidence)\n"
+                f"  {item['text']}"
+                for item in extracted_items
+            )
+            return "\n".join(summary)
+        def update_interface_state(state: Dict[str, Any]) -> tuple:
+            """Return (progress, categories JSON, focus) from a result's completion status."""
+            status = state['completion_status']
+            return (
+                status['percentage'],
+                {"categories": status['categories_covered']},
+                status['current_focus']
+            )
+        # Event Handlers
+        def process_message(message: str, history: list, key: str) -> tuple:
+            """Handle a chat submission and refresh the status widgets.
+
+            Every path returns exactly five values, one per bound output:
+            (chat history, progress, categories, current focus, analysis).
+            """
+            if not message.strip():
+                # Nothing was processed, so leave the analysis panel as it is
+                return history, 0, {}, "Please enter a message", gr.update()
+            try:
+                # Process message
+                result = extractor.process_message(message, key)
+                if "error" in result:
+                    return (
+                        history,
+                        0,
+                        {"categories": []},
+                        f"Error: {result['error']}",
+                        "An error occurred during processing."
+                    )
+                # Update chat history
+                history.append({
+                    "role": "user",
+                    "content": message
+                })
+                history.append({
+                    "role": "assistant",
+                    "content": result["response"]
+                })
+                # Update status components
+                progress_value, categories, focus_text = update_interface_state(result)
+                focus_text = focus_text or "Processing..."
+                # Update extraction display
+                if result.get("extracted_info"):
+                    analysis_md = format_extraction_summary(result["extracted_info"])
+                else:
+                    analysis_md = "No new information extracted."
+                return (
+                    history,
+                    progress_value,
+                    categories,
+                    focus_text,
+                    analysis_md
+                )
+            except Exception as e:
+                logger.error(f"Error in process_message: {str(e)}")
+                return (
+                    history,
+                    0,
+                    {"categories": []},
+                    f"Error: {str(e)}",
+                    "An error occurred during processing."
+                )
+        def generate_report() -> tuple:
+            """Generate the report file and a preview of its contents.
+
+            Returns four values for (file output, extracted-info JSON,
+            current focus, analysis).
+            """
+            try:
+                result = extractor.generate_output()
+                if result["status"] != "success":
+                    return (
+                        None,
+                        {"error": result["error"]},
+                        f"Error generating report: {result['error']}",
+                        "Failed to generate analysis."
+                    )
+                content = result["content"]
+                by_category = content["extracted_information"]
+                # Report items are grouped under their category and do not
+                # carry a "category" key themselves; re-attach it so the
+                # summary formatter can read it.
+                flat_items = [
+                    {**item, "category": category}
+                    for category, items in by_category.items()
+                    for item in items
+                ]
+                # Update JSON preview
+                content_preview = {
+                    "summary": content["analysis_summary"],
+                    "categories": list(by_category.keys()),
+                    # Count the items, not the category groups
+                    "total_items": len(flat_items)
+                }
+                return (
+                    result["filename"],
+                    content_preview,
+                    "Report generated successfully! 🎉",
+                    gr.update(value=format_extraction_summary(flat_items))
+                )
+            except Exception as e:
+                logger.error(f"Error in generate_report: {str(e)}")
+                return (
+                    None,
+                    {"error": str(e)},
+                    f"Error: {str(e)}",
+                    "An error occurred during report generation."
+                )
+        def clear_interface() -> tuple:
+            """Reset all interface components and start a fresh extractor."""
+            # ``extractor`` lives in the enclosing function, so it must be
+            # rebound with ``nonlocal`` for the other handlers to see the
+            # new instance.
+            nonlocal extractor
+            extractor = InformationExtractor()
+            return (
+                [],  # Clear chat history
+                0.0,  # Reset progress
+                {"categories": []},  # Clear categories
+                "Not started",  # Reset focus
+                {},  # Clear extracted info
+                None,  # Clear file output
+                "Ready to start new extraction.",  # Reset analysis
+                gr.update(value="")  # Clear message input
+            )
+        # Event Bindings
+        # Pressing Enter in the message box and clicking "Send" run the same
+        # handler; both then empty the message box.
+        message_outputs = [
+            chatbot,
+            progress,
+            categories_covered,
+            current_focus,
+            analysis_text
+        ]
+        for trigger in (msg.submit, submit.click):
+            trigger(
+                process_message,
+                inputs=[msg, chatbot, api_key],
+                outputs=message_outputs
+            ).then(
+                lambda: "",
+                None,
+                msg
+            )
+        generate.click(
+            generate_report,
+            outputs=[
+                file_output,
+                extracted_info,
+                current_focus,
+                analysis_text
+            ]
+        )
+        clear.click(
+            clear_interface,
+            outputs=[
+                chatbot,
+                progress,
+                categories_covered,
+                current_focus,
+                extracted_info,
+                file_output,
+                analysis_text,
+                msg
+            ]
+        )
+    return demo
+if __name__ == "__main__":
+    # Configure log output for direct script execution
+    logging.basicConfig(
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+        level=logging.INFO
+    )
+    # Launch settings: bind to all interfaces on port 7860, with sharing
+    # enabled and the API view turned off
+    launch_options = {
+        "server_name": "0.0.0.0",
+        "server_port": 7860,
+        "share": True,
+        "show_api": False,
+    }
+    try:
+        demo = create_gradio_interface()
+        demo.launch(**launch_options)
+    except Exception as e:
+        # Log the startup failure, then re-raise so the process still fails
+        logger.error(f"Application failed to start: {e!s}")
+        raise