jzou19950715 committed on
Commit
16af053
·
verified ·
1 Parent(s): 793ffff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +611 -571
app.py CHANGED
@@ -1,602 +1,642 @@
1
- import json #1
2
- import logging #2
3
- from datetime import datetime #3
4
- from typing import Dict, List, Optional, Tuple, Any #4
5
- from dataclasses import dataclass #5
6
- from pathlib import Path #6
7
- #7
8
- # Third-party imports #8
9
- import gradio as gr #9
10
- from openai import OpenAI #10
11
- #11
12
- # Configure logging #12
13
- logging.basicConfig( #13
14
- level=logging.INFO, #14
15
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' #15
16
- ) #16
17
- logger = logging.getLogger(__name__) #17
18
- #18
19
- # System prompt for the AI #19
20
- SYSTEM_PROMPT = """ #20
21
- You are LOSS DOG (Life, Occupation & Student Story Digital Output Generator), an engaging and
22
- supportive information collector that focuses on understanding a person's complete educational
23
- and professional journey. Your approach is flexible, allowing both structured metrics and personal narratives.
24
-
25
- Core Traits:
26
- - Conversational and natural in gathering information
27
- - Equally values quantitative achievements and qualitative experiences
28
- - Adapts to each person's unique story
29
- - Encourages sharing of both metrics and personal growth
30
- - Maintains context throughout the conversation
31
- """
32
- #32
33
@dataclass
class ConversationState:
    """Track the state of the conversation and profile completion.

    The list fields default to ``None`` and are materialized per-instance in
    ``__post_init__`` so instances never share a mutable default.
    """
    sections_completed: Optional[List[str]] = None   # topics fully covered so far
    sections_partial: Optional[List[str]] = None     # topics touched but unfinished
    current_section: Optional[str] = None            # topic currently under discussion
    completion_percentage: float = 0.0               # rough 0-100 progress estimate
    last_error: Optional[str] = None                 # most recent processing error, if any

    def __post_init__(self):
        # BUG FIX: the original unconditionally reset both lists to [] here,
        # silently discarding any lists a caller passed to the constructor.
        # Only fill in defaults when the caller supplied nothing.
        if self.sections_completed is None:
            self.sections_completed = []
        if self.sections_partial is None:
            self.sections_partial = []
45
- #45
46
class ProfileAnalyzer:
    """Analyzes and structures conversation data flexibly."""

    @staticmethod
    def analyze_content(text: str) -> Dict[str, Any]:
        """Extract key information from text.

        Returns a skeleton result dict whose buckets are filled in by
        downstream processing; the input text is not inspected here.
        """
        return {
            "categories": [],
            "metrics": {},
            "experiences": [],
            "achievements": [],
            "skills": [],
        }

    @staticmethod
    def clean_data(data: Dict[str, Any]) -> Dict[str, Any]:
        """Clean and validate extracted data.

        Recursively drops dict entries and list elements whose value is
        ``None``, an empty string, an empty list, or an empty dict.
        """
        empties = (None, "", [], {})

        def _scrub(node):
            if isinstance(node, dict):
                return {key: _scrub(child)
                        for key, child in node.items()
                        if child not in empties}
            if isinstance(node, list):
                return [_scrub(entry) for entry in node if entry not in empties]
            return node

        return _scrub(data)
71
- #71
72
- class EducationCareerCollector: #72
73
- """Main collector class for handling career and education information.""" #73
74
- #74
75
- def __init__(self): #75
76
- self.conversation_history = [] #76
77
- self.client = None #77
78
- self.state = ConversationState() #78
79
- self.analyzer = ProfileAnalyzer() #79
80
- #80
81
    def process_message(self, message: str, api_key: str) -> Dict[str, Any]:
        """Process a user message and return AI response with enhanced error handling.

        Appends the user message to the history, calls the chat-completions API
        (with up to 3 retries), records the assistant reply, refreshes the
        conversation state, and returns a result dict with keys ``content``,
        ``type`` ("success"/"error"), ``completion_status`` and ``timestamp``.
        Errors are never raised to the caller: they are logged, stored on
        ``self.state.last_error``, and returned as a ``type == "error"`` dict.
        """
        try:
            if not message.strip():
                raise ValueError("Message cannot be empty")

            # NOTE(review): the prefix check is done on the stripped key but a
            # key with interior issues still reaches the API unvalidated.
            if not api_key.strip().startswith('sk-'):
                raise ValueError("Invalid API key format")

            # Lazily create the client so the key is only needed on first use.
            if not self.client:
                self.client = OpenAI(api_key=api_key)

            # Add message to conversation history
            self.conversation_history.append({
                "role": "user",
                "content": message,
                "timestamp": datetime.now().isoformat()
            })

            # Get AI response with retry mechanism
            max_retries = 3
            last_error = None

            for attempt in range(max_retries):
                try:
                    response = self.client.chat.completions.create(
                        model="gpt-4o-mini",
                        messages=[
                            {"role": "system", "content": SYSTEM_PROMPT},
                            # Strip the local "timestamp" field: the API only
                            # accepts role/content message dicts.
                            *[{
                                "role": msg["role"],
                                "content": msg["content"]
                            } for msg in self.conversation_history]
                        ],
                        temperature=0.7,
                        max_tokens=1000
                    )
                    break
                except Exception as e:
                    last_error = str(e)
                    # Re-raise only after the final attempt fails.
                    if attempt == max_retries - 1:
                        raise Exception(f"Failed after {max_retries} attempts: {last_error}")
                    logger.warning(f"Attempt {attempt + 1} failed: {last_error}")
                    continue

            # Process response
            ai_message = response.choices[0].message.content
            self.conversation_history.append({
                "role": "assistant",
                "content": ai_message,
                "timestamp": datetime.now().isoformat()
            })

            # Analyze response and update state
            self._update_conversation_state(ai_message)

            return {
                "content": ai_message,
                "type": "success",
                "completion_status": self.get_completion_status(),
                "timestamp": datetime.now().isoformat()
            }

        except Exception as e:
            error_msg = f"Error processing message: {str(e)}"
            logger.error(error_msg)
            self.state.last_error = error_msg
            return {
                "content": error_msg,
                "type": "error",
                "completion_status": self.get_completion_status(),
                "timestamp": datetime.now().isoformat()
            }
154
-
155
    def _update_conversation_state(self, ai_message: str) -> None:
        """Update the conversation state based on AI response.

        Issues a second, low-temperature API call asking the model to summarize
        progress as JSON, then copies the fields into ``self.state``.  All
        failures are swallowed (logged only) so a bad analysis never breaks the
        main chat flow; on a JSON parse failure a rough message-count-based
        completion estimate is used instead.

        NOTE(review): ``ai_message`` is not referenced directly — the analysis
        call re-sends the whole conversation history, which already contains it.
        """
        try:
            # Create analysis prompt
            analysis_prompt = """
            Review our conversation and identify:
            1. What topics or aspects of their journey were discussed?
            2. What areas need more exploration?
            3. What's the current focus of discussion?

            Response format:
            {
                "topics_discussed": [],
                "areas_needing_exploration": [],
                "current_focus": "",
                "completion_estimate": 0.0
            }
            """

            # Get analysis from AI (low temperature for more deterministic JSON)
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": SYSTEM_PROMPT},
                    *self.conversation_history,
                    {"role": "user", "content": analysis_prompt}
                ],
                temperature=0.3
            )

            # Process analysis.  NOTE(review): raw model output is parsed as
            # JSON without a response_format constraint, so this can fail.
            try:
                analysis = json.loads(response.choices[0].message.content)

                # Update state based on analysis
                self.state.sections_completed = analysis.get("topics_discussed", [])
                self.state.sections_partial = analysis.get("areas_needing_exploration", [])
                self.state.current_section = analysis.get("current_focus")
                self.state.completion_percentage = analysis.get("completion_estimate", 0.0)

            except json.JSONDecodeError as e:
                logger.error(f"Error parsing analysis JSON: {str(e)}")
                # Set default values on error: never let the estimate go
                # backwards; scale roughly with conversation length.
                self.state.completion_percentage = max(
                    self.state.completion_percentage,
                    len(self.conversation_history) * 5.0  # Rough estimate based on message count
                )

        except Exception as e:
            logger.error(f"Error updating conversation state: {str(e)}")
            # State remains unchanged on error
 
 
 
 
 
 
 
 
 
206
 
207
- def get_completion_status(self) -> Dict[str, Any]: #192
208
- """Get current completion status with rich context.""" #193
209
- status = { #194
210
- "completion_percentage": self.state.completion_percentage, #195
211
- "topics_covered": self.state.sections_completed, #196
212
- "topics_in_progress": self.state.sections_partial, #197
213
- "current_focus": self.state.current_section, #198
214
- "conversation_length": len(self.conversation_history), #199
215
- "last_update": datetime.now().isoformat(), #200
216
- "needs_attention": [ #201
217
- topic for topic in self.state.sections_partial #202
218
- if topic not in self.state.sections_completed #203
219
- ], #204
220
- "status_summary": self._generate_status_summary() #205
221
- } #206
222
-
223
- if self.state.last_error: #207
224
- status["last_error"] = self.state.last_error #208
225
 
226
- return status #209
227
-
228
- def _generate_status_summary(self) -> str: #210
229
- """Generate a human-readable summary of the conversation status.""" #211
230
- if not self.conversation_history: #212
231
- return "Ready to start the conversation." #213
 
 
 
 
 
232
 
233
- summary_parts = [] #214
234
-
235
- # Add completion status #215
236
- if self.state.completion_percentage > 0: #216
237
- summary_parts.append( #217
238
- f"Conversation is approximately {self.state.completion_percentage:.1f}% complete" #218
239
- ) #219
240
 
241
- # Add covered topics #220
242
- if self.state.sections_completed: #221
243
- topics = ", ".join(self.state.sections_completed) #222
244
- summary_parts.append(f"We've discussed: {topics}") #223
 
 
 
 
245
 
246
- # Add current focus #224
247
- if self.state.current_section: #225
248
- summary_parts.append( #226
249
- f"Currently focusing on: {self.state.current_section}" #227
250
- ) #228
251
 
252
- # Add next steps if any #229
253
- if self.state.sections_partial: #230
254
- topics = ", ".join(self.state.sections_partial) #231
255
- summary_parts.append(f"Topics to explore further: {topics}") #232
 
 
 
 
256
 
257
- return " | ".join(summary_parts) if summary_parts else "Conversation in progress." #233
258
-
259
    def generate_json(self, api_key: str) -> Tuple[Optional[str], str]:
        """Generate a JSON profile from the conversation history.

        Two chained API calls: the first asks the model to analyze the
        conversation, the second turns that analysis into a profile JSON.
        The cleaned profile (plus metadata) is written to a timestamped file.

        Returns:
            ``(filename, json_string)`` on success; ``(None, json_string)``
            when the file could not be saved or any step failed (in the
            failure case the string is an error JSON).
        """
        try:
            if not self.client:
                self.client = OpenAI(api_key=api_key)

            # Analysis prompt focused on understanding the conversation
            analysis_prompt = """
            Review our conversation and create a JSON structure that captures the person's journey.
            Focus on what was actually discussed, not fitting into predetermined categories.
            Include:
            1. Any experiences or achievements shared
            2. Skills or competencies demonstrated
            3. Timeline or progression points mentioned
            4. Notable metrics or outcomes
            5. Personal growth or learning moments

            Structure the JSON naturally around the topics they shared.
            """
            # Get initial analysis of conversation content
            analysis_response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": SYSTEM_PROMPT},
                    *self.conversation_history,
                    {"role": "user", "content": analysis_prompt}
                ],
                temperature=0.7
            )

            # Parse the initial analysis.
            # NOTE(review): raw model output is parsed without a
            # response_format constraint; a parse failure falls through to the
            # outer except and returns an error JSON.
            analysis = json.loads(analysis_response.choices[0].message.content)

            # Generate structured profile based on analysis
            profile_prompt = f"""
            Based on our conversation, create a detailed profile JSON.
            Use this analysis as a guide: {json.dumps(analysis, indent=2)}

            Important guidelines:
            - Create sections based on what was actually discussed
            - Include both quantitative and qualitative information
            - Preserve the context and significance of experiences
            - Maintain natural flow and connections between topics
            - Use descriptive section names that reflect the conversation
            """

            # Generate the profile JSON (lower temperature for more stable structure)
            profile_response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": SYSTEM_PROMPT},
                    *self.conversation_history,
                    {"role": "user", "content": profile_prompt}
                ],
                temperature=0.5
            )

            # Parse and clean the profile data
            profile_data = json.loads(profile_response.choices[0].message.content)

            # Clean the data: recursively drop empty / placeholder values.
            def clean_dict(d):
                if isinstance(d, dict):
                    return {k: clean_dict(v) for k, v in d.items()
                            if v not in (None, "", [], {}, "N/A", "None")}
                if isinstance(d, list):
                    return [clean_dict(item) for item in d
                            if item not in (None, "", [], {}, "N/A", "None")]
                return d

            profile_data = clean_dict(profile_data)

            # Add metadata
            profile_data["metadata"] = {
                "generated_at": datetime.now().isoformat(),
                "version": "2.0",
                "generation_metrics": {
                    "conversation_length": len(self.conversation_history),
                    "topics_covered": self.state.sections_completed,
                    "completion_percentage": self.state.completion_percentage
                }
            }

            # Save to file (timestamped name in the current working directory)
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"career_education_profile_{timestamp}.json"

            try:
                with open(filename, 'w', encoding='utf-8') as f:
                    json.dump(profile_data, f, indent=2, ensure_ascii=False)
                return (filename, json.dumps(profile_data, indent=2, ensure_ascii=False))
            except Exception as e:
                # Saving failed; still hand back the JSON string so the UI can
                # show a preview.
                logger.error(f"Error saving profile to file: {str(e)}")
                return (None, json.dumps(profile_data, indent=2, ensure_ascii=False))

        except Exception as e:
            error_msg = f"Error generating profile: {str(e)}"
            logger.error(error_msg)
            error_json = {
                "error": error_msg,
                "metadata": {
                    "generated_at": datetime.now().isoformat(),
                    "error_occurred": True
                }
            }
            return (None, json.dumps(error_json, indent=2))
365
-
366
def create_education_career_interface():
    """Create Gradio interface for the education and career collector.

    Builds a two-column chat UI: the left column holds the API-key field,
    status banner, chat window, message input and action buttons; the right
    column shows live progress, a JSON preview tab, a download tab and usage
    tips.  Returns the un-launched ``gr.Blocks`` app.

    NOTE(review): a single ``collector`` instance is captured by all the
    handlers below, so concurrent users of one server process share one
    conversation history.
    """
    collector = EducationCareerCollector()

    css = """
    .message { font-size: 16px; margin: 8px 0; }
    .system-message { color: #444; font-style: italic; }
    .user-message { color: #000; font-weight: 500; }
    .alert {
        padding: 12px;
        margin: 8px 0;
        border-radius: 4px;
    }
    .alert-info {
        background-color: #e8f4f8;
        border-left: 4px solid #4a90e2;
    }
    .alert-error {
        background-color: #fde8e8;
        border-left: 4px solid #f56565;
    }
    .alert-success {
        background-color: #e8f8e8;
        border-left: 4px solid #48bb78;
    }
    """

    with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
        gr.Markdown("""
        # 🐕 LOSS DOG - Profile Builder

        Share your career and education journey naturally.
        Tell your story in your own way - we'll capture what matters to you.
        """)

        with gr.Row():
            with gr.Column(scale=2):
                # API Key Input
                api_key = gr.Textbox(
                    label="OpenAI API Key",
                    type="password",
                    placeholder="Enter your OpenAI API key (sk-...)",
                    info="Your API key from platform.openai.com"
                )

                # Status Messages
                status_msg = gr.Markdown(
                    "Ready to start! Share your journey...",
                    elem_classes=["alert", "alert-info"]
                )

                # Chat Interface
                chatbot = gr.Chatbot(
                    height=400,
                    show_label=False,
                    elem_classes=["message"]
                )

                # Message Input
                with gr.Row():
                    msg = gr.Textbox(
                        label="Your message",
                        placeholder="Tell me about your journey...",
                        show_label=False,
                        scale=4
                    )
                    submit = gr.Button("Send", variant="primary", scale=1)

                # Action Buttons
                with gr.Row():
                    clear = gr.Button("🗑️ Clear Chat", scale=1)
                    generate = gr.Button("📄 Generate Profile", scale=2)
            with gr.Column(scale=1):
                # Progress Information
                progress_info = gr.Markdown(
                    "### Profile Progress\nStart sharing your story!",
                    elem_classes=["alert", "alert-info"]
                )

                # Profile Preview
                with gr.Tab("Preview"):
                    json_preview = gr.JSON(
                        label="Profile Preview",
                        show_label=True
                    )

                # Download Section
                with gr.Tab("Download"):
                    output_file = gr.File(
                        label="Download Profile"
                    )

                # Tips and Guidelines
                with gr.Accordion("💡 Tips", open=False):
                    gr.Markdown("""
                    ### Share Your Story Naturally

                    - Tell us about experiences that matter to you
                    - Include both achievements and challenges
                    - Share numbers when they're meaningful
                    - Describe your growth and learning
                    - Talk about what makes your journey unique
                    """)

        def process_message(message: str, history: list, key: str) -> tuple:
            """Process user message and update interface.

            Returns the updated chat history and a one-line status string.
            """
            if not message.strip():
                return history, "Please enter a message."

            try:
                # Process the message
                result = collector.process_message(message, key)

                # Update chat history (Gradio tuple format: (user, assistant))
                history.append((message, result["content"]))

                # Generate status message
                status = f"""Progress: {result['completion_status']['completion_percentage']:.1f}%
                | Topics covered: {len(result['completion_status']['topics_covered'])}"""

                return history, status

            except Exception as e:
                error_msg = f"Error: {str(e)}"
                logger.error(error_msg)
                return history, error_msg

        def generate_profile(key: str) -> tuple:
            """Generate and return profile JSON for the file, preview and status outputs."""
            try:
                filename, json_content = collector.generate_json(key)
                if filename:
                    return (
                        filename,
                        json.loads(json_content),
                        "Profile generated successfully! 🎉"
                    )
                # Profile built but file save failed: show preview anyway.
                return (
                    None,
                    json.loads(json_content),
                    "Profile generated but couldn't save file."
                )
            except Exception as e:
                error_msg = f"Error generating profile: {str(e)}"
                logger.error(error_msg)
                return None, {"error": error_msg}, error_msg

        def clear_interface() -> tuple:
            """Reset the interface state.

            NOTE(review): only the UI widgets are reset — the shared
            ``collector``'s conversation history is NOT cleared here.
            """
            return (
                [],  # Clear chat history
                "Ready to start! Share your journey...",  # Reset status
                "### Profile Progress\nStart sharing your story!",  # Reset progress
                None,  # Clear JSON preview
                None  # Clear file output
            )

        def update_progress(history: list) -> str:
            """Update progress information based on conversation."""
            if not history:
                return "### Profile Progress\nStart sharing your story!"

            # Get completion status
            status = collector.get_completion_status()

            # Format progress message
            progress_md = f"""### Profile Progress: {status['completion_percentage']:.1f}%\n\n"""

            if status['topics_covered']:
                progress_md += "✅ **Discussed:**\n"
                for topic in status['topics_covered']:
                    progress_md += f"- {topic}\n"

            if status['topics_in_progress']:
                progress_md += "\n📝 **Currently exploring:**\n"
                for topic in status['topics_in_progress']:
                    progress_md += f"- {topic}\n"

            if status.get('needs_attention'):
                progress_md += "\n❗ **Consider discussing:**\n"
                for topic in status['needs_attention']:
                    progress_md += f"- {topic}\n"

            return progress_md

        # Event Handlers: send on Enter -> refresh progress -> clear input box
        msg.submit(
            process_message,
            [msg, chatbot, api_key],
            [chatbot, status_msg]
        ).then(
            update_progress,
            chatbot,
            progress_info
        ).then(
            lambda: "",
            None,
            msg
        )

        # Same chain for the Send button
        submit.click(
            process_message,
            [msg, chatbot, api_key],
            [chatbot, status_msg]
        ).then(
            update_progress,
            chatbot,
            progress_info
        ).then(
            lambda: "",
            None,
            msg
        )

        generate.click(
            generate_profile,
            [api_key],
            [output_file, json_preview, status_msg]
        )

        clear.click(
            clear_interface,
            None,
            [chatbot, status_msg, progress_info, json_preview, output_file]
        )

    return demo
593
-
594
if __name__ == "__main__":
    demo = create_education_career_interface()
    # BUG FIX: `enable_queue` was removed from `launch()` in Gradio 4.x
    # (deprecated in 3.x in favour of `Blocks.queue()`); passing it raises
    # TypeError on current Gradio.  Enable queuing explicitly instead.
    demo.queue()
    demo.launch(
        server_name="0.0.0.0",   # listen on all interfaces (container-friendly)
        server_port=7860,
        share=True,              # also create a public gradio.live tunnel
        show_error=True          # surface handler exceptions in the UI
    )
- ) #530
 
1
+ import json #1
2
+ import logging #2
3
+ import os #3
4
+ from datetime import datetime #4
5
+ from typing import Dict, List, Optional, Any, Tuple #5
6
+ from dataclasses import dataclass, field #6
7
+ from pathlib import Path #7
8
+
9
+ # Third-party imports
10
+ import gradio as gr #8
11
+ from openai import OpenAI #9
12
+
13
+ # Configure logging
14
+ logging.basicConfig( #10
15
+ level=logging.INFO, #11
16
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', #12
17
+ handlers=[ #13
18
+ logging.StreamHandler(), #14
19
+ logging.FileHandler('app.log') #15
20
+ ] #16
21
+ ) #17
22
+ logger = logging.getLogger(__name__) #18
23
+
24
+ # System prompt for the AI assistant
25
+ SYSTEM_PROMPT = """ #19
26
+ You are an Information Extraction Assistant, designed to help extract and organize
27
+ important information from conversations in a natural and engaging way.
28
+
29
+ Core Capabilities:
30
+ - Natural conversation while gathering specific information
31
+ - Flexible information extraction based on context
32
+ - Progress tracking and completion estimation
33
+ - Structured data organization with context preservation
34
+
35
+ Please maintain a friendly and professional tone while ensuring accurate information extraction.
36
+ """ #20
37
+
38
@dataclass
class ExtractedInfo:
    """Structure for storing a single piece of extracted information.

    Instances are built by ``InformationExtractor._extract_information`` from
    the model's JSON output.
    """
    text: str          # verbatim text pulled from the conversation
    category: str      # presumably one of InformationExtractor.extraction_categories — not enforced here
    confidence: float  # model-reported confidence, expected in [0.0, 1.0]
    timestamp: datetime = field(default_factory=datetime.now)  # when this item was extracted
    metadata: Dict[str, Any] = field(default_factory=dict)     # free-form extra context from the model
46
+
47
@dataclass
class ConversationState:
    """Tracks the state and progress of the conversation."""
    extracted_items: List["ExtractedInfo"] = field(default_factory=list)  # everything extracted so far
    categories_covered: List[str] = field(default_factory=list)           # distinct categories seen, in order
    current_focus: Optional[str] = None          # category currently being explored
    completion_percentage: float = 0.0           # 0-100 progress estimate
    last_error: Optional[str] = None             # most recent processing error
    last_update: datetime = field(default_factory=datetime.now)  # time of last mutation

    def add_extracted_info(self, info: "ExtractedInfo") -> None:
        """Record a newly extracted item and refresh the derived state."""
        self.extracted_items.append(info)
        category = info.category
        # Track each category only once, preserving first-seen order.
        if category not in self.categories_covered:
            self.categories_covered.append(category)
        self.last_update = datetime.now()
63
+
64
+ class InformationExtractor: #44
65
+ """Core class for handling information extraction from conversations.""" #45
66
+
67
+ def __init__(self): #46
68
+ self.conversation_history: List[Dict[str, str]] = [] #47
69
+ self.state = ConversationState() #48
70
+ self.client: Optional[OpenAI] = None #49
71
+ self.extraction_categories = [ #50
72
+ "personal_info", #51
73
+ "education", #52
74
+ "work_experience", #53
75
+ "skills", #54
76
+ "achievements" #55
77
+ ] #56
78
+
79
+ def _validate_api_key(self, api_key: str) -> bool: #57
80
+ """Validate OpenAI API key format.""" #58
81
+ if not api_key.strip(): #59
82
+ raise ValueError("API key cannot be empty") #60
83
+ if not api_key.startswith('sk-'): #61
84
+ raise ValueError("Invalid API key format") #62
85
+ return True #63
86
+
87
    def _initialize_client(self, api_key: str) -> None:
        """Initialize the OpenAI client after validating the key.

        Raises:
            ValueError: From ``_validate_api_key`` on a malformed key.
            Exception: Any client-construction failure is logged and re-raised.
        """
        try:
            # _validate_api_key raises on bad input, so the True check is the
            # only path that constructs a client.
            if self._validate_api_key(api_key):
                self.client = OpenAI(api_key=api_key)
        except Exception as e:
            logger.error(f"Error initializing OpenAI client: {str(e)}")
            raise
95
+
96
+ def _add_to_history(self, role: str, content: str) -> None: #72
97
+ """Add a message to conversation history with timestamp.""" #73
98
+ self.conversation_history.append({ #74
99
+ "role": role, #75
100
+ "content": content, #76
101
+ "timestamp": datetime.now().isoformat() #77
102
+ }) #78
103
+
104
    def _get_ai_response(self, retries: int = 3) -> str:
        """Get a chat completion for the current history, retrying on failure.

        Args:
            retries: Maximum number of API attempts before giving up.

        Returns:
            The assistant message text from the first successful attempt.

        Raises:
            ValueError: If the OpenAI client has not been initialized.
            Exception: After all ``retries`` attempts have failed.
        """
        if not self.client:
            raise ValueError("OpenAI client not initialized")
        for attempt in range(retries):
            try:
                response = self.client.chat.completions.create(
                    model="gpt-4",
                    messages=[
                        {"role": "system", "content": SYSTEM_PROMPT},
                        # Strip the local "timestamp" field: the API accepts
                        # only role/content message dicts.
                        *[{
                            "role": msg["role"],
                            "content": msg["content"]
                        } for msg in self.conversation_history]
                    ],
                    temperature=0.7,
                    max_tokens=2000
                )
                return response.choices[0].message.content
            except Exception as e:
                logger.warning(f"Attempt {attempt + 1} failed: {str(e)}")
                # Re-raise only after the final attempt.
                if attempt == retries - 1:
                    raise Exception(f"Failed after {retries} attempts: {str(e)}")
                continue
130
+
131
    def _extract_information(self, text: str) -> List[ExtractedInfo]:
        """Extract structured information from text via a dedicated API call.

        Sends ``text`` to the model with a JSON-format extraction prompt and
        converts each returned item into an ``ExtractedInfo``.  All failures
        (bad JSON, API errors) are logged and collapsed to an empty list so
        extraction problems never break the chat flow.
        """
        try:
            # The doubled braces render as literal {} in the f-string so the
            # model sees a concrete JSON template.
            extraction_prompt = f"""
            Analyze the following text and extract relevant information.
            Categories to consider: {', '.join(self.extraction_categories)}

            For each piece of information extracted, provide:
            1. The exact text
            2. The category it belongs to
            3. Confidence level (0.0 to 1.0)
            4. Any relevant context or metadata

            Format as JSON:
            {{
                "extracted_items": [
                    {{
                        "text": "extracted text",
                        "category": "category name",
                        "confidence": 0.95,
                        "metadata": {{}}
                    }}
                ]
            }}

            Text to analyze: {text}
            """

            # Low temperature for more deterministic, parseable output.
            response = self.client.chat.completions.create(
                model="gpt-4",
                messages=[
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": extraction_prompt}
                ],
                temperature=0.3
            )

            # Parse response and create ExtractedInfo objects.
            # NOTE(review): raw model output is parsed without a
            # response_format constraint, so JSONDecodeError is expected
            # occasionally and handled below.
            analysis = json.loads(response.choices[0].message.content)
            extracted_items = []

            for item in analysis.get("extracted_items", []):
                extracted_info = ExtractedInfo(
                    text=item["text"],
                    category=item["category"],
                    confidence=item["confidence"],
                    metadata=item.get("metadata", {})
                )
                extracted_items.append(extracted_info)

            return extracted_items

        except json.JSONDecodeError as e:
            logger.error(f"Error parsing extraction response: {str(e)}")
            return []
        except Exception as e:
            logger.error(f"Error during information extraction: {str(e)}")
            return []
189
+
190
+ def _update_completion_status(self) -> None: #134
191
+ """Update completion status based on extracted information.""" #135
192
+ total_categories = len(self.extraction_categories) #136
193
+ covered_categories = len(self.state.categories_covered) #137
194
+
195
+ # Calculate base completion percentage #138
196
+ base_completion = (covered_categories / total_categories) * 100 #139
197
+
198
+ # Adjust based on confidence levels #140
199
+ if self.state.extracted_items: #141
200
+ avg_confidence = sum(item.confidence for item in self.state.extracted_items) / len(self.state.extracted_items) #142
201
+ adjusted_completion = base_completion * avg_confidence #143
202
+ else: #144
203
+ adjusted_completion = 0.0 #145
204
 
205
+ self.state.completion_percentage = min(adjusted_completion, 100.0) #146
206
+
207
    def process_message(self, message: str, api_key: str) -> Dict[str, Any]:
        """Process a user message and extract information.

        Pipeline: lazily initialize the client, log the user message, get the
        assistant reply, run extraction over both texts, fold new items into
        the state, and recompute completion.  Errors never propagate: they are
        logged, stored on ``self.state.last_error``, and returned as a dict
        with an ``error`` key (plus the current completion status).
        """
        try:
            # Initialize client if needed
            if not self.client:
                self._initialize_client(api_key)

            # Add user message to history
            self._add_to_history("user", message)

            # Get AI response
            ai_response = self._get_ai_response()
            self._add_to_history("assistant", ai_response)

            # Extract information from this exchange (user + assistant text)
            new_information = self._extract_information(message + "\n" + ai_response)

            # Update state with new information
            for info in new_information:
                self.state.add_extracted_info(info)

            # Update completion status
            self._update_completion_status()

            return {
                "response": ai_response,
                "extracted_info": [
                    {
                        "text": info.text,
                        "category": info.category,
                        "confidence": info.confidence
                    } for info in new_information
                ],
                "completion_status": {
                    "percentage": self.state.completion_percentage,
                    "categories_covered": self.state.categories_covered,
                    "current_focus": self.state.current_focus
                }
            }

        except Exception as e:
            error_msg = f"Error processing message: {str(e)}"
            logger.error(error_msg)
            self.state.last_error = error_msg
            return {
                "error": error_msg,
                "completion_status": {
                    "percentage": self.state.completion_percentage,
                    "categories_covered": self.state.categories_covered,
                    "current_focus": self.state.current_focus
                }
            }
259
+ def generate_output(self) -> Dict[str, Any]: #192
260
+ """Generate structured output from all extracted information.""" #193
261
+ try: #194
262
+ # Organize extracted information by category #195
263
+ categorized_info = {} #196
264
+ for category in self.extraction_categories: #197
265
+ category_items = [ #198
266
+ { #199
267
+ "text": item.text, #200
268
+ "confidence": item.confidence, #201
269
+ "timestamp": item.timestamp.isoformat(), #202
270
+ "metadata": item.metadata #203
271
+ } #204
272
+ for item in self.state.extracted_items #205
273
+ if item.category == category #206
274
+ ] #207
275
+ if category_items: #208
276
+ categorized_info[category] = category_items #209
277
+
278
+ # Create output structure #210
279
+ output = { #211
280
+ "extracted_information": categorized_info, #212
281
+ "analysis_summary": { #213
282
+ "total_items": len(self.state.extracted_items), #214
283
+ "categories_covered": self.state.categories_covered, #215
284
+ "completion_percentage": self.state.completion_percentage #216
285
+ }, #217
286
+ "metadata": { #218
287
+ "generated_at": datetime.now().isoformat(), #219
288
+ "conversation_length": len(self.conversation_history), #220
289
+ "version": "2.0" #221
290
+ } #222
291
+ } #223
292
+
293
+ # Save to file #224
294
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") #225
295
+ filename = f"extracted_info_{timestamp}.json" #226
296
 
297
+ with open(filename, 'w', encoding='utf-8') as f: #227
298
+ json.dump(output, f, indent=2, ensure_ascii=False) #228
299
+
300
+ return { #229
301
+ "filename": filename, #230
302
+ "content": output, #231
303
+ "status": "success" #232
304
+ } #233
305
+
306
+ except Exception as e: #234
307
+ error_msg = f"Error generating output: {str(e)}" #235
308
+ logger.error(error_msg) #236
309
+ return { #237
310
+ "error": error_msg, #238
311
+ "status": "error" #239
312
+ } #240
313
+
314
def create_gradio_interface():
    """Build the Gradio UI for the information-extraction chat.

    Wires a chat panel, live progress indicators, and report generation
    around a single InformationExtractor instance held in this closure.

    Returns the assembled ``gr.Blocks`` app (caller launches it).
    """
    extractor = InformationExtractor()

    # Custom CSS for better styling
    css = """
    .container { max-width: 900px; margin: auto; }
    .message { padding: 1rem; margin: 0.5rem 0; border-radius: 0.5rem; }
    .info-panel { background: #f5f5f5; padding: 1rem; border-radius: 0.5rem; }
    .status-badge {
        display: inline-block;
        padding: 0.25rem 0.5rem;
        border-radius: 0.25rem;
        margin: 0.25rem;
        background: #e0e0e0;
    }
    .extraction-highlight {
        background: #e8f4f8;
        border-left: 4px solid #4a90e2;
        padding: 0.5rem;
        margin: 0.5rem 0;
    }
    """

    with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # 🔍 Information Extraction Assistant

        Have a natural conversation while we extract and organize important information.
        The system will automatically identify and categorize relevant details.
        """)

        with gr.Row():
            with gr.Column(scale=2):
                # API Key input
                api_key = gr.Textbox(
                    label="OpenAI API Key",
                    type="password",
                    placeholder="Enter your OpenAI API key (sk-...)",
                    show_label=True
                )

                # Chat interface
                chatbot = gr.Chatbot(
                    value=[],
                    height=400,
                    type="messages",
                    show_label=False
                )

                # Message input
                with gr.Row():
                    msg = gr.Textbox(
                        label="Message",
                        placeholder="Type your message here...",
                        scale=4
                    )
                    submit = gr.Button(
                        "Send",
                        variant="primary",
                        scale=1
                    )

                # Action buttons
                with gr.Row():
                    clear = gr.Button("Clear Chat", scale=1)
                    generate = gr.Button(
                        "Generate Report",
                        variant="secondary",
                        scale=2
                    )

            with gr.Column(scale=1):
                # Extraction Status Panel
                with gr.Group(visible=True) as status_panel:
                    gr.Markdown("### Extraction Progress")

                    # Progress indicator
                    progress = gr.Slider(
                        label="Completion",
                        minimum=0,
                        maximum=100,
                        value=0,
                        interactive=False
                    )

                    # Categories covered
                    categories_covered = gr.JSON(
                        label="Categories Covered",
                        value={"categories": []}
                    )

                    # Current focus
                    current_focus = gr.Textbox(
                        label="Current Focus",
                        value="Not started",
                        interactive=False
                    )

                # Extraction Results
                with gr.Tabs() as result_tabs:
                    with gr.Tab("Extracted Information"):
                        extracted_info = gr.JSON(
                            label="Extracted Details",
                            value={}
                        )

                    with gr.Tab("Download"):
                        file_output = gr.File(
                            label="Download Report"
                        )

                    with gr.Tab("Analysis"):
                        analysis_text = gr.Markdown(
                            "Analysis will appear here after processing."
                        )

        # Helper Functions
        def format_extraction_summary(extracted_items: List[Dict]) -> str:
            """Render a list of extracted items as a markdown bullet list."""
            if not extracted_items:
                return "No information extracted yet."

            summary = ["### Recently Extracted Information"]
            for item in extracted_items:
                summary.append(
                    f"- **{item['category']}** ({item['confidence']*100:.1f}% confidence)\n"
                    f"  {item['text']}"
                )
            return "\n".join(summary)

        # Event Handlers
        def process_message(message: str, history: list, key: str) -> tuple:
            """Handle one chat turn and refresh every status component.

            Always returns 5 values, matching the outputs binding:
            (chat history, progress, categories JSON, focus text, analysis md).
            """
            if not message.strip():
                # BUG FIX: the original returned only 4 values here although
                # the event binding declares 5 outputs.
                return (
                    history,
                    0,
                    {"categories": []},
                    "Please enter a message",
                    "No message to process."
                )

            try:
                # Process message
                result = extractor.process_message(message, key)

                if "error" in result:
                    # BUG FIX: padded to 5 values to match the outputs binding.
                    return (
                        history,
                        0,
                        {"categories": []},
                        f"Error: {result['error']}",
                        "An error occurred during processing."
                    )

                # Update chat history (messages format: role/content dicts)
                history.append({
                    "role": "user",
                    "content": message
                })
                history.append({
                    "role": "assistant",
                    "content": result["response"]
                })

                # Update status components
                progress_value = result["completion_status"]["percentage"]
                categories = {
                    "categories": result["completion_status"]["categories_covered"]
                }
                focus = result["completion_status"]["current_focus"] or "Processing..."

                # Update extraction display (local name chosen so it does not
                # shadow the `analysis_text` Markdown component above)
                if result.get("extracted_info"):
                    summary_md = format_extraction_summary(result["extracted_info"])
                else:
                    summary_md = "No new information extracted."

                return (
                    history,
                    progress_value,
                    categories,
                    focus,
                    summary_md
                )

            except Exception as e:
                logger.error(f"Error in process_message: {str(e)}")
                return (
                    history,
                    0,
                    {"categories": []},
                    f"Error: {str(e)}",
                    "An error occurred during processing."
                )

        def generate_report() -> tuple:
            """Generate the JSON report and refresh download/preview panes."""
            try:
                result = extractor.generate_output()

                if result["status"] == "success":
                    summary = result["content"]["analysis_summary"]
                    content_preview = {
                        "summary": summary,
                        "categories": list(result["content"]["extracted_information"].keys()),
                        # BUG FIX: the original counted top-level categories
                        # here, not extracted items.
                        "total_items": summary["total_items"]
                    }

                    # BUG FIX: re-attach the category key when flattening —
                    # the per-category item dicts don't carry it, and
                    # format_extraction_summary indexes item['category'].
                    all_items = [
                        {**item, "category": category}
                        for category, items in result["content"]["extracted_information"].items()
                        for item in items
                    ]

                    return (
                        result["filename"],
                        content_preview,
                        "Report generated successfully! 🎉",
                        gr.update(value=format_extraction_summary(all_items))
                    )
                else:
                    return (
                        None,
                        {"error": result["error"]},
                        f"Error generating report: {result['error']}",
                        "Failed to generate analysis."
                    )

            except Exception as e:
                logger.error(f"Error in generate_report: {str(e)}")
                return (
                    None,
                    {"error": str(e)},
                    f"Error: {str(e)}",
                    "An error occurred during report generation."
                )

        def clear_interface() -> tuple:
            """Reset the extractor and every interface component."""
            # BUG FIX: the original declared `global extractor`, which rebound
            # a module-level name while the other handlers kept closing over
            # the local `extractor` — so "Clear Chat" never actually reset the
            # conversation state. `nonlocal` rebinds the shared closure cell.
            nonlocal extractor
            extractor = InformationExtractor()

            return (
                [],                                 # Clear chat history
                0.0,                                # Reset progress
                {"categories": []},                 # Clear categories
                "Not started",                      # Reset focus
                {},                                 # Clear extracted info
                None,                               # Clear file output
                "Ready to start new extraction.",   # Reset analysis
                gr.update(value="")                 # Clear message input
            )

        # Event Bindings
        msg.submit(
            process_message,
            inputs=[msg, chatbot, api_key],
            outputs=[
                chatbot,
                progress,
                categories_covered,
                current_focus,
                analysis_text
            ]
        ).then(
            lambda: "",
            None,
            msg
        )

        submit.click(
            process_message,
            inputs=[msg, chatbot, api_key],
            outputs=[
                chatbot,
                progress,
                categories_covered,
                current_focus,
                analysis_text
            ]
        ).then(
            lambda: "",
            None,
            msg
        )

        generate.click(
            generate_report,
            outputs=[
                file_output,
                extracted_info,
                current_focus,
                analysis_text
            ]
        )

        clear.click(
            clear_interface,
            outputs=[
                chatbot,
                progress,
                categories_covered,
                current_focus,
                extracted_info,
                file_output,
                analysis_text,
                msg
            ]
        )

    return demo
624
+
625
if __name__ == "__main__":
    # Configure logging for standalone execution (no-op if the module-level
    # basicConfig already ran).
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )

    try:
        app = create_gradio_interface()
        app.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=True,
            show_api=False
        )
    except Exception as exc:
        # Surface startup failures in the log, then re-raise so the process
        # exits non-zero.
        logger.error(f"Application failed to start: {str(exc)}")
        raise