Spaces:

jzou19950715
/

Newco_information_extraction_agent

Running

App Files Files Community

jzou19950715 committed on Feb 1

Commit

50e3198

verified ·

1 Parent(s): d965c95

Update app.py

Browse files

Files changed (1) hide show

app.py +333 -379

app.py CHANGED Viewed

@@ -4,13 +4,11 @@ import os
 from datetime import datetime
 from typing import Dict, List, Optional, Any, Tuple
 from dataclasses import dataclass, field
-from pathlib import Path
-# Third-party imports
 import gradio as gr
-from openai import OpenAI
-# Configure logging
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
@@ -21,9 +19,9 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
-# System prompt for the AI assistant
 SYSTEM_PROMPT = """
-You are LOSS DOG, a Career and Education Information Extraction Assistant, designed to help users craft a compelling and well-structured resume by extracting and organizing key details from conversations.
 Core Capabilities:
 - Proactively ask users about their career history, education, skills, certifications, projects, and achievements.
@@ -32,21 +30,24 @@ Core Capabilities:
 - Maintain a friendly, engaging, and professional tone to encourage users to share relevant information.
 - Structure extracted data into well-organized resume sections.
-Your goal is to make resume-building effortless by asking the right questions, extracting key information, and presenting it in a clear, professional format.
 """
 @dataclass
 class ExtractedInfo:
-    """Structure for storing extracted information."""
     text: str
     category: str
     confidence: float
     timestamp: datetime = field(default_factory=datetime.now)
     metadata: Dict[str, Any] = field(default_factory=dict)
 @dataclass
 class ConversationState:
-    """Tracks the state and progress of the conversation."""
     extracted_items: List[ExtractedInfo] = field(default_factory=list)
     categories_covered: List[str] = field(default_factory=list)
     current_focus: Optional[str] = None
@@ -55,19 +56,25 @@ class ConversationState:
     last_update: datetime = field(default_factory=datetime.now)
     def add_extracted_info(self, info: ExtractedInfo) -> None:
-        """Add new extracted information and update state."""
         self.extracted_items.append(info)
         if info.category not in self.categories_covered:
             self.categories_covered.append(info.category)
         self.last_update = datetime.now()
 class InformationExtractor:
-    """Core class for handling information extraction from conversations."""
-    def __init__(self):
         self.conversation_history: List[Dict[str, str]] = []
         self.state = ConversationState()
-        self.client: Optional[OpenAI] = None
         self.extraction_categories = [
             "personal_info",
             "education",
@@ -75,26 +82,48 @@ class InformationExtractor:
             "skills",
             "achievements"
         ]
     def _validate_api_key(self, api_key: str) -> bool:
-        """Validate OpenAI API key format."""
         if not api_key.strip():
-            raise ValueError("API key cannot be empty")
-        if not api_key.startswith('sk-'):
-            raise ValueError("Invalid API key format")
         return True
     def _initialize_client(self, api_key: str) -> None:
-        """Initialize OpenAI client with error handling."""
         try:
             if self._validate_api_key(api_key):
-                self.client = OpenAI(api_key=api_key)
         except Exception as e:
             logger.error(f"Error initializing OpenAI client: {str(e)}")
             raise
     def _add_to_history(self, role: str, content: str) -> None:
-        """Add a message to conversation history with timestamp."""
         self.conversation_history.append({
             "role": role,
             "content": content,
@@ -102,140 +131,138 @@ class InformationExtractor:
         })
     def _get_ai_response(self, retries: int = 3) -> str:
-        """Get response from OpenAI with retry mechanism."""
-        if not self.client:
-            raise ValueError("OpenAI client not initialized")
         for attempt in range(retries):
             try:
-                response = self.client.chat.completions.create(
-                    model="gpt-4o-mini",  # Changed from "gpt-4" to "gpt-4o-mini"
                     messages=[
                         {"role": "system", "content": SYSTEM_PROMPT},
-                        *[{
-                            "role": msg["role"],
-                            "content": msg["content"]
-                        } for msg in self.conversation_history]
                     ],
                     temperature=0.7,
                     max_tokens=2000
-                )
-                return response.choices[0].message.content
             except Exception as e:
                 logger.warning(f"Attempt {attempt + 1} failed: {str(e)}")
                 if attempt == retries - 1:
                     raise Exception(f"Failed after {retries} attempts: {str(e)}")
-                continue
-def _extract_resume_information(self, text: str) -> List[ExtractedInfo]:
-    """Extract structured career and education-related information from text for resume building."""
-    try:
         extraction_prompt = f"""
         Analyze the following text and extract relevant information for resume building.
-        Focus on key resume elements based on the specified extraction categories:
-        {', '.join(self.extraction_categories)}
-        For each extracted item, determine its category dynamically and format it accordingly.
-        The JSON output structure should align with the category of extracted information.
-        Example format:
         {{
-            "extracted_items": {{
-                "category_name_1": [
-                    {{
-                        "text": "extracted detail",
-                        "confidence": 0.95,
-                        "metadata": {{}}
-                    }}
-                ],
-                "category_name_2": [
-                    {{
-                        "text": "extracted detail",
-                        "confidence": 0.92,
-                        "metadata": {{}}
-                    }}
-                ]
-            }}
         }}
-        Ensure extracted details are relevant for resume-building purposes.
         Text to analyze: {text}
         """
-        response = self.client.chat.completions.create(
-            model="gpt-4o-mini",  # Changed from "gpt-4" to "gpt-4o-mini"
-            messages=[
-                {"role": "system", "content": SYSTEM_PROMPT},
-                {"role": "user", "content": extraction_prompt}
-            ],
-            temperature=0.3
-        )
-        # Parse response and create ExtractedInfo objects
-        analysis = json.loads(response.choices[0].message.content)
-        extracted_items = []
-        for item in analysis.get("extracted_items", []):
-            extracted_info = ExtractedInfo(
-                text=item["text"],
-                category=item["category"],
-                confidence=item["confidence"],
-                metadata=item.get("metadata", {})
-            )
-            extracted_items.append(extracted_info)
-        return extracted_items
-    except json.JSONDecodeError as e:
-        logger.error(f"Error parsing extraction response: {str(e)}")
-        return []
-    except Exception as e:
-        logger.error(f"Error during information extraction: {str(e)}")
-        return []
-    def _update_completion_status(self) -> None:
-        """Update completion status based on extracted information."""
-        total_categories = len(self.extraction_categories)
-        covered_categories = len(self.state.categories_covered)
-        # Calculate base completion percentage
-        base_completion = (covered_categories / total_categories) * 100
-        # Adjust based on confidence levels
-        if self.state.extracted_items:
-            avg_confidence = sum(item.confidence for item in self.state.extracted_items) / len(self.state.extracted_items)
-            adjusted_completion = base_completion * avg_confidence
-        else:
-            adjusted_completion = 0.0
-        self.state.completion_percentage = min(adjusted_completion, 100.0)
     def process_message(self, message: str, api_key: str) -> Dict[str, Any]:
-        """Process a user message and extract information."""
         try:
-            # Initialize client if needed
-            if not self.client:
                 self._initialize_client(api_key)
-            # Add user message to history
             self._add_to_history("user", message)
-            # Get AI response
             ai_response = self._get_ai_response()
             self._add_to_history("assistant", ai_response)
-            # Extract information from the latest user message and AI response
-            new_information = self._extract_information(message + "\n" + ai_response)
-            # Update state with new information
-            for info in new_information:
-                self.state.add_extracted_info(info)
-            # Update completion status
             self._update_completion_status()
             return {
                 "response": ai_response,
                 "extracted_info": [
@@ -243,35 +270,46 @@ def _extract_resume_information(self, text: str) -> List[ExtractedInfo]:
                         "text": info.text,
                         "category": info.category,
                         "confidence": info.confidence
-                    } for info in new_information
                 ],
                 "completion_status": {
                     "percentage": self.state.completion_percentage,
                     "categories_covered": self.state.categories_covered,
                     "current_focus": self.state.current_focus
-                }
             }
         except Exception as e:
             error_msg = f"Error processing message: {str(e)}"
             logger.error(error_msg)
             self.state.last_error = error_msg
             return {
-                "error": error_msg,
                 "completion_status": {
                     "percentage": self.state.completion_percentage,
                     "categories_covered": self.state.categories_covered,
                     "current_focus": self.state.current_focus
-                }
             }
     def generate_output(self) -> Dict[str, Any]:
-        """Generate structured output from all extracted information."""
         try:
-            # Organize extracted information by category
             categorized_info = {}
             for category in self.extraction_categories:
-                category_items = [
                     {
                         "text": item.text,
                         "confidence": item.confidence,
@@ -281,10 +319,9 @@ def _extract_resume_information(self, text: str) -> List[ExtractedInfo]:
                     for item in self.state.extracted_items
                     if item.category == category
                 ]
-                if category_items:
-                    categorized_info[category] = category_items
-            # Create output structure
             output = {
                 "extracted_information": categorized_info,
                 "analysis_summary": {
@@ -299,10 +336,10 @@ def _extract_resume_information(self, text: str) -> List[ExtractedInfo]:
                 }
             }
-            # Save to file
             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
             filename = f"extracted_info_{timestamp}.json"
             with open(filename, 'w', encoding='utf-8') as f:
                 json.dump(output, f, indent=2, ensure_ascii=False)
@@ -320,11 +357,16 @@ def _extract_resume_information(self, text: str) -> List[ExtractedInfo]:
                 "status": "error"
             }
-def create_gradio_interface():
-    """Create the Gradio interface for information extraction."""
     extractor = InformationExtractor()
-    # Custom CSS for better styling
     css = """
     .container { max-width: 900px; margin: auto; }
     .message { padding: 1rem; margin: 0.5rem 0; border-radius: 0.5rem; }
@@ -345,59 +387,36 @@ def create_gradio_interface():
     """
     with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
-        gr.Markdown("""
-        # 🔍 Information Extraction Assistant
-        Have a natural conversation while we extract and organize important information.
-        The system will automatically identify and categorize relevant details.
-        """)
         with gr.Row():
             with gr.Column(scale=2):
-                # API Key input
                 api_key = gr.Textbox(
                     label="OpenAI API Key",
                     type="password",
-                    placeholder="Enter your OpenAI API key (sk-...)",
-                    show_label=True
                 )
-                # Chat interface
                 chatbot = gr.Chatbot(
                     value=[],
-                    height=400,
-                    type="messages",
-                    show_label=False
                 )
-                # Message input
                 with gr.Row():
                     msg = gr.Textbox(
                         label="Message",
-                        placeholder="Type your message here...",
-                        scale=4
-                    )
-                    submit = gr.Button(
-                        "Send",
-                        variant="primary",
-                        scale=1
                     )
-                # Action buttons
                 with gr.Row():
-                    clear = gr.Button("Clear Chat", scale=1)
-                    generate = gr.Button(
-                        "Generate Report",
-                        variant="secondary",
-                        scale=2
-                    )
             with gr.Column(scale=1):
-                # Extraction Status Panel
-                with gr.Group(visible=True) as status_panel:
                     gr.Markdown("### Extraction Progress")
-                    # Progress indicator
                     progress = gr.Slider(
                         label="Completion",
                         minimum=0,
@@ -405,244 +424,176 @@ def create_gradio_interface():
                         value=0,
                         interactive=False
                     )
-                    # Categories covered
-                    categories_covered = gr.JSON(
-                        label="Categories Covered",
-                        value={"categories": []}
-                    )
-                    # Current focus
                     current_focus = gr.Textbox(
                         label="Current Focus",
                         value="Not started",
                         interactive=False
                     )
-                # Extraction Results
                 with gr.Tabs() as result_tabs:
                     with gr.Tab("Extracted Information"):
-                        extracted_info = gr.JSON(
-                            label="Extracted Details",
-                            value={}
-                        )
                     with gr.Tab("Download"):
-                        file_output = gr.File(
-                            label="Download Report"
-                        )
                     with gr.Tab("Analysis"):
-                        analysis_text = gr.Markdown(
-                            "Analysis will appear here after processing."
-                        )
-        # Helper Functions
-        def format_extraction_summary(extracted_items: List[Dict]) -> str:
-            """Format extracted information for display."""
-            if not extracted_items:
                 return "No information extracted yet."
-            summary = ["### Recently Extracted Information"]
-            for item in extracted_items:
-                summary.append(
-                    f"- **{item['category']}** ({item['confidence']*100:.1f}% confidence)\n"
-                    f"  {item['text']}"
                 )
-            return "\n".join(summary)
-        def update_interface_state(state: Dict[str, Any]) -> tuple:
-            """Update all interface components based on current state."""
-            return (
-                state['completion_status']['percentage'],
-                {"categories": state['completion_status']['categories_covered']},
-                state['completion_status']['current_focus']
-            )
-        # Event Handlers
-        def process_message(message: str, history: list, key: str) -> tuple:
-            """Handle message processing and update interface."""
-            if not message.strip():
-                return history, 0, {}, "Please enter a message"
-            try:
-                # Process message
-                result = extractor.process_message(message, key)
-                if "error" in result:
-                    return (
-                        history,
-                        0,
-                        {"categories": []},
-                        f"Error: {result['error']}"
-                    )
-                # Update chat history
-                history.append({
-                    "role": "user",
-                    "content": message
-                })
-                history.append({
-                    "role": "assistant",
-                    "content": result["response"]
-                })
-                # Update status components
-                progress_value = result["completion_status"]["percentage"]
-                categories = {
-                    "categories": result["completion_status"]["categories_covered"]
                 }
-                current_focus = result["completion_status"]["current_focus"] or "Processing..."
-                # Update extraction display
-                if result.get("extracted_info"):
-                    analysis_text = format_extraction_summary(result["extracted_info"])
-                else:
-                    analysis_text = "No new information extracted."
-                return (
-                    history,
-                    progress_value,
-                    categories,
-                    current_focus,
-                    analysis_text
-                )
-            except Exception as e:
-                logger.error(f"Error in process_message: {str(e)}")
-                return (
-                    history,
-                    0,
-                    {"categories": []},
-                    f"Error: {str(e)}",
-                    "An error occurred during processing."
-                )
-        def generate_report() -> tuple:
-            """Generate and return report file."""
-            try:
-                result = extractor.generate_output()
-                if result["status"] == "success":
-                    # Update JSON preview
-                    content_preview = {
-                        "summary": result["content"]["analysis_summary"],
-                        "categories": list(result["content"]["extracted_information"].keys()),
-                        "total_items": len(result["content"]["extracted_information"])
-                    }
-                    return (
-                        result["filename"],
-                        content_preview,
-                        "Report generated successfully! 🎉",
-                        gr.update(value=format_extraction_summary(
-                            [item for items in result["content"]["extracted_information"].values()
-                             for item in items]
-                        ))
-                    )
-                else:
-                    return (
-                        None,
-                        {"error": result["error"]},
-                        f"Error generating report: {result['error']}",
-                        "Failed to generate analysis."
-                    )
-            except Exception as e:
-                logger.error(f"Error in generate_report: {str(e)}")
-                return (
-                    None,
-                    {"error": str(e)},
-                    f"Error: {str(e)}",
-                    "An error occurred during report generation."
-                )
-        def clear_interface() -> tuple:
-            """Reset all interface components."""
-            # Reset extractor state
             global extractor
             extractor = InformationExtractor()
-            return (
-                [],  # Clear chat history
-                0.0,  # Reset progress
-                {"categories": []},  # Clear categories
-                "Not started",  # Reset focus
-                {},  # Clear extracted info
-                None,  # Clear file output
-                "Ready to start new extraction.",  # Reset analysis
-                gr.update(value="")  # Clear message input
-            )
-        # Event Bindings
         msg.submit(
-            process_message,
             inputs=[msg, chatbot, api_key],
-            outputs=[
-                chatbot,
-                progress,
-                categories_covered,
-                current_focus,
-                analysis_text
-            ]
-        ).then(
-            lambda: "",
-            None,
-            msg
-        )
         submit.click(
-            process_message,
             inputs=[msg, chatbot, api_key],
-            outputs=[
-                chatbot,
-                progress,
-                categories_covered,
-                current_focus,
-                analysis_text
-            ]
-        ).then(
-            lambda: "",
-            None,
-            msg
-        )
         generate.click(
-            generate_report,
-            outputs=[
-                file_output,
-                extracted_info,
-                current_focus,
-                analysis_text
-            ]
         )
         clear.click(
-            clear_interface,
             outputs=[
-                chatbot,
-                progress,
-                categories_covered,
-                current_focus,
-                extracted_info,
-                file_output,
-                analysis_text,
-                msg
             ]
         )
     return demo
-if __name__ == "__main__":
-    # Set up logging for the main application
     logging.basicConfig(
         level=logging.INFO,
         format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
     )
     try:
-        demo = create_gradio_interface()
-        demo.launch(
             server_name="0.0.0.0",
             server_port=7860,
             share=True,
@@ -650,4 +601,7 @@ if __name__ == "__main__":
         )
     except Exception as e:
         logger.error(f"Application failed to start: {str(e)}")
-        raise

 from datetime import datetime
 from typing import Dict, List, Optional, Any, Tuple
 from dataclasses import dataclass, field
+import openai  # We'll use the official openai package
 import gradio as gr
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 )
 logger = logging.getLogger(__name__)
 SYSTEM_PROMPT = """
+You are LOSS DOG, a Career and Education Information Extraction Assistant, designed to help users craft a compelling
+and well-structured resume by extracting and organizing key details from conversations.
 Core Capabilities:
 - Proactively ask users about their career history, education, skills, certifications, projects, and achievements.
 - Maintain a friendly, engaging, and professional tone to encourage users to share relevant information.
 - Structure extracted data into well-organized resume sections.
+Your goal is to make resume-building effortless by asking the right questions, extracting key information,
+and presenting it in a clear, professional format.
 """
 @dataclass
 class ExtractedInfo:
+    """Structure for storing extracted information relevant to building a resume."""
     text: str
     category: str
     confidence: float
     timestamp: datetime = field(default_factory=datetime.now)
     metadata: Dict[str, Any] = field(default_factory=dict)
 @dataclass
 class ConversationState:
+    """Tracks the conversation state and progress regarding extracted resume info."""
     extracted_items: List[ExtractedInfo] = field(default_factory=list)
     categories_covered: List[str] = field(default_factory=list)
     current_focus: Optional[str] = None
     last_update: datetime = field(default_factory=datetime.now)
     def add_extracted_info(self, info: ExtractedInfo) -> None:
+        """Add new extracted information and update state accordingly."""
         self.extracted_items.append(info)
         if info.category not in self.categories_covered:
             self.categories_covered.append(info.category)
         self.last_update = datetime.now()
 class InformationExtractor:
+    """
+    Core class for handling information extraction from user messages to build a structured resume.
+    Attributes:
+        conversation_history: A list of dictionaries storing each message and its role (user/assistant).
+        state: An instance of ConversationState, which tracks the extraction progress and items.
+        extraction_categories: A list of main categories we want to extract for building the resume.
+    """
+    def __init__(self) -> None:
+        """Initialize the InformationExtractor with default settings."""
         self.conversation_history: List[Dict[str, str]] = []
         self.state = ConversationState()
         self.extraction_categories = [
             "personal_info",
             "education",
             "skills",
             "achievements"
         ]
+        # We'll store the API key in a protected variable to re-use as needed
+        self._api_key: Optional[str] = None
     def _validate_api_key(self, api_key: str) -> bool:
+        """
+        Validate the OpenAI API key format.
+        Args:
+            api_key: The user's OpenAI API key.
+        Returns:
+            True if the API key is non-blank and starts with 'sk-'; raises ValueError otherwise.
+        """
         if not api_key.strip():
+            raise ValueError("API key cannot be empty.")
+        if not api_key.startswith("sk-"):
+            raise ValueError("Invalid API key format. It must start with 'sk-'.")
         return True
     def _initialize_client(self, api_key: str) -> None:
+        """
+        Initialize openai with the given API key. Uses error handling to catch any issue.
+        Args:
+            api_key: The user's OpenAI API key.
+        """
         try:
             if self._validate_api_key(api_key):
+                openai.api_key = api_key
+                self._api_key = api_key
         except Exception as e:
             logger.error(f"Error initializing OpenAI client: {str(e)}")
             raise
     def _add_to_history(self, role: str, content: str) -> None:
+        """
+        Add a message to the conversation history with a timestamp.
+        Args:
+            role: Either 'user' or 'assistant' to denote who sent the message.
+            content: The message content.
+        """
         self.conversation_history.append({
             "role": role,
             "content": content,
         })
     def _get_ai_response(self, retries: int = 3) -> str:
+        """
+        Get an AI response from OpenAI's ChatCompletion endpoint.
+        Args:
+            retries: Number of times to retry upon failure.
+        Returns:
+            The text content of the AI's reply.
+        """
+        if not self._api_key:
+            raise ValueError("OpenAI client not initialized (API key missing).")
         for attempt in range(retries):
             try:
+                with openai.ChatCompletion.create(
+                    model="gpt-4o-mini",  # or "gpt-4" or any other available model
                     messages=[
                         {"role": "system", "content": SYSTEM_PROMPT},
+                        *[{"role": msg["role"], "content": msg["content"]} for msg in self.conversation_history]
                     ],
                     temperature=0.7,
                     max_tokens=2000
+                ) as response:
+                    return response["choices"][0]["message"]["content"]
             except Exception as e:
                 logger.warning(f"Attempt {attempt + 1} failed: {str(e)}")
                 if attempt == retries - 1:
                     raise Exception(f"Failed after {retries} attempts: {str(e)}")
+        return ""
+    def _extract_resume_information(self, text: str) -> List[ExtractedInfo]:
+        """
+        Extract structured career and education-related information from the given text.
+        Args:
+            text: The combined user and AI text from which to extract relevant info.
+        Returns:
+            A list of ExtractedInfo objects with the extracted details.
+        """
+        if not self._api_key:
+            raise ValueError("OpenAI client not initialized (API key missing).")
+        # We'll ask GPT to produce JSON with extracted items
         extraction_prompt = f"""
         Analyze the following text and extract relevant information for resume building.
+        Focus on these key categories: {', '.join(self.extraction_categories)}.
+        For each piece of extracted data, output a JSON structure with:
         {{
+            "extracted_items": [
+                {{
+                    "text": "...",
+                    "category": "...",
+                    "confidence": 0.0,
+                    "metadata": {{ ... }}
+                }},
+                ...
+            ]
         }}
         Text to analyze: {text}
         """
+        try:
+            with openai.ChatCompletion.create(
+                model="gpt-4o-mini",
+                messages=[
+                    {"role": "system", "content": SYSTEM_PROMPT},
+                    {"role": "user", "content": extraction_prompt}
+                ],
+                temperature=0.3,
+                max_tokens=1000
+            ) as response:
+                raw_content = response["choices"][0]["message"]["content"]
+            # Now parse the content
+            analysis = json.loads(raw_content)
+            extracted_items = []
+            for item in analysis.get("extracted_
     def process_message(self, message: str, api_key: str) -> Dict[str, Any]:
+        """
+        Process a user message:
+            1. Initialize OpenAI if needed,
+            2. Add user message to history,
+            3. Get AI response,
+            4. Extract resume information,
+            5. Update the conversation state,
+            6. Return structured data.
+        Args:
+            message: The user's chat input.
+            api_key: The user's OpenAI API key.
+        Returns:
+            A dictionary with AI response, extracted info, and updated completion status.
+        """
+        # Always return a dictionary so that the UI can parse it
         try:
+            if not message.strip():
+                # Blank input: return the same dictionary shape as a normal reply, before any API call
+                return {
+                    "response": "Please enter a message.",
+                    "extracted_info": [],
+                    "completion_status": {
+                        "percentage": self.state.completion_percentage,
+                        "categories_covered": self.state.categories_covered,
+                        "current_focus": self.state.current_focus
+                    },
+                    "analysis_text": "No new information extracted.",
+                    "history_message": "(No change in history)"
+                }
+            # Initialize the client if not done yet
+            if not self._api_key:
                 self._initialize_client(api_key)
+            # Add user message to conversation history
             self._add_to_history("user", message)
             ai_response = self._get_ai_response()
             self._add_to_history("assistant", ai_response)
+            # Extract new info from the full conversation
+            new_info = self._extract_resume_information(text=message + "\n" + ai_response)
+            # Update the conversation state
+            for info_item in new_info:
+                self.state.add_extracted_info(info_item)
             self._update_completion_status()
             return {
                 "response": ai_response,
                 "extracted_info": [
                         "text": info.text,
                         "category": info.category,
                         "confidence": info.confidence
+                    }
+                    for info in new_info
                 ],
                 "completion_status": {
                     "percentage": self.state.completion_percentage,
                     "categories_covered": self.state.categories_covered,
                     "current_focus": self.state.current_focus
+                },
+                "analysis_text": "Successfully extracted new information." if new_info else "No new information extracted.",
+                "history_message": f"Added user message '{message}' and assistant response to history."
             }
         except Exception as e:
             error_msg = f"Error processing message: {str(e)}"
             logger.error(error_msg)
             self.state.last_error = error_msg
             return {
+                "response": "",
+                "extracted_info": [],
                 "completion_status": {
                     "percentage": self.state.completion_percentage,
                     "categories_covered": self.state.categories_covered,
                     "current_focus": self.state.current_focus
+                },
+                "analysis_text": error_msg,
+                "history_message": "(Processing failed)"
             }
     def generate_output(self) -> Dict[str, Any]:
+        """
+        Build a JSON report of all extracted items and write it to disk.
+
+        Items in ``self.state.extracted_items`` are grouped under each name in
+        ``self.extraction_categories``; categories with no items are omitted.
+        The report is dumped to ``extracted_info_<YYYYmmdd_HHMMSS>.json``, a
+        relative path, so it is created in the current working directory.
+
+        Returns:
+            A dict with fields: filename, content, and status.
+            NOTE(review): the success ``return`` and the ``except`` clause are
+            not visible in this excerpt; only a trailing ``"status": "error"``
+            entry is. Confirm the exact keys against the full file.
+        """
         try:
             categorized_info = {}
+            # One filtering pass over the stored items per category; categories
+            # that end up empty are left out of the report.
             for category in self.extraction_categories:
+                items_in_cat = [
                     {
                         "text": item.text,
                         "confidence": item.confidence,
+                    # NOTE(review): the closing brace of this dict (and possibly
+                    # further keys) is missing from this excerpt.
                     for item in self.state.extracted_items
                     if item.category == category
                 ]
+                if items_in_cat:
+                    categorized_info[category] = items_in_cat
             output = {
                 "extracted_information": categorized_info,
+                # NOTE(review): the summary fields are not visible in this excerpt.
                 "analysis_summary": {
                 }
             }
+            # The timestamp (one-second resolution) gives each report its own name.
             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
             filename = f"extracted_info_{timestamp}.json"
+            # Use a context manager for safe file operations
             with open(filename, 'w', encoding='utf-8') as f:
+                # ensure_ascii=False writes non-ASCII text verbatim into the UTF-8 file.
                 json.dump(output, f, indent=2, ensure_ascii=False)
                 "status": "error"
             }
def create_gradio_interface() -> gr.Blocks:
    """
    Create the Gradio interface for the InformationExtractor.

    The interface holds one ``InformationExtractor`` for its whole lifetime.
    The chat, report and clear handlers are closures over that instance, so
    "Clear Chat" replaces it for all of them at once.

    Returns:
        The gradio Blocks application interface object.
    """
    extractor = InformationExtractor()

    css = """
    .container { max-width: 900px; margin: auto; }
    .message { padding: 1rem; margin: 0.5rem 0; border-radius: 0.5rem; }
    """

    with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🔍 Information Extraction Assistant\n")

        with gr.Row():
            # Left column: credentials, conversation and action buttons.
            with gr.Column(scale=2):
                api_key = gr.Textbox(
                    label="OpenAI API Key",
                    type="password",
                    placeholder="Enter your OpenAI API key (sk-...)"
                )
                # NOTE(review): the handlers below feed this component
                # role/content dicts. Depending on the installed Gradio
                # version the Chatbot may need type="messages" for that
                # format - confirm against the pinned version.
                chatbot = gr.Chatbot(
                    label="Conversation",
                    value=[],
                    height=400
                )
                with gr.Row():
                    msg = gr.Textbox(
                        label="Message",
                        placeholder="Type your message here..."
                    )
                    submit = gr.Button("Send", variant="primary")
                with gr.Row():
                    clear = gr.Button("Clear Chat")
                    generate = gr.Button("Generate Report", variant="secondary")

            # Right column: progress indicators and result tabs.
            with gr.Column(scale=1):
                with gr.Group():
                    gr.Markdown("### Extraction Progress")
                    progress = gr.Slider(
                        label="Completion",
                        minimum=0,
                        value=0,
                        interactive=False
                    )
                    categories_covered = gr.JSON(label="Categories Covered", value={})
                    current_focus = gr.Textbox(
                        label="Current Focus",
                        value="Not started",
                        interactive=False
                    )
                with gr.Tabs():
                    with gr.Tab("Extracted Information"):
                        extracted_info = gr.JSON(label="Extracted Details", value={})
                    with gr.Tab("Download"):
                        file_output = gr.File(label="Download Report")
                    with gr.Tab("Analysis"):
                        analysis_text = gr.Markdown("Analysis will appear here after processing.")

        def format_extraction_summary(extracted_items_list: List[Dict[str, Any]]) -> str:
            """
            Format extracted data as a Markdown bullet list for display.

            Args:
                extracted_items_list: List of dictionaries with 'category',
                    'confidence', and 'text'.

            Returns:
                A Markdown summary, or a placeholder sentence when the list is
                empty.
            """
            if not extracted_items_list:
                return "No information extracted yet."
            lines = ["### Recently Extracted Information"]
            lines.extend(
                f"- **{itm['category']}** ({itm['confidence']*100:.1f}% confidence)\n"
                f"  {itm['text']}"
                for itm in extracted_items_list
            )
            return "\n".join(lines)

        def process_message(user_input: str, history: List[Dict[str, str]], key: str) -> Tuple[Any, float, Dict[str, Any], str, str]:
            """
            Event handler for a submitted chat message.

            Args:
                user_input: The current user message.
                history: The existing chat history.
                key: The user's OpenAI API key.

            Returns:
                A 5-element tuple matching the bound outputs: updated chatbot
                messages, progress value, categories JSON, current focus text
                and analysis text.
            """
            result = extractor.process_message(user_input, key)

            # Work on a copy so the component's previous value is not mutated
            # in place; also tolerates a missing history.
            history = list(history or [])
            history.append({"role": "user", "content": user_input})
            history.append({"role": "assistant", "content": result["response"]})

            status = result["completion_status"]
            prog_val = status["percentage"]
            cat_cov = {"categories": status["categories_covered"]}
            focus_val = status["current_focus"] or "Not specified"

            # Prefer a summary of the newly extracted items; otherwise fall
            # back to the extractor's own status or error text.
            extract_list = result.get("extracted_info", [])
            if extract_list:
                analysis = format_extraction_summary(extract_list)
            else:
                analysis = result["analysis_text"]
            return history, prog_val, cat_cov, focus_val, analysis

        def generate_report() -> Tuple[Optional[str], Dict[str, Any], str, str]:
            """
            Generate a JSON report of extracted resume info.

            Returns:
                A tuple of: (filename, extracted_json, focus_message, analysis_text).
                On failure the filename is None and the JSON holds the error.
            """
            gen_result = extractor.generate_output()
            if gen_result.get("status") != "success":
                error_payload = {"error": gen_result.get("error", "Unknown error")}
                return None, error_payload, "Error generating report.", "No analysis."

            content = gen_result["content"]
            by_category = content["extracted_information"]

            # Flatten once, tagging each item with its category; the flat list
            # drives both the item count and the final analysis text.
            flat_items = [
                {
                    "text": item["text"],
                    "confidence": item["confidence"],
                    "category": category
                }
                for category, items in by_category.items()
                for item in items
            ]
            content_preview = {
                "summary": content["analysis_summary"],
                "categories": list(by_category),
                # Count items, not categories.
                "total_items": len(flat_items)
            }
            final_analysis = format_extraction_summary(flat_items)
            return gen_result["filename"], content_preview, "Report generated successfully!", final_analysis

        def clear_interface() -> Tuple[List[Dict[str, str]], float, Dict[str, Any], str, Dict[str, Any], None, str, str]:
            """
            Reset all UI components to their initial state.

            Returns:
                A tuple specifying the reset states of:
                - Chatbot
                - Progress
                - Categories
                - Current Focus
                - Extracted Info
                - File Output
                - Analysis
                - Message Box
            """
            # `extractor` lives in the enclosing function, so it must be
            # rebound with `nonlocal`. A `global` declaration would create an
            # unrelated module-level name and leave the other handlers bound
            # to the old, still-populated extractor.
            nonlocal extractor
            extractor = InformationExtractor()
            return [], 0.0, {"categories": []}, "Not started", {}, None, "Ready to start new extraction.", ""

        # Bind events. Pressing Enter in the message box and clicking "Send"
        # run the same handler, then empty the message box.
        chat_inputs = [msg, chatbot, api_key]
        chat_outputs = [chatbot, progress, categories_covered, current_focus, analysis_text]
        for trigger in (msg.submit, submit.click):
            trigger(
                fn=process_message,
                inputs=chat_inputs,
                outputs=chat_outputs
            ).then(lambda: "", None, msg)

        generate.click(
            fn=generate_report,
            outputs=[file_output, extracted_info, current_focus, analysis_text]
        )
        clear.click(
            fn=clear_interface,
            outputs=[
                chatbot, progress, categories_covered,
                current_focus, extracted_info, file_output,
                analysis_text, msg
            ]
        )
    return demo
def main() -> None:
    """
    Main function to launch the Gradio application on port 7860, with share=True.

    The interface is built outside the ``try`` block, so construction errors
    propagate to the caller. Only failures raised by ``launch`` are caught,
    and they are logged together with their traceback.
    """
    # basicConfig does nothing if the root logger already has handlers.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )
    demo_app = create_gradio_interface()
    try:
        demo_app.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=True,
        )
    except Exception as e:
        # logger.exception keeps the traceback that logger.error dropped.
        logger.exception("Application failed to start: %s", e)


if __name__ == "__main__":
    main()