Spaces:

jzou19950715
/

Newco_information_extraction_agent

Sleeping

App Files Files Community

jzou19950715 committed on Feb 6

Commit

f6b0589

verified ·

1 Parent(s): e576ca7

Update app.py

Browse files

Files changed (1) hide show

app.py +167 -114

app.py CHANGED Viewed

@@ -11,150 +11,226 @@ import io
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-# Prompts remain the same
-CONVERSATION_PROMPT = """You are LOSS DOG, a professional profile builder. Your goal is to have natural conversations
-with users to gather information about their professional background across 9 categories:
 1. Work History & Experience
 2. Salary & Compensation
 3. Skills & Certifications
 4. Education & Learning
-5. Personal Branding & Online Presence
-6. Achievements & Awards
-7. Social Proof & Networking
-8. Project Contributions & Leadership
-9. Work Performance & Impact Metrics
-Be friendly and conversational. Ask follow-up questions naturally. When appropriate, guide users to share more details
-but respect their boundaries. Once you believe you have gathered sufficient information (or if the user indicates they
-have nothing more to share), let them know they can click 'Generate Profile' to proceed.
 """
-EXTRACTION_PROMPT = """You are a professional information extraction system. Your task is to extract information from the potentially unstructure conversation and return ONLY a valid JSON object.
-Proactively determine how to fill the json schema using limited information provided.
 Do not include any explanatory text before or after the JSON.
 Return the data in this exact structure:
 {
     "work_history_experience": {
         "positions": [
             {
                 "title": string,
                 "company": string,
-                "industry": string,
-                "location": string,
-                "employment_type": string,
-                "adaptability": {
-                    "career_shifts": [],
-                    "upskilling": []
-                },
-                "promotions": [],
-                "confidence": number
             }
         ]
     },
     "salary_compensation": {
         "history": [
             {
                 "base_salary": number,
                 "bonus_structure": string,
-                "stock_options": {
                     "type": string,
                     "details": string
                 },
-                "commission": null,
                 "benefits": {
                     "health": string,
                     "pto": string,
-                    "retirement": string,
                     "other": []
                 },
-                "confidence": number
             }
         ]
     },
     "skills_certifications": {
-        "hard_skills": [],
-        "soft_skills": [],
-        "certifications": [],
-        "licenses": []
     },
     "education_learning": {
-        "formal_education": [],
-        "online_courses": [],
-        "executive_education": []
-    },
-    "personal_branding": {
-        "portfolio": {
-            "github": null,
-            "behance": null,
-            "other": []
-        },
-        "blog_posts": [],
-        "blockchain_projects": {
-            "nfts": [],
-            "defi": [],
-            "dapps": []
         },
-        "social_media": {
-            "platforms": [],
-            "influence_metrics": {}
-        }
     },
     "achievements_awards": {
-        "industry_awards": [],
-        "hackathons": [],
-        "peer_endorsements": [],
-        "creative_projects": {
-            "ai_art": [],
-            "other": []
-        }
     },
     "social_proof_networking": {
-        "mentors": [],
-        "references": [],
-        "memberships": [],
-        "conference_engagement": []
-    },
-    "project_contributions": {
-        "major_projects": [],
-        "open_source": [],
-        "team_leadership": [],
-        "patents": [],
-        "impact": {
-            "description": string,
-            "metrics": [],
-            "confidence": number
         }
-    },
-    "work_performance_metrics": {
-        "kpis": [],
-        "revenue_impact": [],
-        "efficiency_gains": [],
-        "career_growth": [],
-        "leadership_influence": []
     }
 }
 IMPORTANT: Return ONLY the JSON. Do not add any explanation text."""
 class ProfileBuilder:
     def __init__(self):
         self.client = None
         self.pdf_text = None
     def _initialize_client(self, api_key: str) -> None:
-        """Initialize OpenAI client if not already initialized"""
         if not api_key.startswith("sk-"):
             raise ValueError("Invalid API key format")
         self.client = AsyncOpenAI(api_key=api_key)
     async def process_message(self, message: str, history: List[List[str]], api_key: str) -> Dict[str, Any]:
-        """Process a chat message using conversation history from Gradio's state"""
         try:
-            # Initialize client if needed
             self._initialize_client(api_key)
-            # Convert Gradio history format to OpenAI message format
             conversation_history = []
             for human, assistant in history:
                 conversation_history.extend([
@@ -162,10 +238,8 @@ class ProfileBuilder:
                     {"role": "assistant", "content": assistant}
                 ])
-            # Add current message
             conversation_history.append({"role": "user", "content": message})
-            # Get AI response
             completion = await self.client.chat.completions.create(
                 model="gpt-4o-mini",
                 messages=[
@@ -175,7 +249,6 @@ class ProfileBuilder:
                 temperature=0.7
             )
-            # Extract response
             ai_message = completion.choices[0].message.content
             return {"response": ai_message}
@@ -184,7 +257,6 @@ class ProfileBuilder:
             return {"error": str(e)}
     async def extract_from_pdf(self, pdf_content: bytes) -> str:
-        """Extract text from PDF file"""
         try:
             pdf_file = io.BytesIO(pdf_content)
             pdf_reader = PyPDF2.PdfReader(pdf_file)
@@ -198,15 +270,12 @@ class ProfileBuilder:
             raise
     async def process_pdf(self, pdf_path: str, api_key: str) -> Dict[str, Any]:
-        """Process PDF resume"""
         try:
             self._initialize_client(api_key)
-            # Read and extract PDF content
             with open(pdf_path, 'rb') as file:
                 resume_text = await self.extract_from_pdf(file.read())
-            # Process with AI
             completion = await self.client.chat.completions.create(
                 model="gpt-4o-mini",
                 messages=[
@@ -216,11 +285,9 @@ class ProfileBuilder:
                 temperature=0.3
             )
-            # Parse response
             response_text = completion.choices[0].message.content.strip()
             profile_data = json.loads(response_text)
-            # Create profile object
             profile = {
                 "profile_data": profile_data,
                 "metadata": {
@@ -236,11 +303,9 @@ class ProfileBuilder:
             return {"error": str(e)}
     async def generate_profile(self, history: List[List[str]], api_key: str) -> tuple[Dict[str, Any], Optional[str]]:
-        """Generate profile from conversation or PDF"""
         try:
             self._initialize_client(api_key)
-            # Determine source and prepare content
             if history:
                 content = "\n".join(f"User: {msg[0]}\nAssistant: {msg[1]}" for msg in history)
                 source = "conversation"
@@ -250,7 +315,6 @@ class ProfileBuilder:
             else:
                 raise ValueError("No content available for profile generation")
-            # Get AI extraction
             completion = await self.client.chat.completions.create(
                 model="gpt-4o-mini",
                 messages=[
@@ -260,11 +324,9 @@ class ProfileBuilder:
                 temperature=0.3
             )
-            # Parse response
             response_text = completion.choices[0].message.content.strip()
             profile_data = json.loads(response_text)
-            # Create profile
             profile = {
                 "profile_data": profile_data,
                 "metadata": {
@@ -273,7 +335,6 @@ class ProfileBuilder:
                 }
             }
-            # Save to file
             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
             filename = f"profile_{timestamp}.json"
             with open(filename, 'w', encoding='utf-8') as f:
@@ -291,20 +352,17 @@ def create_gradio_interface():
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown("# 🐕 LOSS DOG - Professional Profile Builder")
-        # Common API key input
         api_key = gr.Textbox(
             label="OpenAI API Key",
             type="password",
             placeholder="Enter your OpenAI API key"
         )
-        # Tab interface
         with gr.Tabs() as tabs:
-            # Resume Upload Tab
             with gr.Tab("Upload Resume"):
                 gr.Markdown("""
                 # Upload Your Resume
-                Upload your existing resume in PDF format and let LOSS DOG extract your professional profile.
                 """)
                 pdf_file = gr.File(
                     label="Upload PDF Resume",
@@ -312,11 +370,10 @@ def create_gradio_interface():
                 )
                 process_pdf_btn = gr.Button("Process Resume")
-            # Chat Tab
-            with gr.Tab("Chat with AI"):
                 gr.Markdown("""
-                # Chat with LOSS DOG
-                Start a conversation with LOSS DOG to build your professional profile from scratch.
                 """)
                 chatbot = gr.Chatbot(
                     label="Conversation",
@@ -325,21 +382,19 @@ def create_gradio_interface():
                 with gr.Row():
                     msg = gr.Textbox(
                         label="Message",
-                        placeholder="Chat with LOSS DOG...",
                         show_label=False
                     )
                     send = gr.Button("Send")
-        # Common output section
         with gr.Column():
             generate_btn = gr.Button("Generate Profile", variant="primary")
             profile_output = gr.JSON(label="Generated Profile")
             download_btn = gr.File(label="Download Profile")
-        # Event handlers
         async def on_message(message: str, history: List[List[str]], key: str):
             if not message.strip():
-                return history, None, None, ""  # Added empty string to clear input
             result = await builder.process_message(message, history, key)
@@ -347,7 +402,7 @@ def create_gradio_interface():
                 return history, {"error": result["error"]}, None, message
             new_history = history + [[message, result["response"]]]
-            return new_history, None, None, ""  # Empty string to clear input
         async def on_pdf_upload(pdf, key):
             if not pdf:
@@ -358,7 +413,6 @@ def create_gradio_interface():
                 if "error" in result:
                     return {"error": result["error"]}, None
-                # Save profile
                 timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                 filename = f"profile_{timestamp}.json"
                 with open(filename, 'w', encoding='utf-8') as f:
@@ -374,17 +428,16 @@ def create_gradio_interface():
                 return {"error": profile["error"]}, None
             return profile["profile_data"], filename
-        # Bind events
         msg.submit(
             on_message,
             inputs=[msg, chatbot, api_key],
-            outputs=[chatbot, profile_output, download_btn, msg]  # Added msg to outputs
         )
         send.click(
             on_message,
             inputs=[msg, chatbot, api_key],
-            outputs=[chatbot, profile_output, download_btn, msg]  # Added msg to outputs
         )
         process_pdf_btn.click(

 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+# System prompt for the chat tab: defines the FINN persona and the six profile categories it must cover
+CONVERSATION_PROMPT = """
+You are FINN (Facts, Insights, Numbers, and Narrative), Lossdog's dedicated AI assistant for portfolio and resume building.
+Your Core Purpose:
+Guide users in discovering and showcasing their professional worth through natural, engaging conversations.
+Conversation Principles:
+- Start with open-ended questions about their professional journey
+- Listen actively and ask relevant follow-up questions
+- Show genuine interest in their achievements and experiences
+- Use casual, friendly language while maintaining professionalism
+- Acknowledge and validate their experiences
+You must gather detailed information across these 6 core categories:
 1. Work History & Experience
+   - Job titles, companies, tenure, responsibilities
+   - Team size, budget management, project scope
+   - Revenue impact, growth numbers, KPIs
 2. Salary & Compensation
+   - Current and past compensation packages
+   - Bonus structures and equity
+   - Benefits and additional perks
+   - Market rate comparisons
+   - Salary growth trajectory
 3. Skills & Certifications
+   - Technical and soft skills with proficiency levels
+   - Professional certifications with dates
+   - Tools and technologies mastered
+   - Languages and frameworks
+   - Industry-specific expertise
 4. Education & Learning
+   - Formal degrees and institutions
+   - Continuing education programs
+   - Professional development courses
+   - Self-taught skills and projects
+   - Learning goals and progress
+5. Achievements & Awards
+   - Professional recognition
+   - Project successes with metrics
+   - Patents and publications
+   - Innovation contributions
+   - Performance awards
+6. Social Proof & Networking
+   - Professional network size and quality
+   - Speaking engagements
+   - Published content and thought leadership
+   - Community involvement
+   - Industry influence metrics
+Conversation Techniques:
+DO use prompts like:
+- "Could you tell me more about your role at [Company]?"
+- "What specific metrics showcase your impact in that position?"
+- "How would you quantify the results of that project?"
+- "Can you share an example of how you applied [Skill]?"
+- "What kind of recognition did you receive for that achievement?"
+DON'T:
+- Ask multiple questions at once
+- Skip categories without proper exploration
+- Accept vague answers without gentle follow-up
+- Rush through topics
+- Ignore potential achievements
+Information Gathering Strategy:
+1. Start broad: "Tell me about your professional journey."
+2. Listen for category mentions
+3. Dive deeper with specific follow-ups
+4. Encourage quantitative metrics where possible
+5. Circle back to missing information naturally
+6. Confirm and validate gathered data
+Make this feel like a natural conversation with a knowledgeable friend who's genuinely interested in their professional story, while systematically gathering comprehensive information across all six categories.
 """
+# Extraction prompt: tells the model to return ONLY a JSON object structured around FINN's six categories
+EXTRACTION_PROMPT = """You are a professional information extraction system. Extract information from the conversation and return ONLY a valid JSON object that matches FINN's 6 core categories.
+Proactively determine how to fill the json schema using provided information.
 Do not include any explanatory text before or after the JSON.
 Return the data in this exact structure:
 {
     "work_history_experience": {
         "positions": [
             {
                 "title": string,
                 "company": string,
+                "tenure": string,
+                "responsibilities": [],
+                "team_size": number,
+                "budget_managed": string,
+                "project_scope": string,
+                "metrics": {
+                    "revenue_impact": string,
+                    "growth_numbers": string,
+                    "kpis": []
+                }
             }
         ]
     },
     "salary_compensation": {
         "history": [
             {
+                "period": string,
                 "base_salary": number,
                 "bonus_structure": string,
+                "equity": {
                     "type": string,
                     "details": string
                 },
                 "benefits": {
                     "health": string,
                     "pto": string,
                     "other": []
                 },
+                "market_comparison": string,
+                "growth_trajectory": string
             }
         ]
     },
     "skills_certifications": {
+        "technical_skills": [
+            {
+                "name": string,
+                "proficiency": string
+            }
+        ],
+        "soft_skills": [
+            {
+                "name": string,
+                "proficiency": string
+            }
+        ],
+        "certifications": [
+            {
+                "name": string,
+                "date": string,
+                "issuer": string
+            }
+        ],
+        "tools_technologies": [],
+        "industry_expertise": []
     },
     "education_learning": {
+        "formal_education": [
+            {
+                "degree": string,
+                "institution": string,
+                "year": string,
+                "field": string
+            }
+        ],
+        "continuing_education": [],
+        "professional_development": [],
+        "self_learning": {
+            "skills": [],
+            "projects": []
         },
+        "learning_goals": []
     },
     "achievements_awards": {
+        "recognition": [
+            {
+                "title": string,
+                "issuer": string,
+                "date": string,
+                "description": string
+            }
+        ],
+        "project_successes": [
+            {
+                "name": string,
+                "metrics": [],
+                "impact": string
+            }
+        ],
+        "patents_publications": [],
+        "innovations": [],
+        "performance_awards": []
     },
     "social_proof_networking": {
+        "professional_network": {
+            "size": number,
+            "quality_metrics": string
+        },
+        "speaking_engagements": [],
+        "published_content": [],
+        "community_involvement": [],
+        "influence_metrics": {
+            "followers": number,
+            "engagement_rate": string,
+            "platform_presence": []
         }
     }
 }
 IMPORTANT: Return ONLY the JSON. Do not add any explanation text."""
+# The logic below is unchanged from the previous revision; this commit only removes comments/docstrings and renames LOSS DOG to FINN in UI text
 class ProfileBuilder:
     def __init__(self):
         self.client = None
         self.pdf_text = None
     def _initialize_client(self, api_key: str) -> None:
         if not api_key.startswith("sk-"):
             raise ValueError("Invalid API key format")
         self.client = AsyncOpenAI(api_key=api_key)
     async def process_message(self, message: str, history: List[List[str]], api_key: str) -> Dict[str, Any]:
         try:
             self._initialize_client(api_key)
             conversation_history = []
             for human, assistant in history:
                 conversation_history.extend([
                     {"role": "assistant", "content": assistant}
                 ])
             conversation_history.append({"role": "user", "content": message})
             completion = await self.client.chat.completions.create(
                 model="gpt-4o-mini",
                 messages=[
                 temperature=0.7
             )
             ai_message = completion.choices[0].message.content
             return {"response": ai_message}
             return {"error": str(e)}
     async def extract_from_pdf(self, pdf_content: bytes) -> str:
         try:
             pdf_file = io.BytesIO(pdf_content)
             pdf_reader = PyPDF2.PdfReader(pdf_file)
             raise
     async def process_pdf(self, pdf_path: str, api_key: str) -> Dict[str, Any]:
         try:
             self._initialize_client(api_key)
             with open(pdf_path, 'rb') as file:
                 resume_text = await self.extract_from_pdf(file.read())
             completion = await self.client.chat.completions.create(
                 model="gpt-4o-mini",
                 messages=[
                 temperature=0.3
             )
             response_text = completion.choices[0].message.content.strip()
             profile_data = json.loads(response_text)
             profile = {
                 "profile_data": profile_data,
                 "metadata": {
             return {"error": str(e)}
     async def generate_profile(self, history: List[List[str]], api_key: str) -> tuple[Dict[str, Any], Optional[str]]:
         try:
             self._initialize_client(api_key)
             if history:
                 content = "\n".join(f"User: {msg[0]}\nAssistant: {msg[1]}" for msg in history)
                 source = "conversation"
             else:
                 raise ValueError("No content available for profile generation")
             completion = await self.client.chat.completions.create(
                 model="gpt-4o-mini",
                 messages=[
                 temperature=0.3
             )
             response_text = completion.choices[0].message.content.strip()
             profile_data = json.loads(response_text)
             profile = {
                 "profile_data": profile_data,
                 "metadata": {
                 }
             }
             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
             filename = f"profile_{timestamp}.json"
             with open(filename, 'w', encoding='utf-8') as f:
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown("# 🐕 LOSS DOG - Professional Profile Builder")
         api_key = gr.Textbox(
             label="OpenAI API Key",
             type="password",
             placeholder="Enter your OpenAI API key"
         )
         with gr.Tabs() as tabs:
             with gr.Tab("Upload Resume"):
                 gr.Markdown("""
                 # Upload Your Resume
+                Upload your existing resume in PDF format and let FINN extract your professional profile.
                 """)
                 pdf_file = gr.File(
                     label="Upload PDF Resume",
                 )
                 process_pdf_btn = gr.Button("Process Resume")
+            with gr.Tab("Chat with FINN"):
                 gr.Markdown("""
+                # Chat with FINN
+                Start a conversation with FINN to build your professional profile from scratch.
                 """)
                 chatbot = gr.Chatbot(
                     label="Conversation",
                 with gr.Row():
                     msg = gr.Textbox(
                         label="Message",
+                        placeholder="Chat with FINN...",
                         show_label=False
                     )
                     send = gr.Button("Send")
         with gr.Column():
             generate_btn = gr.Button("Generate Profile", variant="primary")
             profile_output = gr.JSON(label="Generated Profile")
             download_btn = gr.File(label="Download Profile")
         async def on_message(message: str, history: List[List[str]], key: str):
             if not message.strip():
+                return history, None, None, ""
             result = await builder.process_message(message, history, key)
                 return history, {"error": result["error"]}, None, message
             new_history = history + [[message, result["response"]]]
+            return new_history, None, None, ""
         async def on_pdf_upload(pdf, key):
             if not pdf:
                 if "error" in result:
                     return {"error": result["error"]}, None
                 timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                 filename = f"profile_{timestamp}.json"
                 with open(filename, 'w', encoding='utf-8') as f:
                 return {"error": profile["error"]}, None
             return profile["profile_data"], filename
         msg.submit(
             on_message,
             inputs=[msg, chatbot, api_key],
+            outputs=[chatbot, profile_output, download_btn, msg]
         )
         send.click(
             on_message,
             inputs=[msg, chatbot, api_key],
+            outputs=[chatbot, profile_output, download_btn, msg]
         )
         process_pdf_btn.click(