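"""LOSS DOG - a professional profile builder.

Chats with users through a Gradio UI to gather career information, then asks
OpenAI to extract the conversation into a structured JSON profile saved to disk.
"""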
import json
import logging
from datetime import datetime
from typing import Any, Dict, List, Optional

import gradio as gr
from openai import AsyncOpenAI

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

CONVERSATION_PROMPT = """You are LOSS DOG, a professional profile builder. Your goal is to have natural conversations |
|
with users to gather information about their professional background across 9 categories: |
|
|
|
1. Work History & Experience |
|
2. Salary & Compensation |
|
3. Skills & Certifications |
|
4. Education & Learning |
|
5. Personal Branding & Online Presence |
|
6. Achievements & Awards |
|
7. Social Proof & Networking |
|
8. Project Contributions & Leadership |
|
9. Work Performance & Impact Metrics |
|
|
|
Be friendly and conversational. Ask follow-up questions naturally. When appropriate, guide users to share more details |
|
but respect their boundaries. Once you believe you have gathered sufficient information (or if the user indicates they |
|
have nothing more to share), let them know they can click 'Generate Profile' to proceed. |
|
""" |

EXTRACTION_PROMPT = """You are a professional information extraction system. Your task is to methodically analyze conversations and organize information into 9 specific categories. Process each category thoroughly and output in structured JSON format.

ANALYTICAL PROCESS:
1. Read the entire conversation history
2. Extract explicit and implicit information
3. Make reasonable inferences when appropriate
4. Structure data according to the defined schema
5. Include confidence scores for all extracted information

OUTPUT SCHEMA:
{
  "work_history_experience": {
    "positions": [
      {
        "title": string,
        "company": string,
        "industry": string,
        "location": string,
        "employment_type": string,
        "adaptability": {
          "career_shifts": string[],
          "upskilling": string[]
        },
        "promotions": string[],
        "confidence": float
      }
    ]
  },
  "salary_compensation": {
    "history": [
      {
        "base_salary": number | null,
        "bonus_structure": string | null,
        "stock_options": {
          "type": string,
          "details": string
        },
        "commission": string | null,
        "benefits": {
          "health": string,
          "pto": string,
          "retirement": string,
          "other": string[]
        },
        "confidence": float
      }
    ]
  },
  "skills_certifications": {
    "hard_skills": string[],
    "soft_skills": string[],
    "programming_languages": string[],
    "spoken_languages": string[],
    "certifications": [
      {
        "name": string,
        "issuer": string,
        "date": string,
        "confidence": float
      }
    ],
    "licenses": [
      {
        "type": string,
        "issuer": string,
        "valid_until": string,
        "confidence": float
      }
    ]
  },
  "education_learning": {
    "formal_education": [
      {
        "degree": string,
        "institution": string,
        "gpa": number | null,
        "research": string[],
        "period": {
          "start": string,
          "end": string | null
        },
        "confidence": float
      }
    ],
    "online_courses": [],
    "executive_education": []
  },
  "personal_branding": {
    "portfolio": {
      "github": string | null,
      "behance": string | null,
      "other": string[]
    },
    "blog_posts": [],
    "blockchain_projects": {
      "nfts": [],
      "defi": [],
      "dapps": []
    },
    "public_speaking": [],
    "social_media": {
      "platforms": [],
      "influence_metrics": {}
    }
  },
  "achievements_awards": {
    "industry_awards": [],
    "hackathons": [],
    "peer_endorsements": [],
    "creative_projects": {
      "ai_art": [],
      "other": []
    }
  },
  "social_proof_networking": {
    "mentors": [],
    "references": [],
    "memberships": [
      {
        "organization": string,
        "type": string,
        "period": string,
        "confidence": float
      }
    ],
    "conference_engagement": []
  },
  "project_contributions": {
    "major_projects": [],
    "open_source": [],
    "team_leadership": [],
    "patents": [],
    "impact": {
      "description": string,
      "metrics": string[],
      "confidence": float
    }
  },
  "work_performance_metrics": {
    "kpis": [],
    "revenue_impact": [],
    "efficiency_gains": [],
    "career_growth": [],
    "leadership_influence": []
  }
}

EXTRACTION GUIDELINES:

1. Process systematically:
   - Analyze the conversation thoroughly
   - Look for both direct statements and implied information
   - Cross-reference information across different parts of the conversation
   - Make reasonable inferences when appropriate

2. For each piece of information:
   - Clean and standardize the data
   - Assign confidence scores (0.0-1.0)
   - Mark inferred information
   - Include source context where relevant

3. Quality requirements:
   - Use consistent date formats (YYYY-MM-DD)
   - Standardize company names and titles
   - Use empty arrays [] for missing information
   - Never use null for array fields
   - Include confidence scores for all extracted data

4. Handle missing information:
   - Use empty arrays [] rather than null
   - Mark inferred information clearly
   - Include partial information when complete data isn't available
   - Reflect uncertainty in confidence scores

Remember to:
- Process each category thoroughly
- Cross-reference information for consistency
- Make reasonable inferences when appropriate
- Maintain consistent formatting
- Include all required fields, even if empty"""
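
# NOTE: The OUTPUT SCHEMA above is pseudo-JSON guidance for the model; bare
# tokens such as `string` and `float` are type placeholders, not literal values.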

class ProfileBuilder:
    """Builds a structured professional profile from a chat conversation."""

    def __init__(self) -> None:
        self.conversation_history: List[Dict[str, str]] = []
        self.client: Optional[AsyncOpenAI] = None

    def _initialize_client(self, api_key: str) -> None:
        """Initialize the AsyncOpenAI client with the given API key."""
        if not api_key.startswith("sk-"):
            raise ValueError("Invalid API key format")
        self.client = AsyncOpenAI(api_key=api_key)

    async def process_message(self, message: str, api_key: str) -> Dict[str, Any]:
        """Process a user message through the conversation phase."""
        try:
            if not self.client:
                self._initialize_client(api_key)

            self.conversation_history.append({"role": "user", "content": message})

            completion = await self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": CONVERSATION_PROMPT},
                    *self.conversation_history,
                ],
                temperature=0.7,
            )

            ai_message = completion.choices[0].message.content
            self.conversation_history.append({"role": "assistant", "content": ai_message})

            return {"response": ai_message}

        except Exception as e:
            logger.error(f"Error processing message: {e}")
            return {"error": str(e)}

    async def generate_profile(self) -> Dict[str, Any]:
        """Process the conversation history into a structured profile."""
        try:
            if not self.client:
                raise ValueError("OpenAI client not initialized")

            conversation_text = "\n".join(
                f"{msg['role']}: {msg['content']}"
                for msg in self.conversation_history
            )

            completion = await self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": EXTRACTION_PROMPT},
                    {"role": "user", "content": f"Extract profile information from this conversation:\n\n{conversation_text}"},
                ],
                temperature=0.3,
                # Constrain the model to emit valid JSON so json.loads below
                # cannot fail on markdown-fenced or otherwise decorated output.
                response_format={"type": "json_object"},
            )

            profile_data = json.loads(completion.choices[0].message.content)

            profile = {
                "profile_data": profile_data,
                "metadata": {
                    "generated_at": datetime.now().isoformat(),
                    "conversation_length": len(self.conversation_history),
                },
            }

            # Persist the profile so the Gradio File component can offer it for download.
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"profile_{timestamp}.json"
            with open(filename, "w", encoding="utf-8") as f:
                json.dump(profile, f, indent=2, ensure_ascii=False)

            return {
                "profile": profile,
                "filename": filename,
            }

        except Exception as e:
            logger.error(f"Error generating profile: {e}")
            return {"error": str(e)}

def create_gradio_interface():
    """Create the Gradio interface."""
    builder = ProfileBuilder()

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🐕 LOSS DOG - Professional Profile Builder")

        with gr.Row():
            with gr.Column(scale=2):
                api_key = gr.Textbox(
                    label="OpenAI API Key",
                    type="password",
                    placeholder="Enter your OpenAI API key",
                )

                chatbot = gr.Chatbot(label="Conversation")

                with gr.Row():
                    msg = gr.Textbox(
                        label="Message",
                        placeholder="Chat with LOSS DOG...",
                    )
                    send = gr.Button("Send")

            with gr.Column(scale=1):
                generate_btn = gr.Button("Generate Profile")
                profile_output = gr.JSON(label="Generated Profile")
                download_btn = gr.File(label="Download Profile")

        async def on_message(message: str, history: List[List[str]], key: str):
            """Relay one user message to the builder and append the reply to the chat."""
            if not message.strip():
                return history, None

            result = await builder.process_message(message, key)

            if "error" in result:
                return history, {"error": result["error"]}

            history = history + [[message, result["response"]]]
            return history, None

        async def on_generate():
            """Run the extraction phase and surface the profile plus its saved file."""
            result = await builder.generate_profile()
            if "error" in result:
                return {"error": result["error"]}, None
            return result["profile"], result["filename"]

        # Submitting the textbox and clicking Send run the same handler,
        # then clear the message box.
        msg.submit(
            on_message,
            inputs=[msg, chatbot, api_key],
            outputs=[chatbot, profile_output],
        ).then(lambda: "", None, msg)

        send.click(
            on_message,
            inputs=[msg, chatbot, api_key],
            outputs=[chatbot, profile_output],
        ).then(lambda: "", None, msg)

        generate_btn.click(
            on_generate,
            outputs=[profile_output, download_btn],
        )

    return demo

if __name__ == "__main__":
    demo = create_gradio_interface()
    demo.queue()  # enable queuing so the async handlers can run
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )