# Source: HuggingFace Space by jzou19950715 — commit "Update app.py" (ed879c6, verified).
# The original upload carried web-page residue here (raw / history / blame, 19.8 kB);
# it has been converted to this comment so the file remains valid Python.
# ==============================================
# Monte Carlo Salary Prediction Application
# ==============================================
# Required imports
import gradio as gr
import numpy as np
import matplotlib.pyplot as plt
import base64
import io
import json
import requests
from typing import Dict, List, Tuple, Any
import logging
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# ==============================================
# System Prompts (Unchanged)
# ==============================================
# NOTE(review): both prompts are placeholders ("...") in this revision — the app will
# send meaningless system prompts to the API until the real prompt text is restored.
CONVERSATION_PROMPT = """...""" # (Keep your existing prompt)
EXTRACTION_PROMPT = """...""" # (Keep your existing prompt)
# ==============================================
# Monte Carlo Simulation Class (Unchanged)
# ==============================================
class SalarySimulator:
    """Monte Carlo simulator for multi-year salary projections.

    Profile scores (each in [0, 1]) parameterize (low, high) ranges for
    annual growth and risk factors; one value per factor is sampled per
    simulation, then many salary paths are evolved year by year with
    Gaussian market noise and occasional disruption shocks.
    """

    def __init__(self):
        """Initialize growth and risk factor range generators."""
        # Each lambda maps a 0-1 score to a (low, high) annual-rate range
        # from which a single uniform draw is taken per simulation.
        self.growth_factors = {
            "base_growth": lambda score: (0.02 + (score * 0.03), 0.04 + (score * 0.04)),
            "skill_premium": lambda score: (0.01 + (score * 0.02), 0.02 + (score * 0.03)),
            "experience_premium": lambda score: (0.01 + (score * 0.02), 0.02 + (score * 0.03)),
            "education_premium": lambda score: (0.005 + (score * 0.015), 0.01 + (score * 0.02)),
            "location_premium": lambda score: (0.0 + (score * 0.02), 0.01 + (score * 0.03)),
        }
        # Risk ranges: annual volatility (std-dev of noise) and disruption magnitude.
        self.risk_factors = {
            "volatility": lambda score: (0.02 + (score * 0.02), 0.03 + (score * 0.03)),
            "disruption": lambda score: (0.05 + (score * 0.15), 0.1 + (score * 0.2)),
        }

    def validate_scores(self, scores: Dict[str, float]) -> None:
        """Validate that all required inputs are present and in range.

        Raises:
            ValueError: if a required key is missing, the salary is not a
                positive number, or a score falls outside [0, 1].
        """
        required = [
            "industry_score", "experience_score", "education_score",
            "skills_score", "location_score", "current_salary",
        ]
        for key in required:
            if key not in scores:
                raise ValueError(f"Missing required score: {key}")
            if key == "current_salary":
                if not isinstance(scores[key], (int, float)) or scores[key] <= 0:
                    raise ValueError("Invalid salary value")
            else:
                if not 0 <= scores[key] <= 1:
                    raise ValueError(f"Invalid {key}: must be between 0 and 1")

    def calculate_factor(self, name: str, score: float, factor_type: str) -> float:
        """Sample one annual rate for factor *name* from its score-driven range.

        factor_type selects the table: "growth" uses growth_factors,
        anything else uses risk_factors.
        """
        factors = self.growth_factors if factor_type == "growth" else self.risk_factors
        min_val, max_val = factors[name](score)
        return np.random.uniform(min_val, max_val)

    def run_simulation(
        self,
        scores: Dict[str, float],
        years: int = 5,
        num_paths: int = 10000,
    ) -> Tuple[np.ndarray, Dict[str, float]]:
        """Run a Monte Carlo salary simulation.

        Args:
            scores: profile scores plus "current_salary" (see validate_scores).
            years: projection horizon in years (default 5, as before).
            num_paths: number of simulated career paths (default 10000, as before).

        Returns:
            (paths, factors): paths has shape (num_paths, years + 1) with
            column 0 equal to the current salary; factors maps each
            growth/risk factor name to its sampled annual rate.
        """
        self.validate_scores(scores)

        # Which profile score drives each growth factor.
        score_mapping = {
            "base_growth": "industry_score",
            "skill_premium": "skills_score",
            "experience_premium": "experience_score",
            "education_premium": "education_score",
            "location_premium": "location_score",
        }
        factors = {}
        for factor_name, score_key in score_mapping.items():
            factors[factor_name] = self.calculate_factor(factor_name, scores[score_key], "growth")
        # Both risk factors are driven by the industry score.
        for factor_name in ["volatility", "disruption"]:
            factors[factor_name] = self.calculate_factor(
                factor_name, scores["industry_score"], "risk"
            )

        paths = np.zeros((num_paths, years + 1))
        initial_salary = float(scores["current_salary"])
        paths[:, 0] = initial_salary

        # Deterministic part of the annual growth rate — identical every
        # year/path, so hoist it out of the hot loop (was recomputed per year).
        base_growth = sum(factors[f] for f in score_mapping.keys())
        for path in range(num_paths):
            salary = initial_salary
            for year in range(1, years + 1):
                growth = base_growth
                # Gaussian market noise around the deterministic growth.
                growth += np.random.normal(0, factors["volatility"])
                # 10% chance per year of a disruption; 70% of those are positive.
                if np.random.random() < 0.1:
                    disruption = factors["disruption"] * np.random.random()
                    if np.random.random() < 0.7:
                        growth += disruption
                    else:
                        growth -= disruption
                # Clamp annual growth to a plausible -10%..+25% band.
                growth = min(max(growth, -0.1), 0.25)
                salary *= (1 + growth)
                paths[path, year] = salary
        return paths, factors

    def create_plots(self, paths: np.ndarray) -> str:
        """Render fan-chart and distribution plots; return a base64 PNG.

        Returns the raw base64 string (no "data:image/png" prefix).
        """
        plt.style.use('dark_background')
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 12), height_ratios=[2, 1])
        fig.tight_layout(pad=4)

        # --- Plot 1: salary projection fan chart ---
        years = list(range(paths.shape[1]))
        # Shaded confidence bands; the widest (5-95) is the most transparent.
        percentiles = [(5, 95), (10, 90), (25, 75)]
        alphas = [0.1, 0.2, 0.3]
        for (lower, upper), alpha in zip(percentiles, alphas):
            lower_bound = np.percentile(paths, lower, axis=0)
            upper_bound = np.percentile(paths, upper, axis=0)
            ax1.fill_between(years, lower_bound, upper_bound, alpha=alpha, color='blue')
        # Median ("expected") path on top of the bands.
        median = np.percentile(paths, 50, axis=0)
        ax1.plot(years, median, color='white', linewidth=2, label='Expected Path')
        ax1.set_title('Salary Projection', pad=20)
        ax1.set_xlabel('Years')
        ax1.set_ylabel('Salary ($)')
        ax1.grid(True, alpha=0.2)
        ax1.legend()
        # Currency formatting on the y-axis, year labels on the x-axis.
        ax1.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'${x:,.0f}'))
        ax1.set_xticks(years)
        ax1.set_xticklabels(['Current'] + [f'Year {i+1}' for i in range(len(years) - 1)])

        # --- Plot 2: histogram of final-year salaries ---
        ax2.hist(paths[:, -1], bins=50, color='blue', alpha=0.7)
        ax2.set_title('Final Salary Distribution', pad=20)
        ax2.set_xlabel('Salary ($)')
        ax2.set_ylabel('Count')
        ax2.grid(True, alpha=0.2)
        ax2.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'${x:,.0f}'))

        # Serialize *this* figure and close it explicitly: bare plt.close()
        # only closes the current figure, which leaks figures across repeated
        # analyses in a long-running app.
        buf = io.BytesIO()
        fig.savefig(buf, format='png', dpi=100, bbox_inches='tight')
        plt.close(fig)
        buf.seek(0)
        return base64.b64encode(buf.read()).decode()  # raw base64 string

    def generate_report(
        self,
        scores: Dict[str, float],
        paths: np.ndarray,
        factors: Dict[str, float]
    ) -> str:
        """Build a human-readable analysis report from simulation output."""
        final_salaries = paths[:, -1]
        initial_salary = paths[0, 0]
        # Derive the horizon from the data instead of hard-coding 5 years,
        # so reports stay correct if run_simulation is called with other horizons.
        horizon = max(paths.shape[1] - 1, 1)
        metrics = {
            "p25": np.percentile(final_salaries, 25),
            "p50": np.percentile(final_salaries, 50),
            "p75": np.percentile(final_salaries, 75),
            # CAGR implied by the median outcome over the actual horizon.
            "cagr": (np.median(final_salaries) / initial_salary) ** (1 / horizon) - 1,
        }
        report = f"""
Monte Carlo Salary Projection Analysis
====================================

Profile Scores (0-1 scale):
--------------------------
• Industry Score: {scores['industry_score']:.2f}
• Experience Score: {scores['experience_score']:.2f}
• Education Score: {scores['education_score']:.2f}
• Skills Score: {scores['skills_score']:.2f}
• Location Score: {scores['location_score']:.2f}
• Current Salary: ${scores['current_salary']:,.2f}

Growth Factors (Annual):
-----------------------
• Base Growth: {factors['base_growth']*100:.1f}%
• Skill Premium: {factors['skill_premium']*100:.1f}%
• Experience Premium: {factors['experience_premium']*100:.1f}%
• Education Premium: {factors['education_premium']*100:.1f}%
• Location Premium: {factors['location_premium']*100:.1f}%
• Market Volatility: {factors['volatility']*100:.1f}%
• Potential Disruption: {factors['disruption']*100:.1f}%

{horizon}-Year Projection Results:
-------------------------
• Conservative Estimate (25th percentile): ${metrics['p25']:,.2f}
• Most Likely Outcome (Median): ${metrics['p50']:,.2f}
• Optimistic Estimate (75th percentile): ${metrics['p75']:,.2f}
• Expected Annual Growth Rate: {metrics['cagr']*100:.1f}%

Analysis Insights:
-----------------
• Career profile suggests {metrics['cagr']*100:.1f}% annual growth potential
• Market volatility could lead to {factors['volatility']*100:.1f}% annual variation
• Industry position provides {factors['base_growth']*100:.1f}% base growth
• Personal factors add {(factors['skill_premium'] + factors['experience_premium'] + factors['education_premium'])*100:.1f}% potential premium
• Location impact contributes {factors['location_premium']*100:.1f}% to growth

Key Considerations:
------------------
• Projections based on {paths.shape[0]:,} simulated career paths
• Accounts for both regular growth and market disruptions
• Considers personal development and market factors
• Results show range of potential outcomes
• Actual results may vary based on economic conditions
"""
        return report
# ==============================================
# Career Advisor Bot (Unchanged)
# ==============================================
class CareerAdvisor:
    """Chat-driven career advisor that feeds extracted profile scores
    into SalarySimulator for a full salary analysis."""

    # Seconds before an OpenAI API call is abandoned; without a timeout,
    # requests.post can block the UI thread indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Initialize career advisor."""
        # OpenAI-style history: list of {"role": ..., "content": ...} dicts.
        self.chat_history = []
        self.simulator = SalarySimulator()

    def _chat_completion(self, api_key: str, messages: List[Dict[str, str]], temperature: float):
        """POST one chat-completion request and return the raw Response.

        Shared by process_message and extract_profile so the endpoint,
        headers, model and timeout are configured in exactly one place.
        """
        return requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json"
            },
            json={
                "model": "gpt-4",
                "messages": messages,
                "temperature": temperature
            },
            timeout=self.REQUEST_TIMEOUT,
        )

    def process_message(self, message: str, api_key: str) -> Dict[str, str]:
        """Send a user message (with history) to the API.

        Returns {"response": assistant_text} on success or {"error": msg}
        on any failure; the history is only updated on success.
        """
        try:
            # Validate and use the *stripped* key — previously the check ran
            # on the stripped key but the raw one was sent in the header.
            api_key = api_key.strip()
            if not api_key.startswith("sk-"):
                return {"error": "Invalid API key format"}
            messages = [
                {"role": "system", "content": CONVERSATION_PROMPT}
            ]
            messages.extend(self.chat_history)
            messages.append({"role": "user", "content": message})
            response = self._chat_completion(api_key, messages, 0.7)
            if response.status_code == 200:
                assistant_message = response.json()["choices"][0]["message"]["content"].strip()
                # Persist the exchange only after a successful reply.
                self.chat_history.append({"role": "user", "content": message})
                self.chat_history.append({"role": "assistant", "content": assistant_message})
                return {"response": assistant_message}
            return {"error": f"API error: {response.status_code}"}
        except Exception as e:
            logger.error(f"Message processing error: {str(e)}")
            return {"error": str(e)}

    def extract_profile(self, api_key: str) -> Dict[str, float]:
        """Extract numerical profile scores from the conversation so far.

        Falls back to neutral defaults (0.6 scores, $85k salary) on any
        failure so the analysis can still run — deliberate best-effort.
        """
        try:
            # Flatten the history into a readable transcript for the model.
            conversation = "\n".join([
                f"{msg['role'].title()}: {msg['content']}"
                for msg in self.chat_history
            ])
            response = self._chat_completion(
                api_key.strip(),
                [
                    {"role": "system", "content": EXTRACTION_PROMPT},
                    {"role": "user", "content": f"Extract profile from:\n\n{conversation}"},
                ],
                0.3,  # low temperature: extraction should be near-deterministic
            )
            if response.status_code == 200:
                # The model is expected to reply with a JSON object of scores.
                profile_data = json.loads(
                    response.json()["choices"][0]["message"]["content"].strip()
                )
                return profile_data
            raise Exception(f"API error: {response.status_code}")
        except Exception as e:
            logger.error(f"Profile extraction error: {str(e)}")
            return {
                "industry_score": 0.6,
                "experience_score": 0.6,
                "education_score": 0.6,
                "skills_score": 0.6,
                "location_score": 0.6,
                "current_salary": 85000
            }

    def generate_analysis(self, api_key: str) -> Dict[str, Any]:
        """Run the full pipeline: extract profile -> simulate -> plot -> report.

        Returns {"status", "report", "plots"} on success (plots is a raw
        base64 PNG string) or {"error": msg} on failure.
        """
        try:
            profile_data = self.extract_profile(api_key)
            paths, factors = self.simulator.run_simulation(profile_data)
            plots_image = self.simulator.create_plots(paths)
            report = self.simulator.generate_report(
                profile_data,
                paths,
                factors
            )
            return {
                "status": "success",
                "report": report,
                "plots": plots_image  # Raw base64 string
            }
        except Exception as e:
            logger.error(f"Analysis generation error: {str(e)}")
            return {"error": str(e)}
# ==============================================
# Gradio Interface (Updated)
# ==============================================
def create_interface():
    """Create the Gradio interface.

    Layout: a chat column (conversation + input) on the left and an
    analysis column (status, report, projection image) on the right,
    all backed by one shared CareerAdvisor instance.
    """
    # Single advisor shared by every callback; it accumulates chat history.
    advisor = CareerAdvisor()
    # Create Gradio blocks
    with gr.Blocks(title="Monte Carlo Simulation of Salary Prediction") as demo:
        # Title and description
        gr.Markdown("""
        # 💰 Monte Carlo Simulation of Salary Prediction
        Chat with me about your career, and I'll generate detailed salary projections
        using Monte Carlo simulation with machine learning.
        """)
        # API Key input (masked; forwarded per-request, never persisted here)
        with gr.Row():
            api_key = gr.Textbox(
                label="OpenAI API Key",
                placeholder="Enter your API key",
                type="password"
            )
        # Main content area
        with gr.Row():
            # Left column: Chat interface
            with gr.Column(scale=2):
                chatbot = gr.Chatbot(
                    label="Career Conversation",
                    height=400,
                    show_copy_button=True,
                    type="messages"  # Using OpenAI message format
                )
                # Message input and send button
                with gr.Row():
                    message = gr.Textbox(
                        label="Your message",
                        placeholder="Tell me about your career...",
                        lines=2,
                        scale=4
                    )
                    send_btn = gr.Button(
                        "Send Message",
                        scale=1
                    )
            # Right column: Analysis output
            with gr.Column(scale=3):
                status = gr.Textbox(label="Status")
                report = gr.TextArea(
                    label="Analysis Report",
                    lines=20,
                    max_lines=30
                )
                plots = gr.Image(
                    label="Salary Projections",
                    show_download_button=True
                )
                # Analysis button
                analyze_btn = gr.Button(
                    "Generate Analysis",
                    variant="primary",
                    size="lg"
                )

        # Message handling function
        def handle_message(
            message: str,
            history: List[Dict[str, str]],
            key: str
        ) -> Tuple[str, List[Dict[str, str]], str]:
            """Process chat messages.

            Returns (cleared input box, updated history, status text);
            on error the history is left unchanged.
            """
            try:
                result = advisor.process_message(message, key)
                if "error" in result:
                    return "", history, f"Error: {result['error']}"
                # Format messages in OpenAI style
                new_history = history + [
                    {"role": "user", "content": message},
                    {"role": "assistant", "content": result["response"]}
                ]
                return "", new_history, ""
            except Exception as e:
                return "", history, f"Error: {str(e)}"

        # Analysis generation function
        def generate_analysis(key: str) -> Tuple[str, str, str]:
            """Generate salary analysis.

            Returns (status text, report text, value for the gr.Image).
            """
            try:
                result = advisor.generate_analysis(key)
                if "error" in result:
                    return f"Error: {result['error']}", "", None
                # Decode base64 image for Gradio
                # NOTE(review): this hands gr.Image a "data:image/png;base64,..."
                # URI string — confirm the installed Gradio version accepts
                # data URIs as image values.
                plots_image = f"data:image/png;base64,{result['plots']}"
                return (
                    "Analysis completed successfully!",
                    result["report"],
                    plots_image
                )
            except Exception as e:
                return f"Error: {str(e)}", "", None

        # Wire up the interface: Enter key and Send button share one handler.
        message.submit(
            handle_message,
            inputs=[message, chatbot, api_key],
            outputs=[message, chatbot, status],
            queue=False  # Immediate response for better UX
        )
        send_btn.click(
            handle_message,
            inputs=[message, chatbot, api_key],
            outputs=[message, chatbot, status],
            queue=False  # Immediate response for better UX
        )
        analyze_btn.click(
            generate_analysis,
            inputs=[api_key],
            outputs=[status, report, plots]
        )
    return demo
# ==============================================
# Main Entry Point
# ==============================================
def main():
    """Build and launch the application for HuggingFace Spaces."""
    app = create_interface()
    # Enable the request queue so concurrent users are processed.
    app.queue()
    app.launch(
        server_name="0.0.0.0",  # bind all interfaces (required on Spaces)
        server_port=7860,       # Spaces' standard port
        share=True              # expose a public share link
    )


if __name__ == "__main__":
    main()