Spaces:

jzou19950715
/

Monte_Carlo_Simulation_of_Salary_Prediction

No application file

App Files Files Community

Monte_Carlo_Simulation_of_Salary_Prediction / app.py

jzou19950715

Update app.py

84add52 verified about 2 months ago

raw

history blame

15.6 kB

	import base64
	import io
	import os
	import gradio as gr
	import numpy as np
	import matplotlib.pyplot as plt
	from typing import Dict, List, Tuple, Any
	import json
	from litellm import completion
	import logging

	# Configure logging: set the root logger to INFO via basicConfig and create
	# a module-level logger named after this module.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# System prompts for the different AI roles.
	#
	# CONVERSATION_PROMPT is sent as the system message of every chat request
	# (see CareerAdvisor.chat). It lists the facts the model should gather and
	# the exact sentence it should say once it has enough information.
	CONVERSATION_PROMPT = """
	You are an expert career advisor collecting information through natural conversation.
	Focus on gathering these key points naturally without explicitly asking:
	1. Current Role & Salary:
	- Current salary (must get specific number)
	- Job title/role
	- Company size/type
	- Industry/sector

	2. Experience & Skills:
	- Years in current role
	- Total years working
	- Key skills and proficiencies
	- Management/leadership experience
	- Notable achievements

	3. Education & Training:
	- Highest degree
	- Field of study
	- Certifications
	- Recent training/upskilling

	4. Location & Work Setup:
	- Current location/market
	- Remote work status
	- Willingness to relocate
	- Preferred work arrangement

	Conversation Guidelines:
	- Be natural and conversational
	- Show interest in their career journey
	- Don't force structured responses
	- Make salary discussions comfortable
	- Build rapport through discussion
	- Once you have sufficient information say:
	"I have collected enough information for analysis. Please click 'Generate Analysis' to see your career projections."
	"""

	# EXTRACTION_PROMPT is sent as the system message of the profile-extraction
	# request (see CareerAdvisor.extract_profile). It asks the model to turn the
	# chat transcript into five scores plus the current salary, returned as a
	# single JSON object with the exact field names listed at the end.
	# NOTE(review): each rubric allows "+0.1" bonuses on top of a base that can
	# already be 1.0, so the model may return scores above the stated 0-1 range;
	# nothing in the prompt tells it to cap the total.
	EXTRACTION_PROMPT = """
	Analyze the conversation and extract numerical scores based on salary growth potential.
	Convert qualitative information into scores from 0 to 1.

	Scoring Guidelines:

	1. Industry Score (0-1):
	- 1.0: Cutting-edge tech (AI, quantum)
	- 0.9: High-growth tech (cloud, cyber)
	- 0.8: Established tech/finance
	- 0.7: Healthcare/biotech
	- 0.6: Traditional sectors
	- 0.5: Declining industries
	+0.1: Market leader company
	+0.1: High growth market

	2. Experience Score (0-1):
	- 1.0: 15+ years with leadership
	- 0.9: 10-15 years senior
	- 0.8: 7-10 years mid-senior
	- 0.7: 4-6 years mid-level
	- 0.6: 2-3 years junior
	- 0.5: Entry level
	+0.1: Fast career progression
	+0.1: Significant achievements

	3. Education Score (0-1):
	- 1.0: PhD from top school
	- 0.9: Masters from top school
	- 0.8: Bachelors from top school
	- 0.7: Advanced degree
	- 0.6: Bachelors degree
	- 0.5: Other education
	+0.1: Relevant certifications
	+0.1: Ongoing education

	4. Skills Score (0-1):
	- 1.0: Rare, high-demand skills
	- 0.9: Advanced technical
	- 0.8: Strong tech + soft skills
	- 0.7: Solid technical skills
	- 0.6: Standard skills
	- 0.5: Basic skills
	+0.1: Multiple in-demand skills
	+0.1: Proven implementations

	5. Location Score (0-1):
	- 1.0: Major tech hubs
	- 0.9: Secondary tech hubs
	- 0.8: Major cities
	- 0.7: Growing cities
	- 0.6: Regional cities
	- 0.5: Small markets
	+0.1: Remote flexibility
	+0.1: High growth market

	Return only a JSON object with these exact fields:
	{
	"industry_score": float,
	"experience_score": float,
	"education_score": float,
	"skills_score": float,
	"location_score": float,
	"current_salary": float
	}

	Make reasonable assumptions for any missing information based on context clues in the conversation.
	"""

	class CodeEnvironment:
	    """Execution environment for visualization code strings.

	    Runs a code string with ``np`` and ``plt`` available and returns every
	    open Matplotlib figure as a base64 PNG data URI.

	    NOTE(security): ``exec`` is NOT a sandbox. The executed code has full
	    access to the interpreter, so only trusted, application-authored code
	    strings may be passed to :meth:`execute`.
	    """

	    def __init__(self):
	        # Names visible to executed code. Callers may add entries (for
	        # example input data) before calling execute().
	        self.globals = {
	            'np': np,
	            'plt': plt
	        }
	        # Names assigned by executed code; kept across execute() calls.
	        self.locals = {}

	    def execute(self, code: str) -> Dict[str, Any]:
	        """Execute ``code`` and capture the figures it produced.

	        Args:
	            code: Python source to run with ``self.globals`` and
	                ``self.locals`` as its namespaces.

	        Returns:
	            A dict with two keys: ``'figures'``, a list of
	            ``data:image/png;base64,...`` strings (one per open figure, in
	            figure-number order), and ``'error'``, the exception message if
	            anything failed, otherwise ``None``.
	        """
	        result = {'figures': [], 'error': None}

	        try:
	            exec(code, self.globals, self.locals)

	            # Render every figure that is currently open to an in-memory PNG.
	            for fig_num in plt.get_fignums():
	                fig = plt.figure(fig_num)
	                buf = io.BytesIO()
	                fig.savefig(buf, format='png', dpi=300)
	                img_str = base64.b64encode(buf.getvalue()).decode()
	                result['figures'].append(f"data:image/png;base64,{img_str}")

	        except Exception as e:
	            logger.exception("Visualization code failed")
	            result['error'] = str(e)

	        finally:
	            # Always release figures. Without this, a figure left open by a
	            # failed run would be captured and returned by the next call.
	            plt.close('all')

	        return result

	class MonteCarloSimulator:
	    """Handles Monte Carlo simulation for salary projections.

	    Each simulated path starts at the current salary and compounds one
	    randomly drawn growth rate per year.
	    """

	    def __init__(self, years: int = 5, num_paths: int = 1000):
	        """Configure the simulation size.

	        Args:
	            years: Number of years to project (must be at least 1).
	            num_paths: Number of independent salary paths (at least 1).

	        Raises:
	            ValueError: If ``years`` or ``num_paths`` is smaller than 1.
	        """
	        if years < 1:
	            raise ValueError("years must be at least 1")
	        if num_paths < 1:
	            raise ValueError("num_paths must be at least 1")
	        self.years = years
	        self.num_paths = num_paths

	    def run_simulation(self, profile: Dict[str, float]) -> np.ndarray:
	        """Run the Monte Carlo simulation for one profile.

	        Args:
	            profile: Mapping with ``current_salary`` and the five
	                ``*_score`` entries read by ``_calculate_parameters``.

	        Returns:
	            Array of shape ``(num_paths, years + 1)``; column 0 holds the
	            current salary and column ``k`` the salary after ``k`` years.

	        Raises:
	            KeyError: If a required profile entry is missing.
	        """
	        paths = np.zeros((self.num_paths, self.years + 1))
	        paths[:, 0] = float(profile['current_salary'])

	        # Growth parameters depend only on the profile, so compute them once.
	        params = self._calculate_parameters(profile)

	        # Paths are filled one at a time, year by year, so the sequence of
	        # random draws (and hence seeded results) stays path-major.
	        for path in range(self.num_paths):
	            for year in range(1, self.years + 1):
	                growth = self._calculate_growth(params)
	                paths[path, year] = paths[path, year - 1] * (1 + growth)

	        return paths

	    def _calculate_parameters(self, profile: Dict[str, float]) -> Dict[str, float]:
	        """Map profile scores to growth, volatility and disruption settings."""
	        return {
	            'base_growth': 0.02 + (profile['industry_score'] * 0.04),
	            'skill_premium': 0.01 + (profile['skills_score'] * 0.02),
	            'experience_premium': 0.01 + (profile['experience_score'] * 0.02),
	            'education_premium': 0.005 + (profile['education_score'] * 0.015),
	            'location_premium': 0.01 + (profile['location_score'] * 0.02),
	            # Volatility rises with the industry score.
	            'volatility': 0.05 + (profile['industry_score'] * 0.05),
	            'disruption_chance': 0.1,
	            'disruption_impact': 0.2
	        }

	    def _calculate_growth(self, params: Dict[str, float]) -> float:
	        """Draw one annual growth rate, clamped to the range -10% .. +25%."""
	        # Deterministic part: base growth plus all premiums.
	        growth = (params['base_growth'] +
	                  params['skill_premium'] +
	                  params['experience_premium'] +
	                  params['education_premium'] +
	                  params['location_premium'])

	        # Normally distributed noise around the deterministic part.
	        growth += np.random.normal(0, params['volatility'])

	        # Occasional disruption event with a uniformly drawn magnitude.
	        if np.random.random() < params['disruption_chance']:
	            disruption = params['disruption_impact'] * np.random.random()
	            if np.random.random() < 0.7:  # 70% positive disruption
	                growth += disruption
	            else:
	                growth -= disruption

	        # Apply reasonable bounds
	        return max(min(growth, 0.25), -0.1)  # -10% to +25%

	class CareerAdvisor:
	    """Main career advisor system.

	    Keeps the chat transcript, asks the model to turn it into a numeric
	    profile, runs the Monte Carlo simulation on that profile and produces
	    a text summary plus a chart.
	    """

	    # Score fields the extraction prompt asks the model to return.
	    _SCORE_KEYS = (
	        'industry_score',
	        'experience_score',
	        'education_score',
	        'skills_score',
	        'location_score',
	    )

	    def __init__(self, api_key: str):
	        """Store the API key and create the helper objects."""
	        self.api_key = api_key
	        self.chat_history = []
	        self.code_env = CodeEnvironment()
	        self.simulator = MonteCarloSimulator()

	    def chat(self, message: str) -> str:
	        """Send ``message`` to the model and return its reply.

	        The user message and the reply are appended to ``chat_history``
	        only after the request succeeded.
	        """
	        messages = [
	            {"role": "system", "content": CONVERSATION_PROMPT},
	            *self.chat_history,
	            {"role": "user", "content": message}
	        ]

	        response = completion(
	            model="gpt-4o-mini",
	            messages=messages,
	            api_key=self.api_key
	        )
	        reply = response.choices[0].message.content

	        self.chat_history.extend([
	            {"role": "user", "content": message},
	            {"role": "assistant", "content": reply}
	        ])

	        return reply

	    def extract_profile(self) -> Dict[str, float]:
	        """Extract a validated numerical profile from the conversation.

	        Returns:
	            Dict with the five ``*_score`` values (clamped to 0..1) and
	            ``current_salary`` (positive), all as floats.

	        Raises:
	            ValueError: If there is no conversation yet, or the model's
	                answer is missing a field or contains unusable values.
	        """
	        if not self.chat_history:
	            # Without a transcript the model could only invent a profile.
	            raise ValueError(
	                "No conversation to analyze yet. Please chat with the advisor first."
	            )

	        conversation = "\n".join(
	            f"{msg['role']}: {msg['content']}" for msg in self.chat_history
	        )

	        messages = [
	            {"role": "system", "content": EXTRACTION_PROMPT},
	            {"role": "user", "content": f"Extract profile from:\n{conversation}"}
	        ]

	        response = completion(
	            model="gpt-4o-mini",
	            messages=messages,
	            api_key=self.api_key,
	            response_format={"type": "json_object"}
	        )

	        return self._normalize_profile(
	            json.loads(response.choices[0].message.content)
	        )

	    @classmethod
	    def _normalize_profile(cls, raw: Any) -> Dict[str, float]:
	        """Validate the model's JSON answer and coerce it to floats.

	        Scores are clamped to 0..1 because the scoring rubric's "+0.1"
	        bonuses can push them past 1.0. Unknown keys are dropped.

	        Raises:
	            ValueError: On a non-dict answer, a missing or non-numeric
	                field, a non-finite number, or a salary that is not > 0.
	        """
	        if not isinstance(raw, dict):
	            raise ValueError("Extracted profile is not a JSON object")

	        profile = {}
	        for key in cls._SCORE_KEYS + ('current_salary',):
	            if key not in raw:
	                raise ValueError(f"Extracted profile is missing '{key}'")
	            try:
	                value = float(raw[key])
	            except (TypeError, ValueError):
	                raise ValueError(
	                    f"Extracted profile has a non-numeric '{key}': {raw[key]!r}"
	                ) from None
	            if not np.isfinite(value):
	                raise ValueError(f"Extracted profile has a non-finite '{key}'")
	            profile[key] = value

	        for key in cls._SCORE_KEYS:
	            profile[key] = min(max(profile[key], 0.0), 1.0)

	        if profile['current_salary'] <= 0:
	            raise ValueError("Could not determine a positive current salary")

	        return profile

	    def generate_visualization(self, paths: np.ndarray) -> str:
	        """Render the simulation results as a base64 PNG data URI.

	        Args:
	            paths: Simulated salaries, one row per path and one column per
	                year (column 0 is the current salary).

	        Returns:
	            A ``data:image/png;base64,...`` string, or ``None`` when the
	            plotting code failed or produced no figure.
	        """
	        import textwrap

	        # dedent() lets the plotting code be indented with this method while
	        # still reaching exec() as valid top-level Python.
	        viz_code = textwrap.dedent("""
	            plt.style.use('dark_background')
	            fig, (ax1, ax2) = plt.subplots(
	                2, 1, figsize=(12, 10), gridspec_kw={'height_ratios': [2, 1]}
	            )

	            # Plot 1: Salary Projections
	            years = list(range(paths.shape[1]))

	            # Plot confidence intervals
	            percentiles = [(5, 95), (10, 90), (25, 75)]
	            alphas = [0.1, 0.2, 0.3]
	            for (lower, upper), alpha in zip(percentiles, alphas):
	                lower_bound = np.percentile(paths, lower, axis=0)
	                upper_bound = np.percentile(paths, upper, axis=0)
	                ax1.fill_between(years, lower_bound, upper_bound, alpha=alpha, color='blue')

	            # Plot median line
	            median = np.percentile(paths, 50, axis=0)
	            ax1.plot(years, median, color='white', linewidth=2, label='Expected Path')

	            # Customize plot
	            ax1.set_title('Salary Growth Projections', pad=20)
	            ax1.set_xlabel('Years')
	            ax1.set_ylabel('Salary ($)')
	            ax1.grid(True, alpha=0.2)
	            ax1.legend()

	            # Format axes
	            ax1.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'${x:,.0f}'))
	            ax1.set_xticks(years)
	            ax1.set_xticklabels(['Current'] + [f'Year {i+1}' for i in range(len(years)-1)])

	            # Plot 2: Final Distribution
	            ax2.hist(paths[:, -1], bins=50, color='blue', alpha=0.7)
	            ax2.set_title('Final Salary Distribution', pad=20)
	            ax2.set_xlabel('Salary ($)')
	            ax2.set_ylabel('Frequency')
	            ax2.grid(True, alpha=0.2)
	            ax2.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'${x:,.0f}'))

	            # Lay out last so titles and labels are taken into account.
	            fig.tight_layout(pad=4)
	        """)

	        # The plotting code reads `paths`; it must be placed in the execution
	        # namespace or the code fails with NameError. Globals are used so
	        # nested scopes in the executed code can see it as well.
	        self.code_env.globals['paths'] = paths

	        result = self.code_env.execute(viz_code)
	        if result['error']:
	            logger.error("Visualization failed: %s", result['error'])
	        return result['figures'][0] if result['figures'] else None

	    def generate_summary(self, profile: Dict[str, float], paths: np.ndarray) -> str:
	        """Build the text report for a profile and its simulated paths.

	        The projection horizon is taken from ``paths`` (columns minus one),
	        so the growth rate and heading stay correct for any horizon.

	        Raises:
	            ValueError: If the starting salary in ``paths`` is not positive
	                (the growth rate would be undefined).
	        """
	        final_salaries = paths[:, -1]
	        initial_salary = paths[0, 0]
	        if initial_salary <= 0:
	            raise ValueError("Starting salary must be positive to compute growth")

	        # Number of simulated years; at least 1 to keep the exponent defined.
	        years = max(paths.shape[1] - 1, 1)
	        cagr = (np.median(final_salaries) / initial_salary) ** (1 / years) - 1

	        industry_outlook = ('strong' if profile['industry_score'] > 0.7
	                            else 'moderate' if profile['industry_score'] > 0.5
	                            else 'challenging')
	        skills_outlook = ('excellent' if profile['skills_score'] > 0.7
	                          else 'good' if profile['skills_score'] > 0.5
	                          else 'potential for')
	        location_effect = ('enhances' if profile['location_score'] > 0.7
	                           else 'supports' if profile['location_score'] > 0.5
	                           else 'may limit')

	        return f"""
	Career Profile Analysis
	======================

	Current Situation:
	• Salary: ${profile['current_salary']:,.2f}
	• Industry Position: {profile['industry_score']:.2f}/1.0
	• Experience Level: {profile['experience_score']:.2f}/1.0
	• Education Rating: {profile['education_score']:.2f}/1.0
	• Skills Assessment: {profile['skills_score']:.2f}/1.0
	• Location Impact: {profile['location_score']:.2f}/1.0

	{years}-Year Projection:
	• Conservative (25th percentile): ${np.percentile(final_salaries, 25):,.2f}
	• Most Likely (Median): ${np.percentile(final_salaries, 50):,.2f}
	• Optimistic (75th percentile): ${np.percentile(final_salaries, 75):,.2f}
	• Expected Annual Growth: {cagr*100:.1f}%

	Key Insights:
	• Your profile suggests {cagr*100:.1f}% annual growth potential
	• {profile['industry_score']:.2f} industry score indicates {industry_outlook} growth environment
	• Skills rating of {profile['skills_score']:.2f} suggests {skills_outlook} career advancement
	• Location score {profile['location_score']:.2f} {location_effect} opportunities

	Based on {paths.shape[0]:,} simulated career paths
	"""

	    def generate_analysis(self) -> Tuple[str, str]:
	        """Run extraction, simulation, chart and summary in sequence.

	        Returns:
	            ``(summary, chart)`` on success. On any failure the first item
	            is an error message and the second is ``None``; the exception
	            is logged rather than raised so the UI can show the message.
	        """
	        try:
	            profile = self.extract_profile()
	            paths = self.simulator.run_simulation(profile)
	            viz = self.generate_visualization(paths)
	            summary = self.generate_summary(profile, paths)
	            return summary, viz

	        except Exception as e:
	            logger.exception("Error generating analysis")
	            return f"Error generating analysis: {str(e)}", None

	def create_interface():
	    """Create the Gradio interface.

	    The advisor (API key plus chat history) is stored in a ``gr.State``
	    component so every browser session gets its own instance; a shared
	    variable would leak one visitor's key and conversation to all others.

	    Returns:
	        The assembled ``gr.Blocks`` app, not yet launched.
	    """

	    def init_advisor(api_key: str, advisor):
	        """Create an advisor for this session if the key looks valid."""
	        key = (api_key or "").strip()
	        if not key.startswith("sk-"):
	            # Keep whatever advisor the session already had.
	            return "Invalid API key format. Please check your key.", advisor
	        return "Advisor initialized! Let's discuss your career.", CareerAdvisor(key)

	    def chat(message: str, history: List, advisor):
	        """Forward one user message to the advisor and extend the chat log."""
	        history = history or []
	        if advisor is None:
	            return "Please enter your API key first.", history

	        # Nothing to send: leave the chat untouched.
	        if not message or not message.strip():
	            return "", history

	        try:
	            response = advisor.chat(message)
	        except Exception as e:
	            # Show API/network failures in the chat instead of a raw error.
	            logger.exception("Chat request failed")
	            response = f"Sorry, the request failed: {e}"

	        history.append((message, response))
	        return "", history

	    def analyze(advisor):
	        """Run the full analysis for this session's advisor."""
	        if advisor is None:
	            return "Please enter your API key first.", None

	        return advisor.generate_analysis()

	    # Create interface
	    with gr.Blocks() as demo:
	        gr.Markdown("# AI Career Advisor with Monte Carlo Salary Projections")

	        # Per-session advisor; None until a valid key has been entered.
	        advisor_state = gr.State(None)

	        with gr.Row():
	            api_key = gr.Textbox(
	                label="OpenAI API Key",
	                type="password",
	                placeholder="Enter your API key"
	            )
	            init_btn = gr.Button("Initialize Advisor")

	        status = gr.Textbox(label="Status")

	        with gr.Row():
	            with gr.Column(scale=1):
	                chatbot = gr.Chatbot(height=400)
	                msg = gr.Textbox(
	                    label="Your message",
	                    placeholder="Tell me about your career...",
	                    lines=2
	                )
	                analyze_btn = gr.Button("Generate Analysis", variant="primary")

	            with gr.Column(scale=1):
	                analysis = gr.Textbox(
	                    label="Analysis Report",
	                    lines=20,
	                    show_copy_button=True
	                )
	                # NOTE(review): the analysis returns the chart as a
	                # "data:image/png;base64,..." string; confirm that the
	                # installed Gradio version accepts that form for gr.Image.
	                plot = gr.Image(
	                    label="Salary Projections",
	                    show_download_button=True
	                )

	        # Wire up the interface
	        init_btn.click(
	            init_advisor,
	            inputs=[api_key, advisor_state],
	            outputs=[status, advisor_state]
	        )

	        msg.submit(
	            chat,
	            inputs=[msg, chatbot, advisor_state],
	            outputs=[msg, chatbot]
	        )

	        analyze_btn.click(
	            analyze,
	            inputs=[advisor_state],
	            outputs=[analysis, plot]
	        )

	    return demo

	# Script entry point: build the Gradio app and start serving it. Nothing is
	# launched when this module is merely imported.
	if __name__ == "__main__":
	demo = create_interface()
	demo.launch()