Spaces:

jzou19950715
/

Newco_information_extraction_agent

Sleeping

App Files Files Community

Newco_information_extraction_agent / app.py

jzou19950715

Update app.py

2091cb6 verified 2 months ago

raw

history blame

10.4 kB

	import json
	import logging
	from datetime import datetime
	from typing import Dict, List, Optional, Any
	import gradio as gr
	from openai import AsyncOpenAI
	import PyPDF2
	import io

	# Configure logging
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Prompt text defining the "LOSS DOG" conversational persona and the nine
	# profile categories it should ask about.
	# NOTE(review): no code visible in this file references this constant —
	# presumably it is the system prompt for the chat flow; confirm against the
	# message-handling code.
	CONVERSATION_PROMPT = """You are LOSS DOG, a professional profile builder. Your goal is to have natural conversations
	with users to gather information about their professional background across 9 categories:

	1. Work History & Experience
	2. Salary & Compensation
	3. Skills & Certifications
	4. Education & Learning
	5. Personal Branding & Online Presence
	6. Achievements & Awards
	7. Social Proof & Networking
	8. Project Contributions & Leadership
	9. Work Performance & Impact Metrics

	Be friendly and conversational. Ask follow-up questions naturally. When appropriate, guide users to share more details
	but respect their boundaries. Once you believe you have gathered sufficient information (or if the user indicates they
	have nothing more to share), let them know they can click 'Generate Profile' to proceed.
	"""

	# System prompt sent with every extraction request (see
	# ProfileBuilder.process_pdf). It instructs the model to reply with a single
	# JSON object following the skeleton below; `string` / `number` are type
	# placeholders for the model, so the skeleton itself is not valid JSON.
	EXTRACTION_PROMPT = """You are a professional information extraction system. Your task is to extract information from the potentially unstructured conversation and return ONLY a valid JSON object.
	Proactively determine how to fill the json schema using limited information provided.
	Do not include any explanatory text before or after the JSON.
	Return the data in this exact structure:
	{
	"work_history_experience": {
	"positions": [
	{
	"title": string,
	"company": string,
	"industry": string,
	"location": string,
	"employment_type": string,
	"adaptability": {
	"career_shifts": [],
	"upskilling": []
	},
	"promotions": [],
	"confidence": number
	}
	]
	},
	"salary_compensation": {
	"history": [
	{
	"base_salary": number,
	"bonus_structure": string,
	"stock_options": {
	"type": string,
	"details": string
	},
	"commission": null,
	"benefits": {
	"health": string,
	"pto": string,
	"retirement": string,
	"other": []
	},
	"confidence": number
	}
	]
	},
	"skills_certifications": {
	"hard_skills": [],
	"soft_skills": [],
	"certifications": [],
	"licenses": []
	},
	"education_learning": {
	"formal_education": [],
	"online_courses": [],
	"executive_education": []
	},
	"personal_branding": {
	"portfolio": {
	"github": null,
	"behance": null,
	"other": []
	},
	"blog_posts": [],
	"blockchain_projects": {
	"nfts": [],
	"defi": [],
	"dapps": []
	},
	"social_media": {
	"platforms": [],
	"influence_metrics": {}
	}
	},
	"achievements_awards": {
	"industry_awards": [],
	"hackathons": [],
	"peer_endorsements": [],
	"creative_projects": {
	"ai_art": [],
	"other": []
	}
	},
	"social_proof_networking": {
	"mentors": [],
	"references": [],
	"memberships": [],
	"conference_engagement": []
	},
	"project_contributions": {
	"major_projects": [],
	"open_source": [],
	"team_leadership": [],
	"patents": [],
	"impact": {
	"description": string,
	"metrics": [],
	"confidence": number
	}
	},
	"work_performance_metrics": {
	"kpis": [],
	"revenue_impact": [],
	"efficiency_gains": [],
	"career_growth": [],
	"leadership_influence": []
	}
	}

	IMPORTANT: Return ONLY the JSON. Do not add any explanation text."""
	import json
	import logging
	from datetime import datetime
	from typing import Dict, List, Optional, Any
	import gradio as gr
	from openai import AsyncOpenAI
	import PyPDF2
	import io

	# NOTE: the import block directly above repeats the imports at the top of
	# this file; the repetition is harmless but redundant.

	class ProfileBuilder:
	    """Turn a PDF resume into a structured profile dict via the OpenAI chat API.

	    NOTE(review): the Gradio handlers in this file also call
	    ``process_message`` and ``generate_profile`` on this class, but neither
	    is defined in the visible source — confirm they exist before relying on
	    the chat flow.
	    """

	    def __init__(self):
	        # Not read by any method visible in this class.
	        self.conversation_history = []
	        # Created lazily by _initialize_client() once an API key is supplied.
	        self.client = None

	    def _initialize_client(self, api_key: str) -> None:
	        """Create the async OpenAI client after a basic format check of the key.

	        Args:
	            api_key: OpenAI secret key; surrounding whitespace is ignored.

	        Raises:
	            ValueError: if the key is missing or does not start with ``sk-``.
	        """
	        # Pasted keys often carry a trailing newline/space; a missing key must
	        # produce the same ValueError as a malformed one, not AttributeError.
	        api_key = (api_key or "").strip()
	        if not api_key.startswith("sk-"):
	            raise ValueError("Invalid API key format")
	        self.client = AsyncOpenAI(api_key=api_key)

	    @staticmethod
	    def _parse_json_response(response_text: Optional[str]) -> Any:
	        """Decode the model reply as JSON, tolerating fences or stray prose.

	        The reply is first parsed as-is; if that fails, the span from the first
	        ``{`` to the last ``}`` is parsed instead.

	        Raises:
	            json.JSONDecodeError: if no JSON object can be decoded.
	        """
	        text = (response_text or "").strip()
	        try:
	            return json.loads(text)
	        except json.JSONDecodeError:
	            start, end = text.find("{"), text.rfind("}")
	            if start == -1 or end <= start:
	                raise
	            return json.loads(text[start:end + 1])

	    async def extract_from_pdf(self, pdf_content: bytes) -> str:
	        """Extract the text of every page of a PDF.

	        Args:
	            pdf_content: raw bytes of the PDF file.

	        Returns:
	            The page texts joined with newlines (empty pages contribute "").

	        Raises:
	            Exception: whatever the PDF reader raises is logged and re-raised.
	        """
	        try:
	            pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_content))
	            # Join with a newline so the last word of one page is not glued to
	            # the first word of the next; guard against a None page result.
	            return "\n".join(
	                page.extract_text() or "" for page in pdf_reader.pages
	            )
	        except Exception as e:
	            logger.exception("Error extracting PDF: %s", e)
	            raise

	    async def process_pdf(self, pdf_path: str, api_key: str) -> Dict[str, Any]:
	        """Process a PDF resume and extract profile information from it.

	        Args:
	            pdf_path: filesystem path of the uploaded PDF.
	            api_key: OpenAI key, used only if no client has been created yet.

	        Returns:
	            ``{"profile_data": ..., "metadata": {...}}`` on success, or
	            ``{"error": "<message>"}`` on any failure (nothing is raised).
	        """
	        try:
	            # The client is created once; a different key passed on a later
	            # call is ignored while a client already exists.
	            if not self.client:
	                self._initialize_client(api_key)

	            with open(pdf_path, 'rb') as file:
	                pdf_content = file.read()
	            resume_text = await self.extract_from_pdf(pdf_content)
	            if not resume_text.strip():
	                # Image-only/scanned PDFs yield no text; do not spend an API
	                # call asking the model to extract from nothing.
	                raise ValueError("No extractable text found in PDF")

	            # Use the extraction prompt directly on PDF content
	            completion = await self.client.chat.completions.create(
	                model="gpt-4o-mini",
	                messages=[
	                    {"role": "system", "content": EXTRACTION_PROMPT},
	                    {"role": "user", "content": f"Extract profile information from this resume:\n\n{resume_text}"}
	                ],
	                temperature=0.3
	            )

	            profile_data = self._parse_json_response(
	                completion.choices[0].message.content
	            )

	            return {
	                "profile_data": profile_data,
	                "metadata": {
	                    "generated_at": datetime.now().isoformat(),
	                    "source": "pdf_resume"
	                }
	            }

	        except Exception as e:
	            # Boundary for the UI: log with traceback, report as a plain dict.
	            logger.exception("Error processing PDF: %s", e)
	            return {"error": str(e)}

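	# NOTE(review): ProfileBuilder.process_message() and
	# ProfileBuilder.generate_profile() are called by the UI handlers below but
	# are not defined anywhere in this file.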

	def create_gradio_interface():
	    """Assemble the Gradio UI: API-key box, resume-upload tab, chat tab, outputs.

	    Returns:
	        The constructed ``gr.Blocks`` app; the caller queues and launches it.
	    """
	    # NOTE(review): this single builder is captured by every handler below, so
	    # its client and conversation_history appear to be shared by all sessions
	    # of the app — confirm that is intended.
	    builder = ProfileBuilder()

	    with gr.Blocks(theme=gr.themes.Soft()) as demo:
	        gr.Markdown("# 🐕 LOSS DOG - Professional Profile Builder")

	        api_key = gr.Textbox(
	            label="OpenAI API Key",
	            type="password",
	            placeholder="Enter your OpenAI API key"
	        )

	        with gr.Tabs():
	            with gr.Tab("Upload Resume"):
	                gr.Markdown("""
	# Upload Your Resume
	Upload your existing resume in PDF format and let LOSS DOG extract your professional profile.
	""")
	                pdf_file = gr.File(
	                    label="Upload PDF Resume",
	                    file_types=[".pdf"]
	                )
	                process_pdf_btn = gr.Button("Process Resume")

	            with gr.Tab("Chat with AI"):
	                gr.Markdown("""
	# Chat with LOSS DOG
	Start a conversation with LOSS DOG to build your professional profile from scratch.
	""")
	                chatbot = gr.Chatbot(label="Conversation")
	                with gr.Row():
	                    msg = gr.Textbox(
	                        label="Message",
	                        placeholder="Chat with LOSS DOG..."
	                    )
	                    send = gr.Button("Send")

	        with gr.Column():
	            generate_btn = gr.Button("Generate Profile")
	            profile_output = gr.JSON(label="Generated Profile")
	            download_btn = gr.File(label="Download Profile")

	        async def on_pdf_upload(pdf, key):
	            """Extract a profile from the uploaded PDF.

	            Returns a ``(profile_json, download_path)`` pair on every path,
	            because two output components are bound to this handler.
	            """
	            if not pdf:
	                return {"error": "No PDF file uploaded"}, None

	            try:
	                # Depending on the Gradio version/config the upload arrives as
	                # a path string or as an object exposing the path via .name.
	                pdf_path = getattr(pdf, "name", pdf)
	                result = await builder.process_pdf(pdf_path, key)
	                if "error" in result:
	                    return {"error": result["error"]}, None

	                # Save profile
	                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
	                filename = f"profile_{timestamp}.json"
	                with open(filename, 'w', encoding='utf-8') as f:
	                    json.dump(result, f, indent=2)

	                return result["profile_data"], filename
	            except Exception as e:
	                return {"error": str(e)}, None

	        async def on_message(message: str, history: List[List[str]], key: str):
	            """Send one chat message; returns ``(chat_history, error_or_None)``."""
	            if not message.strip():
	                return history, None

	            result = await builder.process_message(message, key)

	            if "error" in result:
	                return history, {"error": result["error"]}

	            history = history + [[message, result["response"]]]
	            return history, None

	        async def on_generate():
	            """Build the profile from the chat; returns ``(profile_json, path)``."""
	            profile, filename = await builder.generate_profile()
	            if "error" in profile:
	                return profile, None
	            return profile["profile_data"], filename

	        # Bind events: pressing Enter in the textbox and clicking "Send" do the
	        # same thing, then clear the message box.
	        for trigger in (msg.submit, send.click):
	            trigger(
	                on_message,
	                inputs=[msg, chatbot, api_key],
	                outputs=[chatbot, profile_output]
	            ).then(lambda: "", None, msg)

	        process_pdf_btn.click(
	            on_pdf_upload,
	            inputs=[pdf_file, api_key],
	            outputs=[profile_output, download_btn]
	        )

	        generate_btn.click(
	            on_generate,
	            outputs=[profile_output, download_btn]
	        )

	    return demo

	if __name__ == "__main__":
	    # Script entry point: build the UI, call queue() on it, then launch the
	    # server bound to all interfaces on port 7860.
	    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
	    demo = create_gradio_interface()
	    demo.queue()
	    demo.launch(**launch_options)