|
import json |
|
import logging |
|
from datetime import datetime |
|
from typing import Dict, List, Optional, Any |
|
import gradio as gr |
|
from openai import AsyncOpenAI |
|
import PyPDF2 |
|
import io |
|
|
|
|
|
# Root logging is configured once, at import time, so INFO-and-above records
# from this module are emitted without further setup.
logging.basicConfig(level=logging.INFO)

# Module-scoped logger used by the error paths further down this file.
logger = logging.getLogger(__name__)
|
|
|
|
|
# System prompt for the interactive chat. It defines FINN's persona, the six
# resume categories it walks the user through, and the conversation rules.
# The text is sent to the model verbatim, so it must not contain layout
# residue (stray table pipes, filler lines) from wherever it was pasted from.
CONVERSATION_PROMPT = """
<SYSTEM_PROMPT>
    <NAME>FINN's Conversation Guide for Information Extraction</NAME>

    <ROLE>
        <DESCRIPTION>You are FINN, Lossdog's dedicated AI assistant specializing in information extraction.</DESCRIPTION>
        <OBJECTIVE>Help users build compelling resumes through natural conversation, drawing out both qualitative and quantitative achievements.</OBJECTIVE>
        <MEANING>FINN stands for Facts, Insights, Numbers, and Narrative.</MEANING>
    </ROLE>

    <COMMUNICATION_STYLE>
        <ADAPTABILITY>Adapt to each user's unique background and experience level.</ADAPTABILITY>
        <TONE>Professional yet approachable, encouraging detailed responses.</TONE>
    </COMMUNICATION_STYLE>

    <DATA_COLLECTION_CATEGORIES>
        <CATEGORY name="Work Experience">
            <PROMPT>Tell me about your professional experience.</PROMPT>
            <APPROACH>
                - Gather core details (title, company, dates, compensation)
                - Encourage storytelling about responsibilities and achievements
                - Naturally probe for quantitative metrics when relevant
                - Help discover and highlight impactful contributions
            </APPROACH>
        </CATEGORY>

        <CATEGORY name="Volunteer & Community Impact">
            <PROMPT>Tell me about any volunteer work or community involvement you'd like to highlight.</PROMPT>
            <APPROACH>
                - Focus on impact and contributions
                - Draw out measurable outcomes where possible
                - Connect activities to professional skills
            </APPROACH>
        </CATEGORY>

        <CATEGORY name="Education">
            <PROMPT>Let's talk about your educational background and ongoing learning.</PROMPT>
            <APPROACH>
                - Gather formal education details
                - Explore continuing education and self-development
                - Identify relevant projects and achievements
            </APPROACH>
        </CATEGORY>

        <CATEGORY name="Skills">
            <PROMPT>What key skills would you like to highlight?</PROMPT>
            <APPROACH>
                - Help identify and articulate both technical and soft skills
                - Draw out expertise from experience
                - Connect skills to practical applications
            </APPROACH>
        </CATEGORY>

        <CATEGORY name="Notable Achievements & Certifications">
            <PROMPT>Tell me about your professional achievements and certifications.</PROMPT>
            <APPROACH>
                - Encourage sharing of recognition and accomplishments
                - Help quantify impact where possible
                - Draw out specific examples
            </APPROACH>
        </CATEGORY>

        <CATEGORY name="Professional Bio">
            <PROMPT>Based on our conversation, let's create your professional bio. What would you like to highlight about yourself and your aspirations?</PROMPT>
            <APPROACH>
                - Synthesize key themes from previous categories
                - Include future goals and aspirations
                - Create a cohesive narrative
            </APPROACH>
        </CATEGORY>
    </DATA_COLLECTION_CATEGORIES>

    <CONVERSATION_PRINCIPLES>
        - Maintain a natural, friendly, and non-judgmental conversation flow with proactive use of emojis and emotional intelligence for effective wording.
        - Stay focused on the core tasks.
        - Adapt questions based on the user's background while staying task-driven.
        - Consistently seek quantifiable impacts in addition to qualitative information.
        - Help users discover and articulate their value through creative nudging and exploration.
        - Guide without being prescriptive, and accommodate users with diverse work experience.
        - Encourage specific examples and metrics needed for an effective resume.
        - Once all information has been collected, or the user indicates that they have finished providing details, naturally congratulate them for successfully creating their resume and invite them to click "Generate Profile" to see the result.
    </CONVERSATION_PRINCIPLES>
</SYSTEM_PROMPT>
"""
|
|
|
# System prompt for the extraction calls. It is used for both chat transcripts
# and PDF resume text, so the wording refers to "conversation or resume text".
# The schema below is illustrative: `string` marks a text value and the `//`
# annotations explain each field; the prompt tells the model not to echo them
# because the reply is parsed as JSON.
EXTRACTION_PROMPT = """
You are a professional information extraction system. Extract information from the provided conversation or resume text and return ONLY a valid JSON object that matches FINN's six core categories. Proactively determine how to fill the JSON schema using the provided information. Do not include any explanatory text before or after the JSON. Analyze the content and intelligently categorize all information into appropriate sections. Use your judgment to place information under the most relevant category, even if it wasn’t explicitly labeled as such. Always creatively generate and synthesize a professional bio, based on everything provided, in the professional_bio section.

Key Guidelines:
- Flexibly categorize information while maintaining core structure
- Place unique or unexpected information under the most relevant category
- Capture both qualitative and quantitative elements
- Ensure no valuable information is lost
- Create a compelling professional bio that synthesizes the entire profile

Return the data in this format, adding relevant fields as needed. The // annotations describe each field and must NOT appear in your output:

{
    "work_experience": [
        {
            "title": string,
            "company": string,
            "period": string,
            "compensation": {
                "salary": string,
                "benefits": string
            },
            "details": string,  // Combined responsibilities, achievements, and metrics
            "quantitative_highlights": [],  // Array of measurable impacts
            "additional_information": {}  // Flexible object for unique elements
        }
    ],
    "volunteer_community": [
        {
            "organization": string,
            "role": string,
            "period": string,
            "impact": string,
            "metrics": []
        }
    ],
    "education": [
        {
            "degree": string,
            "institution": string,
            "period": string,
            "highlights": string,
            "honors": []
        }
    ],
    "skills": {
        "technical": [],
        "soft": [],
        "tools": [],
        "languages": [],
        "industry_expertise": []
    },
    "achievements": {
        "certifications": [
            {
                "name": string,
                "issuer": string,
                "date": string
            }
        ],
        "recognition": [
            {
                "title": string,
                "details": string,
                "impact": string
            }
        ]
    },
    "professional_bio": {
        "summary": string,  // Synthesized narrative of professional journey
        "aspirations": string,  // Future goals and direction
        "key_strengths": [],  // Core competencies and unique value
        "style": "narrative"  // Ensures bio is written in engaging, story-telling format
    }
}

Notes on Bio Generation:
- Always generate a professional bio section regardless of explicit bio information
- Bio should synthesize key achievements, experience, and aspirations
- Include relevant metrics and impactful contributions
- Capture professional journey and future direction
- Style should be engaging and narrative while maintaining professionalism

IMPORTANT: Return ONLY the JSON. Do not include any explanatory text."""
|
|
|
class ProfileBuilder:
    """Run the FINN chat and turn transcripts or PDF resumes into profile JSON.

    Instance state:
      client   -- AsyncOpenAI client for the most recently supplied API key.
      pdf_text -- text of the most recently parsed PDF; generate_profile()
                  falls back to it when no chat history is supplied.

    NOTE(review): create_gradio_interface() builds a single ProfileBuilder and
    closes over it in every event handler, so ``pdf_text`` from one upload is
    presumably visible to later generate_profile() calls from other browser
    sessions. Confirm whether per-session state is needed.
    """

    # Chat model used for both the conversation and the extraction calls.
    MODEL = "gpt-4o-mini"

    def __init__(self):
        self.client = None
        self.pdf_text = None
        # Key the cached client was built with, so it can be reused.
        self._client_key = None

    def _initialize_client(self, api_key: str) -> None:
        """Ensure ``self.client`` is an AsyncOpenAI client for *api_key*.

        The client is rebuilt only when the key changes, instead of creating
        a new client (and connection pool) on every request.

        Raises:
            ValueError: if the key is missing or does not start with "sk-".
        """
        key = (api_key or "").strip()
        if not key.startswith("sk-"):
            raise ValueError("Invalid API key format")
        if self.client is None or getattr(self, "_client_key", None) != key:
            self.client = AsyncOpenAI(api_key=key)
            self._client_key = key

    @staticmethod
    def _parse_json_response(response_text: Optional[str]) -> Any:
        """Decode the model's reply as JSON, tolerating fences or stray prose.

        Tries the text as-is first; if that fails, retries on the span from
        the first "{" to the last "}" so replies wrapped in Markdown code
        fences or surrounded by commentary still parse.

        Raises:
            ValueError: if the reply is empty or contains no decodable JSON
                (json.JSONDecodeError is a ValueError subclass).
        """
        text = (response_text or "").strip()
        if not text:
            raise ValueError("Model returned an empty response")
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            start, end = text.find("{"), text.rfind("}")
            if start == -1 or end <= start:
                raise
            return json.loads(text[start:end + 1])

    async def _request_profile(self, client: Any, user_prompt: str, source: str) -> Dict[str, Any]:
        """Send one extraction request and wrap the parsed result with metadata."""
        completion = await client.chat.completions.create(
            model=self.MODEL,
            messages=[
                {"role": "system", "content": EXTRACTION_PROMPT},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.3,
            # JSON mode: ask the API to return a syntactically valid JSON object.
            response_format={"type": "json_object"},
        )
        profile_data = self._parse_json_response(completion.choices[0].message.content)
        return {
            "profile_data": profile_data,
            "metadata": {
                "generated_at": datetime.now().isoformat(),
                "source": source,
            },
        }

    async def process_message(self, message: str, history: List[List[str]], api_key: str) -> Dict[str, Any]:
        """Send one chat turn to the model.

        Args:
            message: the user's new message.
            history: prior turns as ``[user_text, assistant_text]`` pairs.
            api_key: OpenAI API key supplied by the user.

        Returns:
            ``{"response": text}`` on success, ``{"error": message}`` on any
            failure (the exception is logged, not raised).
        """
        try:
            self._initialize_client(api_key)
            # Bind locally so a concurrent call cannot swap the client under us.
            client = self.client

            messages = [{"role": "system", "content": CONVERSATION_PROMPT}]
            for human, assistant in history or []:
                # Skip missing halves of a turn rather than sending null content.
                if human:
                    messages.append({"role": "user", "content": human})
                if assistant:
                    messages.append({"role": "assistant", "content": assistant})
            messages.append({"role": "user", "content": message})

            completion = await client.chat.completions.create(
                model=self.MODEL,
                messages=messages,
                temperature=0.7,
            )
            return {"response": completion.choices[0].message.content}

        except Exception as e:
            logger.exception("Error in message processing: %s", e)
            return {"error": str(e)}

    async def extract_from_pdf(self, pdf_content: bytes) -> str:
        """Extract text from PDF bytes, remember it in ``pdf_text``, and return it.

        Pages are joined with newlines so words at page boundaries do not run
        together; a page that yields no text contributes an empty string.

        Raises:
            Exception: any error from PyPDF2 is logged and re-raised.
        """
        try:
            reader = PyPDF2.PdfReader(io.BytesIO(pdf_content))
            text = "\n".join(page.extract_text() or "" for page in reader.pages)
        except Exception as e:
            logger.error("PDF extraction error: %s", e)
            raise
        self.pdf_text = text
        return text

    async def process_pdf(self, pdf_path: str, api_key: str) -> Dict[str, Any]:
        """Read a PDF resume from disk and extract a structured profile from it.

        Returns:
            ``{"profile_data": ..., "metadata": {...}}`` with source
            "pdf_resume" on success, ``{"error": message}`` on any failure.
        """
        try:
            self._initialize_client(api_key)
            client = self.client

            with open(pdf_path, "rb") as file:
                resume_text = await self.extract_from_pdf(file.read())

            # Image-only PDFs yield no text; fail clearly instead of asking
            # the model to extract a profile from nothing.
            if not resume_text.strip():
                raise ValueError("No extractable text found in PDF")

            return await self._request_profile(
                client,
                f"Extract profile information from this resume:\n\n{resume_text}",
                "pdf_resume",
            )

        except Exception as e:
            logger.exception("PDF processing error: %s", e)
            return {"error": str(e)}

    async def generate_profile(self, history: List[List[str]], api_key: str) -> tuple[Dict[str, Any], Optional[str]]:
        """Extract a profile from the chat history, or from the last parsed PDF.

        The profile is also written to ``profile_<timestamp>.json`` in the
        current working directory.

        Returns:
            ``(profile, filename)`` on success, ``({"error": message}, None)``
            on any failure, including when there is neither history nor PDF text.
        """
        try:
            self._initialize_client(api_key)
            client = self.client

            if history:
                content = "\n".join(
                    f"User: {turn[0] or ''}\nAssistant: {turn[1] or ''}" for turn in history
                )
                source = "conversation"
            elif self.pdf_text:
                content = self.pdf_text
                source = "pdf"
            else:
                raise ValueError("No content available for profile generation")

            profile = await self._request_profile(
                client,
                f"Extract profile information from this {source}:\n\n{content}",
                source,
            )

            # Microsecond resolution keeps near-simultaneous requests from
            # overwriting each other's file.
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
            filename = f"profile_{timestamp}.json"
            with open(filename, "w", encoding="utf-8") as f:
                json.dump(profile, f, indent=2, ensure_ascii=False)

            return profile, filename

        except Exception as e:
            logger.exception("Profile generation error: %s", e)
            return {"error": str(e)}, None
|
|
|
def create_gradio_interface():
    """Build the Gradio Blocks UI and wire it to a ProfileBuilder.

    The layout has an API-key box, an "Upload Resume" tab, a "Chat with FINN"
    tab, and a shared output area (generate button, JSON view, download file).

    Returns:
        The assembled ``gr.Blocks`` app; the caller is responsible for
        calling ``queue()`` / ``launch()``.
    """
    # One builder is shared by every handler below.
    builder = ProfileBuilder()

    def save_profile(profile: Dict[str, Any]) -> str:
        """Write *profile* to a timestamped JSON file in the CWD; return its name."""
        # Microsecond resolution keeps near-simultaneous requests from
        # overwriting each other's file.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        filename = f"profile_{timestamp}.json"
        with open(filename, "w", encoding="utf-8") as f:
            json.dump(profile, f, indent=2, ensure_ascii=False)
        return filename

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🐕 LOSS DOG - Professional Profile Builder")

        api_key = gr.Textbox(
            label="OpenAI API Key",
            type="password",
            placeholder="Enter your OpenAI API key",
        )

        with gr.Tabs():
            with gr.Tab("Upload Resume"):
                gr.Markdown("""
                # Upload Your Resume
                Upload your existing resume in PDF format and let FINN extract your professional profile.
                """)
                pdf_file = gr.File(
                    label="Upload PDF Resume",
                    file_types=[".pdf"],
                )
                process_pdf_btn = gr.Button("Process Resume")

            with gr.Tab("Chat with FINN"):
                gr.Markdown("""
                # Chat with FINN
                Start a conversation with FINN to build your professional profile from scratch.
                """)
                chatbot = gr.Chatbot(
                    label="Conversation",
                    height=400,
                )
                with gr.Row():
                    msg = gr.Textbox(
                        label="Message",
                        placeholder="Chat with FINN...",
                        show_label=False,
                    )
                    send = gr.Button("Send")

        with gr.Column():
            generate_btn = gr.Button("Generate Profile", variant="primary")
            profile_output = gr.JSON(label="Generated Profile")
            download_btn = gr.File(label="Download Profile")

        async def on_message(message: str, history: List[List[str]], key: str):
            """Handle a chat submission.

            Returns (chat history, profile JSON, download file, textbox text).
            On error the history is left unchanged, the error is shown in the
            JSON panel, and the typed message is kept in the textbox.
            """
            history = history or []
            if not message or not message.strip():
                return history, None, None, ""

            result = await builder.process_message(message, history, key)

            if "error" in result:
                return history, {"error": result["error"]}, None, message

            new_history = history + [[message, result["response"]]]
            return new_history, None, None, ""

        async def on_pdf_upload(pdf, key):
            """Handle the "Process Resume" click.

            Returns (profile JSON or error dict, path of the saved JSON or None).
            """
            if not pdf:
                return {"error": "No PDF file uploaded"}, None

            try:
                # Accept either a file-like object exposing .name or a plain
                # path string, whichever the File component delivers.
                pdf_path = getattr(pdf, "name", pdf)
                result = await builder.process_pdf(pdf_path, key)
                if "error" in result:
                    return {"error": result["error"]}, None

                filename = save_profile(result)
                return result["profile_data"], filename
            except Exception as e:
                return {"error": str(e)}, None

        async def on_generate(history: List[List[str]], key: str):
            """Handle the "Generate Profile" click.

            Returns (profile JSON or error dict, path of the saved JSON or None).
            """
            profile, filename = await builder.generate_profile(history, key)
            if "error" in profile:
                return {"error": profile["error"]}, None
            return profile["profile_data"], filename

        # Enter in the textbox and the Send button trigger the same handler.
        msg.submit(
            on_message,
            inputs=[msg, chatbot, api_key],
            outputs=[chatbot, profile_output, download_btn, msg],
        )

        send.click(
            on_message,
            inputs=[msg, chatbot, api_key],
            outputs=[chatbot, profile_output, download_btn, msg],
        )

        process_pdf_btn.click(
            on_pdf_upload,
            inputs=[pdf_file, api_key],
            outputs=[profile_output, download_btn],
        )

        generate_btn.click(
            on_generate,
            inputs=[chatbot, api_key],
            outputs=[profile_output, download_btn],
        )

    return demo
|
|
|
if __name__ == "__main__":
    # Script entry point: build the UI, enable request queuing, then serve on
    # all interfaces at port 7860 with a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    demo = create_gradio_interface()
    demo.queue()
    demo.launch(**launch_options)