Spaces · Runtime error
Commit 79b5c9c · committed by Johnny · 1 parent: 102e49d

feat: Update resume builder with LFS-tracked assets

- Add header and footer images using Git LFS
- Update configuration and dependencies
- Improve resume builder and OpenAI extractor
- Update app components and utility functions
- Remove unused blank resume template

Files changed:
- .gitattributes +2 -0
- .gitignore +7 -0
- .streamlit/config.toml +0 -1
- app.py +57 -4
- config.py +105 -27
- footer.png +3 -0
- header.png +3 -0
- pages/Template.py +64 -13
- requirements.txt +2 -1
- templates/blank_resume.docx +0 -0
- utils/builder.py +192 -280
- utils/openai_extractor.py +142 -219
- utils/parser.py +1 -1
- utils/reporting.py +2 -2
- utils/screening.py +1 -1
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.docx filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
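With these two patterns in place, `*.docx` and `*.png` files (including the header.png and footer.png added in this commit) are stored as small Git LFS pointer stubs in the repository rather than raw binaries. A minimal sketch, not part of the commit, for checking whether a checked-out file is still an LFS pointer rather than the downloaded binary:

def is_lfs_pointer(path: str) -> bool:
    # LFS pointer files are small text stubs whose first line is the spec URL.
    with open(path, "rb") as f:
        return f.read(100).startswith(b"version https://git-lfs.github.com/spec/v1")

print(is_lfs_pointer("header.png"))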
.gitignore CHANGED
@@ -37,3 +37,10 @@ debug_*.docx
 .sfdx/
 *.cls
 apex.db
+
+.DS_Store
+utils/.DS_Store
+utils/cursor-updates
+utils/prompt-updates
+Youlin Joseph Li qvell.docx
+Template.py
.streamlit/config.toml CHANGED
@@ -7,7 +7,6 @@ font="sans serif"
 
 [ui]
 hideTopBar = false
-hideSidebarNav = true
 
 [server]
 headless = true
app.py CHANGED
@@ -1,6 +1,7 @@
 # TalentLens
 
 import os
+import time  # Add time module import
 from io import BytesIO
 
 import streamlit as st
@@ -8,13 +9,40 @@ import fitz  # PyMuPDF
 import requests
 from dotenv import load_dotenv
 
-from config import supabase, HF_API_TOKEN, HF_HEADERS,
+from config import supabase, HF_API_TOKEN, HF_HEADERS, HF_ENDPOINTS
 from utils.parser import parse_resume, extract_email, summarize_resume
 from utils.hybrid_extractor import extract_resume_sections
 from utils.builder import build_resume_from_data
 from utils.screening import evaluate_resumes
 from utils.reporting import generate_pdf_report, generate_interview_questions_from_summaries
 
+def toggle_endpoint(endpoint_name, action):
+    """Start or stop an endpoint"""
+    try:
+        from config import HF_HEADERS, HF_ENDPOINTS
+        # Use the health endpoint
+        endpoint_info = HF_ENDPOINTS[endpoint_name]
+        url = f"{endpoint_info['url']}/health"
+
+        # Use HEAD request to start the endpoint
+        response = requests.head(url, headers=HF_HEADERS)
+
+        if response.status_code == 503:
+            st.info("🚀 Starting endpoint... This may take 5-6 minutes. Click on 'Start' again to refresh status.")
+            time.sleep(2)  # Wait briefly before refreshing status
+            from config import check_endpoint_status
+            new_status = check_endpoint_status(endpoint_name)
+            st.session_state['endpoint_status'] = {endpoint_name: new_status}
+        elif response.status_code == 200:
+            st.success("✅ Endpoint is running")
+            time.sleep(2)  # Wait briefly before refreshing status
+            from config import check_endpoint_status
+            new_status = check_endpoint_status(endpoint_name)
+            st.session_state['endpoint_status'] = {endpoint_name: new_status}
+        else:
+            st.error(f"❌ Failed to {action} endpoint: {response.text}")
+    except Exception as e:
+        st.error(f"❌ Failed to {action} endpoint: {str(e)}")
 
 # ------------------------- Main App Function -------------------------
 def main():
@@ -61,11 +89,11 @@ def main():
 
     with col1:
         # Evaluation trigger
-        evaluate_clicked = st.button("
+        evaluate_clicked = st.button("\U0001F4CA Evaluate Resumes", type="primary", use_container_width=True)
 
     with col2:
         # Format Resume redirect button
-        format_clicked = st.button("
+        format_clicked = st.button("\U0001F4C4 Format Resume", use_container_width=True)
 
     # Handle Format Resume redirect
     if format_clicked:
@@ -81,7 +109,7 @@ def main():
         st.error("⚠️ Please upload at least one resume.")
         return
 
-    st.write("###
+    st.write("### Evaluating Resumes...")
 
     # Resume Evaluation
    shortlisted, removed_candidates = evaluate_resumes(uploaded_files, job_description)
@@ -109,6 +137,31 @@ def main():
    for removed in removed_candidates:
        st.write(f"**{removed['name']}** - {removed['reason']}")
 
+
+    # Get current status using DNS resolution
+    from config import check_endpoint_status
+    endpoint_name = "vzwjawyxvu030jsw"  # Updated to match endpoint ID
+    current_status = check_endpoint_status(endpoint_name)
+    state = current_status.get('status', 'unknown')
+
+    # Update session state with current status
+    st.session_state['endpoint_status'] = {endpoint_name: current_status}
+
+    # Show Start button and status
+    start_button = st.empty()  # Placeholder for Start button
+    if state in ['stopped', 'error']:
+        if start_button.button("▶️ Start", key=f"start_{endpoint_name}", use_container_width=True):
+            toggle_endpoint(endpoint_name, "start")
+            # Refresh status after starting
+            new_status = check_endpoint_status(endpoint_name)
+            st.session_state['endpoint_status'] = {endpoint_name: new_status}
+            if new_status.get('status') == 'running':
+                st.success("✅ Endpoint is running")
+            elif new_status.get('status') == 'starting':
+                st.info("🚀 Starting endpoint... This may take 5-6 minutes. Click on 'Start' again to refresh status.")
+            elif new_status.get('status') == 'error':
+                st.error(f"❌ Error: {new_status.get('error', 'Unknown error')}")
+
 # ------------------------- Run the App -------------------------
 if __name__ == "__main__":
     main()
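The new `toggle_endpoint` helper leans on a behavior of scale-to-zero Hugging Face Inference Endpoints: a request to a cold endpoint returns 503 and simultaneously triggers scale-up, so a bare HEAD against `/health` doubles as a wake-up call. A standalone sketch of that polling pattern (the `wait_until_ready` helper is hypothetical, not part of this commit):

import time
import requests

def wait_until_ready(base_url: str, token: str, timeout_s: int = 360, poll_s: int = 10) -> bool:
    """Poll the endpoint's /health route until it answers 200 or the deadline passes."""
    headers = {"Authorization": f"Bearer {token}"}
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        try:
            # 503 means the endpoint is still cold/scaling; keep waiting.
            if requests.head(f"{base_url}/health", headers=headers, timeout=5).status_code == 200:
                return True
        except requests.RequestException:
            pass  # transient network error while the endpoint scales up
        time.sleep(poll_s)
    return False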
config.py CHANGED
@@ -2,6 +2,7 @@
 import os
 import time
 import requests
+import socket
 from dotenv import load_dotenv
 from supabase import create_client
 from sentence_transformers import SentenceTransformer
@@ -20,44 +21,121 @@ supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
 # === Embedding Model for Scoring ===
 embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
 
-# === Hugging Face API Configuration
+# === Hugging Face API Configuration ===
 HF_API_TOKEN = os.getenv("HF_API_TOKEN")
 if not HF_API_TOKEN:
     raise ValueError("Missing Hugging Face API key. Check your .env file.")
+
+# Headers for API requests
 HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}
 
 # === Hugging Face Model Endpoints ===
-
-    "
-
+HF_ENDPOINTS = {
+    "bart-large-cnn-ovt": {
+        "url": "https://hedemwou4oqkk65c.us-east-1.aws.endpoints.huggingface.cloud",
+        "task": "summarization",
+        "model_id": "facebook/bart-large-cnn"
+    },
+    "vzwjawyxvu030jsw": {  # Updated endpoint name to match URL
+        "url": "https://vzwjawyxvu030jsw.us-east-1.aws.endpoints.huggingface.cloud",
+        "task": "text-generation",
+        "model_id": "google/gemma-7b"
+    }
 }
 
-
+def check_endpoint_status(endpoint_name: str) -> dict:
+    """
+    Check the status of a private Hugging Face endpoint using DNS resolution
+    """
+    if endpoint_name not in HF_ENDPOINTS:
+        return {
+            "status": "error",
+            "error": f"Unknown endpoint: {endpoint_name}"
+        }
+
+    try:
+        endpoint_info = HF_ENDPOINTS[endpoint_name]
+        hostname = endpoint_info['url'].replace('https://', '').split('/')[0]
+
+        # Try DNS resolution
+        try:
+            socket.gethostbyname(hostname)
+            # If DNS resolves, endpoint exists but may be stopped
+            return {
+                "status": "stopped",
+                "scaled": True,
+                "pending": 0,
+                "error": None
+            }
+        except socket.gaierror:
+            # If DNS fails, endpoint doesn't exist
+            return {
+                "status": "error",
+                "error": "Endpoint not found"
+            }
+    except Exception as e:
+        return {
+            "status": "error",
+            "error": str(e)
+        }
+
+def toggle_endpoint(endpoint_name: str, action: str) -> dict:
+    """
+    Start or stop a private Hugging Face endpoint
+    """
+    try:
+        # For private endpoints, use the Endpoints API
+        api_base = "https://api.endpoints.huggingface.cloud"
+        action_url = f"{api_base}/v2/endpoint/{endpoint_name}/{action}"
+
+        response = requests.post(
+            action_url,
+            headers=HF_HEADERS,
+            timeout=10
+        )
+
+        if response.status_code in [200, 202]:
+            return {
+                "success": True,
+                "message": f"Successfully {action}ed endpoint"
+            }
+        else:
+            return {
+                "error": f"Failed to {action} endpoint: {response.text}"
+            }
+    except Exception as e:
+        return {
+            "error": f"Failed to {action} endpoint: {str(e)}"
+        }
 
-# ===
-def query(payload
+# === Query Helper ===
+def query(payload: dict, endpoint_name: str) -> dict:
     """
-
+    Send a query to a Hugging Face endpoint
     """
-    if
-
+    if endpoint_name not in HF_ENDPOINTS:
+        return {
+            "error": f"Unknown endpoint: {endpoint_name}"
+        }
 
-
+    endpoint_info = HF_ENDPOINTS[endpoint_name]
+    url = endpoint_info['url']
 
-
+    try:
+        response = requests.post(
+            url,
+            headers=HF_HEADERS,
+            json=payload,
+            timeout=30
+        )
+
+        if response.status_code == 200:
             return response.json()
-
+        else:
+            return {
+                "error": f"Query failed with status {response.status_code}: {response.text}"
+            }
+    except Exception as e:
+        return {
+            "error": str(e)
+        }
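Hypothetical usage of the helpers defined above (the payload shape assumes a summarization endpoint):

from config import check_endpoint_status, query

name = "bart-large-cnn-ovt"
status = check_endpoint_status(name)
if status["status"] != "error":
    # Standard inference payload for a facebook/bart-large-cnn endpoint
    result = query({"inputs": "Long resume text to summarize..."}, name)
    print(result)
else:
    print(f"Endpoint unavailable: {status['error']}")

One caveat worth noting: because `check_endpoint_status` only tests DNS resolution, it reports "stopped" for a warm endpoint too. It can distinguish "exists" from "not found" but not "running" from "scaled to zero", which is why the pages in this commit additionally probe `/health` via `toggle_endpoint`.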
footer.png ADDED (Git LFS)
header.png ADDED (Git LFS)
pages/Template.py CHANGED
@@ -1,8 +1,10 @@
-# pages/
+# pages/Format_Resume.py
 
 import os, sys, streamlit as st
 import json
 from io import BytesIO
+import time  # Added for API status check
+import requests  # Added for endpoint control
 
 # Add parent directory to path so we can import utils
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -11,17 +13,13 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from dotenv import load_dotenv
 load_dotenv(override=True)
 
+from config import HF_ENDPOINTS  # Update import
 from utils.hybrid_extractor import extract_resume_sections
 from utils.builder import build_resume_from_data
-from utils.parser import parse_resume
-
-# Path to your blank template (header/footer only)
-template_path = os.path.join(
-    os.path.dirname(__file__), '..', 'templates', 'blank_resume.docx'
-)
+from utils.parser import parse_resume
 
 st.set_page_config(
-    page_title='Resume
+    page_title='Resume Formatter',
     layout='centered',
     initial_sidebar_state="collapsed"
 )
@@ -40,17 +38,70 @@ st.markdown("""
 </style>
 """, unsafe_allow_html=True)
 
+def toggle_endpoint(endpoint_name, action):
+    """Start or stop an endpoint"""
+    try:
+        from config import HF_HEADERS, HF_ENDPOINTS
+        # Use the health endpoint
+        endpoint_info = HF_ENDPOINTS[endpoint_name]
+        url = f"{endpoint_info['url']}/health"
+
+        # Use HEAD request to start the endpoint
+        response = requests.head(url, headers=HF_HEADERS)
+
+        if response.status_code == 503:
+            st.info("🚀 Starting endpoint... This may take 3-4 minutes. Click on 'Start' again to refresh status.")
+            time.sleep(2)  # Wait briefly before refreshing status
+            from config import check_endpoint_status
+            new_status = check_endpoint_status(endpoint_name)
+            st.session_state['endpoint_status'] = {endpoint_name: new_status}
+        elif response.status_code == 200:
+            st.success("✅ Endpoint is running")
+            time.sleep(2)  # Wait briefly before refreshing status
+            from config import check_endpoint_status
+            new_status = check_endpoint_status(endpoint_name)
+            st.session_state['endpoint_status'] = {endpoint_name: new_status}
+        else:
+            st.error(f"❌ Failed to {action} endpoint: {response.text}")
+    except Exception as e:
+        st.error(f"❌ Failed to {action} endpoint: {str(e)}")
+
 # Home button at the top
-if st.button("
+if st.button("\U0001F3E0 Home", help="Return to main TalentLens.AI page"):
     st.switch_page("app.py")
 
-st.title('📄 Resume
+st.title('📄 Resume Formatter')
 st.markdown("---")
 
 uploaded = st.file_uploader('Upload Resume (PDF or DOCX)', type=['pdf','docx'])
 if not uploaded:
     st.info("Please upload a resume to get started.")
-
+
+    # Get current status using DNS resolution
+    from config import check_endpoint_status
+    endpoint_name = "bart-large-cnn-ovt"
+    current_status = check_endpoint_status(endpoint_name)
+    state = current_status.get('status', 'unknown')
+
+    # Update session state with current status
+    st.session_state['endpoint_status'] = {endpoint_name: current_status}
+
+    # Show Start button and status
+    start_button = st.empty()  # Placeholder for Start button
+    if state in ['stopped', 'error']:
+        if start_button.button("▶️ Start", key=f"start_{endpoint_name}", use_container_width=True):
+            toggle_endpoint(endpoint_name, "start")
+            # Refresh status after starting
+            new_status = check_endpoint_status(endpoint_name)
+            st.session_state['endpoint_status'] = {endpoint_name: new_status}
+            if new_status.get('status') == 'running':
+                st.success("✅ Endpoint is running")
+            elif new_status.get('status') == 'starting':
+                st.info("🚀 Starting endpoint... This may take 3-4 minutes. Click on 'Start' again to refresh status.")
+            elif new_status.get('status') == 'error':
+                st.error(f"❌ Error: {new_status.get('error', 'Unknown error')}")
+
+    st.stop()  # Stop here if no file is uploaded
 
 st.success(f'Uploaded: {uploaded.name}')
@@ -239,7 +290,7 @@ if st.button('📄 Generate Formatted Resume', type='primary'):
     try:
         with st.spinner('Building formatted resume...'):
             # Build the resume document
-            doc = build_resume_from_data(
+            doc = build_resume_from_data(tmpl="", sections=data)
 
             # Save to buffer
             buf = BytesIO()
@@ -329,4 +380,4 @@ st.markdown(
     "🚀 <strong>TalentLens.AI</strong> - Powered by AI for intelligent resume processing"
     "</div>",
     unsafe_allow_html=True
-)
+)
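The reworked upload gate relies on Streamlit's `st.stop()`: on each rerun, everything below the call is skipped until a file is uploaded, so the endpoint status UI renders only in the pre-upload state. A minimal sketch of the idiom (standalone, not code from this commit):

import streamlit as st

uploaded = st.file_uploader("Upload Resume (PDF or DOCX)", type=["pdf", "docx"])
if not uploaded:
    st.info("Please upload a resume to get started.")
    st.stop()  # nothing below this line runs until a file is uploaded

st.success(f"Uploaded: {uploaded.name}")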
requirements.txt CHANGED
@@ -10,4 +10,5 @@ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1
 openai
 fuzzywuzzy
 python-docx
-numpy<2.0
+numpy<2.0
+from torch._C import *  # noqa: F403
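Note: `from torch._C import *  # noqa: F403` is a Python import statement, not a pip requirement; pip will reject this line when resolving requirements.txt, which is consistent with the Space's "Runtime error" status shown above. It appears to be an accidental paste.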
templates/blank_resume.docx DELETED (binary file, 48.2 kB)
utils/builder.py CHANGED
@@ -1,20 +1,19 @@
+import logging
+import os
+import re
 from datetime import datetime
 from dateutil.parser import parse as date_parse
-import re, math
 from docx import Document
-from docx.
-from docx.
-import logging
+from docx.enum.text import WD_PARAGRAPH_ALIGNMENT, WD_TAB_ALIGNMENT
+from docx.shared import Inches, Pt
 
 logger = logging.getLogger(__name__)
 
-# ---------- helpers ---------------------------------------------------
-def _date(dt_str:str)->datetime:
-    try: return date_parse(dt_str, default=datetime(1900,1,1))
-    except: return datetime(1900,1,1)
 
-def fmt_range(raw:str)->str:
-
+def fmt_range(raw: str) -> str:
+    """Formats a date range string nicely."""
+    if not raw:
+        return ""
     parts = [p.strip() for p in re.split(r"\s*[–-]\s*", raw)]
 
     formatted_parts = []
@@ -23,284 +22,197 @@ def fmt_range(raw:str)->str:
            formatted_parts.append("Present")
        else:
            try:
-                date_obj =
-
-
-
+                date_obj = date_parse(part, fuzzy=True, default=datetime(1900, 1, 1))
+                if date_obj.year == 1900:
+                    formatted_parts.append(part)
+                else:
+                    formatted_parts.append(date_obj.strftime("%B %Y"))
+            except (ValueError, TypeError):
+                formatted_parts.append(part)
 
     return " – ".join(formatted_parts)
 
-# ---------- main ------------------------------------------------------
-def build_resume_from_data(tmpl:str, sections:dict)->Document:
-    logger.info(f"BUILDER: Attempting to load document template from: {tmpl}")
-    doc = Document(tmpl)
-    logger.info(f"BUILDER: Template {tmpl} loaded successfully.")
 
-
-    tables_to_remove = list(doc.tables)  # Create a copy of the list
-    for table in tables_to_remove:
-        tbl = table._element
-        tbl.getparent().remove(tbl)
-
-    logger.info(f"BUILDER: After clearing - Document has {len(doc.paragraphs)} paragraphs and {len(doc.tables)} tables")
-
-    # Verify headers/footers are still intact
-    logger.info(f"BUILDER: After clearing - Document still has {len(doc.sections)} sections")
-    for i, section_obj in enumerate(doc.sections):
-        if section_obj.header:
-            logger.info(f"BUILDER: Section {i} header still has {len(section_obj.header.paragraphs)} paragraphs")
-        if section_obj.footer:
-            logger.info(f"BUILDER: Section {i} footer still has {len(section_obj.footer.paragraphs)} paragraphs")
-
-    logger.info(f"BUILDER: Template preserved with original headers and footers")
-
-    # --- easy builders ---
-    def heading(txt): pg=doc.add_paragraph(); r=pg.add_run(txt); r.bold=True; r.font.size=Pt(12)
-    def bullet(txt,lvl=0): p=doc.add_paragraph(); p.paragraph_format.left_indent=Pt(lvl*12); p.add_run(f"• {txt}").font.size=Pt(11)
-    def two_col(l,r):
-        tbl=doc.add_table(rows=1,cols=2); tbl.autofit=True
-        tbl.cell(0,0).paragraphs[0].add_run(l).bold=True
-        rp = tbl.cell(0,1).paragraphs[0]; rp.alignment=WD_ALIGN_PARAGRAPH.RIGHT
-        rr = rp.add_run(r); rr.italic=True
-
-    # --- header (name + current role) ---
-    exps = sections.get("StructuredExperiences",[])
-    if exps:
-        try:
-            # Filter to only dictionary experiences
-            dict_exps = [e for e in exps if isinstance(e, dict)]
-            if dict_exps:
-                newest = max(dict_exps, key=lambda e: _date(e.get("date_range","").split("–")[0] if "–" in e.get("date_range","") else e.get("date_range","").split("-")[0] if "-" in e.get("date_range","") else e.get("date_range","")))
-                cur_title = newest.get("title","")
-            else:
-                cur_title = ""
-        except:
-            # Fallback: try to get title from first dictionary experience
-            for exp in exps:
-                if isinstance(exp, dict) and exp.get("title"):
-                    cur_title = exp.get("title","")
-                    break
-            else:
-                cur_title = ""
-    else:
-        # Try to extract job title from summary if no structured experiences
-        cur_title = ""
-        summary = sections.get("Summary", "")
-        if summary:
-            # Look for job titles in the summary
-            title_patterns = [
-                r'(?i)(.*?engineer)',
-                r'(?i)(.*?developer)',
-                r'(?i)(.*?analyst)',
-                r'(?i)(.*?manager)',
-                r'(?i)(.*?specialist)',
-                r'(?i)(.*?consultant)',
-                r'(?i)(.*?architect)',
-                r'(?i)(.*?lead)',
-                r'(?i)(.*?director)',
-                r'(?i)(.*?coordinator)'
-            ]
-
-            for pattern in title_patterns:
-                match = re.search(pattern, summary)
-                if match:
-                    potential_title = match.group(1).strip()
-                    # Clean up the title
-                    potential_title = re.sub(r'^(results-driven|experienced|senior|junior|lead)\s+', '', potential_title, flags=re.I)
-                    if len(potential_title) > 3 and len(potential_title) < 50:
-                        cur_title = potential_title.title()
-                        break
-
-    if sections.get("Name"):
-        p=doc.add_paragraph(); p.alignment=WD_PARAGRAPH_ALIGNMENT.CENTER
-        run=p.add_run(sections["Name"]); run.bold=True; run.font.size=Pt(16)
-    if cur_title:
-        p=doc.add_paragraph(); p.alignment=WD_PARAGRAPH_ALIGNMENT.CENTER
-        p.add_run(cur_title).font.size=Pt(12)
-
-    # --- summary ---
-    if sections.get("Summary"):
-        heading("Professional Summary:")
-        pg=doc.add_paragraph(); pg.paragraph_format.first_line_indent=Pt(12)
-        pg.add_run(sections["Summary"]).font.size=Pt(11)
-
-    # --- skills ---
-    if sections.get("Skills"):
-        heading("Skills:")
-        skills = sorted(set(sections["Skills"]))
-        cols = 3
-        rows = math.ceil(len(skills)/cols)
-        tbl = doc.add_table(rows=rows, cols=cols); tbl.autofit=True
-        k=0
-        for r in range(rows):
-            for c in range(cols):
-                if k < len(skills):
-                    tbl.cell(r,c).paragraphs[0].add_run(f"• {skills[k]}").font.size=Pt(11)
-                    k+=1
-
-    # --- experience ---
-    if exps:
-        heading("Professional Experience:")
-        for e in exps:
-            # Ensure e is a dictionary, not a string
-            if isinstance(e, str):
-                # If it's a string, create a basic experience entry
-                bullet(e, 0)
-                continue
-            elif not isinstance(e, dict):
-                # Skip if it's neither string nor dict
-                continue
-
-            # Process dictionary experience entry
-            title = e.get("title", "")
-            company = e.get("company", "")
-            date_range = e.get("date_range", "")
-            responsibilities = e.get("responsibilities", [])
-
-            # Create the job header
-            two_col(" | ".join(filter(None, [title, company])),
-                    fmt_range(date_range))
-
-            # Add responsibilities
-            if isinstance(responsibilities, list):
-                for resp in responsibilities:
-                    if isinstance(resp, str) and resp.strip():
-                        bullet(resp, 1)
-            elif isinstance(responsibilities, str) and responsibilities.strip():
-                bullet(responsibilities, 1)
-    else:
-        # If no structured experiences found, try to extract from summary
-        heading("Professional Experience:")
-        summary = sections.get("Summary", "")
-
-        # Filter to only dictionary experiences and sort by date (most recent first)
-        dict_exps = [e for e in exps if isinstance(e, dict) and e.get("title") and e.get("date_range")]
-
-            # Clean up the education entry (remove bullets)
-            clean_ed = ed.replace('•', '').strip()
-            if re.match(r'^\d+\s+years?$', clean_ed, re.I):
-                # This is experience duration, not education
-                experience_years = clean_ed
-            else:
-                processed_education.append(clean_ed)
-                has_real_education = True
-
-    # Show education section
-    if has_real_education:
-        heading("Education:")
-        for ed in processed_education:
-            bullet(ed)
-    elif experience_years:
-        # If only experience years found, show it as a note
-        heading("Education:")
-        pg = doc.add_paragraph()
-        pg.add_run(f"Professional experience: {experience_years}").font.size = Pt(11)
-
-        # Ensure tr is a string
-        if isinstance(tr, str) and tr.strip():
-            bullet(tr)
-
-    # Final diagnostic before returning
-    logger.info(f"BUILDER: FINAL STATE - Document has {len(doc.sections)} sections")
-    for i, section_obj in enumerate(doc.sections):
-        if section_obj.header:
-            logger.info(f"BUILDER: FINAL - Section {i} header has {len(section_obj.header.paragraphs)} paragraphs")
-        if section_obj.footer:
-            logger.info(f"BUILDER: FINAL - Section {i} footer has {len(section_obj.footer.paragraphs)} paragraphs")
-
-    return doc
+def add_section_heading(doc, text):
+    """Adds a centered section heading."""
+    p = doc.add_paragraph()
+    run = p.add_run(text.upper())
+    run.bold = True
+    font = run.font
+    font.size = Pt(12)
+    font.name = 'Arial'
+    p.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
+    p.paragraph_format.space_after = Pt(6)
+
+
+def build_resume_from_data(tmpl: str, sections: dict, remove_blank_pages_enabled: bool = True) -> Document:
+    """
+    Builds a formatted resume from structured data, inserting header/footer images and logging the process.
+    """
+    logger.info("BUILDER: Starting image-based resume build process.")
+    try:
+        # 1. Create a new blank document, ignoring the template file
+        doc = Document()
+        logger.info("BUILDER: Successfully created a new blank document.")
+
+        # Get section and enable different first page header/footer
+        section = doc.sections[0]
+        section.different_first_page = True
+
+        # Move header and footer to the very edge of the page
+        section.header_distance = Pt(0)
+        section.footer_distance = Pt(0)
+        logger.info("BUILDER: Set header/footer distance to 0 to remove whitespace.")
+
+        # 2. Define image paths relative to the project root
+        script_dir = os.path.dirname(os.path.abspath(__file__))
+        project_root = os.path.dirname(script_dir)
+        header_path = os.path.join(project_root, 'header.png')
+        footer_path = os.path.join(project_root, 'footer.png')
+
+        logger.info(f"BUILDER: Attempting to use header image from: {header_path}")
+        logger.info(f"BUILDER: Attempting to use footer image from: {footer_path}")
+
+        if not os.path.exists(header_path):
+            logger.error(f"BUILDER FATAL: Header image not found at '{header_path}'. Cannot proceed.")
+            return doc  # Return empty doc
+        if not os.path.exists(footer_path):
+            logger.error(f"BUILDER FATAL: Footer image not found at '{footer_path}'. Cannot proceed.")
+            return doc  # Return empty doc
+
+        # 3. Setup Headers
+        candidate_name = sections.get("Name", "Candidate Name Not Found")
+        experiences = sections.get("StructuredExperiences", [])
+        job_title = experiences[0].get("title", "") if experiences else ""
+
+        # -- First Page Header (Image + Name + Title) --
+        first_page_header = section.first_page_header
+        first_page_header.is_linked_to_previous = False
+
+        # Safely get or create a paragraph for the image
+        p_header_img_first = first_page_header.paragraphs[0] if first_page_header.paragraphs else first_page_header.add_paragraph()
+        p_header_img_first.clear()
+
+        p_header_img_first.paragraph_format.space_before = Pt(0)
+        p_header_img_first.paragraph_format.space_after = Pt(0)
+        p_header_img_first.paragraph_format.left_indent = -section.left_margin
+        p_header_img_first.add_run().add_picture(header_path, width=section.page_width)
+        logger.info("BUILDER: Inserted header.png into FIRST PAGE header.")
+
+        # Add Name
+        p_name = first_page_header.add_paragraph()
+        run_name = p_name.add_run(candidate_name.upper())
+        run_name.font.name = 'Arial'
+        run_name.font.size = Pt(14)
+        run_name.bold = True
+        p_name.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
+        p_name.paragraph_format.space_before = Pt(6)
+        p_name.paragraph_format.space_after = Pt(0)
+        logger.info(f"BUILDER: Added candidate name '{candidate_name}' to FIRST PAGE header.")
+
+        # Add Job Title
+        if job_title:
+            p_title = first_page_header.add_paragraph()
+            run_title = p_title.add_run(job_title)
+            run_title.font.name = 'Arial'
+            run_title.font.size = Pt(11)
+            p_title.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
+            p_title.paragraph_format.space_before = Pt(0)
+            logger.info(f"BUILDER: Added job title '{job_title}' to FIRST PAGE header.")
+
+        # -- Primary Header for subsequent pages (Image Only) --
+        primary_header = section.header
+        primary_header.is_linked_to_previous = False
+
+        # Safely get or create a paragraph for the image
+        p_header_img_primary = primary_header.paragraphs[0] if primary_header.paragraphs else primary_header.add_paragraph()
+        p_header_img_primary.clear()
+
+        p_header_img_primary.paragraph_format.space_before = Pt(0)
+        p_header_img_primary.paragraph_format.space_after = Pt(0)
+        p_header_img_primary.paragraph_format.left_indent = -section.left_margin
+        p_header_img_primary.add_run().add_picture(header_path, width=section.page_width)
+        logger.info("BUILDER: Inserted header.png into PRIMARY header for subsequent pages.")
+
+        # 4. Insert Footer Image (same for all pages)
+        footer = section.footer
+        footer.is_linked_to_previous = False
+
+        # Safely get or create a paragraph for the image
+        p_footer_img = footer.paragraphs[0] if footer.paragraphs else footer.add_paragraph()
+        p_footer_img.clear()
+
+        p_footer_img.paragraph_format.space_before = Pt(0)
+        p_footer_img.paragraph_format.space_after = Pt(0)
+        p_footer_img.paragraph_format.left_indent = -section.left_margin
+        p_footer_img.add_run().add_picture(footer_path, width=section.page_width)
+
+        # Link the first page footer to the primary footer so we only define it once.
+        section.first_page_footer.is_linked_to_previous = True
+        logger.info("BUILDER: Inserted footer.png and configured for all pages.")
 
+        # 5. Build Resume Body
+        logger.info("BUILDER: Proceeding to add structured resume content to document body.")
 
+        # --- Professional Summary ---
+        if sections.get("Summary"):
+            add_section_heading(doc, "Professional Summary")
+            doc.add_paragraph(sections["Summary"]).paragraph_format.space_after = Pt(12)
+
+        # --- Skills ---
+        if sections.get("Skills"):
+            add_section_heading(doc, "Skills")
+            skills_text = ", ".join(sections["Skills"])
+            p = doc.add_paragraph(skills_text)
+            p.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
+            p.paragraph_format.space_after = Pt(12)
+
+        # --- Professional Experience ---
+        if experiences:
+            add_section_heading(doc, "Professional Experience")
+            for exp in experiences:
+                if not isinstance(exp, dict):
+                    continue
 
+                p = doc.add_paragraph()
+                p.add_run(exp.get("title", "N/A")).bold = True
+                p.add_run(" | ").bold = True
+                p.add_run(exp.get("company", "N/A")).italic = True
+                p.add_run(f'\t{fmt_range(exp.get("date_range", ""))}')
+
+                tab_stops = p.paragraph_format.tab_stops
+                tab_stops.add_tab_stop(Inches(6.5), WD_TAB_ALIGNMENT.RIGHT)
 
+                responsibilities = exp.get("responsibilities", [])
+                if responsibilities and isinstance(responsibilities, list):
+                    for resp in responsibilities:
+                        if resp.strip():
+                            try:
+                                p_resp = doc.add_paragraph(resp, style='List Bullet')
+                            except KeyError:
+                                p_resp = doc.add_paragraph(f"• {resp}")
+
+                            p_resp.paragraph_format.left_indent = Inches(0.25)
+                            p_resp.paragraph_format.space_before = Pt(0)
+                            p_resp.paragraph_format.space_after = Pt(3)
 
+                doc.add_paragraph().paragraph_format.space_after = Pt(6)
+
+        # --- Education ---
+        if sections.get("Education"):
+            add_section_heading(doc, "Education")
+            for edu in sections.get("Education", []):
+                if edu.strip():
+                    try:
+                        p_edu = doc.add_paragraph(edu, style='List Bullet')
+                    except KeyError:
+                        p_edu = doc.add_paragraph(f"• {edu}")
+
+                    p_edu.paragraph_format.left_indent = Inches(0.25)
+
+        logger.info("BUILDER: Resume build process completed successfully.")
+        return doc
 
+    except Exception:
+        logger.error("BUILDER: An unexpected error occurred during resume generation.", exc_info=True)
+        return Document()
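A hypothetical call into the rebuilt builder, with input following the section schema used throughout this commit (note that `tmpl` is retained for call-site compatibility but ignored, and header.png/footer.png must exist at the repo root or an empty document is returned):

from utils.builder import build_resume_from_data

sections = {
    "Name": "Jane Doe",
    "Summary": "Automation test engineer with 8 years of experience.",
    "Skills": ["Python", "Selenium", "TestNG"],
    "StructuredExperiences": [{
        "title": "Senior QA Engineer",
        "company": "Acme Corp",
        "date_range": "Jan 2021 - Present",
        "responsibilities": ["Built and maintained the regression suite."],
    }],
    "Education": ["B.S. Computer Science, 2015"],
}

doc = build_resume_from_data(tmpl="", sections=sections)  # tmpl is ignored by the new implementation
doc.save("formatted_resume.docx")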
utils/openai_extractor.py
CHANGED
|
@@ -1,165 +1,175 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
-
OpenAI
|
| 4 |
-
|
| 5 |
-
This module provides resume extraction using OpenAI's GPT-4o model (GPT-4.1),
|
| 6 |
-
which is the latest and most capable model for complex resume parsing.
|
| 7 |
"""
|
| 8 |
|
| 9 |
import json
|
| 10 |
import re
|
| 11 |
import logging
|
| 12 |
-
import os
|
| 13 |
from typing import Dict, Any, List, Optional
|
|
|
|
|
|
|
| 14 |
from openai import OpenAI
|
| 15 |
|
| 16 |
-
#
|
| 17 |
-
logging.basicConfig(level=logging.INFO)
|
| 18 |
logger = logging.getLogger(__name__)
|
| 19 |
|
|
|
|
| 20 |
class OpenAIResumeExtractor:
|
| 21 |
"""
|
| 22 |
-
|
| 23 |
"""
|
| 24 |
|
| 25 |
def __init__(self, api_key: Optional[str] = None, model: str = "gpt-4o"):
|
| 26 |
-
"""
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
Args:
|
| 30 |
-
api_key: OpenAI API key (optional, will use env var if not provided)
|
| 31 |
-
model: OpenAI model to use (gpt-4o is the latest and most capable GPT-4 model)
|
| 32 |
-
"""
|
| 33 |
-
self.api_key = api_key or os.getenv('OPENAI_API_KEY')
|
| 34 |
self.model = model
|
| 35 |
-
|
| 36 |
-
if not self.api_key:
|
| 37 |
-
raise ValueError("No OpenAI API key found. Set OPENAI_API_KEY environment variable.")
|
| 38 |
-
|
| 39 |
-
self.client = OpenAI(api_key=self.api_key)
|
| 40 |
|
| 41 |
def extract_sections_openai(self, text: str) -> Dict[str, Any]:
|
| 42 |
"""
|
| 43 |
-
Extract resume sections using OpenAI
|
| 44 |
|
| 45 |
Args:
|
| 46 |
text: Raw resume text
|
| 47 |
|
| 48 |
Returns:
|
| 49 |
-
|
| 50 |
"""
|
| 51 |
-
logger.info("Starting OpenAI
|
| 52 |
|
| 53 |
try:
|
| 54 |
-
# Create
|
| 55 |
prompt = self._create_extraction_prompt(text)
|
| 56 |
|
| 57 |
-
#
|
| 58 |
response = self.client.chat.completions.create(
|
| 59 |
model=self.model,
|
| 60 |
messages=[
|
| 61 |
-
{
|
| 62 |
-
|
| 63 |
-
"content": "You are an expert resume parser. Extract information accurately and return valid JSON only."
|
| 64 |
-
},
|
| 65 |
-
{
|
| 66 |
-
"role": "user",
|
| 67 |
-
"content": prompt
|
| 68 |
-
}
|
| 69 |
],
|
| 70 |
-
temperature=0.1,
|
| 71 |
max_tokens=2000
|
| 72 |
)
|
| 73 |
|
| 74 |
-
# Parse
|
| 75 |
-
|
|
|
|
| 76 |
|
| 77 |
-
# Clean
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
elif "```" in result_text:
|
| 81 |
-
result_text = result_text.split("```")[1]
|
| 82 |
|
| 83 |
-
#
|
| 84 |
-
result = json.loads(result_text)
|
| 85 |
-
|
| 86 |
-
# Validate and clean the result
|
| 87 |
result = self._validate_and_clean_result(result)
|
| 88 |
|
| 89 |
-
#
|
| 90 |
contact_info = self._extract_contact_info(text)
|
| 91 |
result["ContactInfo"] = contact_info
|
| 92 |
|
| 93 |
logger.info("✅ OpenAI extraction completed successfully")
|
| 94 |
return result
|
| 95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
except Exception as e:
|
| 97 |
logger.error(f"OpenAI extraction failed: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
return self._get_empty_result()
|
| 104 |
|
| 105 |
-
|
| 106 |
-
return self._fallback_extraction(text)
|
| 107 |
|
| 108 |
def _create_extraction_prompt(self, text: str) -> str:
|
| 109 |
-
"""Create
|
| 110 |
-
|
| 111 |
prompt = f"""
|
| 112 |
-
Extract
|
| 113 |
-
|
| 114 |
-
RESUME TEXT:
|
| 115 |
-
{text}
|
| 116 |
-
|
| 117 |
-
Extract and return ONLY a JSON object with this exact structure:
|
| 118 |
|
| 119 |
{{
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
|
|
|
| 133 |
}}
|
| 134 |
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
2. Summary: Extract the complete professional summary/objective section
|
| 138 |
-
3. Skills: Extract technical skills only (programming languages, tools, frameworks)
|
| 139 |
-
4. StructuredExperiences: For each job, extract:
|
| 140 |
-
- title: The job title/position
|
| 141 |
-
- company: Company name (include location if provided)
|
| 142 |
-
- date_range: Employment dates
|
| 143 |
-
- responsibilities: List of bullet points describing what they did
|
| 144 |
-
5. Education: Extract degrees, institutions, and graduation years
|
| 145 |
-
6. Training: Extract certifications, courses, training programs
|
| 146 |
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
| 148 |
- Return ONLY valid JSON, no explanations
|
| 149 |
- If a section is not found, use empty string or empty array
|
| 150 |
-
-
|
| 151 |
-
- For experiences, look for patterns like "Title | Company | Dates" or similar
|
| 152 |
-
- Extract ALL job experiences found in the resume
|
| 153 |
-
- Include ALL bullet points under each job as responsibilities
|
| 154 |
"""
|
| 155 |
-
|
| 156 |
return prompt
|
| 157 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
def _validate_and_clean_result(self, result: Dict[str, Any]) -> Dict[str, Any]:
|
| 159 |
-
"""Validate and clean the extraction result"""
|
| 160 |
|
| 161 |
# Ensure all required keys exist
|
| 162 |
-
required_keys = ["Name", "Summary", "Skills", "StructuredExperiences", "Education", "Training"]
|
| 163 |
for key in required_keys:
|
| 164 |
if key not in result:
|
| 165 |
result[key] = [] if key in ["Skills", "StructuredExperiences", "Education", "Training"] else ""
|
|
@@ -187,59 +197,45 @@ IMPORTANT:
|
|
| 187 |
|
| 188 |
return result
|
| 189 |
|
| 190 |
-
def _get_empty_result(self) -> Dict[str, Any]:
|
| 191 |
-
"""Return empty result structure for API failures"""
|
| 192 |
-
return {
|
| 193 |
-
"Name": "",
|
| 194 |
-
"Summary": "",
|
| 195 |
-
"Skills": [],
|
| 196 |
-
"StructuredExperiences": [],
|
| 197 |
-
"Education": [],
|
| 198 |
-
"Training": [],
|
| 199 |
-
"ContactInfo": {}
|
| 200 |
-
}
|
| 201 |
-
|
| 202 |
def _is_company_name(self, text: str) -> bool:
|
| 203 |
-
"""Check if text looks like a company name rather than a skill"""
|
| 204 |
company_indicators = [
|
| 205 |
"inc", "llc", "corp", "ltd", "company", "solutions", "services",
|
| 206 |
-
"systems", "technologies", "financial", "insurance"
|
| 207 |
]
|
| 208 |
text_lower = text.lower()
|
| 209 |
return any(indicator in text_lower for indicator in company_indicators)
|
| 210 |
|
| 211 |
def _fallback_extraction(self, text: str) -> Dict[str, Any]:
|
| 212 |
-
"""Fallback to regex-based extraction if OpenAI fails"""
|
| 213 |
logger.info("Using regex fallback extraction...")
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
"Training": [],
|
| 226 |
-
"ContactInfo": self._extract_contact_info(text)
|
| 227 |
-
}
|
| 228 |
|
| 229 |
def _extract_name_regex(self, text: str) -> str:
|
| 230 |
-
"""Regex fallback for name extraction"""
|
| 231 |
lines = text.split('\n')[:5]
|
| 232 |
for line in lines:
|
| 233 |
line = line.strip()
|
| 234 |
if re.search(r'@|phone|email|linkedin|github', line.lower()):
|
| 235 |
continue
|
| 236 |
-
|
|
|
|
| 237 |
if name_match:
|
| 238 |
return name_match.group(1)
|
| 239 |
return ""
|
| 240 |
|
| 241 |
def _extract_summary_regex(self, text: str) -> str:
|
| 242 |
-
"""Regex fallback for summary extraction"""
|
| 243 |
summary_pattern = r'(?i)(?:professional\s+)?summary[:\s]*\n(.*?)(?=\n\s*(?:technical\s+skills?|skills?|experience|education))'
|
| 244 |
match = re.search(summary_pattern, text, re.DOTALL)
|
| 245 |
if match:
|
|
@@ -250,7 +246,7 @@ IMPORTANT:
|
|
| 250 |
return ""
|
| 251 |
|
| 252 |
def _extract_skills_regex(self, text: str) -> List[str]:
|
| 253 |
-
"""Regex fallback for skills extraction"""
|
| 254 |
skills = set()
|
| 255 |
|
| 256 |
# Look for technical skills section
|
|
@@ -269,7 +265,7 @@ IMPORTANT:
|
|
| 269 |
return sorted(list(skills))
|
| 270 |
|
| 271 |
def _extract_experiences_regex(self, text: str) -> List[Dict[str, Any]]:
|
| 272 |
-
"""Regex fallback for experience extraction"""
|
| 273 |
experiences = []
|
| 274 |
|
| 275 |
# Look for work experience section
|
|
@@ -303,7 +299,7 @@ IMPORTANT:
|
|
| 303 |
return experiences
|
| 304 |
|
| 305 |
def _extract_education_regex(self, text: str) -> List[str]:
|
| 306 |
-
"""Regex fallback for education extraction"""
|
| 307 |
education = []
|
| 308 |
|
| 309 |
edu_pattern = r'(?i)education[:\s]*\n(.*?)(?=\n\s*(?:certifications?|projects?|$))'
|
|
@@ -319,98 +315,25 @@ IMPORTANT:
|
|
| 319 |
|
| 320 |
return education
|
| 321 |
|
| 322 |
-
def
|
| 323 |
-
"""
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
contact_info["email"] = email_match.group(0)
|
| 330 |
-
|
| 331 |
-
# Extract phone
|
| 332 |
-
phone_patterns = [
|
| 333 |
-
r'\+?1?[-.\s]?\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})',
|
| 334 |
-
r'(\d{3})[-.\s](\d{3})[-.\s](\d{4})',
|
| 335 |
-
r'\+\d{1,3}[-.\s]?\d{3}[-.\s]?\d{3}[-.\s]?\d{4}'
|
| 336 |
-
]
|
| 337 |
-
|
| 338 |
-
for pattern in phone_patterns:
|
| 339 |
-
phone_match = re.search(pattern, text)
|
| 340 |
-
if phone_match:
|
| 341 |
-
contact_info["phone"] = phone_match.group(0)
|
| 342 |
-
break
|
| 343 |
-
|
| 344 |
-
# Extract LinkedIn
|
| 345 |
-
linkedin_patterns = [
|
| 346 |
-
r'linkedin\.com/in/[\w-]+',
|
| 347 |
-
r'linkedin\.com/[\w-]+',
|
| 348 |
-
r'(?i)linkedin[:\s]+[\w.-]+',
|
| 349 |
]
|
| 350 |
|
| 351 |
-
for pattern in
|
| 352 |
-
|
| 353 |
-
if
|
| 354 |
-
|
| 355 |
-
if not linkedin_url.startswith('http'):
|
| 356 |
-
linkedin_url = f"https://{linkedin_url}"
|
| 357 |
-
contact_info["linkedin"] = linkedin_url
|
| 358 |
-
break
|
| 359 |
|
| 360 |
-
return
|
|
|
|
| 361 |
|
| 362 |
-
#
|
| 363 |
def extract_sections_openai(text: str, api_key: Optional[str] = None) -> Dict[str, Any]:
|
| 364 |
-
"""
|
| 365 |
-
Extract resume sections using OpenAI GPT-4o (GPT-4.1)
|
| 366 |
-
|
| 367 |
-
Args:
|
| 368 |
-
text: Raw resume text
|
| 369 |
-
api_key: OpenAI API key (optional)
|
| 370 |
-
|
| 371 |
-
Returns:
|
| 372 |
-
Structured resume data
|
| 373 |
-
"""
|
| 374 |
extractor = OpenAIResumeExtractor(api_key=api_key)
|
| 375 |
-
return extractor.extract_sections_openai(text)
|
| 376 |
-
|
| 377 |
-
# Test function
|
| 378 |
-
def test_openai_extraction():
|
| 379 |
-
"""Test the OpenAI extraction with sample resume"""
|
| 380 |
-
|
| 381 |
-
sample_text = """
|
| 382 |
-
John Doe
|
| 383 |
-
Selenium Java Automation Engineer
|
| 384 |
-
Email: [email protected] | Phone: +1-123-456-7890
|
| 385 |
-
|
| 386 |
-
Professional Summary
|
| 387 |
-
Results-driven Automation Test Engineer with 8 years of experience in Selenium and Java,
|
| 388 |
-
specializing in automation frameworks for financial and insurance domains.
|
| 389 |
-
|
| 390 |
-
Technical Skills
|
| 391 |
-
Selenium WebDriver, Java, TestNG, Cucumber, Jenkins, Maven, Git, REST Assured, Postman,
|
| 392 |
-
JIRA, Agile/Scrum, CI/CD
|
| 393 |
-
|
| 394 |
-
Work Experience
|
| 395 |
-
Senior Automation Test Engineer | ABC Financial Services | Jan 2021 - Present
|
| 396 |
-
- Led automation framework enhancements using Selenium and Java, improving test efficiency.
|
| 397 |
-
- Automated end-to-end UI and API testing for financial applications, reducing manual effort by 40%.
|
| 398 |
-
|
| 399 |
-
Automation Test Engineer | XYZ Insurance Solutions | Jun 2017 - Dec 2020
|
| 400 |
-
- Designed and implemented Selenium automation framework using Java and TestNG.
|
| 401 |
-
- Developed automated test scripts for insurance policy management applications.
|
| 402 |
-
|
| 403 |
-
Education
|
| 404 |
-
Bachelor of Technology in Computer Science | ABC University | 2015
|
| 405 |
-
"""
|
| 406 |
-
|
| 407 |
-
extractor = OpenAIResumeExtractor()
|
| 408 |
-
result = extractor.extract_sections_openai(sample_text)
|
| 409 |
-
|
| 410 |
-
print("OpenAI Extraction Results:")
|
| 411 |
-
print(json.dumps(result, indent=2))
|
| 412 |
-
|
| 413 |
-
return result
|
| 414 |
-
|
| 415 |
-
if __name__ == "__main__":
|
| 416 |
-
test_openai_extraction()
|
|
|
|
|
|
|

 """
+OpenAI-based resume data extraction.
+Uses GPT models to extract structured information from resume text.
 """

 import json
 import re
 import logging
 from typing import Dict, Any, List, Optional
+
+import openai
 from openai import OpenAI

+# Set up logging
 logger = logging.getLogger(__name__)

+
 class OpenAIResumeExtractor:
     """
+    Resume data extractor using OpenAI's GPT models.
     """

     def __init__(self, api_key: Optional[str] = None, model: str = "gpt-4o"):
+        """Initialize with OpenAI API key and model."""
+        self.client = OpenAI(api_key=api_key) if api_key else OpenAI()
         self.model = model
+        logger.info(f"OpenAI extractor initialized with model: {model}")

     def extract_sections_openai(self, text: str) -> Dict[str, Any]:
         """
+        Extract resume sections using OpenAI API.

         Args:
             text: Raw resume text

         Returns:
+            Dict containing extracted sections
         """
+        logger.info("Starting OpenAI extraction...")

         try:
+            # Create extraction prompt
             prompt = self._create_extraction_prompt(text)

+            # Call OpenAI API
             response = self.client.chat.completions.create(
                 model=self.model,
                 messages=[
+                    {"role": "system", "content": "You are an expert resume parser. Extract information and return ONLY valid JSON."},
+                    {"role": "user", "content": prompt}
                 ],
+                temperature=0.1,
                 max_tokens=2000
             )

+            # Parse response
+            content = response.choices[0].message.content.strip()
+            logger.debug(f"OpenAI response: {content[:200]}...")

+            # Clean and parse JSON
+            content = self._clean_json_response(content)
+            result = json.loads(content)

+            # Validate and enhance result
             result = self._validate_and_clean_result(result)

+            # Add contact info extraction
             contact_info = self._extract_contact_info(text)
             result["ContactInfo"] = contact_info

             logger.info("✅ OpenAI extraction completed successfully")
             return result

+        except json.JSONDecodeError as e:
+            logger.error(f"JSON parsing error: {e}")
+            logger.debug(f"Response content: {content}")
+            return self._fallback_extraction(text)
+
         except Exception as e:
             logger.error(f"OpenAI extraction failed: {e}")
+            return self._fallback_extraction(text)
+
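
The hunk above asks for JSON in the system prompt and then repairs the reply by hand in _clean_json_response. A lighter variant (not part of this commit; a sketch assuming a JSON-mode-capable model such as gpt-4o) would pass response_format on the same call, so the API itself guarantees syntactically valid output:

    # Sketch only: the same request with OpenAI's JSON mode enabled
    response = self.client.chat.completions.create(
        model=self.model,
        messages=[
            {"role": "system", "content": "You are an expert resume parser. Return ONLY valid JSON."},
            {"role": "user", "content": prompt},
        ],
        temperature=0.1,
        max_tokens=2000,
        response_format={"type": "json_object"},  # API enforces parseable JSON
    )

With that flag set, json.loads on the reply should not raise, and the fence-stripping below becomes purely defensive.
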
+    def _clean_json_response(self, content: str) -> str:
+        """Clean JSON response from OpenAI."""
+        # Remove markdown code blocks
+        content = re.sub(r'```json\s*', '', content)
+        content = re.sub(r'```\s*$', '', content)
+
+        # Remove any text before first {
+        start = content.find('{')
+        if start > 0:
+            content = content[start:]

+        # Remove any text after last }
+        end = content.rfind('}')
+        if end > 0 and end < len(content) - 1:
+            content = content[:end + 1]

+        return content.strip()

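
A quick probe of that cleaner (hypothetical values, assuming an OpenAIResumeExtractor instance named extractor):

    raw = '```json\n{"Name": "Jane Roe"}\n```'    # fenced reply, as GPT often formats it
    cleaned = extractor._clean_json_response(raw)  # -> '{"Name": "Jane Roe"}'
    data = json.loads(cleaned)                     # parses without error
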
     def _create_extraction_prompt(self, text: str) -> str:
+        """Create prompt for OpenAI extraction."""
         prompt = f"""
+Extract information from this resume and return ONLY valid JSON in this exact format:

 {{
+  "Name": "Full Name with credentials (PhD, MBA, etc.)",
+  "Summary": "Professional summary or objective",
+  "Skills": ["skill1", "skill2", "skill3"],
+  "StructuredExperiences": [
+    {{
+      "title": "Job Title",
+      "company": "Company Name",
+      "date_range": "Start Date - End Date",
+      "responsibilities": ["responsibility1", "responsibility2"]
+    }}
+  ],
+  "Education": ["degree info", "school info"],
+  "Training": ["certification1", "training1"],
+  "Address": "Full address if available"
 }}

+Resume text:
+{text}

+CRITICAL INSTRUCTIONS:
+- For NAME: Include ALL credentials (PhD, MBA, M.S., B.S., etc.) - example: "John Doe, PhD, MBA"
+- Read the ENTIRE resume text carefully, don't miss content
+- Extract ALL work experiences with full details
 - Return ONLY valid JSON, no explanations
 - If a section is not found, use empty string or empty array
+- Extract actual technical skills, not company names
 """
         return prompt

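
Because the template is a plain f-string, the only dynamic slot is {text}; everything else is fixed. A one-line sanity check (hypothetical input):

    prompt = extractor._create_extraction_prompt("Jane Roe\nPython, SQL")
    assert "Jane Roe" in prompt and "CRITICAL INSTRUCTIONS:" in prompt
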
+    def _extract_contact_info(self, text: str) -> Dict[str, str]:
+        """Extract contact information from resume text."""
+        contact_info = {}
+
+        # Extract email
+        email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
+        email_match = re.search(email_pattern, text)
+        if email_match:
+            contact_info['email'] = email_match.group()
+
+        # Extract phone number
+        phone_patterns = [
+            r'\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}',
+            r'\+1[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}',
+            r'\d{3}[-.\s]?\d{3}[-.\s]?\d{4}'
+        ]
+
+        for pattern in phone_patterns:
+            phone_match = re.search(pattern, text)
+            if phone_match:
+                contact_info['phone'] = phone_match.group().strip()
+                break
+
+        # Extract LinkedIn
+        linkedin_pattern = r'linkedin\.com/in/[A-Za-z0-9-]+'
+        linkedin_match = re.search(linkedin_pattern, text)
+        if linkedin_match:
+            contact_info['linkedin'] = linkedin_match.group()
+
+        logger.info(f"OPENAI: Extracted ContactInfo as dict: {contact_info}")
+        return contact_info
+
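
As a sanity check (hypothetical contact line, not from this commit), the three regex families above pull apart a typical header like so:

    info = extractor._extract_contact_info(
        "Jane Roe | jane.roe@example.com | (614) 555-0143 | linkedin.com/in/janeroe"
    )
    # -> {'email': 'jane.roe@example.com',
    #     'phone': '(614) 555-0143',
    #     'linkedin': 'linkedin.com/in/janeroe'}

Note the LinkedIn pattern now only accepts linkedin.com/in/... profile URLs; the looser linkedin.com/[\w-]+ form from the removed version is gone.
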
     def _validate_and_clean_result(self, result: Dict[str, Any]) -> Dict[str, Any]:
+        """Validate and clean the extraction result."""

         # Ensure all required keys exist
+        required_keys = ["Name", "Summary", "Skills", "StructuredExperiences", "Education", "Training", "Address"]
         for key in required_keys:
             if key not in result:
                 result[key] = [] if key in ["Skills", "StructuredExperiences", "Education", "Training"] else ""

         ...

         return result

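
On the visible part of this method, a partial result gets its missing keys backfilled with type-appropriate empty values; the collapsed lines 176-196 may clean the values further. Hypothetically:

    full = extractor._validate_and_clean_result({"Name": "Jane Roe, PhD"})
    # full["Skills"] == [] and full["Summary"] == "" after the key backfill above
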
     def _is_company_name(self, text: str) -> bool:
+        """Check if text looks like a company name rather than a skill."""
         company_indicators = [
             "inc", "llc", "corp", "ltd", "company", "solutions", "services",
+            "systems", "technologies", "financial", "insurance"
         ]
         text_lower = text.lower()
         return any(indicator in text_lower for indicator in company_indicators)

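
Two hypothetical probes of that heuristic:

    extractor._is_company_name("ABC Financial Services")  # True  ("financial", "services")
    extractor._is_company_name("Selenium WebDriver")      # False (no indicator substring)

Since this is plain substring matching, words that merely contain an indicator also trip it; "principal", for instance, contains "inc".
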
     def _fallback_extraction(self, text: str) -> Dict[str, Any]:
+        """Fallback to regex-based extraction if OpenAI fails."""
         logger.info("Using regex fallback extraction...")
+
+        return {
+            "Name": self._extract_name_regex(text),
+            "Summary": self._extract_summary_regex(text),
+            "Skills": self._extract_skills_regex(text),
+            "StructuredExperiences": self._extract_experiences_regex(text),
+            "Education": self._extract_education_regex(text),
+            "Training": [],
+            "Address": self._extract_address_regex(text),
+            "ContactInfo": self._extract_contact_info(text)
+        }

     def _extract_name_regex(self, text: str) -> str:
+        """Regex fallback for name extraction."""
         lines = text.split('\n')[:5]
         for line in lines:
             line = line.strip()
             if re.search(r'@|phone|email|linkedin|github', line.lower()):
                 continue
+            # Match name with potential credentials (PhD, MBA, etc.)
+            name_match = re.match(r'^([A-Z][a-z]+ [A-Z][a-z]+(?:\s+[A-Z][a-z]+)?(?:,\s*[A-Z][a-z.]+(?:,\s*[A-Z][a-z.]+)?)?)', line)
             if name_match:
                 return name_match.group(1)
         return ""

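
One wrinkle worth knowing (my reading of the pattern, not something this commit addresses): the credential branch [A-Z][a-z.]+ cannot span a second uppercase letter, so mixed-case credentials survive only partially in the regex fallback, while the OpenAI path is explicitly told to keep them whole:

    pattern = r'^([A-Z][a-z]+ [A-Z][a-z]+(?:\s+[A-Z][a-z]+)?(?:,\s*[A-Z][a-z.]+(?:,\s*[A-Z][a-z.]+)?)?)'
    re.match(pattern, "John Doe, PhD").group(1)  # -> 'John Doe, Ph' (stops at the capital D)
    re.match(pattern, "John Doe, MBA").group(1)  # -> 'John Doe'     (branch needs a lowercase)
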
     def _extract_summary_regex(self, text: str) -> str:
+        """Regex fallback for summary extraction."""
         summary_pattern = r'(?i)(?:professional\s+)?summary[:\s]*\n(.*?)(?=\n\s*(?:technical\s+skills?|skills?|experience|education))'
         match = re.search(summary_pattern, text, re.DOTALL)
         if match:
             ...
         return ""

     def _extract_skills_regex(self, text: str) -> List[str]:
+        """Regex fallback for skills extraction."""
         skills = set()

         # Look for technical skills section
         ...
         return sorted(list(skills))

     def _extract_experiences_regex(self, text: str) -> List[Dict[str, Any]]:
+        """Regex fallback for experience extraction."""
         experiences = []

         # Look for work experience section
         ...
         return experiences

     def _extract_education_regex(self, text: str) -> List[str]:
+        """Regex fallback for education extraction."""
         education = []

         edu_pattern = r'(?i)education[:\s]*\n(.*?)(?=\n\s*(?:certifications?|projects?|$))'
         ...

         return education

+    def _extract_address_regex(self, text: str) -> str:
+        """Regex fallback for address extraction."""
+        # Look for address patterns like "6001 Tain Dr. Suite 203, Dublin, OH, 43016"
+        address_patterns = [
+            r'(\d+\s+[A-Za-z\s\.]+(?:Suite|Apt|Unit)\s+\d+,?\s*[A-Za-z\s]+,\s*[A-Z]{2}\s*\d{5})',
+            r'(\d+\s+[A-Za-z\s\.]+,?\s*[A-Za-z\s]+,\s*[A-Z]{2}\s*\d{5})',
+            r'([A-Za-z\s\d\.]+,\s*[A-Za-z\s]+,\s*[A-Z]{2}\s*\d{5})'
+        ]

+        for pattern in address_patterns:
+            match = re.search(pattern, text)
+            if match:
+                return match.group(1).strip()

+        return ""
+
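
One caveat from reading those patterns (an observation, not something the commit fixes): none of the three alternatives allows a comma between the state and the ZIP, so the example in the comment above, "... Dublin, OH, 43016", falls through all of them and the method returns an empty string. A tolerant variant would make that comma optional:

    # Sketch: the first pattern with an optional comma before the ZIP
    r'(\d+\s+[A-Za-z\s\.]+(?:Suite|Apt|Unit)\s+\d+,?\s*[A-Za-z\s]+,\s*[A-Z]{2},?\s*\d{5})'
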

+# Main extraction function for compatibility
 def extract_sections_openai(text: str, api_key: Optional[str] = None) -> Dict[str, Any]:
+    """Extract resume sections using OpenAI API."""
     extractor = OpenAIResumeExtractor(api_key=api_key)
+    return extractor.extract_sections_openai(text)

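End to end, callers go through that module-level wrapper. A hypothetical driver (file name is an assumption, not from this commit; OpenAI() falls back to the OPENAI_API_KEY environment variable when no key is passed):

    from utils.openai_extractor import extract_sections_openai

    # Read plain resume text and extract structured sections
    with open("resume.txt") as f:
        sections = extract_sections_openai(f.read())
    print(sections["Name"], "-", len(sections["Skills"]), "skills")
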
utils/parser.py
CHANGED

@@ -3,7 +3,7 @@ import fitz  # PyMuPDF
 import re
 from io import BytesIO
 from docx import Document
-from config import supabase, embedding_model,
+from config import supabase, embedding_model, HF_ENDPOINTS, query

 def extract_name(resume_text: str) -> str:
     # look at the very top lines for a capitalized full name
utils/reporting.py
CHANGED

@@ -2,7 +2,7 @@
 import re
 import fitz  # PyMuPDF
 from io import BytesIO
-from config import supabase, embedding_model,
+from config import supabase, embedding_model, query
 from .screening import evaluate_resumes

 def generate_pdf_report(shortlisted_candidates, questions=None):

@@ -45,7 +45,7 @@ def generate_interview_questions_from_summaries(candidates):
     )

     try:
-        response =
+        response = supabase.ai.chat.completions.create(
             model="tgi",
             messages=[{"role": "user", "content": prompt}],
             temperature=0.7,
utils/screening.py
CHANGED

@@ -2,7 +2,7 @@
 from .parser import parse_resume, extract_email, summarize_resume
 from .hybrid_extractor import extract_resume_sections
 from .spacy_loader import get_nlp, is_spacy_available
-from config import supabase, embedding_model
+from config import supabase, embedding_model
 from fuzzywuzzy import fuzz
 from sentence_transformers import util
 import streamlit as st