Johnny committed
Commit · 1131989
Parent(s): 8f8f414

updated summarization with validations, update generate_pdf to wrap around text

config.py
CHANGED

@@ -19,7 +19,7 @@ supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
 embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
 
 HF_MODELS = {
-    "bart": "https://router.huggingface.co/hf-inference/models/
+    "bart": "https://router.huggingface.co/hf-inference/models/transformer3/H2-keywordextractor"
 }
 
 HF_API_TOKEN = os.getenv("HF_API_TOKEN")

@@ -28,44 +28,42 @@ if not HF_API_TOKEN:
 
 HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}
 
-def query(payload, model="bart", retries=
+def query(payload, model="bart", retries=5, delay=5):
     """
-    Sends a request to the Hugging Face API with retries.
-
-    :param payload: The input data for inference.
-    :param model: The model name ('bart' for summarization).
-    :param retries: Number of times to retry if the request fails.
-    :param delay: Delay in seconds before retrying.
-    :return: The model's response in JSON format, or None if all retries fail.
+    Sends a request to the Hugging Face API with retries and better error handling.
     """
     if model not in HF_MODELS:
-        raise ValueError("Invalid model name. Choose 'bart' for summarization.")
+        raise ValueError(f"Invalid model name: {model}. Choose 'bart' for summarization.")
 
     api_url = HF_MODELS[model]
 
     for attempt in range(retries):
         try:
-            response = requests.post(api_url, headers=HF_HEADERS, json=payload)
+            response = requests.post(api_url, headers=HF_HEADERS, json=payload, timeout=10)
 
             if response.status_code == 401:
-                print(f"
+                print(f"❌ API Key issue: Check HF_API_TOKEN. Unauthorized (401).")
                 return None
 
             if response.status_code == 402:
-                print(f"
+                print(f"💰 Payment Required (402). Free tier may not support this model.")
                 return None
 
-            if response.status_code
-                print(f"Server error (
+            if response.status_code in [500, 503]:
+                print(f"⚠️ Server error ({response.status_code}) on attempt {attempt + 1}. Retrying in {delay} seconds...")
                 time.sleep(delay)
                 continue
 
             response.raise_for_status()
             return response.json()
 
+        except requests.exceptions.Timeout:
+            print(f"⏳ Timeout error on attempt {attempt + 1}. Retrying...")
+            time.sleep(delay)
+
         except requests.exceptions.RequestException as e:
-            print(f"
+            print(f"❌ API Request Failed: {e}")
             time.sleep(delay)
 
-    print("All retry attempts failed.")
+    print("🚨 All retry attempts failed.")
     return None
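
For orientation, a minimal usage sketch of the updated query() helper follows. It is not part of this commit; it assumes config.py is importable as a module, that HF_API_TOKEN is set in the environment, and uses an illustrative payload.

# Hypothetical usage sketch -- not part of this commit.
# Assumes config.py is importable and HF_API_TOKEN is set in the environment.
from config import query

sample_payload = {"inputs": "Summarize this resume: 10 years of Python and SQL experience..."}

# query() retries on 500/503 responses and timeouts, and returns None on
# 401/402 or when every attempt fails, so callers must handle the None case.
result = query(sample_payload, model="bart", retries=3, delay=2)

if result is None:
    print("No summary produced; falling back.")
else:
    print(result)
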
utils.py
CHANGED

@@ -145,30 +145,33 @@ def score_candidate(resume_text, job_description):
 
 def summarize_resume(resume_text):
     """
-    Summarizes a resume using the Hugging Face BART model.
+    Summarizes a resume using the Hugging Face BART model with improved error handling.
     """
     payload = {"inputs": f"Summarize this resume: {resume_text}"}
-    response = query(payload, model="bart")
+    response = query(payload, model="bart")
 
     if not response:
-        print("Error: API response is None")
-        return "Summary
+        print("⚠️ Error: API response is None. Returning fallback summary.")
+        return "Summary unavailable due to API issues."
 
     try:
-        # Check if response is a list (sometimes HF returns a list with a dict inside)
         if isinstance(response, list) and len(response) > 0:
             response = response[0]
 
-
-
-
-
-
-
+        if isinstance(response, dict):
+            summary = response.get("generated_text") or response.get("summary_text")
+
+            if summary:
+                return summary.strip()
+            else:
+                print("⚠️ Unexpected API response format:", response)
+                return "Summary unavailable."
+
 
-
-        print(f"Error parsing summary: {e}")
-        return "Summary could not be generated."
+    except Exception as e:
+        print(f"⚠️ Error parsing summary: {e}")
+        return "Summary unavailable."
 
+    return "Summary unavailable."
 
 def store_in_supabase(resume_text, score, candidate_name, email, summary):
     """

@@ -194,26 +197,42 @@ def store_in_supabase(resume_text, score, candidate_name, email, summary):
     response = supabase.table("candidates").insert(data).execute()
     return response
 
-# Test with 10 resumes, if they will be shortlisted
 def generate_pdf_report(shortlisted_candidates):
-    """Generates a PDF summary of shortlisted candidates."""
+    """Generates a PDF summary of shortlisted candidates with proper text wrapping."""
     pdf = BytesIO()
     doc = fitz.open()
 
     for candidate in shortlisted_candidates:
         page = doc.new_page()
 
-        # Use
+        # Use stored summary, or provide a fallback
        summary = candidate.get("summary", "No summary available")
 
-
-
+        # Define text area properties
+        text_box_x = 50        # Left margin
+        text_box_y = 50        # Top margin
+        text_box_width = 500   # Max width before wrapping
+        text_box_height = 700  # Max height before splitting to a new page
+        font_size = 11         # Font size for better readability
+
+        # Format candidate details
+        candidate_info = (
             f"Candidate: {candidate['name']}\n"
             f"Email: {candidate['email']}\n"
-            f"Score: {candidate['score']}\n"
-            f"Summary
+            f"Score: {candidate['score']}\n\n"
+            f"Summary:\n{summary}"
         )
 
+        # Check if the text fits in the allowed area
+        text_rect = fitz.Rect(text_box_x, text_box_y, text_box_x + text_box_width, text_box_y + text_box_height)
+        text_length = page.insert_textbox(text_rect, candidate_info, fontsize=font_size, fontname="helv", align=0)
+
+        # If text overflows, split across multiple pages
+        while text_length == 0:  # 0 means text didn't fit
+            page = doc.new_page()  # Create new page
+            text_rect = fitz.Rect(text_box_x, text_box_y, text_box_x + text_box_width, text_box_y + text_box_height)
+            text_length = page.insert_textbox(text_rect, candidate_info, fontsize=font_size, fontname="helv", align=0)
+
     doc.save(pdf)
     pdf.seek(0)
     return pdf
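
A hypothetical end-to-end sketch of how these helpers might be wired together follows. It is not part of this commit; it assumes utils.py is importable and exposes summarize_resume() and generate_pdf_report(), and the candidate values, resume text, and output file name are illustrative only.

# Hypothetical end-to-end sketch -- not part of this commit.
# Assumes utils.py is importable; candidate fields mirror those read by
# generate_pdf_report() in the diff above.
from utils import summarize_resume, generate_pdf_report

resume_text = "Senior data engineer with 8 years of ETL and cloud experience..."

candidate = {
    "name": "Jane Doe",                        # illustrative values
    "email": "jane.doe@example.com",
    "score": 87,
    "summary": summarize_resume(resume_text),  # falls back to a "Summary unavailable..." string on API errors
}

# generate_pdf_report() returns a BytesIO buffer with one page per candidate.
pdf_buffer = generate_pdf_report([candidate])

with open("shortlist_report.pdf", "wb") as f:
    f.write(pdf_buffer.getvalue())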