Spaces:
Sleeping
Sleeping
Johnny
committed on
Commit
·
cca9b28
1
Parent(s):
2854e2c
updated summarize_resume to pegasus
Browse files
config.py
CHANGED
|
@@ -19,7 +19,7 @@ supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
|
|
| 19 |
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
| 20 |
|
| 21 |
HF_MODELS = {
|
| 22 |
-
"
|
| 23 |
"gemma": "https://router.huggingface.co/nebius/v1/chat/completions"
|
| 24 |
|
| 25 |
}
|
|
@@ -30,12 +30,12 @@ if not HF_API_TOKEN:
|
|
| 30 |
|
| 31 |
HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}
|
| 32 |
|
| 33 |
-
def query(payload, model="
|
| 34 |
"""
|
| 35 |
Sends a request to the Hugging Face API with retries and better error handling.
|
| 36 |
"""
|
| 37 |
if model not in HF_MODELS:
|
| 38 |
-
raise ValueError(f"Invalid model name: {model}. Choose '
|
| 39 |
|
| 40 |
api_url = HF_MODELS[model]
|
| 41 |
|
|
|
|
| 19 |
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
| 20 |
|
| 21 |
HF_MODELS = {
|
| 22 |
+
"pegasus": "https://router.huggingface.co/hf-inference/models/google/pegasus-xsum",
|
| 23 |
"gemma": "https://router.huggingface.co/nebius/v1/chat/completions"
|
| 24 |
|
| 25 |
}
|
|
|
|
| 30 |
|
| 31 |
HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}
|
| 32 |
|
| 33 |
+
def query(payload, model="pegasus", retries=5, delay=5):
|
| 34 |
"""
|
| 35 |
Sends a request to the Hugging Face API with retries and better error handling.
|
| 36 |
"""
|
| 37 |
if model not in HF_MODELS:
|
| 38 |
+
raise ValueError(f"Invalid model name: {model}. Choose 'pegasus' for summarization.")
|
| 39 |
|
| 40 |
api_url = HF_MODELS[model]
|
| 41 |
|
utils.py
CHANGED
|
@@ -163,25 +163,26 @@ def extract_email(resume_text):
|
|
| 163 |
match = re.search(r"[\w\.-]+@[\w\.-]+", resume_text)
|
| 164 |
return match.group(0) if match else None
|
| 165 |
|
| 166 |
-
|
| 167 |
def summarize_resume(resume_text):
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
return "Summary unavailable due to API issues."
|
| 176 |
|
| 177 |
try:
|
| 178 |
-
response =
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
|
|
|
|
|
|
|
|
|
| 182 |
except Exception as e:
|
| 183 |
-
print(f"Error
|
| 184 |
-
return "Summary unavailable."
|
| 185 |
|
| 186 |
|
| 187 |
# === Data Storage & Reporting ===
|
|
@@ -229,17 +230,14 @@ def generate_pdf_report(shortlisted_candidates, questions=None):
|
|
| 229 |
|
| 230 |
|
| 231 |
def generate_interview_questions_from_summaries(candidates):
|
| 232 |
-
"""
|
| 233 |
-
Generates 5 interview questions based on combined summaries using Gemma model.
|
| 234 |
-
"""
|
| 235 |
if not isinstance(candidates, list):
|
| 236 |
raise TypeError("Expected a list of candidate dictionaries.")
|
| 237 |
|
| 238 |
summaries = " ".join(c.get("summary", "") for c in candidates)
|
| 239 |
|
| 240 |
prompt = (
|
| 241 |
-
"Based on the following summary of
|
| 242 |
-
"generate 5 thoughtful, general interview questions that would help a recruiter assess their fit:\n"
|
| 243 |
f"{summaries}"
|
| 244 |
)
|
| 245 |
|
|
@@ -251,10 +249,27 @@ def generate_interview_questions_from_summaries(candidates):
|
|
| 251 |
)
|
| 252 |
result = response.choices[0].message.content
|
| 253 |
|
| 254 |
-
|
| 255 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
|
| 257 |
return [f"Q{i+1}. {q}" for i, q in enumerate(questions[:5])] or ["⚠️ No questions generated."]
|
|
|
|
| 258 |
except Exception as e:
|
| 259 |
print(f"❌ Error generating interview questions: {e}")
|
| 260 |
return ["⚠️ Error generating questions."]
|
|
|
|
| 163 |
match = re.search(r"[\w\.-]+@[\w\.-]+", resume_text)
|
| 164 |
return match.group(0) if match else None
|
| 165 |
|
|
|
|
| 166 |
def summarize_resume(resume_text):
|
| 167 |
+
prompt = (
|
| 168 |
+
"You are an expert technical recruiter. Extract a professional summary for this candidate based on their resume text. "
|
| 169 |
+
"Include: full name (if found), job title, years of experience, key technologies/tools, industries worked in, and certifications. "
|
| 170 |
+
"Format it as a professional summary paragraph.\n\n"
|
| 171 |
+
f"Resume:\n{resume_text}\n\n"
|
| 172 |
+
"Summary:"
|
| 173 |
+
)
|
|
|
|
| 174 |
|
| 175 |
try:
|
| 176 |
+
response = client.chat_completion(
|
| 177 |
+
messages=[{"role": "user", "content": prompt}],
|
| 178 |
+
temperature=0.5,
|
| 179 |
+
max_tokens=300,
|
| 180 |
+
)
|
| 181 |
+
result = response.choices[0].message.content.strip()
|
| 182 |
+
return result
|
| 183 |
except Exception as e:
|
| 184 |
+
print(f"❌ Error generating structured summary: {e}")
|
| 185 |
+
return "Summary unavailable due to API issues."
|
| 186 |
|
| 187 |
|
| 188 |
# === Data Storage & Reporting ===
|
|
|
|
| 230 |
|
| 231 |
|
| 232 |
def generate_interview_questions_from_summaries(candidates):
|
|
|
|
|
|
|
|
|
|
| 233 |
if not isinstance(candidates, list):
|
| 234 |
raise TypeError("Expected a list of candidate dictionaries.")
|
| 235 |
|
| 236 |
summaries = " ".join(c.get("summary", "") for c in candidates)
|
| 237 |
|
| 238 |
prompt = (
|
| 239 |
+
"Based on the following summary of a top candidate for a job role, "
|
| 240 |
+
"generate 5 thoughtful, general interview questions that would help a recruiter assess their fit:\n\n"
|
| 241 |
f"{summaries}"
|
| 242 |
)
|
| 243 |
|
|
|
|
| 249 |
)
|
| 250 |
result = response.choices[0].message.content
|
| 251 |
|
| 252 |
+
# Clean and normalize questions
|
| 253 |
+
raw_questions = result.split("\n")
|
| 254 |
+
questions = []
|
| 255 |
+
|
| 256 |
+
for q in raw_questions:
|
| 257 |
+
q = q.strip()
|
| 258 |
+
|
| 259 |
+
# Skip empty lines and markdown headers
|
| 260 |
+
if not q or re.match(r"^#+\s*", q):
|
| 261 |
+
continue
|
| 262 |
+
|
| 263 |
+
# Remove leading "Q1.", "1)", etc.
|
| 264 |
+
q = re.sub(r"^(?:Q?\d+[\.\)\-]?\s*)+", "", q)
|
| 265 |
+
|
| 266 |
+
# Remove markdown bold/italics (**, *, etc.)
|
| 267 |
+
q = re.sub(r"[*_]+", "", q)
|
| 268 |
+
|
| 269 |
+
questions.append(q.strip())
|
| 270 |
|
| 271 |
return [f"Q{i+1}. {q}" for i, q in enumerate(questions[:5])] or ["⚠️ No questions generated."]
|
| 272 |
+
|
| 273 |
except Exception as e:
|
| 274 |
print(f"❌ Error generating interview questions: {e}")
|
| 275 |
return ["⚠️ Error generating questions."]
|