Spaces:

zarsyeda
/

Resume-Parser

Sleeping

App Files Community

SyedAzlanzar committed on Aug 31

Commit

a704218

1 Parent(s): eeb682d

@refactor : update resume parsing and PDF generation logic; improve error handling and file upload process

Browse files

Files changed (7) hide show

.gitattributes +0 -1
app/api/routes.py +20 -10
app/main.py +3 -0
app/services/generator.py +79 -28
app/services/hf_storage_service.py +22 -46
app/services/pdf_creator.py +20 -12
app/services/resume_parser.py +24 -6

.gitattributes CHANGED Viewed

@@ -33,4 +33,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
-app/static/fonts/Roboto-Regular.ttf filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

app/api/routes.py CHANGED Viewed

@@ -2,32 +2,40 @@ from fastapi import APIRouter, HTTPException
 from app.models.schema import GenerateRequest, GenerateResponse
 from app.services.generator import generate_cover_letter
 from app.services.pdf_creator import save_pdf
-from app.services.resume_parser import extract_text_from_resume
 from app.utils.file_utils import generate_unique_filename
-from fastapi import FastAPI, UploadFile, File
 from app.services.hf_storage_service import HuggingFaceStorageService
 import os
-HF_REPO_ID = os.getenv("HF_REPO_ID")
-HF_TOKEN = os.getenv("HF_TOKEN")
-storage_service = HuggingFaceStorageService(HF_REPO_ID, HF_TOKEN)
 router = APIRouter()
 @router.post("/generate", response_model=GenerateResponse)
 async def generate_cover_letter_api(data: GenerateRequest):
     try:
-        resume_text = extract_text_from_resume(data.resume_path)
-        letter_text = generate_cover_letter(data, resume_text)
         filename = generate_unique_filename()
         pdf_path = save_pdf(letter_text, filename)
         return GenerateResponse(
             letter=letter_text,
-            pdf_url=f"/{filename}"
         )
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
@@ -39,8 +47,9 @@ async def upload_resume(resume: UploadFile = File(...)):
         resume_content = await resume.read()
         # Upload to HuggingFace Hub
-        resume_url = storage_service.upload_resume(
             file_content=resume_content,
             filename=resume.filename
         )
@@ -53,4 +62,5 @@ async def upload_resume(resume: UploadFile = File(...)):
         return {
             "success": False,
             "error": str(e)
-        }

 from app.models.schema import GenerateRequest, GenerateResponse
 from app.services.generator import generate_cover_letter
 from app.services.pdf_creator import save_pdf
+from app.services.resume_parser import extract_resume_text
 from app.utils.file_utils import generate_unique_filename
+from fastapi import UploadFile, File
 from app.services.hf_storage_service import HuggingFaceStorageService
 import os
+from dotenv import load_dotenv
+storage_service = HuggingFaceStorageService()
 router = APIRouter()
 @router.post("/generate", response_model=GenerateResponse)
 async def generate_cover_letter_api(data: GenerateRequest):
     try:
+        if len(data.job_details) > 2048:
+            raise HTTPException(status_code=400, detail="Job details are too long")
+        resume_text = extract_resume_text(data.resume_path)
+        letter_text = await generate_cover_letter(data, resume_text)
         filename = generate_unique_filename()
         pdf_path = save_pdf(letter_text, filename)
         return GenerateResponse(
             letter=letter_text,
+            pdf_url=pdf_path
         )
+    except HTTPException as http_exc:
+        raise http_exc
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
         resume_content = await resume.read()
         # Upload to HuggingFace Hub
+        resume_url = storage_service.upload_file_to_hf(
             file_content=resume_content,
+            folder="resumes",
             filename=resume.filename
         )
         return {
             "success": False,
             "error": str(e)
+        }

app/main.py CHANGED Viewed

@@ -3,9 +3,12 @@ from fastapi.staticfiles import StaticFiles
 from app.api.routes import router
 import os
 import logging
 logging.basicConfig(level=logging.INFO)
 app = FastAPI(title="Cover Letter Generator")
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))

 from app.api.routes import router
 import os
 import logging
+from dotenv import load_dotenv
 logging.basicConfig(level=logging.INFO)
 app = FastAPI(title="Cover Letter Generator")
+load_dotenv()
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))

app/services/generator.py CHANGED Viewed

@@ -2,9 +2,6 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
 from app.models.schema import GenerateRequest
 import os
-from fastapi import FastAPI, HTTPException
-import os
-from datetime import datetime
 os.environ["TRANSFORMERS_CACHE"] = "/code/backend/transformers_cache"
@@ -27,36 +24,37 @@ model = AutoModelForCausalLM.from_pretrained(
 model = model.to(device)
-def generate_cover_letter(data: GenerateRequest, resume_text: str) -> str:
-# Updated prompt to explicitly request markdown format
-    prompt = f"""
-    Please generate a professional cover letter in **markdown format** based on the following information:
-    **Job Details:**
-    {data.job_details}
-    **Resume Content:**
-    {resume_text}
-    **Important:**
-    - Format the output as proper markdown
-    - Use appropriate headers (# ## ###)
-    - Use **bold** for emphasis where appropriate
-    - Use bullet points or numbered lists where relevant
-    - Include proper spacing and line breaks
-    - Start with the applicant's contact information as a header
-    - Include date and employer information
-    - Structure it as a professional business letter in markdown format
-    Please ensure the cover letter is:
-    1. Tailored specifically to the job requirements
-    2. Highlights relevant experience from the resume
-    3. Professional and engaging tone
-    4. Proper markdown formatting throughout
-    """
-    try:
         messages = [
         {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful AI cover letter bot that generates professional cover letters in markdown format. Always respond with properly formatted markdown."},
@@ -106,3 +104,56 @@ def generate_cover_letter(data: GenerateRequest, resume_text: str) -> str:
     except Exception as e:
         return {"error": str(e)}

 import torch
 from app.models.schema import GenerateRequest
 import os
 os.environ["TRANSFORMERS_CACHE"] = "/code/backend/transformers_cache"
 model = model.to(device)
+async def generate_cover_letter(data: GenerateRequest, resume_text: str) -> str:
+    try:
+        altered_job_details = await job_details_alteration(data.job_details)
+        prompt = f"""
+        Please generate a professional cover letter in **markdown format** based on the following information:
+        **Job Details:**
+        {altered_job_details}
+        **Resume Content:**
+        {resume_text}
+        **Important:**
+        - Format the output as proper markdown
+        - Use appropriate headers (# ## ###)
+        - Use **bold** for emphasis where appropriate
+        - Use bullet points or numbered lists where relevant
+        - Include proper spacing and line breaks
+        - Start with the applicant's contact information as a header
+        - Include date and employer information
+        - Structure it as a professional business letter in markdown format
+        Please ensure the cover letter is:
+        1. Tailored specifically to the job requirements
+        2. Highlights relevant experience from the resume
+        3. Professional and engaging tone
+        4. Proper markdown formatting throughout
+        """
         messages = [
         {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful AI cover letter bot that generates professional cover letters in markdown format. Always respond with properly formatted markdown."},
     except Exception as e:
         return {"error": str(e)}
+async def job_details_alteration(job_details:str) -> str:
+    try:
+        prompt = f"""
+    job_description: {job_details}
+    Your task: Extract only the important parts and rewrite them clearly into 4 sections:
+    - **Responsibilities** (what the candidate will do)
+    - **Requirements / Qualifications** (skills, education, experience needed)
+    - **Who You Are** (traits, mindset, culture fit)
+    - **Preferred Candidate** (optional nice-to-have skills or experience)
+    Rules:
+    - Remove irrelevant parts such as "About Us", "Why Join Us", "Perks/Benefits", or generic company marketing.
+    - Keep the output concise and professional in markdown format.
+    - If a section is not found, skip it. """
+        messages = [
+            {"role":"system", "content": "You are a job description cleaner. I will give you a long job description that includes many sections like company intro, perks, and marketing fluff. "},
+            {"role": "user", "content": prompt}
+        ]
+        text = tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True
+        )
+        model_inputs = tokenizer(
+            [text],
+            return_tensors="pt",
+            padding=True,
+            truncation=True,
+            max_length=2048
+        ).to(model.device)
+        generated_ids = model.generate(
+            **model_inputs,
+            max_new_tokens=512,
+            do_sample=True,
+            temperature=0.5,
+            top_p=0.9,
+            top_k=50,
+            repetition_penalty=1.15,
+            eos_token_id=tokenizer.eos_token_id,
+            pad_token_id=tokenizer.pad_token_id
+        )
+        generated_only_ids = generated_ids[:, model_inputs.input_ids.shape[1]:]
+        response = tokenizer.decode(
+            generated_only_ids[0],
+            skip_special_tokens=True
+        )
+        return response.strip()
+    except Exception as e:
+        return {"error": str(e)}

app/services/hf_storage_service.py CHANGED Viewed

@@ -3,45 +3,32 @@ import os
 import uuid
 from datetime import datetime
 import tempfile
 class HuggingFaceStorageService:
-    def __init__(self, repo_id: str, token: str):
-        """
-        Initialize HuggingFace storage service
-        Args:
-            repo_id: Your HuggingFace repository ID (e.g., "username/cover-letter-storage")
-            token: Your HuggingFace access token
-        """
-        self.repo_id = repo_id
-        self.token = token
         self.api = HfApi()
-        # Repository should already exist
-    def upload_resume(self, file_content, filename: str = None) -> str:
-        """
-        Upload resume to HuggingFace Hub
-        Args:
-            file_content: File content (bytes)
-            filename: Original filename (optional)
-        Returns:
-            str: Public URL to the uploaded file
-        """
         if filename is None:
-            filename = f"resume_{uuid.uuid4().hex}.txt"
-        # Create a unique path with timestamp
         timestamp = datetime.now().strftime("%Y/%m/%d")
-        file_path = f"resumes/{timestamp}/{filename}"
-        # Save bytes to temporary file for upload
-        with tempfile.NamedTemporaryFile(delete=False, suffix='.txt') as temp_file:
             temp_file.write(file_content)
             temp_file_path = temp_file.name
         try:
             # Upload to HuggingFace Hub
             upload_file(
@@ -51,28 +38,16 @@ class HuggingFaceStorageService:
                 token=self.token,
                 repo_type="dataset"
             )
-            # Return the public URL
             return f"https://huggingface.co/datasets/{self.repo_id}/resolve/main/{file_path}"
-        except Exception as e:
-            raise Exception(f"Failed to upload resume: {str(e)}")
         finally:
-            # Clean up temporary file
             os.unlink(temp_file_path)
     # Removed cover letter upload method since we only store resumes
     def delete_file(self, file_path: str) -> bool:
-        """
-        Delete a file from HuggingFace Hub
-        Args:
-            file_path: Path to file in the repository
-        Returns:
-            bool: Success status
-        """
         try:
             self.api.delete_file(
                 path_in_repo=file_path,
@@ -83,4 +58,5 @@ class HuggingFaceStorageService:
             return True
         except Exception as e:
             print(f"Failed to delete file: {str(e)}")
-            return False

 import uuid
 from datetime import datetime
 import tempfile
+from dotenv import load_dotenv
 class HuggingFaceStorageService:
+    def __init__(self):
+        load_dotenv()
+        self.repo_id = os.getenv("HF_REPO_ID")
+        self.token = os.getenv("HF_TOKEN")
         self.api = HfApi()
+    def upload_file_to_hf(self, file_content: bytes, folder: str, filename: str = None) -> str:
+        if folder not in ["resumes", "cover-letters"]:
+            raise ValueError("Folder must be 'resumes' or 'cover-letters'")
         if filename is None:
+            filename = f"{uuid.uuid4().hex}.pdf"
+        # Create a unique path with date
         timestamp = datetime.now().strftime("%Y/%m/%d")
+        file_path = f"{folder}/{timestamp}/{filename}"
+        # Save bytes to temp file
+        with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(filename)[1]) as temp_file:
             temp_file.write(file_content)
             temp_file_path = temp_file.name
         try:
             # Upload to HuggingFace Hub
             upload_file(
                 token=self.token,
                 repo_type="dataset"
             )
+            # Return the direct URL
             return f"https://huggingface.co/datasets/{self.repo_id}/resolve/main/{file_path}"
         finally:
             os.unlink(temp_file_path)
     # Removed cover letter upload method since we only store resumes
     def delete_file(self, file_path: str) -> bool:
         try:
             self.api.delete_file(
                 path_in_repo=file_path,
             return True
         except Exception as e:
             print(f"Failed to delete file: {str(e)}")
+            return False

app/services/pdf_creator.py CHANGED Viewed

@@ -1,13 +1,11 @@
 from fpdf import FPDF
 import os
 from datetime import datetime
-PDF_DIR = "/tmp/pdfs"
-os.makedirs(PDF_DIR, exist_ok=True)
-# fallback for local testing
-SPACE_URL = os.getenv("SPACE_URL", "http://localhost:8000")
 def normalize_text(text: str) -> str:
     replacements = {
@@ -16,20 +14,26 @@ def normalize_text(text: str) -> str:
         "—": "-", "–": "-",
         "…": "...",
         "→": "->",
-        "•": "-",  # optional: convert bullets
     }
     for old, new in replacements.items():
         text = text.replace(old, new)
     return text
 def save_pdf(text: str, filename: str) -> str:
-    # text = normalize_text(text)
-    path = os.path.join(PDF_DIR, filename)
     pdf = FPDF()
     pdf.add_page()
-    pdf.set_font("Arial", size=11)  # Built-in Latin-1 font
     pdf.set_auto_page_break(auto=True, margin=15)
     sections = text.split('\n\n')
@@ -44,5 +48,9 @@ def save_pdf(text: str, filename: str) -> str:
                 pdf.multi_cell(0, 6, section.strip(), align='L')
                 pdf.ln(8)
-    pdf.output(path)
-    return f"{SPACE_URL}/static/pdfs/{filename}"

 from fpdf import FPDF
 import os
 from datetime import datetime
+import io
+from app.services.hf_storage_service import HuggingFaceStorageService
+import unicodedata
+storage_service = HuggingFaceStorageService()
 def normalize_text(text: str) -> str:
     replacements = {
         "—": "-", "–": "-",
         "…": "...",
         "→": "->",
+        "•": "-",
     }
     for old, new in replacements.items():
         text = text.replace(old, new)
+    # Convert accented letters to closest ASCII equivalent
+    text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('ascii')
     return text
 def save_pdf(text: str, filename: str) -> str:
+    text = normalize_text(text)
+    if filename is None:
+        filename = f"coverletter_{uuid.uuid4().hex}.pdf"
+    # Generate PDF in memory
     pdf = FPDF()
     pdf.add_page()
+    pdf.set_font("Arial", size=11)
     pdf.set_auto_page_break(auto=True, margin=15)
     sections = text.split('\n\n')
                 pdf.multi_cell(0, 6, section.strip(), align='L')
                 pdf.ln(8)
+    pdf_data = pdf.output(dest='S').encode('latin-1', errors='replace')
+    # Upload PDF bytes to Hugging Face using your class method
+    url = storage_service.upload_file_to_hf(file_content=pdf_data, folder="cover-letters", filename=filename)
+    return url

app/services/resume_parser.py CHANGED Viewed

@@ -1,8 +1,26 @@
 import pdfplumber
-def extract_text_from_resume(file_path: str) -> str:
-    text = ""
-    with pdfplumber.open(file_path) as pdf:
-        for page in pdf.pages:
-            text += page.extract_text() or ""
-    return text.strip()

+import os
+import requests
+import io
 import pdfplumber
+def extract_resume_text(file_url: str) -> str:
+    HF_TOKEN = os.getenv("HF_TOKEN")
+    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
+    try:
+        response = requests.get(file_url, headers=headers, timeout=30)
+        response.raise_for_status()
+        pdf_bytes = io.BytesIO(response.content)
+        text = ""
+        with pdfplumber.open(pdf_bytes) as pdf:
+            for page in pdf.pages:
+                page_text = page.extract_text()
+                if page_text:
+                    text += page_text + "\n"
+        return text.strip()
+    except Exception as e:
+        raise RuntimeError(f"Failed to extract text from resume: {str(e)}")