SyedAzlanzar committed on
Commit
a704218
·
1 Parent(s): eeb682d

@refactor : update resume parsing and PDF generation logic; improve error handling and file upload process

Browse files
.gitattributes CHANGED
@@ -33,4 +33,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
- app/static/fonts/Roboto-Regular.ttf filter=lfs diff=lfs merge=lfs -text
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
app/api/routes.py CHANGED
@@ -2,32 +2,40 @@ from fastapi import APIRouter, HTTPException
2
  from app.models.schema import GenerateRequest, GenerateResponse
3
  from app.services.generator import generate_cover_letter
4
  from app.services.pdf_creator import save_pdf
5
- from app.services.resume_parser import extract_text_from_resume
6
  from app.utils.file_utils import generate_unique_filename
7
- from fastapi import FastAPI, UploadFile, File
8
  from app.services.hf_storage_service import HuggingFaceStorageService
9
  import os
 
10
 
11
 
12
- HF_REPO_ID = os.getenv("HF_REPO_ID")
13
- HF_TOKEN = os.getenv("HF_TOKEN")
14
- storage_service = HuggingFaceStorageService(HF_REPO_ID, HF_TOKEN)
15
 
16
  router = APIRouter()
17
 
18
  @router.post("/generate", response_model=GenerateResponse)
19
  async def generate_cover_letter_api(data: GenerateRequest):
20
  try:
21
- resume_text = extract_text_from_resume(data.resume_path)
22
- letter_text = generate_cover_letter(data, resume_text)
 
 
 
23
 
24
  filename = generate_unique_filename()
25
  pdf_path = save_pdf(letter_text, filename)
26
 
27
  return GenerateResponse(
28
  letter=letter_text,
29
- pdf_url=f"/{filename}"
30
  )
 
 
 
 
31
  except Exception as e:
32
  raise HTTPException(status_code=500, detail=str(e))
33
 
@@ -39,8 +47,9 @@ async def upload_resume(resume: UploadFile = File(...)):
39
  resume_content = await resume.read()
40
 
41
  # Upload to HuggingFace Hub
42
- resume_url = storage_service.upload_resume(
43
  file_content=resume_content,
 
44
  filename=resume.filename
45
  )
46
 
@@ -53,4 +62,5 @@ async def upload_resume(resume: UploadFile = File(...)):
53
  return {
54
  "success": False,
55
  "error": str(e)
56
- }
 
 
2
  from app.models.schema import GenerateRequest, GenerateResponse
3
  from app.services.generator import generate_cover_letter
4
  from app.services.pdf_creator import save_pdf
5
+ from app.services.resume_parser import extract_resume_text
6
  from app.utils.file_utils import generate_unique_filename
7
+ from fastapi import UploadFile, File
8
  from app.services.hf_storage_service import HuggingFaceStorageService
9
  import os
10
+ from dotenv import load_dotenv
11
 
12
 
13
+
14
+
15
+ storage_service = HuggingFaceStorageService()
16
 
17
  router = APIRouter()
18
 
19
  @router.post("/generate", response_model=GenerateResponse)
20
  async def generate_cover_letter_api(data: GenerateRequest):
21
  try:
22
+ if len(data.job_details) > 2048:
23
+ raise HTTPException(status_code=400, detail="Job details are too long")
24
+
25
+ resume_text = extract_resume_text(data.resume_path)
26
+ letter_text = await generate_cover_letter(data, resume_text)
27
 
28
  filename = generate_unique_filename()
29
  pdf_path = save_pdf(letter_text, filename)
30
 
31
  return GenerateResponse(
32
  letter=letter_text,
33
+ pdf_url=pdf_path
34
  )
35
+
36
+ except HTTPException as http_exc:
37
+ raise http_exc
38
+
39
  except Exception as e:
40
  raise HTTPException(status_code=500, detail=str(e))
41
 
 
47
  resume_content = await resume.read()
48
 
49
  # Upload to HuggingFace Hub
50
+ resume_url = storage_service.upload_file_to_hf(
51
  file_content=resume_content,
52
+ folder="resumes",
53
  filename=resume.filename
54
  )
55
 
 
62
  return {
63
  "success": False,
64
  "error": str(e)
65
+ }
66
+
app/main.py CHANGED
@@ -3,9 +3,12 @@ from fastapi.staticfiles import StaticFiles
3
  from app.api.routes import router
4
  import os
5
  import logging
 
 
6
 
7
  logging.basicConfig(level=logging.INFO)
8
  app = FastAPI(title="Cover Letter Generator")
 
9
 
10
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
11
 
 
3
  from app.api.routes import router
4
  import os
5
  import logging
6
+ from dotenv import load_dotenv
7
+
8
 
9
  logging.basicConfig(level=logging.INFO)
10
  app = FastAPI(title="Cover Letter Generator")
11
+ load_dotenv()
12
 
13
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
14
 
app/services/generator.py CHANGED
@@ -2,9 +2,6 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
2
  import torch
3
  from app.models.schema import GenerateRequest
4
  import os
5
- from fastapi import FastAPI, HTTPException
6
- import os
7
- from datetime import datetime
8
 
9
 
10
  os.environ["TRANSFORMERS_CACHE"] = "/code/backend/transformers_cache"
@@ -27,36 +24,37 @@ model = AutoModelForCausalLM.from_pretrained(
27
  model = model.to(device)
28
 
29
 
30
- def generate_cover_letter(data: GenerateRequest, resume_text: str) -> str:
31
- # Updated prompt to explicitly request markdown format
32
- prompt = f"""
33
- Please generate a professional cover letter in **markdown format** based on the following information:
34
 
35
- **Job Details:**
36
- {data.job_details}
37
 
38
- **Resume Content:**
39
- {resume_text}
40
 
 
 
41
 
42
- **Important:**
43
- - Format the output as proper markdown
44
- - Use appropriate headers (# ## ###)
45
- - Use **bold** for emphasis where appropriate
46
- - Use bullet points or numbered lists where relevant
47
- - Include proper spacing and line breaks
48
- - Start with the applicant's contact information as a header
49
- - Include date and employer information
50
- - Structure it as a professional business letter in markdown format
51
 
52
- Please ensure the cover letter is:
53
- 1. Tailored specifically to the job requirements
54
- 2. Highlights relevant experience from the resume
55
- 3. Professional and engaging tone
56
- 4. Proper markdown formatting throughout
57
- """
58
-
59
- try:
 
 
 
 
 
 
 
 
 
60
 
61
  messages = [
62
  {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful AI cover letter bot that generates professional cover letters in markdown format. Always respond with properly formatted markdown."},
@@ -106,3 +104,56 @@ def generate_cover_letter(data: GenerateRequest, resume_text: str) -> str:
106
 
107
  except Exception as e:
108
  return {"error": str(e)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import torch
3
  from app.models.schema import GenerateRequest
4
  import os
 
 
 
5
 
6
 
7
  os.environ["TRANSFORMERS_CACHE"] = "/code/backend/transformers_cache"
 
24
  model = model.to(device)
25
 
26
 
27
+ async def generate_cover_letter(data: GenerateRequest, resume_text: str) -> str:
28
+ try:
 
 
29
 
30
+ altered_job_details = await job_details_alteration(data.job_details)
 
31
 
32
+ prompt = f"""
33
+ Please generate a professional cover letter in **markdown format** based on the following information:
34
 
35
+ **Job Details:**
36
+ {altered_job_details}
37
 
38
+ **Resume Content:**
39
+ {resume_text}
 
 
 
 
 
 
 
40
 
41
+
42
+ **Important:**
43
+ - Format the output as proper markdown
44
+ - Use appropriate headers (# ## ###)
45
+ - Use **bold** for emphasis where appropriate
46
+ - Use bullet points or numbered lists where relevant
47
+ - Include proper spacing and line breaks
48
+ - Start with the applicant's contact information as a header
49
+ - Include date and employer information
50
+ - Structure it as a professional business letter in markdown format
51
+
52
+ Please ensure the cover letter is:
53
+ 1. Tailored specifically to the job requirements
54
+ 2. Highlights relevant experience from the resume
55
+ 3. Professional and engaging tone
56
+ 4. Proper markdown formatting throughout
57
+ """
58
 
59
  messages = [
60
  {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful AI cover letter bot that generates professional cover letters in markdown format. Always respond with properly formatted markdown."},
 
104
 
105
  except Exception as e:
106
  return {"error": str(e)}
107
+
108
+
109
+ async def job_details_alteration(job_details:str) -> str:
110
+ try:
111
+ prompt = f"""
112
+ job_description: {job_details}
113
+
114
+ Your task: Extract only the important parts and rewrite them clearly into 4 sections:
115
+ - **Responsibilities** (what the candidate will do)
116
+ - **Requirements / Qualifications** (skills, education, experience needed)
117
+ - **Who You Are** (traits, mindset, culture fit)
118
+ - **Preferred Candidate** (optional nice-to-have skills or experience)
119
+
120
+ Rules:
121
+ - Remove irrelevant parts such as "About Us", "Why Join Us", "Perks/Benefits", or generic company marketing.
122
+ - Keep the output concise and professional in markdown format.
123
+ - If a section is not found, skip it. """
124
+
125
+ messages = [
126
+ {"role":"system", "content": "You are a job description cleaner. I will give you a long job description that includes many sections like company intro, perks, and marketing fluff. "},
127
+ {"role": "user", "content": prompt}
128
+ ]
129
+ text = tokenizer.apply_chat_template(
130
+ messages,
131
+ tokenize=False,
132
+ add_generation_prompt=True
133
+ )
134
+ model_inputs = tokenizer(
135
+ [text],
136
+ return_tensors="pt",
137
+ padding=True,
138
+ truncation=True,
139
+ max_length=2048
140
+ ).to(model.device)
141
+ generated_ids = model.generate(
142
+ **model_inputs,
143
+ max_new_tokens=512,
144
+ do_sample=True,
145
+ temperature=0.5,
146
+ top_p=0.9,
147
+ top_k=50,
148
+ repetition_penalty=1.15,
149
+ eos_token_id=tokenizer.eos_token_id,
150
+ pad_token_id=tokenizer.pad_token_id
151
+ )
152
+ generated_only_ids = generated_ids[:, model_inputs.input_ids.shape[1]:]
153
+ response = tokenizer.decode(
154
+ generated_only_ids[0],
155
+ skip_special_tokens=True
156
+ )
157
+ return response.strip()
158
+ except Exception as e:
159
+ return {"error": str(e)}
app/services/hf_storage_service.py CHANGED
@@ -3,45 +3,32 @@ import os
3
  import uuid
4
  from datetime import datetime
5
  import tempfile
 
6
 
7
  class HuggingFaceStorageService:
8
- def __init__(self, repo_id: str, token: str):
9
- """
10
- Initialize HuggingFace storage service
11
-
12
- Args:
13
- repo_id: Your HuggingFace repository ID (e.g., "username/cover-letter-storage")
14
- token: Your HuggingFace access token
15
- """
16
- self.repo_id = repo_id
17
- self.token = token
18
  self.api = HfApi()
19
 
20
- # Repository should already exist
21
 
22
- def upload_resume(self, file_content, filename: str = None) -> str:
23
- """
24
- Upload resume to HuggingFace Hub
25
-
26
- Args:
27
- file_content: File content (bytes)
28
- filename: Original filename (optional)
29
-
30
- Returns:
31
- str: Public URL to the uploaded file
32
- """
33
  if filename is None:
34
- filename = f"resume_{uuid.uuid4().hex}.txt"
35
-
36
- # Create a unique path with timestamp
37
  timestamp = datetime.now().strftime("%Y/%m/%d")
38
- file_path = f"resumes/{timestamp}/{filename}"
39
-
40
- # Save bytes to temporary file for upload
41
- with tempfile.NamedTemporaryFile(delete=False, suffix='.txt') as temp_file:
42
  temp_file.write(file_content)
43
  temp_file_path = temp_file.name
44
-
45
  try:
46
  # Upload to HuggingFace Hub
47
  upload_file(
@@ -51,28 +38,16 @@ class HuggingFaceStorageService:
51
  token=self.token,
52
  repo_type="dataset"
53
  )
54
-
55
- # Return the public URL
56
  return f"https://huggingface.co/datasets/{self.repo_id}/resolve/main/{file_path}"
57
-
58
- except Exception as e:
59
- raise Exception(f"Failed to upload resume: {str(e)}")
60
  finally:
61
- # Clean up temporary file
62
  os.unlink(temp_file_path)
63
 
64
  # Removed cover letter upload method since we only store resumes
65
 
66
  def delete_file(self, file_path: str) -> bool:
67
- """
68
- Delete a file from HuggingFace Hub
69
-
70
- Args:
71
- file_path: Path to file in the repository
72
-
73
- Returns:
74
- bool: Success status
75
- """
76
  try:
77
  self.api.delete_file(
78
  path_in_repo=file_path,
@@ -83,4 +58,5 @@ class HuggingFaceStorageService:
83
  return True
84
  except Exception as e:
85
  print(f"Failed to delete file: {str(e)}")
86
- return False
 
 
3
  import uuid
4
  from datetime import datetime
5
  import tempfile
6
+ from dotenv import load_dotenv
7
 
8
  class HuggingFaceStorageService:
9
+ def __init__(self):
10
+ load_dotenv()
11
+ self.repo_id = os.getenv("HF_REPO_ID")
12
+ self.token = os.getenv("HF_TOKEN")
 
 
 
 
 
 
13
  self.api = HfApi()
14
 
 
15
 
16
+ def upload_file_to_hf(self, file_content: bytes, folder: str, filename: str = None) -> str:
17
+ if folder not in ["resumes", "cover-letters"]:
18
+ raise ValueError("Folder must be 'resumes' or 'cover-letters'")
19
+
 
 
 
 
 
 
 
20
  if filename is None:
21
+ filename = f"{uuid.uuid4().hex}.pdf"
22
+
23
+ # Create a unique path with date
24
  timestamp = datetime.now().strftime("%Y/%m/%d")
25
+ file_path = f"{folder}/{timestamp}/{filename}"
26
+
27
+ # Save bytes to temp file
28
+ with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(filename)[1]) as temp_file:
29
  temp_file.write(file_content)
30
  temp_file_path = temp_file.name
31
+
32
  try:
33
  # Upload to HuggingFace Hub
34
  upload_file(
 
38
  token=self.token,
39
  repo_type="dataset"
40
  )
41
+
42
+ # Return the direct URL
43
  return f"https://huggingface.co/datasets/{self.repo_id}/resolve/main/{file_path}"
44
+
 
 
45
  finally:
 
46
  os.unlink(temp_file_path)
47
 
48
  # Removed cover letter upload method since we only store resumes
49
 
50
  def delete_file(self, file_path: str) -> bool:
 
 
 
 
 
 
 
 
 
51
  try:
52
  self.api.delete_file(
53
  path_in_repo=file_path,
 
58
  return True
59
  except Exception as e:
60
  print(f"Failed to delete file: {str(e)}")
61
+ return False
62
+
app/services/pdf_creator.py CHANGED
@@ -1,13 +1,11 @@
1
  from fpdf import FPDF
2
  import os
3
  from datetime import datetime
 
 
 
4
 
5
- PDF_DIR = "/tmp/pdfs"
6
- os.makedirs(PDF_DIR, exist_ok=True)
7
-
8
- # fallback for local testing
9
- SPACE_URL = os.getenv("SPACE_URL", "http://localhost:8000")
10
-
11
 
12
  def normalize_text(text: str) -> str:
13
  replacements = {
@@ -16,20 +14,26 @@ def normalize_text(text: str) -> str:
16
  "—": "-", "–": "-",
17
  "…": "...",
18
  "→": "->",
19
- "β€’": "-", # optional: convert bullets
20
  }
21
  for old, new in replacements.items():
22
  text = text.replace(old, new)
 
 
 
 
23
  return text
24
 
25
 
26
  def save_pdf(text: str, filename: str) -> str:
27
- # text = normalize_text(text)
28
- path = os.path.join(PDF_DIR, filename)
 
29
 
 
30
  pdf = FPDF()
31
  pdf.add_page()
32
- pdf.set_font("Arial", size=11) # Built-in Latin-1 font
33
  pdf.set_auto_page_break(auto=True, margin=15)
34
 
35
  sections = text.split('\n\n')
@@ -44,5 +48,9 @@ def save_pdf(text: str, filename: str) -> str:
44
  pdf.multi_cell(0, 6, section.strip(), align='L')
45
  pdf.ln(8)
46
 
47
- pdf.output(path)
48
- return f"{SPACE_URL}/static/pdfs/{filename}"
 
 
 
 
 
1
  from fpdf import FPDF
2
  import os
3
  from datetime import datetime
4
+ import io
5
+ from app.services.hf_storage_service import HuggingFaceStorageService
6
+ import unicodedata
7
 
8
+ storage_service = HuggingFaceStorageService()
 
 
 
 
 
9
 
10
  def normalize_text(text: str) -> str:
11
  replacements = {
 
14
  "—": "-", "–": "-",
15
  "…": "...",
16
  "→": "->",
17
+ "β€’": "-",
18
  }
19
  for old, new in replacements.items():
20
  text = text.replace(old, new)
21
+
22
+ # Convert accented letters to closest ASCII equivalent
23
+ text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('ascii')
24
+
25
  return text
26
 
27
 
28
  def save_pdf(text: str, filename: str) -> str:
29
+ text = normalize_text(text)
30
+ if filename is None:
31
+ filename = f"coverletter_{uuid.uuid4().hex}.pdf"
32
 
33
+ # Generate PDF in memory
34
  pdf = FPDF()
35
  pdf.add_page()
36
+ pdf.set_font("Arial", size=11)
37
  pdf.set_auto_page_break(auto=True, margin=15)
38
 
39
  sections = text.split('\n\n')
 
48
  pdf.multi_cell(0, 6, section.strip(), align='L')
49
  pdf.ln(8)
50
 
51
+ pdf_data = pdf.output(dest='S').encode('latin-1', errors='replace')
52
+
53
+
54
+ # Upload PDF bytes to Hugging Face using your class method
55
+ url = storage_service.upload_file_to_hf(file_content=pdf_data, folder="cover-letters", filename=filename)
56
+ return url
app/services/resume_parser.py CHANGED
@@ -1,8 +1,26 @@
 
 
 
1
  import pdfplumber
2
 
3
- def extract_text_from_resume(file_path: str) -> str:
4
- text = ""
5
- with pdfplumber.open(file_path) as pdf:
6
- for page in pdf.pages:
7
- text += page.extract_text() or ""
8
- return text.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import io
4
  import pdfplumber
5
 
6
+ def extract_resume_text(file_url: str) -> str:
7
+ HF_TOKEN = os.getenv("HF_TOKEN")
8
+ headers = {"Authorization": f"Bearer {HF_TOKEN}"}
9
+
10
+ try:
11
+ response = requests.get(file_url, headers=headers, timeout=30)
12
+ response.raise_for_status()
13
+
14
+ pdf_bytes = io.BytesIO(response.content)
15
+
16
+ text = ""
17
+ with pdfplumber.open(pdf_bytes) as pdf:
18
+ for page in pdf.pages:
19
+ page_text = page.extract_text()
20
+ if page_text:
21
+ text += page_text + "\n"
22
+
23
+ return text.strip()
24
+
25
+ except Exception as e:
26
+ raise RuntimeError(f"Failed to extract text from resume: {str(e)}")