Spaces:
Sleeping
Sleeping
SyedAzlanzar
committed on
Commit
·
a02f5b2
1
Parent(s):
82c45bb
@feat : storage for resume repo created.
Browse files- .gitignore +8 -0
- app/api/routes.py +32 -0
- app/services/generator.py +42 -67
- app/services/hf_storage_service.py +86 -0
- requirements.txt +4 -1
.gitignore
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.env
|
2 |
+
__pycache__/
|
3 |
+
*.pyc
|
4 |
+
*.pyo
|
5 |
+
*.pyd
|
6 |
+
.Python
|
7 |
+
venv/
|
8 |
+
env/
|
app/api/routes.py
CHANGED
@@ -4,6 +4,14 @@ from app.services.generator import generate_cover_letter
|
|
4 |
from app.services.pdf_creator import save_pdf
|
5 |
from app.services.resume_parser import extract_text_from_resume
|
6 |
from app.utils.file_utils import generate_unique_filename
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
router = APIRouter()
|
9 |
|
@@ -22,3 +30,27 @@ async def generate_cover_letter_api(data: GenerateRequest):
|
|
22 |
)
|
23 |
except Exception as e:
|
24 |
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
from app.services.pdf_creator import save_pdf
|
5 |
from app.services.resume_parser import extract_text_from_resume
|
6 |
from app.utils.file_utils import generate_unique_filename
|
7 |
+
from fastapi import FastAPI, UploadFile, File
|
8 |
+
from app.services.hf_storage_service import HuggingFaceStorageService
|
9 |
+
import os
|
10 |
+
|
11 |
+
|
12 |
+
# Storage configuration for resume uploads.
# The repo ID keeps its original hard-coded value as the default but can now
# be overridden via the HF_REPO_ID environment variable; the access token is
# read from HF_TOKEN (may be None, e.g. for public repos).
HF_REPO_ID = os.getenv("HF_REPO_ID", "zarsyeda/resume-parser-storage")
HF_TOKEN = os.getenv("HF_TOKEN")
# Module-level singleton shared by all requests handled by this router.
storage_service = HuggingFaceStorageService(HF_REPO_ID, HF_TOKEN)
|
15 |
|
16 |
router = APIRouter()
|
17 |
|
|
|
30 |
)
|
31 |
except Exception as e:
|
32 |
raise HTTPException(status_code=500, detail=str(e))
|
33 |
+
|
34 |
+
|
35 |
+
@router.post("/upload-resume")
async def upload_resume(resume: UploadFile = File(...)):
    """Accept a resume upload and persist it to the HuggingFace Hub.

    Args:
        resume: The uploaded resume file (multipart/form-data).

    Returns:
        dict: ``{"success": True, "url": <public file URL>}`` on success.

    Raises:
        HTTPException: 500 with the error detail if reading or uploading
            fails — previously this handler returned HTTP 200 with
            ``{"success": False}``, which hid failures from clients and was
            inconsistent with generate_cover_letter_api's error handling.
    """
    try:
        # Pull the raw bytes of the uploaded file into memory.
        resume_content = await resume.read()

        # Push the bytes to the HuggingFace Hub storage repository.
        resume_url = storage_service.upload_resume(
            file_content=resume_content,
            filename=resume.filename,
        )

        return {
            "success": True,
            "url": resume_url,
        }
    except Exception as e:
        # Surface the failure as a proper 500 instead of a misleading 200,
        # matching the other endpoints in this router.
        raise HTTPException(status_code=500, detail=str(e))
|
app/services/generator.py
CHANGED
@@ -3,6 +3,8 @@ import torch
|
|
3 |
from app.models.schema import GenerateRequest
|
4 |
import os
|
5 |
from fastapi import FastAPI, HTTPException
|
|
|
|
|
6 |
|
7 |
|
8 |
os.environ["TRANSFORMERS_CACHE"] = "/code/backend/transformers_cache"
|
@@ -26,68 +28,40 @@ model = model.to(device)
|
|
26 |
|
27 |
|
28 |
def generate_cover_letter(data: GenerateRequest, resume_text: str) -> str:
|
|
|
29 |
prompt = f"""
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
- Subject line
|
57 |
-
- Closing signature
|
58 |
-
- Any formatting beyond the basic greeting and paragraphs
|
59 |
-
|
60 |
-
Rules:
|
61 |
-
- 3 paragraphs only (50-80 words each)
|
62 |
-
- Total: 200-300 words
|
63 |
-
- Sound natural, not AI-written
|
64 |
-
- Match resume skills to job requirements
|
65 |
-
- Mention specific technologies/companies from inputs
|
66 |
-
- Use simple language, avoid: robust, leverage, utilize, optimal, innovative
|
67 |
-
|
68 |
-
**Missing Skills? Say: "I am eager to learn [skill] and have quickly mastered [example] before"
|
69 |
-
**Less Experience? Focus on quality: "My experience with [skill] includes [specific achievement]"
|
70 |
-
|
71 |
-
Before finalizing, ensure the cover letter:
|
72 |
-
|
73 |
-
- [ ] Addresses the specific role and company by name
|
74 |
-
- [ ] Highlights 2-3 most relevant experiences from the CV
|
75 |
-
- [ ] Addresses any obvious skill gaps with learning commitment
|
76 |
-
- [ ] Uses natural, varied language (avoiding the banned word list)
|
77 |
-
- [ ] Uses simple, clear language that sounds human-written (not AI-generated)
|
78 |
-
- [ ] Avoids overly sophisticated vocabulary or complex sentence structures
|
79 |
-
- [ ] Includes a clear call to action
|
80 |
-
- [ ] Stays within 3-4 paragraph limit
|
81 |
-
|
82 |
-
Remember: Your goal is to write a cover letter that sounds like it was written by a real person with good English skills, not by AI. Keep it natural, straightforward, and genuine.
|
83 |
-
"""
|
84 |
try:
|
85 |
|
86 |
messages = [
|
87 |
-
|
88 |
-
|
89 |
-
]
|
90 |
-
|
91 |
# Apply chat template
|
92 |
text = tokenizer.apply_chat_template(
|
93 |
messages,
|
@@ -104,16 +78,17 @@ Remember: Your goal is to write a cover letter that sounds like it was written b
|
|
104 |
max_length=2048
|
105 |
).to(model.device)
|
106 |
|
107 |
-
|
108 |
generated_ids = model.generate(
|
109 |
**model_inputs,
|
110 |
-
max_new_tokens=
|
111 |
-
do_sample=True,
|
112 |
-
temperature=0.
|
113 |
-
top_p=0.9,
|
114 |
-
top_k=50,
|
115 |
-
repetition_penalty=1.
|
116 |
-
eos_token_id=tokenizer.eos_token_id
|
|
|
117 |
)
|
118 |
|
119 |
generated_only_ids = generated_ids[:, model_inputs.input_ids.shape[1]:]
|
@@ -125,7 +100,7 @@ Remember: Your goal is to write a cover letter that sounds like it was written b
|
|
125 |
skip_special_tokens=True
|
126 |
)
|
127 |
|
128 |
-
return response
|
129 |
|
130 |
|
131 |
|
|
|
3 |
from app.models.schema import GenerateRequest
|
4 |
import os
|
5 |
from fastapi import FastAPI, HTTPException
|
6 |
+
import os
|
7 |
+
from datetime import datetime
|
8 |
|
9 |
|
10 |
os.environ["TRANSFORMERS_CACHE"] = "/code/backend/transformers_cache"
|
|
|
28 |
|
29 |
|
30 |
def generate_cover_letter(data: GenerateRequest, resume_text: str) -> str:
|
31 |
+
# Updated prompt to explicitly request markdown format
|
32 |
prompt = f"""
|
33 |
+
Please generate a professional cover letter in **markdown format** based on the following information:
|
34 |
+
|
35 |
+
**Job Details:**
|
36 |
+
{data.job_details}
|
37 |
+
|
38 |
+
**Resume Content:**
|
39 |
+
{resume_text}
|
40 |
+
|
41 |
+
|
42 |
+
**Important:**
|
43 |
+
- Format the output as proper markdown
|
44 |
+
- Use appropriate headers (# ## ###)
|
45 |
+
- Use **bold** for emphasis where appropriate
|
46 |
+
- Use bullet points or numbered lists where relevant
|
47 |
+
- Include proper spacing and line breaks
|
48 |
+
- Start with the applicant's contact information as a header
|
49 |
+
- Include date and employer information
|
50 |
+
- Structure it as a professional business letter in markdown format
|
51 |
+
|
52 |
+
Please ensure the cover letter is:
|
53 |
+
1. Tailored specifically to the job requirements
|
54 |
+
2. Highlights relevant experience from the resume
|
55 |
+
3. Professional and engaging tone
|
56 |
+
4. Proper markdown formatting throughout
|
57 |
+
"""
|
58 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
try:
|
60 |
|
61 |
messages = [
|
62 |
+
{"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful AI cover letter bot that generates professional cover letters in markdown format. Always respond with properly formatted markdown."},
|
63 |
+
{"role": "user", "content": prompt}
|
64 |
+
]
|
|
|
65 |
# Apply chat template
|
66 |
text = tokenizer.apply_chat_template(
|
67 |
messages,
|
|
|
78 |
max_length=2048
|
79 |
).to(model.device)
|
80 |
|
81 |
+
# Generate with parameters optimized for markdown
|
82 |
generated_ids = model.generate(
|
83 |
**model_inputs,
|
84 |
+
max_new_tokens=800, # Increased for longer markdown content
|
85 |
+
do_sample=True,
|
86 |
+
temperature=0.6, # Slightly lower for more consistent formatting
|
87 |
+
top_p=0.9,
|
88 |
+
top_k=50,
|
89 |
+
repetition_penalty=1.15, # Higher to avoid repetitive formatting
|
90 |
+
eos_token_id=tokenizer.eos_token_id,
|
91 |
+
pad_token_id=tokenizer.pad_token_id
|
92 |
)
|
93 |
|
94 |
generated_only_ids = generated_ids[:, model_inputs.input_ids.shape[1]:]
|
|
|
100 |
skip_special_tokens=True
|
101 |
)
|
102 |
|
103 |
+
return response.strip()
|
104 |
|
105 |
|
106 |
|
app/services/hf_storage_service.py
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from huggingface_hub import HfApi, upload_file
|
2 |
+
import os
|
3 |
+
import uuid
|
4 |
+
from datetime import datetime
|
5 |
+
import tempfile
|
6 |
+
|
7 |
+
class HuggingFaceStorageService:
    """Stores and deletes resume files in a HuggingFace Hub dataset repo."""

    def __init__(self, repo_id: str, token: str):
        """
        Initialize HuggingFace storage service.

        Args:
            repo_id: Your HuggingFace repository ID (e.g., "username/cover-letter-storage")
            token: Your HuggingFace access token
        """
        self.repo_id = repo_id
        self.token = token
        self.api = HfApi()
        # Repository should already exist; this service never creates it.

    def upload_resume(self, file_content, filename: str = None) -> str:
        """
        Upload resume to HuggingFace Hub.

        Args:
            file_content: File content (bytes)
            filename: Original filename (optional; a random .txt name is
                generated when omitted)

        Returns:
            str: Public URL to the uploaded file

        Raises:
            Exception: Wraps any upload failure with a descriptive message.
        """
        if filename is None:
            filename = f"resume_{uuid.uuid4().hex}.txt"

        # Namespace uploads by date and include the actual filename in the
        # repo path. BUGFIX: the path previously ended in a literal
        # placeholder instead of the filename, so every upload on a given
        # day overwrote the same file in the repo.
        timestamp = datetime.now().strftime("%Y/%m/%d")
        file_path = f"resumes/{timestamp}/{filename}"

        # upload_file needs a path on disk, so spill the bytes to a temp
        # file; preserve the original extension instead of forcing ".txt".
        suffix = os.path.splitext(filename)[1] or ".txt"
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            temp_file.write(file_content)
            temp_file_path = temp_file.name

        try:
            # Upload to the HuggingFace Hub dataset repository.
            upload_file(
                path_or_fileobj=temp_file_path,
                path_in_repo=file_path,
                repo_id=self.repo_id,
                token=self.token,
                repo_type="dataset",
            )

            # Direct-download URL served by the Hub for dataset files.
            return f"https://huggingface.co/datasets/{self.repo_id}/resolve/main/{file_path}"

        except Exception as e:
            raise Exception(f"Failed to upload resume: {str(e)}")
        finally:
            # Always remove the temporary file, on success or failure.
            os.unlink(temp_file_path)

    # Removed cover letter upload method since we only store resumes.

    def delete_file(self, file_path: str) -> bool:
        """
        Delete a file from HuggingFace Hub.

        Args:
            file_path: Path to file in the repository

        Returns:
            bool: Success status (False is returned, not raised, on error)
        """
        try:
            self.api.delete_file(
                path_in_repo=file_path,
                repo_id=self.repo_id,
                token=self.token,
                repo_type="dataset",
            )
            return True
        except Exception as e:
            # Best-effort delete: log and report failure to the caller.
            print(f"Failed to delete file: {str(e)}")
            return False
|
requirements.txt
CHANGED
@@ -5,4 +5,7 @@ torch
|
|
5 |
fpdf
|
6 |
pdfplumber
|
7 |
accelerate
|
8 |
-
safetensors
|
|
|
|
|
|
|
|
5 |
fpdf
|
6 |
pdfplumber
|
7 |
accelerate
|
8 |
+
safetensors
|
9 |
+
python-multipart
|
10 |
+
huggingface_hub
|
11 |
+
python-dotenv
|