Update app.py
app.py CHANGED
@@ -4,44 +4,32 @@ import PyPDF2
 import io
 from docx import Document
 import os
+import pymupdf  # Corrected import for PyMuPDF

 # For PDF generation
 from reportlab.pdfgen import canvas
 from reportlab.lib.pagesizes import letter
-from reportlab.lib import utils
 from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
 from reportlab.lib.styles import getSampleStyleSheet

-#
-
-
-
-
-try:
-    client = InferenceClient(
-        model="meta-llama/Meta-Llama-3-8B-Instruct",
-        token=os.getenv("HF_TOKEN")
-    )
-except Exception as e:
-    print(f"Error initializing InferenceClient: {e}")
+# Initialize Hugging Face Inference Client with Meta-Llama-3.1-8B-Instruct
+client = InferenceClient(
+    model="meta-llama/Meta-Llama-3.1-8B-Instruct",
+    token=os.getenv("HF_TOKEN")
+)

+# Function to extract text from PDF

 def extract_text_from_pdf(pdf_file):
-    """Extract text from PDF file."""
     try:
-
-        text = ""
-        for page in pdf_reader.pages:
-            page_text = page.extract_text()
-            if page_text:
-                text += page_text + "\n"
+        pdf_document = pymupdf.open(pdf_file)
+        text = "".join(page.get_text() for page in pdf_document)
         return text.strip() or "No text could be extracted from the PDF."
     except Exception as e:
         return f"Error reading PDF: {e}"

-
+# Function to extract text from DOCX
 def extract_text_from_docx(docx_file):
-    """Extract text from DOCX file."""
     try:
         doc = Document(docx_file)
         text = "\n".join(para.text for para in doc.paragraphs)
@@ -49,14 +37,13 @@ def extract_text_from_docx(docx_file):
     except Exception as e:
         return f"Error reading DOCX: {e}"

-
+# Function to analyze CV
 def parse_cv(file, job_description):
-    """Analyze the CV, show the prompt (debug) and return LLM analysis."""
     if file is None:
         return "Please upload a CV file.", ""

     try:
-        file_path = file.name
+        file_path = file.name
         file_ext = os.path.splitext(file_path)[1].lower()

         if file_ext == ".pdf":
@@ -64,20 +51,13 @@ def parse_cv(file, job_description):
         elif file_ext == ".docx":
             extracted_text = extract_text_from_docx(file_path)
         else:
-            return
-                "Unsupported file format. Please upload a PDF or DOCX file.",
-                "Unsupported file format.",
-            )
-
+            return "Unsupported file format. Please upload a PDF or DOCX file.", ""
     except Exception as e:
-
-        return error_msg, error_msg
+        return f"Error reading file: {e}", ""

-    # Check for extraction errors
     if extracted_text.startswith("Error"):
         return extracted_text, "Error during text extraction. Please check the file."

-    # Prepare debug prompt
     prompt = (
         f"Analyze the CV against the job description. Provide a summary, assessment, "
         f"and a score 0-10.\n\n"
@@ -85,201 +65,100 @@ def parse_cv(file, job_description):
         f"Candidate CV:\n{extracted_text}\n"
     )

-    # Call LLM
     try:
         analysis = client.text_generation(prompt, max_new_tokens=512)
-
-        analysis_report = (
-            f"--- DEBUG PROMPT ---\n{prompt}\n"
-            f"--- LLM ANALYSIS ---\n{analysis}"
-        )
-        return extracted_text, analysis_report
+        return extracted_text, f"--- Analysis Report ---\n{analysis}"
     except Exception as e:
         return extracted_text, f"Analysis Error: {e}"

-
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    """Generate chatbot response."""
-    messages = [{"role": "system", "content": system_message}]
-    for user_msg, bot_msg in history:
-        if user_msg:
-            messages.append({"role": "user", "content": user_msg})
-        if bot_msg:
-            messages.append({"role": "assistant", "content": bot_msg})
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-    try:
-        for message_chunk in client.text_generation(
-            messages,
-            max_new_tokens=max_tokens,
-            stream=True,
-            temperature=temperature,
-            top_p=top_p,
-        ):
-            response += message_chunk
-            yield response
-    except Exception as e:
-        yield f"Error during chat generation: {e}"
-
-
-def create_pdf_report(report_text):
-    """Creates a PDF report using SimpleDocTemplate for better formatting."""
-    if not report_text.strip():
-        report_text = "No analysis report to convert."
-
-    buffer = io.BytesIO()
-    doc = SimpleDocTemplate(buffer, pagesize=letter)
-    styles = getSampleStyleSheet()
-    Story = []
-
-    # Title
-    Story.append(Paragraph("<b>Analysis Report</b>", styles["Title"]))
-    Story.append(Spacer(1, 12))
-
-    # Report Content
-    for line in report_text.split("\n"):
-        Story.append(Paragraph(line, styles["Normal"]))
-        Story.append(Spacer(1, 6))  # Add a small space between lines
-
-    doc.build(Story)
-    buffer.seek(0)
-    return buffer
-
-
-def toggle_download_button(analysis_report):
-    """Toggle the download button."""
-    return gr.update(
-        interactive=bool(analysis_report.strip()),
-        visible=bool(analysis_report.strip()),
-    )
-
-# Function to optimize resume based on job title
+# Function to optimize resume
 def optimize_resume(resume_text, job_title):
     prompt = f"Optimize the following resume for the job title '{job_title}':\n\n{resume_text}"
     responses = []
     try:
-        for message in client.
-            prompt,
-
+        for message in client.chat_completion(
+            messages=[{"role": "user", "content": prompt}],
+            max_tokens=1000,
             stream=True,
         ):
-            responses.append(message)
+            responses.append(message.choices[0].delta.content)
     except Exception as e:
         return f"Error during model inference: {e}"

     return ''.join(responses)

-# Function to
-def extract_text_from_pdf_fitz(pdf_file_path):
-    text = ""
-    try:
-        pdf_document = fitz.open(pdf_file_path)
-        for page_num in range(len(pdf_document)):
-            page = pdf_document.load_page(page_num)
-            text += page.get_text()
-    except Exception as e:
-        return f"Error extracting text from PDF: {e}"
-    return text
-
-# Function to process the resume and job title inputs for optimization
+# Function to process resume and job title inputs
 def process_resume(file, job_title):
     try:
         file_name = file.name
         if file_name.endswith(".pdf"):
-
-            resume_text = extract_text_from_pdf_fitz(file.name)
+            resume_text = extract_text_from_pdf(file.name)
         elif file_name.endswith(".docx"):
-            # Extract text if the file is a Word document
             resume_text = extract_text_from_docx(file.name)
         else:
-
-            with open(file.name, 'r', encoding='utf-8') as f:
-                resume_text = f.read()
+            return "Unsupported file format. Please upload a PDF or DOCX file."

-        # Optimize the resume
         optimized_resume = optimize_resume(resume_text, job_title)
-
         return optimized_resume
     except Exception as e:
         return f"Error processing resume: {e}"

+# Function to generate a PDF report
+def create_pdf_report(report_text):
+    buffer = io.BytesIO()
+    doc = SimpleDocTemplate(buffer, pagesize=letter)
+    styles = getSampleStyleSheet()
+    Story = [Paragraph("<b>Analysis Report</b>", styles["Title"]), Spacer(1, 12)]
+
+    for line in report_text.split("\n"):
+        Story.append(Paragraph(line, styles["Normal"]))
+        Story.append(Spacer(1, 6))
+
+    doc.build(Story)
+    buffer.seek(0)
+    return buffer
+
+# Function to toggle the download button
+def toggle_download_button(analysis_report):
+    return gr.update(interactive=bool(analysis_report.strip()), visible=bool(analysis_report.strip()))
+
 # Build the Gradio UI
 demo = gr.Blocks()
 with demo:
-    gr.Markdown("## AI-powered CV Analyzer and Chatbot")
+    gr.Markdown("## AI-powered CV Analyzer, Optimizer, and Chatbot")

     with gr.Tab("Chatbot"):
         chat_interface = gr.ChatInterface(
-
-
+            lambda message, history: client.chat_completion(
+                messages=[{"role": "user", "content": message}],
+                max_tokens=512,
+            ),
+            chatbot=gr.Chatbot(label="Chatbot"),
             textbox=gr.Textbox(placeholder="Enter your message here...", label="Message"),
-            additional_inputs=[
-                gr.Textbox(
-                    value="You are a friendly Chatbot.", label="System message"
-                ),
-                gr.Slider(
-                    minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
-                ),
-                gr.Slider(
-                    minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
-                ),
-                gr.Slider(
-                    minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
-                ),
-            ],
         )

     with gr.Tab("CV Analyzer"):
         gr.Markdown("### Upload your CV and provide the job description")
         file_input = gr.File(label="Upload CV", file_types=[".pdf", ".docx"])
         job_desc_input = gr.Textbox(label="Job Description", lines=5)
-        extracted_text = gr.Textbox(
-
-        )
-        analysis_output = gr.Textbox(
-            label="Analysis Report", lines=10, interactive=False
-        )
-        download_pdf_button = gr.Button(
-            "Download Analysis as PDF", visible=False, interactive=False
-        )
+        extracted_text = gr.Textbox(label="Extracted CV Content", lines=10, interactive=False)
+        analysis_output = gr.Textbox(label="Analysis Report", lines=10, interactive=False)
+        download_pdf_button = gr.Button("Download Analysis as PDF", visible=False, interactive=False)
         pdf_file = gr.File(label="Download PDF", interactive=False)
-
         analyze_button = gr.Button("Analyze CV")
+
+        analyze_button.click(parse_cv, [file_input, job_desc_input], [extracted_text, analysis_output])
+        analyze_button.then(toggle_download_button, [analysis_output], [download_pdf_button])
+        download_pdf_button.click(create_pdf_report, [analysis_output], [pdf_file])

-        analyze_button.click(
-            parse_cv,
-            inputs=[file_input, job_desc_input],
-            outputs=[extracted_text, analysis_output],
-        ).then(
-            toggle_download_button,
-            inputs=[analysis_output],
-            outputs=[download_pdf_button],
-        )
-
-        download_pdf_button.click(
-            create_pdf_report, inputs=[analysis_output], outputs=[pdf_file]
-        )
-
     with gr.Tab("CV Optimizer"):
-        gr.Markdown("### Upload your
-
-        job_title_input = gr.Textbox(label="Job Title",
+        gr.Markdown("### Upload your Resume and Enter Job Title")
+        resume_file = gr.File(label="Upload Resume (PDF or Word)")
+        job_title_input = gr.Textbox(label="Job Title", lines=1)
         optimized_resume_output = gr.Textbox(label="Optimized Resume", lines=20)
-        optimize_button = gr.Button("Optimize
-
-        optimize_button.click(
-            process_resume,
-            inputs=[cv_file_input, job_title_input],
-            outputs=[optimized_resume_output]
-        )
+        optimize_button = gr.Button("Optimize Resume")
+
+        optimize_button.click(process_resume, [resume_file, job_title_input], [optimized_resume_output])

 if __name__ == "__main__":
-    demo.queue().launch()
+    demo.queue().launch()
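For reference, a minimal sketch (not part of the commit) of how the streamed chat_completion output used in the new optimize_resume can be collected into a single string. It assumes a recent huggingface_hub release and an HF_TOKEN environment variable; the helper name stream_completion is hypothetical, and the guard against empty delta content is an extra precaution rather than something the committed code does.

import os
from huggingface_hub import InferenceClient

# Same client configuration as in the updated app.py; HF_TOKEN must be set.
client = InferenceClient(
    model="meta-llama/Meta-Llama-3.1-8B-Instruct",
    token=os.getenv("HF_TOKEN"),
)

def stream_completion(prompt: str, max_tokens: int = 1000) -> str:
    """Hypothetical helper: collect a streamed chat completion into one string."""
    parts = []
    for chunk in client.chat_completion(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        stream=True,
    ):
        delta = chunk.choices[0].delta.content
        if delta:  # skip chunks that carry no text (assumption: some deltas may be empty)
            parts.append(delta)
    return "".join(parts)

Joining only non-empty deltas mirrors the loop in optimize_resume while avoiding a join over empty values.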