ruslanmv committed on
Commit
74f556d
·
verified ·
1 Parent(s): e8612c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -183
app.py CHANGED
@@ -4,44 +4,32 @@ import PyPDF2
4
  import io
5
  from docx import Document
6
  import os
 
7
 
8
  # For PDF generation
9
  from reportlab.pdfgen import canvas
10
  from reportlab.lib.pagesizes import letter
11
- from reportlab.lib import utils
12
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
13
  from reportlab.lib.styles import getSampleStyleSheet
14
 
15
- # Import for CV Optimizer
16
- import fitz # PyMuPDF for PDF handling
17
-
18
- # Initialize the inference client from Hugging Face.
19
- # Updated model to Meta-Llama-3.1-8B-Instruct
20
- try:
21
- client = InferenceClient(
22
- model="meta-llama/Meta-Llama-3-8B-Instruct",
23
- token=os.getenv("HF_TOKEN")
24
- )
25
- except Exception as e:
26
- print(f"Error initializing InferenceClient: {e}")
27
 
 
28
 
29
  def extract_text_from_pdf(pdf_file):
30
- """Extract text from PDF file."""
31
  try:
32
- pdf_reader = PyPDF2.PdfReader(pdf_file)
33
- text = ""
34
- for page in pdf_reader.pages:
35
- page_text = page.extract_text()
36
- if page_text:
37
- text += page_text + "\n"
38
  return text.strip() or "No text could be extracted from the PDF."
39
  except Exception as e:
40
  return f"Error reading PDF: {e}"
41
 
42
-
43
  def extract_text_from_docx(docx_file):
44
- """Extract text from DOCX file."""
45
  try:
46
  doc = Document(docx_file)
47
  text = "\n".join(para.text for para in doc.paragraphs)
@@ -49,14 +37,13 @@ def extract_text_from_docx(docx_file):
49
  except Exception as e:
50
  return f"Error reading DOCX: {e}"
51
 
52
-
53
  def parse_cv(file, job_description):
54
- """Analyze the CV, show the prompt (debug) and return LLM analysis."""
55
  if file is None:
56
  return "Please upload a CV file.", ""
57
 
58
  try:
59
- file_path = file.name # Get the file path
60
  file_ext = os.path.splitext(file_path)[1].lower()
61
 
62
  if file_ext == ".pdf":
@@ -64,20 +51,13 @@ def parse_cv(file, job_description):
64
  elif file_ext == ".docx":
65
  extracted_text = extract_text_from_docx(file_path)
66
  else:
67
- return (
68
- "Unsupported file format. Please upload a PDF or DOCX file.",
69
- "Unsupported file format.",
70
- )
71
-
72
  except Exception as e:
73
- error_msg = f"Error reading file: {e}"
74
- return error_msg, error_msg
75
 
76
- # Check for extraction errors
77
  if extracted_text.startswith("Error"):
78
  return extracted_text, "Error during text extraction. Please check the file."
79
 
80
- # Prepare debug prompt
81
  prompt = (
82
  f"Analyze the CV against the job description. Provide a summary, assessment, "
83
  f"and a score 0-10.\n\n"
@@ -85,201 +65,100 @@ def parse_cv(file, job_description):
85
  f"Candidate CV:\n{extracted_text}\n"
86
  )
87
 
88
- # Call LLM
89
  try:
90
  analysis = client.text_generation(prompt, max_new_tokens=512)
91
- # Show both the debug prompt and the LLM analysis in the "Analysis Report"
92
- analysis_report = (
93
- f"--- DEBUG PROMPT ---\n{prompt}\n"
94
- f"--- LLM ANALYSIS ---\n{analysis}"
95
- )
96
- return extracted_text, analysis_report
97
  except Exception as e:
98
  return extracted_text, f"Analysis Error: {e}"
99
 
100
-
101
- def respond(
102
- message,
103
- history: list[tuple[str, str]],
104
- system_message,
105
- max_tokens,
106
- temperature,
107
- top_p,
108
- ):
109
- """Generate chatbot response."""
110
- messages = [{"role": "system", "content": system_message}]
111
- for user_msg, bot_msg in history:
112
- if user_msg:
113
- messages.append({"role": "user", "content": user_msg})
114
- if bot_msg:
115
- messages.append({"role": "assistant", "content": bot_msg})
116
- messages.append({"role": "user", "content": message})
117
-
118
- response = ""
119
- try:
120
- for message_chunk in client.text_generation(
121
- messages,
122
- max_new_tokens=max_tokens,
123
- stream=True,
124
- temperature=temperature,
125
- top_p=top_p,
126
- ):
127
- response += message_chunk
128
- yield response
129
- except Exception as e:
130
- yield f"Error during chat generation: {e}"
131
-
132
-
133
- def create_pdf_report(report_text):
134
- """Creates a PDF report using SimpleDocTemplate for better formatting."""
135
- if not report_text.strip():
136
- report_text = "No analysis report to convert."
137
-
138
- buffer = io.BytesIO()
139
- doc = SimpleDocTemplate(buffer, pagesize=letter)
140
- styles = getSampleStyleSheet()
141
- Story = []
142
-
143
- # Title
144
- Story.append(Paragraph("<b>Analysis Report</b>", styles["Title"]))
145
- Story.append(Spacer(1, 12))
146
-
147
- # Report Content
148
- for line in report_text.split("\n"):
149
- Story.append(Paragraph(line, styles["Normal"]))
150
- Story.append(Spacer(1, 6)) # Add a small space between lines
151
-
152
- doc.build(Story)
153
- buffer.seek(0)
154
- return buffer
155
-
156
-
157
- def toggle_download_button(analysis_report):
158
- """Toggle the download button."""
159
- return gr.update(
160
- interactive=bool(analysis_report.strip()),
161
- visible=bool(analysis_report.strip()),
162
- )
163
-
164
- # Function to optimize resume based on job title
165
  def optimize_resume(resume_text, job_title):
166
  prompt = f"Optimize the following resume for the job title '{job_title}':\n\n{resume_text}"
167
  responses = []
168
  try:
169
- for message in client.text_generation(
170
- prompt,
171
- max_new_tokens=1000,
172
  stream=True,
173
  ):
174
- responses.append(message)
175
  except Exception as e:
176
  return f"Error during model inference: {e}"
177
 
178
  return ''.join(responses)
179
 
180
- # Function to extract text from a PDF file (using PyMuPDF)
181
- def extract_text_from_pdf_fitz(pdf_file_path):
182
- text = ""
183
- try:
184
- pdf_document = fitz.open(pdf_file_path)
185
- for page_num in range(len(pdf_document)):
186
- page = pdf_document.load_page(page_num)
187
- text += page.get_text()
188
- except Exception as e:
189
- return f"Error extracting text from PDF: {e}"
190
- return text
191
-
192
- # Function to process the resume and job title inputs for optimization
193
  def process_resume(file, job_title):
194
  try:
195
  file_name = file.name
196
  if file_name.endswith(".pdf"):
197
- # Extract text if the file is a PDF
198
- resume_text = extract_text_from_pdf_fitz(file.name)
199
  elif file_name.endswith(".docx"):
200
- # Extract text if the file is a Word document
201
  resume_text = extract_text_from_docx(file.name)
202
  else:
203
- # Assume the file is a text file and read it directly
204
- with open(file.name, 'r', encoding='utf-8') as f:
205
- resume_text = f.read()
206
 
207
- # Optimize the resume
208
  optimized_resume = optimize_resume(resume_text, job_title)
209
-
210
  return optimized_resume
211
  except Exception as e:
212
  return f"Error processing resume: {e}"
213
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  # Build the Gradio UI
215
  demo = gr.Blocks()
216
  with demo:
217
- gr.Markdown("## AI-powered CV Analyzer and Chatbot")
218
 
219
  with gr.Tab("Chatbot"):
220
  chat_interface = gr.ChatInterface(
221
- respond,
222
- chatbot=gr.Chatbot(value=[], label="Chatbot"),
 
 
 
223
  textbox=gr.Textbox(placeholder="Enter your message here...", label="Message"),
224
- additional_inputs=[
225
- gr.Textbox(
226
- value="You are a friendly Chatbot.", label="System message"
227
- ),
228
- gr.Slider(
229
- minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
230
- ),
231
- gr.Slider(
232
- minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
233
- ),
234
- gr.Slider(
235
- minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
236
- ),
237
- ],
238
  )
239
 
240
  with gr.Tab("CV Analyzer"):
241
  gr.Markdown("### Upload your CV and provide the job description")
242
  file_input = gr.File(label="Upload CV", file_types=[".pdf", ".docx"])
243
  job_desc_input = gr.Textbox(label="Job Description", lines=5)
244
- extracted_text = gr.Textbox(
245
- label="Extracted CV Content", lines=10, interactive=False
246
- )
247
- analysis_output = gr.Textbox(
248
- label="Analysis Report", lines=10, interactive=False
249
- )
250
- download_pdf_button = gr.Button(
251
- "Download Analysis as PDF", visible=False, interactive=False
252
- )
253
  pdf_file = gr.File(label="Download PDF", interactive=False)
254
-
255
  analyze_button = gr.Button("Analyze CV")
 
 
 
 
256
 
257
- analyze_button.click(
258
- parse_cv,
259
- inputs=[file_input, job_desc_input],
260
- outputs=[extracted_text, analysis_output],
261
- ).then(
262
- toggle_download_button,
263
- inputs=[analysis_output],
264
- outputs=[download_pdf_button],
265
- )
266
-
267
- download_pdf_button.click(
268
- create_pdf_report, inputs=[analysis_output], outputs=[pdf_file]
269
- )
270
-
271
  with gr.Tab("CV Optimizer"):
272
- gr.Markdown("### Upload your CV and enter the job title to optimize your resume")
273
- cv_file_input = gr.File(label="Upload CV (PDF or DOCX)", file_types=[".pdf", ".docx"])
274
- job_title_input = gr.Textbox(label="Job Title", placeholder="Enter the job title...")
275
  optimized_resume_output = gr.Textbox(label="Optimized Resume", lines=20)
276
- optimize_button = gr.Button("Optimize CV")
277
-
278
- optimize_button.click(
279
- process_resume,
280
- inputs=[cv_file_input, job_title_input],
281
- outputs=[optimized_resume_output]
282
- )
283
 
284
  if __name__ == "__main__":
285
- demo.queue().launch()
 
4
  import io
5
  from docx import Document
6
  import os
7
+ import pymupdf # Corrected import for PyMuPDF
8
 
9
  # For PDF generation
10
  from reportlab.pdfgen import canvas
11
  from reportlab.lib.pagesizes import letter
 
12
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
13
  from reportlab.lib.styles import getSampleStyleSheet
14
 
15
# Hugging Face Inference Client wired to Meta-Llama-3.1-8B-Instruct.
# The API token is read from the HF_TOKEN environment variable
# (None when unset, in which case unauthenticated access is attempted).
client = InferenceClient(
    model="meta-llama/Meta-Llama-3.1-8B-Instruct",
    token=os.environ.get("HF_TOKEN"),
)
 
 
 
 
 
 
 
20
 
21
# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Extract all text from a PDF using PyMuPDF.

    Args:
        pdf_file: Path (or stream) accepted by ``pymupdf.open``.

    Returns:
        The concatenated page text, a placeholder message when the PDF
        yields no extractable text, or an ``"Error reading PDF: ..."``
        string if opening/reading fails.
    """
    try:
        # Context manager guarantees the document handle is closed even
        # when extraction raises; the previous code leaked the handle.
        with pymupdf.open(pdf_file) as pdf_document:
            text = "".join(page.get_text() for page in pdf_document)
        return text.strip() or "No text could be extracted from the PDF."
    except Exception as e:
        return f"Error reading PDF: {e}"
30
 
31
+ # Function to extract text from DOCX
32
  def extract_text_from_docx(docx_file):
 
33
  try:
34
  doc = Document(docx_file)
35
  text = "\n".join(para.text for para in doc.paragraphs)
 
37
  except Exception as e:
38
  return f"Error reading DOCX: {e}"
39
 
40
# Function to analyze CV
def parse_cv(file, job_description):
    """Extract text from an uploaded CV and have the LLM assess it.

    Returns a ``(extracted_cv_text, analysis_report)`` pair of strings;
    error descriptions are substituted when reading or analysis fails.
    """
    if file is None:
        return "Please upload a CV file.", ""

    try:
        file_path = file.name
        file_ext = os.path.splitext(file_path)[1].lower()

        # Guard clause: reject anything that is not PDF or DOCX up front.
        if file_ext not in (".pdf", ".docx"):
            return "Unsupported file format. Please upload a PDF or DOCX file.", ""

        extracted_text = (
            extract_text_from_pdf(file_path)
            if file_ext == ".pdf"
            else extract_text_from_docx(file_path)
        )
    except Exception as e:
        return f"Error reading file: {e}", ""

    # The extractors signal failure via an "Error ..." string, not an exception.
    if extracted_text.startswith("Error"):
        return extracted_text, "Error during text extraction. Please check the file."

    prompt = (
        f"Analyze the CV against the job description. Provide a summary, assessment, "
        f"and a score 0-10.\n\n"
        f"Job Description:\n{job_description}\n\n"
        f"Candidate CV:\n{extracted_text}\n"
    )

    try:
        analysis = client.text_generation(prompt, max_new_tokens=512)
        return extracted_text, f"--- Analysis Report ---\n{analysis}"
    except Exception as e:
        return extracted_text, f"Analysis Error: {e}"
73
 
74
# Function to optimize resume
def optimize_resume(resume_text, job_title):
    """Ask the LLM to rewrite *resume_text* targeting *job_title*.

    Streams the chat completion and concatenates the text chunks.

    Returns:
        The optimized resume text, or an ``"Error during model
        inference: ..."`` string if the API call fails.
    """
    prompt = f"Optimize the following resume for the job title '{job_title}':\n\n{resume_text}"
    responses = []
    try:
        for message in client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            max_tokens=1000,
            stream=True,
        ):
            # Streaming deltas may carry content=None (e.g. the final
            # chunk). Appending None and joining later raised TypeError
            # OUTSIDE this try block, so skip falsy chunks here.
            chunk = message.choices[0].delta.content
            if chunk:
                responses.append(chunk)
    except Exception as e:
        return f"Error during model inference: {e}"

    return ''.join(responses)
89
 
90
# Function to process resume and job title inputs
def process_resume(file, job_title):
    """Route an uploaded resume to the matching extractor and optimize it.

    Returns the optimized resume text, or an error/unsupported-format
    message string on failure.
    """
    try:
        resume_path = file.name
        if resume_path.endswith(".pdf"):
            resume_text = extract_text_from_pdf(resume_path)
        elif resume_path.endswith(".docx"):
            resume_text = extract_text_from_docx(resume_path)
        else:
            return "Unsupported file format. Please upload a PDF or DOCX file."
        return optimize_resume(resume_text, job_title)
    except Exception as e:
        return f"Error processing resume: {e}"
105
 
106
# Function to generate a PDF report
def create_pdf_report(report_text):
    """Render *report_text* into a simple PDF.

    Returns:
        io.BytesIO: in-memory PDF, positioned at offset 0.
    """
    if not report_text.strip():
        # Restore the empty-input guard the refactor dropped: never
        # emit a blank PDF.
        report_text = "No analysis report to convert."

    buffer = io.BytesIO()
    doc = SimpleDocTemplate(buffer, pagesize=letter)
    styles = getSampleStyleSheet()
    story = [Paragraph("<b>Analysis Report</b>", styles["Title"]), Spacer(1, 12)]

    for line in report_text.split("\n"):
        story.append(Paragraph(line, styles["Normal"]))
        story.append(Spacer(1, 6))  # small gap between report lines

    doc.build(story)
    buffer.seek(0)
    return buffer
120
+
121
# Function to toggle the download button
def toggle_download_button(analysis_report):
    """Show and enable the download button only when a report exists."""
    has_report = bool(analysis_report.strip())
    return gr.update(interactive=has_report, visible=has_report)
124
+
125
# Build the Gradio UI
demo = gr.Blocks()
with demo:
    gr.Markdown("## AI-powered CV Analyzer, Optimizer, and Chatbot")

    with gr.Tab("Chatbot"):
        chat_interface = gr.ChatInterface(
            # Return the assistant's text, not the raw ChatCompletion
            # object — gr.ChatInterface expects the handler to return a
            # string. NOTE(review): non-streaming single-turn reply;
            # history is intentionally unused here.
            lambda message, history: client.chat_completion(
                messages=[{"role": "user", "content": message}],
                max_tokens=512,
            ).choices[0].message.content,
            chatbot=gr.Chatbot(label="Chatbot"),
            textbox=gr.Textbox(placeholder="Enter your message here...", label="Message"),
        )

    with gr.Tab("CV Analyzer"):
        gr.Markdown("### Upload your CV and provide the job description")
        file_input = gr.File(label="Upload CV", file_types=[".pdf", ".docx"])
        job_desc_input = gr.Textbox(label="Job Description", lines=5)
        extracted_text = gr.Textbox(label="Extracted CV Content", lines=10, interactive=False)
        analysis_output = gr.Textbox(label="Analysis Report", lines=10, interactive=False)
        download_pdf_button = gr.Button("Download Analysis as PDF", visible=False, interactive=False)
        pdf_file = gr.File(label="Download PDF", interactive=False)
        analyze_button = gr.Button("Analyze CV")

        # Bug fix: .then() must be chained on the event returned by
        # .click(); gr.Button itself has no .then attribute, so the
        # previous `analyze_button.then(...)` raised AttributeError.
        analyze_button.click(
            parse_cv,
            inputs=[file_input, job_desc_input],
            outputs=[extracted_text, analysis_output],
        ).then(
            toggle_download_button,
            inputs=[analysis_output],
            outputs=[download_pdf_button],
        )
        download_pdf_button.click(create_pdf_report, [analysis_output], [pdf_file])

    with gr.Tab("CV Optimizer"):
        gr.Markdown("### Upload your Resume and Enter Job Title")
        resume_file = gr.File(label="Upload Resume (PDF or Word)")
        job_title_input = gr.Textbox(label="Job Title", lines=1)
        optimized_resume_output = gr.Textbox(label="Optimized Resume", lines=20)
        optimize_button = gr.Button("Optimize Resume")

        optimize_button.click(process_resume, [resume_file, job_title_input], [optimized_resume_output])

if __name__ == "__main__":
    demo.queue().launch()