ruslanmv committed on
Commit
8af823f
·
verified ·
1 Parent(s): c1c0b76

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -50
app.py CHANGED
@@ -7,7 +7,6 @@ from docx import Document
7
  # Initialize the inference client from Hugging Face.
8
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
9
 
10
-
11
  def extract_text_from_pdf(pdf_file_bytes):
12
  """Extract text from PDF bytes."""
13
  try:
@@ -21,7 +20,6 @@ def extract_text_from_pdf(pdf_file_bytes):
21
  except Exception as e:
22
  return f"Error reading PDF: {e}"
23
 
24
-
25
  def extract_text_from_docx(docx_file_bytes):
26
  """Extract text from DOCX bytes."""
27
  try:
@@ -31,58 +29,49 @@ def extract_text_from_docx(docx_file_bytes):
31
  except Exception as e:
32
  return f"Error reading DOCX: {e}"
33
 
34
-
35
  def parse_cv(file, job_description):
36
- """Analyze the CV (PDF or DOCX) against the job description and return an analysis report."""
37
  if file is None:
38
- return "Please upload a CV file."
39
-
40
- # Correctly handle the file object when type="binary"
41
  try:
42
  file_bytes = file
43
- file_ext = "pdf" # Default assumption
44
- if file_bytes:
45
- # Heuristic to detect file type based on content
46
- if file_bytes.startswith(b'%PDF'):
47
- file_ext = "pdf"
48
- elif file_bytes.startswith(b'PK\x03\x04'): # DOCX magic number
49
- file_ext = "docx"
50
- else:
51
- return "Unsupported file format. Cannot determine type from content"
52
  except Exception as e:
53
- return f"Error reading the uploaded file: {e}"
 
54
 
 
55
  if file_ext == "pdf":
56
- text = extract_text_from_pdf(file_bytes)
57
  elif file_ext == "docx":
58
- text = extract_text_from_docx(file_bytes)
59
- else:
60
- return "Unsupported file format. Please upload a PDF or DOCX file."
61
-
62
- if text.startswith("Error"):
63
- return text # Return extraction error if any.
64
-
65
- # Print the extracted CV text
66
- print("Extracted CV text (before sending to LLM):\n", text)
67
 
 
68
  prompt = (
69
- f"Analyze the following CV against the provided job description. "
70
- f"Provide a summary, an assessment of fit, and a score from 0 to 10.\n\n"
71
  f"Job Description:\n{job_description}\n\n"
72
- f"Candidate CV:\n{text}"
73
  )
74
 
75
  try:
76
- # Use 'max_new_tokens' instead of 'max_tokens'
77
- response = client.text_generation(prompt, max_new_tokens=512)
78
  except Exception as e:
79
- return f"Error during CV analysis: {e}"
80
-
81
- return response
82
-
83
 
84
  def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
85
- """Generate a chatbot response based on the conversation history and parameters."""
86
  messages = [{"role": "system", "content": system_message}]
87
  for user_msg, bot_msg in history:
88
  if user_msg:
@@ -93,11 +82,9 @@ def respond(message, history: list[tuple[str, str]], system_message, max_tokens,
93
 
94
  response = ""
95
  try:
96
- # Stream response tokens from the chat completion endpoint.
97
- # Replace 'max_tokens' with 'max_new_tokens'
98
  for message_chunk in client.chat_completion(
99
  messages,
100
- max_new_tokens=max_tokens,
101
  stream=True,
102
  temperature=temperature,
103
  top_p=top_p,
@@ -108,14 +95,12 @@ def respond(message, history: list[tuple[str, str]], system_message, max_tokens,
108
  except Exception as e:
109
  yield f"Error during chat generation: {e}"
110
 
111
-
112
- # Build the Gradio interface
113
  demo = gr.Blocks()
114
  with demo:
115
  gr.Markdown("## AI-powered CV Analyzer and Chatbot")
116
 
117
  with gr.Tab("Chatbot"):
118
- # Set type="messages" for both the chat interface and the chatbot.
119
  chat_interface = gr.ChatInterface(
120
  respond,
121
  chatbot=gr.Chatbot(value=[], label="Chatbot", type="messages"),
@@ -129,16 +114,18 @@ with demo:
129
  )
130
 
131
  with gr.Tab("CV Analyzer"):
132
- gr.Markdown(
133
- "### Upload your CV (PDF or DOCX) and provide the job description to receive a professional analysis and suitability score."
134
- )
135
- # Use type="binary" for the file component.
136
  file_input = gr.File(label="Upload CV", type="binary")
137
  job_desc_input = gr.Textbox(label="Job Description", lines=5)
138
- output_text = gr.Textbox(label="CV Analysis Report", lines=10)
 
139
  analyze_button = gr.Button("Analyze CV")
140
 
141
- analyze_button.click(parse_cv, inputs=[file_input, job_desc_input], outputs=output_text)
 
 
 
 
142
 
143
  if __name__ == "__main__":
144
- demo.queue().launch()
 
7
  # Initialize the inference client from Hugging Face.
8
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
9
 
 
10
  def extract_text_from_pdf(pdf_file_bytes):
11
  """Extract text from PDF bytes."""
12
  try:
 
20
  except Exception as e:
21
  return f"Error reading PDF: {e}"
22
 
 
23
  def extract_text_from_docx(docx_file_bytes):
24
  """Extract text from DOCX bytes."""
25
  try:
 
29
  except Exception as e:
30
  return f"Error reading DOCX: {e}"
31
 
 
32
  def parse_cv(file, job_description):
33
+ """Analyze the CV and return both extracted text and analysis report."""
34
  if file is None:
35
+ return "Please upload a CV file.", ""
36
+
 
37
  try:
38
  file_bytes = file
39
+ file_ext = "pdf"
40
+ if file_bytes.startswith(b'%PDF'):
41
+ file_ext = "pdf"
42
+ elif file_bytes.startswith(b'PK\x03\x04'):
43
+ file_ext = "docx"
44
+ else:
45
+ return "Unsupported file format.", "Cannot determine file type from content"
 
 
46
  except Exception as e:
47
+ error_msg = f"Error reading file: {e}"
48
+ return error_msg, error_msg
49
 
50
+ # Extract text
51
  if file_ext == "pdf":
52
+ extracted_text = extract_text_from_pdf(file_bytes)
53
  elif file_ext == "docx":
54
+ extracted_text = extract_text_from_docx(file_bytes)
55
+
56
+ # Check for extraction errors
57
+ if extracted_text.startswith("Error"):
58
+ return extracted_text, "Error during text extraction. Please check the file."
 
 
 
 
59
 
60
+ # Prepare and send to LLM
61
  prompt = (
62
+ f"Analyze the CV against the job description. Provide a summary, assessment, and score 0-10.\n\n"
 
63
  f"Job Description:\n{job_description}\n\n"
64
+ f"Candidate CV:\n{extracted_text}"
65
  )
66
 
67
  try:
68
+ analysis = client.text_generation(prompt, max_new_tokens=512)
69
+ return extracted_text, analysis
70
  except Exception as e:
71
+ return extracted_text, f"Analysis Error: {e}"
 
 
 
72
 
73
  def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
74
+ """Generate chatbot response."""
75
  messages = [{"role": "system", "content": system_message}]
76
  for user_msg, bot_msg in history:
77
  if user_msg:
 
82
 
83
  response = ""
84
  try:
 
 
85
  for message_chunk in client.chat_completion(
86
  messages,
87
+ max_tokens=max_tokens,
88
  stream=True,
89
  temperature=temperature,
90
  top_p=top_p,
 
95
  except Exception as e:
96
  yield f"Error during chat generation: {e}"
97
 
98
+ # Gradio Interface
 
99
  demo = gr.Blocks()
100
  with demo:
101
  gr.Markdown("## AI-powered CV Analyzer and Chatbot")
102
 
103
  with gr.Tab("Chatbot"):
 
104
  chat_interface = gr.ChatInterface(
105
  respond,
106
  chatbot=gr.Chatbot(value=[], label="Chatbot", type="messages"),
 
114
  )
115
 
116
  with gr.Tab("CV Analyzer"):
117
+ gr.Markdown("### Upload your CV and provide the job description")
 
 
 
118
  file_input = gr.File(label="Upload CV", type="binary")
119
  job_desc_input = gr.Textbox(label="Job Description", lines=5)
120
+ extracted_text = gr.Textbox(label="Extracted CV Content", lines=10, interactive=False)
121
+ analysis_output = gr.Textbox(label="Analysis Report", lines=10)
122
  analyze_button = gr.Button("Analyze CV")
123
 
124
+ analyze_button.click(
125
+ parse_cv,
126
+ inputs=[file_input, job_desc_input],
127
+ outputs=[extracted_text, analysis_output]
128
+ )
129
 
130
  if __name__ == "__main__":
131
+ demo.queue().launch()