gopiashokan committed on
Commit 2a6a1b1 · verified
1 Parent(s): 980742d

Upload app.py

Files changed (1)
app.py +438 -0
app.py ADDED
@@ -0,0 +1,438 @@
+ import time
+ import numpy as np
+ import pandas as pd
+ import streamlit as st
+ from streamlit_option_menu import option_menu
+ from streamlit_extras.add_vertical_space import add_vertical_space
+ from PyPDF2 import PdfReader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from langchain.vectorstores import FAISS
+ from langchain.chat_models import ChatOpenAI
+ from langchain.chains.question_answering import load_qa_chain
+ from selenium import webdriver
+ from selenium.webdriver.common.by import By
+ import warnings
+ warnings.filterwarnings('ignore')
+ 
+ 
+ def streamlit_config():
+ 
+     # page configuration
+     st.set_page_config(page_title='Resume Analyzer AI', layout="wide")
+ 
+     # make the page header transparent
+     page_background_color = """
+     <style>
+     [data-testid="stHeader"]
+     {
+         background: rgba(0,0,0,0);
+     }
+     </style>
+     """
+     st.markdown(page_background_color, unsafe_allow_html=True)
+ 
+     # centered page title
+     st.markdown(f'<h1 style="text-align: center;">AI-Powered Resume Analyzer and <br> LinkedIn Scraper with Selenium</h1>',
+                 unsafe_allow_html=True)
+ 
+ 
+ class resume_analyzer:
+ 
+     def pdf_to_chunks(pdf):
+         # read the uploaded PDF file
+         pdf_reader = PdfReader(pdf)
+ 
+         # extract text from each page separately
+         text = ""
+         for page in pdf_reader.pages:
+             text += page.extract_text()
+ 
+         # split the long text into small chunks
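+         # the 200-character overlap keeps context continuous across chunk boundaries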
+         text_splitter = RecursiveCharacterTextSplitter(
+             chunk_size=700,
+             chunk_overlap=200,
+             length_function=len)
+ 
+         chunks = text_splitter.split_text(text=text)
+         return chunks
+ 
+ 
+     def resume_summary(query_with_chunks):
+         query = f'''Provide a detailed summary of the resume below, and conclude with the key takeaways.
+ 
+         """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+         {query_with_chunks}
+         """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+         '''
+         return query
+ 
+ 
+     def resume_strength(query_with_chunks):
+         query = f'''Provide a detailed analysis of the strengths of the resume below, and conclude with the key points.
+ 
+         """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+         {query_with_chunks}
+         """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+         '''
+         return query
+ 
+ 
+     def resume_weakness(query_with_chunks):
+         query = f'''Provide a detailed analysis of the weaknesses of the resume below, and explain how to improve it.
+ 
+         """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+         {query_with_chunks}
+         """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+         '''
+         return query
+ 
+ 
+     def job_title_suggestion(query_with_chunks):
+ 
+         query = f'''What job roles on LinkedIn should I apply to, based on the resume below?
+ 
+         """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+         {query_with_chunks}
+         """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+         '''
+         return query
+ 
+ 
+     def openai(openai_api_key, chunks, analyze):
+ 
+         # use the OpenAI service for embeddings
+         embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
+ 
+         # FAISS (Facebook AI Similarity Search) converts the text chunks to numerical vectors
+         vectorstores = FAISS.from_texts(chunks, embedding=embeddings)
+ 
+         # compare the query against the chunks and select the top 'k' most similar chunks
+         docs = vectorstores.similarity_search(query=analyze, k=3)
+ 
+         # create a chat model object using the GPT-3.5 Turbo model
+         llm = ChatOpenAI(model='gpt-3.5-turbo', api_key=openai_api_key)
+ 
+         # question-answering (QA) pipeline, built with the load_qa_chain function
+         chain = load_qa_chain(llm=llm, chain_type='stuff')
+ 
+         response = chain.run(input_documents=docs, question=analyze)
+         return response
+ 
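+     # typical flow, mirroring the Streamlit branches below (variable names are illustrative):
+     #   chunks = resume_analyzer.pdf_to_chunks(pdf)
+     #   summary_query = resume_analyzer.resume_summary(query_with_chunks=chunks)
+     #   result = resume_analyzer.openai(openai_api_key=key, chunks=chunks, analyze=summary_query)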
+ 
+ 
+ class linkedin_scrap:
+ 
+     def linkedin_open_scrolldown(driver, user_job_title):
+ 
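+         # URL-encode the job titles: spaces become '%20', and multiple titles are joined with '%2C%20' (an encoded ', ')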
+         b = []
+         for i in user_job_title:
+             x = i.split()
+             y = '%20'.join(x)
+             b.append(y)
+         job_title = '%2C%20'.join(b)
+ 
+         link = f"https://in.linkedin.com/jobs/search?keywords={job_title}&location=India&locationId=&geoId=102713980&f_TPR=r604800&position=1&pageNum=0"
+ 
+         driver.get(link)
+         driver.implicitly_wait(10)
+ 
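+         # scroll to the bottom of the results a few times, clicking 'See more jobs' whenever the button appears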
+         for i in range(0, 3):
+             driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
+             time.sleep(5)
+             try:
+                 driver.find_element(by=By.CSS_SELECTOR, value="button[aria-label='See more jobs']").click()
+                 time.sleep(3)
+             except Exception:
+                 pass
+ 
+ 
+     def company_name(driver):
+ 
+         company = driver.find_elements(by=By.CSS_SELECTOR, value='h4[class="base-search-card__subtitle"]')
+ 
+         company_name = []
+ 
+         for i in company:
+             company_name.append(i.text)
+ 
+         return company_name
+ 
+ 
+     def company_location(driver):
+ 
+         location = driver.find_elements(by=By.CSS_SELECTOR, value='span[class="job-search-card__location"]')
+ 
+         company_location = []
+ 
+         for i in location:
+             company_location.append(i.text)
+ 
+         return company_location
+ 
+ 
+     def job_title(driver):
+ 
+         title = driver.find_elements(by=By.CSS_SELECTOR, value='h3[class="base-search-card__title"]')
+ 
+         job_title = []
+ 
+         for i in title:
+             job_title.append(i.text)
+ 
+         return job_title
+ 
+ 
+     def job_url(driver):
+ 
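+         # collect every anchor that links to a job posting and keep the href attributes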
+         url = driver.find_elements(by=By.XPATH, value='//a[contains(@href, "/jobs/")]')
+ 
+         url_list = [i.get_attribute('href') for i in url]
+ 
+         job_url = []
+ 
+         for url in url_list:
+             job_url.append(url)
+ 
+         return job_url
+ 
+ 
+     def job_title_filter(x, user_job_title):
+ 
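+         # keep a scraped job title only if it shares more than one word with the
+         # user's suggested titles; otherwise return NaN so the row can be dropped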
+         s = [i.lower() for i in user_job_title]
+         suggestion = []
+         for i in s:
+             suggestion.extend(i.split())
+ 
+         s = x.split()
+         a = [i.lower() for i in s]
+ 
+         intersection = list(set(suggestion).intersection(set(a)))
+         return x if len(intersection) > 1 else np.nan
+ 
+ 
+     def get_description(driver, link):
+ 
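+         # open the job posting and click 'Show more' to expand the full description text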
+         driver.get(link)
+         time.sleep(3)
+ 
+         driver.find_element(by=By.CSS_SELECTOR,
+                             value='button[data-tracking-control-name="public_jobs_show-more-html-btn"]').click()
+         time.sleep(2)
+ 
+         description = driver.find_elements(by=By.CSS_SELECTOR,
+                                            value='div[class="show-more-less-html__markup relative overflow-hidden"]')
+         driver.implicitly_wait(4)
+ 
+         for j in description:
+             return j.text
+ 
+ 
+     def data_scrap(driver, user_job_title):
+ 
+         # combine all the scraped data into a single dataframe
+         df = pd.DataFrame(linkedin_scrap.company_name(driver), columns=['Company Name'])
+         df['Job Title'] = pd.DataFrame(linkedin_scrap.job_title(driver))
+         df['Location'] = pd.DataFrame(linkedin_scrap.company_location(driver))
+         df['Website URL'] = pd.DataFrame(linkedin_scrap.job_url(driver))
+ 
+         # filter the job titles based on the user input
+         df['Job Title'] = df['Job Title'].apply(lambda x: linkedin_scrap.job_title_filter(x, user_job_title))
+         df = df.dropna()
+         df.reset_index(drop=True, inplace=True)
+         df = df.iloc[:10, :]
+ 
+         # list the URLs that survived the filter
+         website_url = df['Website URL'].tolist()
+ 
+         # add the job description to the dataframe
+         job_description = []
+ 
+         for i in range(0, len(website_url)):
+             link = website_url[i]
+             data = linkedin_scrap.get_description(driver, link)
+             if data is not None and len(data.strip()) > 0:
+                 job_description.append(data)
+             else:
+                 job_description.append('Description Not Available')
+ 
+         df['Job Description'] = pd.DataFrame(job_description, columns=['Description'])
+         df = df.dropna()
+         df.reset_index(drop=True, inplace=True)
+         return df
+ 
+ 
+     def main(user_job_title):
+ 
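+         # launch Chrome; Selenium needs a matching ChromeDriver on PATH (Selenium 4.6+ can fetch one via Selenium Manager)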
+         driver = webdriver.Chrome()
+         driver.maximize_window()
+ 
+         linkedin_scrap.linkedin_open_scrolldown(driver, user_job_title)
+ 
+         final_df = linkedin_scrap.data_scrap(driver, user_job_title)
+         driver.quit()
+ 
+         return final_df
+ 
+ 
+ streamlit_config()
+ add_vertical_space(1)
+ 
+ 
+ # sidebar
+ with st.sidebar:
+ 
+     add_vertical_space(3)
+ 
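+     # the icon names come from Bootstrap Icons, which option_menu uses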
+     option = option_menu(menu_title='', options=['Summary', 'Strength', 'Weakness', 'Job Titles', 'Linkedin Jobs', 'Exit'],
+                          icons=['house-fill', 'database-fill', 'pass-fill', 'list-ul', 'linkedin', 'sign-turn-right-fill'])
+ 
+ 
+ if option == 'Summary':
+ 
+     # file upload
+     pdf = st.file_uploader(label='', type='pdf')
+     openai_api_key = st.text_input(label='OpenAI API Key', type='password')
+ 
+     try:
+         if pdf is not None and openai_api_key != '':
+             pdf_chunks = resume_analyzer.pdf_to_chunks(pdf)
+ 
+             summary = resume_analyzer.resume_summary(query_with_chunks=pdf_chunks)
+             result_summary = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=summary)
+ 
+             st.subheader('Summary:')
+             st.write(result_summary)
+ 
+     except Exception as e:
+         col1, col2 = st.columns(2)
+         with col1:
+             st.warning(e)
+ 
+ 
+ elif option == 'Strength':
+ 
+     # file upload
+     pdf = st.file_uploader(label='', type='pdf')
+     openai_api_key = st.text_input(label='OpenAI API Key', type='password')
+ 
+     try:
+         if pdf is not None and openai_api_key != '':
+ 
+             pdf_chunks = resume_analyzer.pdf_to_chunks(pdf)
+ 
+             # resume summary
+             summary = resume_analyzer.resume_summary(query_with_chunks=pdf_chunks)
+             result_summary = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=summary)
+ 
+             strength = resume_analyzer.resume_strength(query_with_chunks=result_summary)
+             result_strength = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=strength)
+ 
+             st.subheader('Strength:')
+             st.write(result_strength)
+ 
+     except Exception as e:
+         col1, col2 = st.columns(2)
+         with col1:
+             st.warning(e)
+ 
+ 
+ elif option == 'Weakness':
+ 
+     # file upload
+     pdf = st.file_uploader(label='', type='pdf')
+     openai_api_key = st.text_input(label='OpenAI API Key', type='password')
+ 
+     try:
+         if pdf is not None and openai_api_key != '':
+ 
+             pdf_chunks = resume_analyzer.pdf_to_chunks(pdf)
+ 
+             # resume summary
+             summary = resume_analyzer.resume_summary(query_with_chunks=pdf_chunks)
+             result_summary = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=summary)
+ 
+             weakness = resume_analyzer.resume_weakness(query_with_chunks=result_summary)
+             result_weakness = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=weakness)
+ 
+             st.subheader('Weakness:')
+             st.write(result_weakness)
+ 
+     except Exception as e:
+         col1, col2 = st.columns(2)
+         with col1:
+             st.warning(e)
+ 
+ 
+ elif option == 'Job Titles':
+ 
+     # file upload
+     pdf = st.file_uploader(label='', type='pdf')
+     openai_api_key = st.text_input(label='OpenAI API Key', type='password')
+ 
+     try:
+         if pdf is not None and openai_api_key != '':
+             pdf_chunks = resume_analyzer.pdf_to_chunks(pdf)
+ 
+             # resume summary
+             summary = resume_analyzer.resume_summary(query_with_chunks=pdf_chunks)
+             result_summary = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=summary)
+ 
+             job_suggestion = resume_analyzer.job_title_suggestion(query_with_chunks=result_summary)
+             result_suggestion = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=job_suggestion)
+ 
+             st.subheader('Suggestion:')
+             st.write(result_suggestion)
+ 
+     except Exception as e:
+         col1, col2 = st.columns(2)
+         with col1:
+             st.warning(e)
+ 
+ 
+ elif option == 'Linkedin Jobs':
+ 
+     try:
+         # get the user's job titles
+         user_input_job_title = st.text_input(label='Enter Job Titles (comma separated):')
+         submit = st.button('Submit')
+ 
+         if submit and len(user_input_job_title) > 0:
+ 
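+             # split the comma-separated input into a list of job titles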
+             user_job_title = user_input_job_title.split(',')
+ 
+             df = linkedin_scrap.main(user_job_title)
+ 
+             l = len(df['Company Name'])
+             for i in range(0, l):
+                 st.write(f"Company Name : {df.iloc[i,0]}")
+                 st.write(f"Job Title : {df.iloc[i,1]}")
+                 st.write(f"Location : {df.iloc[i,2]}")
+                 st.write(f"Website URL : {df.iloc[i,3]}")
+                 with st.expander(label='Job Description'):
+                     st.write(df.iloc[i, 4])
+                 st.write('')
+                 st.write('')
+ 
+         elif submit and len(user_input_job_title) == 0:
+             col1, col2 = st.columns(2)
+             with col1:
+                 st.info('Please Enter the Job Titles')
+ 
+     except Exception:
+         st.write('')
+         st.info("This feature is currently not working in the deployed Streamlit application due to a 'selenium.common.exceptions.WebDriverException' error.")
+         st.write('')
+ 
+         st.write(
+             "Please use the local Streamlit application for a smooth experience: [http://localhost:8501](http://localhost:8501)")
+ 
+ 
+ elif option == 'Exit':
+ 
+     add_vertical_space(3)
+     col1, col2, col3 = st.columns([0.3, 0.4, 0.3])
+     with col2:
+         st.success('Thank you for your time. Exiting the application')
+         st.balloons()
+ 