import time
import numpy as np
import pandas as pd
import streamlit as st
from streamlit_option_menu import option_menu
from streamlit_extras.add_vertical_space import add_vertical_space
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.chains.question_answering import load_qa_chain
from selenium import webdriver
from selenium.webdriver.common.by import By
import warnings
warnings.filterwarnings('ignore')
def streamlit_config():
# page configuration
st.set_page_config(page_title='Resume Analyzer AI', layout="wide")
# page header transparent color
page_background_color = """
"""
st.markdown(page_background_color, unsafe_allow_html=True)
# title and position
st.markdown(f'<h1 style="text-align: center;">AI-Powered Resume Analyzer and LinkedIn Scraper with Selenium</h1>',
unsafe_allow_html=True)
class resume_analyzer:
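# Groups the resume-processing helpers: PDF-to-chunk conversion, prompt builders, and the
# OpenAI/FAISS question-answering call. Methods are called directly on the class
# (e.g. resume_analyzer.pdf_to_chunks(pdf)) rather than on an instance.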
def pdf_to_chunks(pdf):
# read the uploaded PDF and create a PdfReader object
pdf_reader = PdfReader(pdf)
# extract text from each page separately
text = ""
for page in pdf_reader.pages:
text += page.extract_text()
# Split the long text into small chunks.
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=700,
chunk_overlap=200,
length_function=len)
chunks = text_splitter.split_text(text=text)
return chunks
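# The following helpers only build prompt strings; the resume text (or a prior LLM summary)
# is interpolated between the quote delimiters and later passed to openai().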
def resume_summary(query_with_chunks):
query = f'''Provide a detailed summary of the resume below and finish with an overall conclusion.
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
{query_with_chunks}
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
'''
return query
def resume_strength(query_with_chunks):
query = f'''Provide a detailed analysis and explanation of the strengths of the resume below and finish with an overall conclusion.
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
{query_with_chunks}
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
'''
return query
def resume_weakness(query_with_chunks):
query = f'''Provide a detailed analysis and explanation of the weaknesses of the resume below and explain how to improve them to make the resume stronger.
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
{query_with_chunks}
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
'''
return query
def job_title_suggestion(query_with_chunks):
query = f'''Based on the details below, what job roles should I apply for on LinkedIn?
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
{query_with_chunks}
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
'''
return query
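# Retrieval-augmented QA: embed the resume chunks, index them in FAISS, retrieve the top-k
# chunks most similar to the prompt, and pass them to a 'stuff' question-answering chain.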
def openai(openai_api_key, chunks, analyze):
# Using OpenAI service for embedding
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
# FAISS (Facebook AI Similarity Search) indexes the chunk embeddings as numerical vectors
vectorstores = FAISS.from_texts(chunks, embedding=embeddings)
# compares the query and chunks, enabling the selection of the top 'K' most similar chunks based on their similarity scores.
docs = vectorstores.similarity_search(query=analyze, k=3)
# create a ChatOpenAI client that uses the GPT-3.5 Turbo model
llm = ChatOpenAI(model='gpt-3.5-turbo', api_key=openai_api_key)
# question-answering (QA) pipeline, making use of the load_qa_chain function
chain = load_qa_chain(llm=llm, chain_type='stuff')
response = chain.run(input_documents=docs, question=analyze)
return response
class linkedin_scraper:
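# Wraps the Selenium-based LinkedIn job scraping: driver setup, user input, URL building,
# page scrolling, and extraction of company, job title, location and description.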
def webdriver_setup():
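# headless Chrome with the sandbox and /dev/shm usage disabled so it also runs in containers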
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
driver = webdriver.Chrome(options=options)
driver.maximize_window()
return driver
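# Streamlit form that collects the job title(s), the number of jobs to scrape, and the submit action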
def get_userinput():
add_vertical_space(2)
with st.form(key='linkedin_scrape'):
add_vertical_space(1)
col1,col2 = st.columns([0.7,0.3], gap='medium')
with col1:
job_title = st.text_input(label='Job Title')
job_title = [t.strip() for t in job_title.split(',') if t.strip()]
with col2:
job_count = st.number_input(label='Job Count', min_value=1, value=1, step=1)
# Submit Button
add_vertical_space(1)
submit = st.form_submit_button(label='Submit')
add_vertical_space(1)
return job_title, job_count, submit
def build_url(job_title):
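# URL-encode each job title (spaces -> %20), join multiple titles with an encoded comma
# (%2C%20), and plug the result into LinkedIn's public job-search URL for India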
b = []
for i in job_title:
x = i.split()
y = '%20'.join(x)
b.append(y)
job_title = '%2C%20'.join(b)
link = f"https://in.linkedin.com/jobs/search?keywords={job_title}&location=India&locationId=&geoId=102713980&f_TPR=r604800&position=1&pageNum=0"
return link
def link_open_scrolldown(driver, link, job_count):
# Open the Link in LinkedIn
driver.get(link)
driver.implicitly_wait(10)
# Scroll Down the Page
for i in range(0,job_count):
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(2)  # pause so newly loaded job cards can render before the next scroll
# Click on See More Jobs Button if Present
try:
driver.find_element(by=By.CSS_SELECTOR, value="button[aria-label='See more jobs']").click()
driver.implicitly_wait(5)
except Exception:
pass
def job_title_filter(scrap_job_title, user_job_title_input):
# User Job Title Convert Lower Case and Split into List
user_input = []
for i in [i.lower() for i in user_job_title_input]:
user_input.extend(i.split())
# scraped Job Title Convert Lower Case and Split into List
scrap_title = [i.lower() for i in scrap_job_title.split()]
# Identify Same Words in Both Lists
matched_words = list(set(user_input).intersection(set(scrap_title)))
# return the job title if more than one word matches, otherwise return NaN
return scrap_job_title if len(matched_words) > 1 else np.nan
def scrap_company_data(driver, job_title_input):
# scraping the Company Data
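# NOTE: these CSS selectors target LinkedIn's current public job-search markup
# and may need updating if the page layout changes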
company = driver.find_elements(by=By.CSS_SELECTOR, value='h4[class="base-search-card__subtitle"]')
company_name = [i.text for i in company]
location = driver.find_elements(by=By.CSS_SELECTOR, value='span[class="job-search-card__location"]')
company_location = [i.text for i in location]
title = driver.find_elements(by=By.CSS_SELECTOR, value='h3[class="base-search-card__title"]')
job_title = [i.text for i in title]
url = driver.find_elements(by=By.XPATH, value='//a[contains(@href, "/jobs/")]')
website_url = [i.get_attribute('href') for i in url]
# combine all the data into a single dataframe
df = pd.DataFrame(company_name, columns=['Company Name'])
df['Job Title'] = pd.DataFrame(job_title)
df['Location'] = pd.DataFrame(company_location)
df['Website URL'] = pd.DataFrame(website_url)
# keep only rows whose job title matches the user input; non-matching titles become NaN
df['Job Title'] = df['Job Title'].apply(lambda x: linkedin_scraper.job_title_filter(x, job_title_input))
# Drop Null Values and Reset Index
df = df.dropna()
df.reset_index(drop=True, inplace=True)
return df
def scrap_job_description(driver, df, job_count):
# Get URL into List
website_url = df['Website URL'].tolist()
# Scrap the Job Description
job_description, description_count = [], 0
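# visit each job URL, expand and collect its description, and stop once
# job_count usable descriptions have been gathered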
for i in range(0, len(website_url)):
try:
# Open the URL
driver.get(website_url[i])
driver.implicitly_wait(5)
time.sleep(1)
# Click on Show More Button
driver.find_element(by=By.CSS_SELECTOR, value='button[data-tracking-control-name="public_jobs_show-more-html-btn"]').click()
driver.implicitly_wait(5)
time.sleep(1)
# Get Job Description
description = driver.find_elements(by=By.CSS_SELECTOR, value='div[class="show-more-less-html__markup relative overflow-hidden"]')
data = [i.text for i in description][0]
if len(data.strip()) > 0:
job_description.append(data)
description_count += 1
else:
job_description.append('Description Not Available')
# if the URL fails to load or the expected elements are missing
except Exception:
job_description.append('Description Not Available')
# Check Description Count Meets User Job Count
if description_count == job_count:
break
# Filter the Job Description
df = df.iloc[:len(job_description), :]
# Add Job Description in Dataframe
df['Job Description'] = pd.DataFrame(job_description, columns=['Description'])
df['Job Description'] = df['Job Description'].apply(lambda x: np.nan if x=='Description Not Available' else x)
df = df.dropna()
df.reset_index(drop=True, inplace=True)
return df
def display_data_userinterface(df_final):
# Display the Data in User Interface
add_vertical_space(1)
for i in range(0, len(df_final)):
st.markdown(f'<h3>Job Posting Details : {i+1}</h3>', unsafe_allow_html=True)
st.write(f"Company Name : {df_final.iloc[i,0]}")
st.write(f"Job Title : {df_final.iloc[i,1]}")
st.write(f"Location : {df_final.iloc[i,2]}")
st.write(f"Website URL : {df_final.iloc[i,3]}")
with st.expander(label='Job Description'):
st.write(df_final.iloc[i, 4])
add_vertical_space(3)
def main():
# Initially set driver to None
# driver = None
# try:
job_title_input, job_count, submit = linkedin_scraper.get_userinput()
add_vertical_space(2)
if submit:
if job_title_input:
with st.spinner('Webdriver Setup Initializing...'):
driver = linkedin_scraper.webdriver_setup()
with st.spinner('Build URL and Open Link...'):
# build URL based on User Job Title Input
link = linkedin_scraper.build_url(job_title_input)
# Open the Link in LinkedIn and Scroll Down the Page
linkedin_scraper.link_open_scrolldown(driver, link, job_count)
with st.spinner('Scraping Company Data...'):
df = linkedin_scraper.scrap_company_data(driver, job_title_input)
with st.spinner('Scraping Job Description Data...'):
df_final = linkedin_scraper.scrap_job_description(driver, df, job_count)
# Display the Data in User Interface
linkedin_scraper.display_data_userinterface(df_final)
# if the user clicks Submit while the Job Title is empty
else:
st.markdown(f'Job Title is Empty', unsafe_allow_html=True)
# except Exception as e:
# add_vertical_space(2)
# st.markdown(f'{e}', unsafe_allow_html=True)
# finally:
# if driver:
# driver.quit()
# Streamlit Configuration Setup
streamlit_config()
add_vertical_space(1)
# sidebar
with st.sidebar:
add_vertical_space(3)
option = option_menu(menu_title='', options=['Summary', 'Strength', 'Weakness', 'Job Titles', 'Linkedin Jobs', 'Exit'],
icons=['house-fill', 'database-fill', 'pass-fill', 'list-ul', 'linkedin', 'sign-turn-right-fill'])
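# each resume option below asks for the PDF and the OpenAI API key, then runs the matching
# analysis; 'Linkedin Jobs' launches the Selenium scraper instead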
if option == 'Summary':
# file upload
pdf = st.file_uploader(label='', type='pdf')
openai_api_key = st.text_input(label='OpenAI API Key', type='password')
try:
if pdf is not None and openai_api_key != '':
pdf_chunks = resume_analyzer.pdf_to_chunks(pdf)
summary = resume_analyzer.resume_summary(query_with_chunks=pdf_chunks)
result_summary = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=summary)
st.subheader('Summary:')
st.write(result_summary)
except Exception as e:
add_vertical_space(2)
st.markdown(f'{e}', unsafe_allow_html=True)
elif option == 'Strength':
# file upload
pdf = st.file_uploader(label='', type='pdf')
openai_api_key = st.text_input(label='OpenAI API Key', type='password')
try:
if pdf is not None and openai_api_key != '':
pdf_chunks = resume_analyzer.pdf_to_chunks(pdf)
# Resume summary
summary = resume_analyzer.resume_summary(query_with_chunks=pdf_chunks)
result_summary = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=summary)
strength = resume_analyzer.resume_strength(query_with_chunks=result_summary)
result_strength = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=strength)
st.subheader('Strength:')
st.write(result_strength)
except Exception as e:
add_vertical_space(2)
st.markdown(f'{e}', unsafe_allow_html=True)
elif option == 'Weakness':
# file upload
pdf = st.file_uploader(label='', type='pdf')
openai_api_key = st.text_input(label='OpenAI API Key', type='password')
try:
if pdf is not None and openai_api_key != '':
pdf_chunks = resume_analyzer.pdf_to_chunks(pdf)
# Resume summary
summary = resume_analyzer.resume_summary(query_with_chunks=pdf_chunks)
result_summary = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=summary)
weakness = resume_analyzer.resume_weakness(query_with_chunks=result_summary)
result_weakness = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=weakness)
st.subheader('Weakness:')
st.write(result_weakness)
except Exception as e:
add_vertical_space(2)
st.markdown(f'{e}', unsafe_allow_html=True)
elif option == 'Job Titles':
# file upload
pdf = st.file_uploader(label='', type='pdf')
openai_api_key = st.text_input(label='OpenAI API Key', type='password')
try:
if pdf is not None and openai_api_key != '':
pdf_chunks = resume_analyzer.pdf_to_chunks(pdf)
# Resume summary
summary = resume_analyzer.resume_summary(query_with_chunks=pdf_chunks)
result_summary = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=summary)
job_suggestion = resume_analyzer.job_title_suggestion(query_with_chunks=result_summary)
result_suggestion = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=job_suggestion)
st.subheader('Suggestion: ')
st.write(result_suggestion)
except Exception as e:
add_vertical_space(2)
st.markdown(f'{e}', unsafe_allow_html=True)
elif option == 'Linkedin Jobs':
add_vertical_space(2)
linkedin_scraper.main()
elif option == 'Exit':
add_vertical_space(3)
col1, col2, col3 = st.columns([0.3,0.4,0.3])
with col2:
st.success('Thank you for your time. Exiting the application')
st.balloons()