import time
import numpy as np
import pandas as pd
import streamlit as st
from streamlit_option_menu import option_menu
from streamlit_extras.add_vertical_space import add_vertical_space
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.chains.question_answering import load_qa_chain
from selenium import webdriver
from selenium.webdriver.common.by import By
import warnings

warnings.filterwarnings('ignore')

def streamlit_config():

    # page configuration
    st.set_page_config(page_title='Resume Analyzer AI', layout='wide')

    # make the Streamlit header background transparent
    page_background_color = """
    <style>
    [data-testid="stHeader"]
    {
        background: rgba(0,0,0,0);
    }
    </style>
    """
    st.markdown(page_background_color, unsafe_allow_html=True)

    # centered page title
    st.markdown('<h1 style="text-align: center;">AI-Powered Resume Analyzer and <br> LinkedIn Scraper with Selenium</h1>',
                unsafe_allow_html=True)


class resume_analyzer:

    @staticmethod
    def pdf_to_chunks(pdf):
        # read the uploaded PDF file
        pdf_reader = PdfReader(pdf)

        # extract the text from each page separately
        text = ''
        for page in pdf_reader.pages:
            text += page.extract_text()

        # split the long text into small, overlapping chunks
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=700,
            chunk_overlap=200,
            length_function=len)
        chunks = text_splitter.split_text(text=text)
        return chunks
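
    # The chunks returned here are embedded and indexed in resume_analyzer.openai;
    # chunk_overlap=200 keeps up to 200 shared characters between consecutive
    # chunks, so sentences cut at a chunk boundary survive intact in one of them.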

    @staticmethod
    def resume_summary(query_with_chunks):
        query = f'''Provide a detailed summarization of the resume below and conclude with final thoughts.
        """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
        {query_with_chunks}
        """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
        '''
        return query

    @staticmethod
    def resume_strength(query_with_chunks):
        query = f'''Provide a detailed analysis and explanation of the strengths of the resume below and conclude with final thoughts.
        """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
        {query_with_chunks}
        """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
        '''
        return query

    @staticmethod
    def resume_weakness(query_with_chunks):
        query = f'''Provide a detailed analysis and explanation of the weaknesses of the resume below, and explain how to improve it into a better resume.
        """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
        {query_with_chunks}
        """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
        '''
        return query

    @staticmethod
    def job_title_suggestion(query_with_chunks):
        query = f'''What job roles should I apply to on LinkedIn based on the details below?
        """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
        {query_with_chunks}
        """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
        '''
        return query

    @staticmethod
    def openai(openai_api_key, chunks, analyze):

        # use the OpenAI embedding service to vectorize the text
        embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

        # FAISS (Facebook AI Similarity Search) indexes the chunk vectors for similarity search
        vectorstores = FAISS.from_texts(chunks, embedding=embeddings)

        # compare the query against the chunks and select the top-k most similar chunks
        docs = vectorstores.similarity_search(query=analyze, k=3)

        # create a chat model object using the GPT-3.5 Turbo model
        llm = ChatOpenAI(model='gpt-3.5-turbo', api_key=openai_api_key)

        # build a question-answering (QA) chain that 'stuffs' the retrieved chunks into the prompt
        chain = load_qa_chain(llm=llm, chain_type='stuff')
        response = chain.run(input_documents=docs, question=analyze)
        return response
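
# Example flow (for reference): the Streamlit handlers below chain these helpers,
# first summarizing the resume, then feeding that summary back in as the query:
#   chunks  = resume_analyzer.pdf_to_chunks(uploaded_pdf)
#   summary = resume_analyzer.openai(api_key, chunks, resume_analyzer.resume_summary(chunks))
#   result  = resume_analyzer.openai(api_key, chunks, resume_analyzer.resume_strength(summary))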


class linkedin_scrap:

    @staticmethod
    def linkedin_open_scrolldown(driver, user_job_title):

        # URL-encode the job titles, e.g. ['data scientist', 'ml engineer']
        # becomes 'data%20scientist%2C%20ml%20engineer'
        encoded_titles = []
        for title in user_job_title:
            encoded_titles.append('%20'.join(title.split()))
        job_title = '%2C%20'.join(encoded_titles)

        # open the LinkedIn job search page, filtered to jobs posted in the last week (f_TPR=r604800)
        link = f"https://in.linkedin.com/jobs/search?keywords={job_title}&location=India&locationId=&geoId=102713980&f_TPR=r604800&position=1&pageNum=0"
        driver.get(link)
        driver.implicitly_wait(10)

        # scroll down and click 'See more jobs' to load additional results
        for _ in range(3):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(5)
            try:
                driver.find_element(by=By.CSS_SELECTOR, value="button[aria-label='See more jobs']").click()
                time.sleep(3)
            except Exception:
                pass

    @staticmethod
    def company_name(driver):
        # collect the company name shown on each job card
        company = driver.find_elements(by=By.CSS_SELECTOR, value='h4[class="base-search-card__subtitle"]')
        return [i.text for i in company]

    @staticmethod
    def company_location(driver):
        # collect the location shown on each job card
        location = driver.find_elements(by=By.CSS_SELECTOR, value='span[class="job-search-card__location"]')
        return [i.text for i in location]

    @staticmethod
    def job_title(driver):
        # collect the job title shown on each job card
        title = driver.find_elements(by=By.CSS_SELECTOR, value='h3[class="base-search-card__title"]')
        return [i.text for i in title]

    @staticmethod
    def job_url(driver):
        # collect the link to each job posting
        url = driver.find_elements(by=By.XPATH, value='//a[contains(@href, "/jobs/")]')
        return [i.get_attribute('href') for i in url]

    @staticmethod
    def job_title_filter(x, user_job_title):

        # split the user's job titles into lowercase words
        s = [i.lower() for i in user_job_title]
        suggestion = []
        for i in s:
            suggestion.extend(i.split())

        # split the scraped job title into lowercase words
        a = [i.lower() for i in x.split()]

        # keep the scraped title only if it shares more than one word with the user's titles
        intersection = list(set(suggestion).intersection(set(a)))
        return x if len(intersection) > 1 else np.nan
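
    # e.g. with user_job_title=['data scientist'], 'Senior Data Scientist' shares
    # two words ('data', 'scientist') and is kept, while 'Data Analyst' shares
    # only one ('data') and is replaced with np.nan (dropped by dropna later).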

    @staticmethod
    def get_description(driver, link):
        # open the job posting and expand the full description
        driver.get(link)
        time.sleep(3)
        driver.find_element(by=By.CSS_SELECTOR,
                            value='button[data-tracking-control-name="public_jobs_show-more-html-btn"]').click()
        time.sleep(2)

        description = driver.find_elements(by=By.CSS_SELECTOR,
                                           value='div[class="show-more-less-html__markup relative overflow-hidden"]')
        driver.implicitly_wait(4)

        # return the first matching description block (None if nothing matched)
        for j in description:
            return j.text

    @staticmethod
    def data_scrap(driver, user_job_title):

        # combine all the scraped data into a single dataframe; assigning via
        # pd.DataFrame aligns on index, so unequal list lengths become NaN
        df = pd.DataFrame(linkedin_scrap.company_name(driver), columns=['Company Name'])
        df['Job Title'] = pd.DataFrame(linkedin_scrap.job_title(driver))
        df['Location'] = pd.DataFrame(linkedin_scrap.company_location(driver))
        df['Website URL'] = pd.DataFrame(linkedin_scrap.job_url(driver))

        # filter the job titles based on the user input and keep the first 10 matches
        df['Job Title'] = df['Job Title'].apply(lambda x: linkedin_scrap.job_title_filter(x, user_job_title))
        df = df.dropna()
        df.reset_index(drop=True, inplace=True)
        df = df.iloc[:10, :]

        # collect the URLs that survived the filter
        website_url = df['Website URL'].tolist()

        # fetch the description for each job and add it to the dataframe
        job_description = []
        for link in website_url:
            data = linkedin_scrap.get_description(driver, link)
            if data is not None and len(data.strip()) > 0:
                job_description.append(data)
            else:
                job_description.append('Description Not Available')

        df['Job Description'] = job_description
        df = df.dropna()
        df.reset_index(drop=True, inplace=True)
        return df

    @staticmethod
    def main(user_job_title):
        # launch Chrome, scrape the job data, and close the browser
        driver = webdriver.Chrome()
        driver.maximize_window()
        linkedin_scrap.linkedin_open_scrolldown(driver, user_job_title)
        final_df = linkedin_scrap.data_scrap(driver, user_job_title)
        driver.quit()
        return final_df
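
# Example (for reference): linkedin_scrap.main(['data scientist']) launches Chrome,
# scrapes up to 10 matching postings from the search page, and returns a dataframe
# with the columns ['Company Name', 'Job Title', 'Location', 'Website URL', 'Job Description'].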


streamlit_config()
add_vertical_space(1)

# sidebar navigation
with st.sidebar:
    add_vertical_space(3)
    option = option_menu(menu_title='', options=['Summary', 'Strength', 'Weakness', 'Job Titles', 'Linkedin Jobs', 'Exit'],
                         icons=['house-fill', 'database-fill', 'pass-fill', 'list-ul', 'linkedin', 'sign-turn-right-fill'])
if option == 'Summary':

    # file upload
    pdf = st.file_uploader(label='', type='pdf')
    openai_api_key = st.text_input(label='OpenAI API Key', type='password')

    try:
        # st.text_input returns an empty string (not None) until the user enters a key
        if pdf is not None and openai_api_key:
            pdf_chunks = resume_analyzer.pdf_to_chunks(pdf)

            summary = resume_analyzer.resume_summary(query_with_chunks=pdf_chunks)
            result_summary = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=summary)

            st.subheader('Summary:')
            st.write(result_summary)

    except Exception as e:
        col1, col2 = st.columns(2)
        with col1:
            st.warning(e)

elif option == 'Strength':

    # file upload
    pdf = st.file_uploader(label='', type='pdf')
    openai_api_key = st.text_input(label='OpenAI API Key', type='password')

    try:
        if pdf is not None and openai_api_key:
            pdf_chunks = resume_analyzer.pdf_to_chunks(pdf)

            # summarize the resume first, then analyze the summary for strengths
            summary = resume_analyzer.resume_summary(query_with_chunks=pdf_chunks)
            result_summary = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=summary)

            strength = resume_analyzer.resume_strength(query_with_chunks=result_summary)
            result_strength = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=strength)

            st.subheader('Strength:')
            st.write(result_strength)

    except Exception as e:
        col1, col2 = st.columns(2)
        with col1:
            st.warning(e)

elif option == 'Weakness':

    # file upload
    pdf = st.file_uploader(label='', type='pdf')
    openai_api_key = st.text_input(label='OpenAI API Key', type='password')

    try:
        if pdf is not None and openai_api_key:
            pdf_chunks = resume_analyzer.pdf_to_chunks(pdf)

            # summarize the resume first, then analyze the summary for weaknesses
            summary = resume_analyzer.resume_summary(query_with_chunks=pdf_chunks)
            result_summary = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=summary)

            weakness = resume_analyzer.resume_weakness(query_with_chunks=result_summary)
            result_weakness = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=weakness)

            st.subheader('Weakness:')
            st.write(result_weakness)

    except Exception as e:
        col1, col2 = st.columns(2)
        with col1:
            st.warning(e)

elif option == 'Job Titles':

    # file upload
    pdf = st.file_uploader(label='', type='pdf')
    openai_api_key = st.text_input(label='OpenAI API Key', type='password')

    try:
        if pdf is not None and openai_api_key:
            pdf_chunks = resume_analyzer.pdf_to_chunks(pdf)

            # summarize the resume first, then suggest job titles from the summary
            summary = resume_analyzer.resume_summary(query_with_chunks=pdf_chunks)
            result_summary = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=summary)

            job_suggestion = resume_analyzer.job_title_suggestion(query_with_chunks=result_summary)
            result_suggestion = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=job_suggestion)

            st.subheader('Suggestion:')
            st.write(result_suggestion)

    except Exception as e:
        col1, col2 = st.columns(2)
        with col1:
            st.warning(e)

elif option == 'Linkedin Jobs':

    try:
        # get the user's job titles
        user_input_job_title = st.text_input(label='Enter Job Titles (comma separated):')
        submit = st.button('Submit')

        if submit and len(user_input_job_title) > 0:
            user_job_title = [i.strip() for i in user_input_job_title.split(',')]

            df = linkedin_scrap.main(user_job_title)

            # display each scraped job, with the description in an expander
            for i in range(len(df)):
                st.write(f"Company Name : {df.iloc[i, 0]}")
                st.write(f"Job Title : {df.iloc[i, 1]}")
                st.write(f"Location : {df.iloc[i, 2]}")
                st.write(f"Website URL : {df.iloc[i, 3]}")
                with st.expander(label='Job Description'):
                    st.write(df.iloc[i, 4])
                st.write('')
                st.write('')

        elif submit and len(user_input_job_title) == 0:
            col1, col2 = st.columns(2)
            with col1:
                st.info('Please Enter the Job Titles')

    except Exception:
        st.write('')
        st.info("This feature is currently not working in the deployed Streamlit application due to a 'selenium.common.exceptions.WebDriverException' error.")
        st.write('')
        st.write("Please use the local Streamlit application for a smooth experience: [http://localhost:8501](http://localhost:8501)")

elif option == 'Exit':

    add_vertical_space(3)
    col1, col2, col3 = st.columns([0.3, 0.4, 0.3])
    with col2:
        st.success('Thank you for your time. Exiting the application')
        st.balloons()