Commit e9f8bde
Parent(s): 319051d
Upload 5 files

- app.py +72 -0
- constants (1).py +3 -0
- env-example.txt +2 -0
- requirements.txt +12 -0
- utils.py +110 -0
app.py
ADDED
@@ -0,0 +1,72 @@
+import streamlit as st
+import constants
+from utils import *
+import uuid
+
+#Creating session variables
+if 'unique_id' not in st.session_state:
+    st.session_state['unique_id'] = ''
+
+def main():
+
+    st.set_page_config(page_title="Resume Screening Assistance")
+    st.title("HR - Resume Screening Assistance...💁 ")
+    st.subheader("I can help you with the resume screening process")
+
+    job_description = st.text_area("Please paste the 'JOB DESCRIPTION' here...", key="1")
+    document_count = st.text_input("No. of 'RESUMES' to return", key="2")
+    #Upload the resumes (PDF files only)
+    pdf = st.file_uploader("Upload resumes here, only PDF files allowed", type=["pdf"], accept_multiple_files=True)
+
+    submit = st.button("Help me with the analysis")
+
+    if submit:
+        with st.spinner('Wait for it...'):
+
+            #Create a unique ID so we can retrieve only this user's uploaded documents from the Pinecone vector store
+            st.session_state['unique_id'] = uuid.uuid4().hex
+
+            #Create a documents list out of all the user-uploaded PDF files
+            final_docs_list = create_docs(pdf, st.session_state['unique_id'])
+            #st.write(final_docs_list)
+
+            #Display the count of resumes that have been uploaded
+            st.write("*Resumes uploaded* : " + str(len(final_docs_list)))
+
+            #Create an embeddings instance
+            embeddings = create_embeddings_load_data()
+
+            #Push data to Pinecone
+            #push_to_pinecone(constants.PINECONE_API_KEY, constants.PINECONE_ENVIRONMENT, constants.PINECONE_INDEX, embeddings, final_docs_list)
+
+            #Fetch relevant documents (the Pinecone path is commented out; FAISS is used in memory instead)
+            #relevant_docs = similar_docs(job_description, document_count, constants.PINECONE_API_KEY, constants.PINECONE_ENVIRONMENT, constants.PINECONE_INDEX, embeddings, st.session_state['unique_id'])
+            relevant_docs = close_matches(job_description, document_count, final_docs_list, embeddings)
+            #st.write(relevant_docs)
+
+            #Introduce a line separator
+            st.write(":heavy_minus_sign:" * 30)
+
+            #For each item in the relevant docs, display some of its info on the UI
+            for item in range(len(relevant_docs)):
+
+                st.subheader("👉 " + str(item + 1))
+
+                #Display the file name
+                st.write("**File** : " + relevant_docs[item][0].metadata['name'])
+
+                #Expander for the details
+                with st.expander('Show me 👀'):
+                    st.info("**Match Score** : " + str(1 - relevant_docs[item][1]))
+                    #st.write("***" + relevant_docs[item][0].page_content)
+
+                    #Get the summary of the current item using the 'get_summary' helper, which uses an LLM and a LangChain summarize chain
+                    summary = get_summary(relevant_docs[item][0])
+                    st.write("**Summary** : " + summary)
+
+        st.success("Hope I was able to save your time ❤️")
+
+
+#Invoke the main function
+if __name__ == '__main__':
+    main()
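For clarity on the relevant_docs[item][0] / [item][1] indexing above: close_matches (defined in utils.py below) returns the (Document, score) pairs produced by FAISS.similarity_search_with_score, where the score is a raw L2 distance (lower means closer), so the displayed "Match Score" of 1 - score is a rough heuristic rather than a bounded percentage. A minimal sketch of iterating those pairs, assuming relevant_docs is already populated:

    #Each entry is a (Document, distance) tuple from FAISS
    for doc, distance in relevant_docs:
        print(doc.metadata["name"], round(distance, 3))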
constants (1).py
ADDED
@@ -0,0 +1,3 @@
+PINECONE_API_KEY="a4405723-2309-4c5c-87d0-760f461fdef0"
+PINECONE_ENVIRONMENT="gcp-starter"
+PINECONE_INDEX="hresume"
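Note that this file hardcodes a Pinecone API key in a committed file, which exposes it publicly. Since python-dotenv is already in requirements.txt and env-example.txt (below) establishes the pattern, a safer alternative sketch (not what this commit does) would load the values from the environment:

    import os
    from dotenv import load_dotenv

    load_dotenv()  #reads a local .env file if present
    PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "")
    PINECONE_ENVIRONMENT = os.getenv("PINECONE_ENVIRONMENT", "gcp-starter")
    PINECONE_INDEX = os.getenv("PINECONE_INDEX", "hresume")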
env-example.txt
ADDED
@@ -0,0 +1,2 @@
+OPENAI_API_KEY=""
+HUGGINGFACEHUB_API_TOKEN=""
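Neither variable is read explicitly anywhere in the code; LangChain's OpenAI/OpenAIEmbeddings and HuggingFaceHub wrappers pick up OPENAI_API_KEY and HUGGINGFACEHUB_API_TOKEN from the environment, so exporting them (or copying this file to .env and loading it with python-dotenv) should be enough.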
requirements.txt
ADDED
@@ -0,0 +1,12 @@
+langchain
+streamlit
+openai
+tiktoken
+python-dotenv
+unstructured
+pinecone-client
+pypdf
+sentence_transformers
+pdf2image
+pdfminer.six
+faiss-cpu
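To reproduce the Space locally, these would presumably be installed with pip install -r requirements.txt before launching the UI with streamlit run app.py.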
utils.py
ADDED
@@ -0,0 +1,110 @@
+import openai
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.vectorstores import Pinecone
+from langchain.llms import OpenAI
+from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
+from langchain.schema import Document
+import pinecone
+from langchain.vectorstores import FAISS
+from pypdf import PdfReader
+from langchain.chains.summarize import load_summarize_chain
+from langchain import HuggingFaceHub
+from langchain.document_loaders import DirectoryLoader
+
+
+#Extract the text from a PDF file
+def get_pdf_text(pdf_doc):
+    text = ""
+    pdf_reader = PdfReader(pdf_doc)
+    for page in pdf_reader.pages:
+        text += page.extract_text()
+    return text
+
+
+#Iterate over the user-uploaded PDF files, one by one
+def create_docs(user_pdf_list, unique_id):
+    docs = []
+    for filename in user_pdf_list:
+
+        chunks = get_pdf_text(filename)
+
+        #Add items to our list - the data and its metadata
+        docs.append(Document(
+            page_content=chunks,
+            metadata={"name": filename.name, "id": filename.id, "type": filename.type, "size": filename.size, "unique_id": unique_id},
+        ))
+    #Load files from a directory (local version)
+    #loader = DirectoryLoader('./Repository', glob='**/*')
+    #docs1 = loader.load()
+    #final_docs = docs + docs1
+    return docs
+
+
+#Create an embeddings instance
+def create_embeddings_load_data():
+    embeddings = OpenAIEmbeddings()
+    #embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
+    return embeddings
+
+
+#Push data to the vector store - Pinecone here
+def push_to_pinecone(pinecone_apikey, pinecone_environment, pinecone_index_name, embeddings, docs):
+
+    pinecone.init(
+        api_key=pinecone_apikey,
+        environment=pinecone_environment
+    )
+    Pinecone.from_documents(docs, embeddings, index_name=pinecone_index_name)
+
+
+#Pull information from the vector store - Pinecone here
+def pull_from_pinecone(pinecone_apikey, pinecone_environment, pinecone_index_name, embeddings):
+
+    pinecone.init(
+        api_key=pinecone_apikey,
+        environment=pinecone_environment
+    )
+
+    index_name = pinecone_index_name
+
+    index = Pinecone.from_existing_index(index_name, embeddings)
+    return index
+
+
+#Get relevant documents from the vector store, based on the user's input
+def similar_docs(query, k, pinecone_apikey, pinecone_environment, pinecone_index_name, embeddings, unique_id):
+
+    pinecone.init(
+        api_key=pinecone_apikey,
+        environment=pinecone_environment
+    )
+
+    index_name = pinecone_index_name
+
+    index = pull_from_pinecone(pinecone_apikey, pinecone_environment, index_name, embeddings)
+    #similar_docs = index.similarity_search_with_score(query, int(k), {"unique_id": unique_id})
+    similar_docs = index.similarity_search_with_score(query, int(k))
+    #print(similar_docs)
+    return similar_docs
+
+#Find close matches in memory with FAISS (no external vector store needed)
+def close_matches(query, k, docs, embeddings):
+    #https://api.python.langchain.com/en/latest/vectorstores/langchain.vectorstores.faiss.FAISS.html#langchain.vectorstores.faiss.FAISS.similarity_search_with_score
+    db = FAISS.from_documents(docs, embeddings)
+    similar_docs = db.similarity_search_with_score(query, int(k))
+    return similar_docs
+
+
+#Get the summary of a document
+def get_summary(current_doc):
+    llm = OpenAI(temperature=0)
+    #llm = HuggingFaceHub(repo_id="bigscience/bloom", model_kwargs={"temperature":1e-10})
+    chain = load_summarize_chain(llm, chain_type="map_reduce")
+    summary = chain.run([current_doc])
+
+    return summary
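A minimal offline sketch (not part of the commit) of how these helpers chain together, assuming a local sample.pdf and an OPENAI_API_KEY in the environment. FakeUpload is a hypothetical stand-in for Streamlit's UploadedFile, which is what create_docs expects (it reads .name, .id, .type, and .size):

    import io
    from utils import create_docs, create_embeddings_load_data, close_matches, get_summary

    class FakeUpload(io.BytesIO):
        #Hypothetical stand-in mimicking Streamlit's UploadedFile interface
        def __init__(self, path):
            super().__init__(open(path, "rb").read())
            self.name, self.id, self.type, self.size = path, "1", "application/pdf", 0

    docs = create_docs([FakeUpload("sample.pdf")], unique_id="test")
    embeddings = create_embeddings_load_data()
    matches = close_matches("Python developer with NLP experience", 1, docs, embeddings)
    for doc, distance in matches:
        print(doc.metadata["name"], distance)
        print(get_summary(doc))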