update: add a file-upload option to the Search Candidates page
Browse files
- pages/02_Professional Screen.py +1 -1
- pages/05_Search_Candidates.py +17 -29
- requirements.txt +1 -0
- utils/utils.py +8 -5
pages/02_Professional Screen.py
CHANGED
|
@@ -11,7 +11,7 @@ from langchain.chat_models import ChatOpenAI
|
|
| 11 |
from langchain.chains import ConversationChain, RetrievalQA
|
| 12 |
from langchain.prompts.prompt import PromptTemplate
|
| 13 |
from langchain.text_splitter import NLTKTextSplitter
|
| 14 |
-
from langchain.embeddings import
|
| 15 |
from langchain.vectorstores import FAISS
|
| 16 |
import nltk
|
| 17 |
from prompts.prompts import templates
|
|
|
|
| 11 |
from langchain.chains import ConversationChain, RetrievalQA
|
| 12 |
from langchain.prompts.prompt import PromptTemplate
|
| 13 |
from langchain.text_splitter import NLTKTextSplitter
|
| 14 |
+
from langchain.embeddings import VoyageEmbeddings
|
| 15 |
from langchain.vectorstores import FAISS
|
| 16 |
import nltk
|
| 17 |
from prompts.prompts import templates
|
pages/05_Search_Candidates.py
CHANGED
|
@@ -1,39 +1,12 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import os
|
| 3 |
-
|
| 4 |
-
from langchain.chat_models import ChatOpenAI
|
| 5 |
-
from langchain.llms import HuggingFaceEndpoint
|
| 6 |
-
from langchain.document_loaders import PyPDFDirectoryLoader
|
| 7 |
-
from langchain.chains import RetrievalQA
|
| 8 |
-
from langchain.chat_models import ChatOpenAI
|
| 9 |
-
from langchain.vectorstores import DeepLake
|
| 10 |
from utils.llm import model_pipeline, load_memory, typewriter
|
|
|
|
| 11 |
from dotenv import load_dotenv
|
| 12 |
load_dotenv()
|
| 13 |
|
| 14 |
st.title("Search the right candidates!")
|
| 15 |
-
# st.write( "OPENAI_API_KEY" not in os.environ.keys())
|
| 16 |
-
# if "OPENAI_API_KEY" not in os.environ.keys():
|
| 17 |
-
# os.environ["OPENAI_API_KEY"] = st.text_input(
|
| 18 |
-
# "PLEASE ENTER YOUR OPEN API KEY. Head over to this [link](https://platform.openai.com/api-keys)",
|
| 19 |
-
# placeholder="Enter your Key here...",
|
| 20 |
-
# type="password")
|
| 21 |
-
# st.write("You can also set the OPENAI_API_KEY environment variable.")
|
| 22 |
-
# st.rerun()
|
| 23 |
-
#
|
| 24 |
-
# if "COHERE_API_KEY" not in os.environ.keys():
|
| 25 |
-
# os.environ["COHERE_API_KEY"] = st.text_input(
|
| 26 |
-
# "PLEASE ENTER YOUR COHERE API KEY. Head over to this [link](https://dashboard.cohere.com/welcome/login?redirect_uri=%2F)",
|
| 27 |
-
# placeholder="Enter your Key here...",
|
| 28 |
-
# type="password")
|
| 29 |
-
# st.rerun()
|
| 30 |
-
#
|
| 31 |
-
# if "VOYAGE_API_KEY" not in os.environ.keys():
|
| 32 |
-
# os.environ["VOYAGE_API_KEY"] = st.text_input(
|
| 33 |
-
# "PLEASE ENTER YOUR VOYAGE API KEY. Head over to this [link](https://dash.voyageai.com/)",
|
| 34 |
-
# placeholder="Enter your Key here...",
|
| 35 |
-
# type="password")
|
| 36 |
-
# st.rerun()
|
| 37 |
|
| 38 |
if "messages" not in st.session_state:
|
| 39 |
st.session_state.messages = []
|
|
@@ -41,6 +14,21 @@ if "messages" not in st.session_state:
|
|
| 41 |
if "memory" not in st.session_state:
|
| 42 |
st.session_state["memory"] = load_memory()
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
for message in st.session_state.messages:
|
| 45 |
with st.chat_message(message["role"]):
|
| 46 |
st.markdown(message["content"])
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import os
|
| 3 |
+
import tempfile
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
from utils.llm import model_pipeline, load_memory, typewriter
|
| 5 |
+
from utils.utils import load_documents
|
| 6 |
from dotenv import load_dotenv
|
| 7 |
load_dotenv()
|
| 8 |
|
| 9 |
st.title("Search the right candidates!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
if "messages" not in st.session_state:
|
| 12 |
st.session_state.messages = []
|
|
|
|
| 14 |
if "memory" not in st.session_state:
|
| 15 |
st.session_state["memory"] = load_memory()
|
| 16 |
|
| 17 |
+
uploaded_file = st.file_uploader("Choose a PDF...", type="pdf")
|
| 18 |
+
if uploaded_file is not None:
|
| 19 |
+
# Create a temporary directory
|
| 20 |
+
temp_dir = tempfile.mkdtemp()
|
| 21 |
+
file_name = st.text_input("Enter File name: ", "uploaded_file.pdf")
|
| 22 |
+
st.session_state["file_name"] = file_name
|
| 23 |
+
# Save the uploaded file to the temporary directory
|
| 24 |
+
with open(os.path.join(temp_dir, 'uploaded_file.pdf'), 'wb') as f:
|
| 25 |
+
f.write(uploaded_file.getvalue())
|
| 26 |
+
|
| 27 |
+
# Pass the file path to the load_documents function
|
| 28 |
+
load_documents(file_path=os.path.join(temp_dir, 'uploaded_file.pdf'))
|
| 29 |
+
st.session_state.messages.append({"role": "assistant", "content": "I have loaded the resume."})
|
| 30 |
+
del uploaded_file
|
| 31 |
+
|
| 32 |
for message in st.session_state.messages:
|
| 33 |
with st.chat_message(message["role"]):
|
| 34 |
st.markdown(message["content"])
|
requirements.txt
CHANGED
|
@@ -13,6 +13,7 @@ cohere
|
|
| 13 |
wave
|
| 14 |
nltk
|
| 15 |
tiktoken
|
|
|
|
| 16 |
audio_recorder_streamlit
|
| 17 |
streamlit-option-menu
|
| 18 |
streamlit-lottie
|
|
|
|
| 13 |
wave
|
| 14 |
nltk
|
| 15 |
tiktoken
|
| 16 |
+
pymupdf
|
| 17 |
audio_recorder_streamlit
|
| 18 |
streamlit-option-menu
|
| 19 |
streamlit-lottie
|
utils/utils.py
CHANGED
|
@@ -12,11 +12,14 @@ def init_vectorstore(dataset_path="hub://p1utoze/default", embeddings="voyage/vo
|
|
| 12 |
db = DeepLake(dataset_path=dataset_path, embedding=embeddings)
|
| 13 |
return db
|
| 14 |
|
| 15 |
-
def load_documents(base_path="data/INFORMATION-TECHNOLOGY/"):
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
| 20 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
|
| 21 |
docs = loader.load_and_split(text_splitter)
|
| 22 |
db = init_vectorstore("hub://p1utoze/resumes", embeddings)
|
|
|
|
| 12 |
db = DeepLake(dataset_path=dataset_path, embedding=embeddings)
|
| 13 |
return db
|
| 14 |
|
| 15 |
+
def load_documents(file_path=None, base_path="data/INFORMATION-TECHNOLOGY/"):
|
| 16 |
+
if file_path:
|
| 17 |
+
loader = PyMuPDFLoader(file_path)
|
| 18 |
+
else:
|
| 19 |
+
for file in os.listdir(base_path):
|
| 20 |
+
path = base_path + file
|
| 21 |
+
print(path)
|
| 22 |
+
loader = PyMuPDFLoader(path)
|
| 23 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
|
| 24 |
docs = loader.load_and_split(text_splitter)
|
| 25 |
db = init_vectorstore("hub://p1utoze/resumes", embeddings)
|