update: add a file-upload option to the Search Candidates page
Browse files
- pages/02_Professional Screen.py +1 -1
- pages/05_Search_Candidates.py +17 -29
- requirements.txt +1 -0
- utils/utils.py +8 -5
pages/02_Professional Screen.py
CHANGED
|
@@ -11,7 +11,7 @@ from langchain.chat_models import ChatOpenAI
|
|
| 11 |
from langchain.chains import ConversationChain, RetrievalQA
|
| 12 |
from langchain.prompts.prompt import PromptTemplate
|
| 13 |
from langchain.text_splitter import NLTKTextSplitter
|
| 14 |
-
from langchain.embeddings import
|
| 15 |
from langchain.vectorstores import FAISS
|
| 16 |
import nltk
|
| 17 |
from prompts.prompts import templates
|
|
|
|
| 11 |
from langchain.chains import ConversationChain, RetrievalQA
|
| 12 |
from langchain.prompts.prompt import PromptTemplate
|
| 13 |
from langchain.text_splitter import NLTKTextSplitter
|
| 14 |
+
from langchain.embeddings import VoyageEmbeddings
|
| 15 |
from langchain.vectorstores import FAISS
|
| 16 |
import nltk
|
| 17 |
from prompts.prompts import templates
|
pages/05_Search_Candidates.py
CHANGED
|
@@ -1,39 +1,12 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import os
|
| 3 |
-
|
| 4 |
-
from langchain.chat_models import ChatOpenAI
|
| 5 |
-
from langchain.llms import HuggingFaceEndpoint
|
| 6 |
-
from langchain.document_loaders import PyPDFDirectoryLoader
|
| 7 |
-
from langchain.chains import RetrievalQA
|
| 8 |
-
from langchain.chat_models import ChatOpenAI
|
| 9 |
-
from langchain.vectorstores import DeepLake
|
| 10 |
from utils.llm import model_pipeline, load_memory, typewriter
|
|
|
|
| 11 |
from dotenv import load_dotenv
|
| 12 |
load_dotenv()
|
| 13 |
|
| 14 |
st.title("Search the right candidates!")
|
| 15 |
-
# st.write( "OPENAI_API_KEY" not in os.environ.keys())
|
| 16 |
-
# if "OPENAI_API_KEY" not in os.environ.keys():
|
| 17 |
-
# os.environ["OPENAI_API_KEY"] = st.text_input(
|
| 18 |
-
# "PLEASE ENTER YOUR OPEN API KEY. Head over to this [link](https://platform.openai.com/api-keys)",
|
| 19 |
-
# placeholder="Enter your Key here...",
|
| 20 |
-
# type="password")
|
| 21 |
-
# st.write("You can also set the OPENAI_API_KEY environment variable.")
|
| 22 |
-
# st.rerun()
|
| 23 |
-
#
|
| 24 |
-
# if "COHERE_API_KEY" not in os.environ.keys():
|
| 25 |
-
# os.environ["COHERE_API_KEY"] = st.text_input(
|
| 26 |
-
# "PLEASE ENTER YOUR COHERE API KEY. Head over to this [link](https://dashboard.cohere.com/welcome/login?redirect_uri=%2F)",
|
| 27 |
-
# placeholder="Enter your Key here...",
|
| 28 |
-
# type="password")
|
| 29 |
-
# st.rerun()
|
| 30 |
-
#
|
| 31 |
-
# if "VOYAGE_API_KEY" not in os.environ.keys():
|
| 32 |
-
# os.environ["VOYAGE_API_KEY"] = st.text_input(
|
| 33 |
-
# "PLEASE ENTER YOUR VOYAGE API KEY. Head over to this [link](https://dash.voyageai.com/)",
|
| 34 |
-
# placeholder="Enter your Key here...",
|
| 35 |
-
# type="password")
|
| 36 |
-
# st.rerun()
|
| 37 |
|
| 38 |
if "messages" not in st.session_state:
|
| 39 |
st.session_state.messages = []
|
|
@@ -41,6 +14,21 @@ if "messages" not in st.session_state:
|
|
| 41 |
if "memory" not in st.session_state:
|
| 42 |
st.session_state["memory"] = load_memory()
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
for message in st.session_state.messages:
|
| 45 |
with st.chat_message(message["role"]):
|
| 46 |
st.markdown(message["content"])
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import os
|
| 3 |
+
import tempfile
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
from utils.llm import model_pipeline, load_memory, typewriter
|
| 5 |
+
from utils.utils import load_documents
|
| 6 |
from dotenv import load_dotenv
|
| 7 |
load_dotenv()
|
| 8 |
|
| 9 |
st.title("Search the right candidates!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
if "messages" not in st.session_state:
|
| 12 |
st.session_state.messages = []
|
|
|
|
| 14 |
if "memory" not in st.session_state:
|
| 15 |
st.session_state["memory"] = load_memory()
|
| 16 |
|
| 17 |
+
uploaded_file = st.file_uploader("Choose a PDF...", type="pdf")
|
| 18 |
+
if uploaded_file is not None:
|
| 19 |
+
# Create a temporary directory
|
| 20 |
+
temp_dir = tempfile.mkdtemp()
|
| 21 |
+
file_name = st.text_input("Enter File name: ", "uploaded_file.pdf")
|
| 22 |
+
st.session_state["file_name"] = file_name
|
| 23 |
+
# Save the uploaded file to the temporary directory
|
| 24 |
+
with open(os.path.join(temp_dir, 'uploaded_file.pdf'), 'wb') as f:
|
| 25 |
+
f.write(uploaded_file.getvalue())
|
| 26 |
+
|
| 27 |
+
# Pass the file path to the load_documents function
|
| 28 |
+
load_documents(file_path=os.path.join(temp_dir, 'uploaded_file.pdf'))
|
| 29 |
+
st.session_state.messages.append({"role": "assistant", "content": "I have loaded the resume."})
|
| 30 |
+
del uploaded_file
|
| 31 |
+
|
| 32 |
for message in st.session_state.messages:
|
| 33 |
with st.chat_message(message["role"]):
|
| 34 |
st.markdown(message["content"])
|
requirements.txt
CHANGED
|
@@ -13,6 +13,7 @@ cohere
|
|
| 13 |
wave
|
| 14 |
nltk
|
| 15 |
tiktoken
|
|
|
|
| 16 |
audio_recorder_streamlit
|
| 17 |
streamlit-option-menu
|
| 18 |
streamlit-lottie
|
|
|
|
| 13 |
wave
|
| 14 |
nltk
|
| 15 |
tiktoken
|
| 16 |
+
pymupdf
|
| 17 |
audio_recorder_streamlit
|
| 18 |
streamlit-option-menu
|
| 19 |
streamlit-lottie
|
utils/utils.py
CHANGED
|
@@ -12,11 +12,14 @@ def init_vectorstore(dataset_path="hub://p1utoze/default", embeddings="voyage/vo
|
|
| 12 |
db = DeepLake(dataset_path=dataset_path, embedding=embeddings)
|
| 13 |
return db
|
| 14 |
|
| 15 |
-
def load_documents(base_path="data/INFORMATION-TECHNOLOGY/"):
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
| 20 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
|
| 21 |
docs = loader.load_and_split(text_splitter)
|
| 22 |
db = init_vectorstore("hub://p1utoze/resumes", embeddings)
|
|
|
|
| 12 |
db = DeepLake(dataset_path=dataset_path, embedding=embeddings)
|
| 13 |
return db
|
| 14 |
|
| 15 |
+
def load_documents(file_path=None, base_path="data/INFORMATION-TECHNOLOGY/"):
|
| 16 |
+
if file_path:
|
| 17 |
+
loader = PyMuPDFLoader(file_path)
|
| 18 |
+
else:
|
| 19 |
+
for file in os.listdir(base_path):
|
| 20 |
+
path = base_path + file
|
| 21 |
+
print(path)
|
| 22 |
+
loader = PyMuPDFLoader(path)
|
| 23 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
|
| 24 |
docs = loader.load_and_split(text_splitter)
|
| 25 |
db = init_vectorstore("hub://p1utoze/resumes", embeddings)
|