karthiksagarn committed on
Commit
738fa95
1 Parent(s): 6185e3d

Upload 3 files

Files changed (3)
  1. app.py +112 -0
  2. background_image.png +0 -0
  3. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,112 @@
+ import os
+ # exec(os.getenv("CODE"))  # to execute the whole code on Hugging Face
+
+ import streamlit as st
+ from PyPDF2 import PdfReader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
+ import google.generativeai as genai
+ from langchain_community.vectorstores import FAISS
+ from langchain_google_genai import ChatGoogleGenerativeAI
+ from langchain.chains.question_answering import load_qa_chain
+ from langchain.prompts import PromptTemplate
+ from dotenv import load_dotenv
+ import base64
+ from io import BytesIO
+
+ load_dotenv()
+
+ genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
+
+ ## Go through every uploaded PDF and every page of each PDF, extracting its text.
+ def get_pdf_text(pdf_docs):
+     text = ""
+     for pdf in pdf_docs:
+         pdf_reader = PdfReader(BytesIO(pdf.read()))
+         for page in pdf_reader.pages:
+             text += page.extract_text() or ""  # extract_text() may return None for image-only pages
+     return text
+
+ ## Split the extracted text into overlapping chunks.
+ def get_text_chunks(text):
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
+     chunks = text_splitter.split_text(text)
+     return chunks
+
+ ## Convert the chunks into vectors and save them in a local FAISS index.
+ def get_vector_store(text_chunks):
+     embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+     vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
+     vector_store.save_local("faiss_index")
+
+ ## Build the question-answering chain.
+ def get_conversational_chain():
+     prompt_template = """
+     Answer the question in as much detail as possible from the provided context, and make sure to provide
+     all the details. If the answer is not in the provided context, just say "answer is not available in the context";
+     don't provide a wrong answer.
+     Context: \n{context}\n
+     Question: \n{question}\n
+
+     Answer:
+     """
+     model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.45)
+     prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])
+     chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
+     return chain
+
+ ## Handle a user question: retrieve similar chunks and answer with the chain.
+ def user_input(user_question):
+     embeddings = GoogleGenerativeAIEmbeddings(model='models/embedding-001')
+
+     db = FAISS.load_local('faiss_index', embeddings, allow_dangerous_deserialization=True)
+     docs = db.similarity_search(user_question)
+
+     chain = get_conversational_chain()
+
+     response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
+
+     print(response)
+     st.write("Bot: ", response["output_text"])
+
+ # Streamlit app
+ def main():
+     st.set_page_config(page_title="Chat With Multiple PDF")
+
+     # Function to set a background image
+     def set_background(image_file):
+         with open(image_file, "rb") as image:
+             b64_image = base64.b64encode(image.read()).decode("utf-8")
+         css = f"""
+         <style>
+         .stApp {{
+             background: url(data:image/png;base64,{b64_image});
+             background-size: cover;
+             background-position: center;
+             background-repeat: no-repeat;
+         }}
+         </style>
+         """
+         st.markdown(css, unsafe_allow_html=True)
+
+     # Set the background image
+     set_background("background_image.png")
+
+     st.header("Podcast With Your PDFs")
+
+     user_question = st.text_input("Ask a Question from the PDF Files")
+
+     if user_question:
+         user_input(user_question)
+
+     with st.sidebar:
+         st.title("Menu:")
+         pdf_docs = st.file_uploader("Upload Your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True, type='pdf')
+         if st.button("Submit & Process"):
+             with st.spinner("Processing..."):
+                 raw_text = get_pdf_text(pdf_docs)
+                 text_chunks = get_text_chunks(raw_text)
+                 get_vector_store(text_chunks)
+                 st.success("Done")
+
+ if __name__ == "__main__":
+     main()
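
The app persists its vector store to a local faiss_index directory via get_vector_store(). For a quick check of the retrieval step outside Streamlit, a minimal sketch (assuming the index has already been built by the app and GOOGLE_API_KEY is available in the environment or a .env file; the query string is just an illustrative placeholder):

from dotenv import load_dotenv
from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings

load_dotenv()  # expects GOOGLE_API_KEY, as in app.py

# Re-open the index written by get_vector_store() and fetch the closest chunks.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
for doc in db.similarity_search("What is this document about?", k=2):  # hypothetical query
    print(doc.page_content[:200])

The app itself is launched the usual Streamlit way: streamlit run app.py.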
background_image.png ADDED
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ streamlit
+ google-generativeai
+ python-dotenv
+ langchain
+ langchain-community
+ PyPDF2
+ faiss-cpu
+ langchain_google_genai
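
The requirements are unpinned, so exact versions may vary; a typical local setup would be pip install -r requirements.txt, with GOOGLE_API_KEY set in a .env file as app.py expects.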