Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	Upload 5 files
Browse files
- Pipfile +19 -0
- Pipfile.lock +0 -0
- app.py +104 -0
- htmlTemplates.py +45 -0
- requirements.txt +14 -0
    	
        Pipfile
    ADDED
    
    | @@ -0,0 +1,19 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            [[source]]
            url = "https://pypi.org/simple"
            verify_ssl = true
            name = "pypi"

            [packages]
            langchain = "==0.0.184"
            pypdf2 = "==3.0.1"
            python-dotenv = "==1.0.0"
            streamlit = "==1.18.1"
            openai = "==0.27.6"
            faiss-cpu = "==1.7.4"
            altair = "==4"
            tiktoken = "==0.4.0"
            # Needed by app.py: the HuggingFaceHub LLM wrapper and the
            # sentence-transformers model loaded through HuggingFaceEmbeddings.
            # Re-run `pipenv lock` after changing this section so Pipfile.lock
            # stays in sync.
            huggingface-hub = "==0.14.1"
            sentence-transformers = "==2.2.2"

            [dev-packages]

            [requires]
            python_version = "3.10"
         | 
    	
        Pipfile.lock
    ADDED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 
    	
        app.py
    ADDED
    
    | @@ -0,0 +1,104 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import streamlit as st
         | 
| 2 | 
            +
            from dotenv import load_dotenv
         | 
| 3 | 
            +
            from PyPDF2 import PdfReader
         | 
| 4 | 
            +
            from langchain.text_splitter import CharacterTextSplitter
         | 
| 5 | 
            +
            from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
         | 
| 6 | 
            +
            from langchain.vectorstores import FAISS
         | 
| 7 | 
            +
            from langchain.chat_models import ChatOpenAI
         | 
| 8 | 
            +
            from langchain.memory import ConversationBufferMemory
         | 
| 9 | 
            +
            from langchain.chains import ConversationalRetrievalChain
         | 
| 10 | 
            +
            from htmlTemplates import css, bot_template, user_template
         | 
| 11 | 
            +
            from langchain.llms import HuggingFaceHub
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            def get_pdf_text(pdf_docs):
                """Concatenate the extracted text of every page of every given PDF.

                Args:
                    pdf_docs: Iterable of PDF sources that ``PdfReader`` can open
                        (``main`` passes the files from the Streamlit uploader).
                        ``None`` or an empty iterable produces an empty string.

                Returns:
                    str: The page texts joined in document order, then page order.
                    No separator is inserted between pages.
                """
                # A single join avoids rebuilding the string once per page; the
                # ``or ""`` keeps the join valid if a page yields no text at all.
                return "".join(
                    page.extract_text() or ""
                    for pdf in (pdf_docs or [])
                    for page in PdfReader(pdf).pages
                )
         | 
| 20 | 
            +
             | 
| 21 | 
            +
             | 
| 22 | 
            +
            def get_text_chunks(text):
                """Split ``text`` into chunks with ``CharacterTextSplitter``.

                The splitter is configured with a newline separator,
                ``chunk_size=1000``, ``chunk_overlap=200`` and ``len`` as the
                length function.

                Args:
                    text: The raw text to split.

                Returns:
                    Whatever ``CharacterTextSplitter.split_text`` returns for
                    ``text`` (the chunks consumed by ``get_vectorstore``).
                """
                splitter_options = {
                    "separator": "\n",
                    "chunk_size": 1000,
                    "chunk_overlap": 200,
                    "length_function": len,
                }
                return CharacterTextSplitter(**splitter_options).split_text(text)
         | 
| 31 | 
            +
             | 
| 32 | 
            +
             | 
| 33 | 
            +
            def get_vectorstore(text_chunks):
                """Embed the text chunks and index them in a FAISS vector store.

                Args:
                    text_chunks: The texts to embed and index.

                Returns:
                    The store built by ``FAISS.from_texts``.
                """
                # Embeddings come from a Hugging Face sentence-transformers model;
                # OpenAIEmbeddings (also imported by this file) is the alternative.
                embedder = HuggingFaceEmbeddings(
                    model_name="sentence-transformers/all-mpnet-base-v2"
                )
                return FAISS.from_texts(texts=text_chunks, embedding=embedder)
         | 
| 38 | 
            +
             | 
| 39 | 
            +
             | 
| 40 | 
            +
            def get_conversation_chain(vectorstore):
                """Build a conversational retrieval chain over ``vectorstore``.

                Args:
                    vectorstore: Object exposing ``as_retriever()``; ``main`` passes
                        the store returned by ``get_vectorstore``.

                Returns:
                    The chain created by ``ConversationalRetrievalChain.from_llm``,
                    wired to a Hugging Face Hub LLM and a buffer memory stored
                    under the ``chat_history`` key.
                """
                # Hosted Hugging Face model; ChatOpenAI (also imported by this
                # file) is the alternative LLM.
                hub_llm = HuggingFaceHub(
                    repo_id="HuggingFaceH4/zephyr-7b-beta",
                    model_kwargs={"temperature": 0.5, "max_length": 512},
                )
                history = ConversationBufferMemory(
                    memory_key='chat_history',
                    return_messages=True,
                )
                return ConversationalRetrievalChain.from_llm(
                    llm=hub_llm,
                    retriever=vectorstore.as_retriever(),
                    memory=history,
                )
         | 
| 52 | 
            +
             | 
| 53 | 
            +
             | 
| 54 | 
            +
            def handle_userinput(user_question):
                """Send a question to the conversation chain and render the chat.

                Stores the chain's ``chat_history`` in ``st.session_state`` and
                writes every message to the page, alternating between
                ``user_template`` (even positions) and ``bot_template`` (odd
                positions).

                Args:
                    user_question: The text typed by the user.

                Returns:
                    None. If no conversation chain exists yet (documents have not
                    been processed), a warning is shown and nothing is sent.
                """
                import html  # local import: only needed to escape chat text here

                conversation = st.session_state.conversation
                if conversation is None:
                    # main() initialises the chain to None until "Process" has
                    # run; calling it would raise TypeError.
                    st.warning(
                        "Please upload your PDFs and click 'Process' before asking a question.")
                    return

                response = conversation({'question': user_question})
                st.session_state.chat_history = response['chat_history']

                for i, message in enumerate(st.session_state.chat_history):
                    template = user_template if i % 2 == 0 else bot_template
                    # The templates are rendered with unsafe_allow_html=True, so
                    # message text (user input / model output) must be escaped to
                    # avoid injecting markup into the page.
                    st.write(
                        template.replace("{{MSG}}", html.escape(message.content)),
                        unsafe_allow_html=True)
         | 
| 65 | 
            +
             | 
| 66 | 
            +
             | 
| 67 | 
            +
            def main():
                """Run the Streamlit "Chat with multiple PDFs" page.

                Loads environment variables, configures the page, initialises the
                ``conversation`` and ``chat_history`` session entries, renders the
                question box, and provides a sidebar where PDFs are uploaded and
                turned into a conversation chain when "Process" is clicked.
                """
                load_dotenv()
                st.set_page_config(page_title="Chat with multiple PDFs",
                                   page_icon=":books:")
                st.write(css, unsafe_allow_html=True)

                # Session entries survive Streamlit reruns; create them only once.
                if "conversation" not in st.session_state:
                    st.session_state.conversation = None
                if "chat_history" not in st.session_state:
                    st.session_state.chat_history = None

                st.header("Chat with multiple PDFs :books:")
                user_question = st.text_input("Ask a question about your documents:")
                if user_question:
                    handle_userinput(user_question)

                with st.sidebar:
                    st.subheader("Your documents")
                    pdf_docs = st.file_uploader(
                        "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
                    if st.button("Process"):
                        if not pdf_docs:
                            # Nothing uploaded: building a vector store from zero
                            # chunks would fail, so stop early with a hint.
                            st.warning("Please upload at least one PDF before clicking 'Process'.")
                        else:
                            with st.spinner("Processing"):
                                # get pdf text
                                raw_text = get_pdf_text(pdf_docs)

                                # get the text chunks
                                text_chunks = get_text_chunks(raw_text)

                                if not text_chunks:
                                    # e.g. scanned PDFs without a text layer
                                    st.error("No text could be extracted from the uploaded PDFs.")
                                else:
                                    # create vector store
                                    vectorstore = get_vectorstore(text_chunks)

                                    # create conversation chain
                                    st.session_state.conversation = get_conversation_chain(
                                        vectorstore)


            if __name__ == '__main__':
                main()
         | 
    	
        htmlTemplates.py
    ADDED
    
    | @@ -0,0 +1,45 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # Inline <style> block for the chat UI: defines the .chat-message
            # container (with .user / .bot background variants) and its .avatar
            # and .message children, the classes used by bot_template and
            # user_template. app.py writes it with unsafe_allow_html=True.
            css = '''
         | 
| 2 | 
            +
            <style>
         | 
| 3 | 
            +
            .chat-message {
         | 
| 4 | 
            +
                padding: 1.5rem; border-radius: 0.5rem; margin-bottom: 1rem; display: flex
         | 
| 5 | 
            +
            }
         | 
| 6 | 
            +
            .chat-message.user {
         | 
| 7 | 
            +
                background-color: #2b313e
         | 
| 8 | 
            +
            }
         | 
| 9 | 
            +
            .chat-message.bot {
         | 
| 10 | 
            +
                background-color: #475063
         | 
| 11 | 
            +
            }
         | 
| 12 | 
            +
            .chat-message .avatar {
         | 
| 13 | 
            +
              width: 20%;
         | 
| 14 | 
            +
            }
         | 
| 15 | 
            +
            .chat-message .avatar img {
         | 
| 16 | 
            +
              max-width: 78px;
         | 
| 17 | 
            +
              max-height: 78px;
         | 
| 18 | 
            +
              border-radius: 50%;
         | 
| 19 | 
            +
              object-fit: cover;
         | 
| 20 | 
            +
            }
         | 
| 21 | 
            +
            .chat-message .message {
         | 
| 22 | 
            +
              width: 80%;
         | 
| 23 | 
            +
              padding: 0 1.5rem;
         | 
| 24 | 
            +
              color: #fff;
         | 
| 25 | 
            +
            }
         | 
| 26 | 
            +
            '''
         | 
| 27 | 
            +
             | 
| 28 | 
            +
            # HTML for one bot chat bubble. "{{MSG}}" is the placeholder that
            # app.py replaces with the message text before rendering.
            # NOTE(review): the avatar src is a file:/// path on one developer's
            # machine; it presumably will not resolve for other users or in a
            # deployed app - confirm and point it at a served/hosted image.
            bot_template = '''
         | 
| 29 | 
            +
            <div class="chat-message bot">
         | 
| 30 | 
            +
                <div class="avatar">
         | 
| 31 | 
            +
                    <img src="file:///Users/mohamedabdallaoui/Desktop/ayoub_bot2.png" style="max-height: 78px; max-width: 78px; border-radius: 50%; object-fit: cover;">
         | 
| 32 | 
            +
                </div>
         | 
| 33 | 
            +
                <div class="message">{{MSG}}</div>
         | 
| 34 | 
            +
            </div>
         | 
| 35 | 
            +
            '''
         | 
| 36 | 
            +
             | 
| 37 | 
            +
             | 
| 38 | 
            +
            # HTML for one user chat bubble. "{{MSG}}" is the placeholder that
            # app.py replaces with the message text before rendering.
            # NOTE(review): the avatar src is a file:/// path on one developer's
            # machine; it presumably will not resolve for other users or in a
            # deployed app - confirm and point it at a served/hosted image.
            user_template = '''
         | 
| 39 | 
            +
            <div class="chat-message user">
         | 
| 40 | 
            +
                <div class="avatar">
         | 
| 41 | 
            +
                    <img src="file:///Users/mohamedabdallaoui/Desktop/mohamed_bot2.png">
         | 
| 42 | 
            +
                </div>    
         | 
| 43 | 
            +
                <div class="message">{{MSG}}</div>
         | 
| 44 | 
            +
            </div>
         | 
| 45 | 
            +
            '''
         | 
    	
        requirements.txt
    ADDED
    
    | @@ -0,0 +1,14 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            langchain==0.0.184
            PyPDF2==3.0.1
            python-dotenv==1.0.0
            streamlit==1.18.1
            openai==0.27.6
            faiss-cpu==1.7.4
            altair==4
            tiktoken==0.4.0
            # required: app.py builds its LLM with langchain's HuggingFaceHub
            huggingface-hub==0.14.1

            # required: app.py embeds text with HuggingFaceEmbeddings
            # (sentence-transformers/all-mpnet-base-v2)
            sentence-transformers==2.2.2
            # uncomment to use instructor embeddings
            # InstructorEmbedding==1.0.1
         | 
