import os
import tempfile

import streamlit as st
from clarifai.modules.css import ClarifaiStreamlitCSS
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Clarifai

st.set_page_config(page_title="Chat with Documents", page_icon="🦜")
st.title("🦜 RAG with Clarifai and Langchain")
ClarifaiStreamlitCSS.insert_default_css(st)


# 1. Data Organization: chunk documents
@st.cache_resource(ttl="1h")
def load_chunk_pdf(uploaded_files):
    """Save uploaded PDFs to a temp dir, load them, and split into chunks.

    Args:
        uploaded_files: Streamlit UploadedFile objects from st.file_uploader.

    Returns:
        A list of LangChain Documents, split into ~1000-character chunks
        with no overlap.
    """
    documents = []
    # Context manager guarantees the temp dir (and the copied PDFs) is
    # removed as soon as the documents have been loaded into memory;
    # the original left cleanup to the GC finalizer.
    with tempfile.TemporaryDirectory() as temp_dir:
        for file in uploaded_files:
            temp_filepath = os.path.join(temp_dir, file.name)
            # PyPDFLoader needs a real file path, so persist the upload first.
            with open(temp_filepath, "wb") as f:
                f.write(file.getvalue())
            loader = PyPDFLoader(temp_filepath)
            documents.extend(loader.load())

    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    return text_splitter.split_documents(documents)


def vectorstore(USER_ID, APP_ID, docs, CLARIFAI_PAT):
    """Create a vector store on Clarifai from chunked documents (step 2).

    Args:
        USER_ID: Clarifai user id owning the app.
        APP_ID: Clarifai app id that will hold the vectors.
        docs: chunked LangChain Documents from load_chunk_pdf.
        CLARIFAI_PAT: Clarifai personal access token.

    Returns:
        A Clarifai vector store; retrieval returns up to 3 documents.
    """
    return Clarifai.from_documents(
        user_id=USER_ID,
        app_id=APP_ID,
        documents=docs,
        pat=CLARIFAI_PAT,
        number_of_docs=3,
    )


def QandA(CLARIFAI_PAT, clarifai_vector_db):
    """Build a RetrievalQA chain over the vector store using GPT-4.

    Args:
        CLARIFAI_PAT: Clarifai personal access token.
        clarifai_vector_db: vector store produced by vectorstore().

    Returns:
        A RetrievalQA chain ready to answer questions via .run().
    """
    from langchain.llms import Clarifai

    USER_ID = "openai"
    APP_ID = "chat-completion"
    MODEL_ID = "GPT-4"

    # LLM to use (GPT-4, served through Clarifai's chat-completion app)
    clarifai_llm = Clarifai(
        pat=CLARIFAI_PAT, user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID)

    # The "stuff" chain combines all retrieved chunks and prepends them
    # to the prompt in a single LLM call.
    return RetrievalQA.from_chain_type(
        llm=clarifai_llm,
        chain_type="stuff",
        retriever=clarifai_vector_db.as_retriever(),
    )


def main():
    """Render the UI and drive the chunk → vectorize → query pipeline."""
    # Fixed: the prompt previously said "GPT 3.5 Turbo" although QandA()
    # hard-codes MODEL_ID = "GPT-4".
    user_question = st.text_input(
        "Ask a question to the GPT-4 model about your documents "
        "and click on get the response")

    with st.sidebar:
        # Fixed: this string literal was broken across a line boundary.
        st.subheader("Add your Clarifai PAT, USER ID, APP ID "
                     "along with the documents")

        # Get the USER_ID, APP_ID, Clarifai API Key
        CLARIFAI_PAT = st.text_input("Clarifai PAT", type="password")
        USER_ID = st.text_input("Clarifai user id")
        APP_ID = st.text_input("Clarifai app id")
        uploaded_files = st.file_uploader(
            "Upload your PDFs here", accept_multiple_files=True)

    if not (CLARIFAI_PAT and USER_ID and APP_ID and uploaded_files):
        st.info("Please add your Clarifai PAT, USER_ID, APP_ID and upload files to continue.")
    elif st.button("Get the response"):
        with st.spinner("Processing"):
            # process pdfs
            docs = load_chunk_pdf(uploaded_files)
            # create a vector store
            clarifai_vector_db = vectorstore(USER_ID, APP_ID, docs, CLARIFAI_PAT)
            # 2. Vector Creation: create Q&A chain
            conversation = QandA(CLARIFAI_PAT, clarifai_vector_db)
            # 3. Querying: ask the question to the GPT-4 model.
            # This step also combines 4. retrieval and 5. prepending the context.
            response = conversation.run(user_question)
            st.write(response)


if __name__ == '__main__':
    main()