Spaces:

eaglelandsonce
/

clarify-rag

Sleeping

App Files Files Community

eaglelandsonce committed on Jan 16, 2024

Commit

2e79f3a

verified ·

1 Parent(s): 9bc5b30

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -60

app.py CHANGED Viewed

@@ -1,77 +1,96 @@
 import streamlit as st
 import os
-from langchain.document_loaders import TextLoader
 from langchain.text_splitter import CharacterTextSplitter
-from langchain_community.llms import Clarifai
 from langchain.chains import RetrievalQA
-from clarifai.client.user import User
-from clarifai.client.app import App
-from langchain_community.embeddings import ClarifaiEmbeddings
-from langchain_community.vectorstores import Clarifai
-# Load and prepare your data
-loader = TextLoader("resources/state_of_the_union.txt")
-documents = loader.load()
-text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
-docs = text_splitter.split_documents(documents)
-# Environment Variables
-CLARIFAI_PAT = os.environ.get('CLARIFAI_PAT')
-USER_ID = "qaillc"
-APP_ID = "MRAG"
-NUMBER_OF_DOCS = 3
-# Create Clarifai App
-client = User(user_id="qaillc")
-existing_apps = client.list_apps()  # The method to list apps might be different
-app_exists = App(user_id=USER_ID, app_id=APP_ID )
-# If the app does not exist, create it
-if not app_exists:
-    app = client.create_app(app_id=APP_ID, base_workflow="baai-general-embedding-base-en")
-    print(f"App {APP_ID} created successfully.")
-else:
-    print(f"App {APP_ID} already exists.")
-# Setup Clarifai Vector DB
-clarifai_vector_db = Clarifai.from_documents(
         user_id=USER_ID,
         app_id=APP_ID,
         documents=docs,
         pat=CLARIFAI_PAT,
         number_of_docs=3,
     )
-# Setup Clarifai LLM
-USER_ID = "openai"
-APP_ID = "chat-completion"
-MODEL_ID = "GPT-3_5-turbo"
-clarifai_llm = Clarifai(pat=CLARIFAI_PAT, user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID)
-# Initialize RetrievalQA
-qa = RetrievalQA.from_chain_type(
-    llm=clarifai_llm,
-    retriever=clarifai_vector_db.as_retriever(),
-    chain_type="stuff"
-)
-# Streamlit Interface
-st.title("RetrievalQA Interface")
-# User Query Input
-user_query = st.text_input("Enter your query:", "According to the document, what did Vladimir Putin miscalculate?")
-# Run Model on Button Click
-if st.button('Run Query'):
-    with st.spinner('Processing...'):
-        # Run the query through the model
-        answer = qa.run(user_query)
-        # Display the answer
-        st.write("Answer:", answer)

 import streamlit as st
+import tempfile
 import os
+from langchain.document_loaders import PyPDFLoader
 from langchain.text_splitter import CharacterTextSplitter
+from langchain.vectorstores import Clarifai
 from langchain.chains import RetrievalQA
+from clarifai.modules.css import ClarifaiStreamlitCSS
+st.set_page_config(page_title="Chat with Documents", page_icon="🦜")
+st.title("🦜 RAG with Clarifai and Langchain")
+ClarifaiStreamlitCSS.insert_default_css(st)
+# 1. Data Organization: chunk documents
+@st.cache_resource(ttl="1h")
+def load_chunk_pdf(uploaded_files):
+    # Read documents
+    documents = []
+    temp_dir = tempfile.TemporaryDirectory()
+    for file in uploaded_files:
+        temp_filepath = os.path.join(temp_dir.name, file.name)
+        with open(temp_filepath, "wb") as f:
+            f.write(file.getvalue())
+        loader = PyPDFLoader(temp_filepath)
+        documents.extend(loader.load())
+    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+    chunked_documents = text_splitter.split_documents(documents)
+    return chunked_documents
+# Create vector store on Clarifai for use in step 2
+def vectorstore(USER_ID, APP_ID, docs, CLARIFAI_PAT):
+    """Create a Clarifai vector store from the given documents.
+
+    Args:
+        USER_ID: Clarifai user id, forwarded as ``user_id``.
+        APP_ID: Clarifai app id, forwarded as ``app_id``.
+        docs: Documents forwarded as ``documents`` (``main`` passes the
+            chunks returned by ``load_chunk_pdf``).
+        CLARIFAI_PAT: Clarifai personal access token, forwarded as ``pat``.
+
+    Returns:
+        The object returned by ``Clarifai.from_documents``.
+
+    NOTE(review): ``number_of_docs=3`` is hard-coded; presumably it is the
+    number of documents returned per search -- confirm against the
+    LangChain Clarifai vector store documentation.
+    """
+    clarifai_vector_db = Clarifai.from_documents(
         user_id=USER_ID,
         app_id=APP_ID,
         documents=docs,
         pat=CLARIFAI_PAT,
         number_of_docs=3,
     )
+    return clarifai_vector_db
+def QandA(CLARIFAI_PAT, clarifai_vector_db):
+    from langchain.llms import Clarifai
+    USER_ID = "openai"
+    APP_ID = "chat-completion"
+    MODEL_ID = "GPT-4"
+    # LLM to use (set to GPT-4 above)
+    clarifai_llm = Clarifai(
+        pat=CLARIFAI_PAT, user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID)
+    # Type of Langchain chain to use, the "stuff" chain which combines chunks retrieved
+    # and prepends them all to the prompt
+    qa = RetrievalQA.from_chain_type(
+        llm=clarifai_llm,
+        chain_type="stuff",
+        retriever=clarifai_vector_db.as_retriever()
+    )
+    return qa
+def main():
+    user_question = st.text_input("Ask a question to GPT 3.5 Turbo model about your documents and click on get the response")
+    with st.sidebar:
+        st.subheader("Add your Clarifai PAT, USER ID, APP ID along with the documents")
+        # Get the USER_ID, APP_ID, Clarifai API Key
+        CLARIFAI_PAT = st.text_input("Clarifai PAT", type="password")
+        USER_ID = st.text_input("Clarifai user id")
+        APP_ID = st.text_input("Clarifai app id")
+        uploaded_files = st.file_uploader(
+            "Upload your PDFs here", accept_multiple_files=True)
+    if not (CLARIFAI_PAT and USER_ID and APP_ID and uploaded_files):
+        st.info("Please add your Clarifai PAT, USER_ID, APP_ID and upload files to continue.")
+    elif st.button("Get the response"):
+        with st.spinner("Processing"):
+            # process pdfs
+            docs = load_chunk_pdf(uploaded_files)
+            # create a vector store
+            clarifai_vector_db = vectorstore(USER_ID, APP_ID, docs, CLARIFAI_PAT)
+            # 2. Vector Creation: create Q&A chain
+            conversation = QandA(CLARIFAI_PAT, clarifai_vector_db)
+            # 3. Querying: Ask the question to the GPT 4 model based on the documents
+            # This step also combines 4. retrieval and 5. Prepending the context
+            response = conversation.run(user_question)
+            st.write(response)
+if __name__ == '__main__':
+    main()