eaglelandsonce committed on
Commit
2e79f3a
·
verified ·
1 Parent(s): 9bc5b30

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -60
app.py CHANGED
@@ -1,77 +1,96 @@
1
  import streamlit as st
 
2
  import os
3
- from langchain.document_loaders import TextLoader
4
  from langchain.text_splitter import CharacterTextSplitter
5
- from langchain_community.llms import Clarifai
6
  from langchain.chains import RetrievalQA
7
- from clarifai.client.user import User
8
- from clarifai.client.app import App
9
- from langchain_community.embeddings import ClarifaiEmbeddings
10
-
11
- from langchain_community.vectorstores import Clarifai
12
-
13
- # Load and prepare your data
14
- loader = TextLoader("resources/state_of_the_union.txt")
15
- documents = loader.load()
16
- text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
17
- docs = text_splitter.split_documents(documents)
18
-
19
- # Environment Variables
20
- CLARIFAI_PAT = os.environ.get('CLARIFAI_PAT')
21
- USER_ID = "qaillc"
22
- APP_ID = "MRAG"
23
- NUMBER_OF_DOCS = 3
24
-
25
- # Create Clarifai App
26
-
27
- client = User(user_id="qaillc")
28
-
29
- existing_apps = client.list_apps() # The method to list apps might be different
30
-
31
- app_exists = App(user_id=USER_ID, app_id=APP_ID )
32
-
33
- # If the app does not exist, create it
34
- if not app_exists:
35
- app = client.create_app(app_id=APP_ID, base_workflow="baai-general-embedding-base-en")
36
- print(f"App {APP_ID} created successfully.")
37
- else:
38
- print(f"App {APP_ID} already exists.")
39
-
40
-
41
- # Setup Clarifai Vector DB
42
- clarifai_vector_db = Clarifai.from_documents(
43
  user_id=USER_ID,
44
  app_id=APP_ID,
45
  documents=docs,
46
  pat=CLARIFAI_PAT,
47
  number_of_docs=3,
48
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
- # Setup Clarifai LLM
51
- USER_ID = "openai"
52
- APP_ID = "chat-completion"
53
- MODEL_ID = "GPT-3_5-turbo"
54
- clarifai_llm = Clarifai(pat=CLARIFAI_PAT, user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID)
55
 
 
 
 
 
 
 
56
 
 
 
57
 
58
- # Initialize RetrievalQA
59
- qa = RetrievalQA.from_chain_type(
60
- llm=clarifai_llm,
61
- retriever=clarifai_vector_db.as_retriever(),
62
- chain_type="stuff"
63
- )
64
 
65
- # Streamlit Interface
66
- st.title("RetrievalQA Interface")
 
67
 
68
- # User Query Input
69
- user_query = st.text_input("Enter your query:", "According to the document, what did Vladimir Putin miscalculate?")
70
 
71
- # Run Model on Button Click
72
- if st.button('Run Query'):
73
- with st.spinner('Processing...'):
74
- # Run the query through the model
75
- answer = qa.run(user_query)
76
- # Display the answer
77
- st.write("Answer:", answer)
 
1
  import streamlit as st
2
+ import tempfile
3
  import os
4
+ from langchain.document_loaders import PyPDFLoader
5
  from langchain.text_splitter import CharacterTextSplitter
6
+ from langchain.vectorstores import Clarifai
7
  from langchain.chains import RetrievalQA
8
+ from clarifai.modules.css import ClarifaiStreamlitCSS
9
+
10
# Page-level configuration; Streamlit expects set_page_config to run before
# other Streamlit commands emit output.
st.set_page_config(
    page_title="Chat with Documents",
    page_icon="🦜",
)
st.title("🦜 RAG with Clarifai and Langchain")

# Apply Clarifai's default stylesheet to this Streamlit app.
ClarifaiStreamlitCSS.insert_default_css(st)
14
+
15
+ # 1. Data Organization: chunk documents
16
@st.cache_resource(ttl="1h")
def load_chunk_pdf(uploaded_files):
    """Parse the uploaded PDFs and split their contents into chunks.

    Each upload is written to a temporary directory so that ``PyPDFLoader``
    can open it by path. The directory and its files are removed before the
    function returns.

    Args:
        uploaded_files: Iterable of uploaded-file objects exposing ``name``
            and ``getvalue()`` (as used below).

    Returns:
        The chunked documents produced by
        ``CharacterTextSplitter.split_documents``.
    """
    documents = []
    # The context manager guarantees cleanup even if a PDF fails to parse,
    # instead of relying on garbage collection of the TemporaryDirectory.
    with tempfile.TemporaryDirectory() as temp_dir:
        for uploaded in uploaded_files:
            # Keep only the final path component so a crafted upload name
            # cannot place the file outside the temporary directory.
            safe_name = os.path.basename(uploaded.name)
            temp_filepath = os.path.join(temp_dir, safe_name)
            with open(temp_filepath, "wb") as f:
                f.write(uploaded.getvalue())
            # NOTE(review): relies on load() reading the PDF eagerly, since
            # the file is gone once the with-block exits — confirm.
            loader = PyPDFLoader(temp_filepath)
            documents.extend(loader.load())

    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    return text_splitter.split_documents(documents)
31
+
32
+ # Create vector store on Clarifai for use in step 2
33
def vectorstore(USER_ID, APP_ID, docs, CLARIFAI_PAT, number_of_docs=3):
    """Create a Clarifai vector store from the chunked documents.

    Args:
        USER_ID: Clarifai user id that owns the target app.
        APP_ID: Clarifai app id the documents are stored in.
        docs: Chunked documents to upload.
        CLARIFAI_PAT: Clarifai personal access token.
        number_of_docs: Forwarded unchanged to ``Clarifai.from_documents``;
            presumably the number of documents returned per retrieval
            (verify against the LangChain Clarifai docs). Defaults to 3,
            the value that was previously hard-coded.

    Returns:
        The vector store object returned by ``Clarifai.from_documents``.
    """
    return Clarifai.from_documents(
        user_id=USER_ID,
        app_id=APP_ID,
        documents=docs,
        pat=CLARIFAI_PAT,
        number_of_docs=number_of_docs,
    )
42
+
43
def QandA(CLARIFAI_PAT, clarifai_vector_db):
    """Build a RetrievalQA chain backed by a Clarifai-hosted GPT-4 model.

    Args:
        CLARIFAI_PAT: Clarifai personal access token used by the LLM client.
        clarifai_vector_db: Vector store whose ``as_retriever()`` supplies
            the context chunks.

    Returns:
        The chain returned by ``RetrievalQA.from_chain_type``.
    """
    # Imported here under an alias so the LLM class does not share a name
    # with any other ``Clarifai`` symbol in the module.
    from langchain.llms import Clarifai as ClarifaiLLM

    llm = ClarifaiLLM(
        pat=CLARIFAI_PAT,
        user_id="openai",
        app_id="chat-completion",
        model_id="GPT-4",
    )

    # The "stuff" chain combines the retrieved chunks and prepends them all
    # to the prompt.
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=clarifai_vector_db.as_retriever(),
    )
61
+
62
def main():
    """Render the UI and answer the user's question from the uploaded PDFs.

    Collects the question, the Clarifai credentials and the PDF uploads,
    then on button click chunks the PDFs, stores them in a Clarifai vector
    store, builds the Q&A chain and writes the model's answer to the page.
    """
    # NOTE(review): nesting was reconstructed from a listing that had lost
    # its indentation — confirm the credentials/uploader belong in the
    # sidebar and the button/answer in the main area.

    # The label names GPT-4 to match the model the Q&A chain actually uses.
    user_question = st.text_input(
        "Ask a question to the GPT-4 model about your documents and click on get the response")

    with st.sidebar:
        st.subheader("Add your Clarifai PAT, USER ID, APP ID along with the documents")

        # Get the USER_ID, APP_ID, Clarifai API Key
        CLARIFAI_PAT = st.text_input("Clarifai PAT", type="password")
        USER_ID = st.text_input("Clarifai user id")
        APP_ID = st.text_input("Clarifai app id")

        uploaded_files = st.file_uploader(
            "Upload your PDFs here", accept_multiple_files=True)

    if not (CLARIFAI_PAT and USER_ID and APP_ID and uploaded_files):
        st.info("Please add your Clarifai PAT, USER_ID, APP_ID and upload files to continue.")
    elif st.button("Get the response"):
        # Skip the upload/embedding/LLM round trip when there is no question.
        if not user_question.strip():
            st.warning("Please enter a question before requesting a response.")
            return

        with st.spinner("Processing"):
            # 1. Data organization: read and chunk the PDFs
            docs = load_chunk_pdf(uploaded_files)

            # 2. Vector creation: store the chunks in a Clarifai vector store
            clarifai_vector_db = vectorstore(USER_ID, APP_ID, docs, CLARIFAI_PAT)

            # Build the Q&A chain on top of the vector store
            conversation = QandA(CLARIFAI_PAT, clarifai_vector_db)

            # 3. Querying: ask the GPT-4 model the question based on the documents.
            # This step also combines 4. retrieval and 5. prepending the context.
            response = conversation.run(user_question)

            st.write(response)
 
94
 
95
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()