Spaces:

datascientist22
/

blogpostQA-retrieval-bot

Sleeping

App Files Community

datascientist22 committed on Sep 6, 2024

Commit

709bbfd

verified ·

1 Parent(s): 0953464

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -32

app.py CHANGED Viewed

@@ -132,50 +132,53 @@ if st.button("Submit Query"):
     elif not url_input:
         st.warning("Please enter a valid URL in the sidebar.")
     else:
-        # Blog loading logic based on user input URL
-        loader = WebBaseLoader(
-            web_paths=(url_input,),  # Use the user-input URL
-            bs_kwargs=dict(
-                parse_only=bs4.SoupStrainer()  # Adjust based on the user's URL structure
-            ),
-        )
-        docs = loader.load()
-        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=300)
-        splits = text_splitter.split_documents(docs)
-        # Initialize the embedding model
-        embedding_model = SentenceTransformerEmbedding('all-MiniLM-L6-v2')
-        # Initialize Chroma with the embedding class
-        vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)
-        # Retrieve and generate using the relevant snippets of the blog
-        retriever = vectorstore.as_retriever()
-        # Retrieve relevant documents
-        retrieved_docs = retriever.get_relevant_documents(query)
-        # Format the retrieved documents
-        def format_docs(docs):
-            return "\n\n".join(doc.page_content for doc in docs)
-        context = format_docs(retrieved_docs)
-        # Initialize the language model
-        custom_llm = CustomLanguageModel()
-        # Initialize RAG chain using the prompt
-        prompt = RAGPrompt()
-        # Apply the prompt directly to the data (no chaining using `|`)
-        prompt_data = prompt({"question": query, "context": context})
-        # Generate the response using the language model, focusing on the answer from the retrieved context
-        result = custom_llm.generate(prompt_data["question"], prompt_data["context"])
-        # Store query and response in session for chat history
-        st.session_state['chat_history'].append((query, result))
 # Display chat history
 for q, r in st.session_state['chat_history']:

     elif not url_input:
         st.warning("Please enter a valid URL in the sidebar.")
     else:
+        try:
+            # Blog loading logic based on user input URL
+            loader = WebBaseLoader(
+                web_paths=(url_input,),  # Use the user-input URL
+                bs_kwargs=dict(
+                    parse_only=bs4.SoupStrainer()  # Adjust based on the user's URL structure
+                ),
+            )
+            docs = loader.load()
+            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=300)
+            splits = text_splitter.split_documents(docs)
+            # Initialize the embedding model
+            embedding_model = SentenceTransformerEmbedding('all-MiniLM-L6-v2')
+            # Initialize Chroma with the embedding class
+            vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)
+            # Retrieve and generate using the relevant snippets of the blog
+            retriever = vectorstore.as_retriever()
+            # Retrieve relevant documents
+            retrieved_docs = retriever.get_relevant_documents(query)
+            # Format the retrieved documents
+            def format_docs(docs):
+                return "\n\n".join(doc.page_content for doc in docs)
+            context = format_docs(retrieved_docs)
+            # Initialize the language model
+            custom_llm = CustomLanguageModel()
+            # Initialize RAG chain using the prompt
+            prompt = RAGPrompt()
+            # Apply the prompt directly to the data (no chaining using `|`)
+            prompt_data = prompt({"question": query, "context": context})
+            # Generate the response using the language model, focusing on the answer from the retrieved context
+            result = custom_llm.generate(prompt_data["question"], prompt_data["context"])
+            # Store query and response in session for chat history
+            st.session_state['chat_history'].append((query, result))
+        except Exception as e:
+            st.error(f"An error occurred: {e}")
 # Display chat history
 for q, r in st.session_state['chat_history']: