Spaces:

usmanyousaf
/

Pakistan_Law_Bot

Sleeping

App Files Community

usmanyousaf committed on Mar 14

Commit

cb4d732

verified ·

1 Parent(s): 1ed40d9

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -127

app.py CHANGED Viewed

@@ -140,8 +140,8 @@ st.set_page_config(
 # Constants
 DEFAULT_GROQ_API_KEY = os.getenv("GROQ_API_KEY")
-MODEL_NAME = "llama-3.3-70b-versatile"
-DEFAULT_DOCUMENT_PATH = "lawbook.pdf"  # Path to your hardcoded Pakistan laws PDF
 DEFAULT_COLLECTION_NAME = "pakistan_laws_default"
 CHROMA_PERSIST_DIR = "./chroma_db"
@@ -179,49 +179,41 @@ def setup_llm():
 def check_default_db_exists():
     """Check if the default document database already exists"""
-    if os.path.exists(os.path.join(CHROMA_PERSIST_DIR, DEFAULT_COLLECTION_NAME)):
-        return True
-    return False
 def load_existing_vectordb(collection_name):
     """Load an existing vector database from disk"""
     embeddings = setup_embeddings()
     try:
-        db = Chroma(
             persist_directory=CHROMA_PERSIST_DIR,
             embedding_function=embeddings,
             collection_name=collection_name
         )
-        return db
     except Exception as e:
         st.error(f"Error loading existing database: {str(e)}")
         return None
 def process_default_document(force_rebuild=False):
-    """Process the default Pakistan laws document or load from disk if available"""
-    # Check if database already exists
     if check_default_db_exists() and not force_rebuild:
         st.info("Loading existing Pakistan law database...")
         db = load_existing_vectordb(DEFAULT_COLLECTION_NAME)
-        if db is not None:
             st.session_state.vectordb = db
             setup_qa_chain()
             st.session_state.using_custom_docs = False
             return True
-    # If database doesn't exist or force rebuild, create it
     if not os.path.exists(DEFAULT_DOCUMENT_PATH):
-        st.error(f"Default document {DEFAULT_DOCUMENT_PATH} not found. Please make sure it exists.")
         return False
-    embeddings = setup_embeddings()
     try:
-        with st.spinner("Building Pakistan law database (this may take a few minutes)..."):
             loader = PyPDFLoader(DEFAULT_DOCUMENT_PATH)
             documents = loader.load()
-            # Add source filename to metadata
             for doc in documents:
                 doc.metadata["source"] = "Pakistan Laws (Official)"
@@ -231,61 +223,44 @@ def process_default_document(force_rebuild=False):
             )
             chunks = text_splitter.split_documents(documents)
-            # Create vector store
             db = Chroma.from_documents(
                 documents=chunks,
-                embedding=embeddings,
                 collection_name=DEFAULT_COLLECTION_NAME,
                 persist_directory=CHROMA_PERSIST_DIR
             )
-            # Explicitly persist to disk
             db.persist()
             st.session_state.vectordb = db
             setup_qa_chain()
             st.session_state.using_custom_docs = False
             return True
     except Exception as e:
         st.error(f"Error processing default document: {str(e)}")
         return False
-def check_custom_db_exists(collection_name):
-    """Check if a custom document database already exists"""
-    if os.path.exists(os.path.join(CHROMA_PERSIST_DIR, collection_name)):
-        return True
-    return False
 def process_custom_documents(uploaded_files):
     """Process user-uploaded PDF documents"""
     embeddings = setup_embeddings()
     collection_name = st.session_state.custom_collection_name
     documents = []
     for uploaded_file in uploaded_files:
-        # Save file temporarily
         with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
             tmp_file.write(uploaded_file.getvalue())
             tmp_path = tmp_file.name
-        # Load and split the document
         try:
             loader = PyPDFLoader(tmp_path)
             file_docs = loader.load()
-            # Add source filename to metadata
             for doc in file_docs:
                 doc.metadata["source"] = uploaded_file.name
             documents.extend(file_docs)
-            # Clean up temp file
             os.unlink(tmp_path)
         except Exception as e:
             st.error(f"Error processing {uploaded_file.name}: {str(e)}")
-            continue
     if documents:
         text_splitter = RecursiveCharacterTextSplitter(
@@ -294,19 +269,18 @@ def process_custom_documents(uploaded_files):
         )
         chunks = text_splitter.split_documents(documents)
-        # Create vector store
         with st.spinner("Building custom document database..."):
-            # If a previous custom DB exists for this user, delete it first
-            if check_custom_db_exists(collection_name):
-                # We need to recreate the vectorstore to delete the old collection
-                temp_db = Chroma(
                     persist_directory=CHROMA_PERSIST_DIR,
                     embedding_function=embeddings,
                     collection_name=collection_name
-                )
-                temp_db.delete_collection()
-            # Create new vector store
             db = Chroma.from_documents(
                 documents=chunks,
                 embedding=embeddings,
@@ -314,25 +288,18 @@ def process_custom_documents(uploaded_files):
                 persist_directory=CHROMA_PERSIST_DIR
             )
-            # Explicitly persist to disk
             db.persist()
             st.session_state.vectordb = db
             setup_qa_chain()
             st.session_state.using_custom_docs = True
             return True
     return False
 def setup_qa_chain():
     """Set up the QA chain with the RAG system"""
     if st.session_state.vectordb:
-        llm = setup_llm()
-        # Create prompt template
         template = """You are a helpful legal assistant specializing in Pakistani law.
-        Use the following context to answer the question. If you don't know the answer based on the context,
-        say that you don't have enough information, but provide general legal information if possible.
         Context: {context}
@@ -340,52 +307,33 @@ def setup_qa_chain():
         Answer:"""
-        prompt = ChatPromptTemplate.from_template(template)
-        # Create the QA chain
         st.session_state.qa_chain = RetrievalQA.from_chain_type(
-            llm=llm,
             chain_type="stuff",
             retriever=st.session_state.vectordb.as_retriever(search_kwargs={"k": 3}),
-            chain_type_kwargs={"prompt": prompt},
             return_source_documents=True
         )
 def generate_similar_questions(question, docs):
     """Generate similar questions based on retrieved documents"""
     llm = setup_llm()
-    # Extract key content from docs
     context = "\n".join([doc.page_content for doc in docs[:2]])
-    # Prompt to generate similar questions
-    prompt = f"""Based on the following user question and legal context, generate 3 similar questions that the user might also be interested in.
-    Make the questions specific, related to Pakistani law, and directly relevant to the original question.
     Original Question: {question}
     Legal Context: {context}
     Generate exactly 3 similar questions:"""
     try:
         response = llm.invoke(prompt)
-        # Extract questions from response using regex
         questions = re.findall(r"\d+\.\s+(.*?)(?=\d+\.|$)", response.content, re.DOTALL)
-        if not questions:
-            questions = response.content.split("\n")
-            questions = [q.strip() for q in questions if q.strip() and not q.startswith("Similar") and "?" in q]
-        # Clean and limit to 3 questions
-        questions = [q.strip().replace("\n", " ") for q in questions if "?" in q]
-        return questions[:3]
     except Exception as e:
-        print(f"Error generating similar questions: {e}")
         return []
 def get_answer(question):
     """Get answer from QA chain"""
-    # If default documents haven't been processed yet, try to load them
     if not st.session_state.vectordb:
         with st.spinner("Loading Pakistan law database..."):
             process_default_document()
@@ -393,71 +341,56 @@ def get_answer(question):
     if st.session_state.qa_chain:
         result = st.session_state.qa_chain({"query": question})
         answer = result["result"]
-        # Generate similar questions
-        source_docs = result.get("source_documents", [])
-        st.session_state.similar_questions = generate_similar_questions(question, source_docs)
-        # Add source information
         sources = set()
-        for doc in source_docs:
             if "source" in doc.metadata:
                 sources.add(doc.metadata["source"])
         if sources:
             answer += f"\n\nSources: {', '.join(sources)}"
         return answer
-    else:
-        return "Initializing the knowledge base. Please try again in a moment."
 def main():
-    st.title("Pakistan Law AI Agent")
-    # Determine current mode
     if st.session_state.using_custom_docs:
         st.subheader("Training on your personal resources")
     else:
-        st.subheader("Powered by  Pakistan law database")
-    # Sidebar for uploading documents and switching modes
     with st.sidebar:
         st.header("Resource Management")
-        # Option to return to default documents
         if st.session_state.using_custom_docs:
             if st.button("Return to Official Database"):
-                with st.spinner("Loading official Pakistan law database..."):
                     process_default_document()
-                    st.success("Switched to official Pakistan law database!")
-                    st.session_state.messages.append(AIMessage(content="Switched to official Pakistan law database. You can now ask legal questions."))
                     st.rerun()
-        # Option to rebuild the default database
         if not st.session_state.using_custom_docs:
             if st.button("Rebuild Official Database"):
-                with st.spinner("Rebuilding official Pakistan law database..."):
                     process_default_document(force_rebuild=True)
-                    st.success("Official database rebuilt successfully!")
                     st.rerun()
-        # Option to upload custom documents
-        st.header("Upload Custom Legal Documents")
         uploaded_files = st.file_uploader(
-            "Upload PDF files containing legal documents",
-            type=["pdf"],
-            accept_multiple_files=True
-        )
         if st.button("Train on Uploaded Documents") and uploaded_files:
-            with st.spinner("Processing your documents..."):
-                success = process_custom_documents(uploaded_files)
-                if success:
-                    st.success("Your documents processed successfully!")
-                    st.session_state.messages.append(AIMessage(content="Custom legal documents loaded successfully. You are now training on your personal resources."))
                     st.rerun()
-    # Display chat messages
     for message in st.session_state.messages:
         if isinstance(message, HumanMessage):
             with st.chat_message("user"):
@@ -465,42 +398,29 @@ def main():
         else:
             with st.chat_message("assistant", avatar="⚖️"):
                 st.write(message.content)
-    # Display similar questions if available
     if st.session_state.similar_questions:
         st.markdown("#### Related Questions:")
         cols = st.columns(len(st.session_state.similar_questions))
-        for i, question in enumerate(st.session_state.similar_questions):
-            if cols[i].button(question, key=f"similar_q_{i}"):
-                # Add selected question as user input
-                st.session_state.messages.append(HumanMessage(content=question))
-                # Generate and display assistant response
-                with st.chat_message("assistant", avatar="⚖️"):
-                    with st.spinner("Thinking..."):
-                        response = get_answer(question)
-                    st.write(response)
-                # Add assistant response to chat history
-                st.session_state.messages.append(AIMessage(content=response))
                 st.rerun()
-    # Input for new question
     if user_input := st.chat_input("Ask a legal question..."):
-        # Add user message to chat history
         st.session_state.messages.append(HumanMessage(content=user_input))
-        # Display user message
         with st.chat_message("user"):
             st.write(user_input)
-        # Generate and display assistant response
         with st.chat_message("assistant", avatar="⚖️"):
             with st.spinner("Thinking..."):
                 response = get_answer(user_input)
             st.write(response)
-        # Add assistant response to chat history
         st.session_state.messages.append(AIMessage(content=response))
         st.rerun()

 # Constants
 DEFAULT_GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+MODEL_NAME = "llama3-70b-8192"
+DEFAULT_DOCUMENT_PATH = "lawbook.pdf"
 DEFAULT_COLLECTION_NAME = "pakistan_laws_default"
 CHROMA_PERSIST_DIR = "./chroma_db"
 def check_default_db_exists():
     """Check if the default document database already exists"""
+    return os.path.exists(os.path.join(CHROMA_PERSIST_DIR, DEFAULT_COLLECTION_NAME))
 def load_existing_vectordb(collection_name):
     """Load an existing vector database from disk"""
     embeddings = setup_embeddings()
     try:
+        return Chroma(
             persist_directory=CHROMA_PERSIST_DIR,
             embedding_function=embeddings,
             collection_name=collection_name
         )
     except Exception as e:
         st.error(f"Error loading existing database: {str(e)}")
         return None
 def process_default_document(force_rebuild=False):
+    """Process the default Pakistan laws document"""
     if check_default_db_exists() and not force_rebuild:
         st.info("Loading existing Pakistan law database...")
         db = load_existing_vectordb(DEFAULT_COLLECTION_NAME)
+        if db:
             st.session_state.vectordb = db
             setup_qa_chain()
             st.session_state.using_custom_docs = False
             return True
     if not os.path.exists(DEFAULT_DOCUMENT_PATH):
+        st.error(f"Default document {DEFAULT_DOCUMENT_PATH} not found.")
         return False
     try:
+        with st.spinner("Building Pakistan law database..."):
             loader = PyPDFLoader(DEFAULT_DOCUMENT_PATH)
             documents = loader.load()
             for doc in documents:
                 doc.metadata["source"] = "Pakistan Laws (Official)"
             )
             chunks = text_splitter.split_documents(documents)
             db = Chroma.from_documents(
                 documents=chunks,
+                embedding=setup_embeddings(),
                 collection_name=DEFAULT_COLLECTION_NAME,
                 persist_directory=CHROMA_PERSIST_DIR
             )
             db.persist()
             st.session_state.vectordb = db
             setup_qa_chain()
             st.session_state.using_custom_docs = False
             return True
     except Exception as e:
         st.error(f"Error processing default document: {str(e)}")
         return False
 def process_custom_documents(uploaded_files):
     """Process user-uploaded PDF documents"""
     embeddings = setup_embeddings()
     collection_name = st.session_state.custom_collection_name
     documents = []
     for uploaded_file in uploaded_files:
         with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
             tmp_file.write(uploaded_file.getvalue())
             tmp_path = tmp_file.name
         try:
             loader = PyPDFLoader(tmp_path)
             file_docs = loader.load()
             for doc in file_docs:
                 doc.metadata["source"] = uploaded_file.name
             documents.extend(file_docs)
             os.unlink(tmp_path)
         except Exception as e:
             st.error(f"Error processing {uploaded_file.name}: {str(e)}")
     if documents:
         text_splitter = RecursiveCharacterTextSplitter(
         )
         chunks = text_splitter.split_documents(documents)
         with st.spinner("Building custom document database..."):
+            if Chroma(
+                persist_directory=CHROMA_PERSIST_DIR,
+                embedding_function=embeddings,
+                collection_name=collection_name
+            ).get():
+                Chroma(
                     persist_directory=CHROMA_PERSIST_DIR,
                     embedding_function=embeddings,
                     collection_name=collection_name
+                ).delete_collection()
             db = Chroma.from_documents(
                 documents=chunks,
                 embedding=embeddings,
                 persist_directory=CHROMA_PERSIST_DIR
             )
             db.persist()
             st.session_state.vectordb = db
             setup_qa_chain()
             st.session_state.using_custom_docs = True
             return True
     return False
 def setup_qa_chain():
     """Set up the QA chain with the RAG system"""
     if st.session_state.vectordb:
         template = """You are a helpful legal assistant specializing in Pakistani law.
+        Use the context to answer. If unsure, say so but provide general info.
         Context: {context}
         Answer:"""
         st.session_state.qa_chain = RetrievalQA.from_chain_type(
+            llm=setup_llm(),
             chain_type="stuff",
             retriever=st.session_state.vectordb.as_retriever(search_kwargs={"k": 3}),
+            chain_type_kwargs={"prompt": ChatPromptTemplate.from_template(template)},
             return_source_documents=True
         )
 def generate_similar_questions(question, docs):
     """Generate similar questions based on retrieved documents"""
     llm = setup_llm()
     context = "\n".join([doc.page_content for doc in docs[:2]])
+    prompt = f"""Generate 3 similar questions based on:
     Original Question: {question}
     Legal Context: {context}
     Generate exactly 3 similar questions:"""
     try:
         response = llm.invoke(prompt)
         questions = re.findall(r"\d+\.\s+(.*?)(?=\d+\.|$)", response.content, re.DOTALL)
+        return [q.strip() for q in questions[:3] if "?" in q]
     except Exception as e:
         return []
 def get_answer(question):
     """Get answer from QA chain"""
     if not st.session_state.vectordb:
         with st.spinner("Loading Pakistan law database..."):
             process_default_document()
     if st.session_state.qa_chain:
         result = st.session_state.qa_chain({"query": question})
         answer = result["result"]
         sources = set()
+        for doc in result.get("source_documents", []):
             if "source" in doc.metadata:
                 sources.add(doc.metadata["source"])
         if sources:
             answer += f"\n\nSources: {', '.join(sources)}"
+        st.session_state.similar_questions = generate_similar_questions(
+            question, result.get("source_documents", [])
+        )
         return answer
+    return "Initializing knowledge base..."
 def main():
+    inject_custom_css()  # CSS injection added here
+    st.title("Pakistan Law AI Agent ⚖️")
     if st.session_state.using_custom_docs:
         st.subheader("Training on your personal resources")
     else:
+        st.subheader("Powered by Pakistan law database")
     with st.sidebar:
         st.header("Resource Management")
         if st.session_state.using_custom_docs:
             if st.button("Return to Official Database"):
+                with st.spinner("Loading official database..."):
                     process_default_document()
+                    st.session_state.messages.append(AIMessage(content="Switched to official database!"))
                     st.rerun()
         if not st.session_state.using_custom_docs:
             if st.button("Rebuild Official Database"):
+                with st.spinner("Rebuilding..."):
                     process_default_document(force_rebuild=True)
                     st.rerun()
+        st.header("Upload Custom Documents")
         uploaded_files = st.file_uploader(
+            "Upload PDFs", type=["pdf"], accept_multiple_files=True)
         if st.button("Train on Uploaded Documents") and uploaded_files:
+            with st.spinner("Processing..."):
+                if process_custom_documents(uploaded_files):
+                    st.session_state.messages.append(AIMessage(content="Custom documents loaded!"))
                     st.rerun()
     for message in st.session_state.messages:
         if isinstance(message, HumanMessage):
             with st.chat_message("user"):
         else:
             with st.chat_message("assistant", avatar="⚖️"):
                 st.write(message.content)
     if st.session_state.similar_questions:
         st.markdown("#### Related Questions:")
         cols = st.columns(len(st.session_state.similar_questions))
+        for i, q in enumerate(st.session_state.similar_questions):
+            if cols[i].button(q, key=f"similar_q_{i}"):
+                st.session_state.messages.extend([
+                    HumanMessage(content=q),
+                    AIMessage(content=get_answer(q))
+                ])
                 st.rerun()
     if user_input := st.chat_input("Ask a legal question..."):
         st.session_state.messages.append(HumanMessage(content=user_input))
         with st.chat_message("user"):
             st.write(user_input)
         with st.chat_message("assistant", avatar="⚖️"):
             with st.spinner("Thinking..."):
                 response = get_answer(user_input)
             st.write(response)
         st.session_state.messages.append(AIMessage(content=response))
         st.rerun()