Spaces:

usmanyousaf
/

Pakistan_Law_Bot

Running

App Files Files Community

usmanyousaf committed on Mar 14

Commit

f12016a

verified ·

1 Parent(s): c879e4e

Update app.py

Browse files

Files changed (1) hide show

app.py +157 -213

app.py CHANGED Viewed

@@ -12,138 +12,13 @@ from langchain_community.vectorstores import Chroma
 from langchain.chains import RetrievalQA
 import re
-from app import check_custom_db_exists
-# Custom CSS Injection
-def inject_custom_css():
-    st.markdown("""
-        <style>
-            /* Main container */
-            .stApp {
-                background: linear-gradient(135deg, #1a1a1a, #2d2d2d);
-                color: #e0e0e0;
-            }
-            /* Chat containers */
-            .stChatMessage {
-                padding: 1.5rem;
-                border-radius: 15px;
-                margin: 1rem 0;
-                box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
-            }
-            /* User message styling */
-            [data-testid="stChatMessage"][aria-label="user"] {
-                background-color: #2d2d2d;
-                border: 1px solid #3d3d3d;
-                margin-left: 10%;
-            }
-            /* Assistant message styling */
-            [data-testid="stChatMessage"][aria-label="assistant"] {
-                background-color: #004d40;
-                border: 1px solid #00695c;
-                margin-right: 10%;
-            }
-            /* Sidebar styling */
-            [data-testid="stSidebar"] {
-                background: #121212 !important;
-                border-right: 2px solid #2d2d2d;
-                padding: 1rem;
-            }
-            /* Button styling */
-            .stButton>button {
-                background: linear-gradient(45deg, #00695c, #004d40);
-                color: white !important;
-                border: none;
-                border-radius: 8px;
-                padding: 0.8rem 1.5rem;
-                transition: all 0.3s;
-                font-weight: 500;
-            }
-            .stButton>button:hover {
-                transform: translateY(-2px);
-                box-shadow: 0 4px 6px rgba(0, 0, 0, 0.2);
-            }
-            /* File uploader */
-            [data-testid="stFileUploader"] {
-                border: 2px dashed #3d3d3d;
-                border-radius: 10px;
-                padding: 1rem;
-                background: #2d2d2d;
-            }
-            /* Input field */
-            .stTextInput>div>div>input {
-                background-color: #2d2d2d;
-                color: white;
-                border: 1px solid #3d3d3d;
-                border-radius: 8px;
-                padding: 0.8rem;
-            }
-            /* Spinner color */
-            .stSpinner>div>div {
-                border-color: #00bcd4 transparent transparent transparent;
-            }
-            /* Custom title styling */
-            .title-text {
-                background: linear-gradient(45deg, #00bcd4, #00695c);
-                -webkit-background-clip: text;
-                -webkit-text-fill-color: transparent;
-                font-family: 'Roboto', sans-serif;
-                font-size: 2.8rem;
-                text-align: center;
-                margin-bottom: 2rem;
-                letter-spacing: -0.5px;
-                text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.2);
-            }
-            /* Similar questions buttons */
-            .stButton>button.similar-q {
-                background: #2d2d2d;
-                border: 1px solid #00bcd4;
-                color: #00bcd4 !important;
-                white-space: normal;
-                height: auto;
-                min-height: 3rem;
-                transition: all 0.3s;
-            }
-            /* Hover effects */
-            .stButton>button.similar-q:hover {
-                background: #004d40 !important;
-                transform: scale(1.02);
-            }
-            /* Source text styling */
-            .source-text {
-                color: #00bcd4;
-                font-size: 0.9rem;
-                margin-top: 1rem;
-                padding-top: 0.5rem;
-                border-top: 1px solid #3d3d3d;
-            }
-        </style>
-    """, unsafe_allow_html=True)
 # Page Configuration
-st.set_page_config(
-    page_title="AI Law Agent",
-    page_icon="⚖️",
-    layout="centered",
-    initial_sidebar_state="expanded"
-)
 # Constants
 DEFAULT_GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 MODEL_NAME = "llama-3.3-70b-versatile"
-DEFAULT_DOCUMENT_PATH = "/Users/appleenterprises/Desktop/ai law bot/lawbook.pdf"
 DEFAULT_COLLECTION_NAME = "pakistan_laws_default"
 CHROMA_PERSIST_DIR = "./chroma_db"
@@ -166,9 +41,11 @@ if "custom_collection_name" not in st.session_state:
     st.session_state.custom_collection_name = f"custom_laws_{st.session_state.user_id}"
 def setup_embeddings():
     return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 def setup_llm():
     if st.session_state.llm is None:
         st.session_state.llm = ChatGroq(
             model_name=MODEL_NAME,
@@ -178,37 +55,50 @@ def setup_llm():
     return st.session_state.llm
 def check_default_db_exists():
-    return os.path.exists(os.path.join(CHROMA_PERSIST_DIR, DEFAULT_COLLECTION_NAME))
 def load_existing_vectordb(collection_name):
     try:
-        return Chroma(
             persist_directory=CHROMA_PERSIST_DIR,
-            embedding_function=setup_embeddings(),
             collection_name=collection_name
         )
     except Exception as e:
-        st.error(f"Error loading database: {str(e)}")
         return None
 def process_default_document(force_rebuild=False):
     if check_default_db_exists() and not force_rebuild:
         db = load_existing_vectordb(DEFAULT_COLLECTION_NAME)
-        if db:
             st.session_state.vectordb = db
             setup_qa_chain()
             st.session_state.using_custom_docs = False
             return True
     if not os.path.exists(DEFAULT_DOCUMENT_PATH):
-        st.error("Default document not found.")
         return False
     try:
-        with st.spinner("Building knowledge base..."):
             loader = PyPDFLoader(DEFAULT_DOCUMENT_PATH)
             documents = loader.load()
             for doc in documents:
                 doc.metadata["source"] = "Pakistan Laws (Official)"
@@ -218,40 +108,61 @@ def process_default_document(force_rebuild=False):
             )
             chunks = text_splitter.split_documents(documents)
             db = Chroma.from_documents(
                 documents=chunks,
-                embedding=setup_embeddings(),
                 collection_name=DEFAULT_COLLECTION_NAME,
                 persist_directory=CHROMA_PERSIST_DIR
             )
             db.persist()
             st.session_state.vectordb = db
             setup_qa_chain()
             st.session_state.using_custom_docs = False
             return True
     except Exception as e:
-        st.error(f"Error processing document: {str(e)}")
         return False
 def process_custom_documents(uploaded_files):
     collection_name = st.session_state.custom_collection_name
     documents = []
     for uploaded_file in uploaded_files:
         with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
             tmp_file.write(uploaded_file.getvalue())
             tmp_path = tmp_file.name
         try:
             loader = PyPDFLoader(tmp_path)
             file_docs = loader.load()
             for doc in file_docs:
                 doc.metadata["source"] = uploaded_file.name
             documents.extend(file_docs)
             os.unlink(tmp_path)
         except Exception as e:
             st.error(f"Error processing {uploaded_file.name}: {str(e)}")
     if documents:
         text_splitter = RecursiveCharacterTextSplitter(
@@ -260,33 +171,45 @@ def process_custom_documents(uploaded_files):
         )
         chunks = text_splitter.split_documents(documents)
-        with st.spinner("Analyzing documents..."):
             if check_custom_db_exists(collection_name):
                 temp_db = Chroma(
                     persist_directory=CHROMA_PERSIST_DIR,
-                    embedding_function=setup_embeddings(),
                     collection_name=collection_name
                 )
                 temp_db.delete_collection()
             db = Chroma.from_documents(
                 documents=chunks,
-                embedding=setup_embeddings(),
                 collection_name=collection_name,
                 persist_directory=CHROMA_PERSIST_DIR
             )
             db.persist()
             st.session_state.vectordb = db
             setup_qa_chain()
             st.session_state.using_custom_docs = True
             return True
     return False
 def setup_qa_chain():
     if st.session_state.vectordb:
-        template = """You are a legal expert specializing in Pakistani law.
-        Use context to answer. If unsure, state uncertainty but provide general legal info.
         Context: {context}
@@ -296,8 +219,9 @@ def setup_qa_chain():
         prompt = ChatPromptTemplate.from_template(template)
         st.session_state.qa_chain = RetrievalQA.from_chain_type(
-            llm=setup_llm(),
             chain_type="stuff",
             retriever=st.session_state.vectordb.as_retriever(search_kwargs={"k": 3}),
             chain_type_kwargs={"prompt": prompt},
@@ -305,135 +229,155 @@ def setup_qa_chain():
         )
 def generate_similar_questions(question, docs):
     llm = setup_llm()
     context = "\n".join([doc.page_content for doc in docs[:2]])
-    prompt = f"""Generate 3 specific Pakistani law questions related to:
-    Original: {question}
-    Context: {context}
-    Generate exactly 3 questions:"""
     try:
         response = llm.invoke(prompt)
         questions = re.findall(r"\d+\.\s+(.*?)(?=\d+\.|$)", response.content, re.DOTALL)
         if not questions:
             questions = response.content.split("\n")
-            questions = [q.strip() for q in questions if q.strip() and "?" in q]
-        return [q.strip().replace("\n", " ") for q in questions if "?" in q][:3]
-    except:
         return []
 def get_answer(question):
     if not st.session_state.vectordb:
-        with st.spinner("Initializing system..."):
             process_default_document()
     if st.session_state.qa_chain:
         result = st.session_state.qa_chain({"query": question})
         answer = result["result"]
-        st.session_state.similar_questions = generate_similar_questions(question, result.get("source_documents", []))
         sources = set()
-        for doc in result.get("source_documents", []):
             if "source" in doc.metadata:
                 sources.add(doc.metadata["source"])
         if sources:
-            answer += f"\n\n<div class='source-text'>Sources: {', '.join(sources)}</div>"
         return answer
-    return "System initializing... Please try again."
 def main():
-    inject_custom_css()
-    st.markdown("""
-        <h1 class="title-text">
-            <div style="display: flex; align-items: center; justify-content: center; gap: 0.5rem;">
-                <span>⚖️</span>
-                <span>Your AI Law Agent</span>
-            </div>
-        </h1>
-    """, unsafe_allow_html=True)
-    # Sidebar Management
     with st.sidebar:
-        st.header("📚 Document Management")
         if st.session_state.using_custom_docs:
-            if st.button("🔙 Return to Official Database", use_container_width=True):
-                with st.spinner("Switching..."):
                     process_default_document()
-                    st.session_state.messages.append(AIMessage(content="Switched to official database"))
                     st.rerun()
         if not st.session_state.using_custom_docs:
-            if st.button("🔄 Rebuild Database", use_container_width=True):
-                with st.spinner("Rebuilding..."):
                     process_default_document(force_rebuild=True)
                     st.rerun()
-        st.header("📁 Upload Documents")
         uploaded_files = st.file_uploader(
-            "Upload legal PDFs",
             type=["pdf"],
-            accept_multiple_files=True,
-            label_visibility="collapsed"
         )
-        if st.button("🚀 Train on Uploads", use_container_width=True) and uploaded_files:
-            with st.spinner("Processing..."):
-                if process_custom_documents(uploaded_files):
-                    st.session_state.messages.append(AIMessage(content="Custom documents loaded"))
                     st.rerun()
-    # Chat Display
     for message in st.session_state.messages:
-        avatar = "👤" if isinstance(message, HumanMessage) else "⚖️"
-        with st.chat_message("user" if isinstance(message, HumanMessage) else "assistant", avatar=avatar):
-            st.write(message.content)
-    # Similar Questions
     if st.session_state.similar_questions:
-        st.markdown("""
-            <div style="padding: 1rem; background: #2d2d2d; border-radius: 10px; margin: 1rem 0;">
-                <h4 style="color: #00bcd4; margin-bottom: 0.5rem;">🔍 Related Queries</h4>
-        """, unsafe_allow_html=True)
-        cols = st.columns([1,1,1])
         for i, question in enumerate(st.session_state.similar_questions):
-            with cols[i]:
-                if st.button(
-                    f"❓ {question}",
-                    key=f"similar_q_{i}",
-                    use_container_width=True,
-                    help="Click to ask this related question"
-                ):
-                    st.session_state.messages.append(HumanMessage(content=question))
-                    with st.chat_message("assistant", avatar="⚖️"):
-                        with st.spinner("Analyzing..."):
-                            response = get_answer(question)
-                        st.write(response, unsafe_allow_html=True)
-                    st.session_state.messages.append(AIMessage(content=response))
-                    st.rerun()
-        st.markdown("</div>", unsafe_allow_html=True)
-    # Input Handling
-    if user_input := st.chat_input("Ask your legal question..."):
         st.session_state.messages.append(HumanMessage(content=user_input))
         with st.chat_message("user"):
             st.write(user_input)
         with st.chat_message("assistant", avatar="⚖️"):
-            with st.spinner("Researching..."):
                 response = get_answer(user_input)
-            st.write(response, unsafe_allow_html=True)
         st.session_state.messages.append(AIMessage(content=response))
         st.rerun()

 from langchain.chains import RetrievalQA
 import re
 # Page Configuration
+st.set_page_config(page_title="Pakistan Law AI Agent", page_icon="⚖️")
 # Constants
 # DEFAULT_GROQ_API_KEY is None when the GROQ_API_KEY environment variable is
 # not set (os.getenv returns None for a missing variable).
 DEFAULT_GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 MODEL_NAME = "llama-3.3-70b-versatile"
 # NOTE(review): DEFAULT_DOCUMENT_PATH is an absolute path under a local
 # "/Users/..." home directory. process_default_document() checks
 # os.path.exists() on it and reports an error when it is missing, so on any
 # host where this exact path is absent the default knowledge base cannot be
 # built. Presumably this should be a path relative to the app or come from
 # configuration -- confirm with the author.
+DEFAULT_DOCUMENT_PATH = "/Users/appleenterprises/Desktop/ai law bot/lawbook.pdf"  # Path to your hardcoded Pakistan laws PDF
 # Collection name and on-disk directory handed to Chroma for the default
 # (official) law database.
 DEFAULT_COLLECTION_NAME = "pakistan_laws_default"
 CHROMA_PERSIST_DIR = "./chroma_db"
     st.session_state.custom_collection_name = f"custom_laws_{st.session_state.user_id}"
 def setup_embeddings():
+    """Sets up embeddings model"""
     # Constructs and returns a new HuggingFaceEmbeddings object for the
     # "sentence-transformers/all-MiniLM-L6-v2" model on every call; nothing in
     # this function caches the instance.
     return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 def setup_llm():
+    """Setup the language model"""
     if st.session_state.llm is None:
         st.session_state.llm = ChatGroq(
             model_name=MODEL_NAME,
     return st.session_state.llm
 def check_default_db_exists():
+    """Check if the default document database already exists"""
     # Returns True only when a filesystem entry named DEFAULT_COLLECTION_NAME
     # exists directly inside CHROMA_PERSIST_DIR; otherwise returns False.
     # NOTE(review): this assumes Chroma writes one directory per collection
     # *name* inside the persist directory -- TODO confirm against the
     # installed Chroma version. If that is not the on-disk layout, this check
     # is always False and process_default_document() rebuilds the database
     # whenever it is called.
+    if os.path.exists(os.path.join(CHROMA_PERSIST_DIR, DEFAULT_COLLECTION_NAME)):
+        return True
+    return False
 def load_existing_vectordb(collection_name):
+    """Load an existing vector database from disk"""
     # collection_name: name of the Chroma collection to open under
     # CHROMA_PERSIST_DIR.
     # Returns the Chroma object on success. If the Chroma constructor raises,
     # the error is shown with st.error and None is returned.
     # The embeddings object is created outside the try block, so an exception
     # raised by setup_embeddings() propagates to the caller instead of being
     # reported through st.error.
+    embeddings = setup_embeddings()
     try:
+        db = Chroma(
             persist_directory=CHROMA_PERSIST_DIR,
+            embedding_function=embeddings,
             collection_name=collection_name
         )
+        return db
     except Exception as e:
+        st.error(f"Error loading existing database: {str(e)}")
         return None
 def process_default_document(force_rebuild=False):
+    """Process the default Pakistan laws document or load from disk if available"""
+    # Check if database already exists
     if check_default_db_exists() and not force_rebuild:
+        st.info("Loading existing Pakistan law database...")
         db = load_existing_vectordb(DEFAULT_COLLECTION_NAME)
+        if db is not None:
             st.session_state.vectordb = db
             setup_qa_chain()
             st.session_state.using_custom_docs = False
             return True
+    # If database doesn't exist or force rebuild, create it
     if not os.path.exists(DEFAULT_DOCUMENT_PATH):
+        st.error(f"Default document {DEFAULT_DOCUMENT_PATH} not found. Please make sure it exists.")
         return False
+    embeddings = setup_embeddings()
     try:
+        with st.spinner("Building Pakistan law database (this may take a few minutes)..."):
             loader = PyPDFLoader(DEFAULT_DOCUMENT_PATH)
             documents = loader.load()
+            # Add source filename to metadata
             for doc in documents:
                 doc.metadata["source"] = "Pakistan Laws (Official)"
             )
             chunks = text_splitter.split_documents(documents)
+            # Create vector store
             db = Chroma.from_documents(
                 documents=chunks,
+                embedding=embeddings,
                 collection_name=DEFAULT_COLLECTION_NAME,
                 persist_directory=CHROMA_PERSIST_DIR
             )
+            # Explicitly persist to disk
             db.persist()
             st.session_state.vectordb = db
             setup_qa_chain()
             st.session_state.using_custom_docs = False
             return True
     except Exception as e:
+        st.error(f"Error processing default document: {str(e)}")
         return False
+def check_custom_db_exists(collection_name):
+    """Check if a custom document database already exists"""
     # collection_name: name of the per-user custom collection to look for.
     # Returns True only when a filesystem entry with that name exists directly
     # inside CHROMA_PERSIST_DIR; otherwise returns False.
     # NOTE(review): like check_default_db_exists(), this assumes Chroma stores
     # a directory named after the collection -- TODO confirm. If it does not,
     # the "delete previous custom collection" branch in
     # process_custom_documents() is never taken.
+    if os.path.exists(os.path.join(CHROMA_PERSIST_DIR, collection_name)):
+        return True
+    return False
 def process_custom_documents(uploaded_files):
+    """Process user-uploaded PDF documents"""
+    embeddings = setup_embeddings()
     collection_name = st.session_state.custom_collection_name
     documents = []
     for uploaded_file in uploaded_files:
+        # Save file temporarily
         with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
             tmp_file.write(uploaded_file.getvalue())
             tmp_path = tmp_file.name
+        # Load and split the document
         try:
             loader = PyPDFLoader(tmp_path)
             file_docs = loader.load()
+            # Add source filename to metadata
             for doc in file_docs:
                 doc.metadata["source"] = uploaded_file.name
             documents.extend(file_docs)
+            # Clean up temp file
             os.unlink(tmp_path)
         except Exception as e:
             st.error(f"Error processing {uploaded_file.name}: {str(e)}")
+            continue
     if documents:
         text_splitter = RecursiveCharacterTextSplitter(
         )
         chunks = text_splitter.split_documents(documents)
+        # Create vector store
+        with st.spinner("Building custom document database..."):
+            # If a previous custom DB exists for this user, delete it first
             if check_custom_db_exists(collection_name):
+                # We need to recreate the vectorstore to delete the old collection
                 temp_db = Chroma(
                     persist_directory=CHROMA_PERSIST_DIR,
+                    embedding_function=embeddings,
                     collection_name=collection_name
                 )
                 temp_db.delete_collection()
+            # Create new vector store
             db = Chroma.from_documents(
                 documents=chunks,
+                embedding=embeddings,
                 collection_name=collection_name,
                 persist_directory=CHROMA_PERSIST_DIR
             )
+            # Explicitly persist to disk
             db.persist()
             st.session_state.vectordb = db
             setup_qa_chain()
             st.session_state.using_custom_docs = True
             return True
     return False
 def setup_qa_chain():
+    """Set up the QA chain with the RAG system"""
     if st.session_state.vectordb:
+        llm = setup_llm()
+        # Create prompt template
+        template = """You are a helpful legal assistant specializing in Pakistani law.
+        Use the following context to answer the question. If you don't know the answer based on the context,
+        say that you don't have enough information, but provide general legal information if possible.
         Context: {context}
         prompt = ChatPromptTemplate.from_template(template)
+        # Create the QA chain
         st.session_state.qa_chain = RetrievalQA.from_chain_type(
+            llm=llm,
             chain_type="stuff",
             retriever=st.session_state.vectordb.as_retriever(search_kwargs={"k": 3}),
             chain_type_kwargs={"prompt": prompt},
         )
 def generate_similar_questions(question, docs):
+    """Generate similar questions based on retrieved documents"""
     # question: the user's original question text, interpolated into the prompt.
     # docs: sequence of retrieved documents exposing .page_content; only the
     #       first two are used as context.
     # Returns a list of at most 3 question strings, or [] if anything inside
     # the try block raises.
     llm = setup_llm()
+    # Extract key content from docs
     context = "\n".join([doc.page_content for doc in docs[:2]])
+    # Prompt to generate similar questions
+    prompt = f"""Based on the following user question and legal context, generate 3 similar questions that the user might also be interested in.
+    Make the questions specific, related to Pakistani law, and directly relevant to the original question.
+    Original Question: {question}
+    Legal Context: {context}
+    Generate exactly 3 similar questions:"""
     try:
         response = llm.invoke(prompt)
+        # Extract questions from response using regex
         # The pattern captures the text after each "<digits>. " marker, lazily,
         # up to the next "<digits>." or the end of the string (re.DOTALL lets
         # a captured item span several lines).
         # NOTE(review): because the lookahead stops at ANY "<digits>.", an
         # item that itself contains a number followed by a period is cut at
         # that point; the truncated piece is then dropped by the "?" filter
         # below unless it already contains a question mark.
         questions = re.findall(r"\d+\.\s+(.*?)(?=\d+\.|$)", response.content, re.DOTALL)
         if not questions:
             # Fallback when no numbered items were found: treat each line of
             # the response as a candidate. startswith("Similar") is tested on
             # the unstripped line, so leading whitespace defeats that filter.
             questions = response.content.split("\n")
+            questions = [q.strip() for q in questions if q.strip() and not q.startswith("Similar") and "?" in q]
+        # Clean and limit to 3 questions
         # Only candidates containing "?" are kept; embedded newlines become
         # single spaces.
+        questions = [q.strip().replace("\n", " ") for q in questions if "?" in q]
+        return questions[:3]
+    except Exception as e:
         # Best-effort feature: the failure is printed to stdout and the caller
         # simply receives no suggestions.
+        print(f"Error generating similar questions: {e}")
         return []
 def get_answer(question):
+    """Get answer from QA chain"""
     # question: the query string passed to the QA chain under the "query" key.
     # Returns the chain's "result" text, with a "Sources: ..." line appended
     # when any source document carries a "source" metadata entry. If no QA
     # chain is available, returns a fixed "please try again" message.
     # Side effect: overwrites st.session_state.similar_questions.
+    # If default documents haven't been processed yet, try to load them
     if not st.session_state.vectordb:
+        with st.spinner("Loading Pakistan law database..."):
             # The return value is ignored; failure is detected only through
             # st.session_state.qa_chain still being falsy below.
             process_default_document()
     if st.session_state.qa_chain:
         result = st.session_state.qa_chain({"query": question})
         answer = result["result"]
+        # Generate similar questions
         # NOTE(review): "source_documents" is read with a default of [], so a
         # chain that does not return source documents yields no sources and an
         # empty context for the similar-question prompt -- confirm how the
         # chain is configured in setup_qa_chain().
+        source_docs = result.get("source_documents", [])
+        st.session_state.similar_questions = generate_similar_questions(question, source_docs)
+        # Add source information
         # A set removes duplicate source names; the order in which they are
         # joined below is therefore not guaranteed.
         sources = set()
+        for doc in source_docs:
             if "source" in doc.metadata:
                 sources.add(doc.metadata["source"])
         if sources:
+            answer += f"\n\nSources: {', '.join(sources)}"
         return answer
+    else:
+        return "Initializing the knowledge base. Please try again in a moment."
 def main():
     # Renders the whole Streamlit page: title and mode subtitle, the sidebar
     # (switch back to the official database, rebuild it, upload and train on
     # custom PDFs), the stored chat history, clickable related questions, and
     # the chat input box. Reads and writes st.session_state throughout.
+    st.title("Pakistan Law AI Agent")
+    # Determine current mode
+    if st.session_state.using_custom_docs:
+        st.subheader("Training on your personal resources")
+    else:
+        st.subheader("Powered by  Pakistan law database")
+    # Sidebar for uploading documents and switching modes
     with st.sidebar:
+        st.header("Resource Management")
+        # Option to return to default documents
         if st.session_state.using_custom_docs:
+            if st.button("Return to Official Database"):
+                with st.spinner("Loading official Pakistan law database..."):
                     # NOTE(review): the return value of
                     # process_default_document() is ignored, so the success
                     # banner and chat message below are emitted even when it
                     # returned False.
                     process_default_document()
+                    st.success("Switched to official Pakistan law database!")
+                    st.session_state.messages.append(AIMessage(content="Switched to official Pakistan law database. You can now ask legal questions."))
                     # NOTE(review): st.rerun() follows st.success()
                     # immediately; presumably the banner is not visible after
                     # the rerun -- confirm.
                     st.rerun()
+        # Option to rebuild the default database
         if not st.session_state.using_custom_docs:
+            if st.button("Rebuild Official Database"):
+                with st.spinner("Rebuilding official Pakistan law database..."):
                     # Return value ignored here as well; "rebuilt
                     # successfully" is shown regardless of the outcome.
                     process_default_document(force_rebuild=True)
+                    st.success("Official database rebuilt successfully!")
                     st.rerun()
+        # Option to upload custom documents
+        st.header("Upload Custom Legal Documents")
         uploaded_files = st.file_uploader(
+            "Upload PDF files containing legal documents",
             type=["pdf"],
+            accept_multiple_files=True
         )
         # The button is evaluated first; a click with no uploaded files does
         # nothing because the second operand of `and` is falsy.
+        if st.button("Train on Uploaded Documents") and uploaded_files:
+            with st.spinner("Processing your documents..."):
+                success = process_custom_documents(uploaded_files)
+                if success:
+                    st.success("Your documents processed successfully!")
+                    st.session_state.messages.append(AIMessage(content="Custom legal documents loaded successfully. You are now training on your personal resources."))
                     st.rerun()
+    # Display chat messages
     # Every stored message that is not a HumanMessage is rendered as the
     # assistant.
     for message in st.session_state.messages:
+        if isinstance(message, HumanMessage):
+            with st.chat_message("user"):
+                st.write(message.content)
+        else:
+            with st.chat_message("assistant", avatar="⚖️"):
+                st.write(message.content)
+    # Display similar questions if available
     if st.session_state.similar_questions:
+        st.markdown("#### Related Questions:")
         # One column per suggestion; the guard above ensures the list is
         # non-empty before st.columns() is called with its length.
+        cols = st.columns(len(st.session_state.similar_questions))
         for i, question in enumerate(st.session_state.similar_questions):
+            if cols[i].button(question, key=f"similar_q_{i}"):
+                # Add selected question as user input
+                st.session_state.messages.append(HumanMessage(content=question))
+                # Generate and display assistant response
+                with st.chat_message("assistant", avatar="⚖️"):
+                    with st.spinner("Thinking..."):
+                        response = get_answer(question)
+                    st.write(response)
+                # Add assistant response to chat history
+                st.session_state.messages.append(AIMessage(content=response))
                 # get_answer() has already replaced
                 # st.session_state.similar_questions; the rerun redraws the
                 # page from the updated history.
+                st.rerun()
+    # Input for new question
+    if user_input := st.chat_input("Ask a legal question..."):
+        # Add user message to chat history
         st.session_state.messages.append(HumanMessage(content=user_input))
+        # Display user message
         with st.chat_message("user"):
             st.write(user_input)
+        # Generate and display assistant response
         with st.chat_message("assistant", avatar="⚖️"):
+            with st.spinner("Thinking..."):
                 response = get_answer(user_input)
+            st.write(response)
+        # Add assistant response to chat history
         st.session_state.messages.append(AIMessage(content=response))
         st.rerun()