Runtime error
Update app.py
app.py CHANGED
@@ -1,28 +1,29 @@
-# app.py
-
import os
-import json
import gradio as gr
import torch
-from
-from langchain.docstore.document import Document
-from langchain_huggingface import HuggingFaceEmbeddings
+from datasets import load_dataset
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer, AutoModelForCausalLM
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS

-#
-
-db = FAISS.load_local("faiss_index_lawcounsel", embedding_model, allow_dangerous_deserialization=True)
+# === 1. Build the FAISS vectorstore from CUAD ===
+print("Loading CUAD and building index...")

-# Load CUAD from hf itself
-from datasets import load_dataset
cuad_data = load_dataset("cuad")
+texts = [item["text"] for item in cuad_data["train"] if "text" in item]
+
+splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+docs = splitter.create_documents(texts)
+
+embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+vectorstore = FAISS.from_documents(docs, embedding_model)

-# Model setup
+# === 2. Model setup ===
USE_LLAMA = os.environ.get("USE_LLAMA", "false").lower() == "true"
HF_TOKEN = os.environ.get("HF_TOKEN")

-# Define generation logic
def load_llama():
    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf", use_auth_token=True)
    model = AutoModelForCausalLM.from_pretrained(
@@ -40,7 +41,6 @@ def generate_llama_response(prompt):
def generate_mistral_response(prompt):
    return mistral_client.text_generation(prompt=prompt, max_new_tokens=300).strip()

-# Load selected model
if USE_LLAMA:
    llama_tokenizer, llama_model = load_llama()
    generate_response = generate_llama_response
@@ -51,9 +51,9 @@ else:
    )
    generate_response = generate_mistral_response

-# Main QA function
+# === 3. Main QA function ===
def answer_question(user_query):
-    docs =
+    docs = vectorstore.similarity_search(user_query, k=3)
    context = "\n".join([doc.page_content for doc in docs])
    prompt = f"""[Context]
{context}
@@ -65,13 +65,13 @@ def answer_question(user_query):
"""
    return generate_response(prompt)

-# Gradio UI
+# === 4. Gradio UI ===
iface = gr.Interface(
    fn=answer_question,
    inputs=gr.Textbox(placeholder="Ask a question about your contract..."),
    outputs=gr.Textbox(label="Answer"),
    title="LawCounsel AI",
-    description="
+    description="Ask clause-specific questions from CUAD-trained contracts. Powered by RAG using Mistral or LLaMA.",
)

iface.launch()
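A note on the index-building block added above: the "cuad" dataset on the Hugging Face Hub uses a SQuAD-style schema (id, title, context, question, answers), so filtering items on a "text" key would most likely leave texts empty and make FAISS.from_documents raise during startup, which is one plausible cause of the Space's runtime error. A minimal sketch of that step reading contract text from the context column instead; the deduplication and the cap on contract count are illustrative assumptions, not part of the commit:

from datasets import load_dataset
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

cuad_data = load_dataset("cuad")
# Many CUAD rows share the same contract, so deduplicate the contexts first.
texts = list(dict.fromkeys(item["context"] for item in cuad_data["train"]))
texts = texts[:50]  # illustrative cap to keep the index build fast on a CPU Space

splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = splitter.create_documents(texts)

embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(docs, embedding_model)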
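The diff hides roughly new lines 30 to 40 and 47 to 50, which presumably finish load_llama, define generate_llama_response, and construct the mistral_client used by generate_mistral_response. A sketch of what that elided glue typically looks like; the Mistral repo id, dtype, device_map, and decoding details are assumptions rather than the Space's actual code:

def load_llama():
    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf", use_auth_token=True)
    model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-chat-hf",
        torch_dtype=torch.float16,
        device_map="auto",
        use_auth_token=True,
    )
    return tokenizer, model

def generate_llama_response(prompt):
    # Tokenize, generate, and return only the newly generated tokens.
    inputs = llama_tokenizer(prompt, return_tensors="pt").to(llama_model.device)
    output_ids = llama_model.generate(**inputs, max_new_tokens=300)
    new_tokens = output_ids[0][inputs["input_ids"].shape[1]:]
    return llama_tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

# Assumed else branch: a serverless Inference API client for a Mistral instruct model.
mistral_client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.2", token=HF_TOKEN)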
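New lines 60 to 64, the middle of the prompt template, are hidden as well. The retrieval-plus-generation flow in answer_question written out end to end, with an assumed [Question]/[Answer] layout standing in for the hidden part:

def answer_question(user_query):
    # Retrieve the three chunks closest to the query and place them in the prompt.
    docs = vectorstore.similarity_search(user_query, k=3)
    context = "\n".join(doc.page_content for doc in docs)
    prompt = f"""[Context]
{context}

[Question]
{user_query}

[Answer]
"""
    return generate_response(prompt)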
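Missing packages are another common way a Space ends up with the Runtime error badge; faiss-cpu and sentence-transformers are not pulled in by langchain on its own. A plausible requirements.txt matching the imports in this commit, unpinned and illustrative only:

gradio
torch
transformers
datasets
huggingface_hub
langchain
sentence-transformers
faiss-cpu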
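Once the Space builds, the gr.Interface exposes a single /predict endpoint, so the app can also be queried programmatically. A usage sketch with gradio_client; the Space id is a placeholder:

from gradio_client import Client

client = Client("your-username/lawcounsel-ai")  # placeholder Space id
answer = client.predict("Which clause covers governing law?", api_name="/predict")
print(answer)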