Spaces:

saadawaissheikh
/

SystemsHealthcareChatbot

Sleeping

App Files Files Community

saadawaissheikh committed on Jul 23

Commit

021cd91

verified ·

1 Parent(s): 75aaf21

Update app.py

Browse files

Files changed (1) hide show

app.py +141 -0

app.py CHANGED Viewed

	@@ -0,0 +1,141 @@

+import os
+import gradio as gr
+import pdfplumber
+import re
+from langchain.docstore.document import Document
+from langchain.vectorstores import FAISS
+from langchain.embeddings.base import Embeddings
+from sklearn.feature_extraction.text import TfidfVectorizer
+from langchain.chains import RetrievalQA
+from langchain.prompts import PromptTemplate
+from langchain_openai import ChatOpenAI
+os.environ["OPENAI_API_KEY"] = os.environ["OPENROUTER_API_KEY"]
+os.environ["OPENAI_API_BASE"] = "https://openrouter.ai/api/v1"
+os.environ["OPENAI_API_HEADERS"] = '{"HTTP-Referer":"https://huggingface.co", "X-Title":"PDF-RAG"}'
+#Section-aware PDF extractor
+def extract_clean_sections(file_path):
+    with pdfplumber.open(file_path) as pdf:
+        full_text = ""
+        for page in pdf.pages:
+            text = page.extract_text()
+            if text:
+                text = re.sub(r'Systems Campus.*?Lahore', '', text)
+                text = re.sub(r'E-mail:.*?systemsltd\.com', '', text)
+                full_text += text + "\n"
+    pattern = r"(?<=\n)([A-Z][^\n]{3,50}):"
+    parts = re.split(pattern, full_text)
+    docs = []
+    for i in range(1, len(parts), 2):
+        title = parts[i].strip()
+        content = parts[i + 1].strip()
+        if len(content) > 20:
+            docs.append(Document(page_content=f"{title}:\n{content}", metadata={"section": title}))
+    return docs
+#TF-IDF Embedding for RAG
+class TfidfEmbedding(Embeddings):
+    def __init__(self):
+        self.vectorizer = TfidfVectorizer()
+    def fit(self, texts):
+        self.vectorizer.fit(texts)
+    def embed_documents(self, texts):
+        return self.vectorizer.transform(texts).toarray()
+    def embed_query(self, text):
+        return self.vectorizer.transform([text]).toarray()[0]
+# Prompt for the QA chain: {context} is filled with the retrieved policy text and {question} with the user's claim.
+TEMPLATE = """
+You are a strict healthcare policy checker for Systems Ltd.
+Always begin your answer clearly:
+- Say "Yes, ..." if the claim is valid
+- Say "No, ..." if the claim is not valid
+- Say "Partially, ..." if it's conditionally allowed
+Use the following policy information to support your answer.
+{context}
+Question: {question}
+Answer:
+"""
+custom_prompt = PromptTemplate(template=TEMPLATE, input_variables=["context", "question"])  # passed to the chain via chain_type_kwargs in load_policy()
+# Global state: both stay None until load_policy() has built the retriever and the QA chain.
+retriever = None  # retriever over the indexed policy sections
+qa_chain = None  # RetrievalQA chain; ask_policy_question() checks this for None
+# ✅ Process the PDF once when button is clicked
+def load_policy():
+    global retriever, qa_chain
+    docs = extract_clean_sections("healthcare_policy.pdf")
+    texts = [doc.page_content for doc in docs]
+    embedder = TfidfEmbedding()
+    embedder.fit(texts)
+    vectordb = FAISS.from_texts(texts, embedder)
+    retriever = vectordb.as_retriever()
+    llm = ChatOpenAI(
+        model="mistralai/mixtral-8x7b",
+        base_url="https://openrouter.ai/api/v1",
+        api_key=os.getenv("OPENAI_API_KEY"),
+        default_headers={
+            "HTTP-Referer": "https://huggingface.co",
+            "X-Title": "PDF-RAG"
+        },
+        temperature=0.0
+    )
+    qa_chain_local = RetrievalQA.from_chain_type(
+        llm=llm,
+        chain_type="stuff",
+        retriever=retriever,
+        return_source_documents=False,
+        chain_type_kwargs={"prompt": custom_prompt}
+    )
+    qa_chain = qa_chain_local
+    return "Policy loaded. You may now ask questions."
+# ✅ Answer a claim question
+def ask_policy_question(question):
+    if qa_chain is None:
+        return "Please click 'Ask about claim' to load the policy first."
+    try:
+        return qa_chain.run(question)
+    except Exception as e:
+        return f"Error: {str(e)}"
+# ✅ Gradio UI: a load button with a status box, then a question box and Ask button feeding an answer box.
+with gr.Blocks() as demo:
+    gr.Markdown("## SL HealthCare Claim Checker (RAG)")
+    load_btn = gr.Button("📥 Ask about claim (Load Policy)")
+    load_status = gr.Textbox(label="Status")
+    load_btn.click(fn=load_policy, outputs=load_status)  # load_policy's return string is shown in the Status box
+    with gr.Row():  # question box and Ask button are created inside the same row
+        question = gr.Textbox(label="Enter your claim question")
+        ask_btn = gr.Button("Ask")
+    answer = gr.Textbox(label="Answer", lines=6)
+    ask_btn.click(fn=ask_policy_question, inputs=question, outputs=answer)  # question text in, answer text out
+demo.launch()  # executed at module level (no __main__ guard), so importing this file starts the app