# NOTE: file-viewer metadata (size / blame hashes / line-number gutter) removed — not part of the source.
import os
import gradio as gr
import pdfplumber
import re
from langchain.docstore.document import Document
from langchain.vectorstores import FAISS
from langchain.embeddings.base import Embeddings
from sklearn.feature_extraction.text import TfidfVectorizer
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
# Route OpenAI-compatible clients through OpenRouter: expose the OpenRouter
# key under the name the OpenAI SDK expects (this raises KeyError at import
# time if OPENROUTER_API_KEY is unset) and point the SDK at OpenRouter's API.
os.environ["OPENAI_API_KEY"] = os.environ["OPENROUTER_API_KEY"]
os.environ["OPENAI_API_BASE"] = "https://openrouter.ai/api/v1"
# NOTE(review): OPENAI_API_HEADERS is not a variable the OpenAI SDK documents
# reading — presumably intended for OpenRouter attribution headers. The same
# headers are also passed explicitly to ChatOpenAI below, so this line looks
# redundant; confirm before removing.
os.environ["OPENAI_API_HEADERS"] = '{"HTTP-Referer":"https://huggingface.co", "X-Title":"PDF-RAG"}'
#Load and clean the policy PDF
# Load and clean the policy PDF
def extract_clean_sections(file_path):
    """Extract titled sections from the policy PDF as LangChain Documents.

    Opens *file_path* with pdfplumber, strips recurring boilerplate (the
    office-address and e-mail lines) from each page, then splits the combined
    text on line-initial ``Title:`` headings. Each heading plus its body
    becomes one Document, with the heading stored in ``metadata["section"]``.

    Args:
        file_path: Path to the policy PDF.

    Returns:
        list[Document]: One document per section whose body is longer than
        20 characters; shorter fragments are treated as noise and dropped.
    """
    # Boilerplate patterns, compiled once instead of per page.
    header_re = re.compile(r'Systems Campus.*?Lahore')
    email_re = re.compile(r'E-mail:.*?systemsltd\.com')

    # Collect cleaned page texts and join once (avoids quadratic `+=`).
    pages = []
    with pdfplumber.open(file_path) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            if text:  # pages with no extractable text are skipped
                text = header_re.sub('', text)
                text = email_re.sub('', text)
                pages.append(text)
    # Matches the original `full_text += text + "\n"` accumulation exactly.
    full_text = "\n".join(pages) + ("\n" if pages else "")

    # A heading is a line-initial "Title:" (4-51 chars, starting uppercase).
    # re.split with one capture group yields:
    #   [preamble, title1, body1, title2, body2, ...]
    parts = re.split(r"(?<=\n)([A-Z][^\n]{3,50}):", full_text)
    docs = []
    for i in range(1, len(parts), 2):
        title = parts[i].strip()
        content = parts[i + 1].strip()
        if len(content) > 20:  # drop near-empty fragments
            docs.append(Document(page_content=f"{title}:\n{content}",
                                 metadata={"section": title}))
    return docs
#TF-IDF Embeddings
# TF-IDF Embeddings
class TfidfEmbedding(Embeddings):
    """Sparse TF-IDF embeddings exposed through the LangChain Embeddings API.

    The vectorizer must be fitted via :meth:`fit` on the full corpus before
    ``embed_documents`` / ``embed_query`` are called; otherwise sklearn
    raises ``NotFittedError``.
    """

    def __init__(self):
        # One shared vectorizer; its vocabulary/IDF weights are learned in fit().
        self.vectorizer = TfidfVectorizer()

    def fit(self, texts):
        """Learn the TF-IDF vocabulary and IDF weights from *texts*."""
        self.vectorizer.fit(texts)

    def embed_documents(self, texts):
        """Embed a batch of documents.

        Returns nested plain lists (via ``.tolist()``) rather than a numpy
        array, matching the LangChain Embeddings contract of
        ``List[List[float]]``.
        """
        return self.vectorizer.transform(texts).toarray().tolist()

    def embed_query(self, text):
        """Embed a single query string as ``List[float]``."""
        return self.vectorizer.transform([text]).toarray()[0].tolist()
# Prompt Template
# Prompt Template
# Forces the model to open with an explicit Yes / No / Partially verdict so
# the answer box is easy to scan; {context} is filled by the retriever and
# {question} by the user's claim question.
TEMPLATE = """
You are a strict healthcare policy checker for Systems Ltd.
Always begin your answer clearly:
- Say "Yes, ..." if the claim is valid
- Say "No, ..." if the claim is not valid
- Say "Partially, ..." if it's conditionally allowed
Use the following policy information to support your answer.
{context}
Question: {question}
Answer:
"""
custom_prompt = PromptTemplate(template=TEMPLATE, input_variables=["context", "question"])
# Load the policy at startup
# Load the policy at startup
def initialize_policy():
    """Build the module-global RetrievalQA chain from healthcare_policy.pdf.

    Extracts policy sections, fits a TF-IDF embedder on them, indexes the
    texts in FAISS, and wires a "stuff"-type RetrievalQA chain (OpenRouter
    model via the OpenAI-compatible API) into the global ``qa_chain``.
    """
    global qa_chain

    sections = extract_clean_sections("healthcare_policy.pdf")
    corpus = [section.page_content for section in sections]

    # Fit the TF-IDF embedder on the full corpus before indexing.
    embedder = TfidfEmbedding()
    embedder.fit(corpus)
    retriever = FAISS.from_texts(corpus, embedder).as_retriever()

    # OpenRouter endpoint with attribution headers; temperature 0 for
    # deterministic policy verdicts.
    chat_model = ChatOpenAI(
        model="tngtech/deepseek-r1t2-chimera:free",
        base_url="https://openrouter.ai/api/v1",
        api_key=os.getenv("OPENAI_API_KEY"),
        default_headers={
            "HTTP-Referer": "https://huggingface.co",
            "X-Title": "PDF-RAG",
        },
        temperature=0.0,
    )

    qa_chain = RetrievalQA.from_chain_type(
        llm=chat_model,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=False,
        chain_type_kwargs={"prompt": custom_prompt},
    )
# Run QA on user question
# Run QA on user question
def ask_policy_question(question):
    """Answer a claim question through the global QA chain.

    Returns a plain string in every case so the Gradio textbox never sees an
    exception: a "still loading" notice before startup completes, the chain's
    answer on success, or the error text on failure.
    """
    # Guard clause: the chain is built asynchronously on app load.
    if qa_chain is None:
        return "The policy is still loading. Please wait."
    try:
        answer = qa_chain.run(question)
    except Exception as exc:  # surface backend failures as text, never crash the UI
        answer = f"Error: {str(exc)}"
    return answer
# Gradio Interface
# Gradio Interface
qa_chain = None
status_text = "Loading..."

with gr.Blocks() as demo:
    gr.Markdown("## SL HealthCare Claim Checker (RAG)")
    status_box = gr.Textbox(label="Status", value=status_text, interactive=False)

    with gr.Row():
        question = gr.Textbox(label="Enter your claim question")
        submit_btn = gr.Button("Ask")
    answer = gr.Textbox(label="Answer", lines=6)

    submit_btn.click(fn=ask_policy_question, inputs=question, outputs=answer)

    # Load the policy on startup
    def startup():
        """Build the QA chain once the UI is up, then report readiness."""
        global status_text
        initialize_policy()
        status_text = "Policy loaded. You may now ask questions."
        return status_text

    demo.load(fn=startup, outputs=status_box)

demo.launch()
# (end of file — trailing viewer artifact removed)