File size: 4,078 Bytes
021cd91
 
 
 
 
 
 
 
 
 
 
 
 
 
05cbe4d
021cd91
 
 
 
05cbe4d
021cd91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
05cbe4d
021cd91
 
 
 
 
 
 
 
 
 
 
 
 
05cbe4d
021cd91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
05cbe4d
6238318
 
021cd91
 
 
 
 
 
 
 
26a2e48
021cd91
 
 
 
 
 
 
 
 
6238318
021cd91
 
 
 
 
 
 
05cbe4d
021cd91
 
6238318
021cd91
 
 
 
 
 
05cbe4d
6238318
05cbe4d
6238318
021cd91
 
6238318
021cd91
 
 
 
 
 
 
 
6238318
 
 
 
 
 
 
 
 
021cd91
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import os
import gradio as gr
import pdfplumber
import re

from langchain.docstore.document import Document
from langchain.vectorstores import FAISS
from langchain.embeddings.base import Embeddings
from sklearn.feature_extraction.text import TfidfVectorizer
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI



# Route OpenAI-compatible clients through OpenRouter.
# NOTE(review): this raises KeyError at import time if OPENROUTER_API_KEY is
# unset — confirm a hard crash on startup is the intended failure mode.
os.environ["OPENAI_API_KEY"] = os.environ["OPENROUTER_API_KEY"]
os.environ["OPENAI_API_BASE"] = "https://openrouter.ai/api/v1"
# NOTE(review): OPENAI_API_HEADERS is not an env var the OpenAI SDK reads; the
# same headers are passed explicitly to ChatOpenAI in initialize_policy —
# presumably leftover/defensive. Verify before removing.
os.environ["OPENAI_API_HEADERS"] = '{"HTTP-Referer":"https://huggingface.co", "X-Title":"PDF-RAG"}'

#Load and clean the policy PDF
def extract_clean_sections(file_path):
    """Extract titled sections from a policy PDF as LangChain Documents.

    Reads every page with pdfplumber, strips recurring boilerplate (campus
    address line and contact e-mail), then splits the combined text on
    headings of the form ``Title:`` at the start of a line.

    Args:
        file_path: Path to the policy PDF.

    Returns:
        list[Document]: one Document per section whose body exceeds 20
        characters, with the section title stored in ``metadata["section"]``.
    """
    page_texts = []
    with pdfplumber.open(file_path) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            if not text:
                continue  # image-only / empty pages yield None
            # Drop header/footer fragments that would pollute section bodies.
            text = re.sub(r'Systems Campus.*?Lahore', '', text)
            text = re.sub(r'E-mail:.*?systemsltd\.com', '', text)
            page_texts.append(text)
    # Single join instead of quadratic += concatenation; trailing "\n" kept
    # so the last page is newline-terminated like the original build.
    full_text = "\n".join(page_texts) + "\n" if page_texts else ""

    # A heading is "Title:" right after a newline (capitalized, 4-51 chars).
    # re.split with one capture group yields
    # [preamble, title1, body1, title2, body2, ...].
    pattern = r"(?<=\n)([A-Z][^\n]{3,50}):"
    parts = re.split(pattern, full_text)

    docs = []
    for i in range(1, len(parts), 2):
        title = parts[i].strip()
        content = parts[i + 1].strip()
        if len(content) > 20:  # skip near-empty fragments
            docs.append(Document(page_content=f"{title}:\n{content}", metadata={"section": title}))
    return docs

#TF-IDF Embeddings
class TfidfEmbedding(Embeddings):
    """Sparse TF-IDF embeddings exposed through the LangChain Embeddings API.

    The vectorizer must be fitted via :meth:`fit` on the document corpus
    before either embed method is called (scikit-learn raises NotFittedError
    otherwise). Embedding dimensionality equals the fitted vocabulary size,
    so documents and queries must go through the same fitted instance.
    """

    def __init__(self):
        self.vectorizer = TfidfVectorizer()

    def fit(self, texts):
        """Learn vocabulary and IDF weights from the corpus."""
        self.vectorizer.fit(texts)

    def embed_documents(self, texts):
        """Return one dense vector per text.

        Converted to plain Python lists (not numpy arrays) to satisfy the
        ``List[List[float]]`` contract of the Embeddings interface.
        """
        return self.vectorizer.transform(texts).toarray().tolist()

    def embed_query(self, text):
        """Return the dense vector for a single query string as a list."""
        return self.vectorizer.transform([text]).toarray()[0].tolist()

# Prompt template: forces the model to open with a Yes/No/Partially verdict
# before elaborating. {context} is filled with the retrieved policy sections
# by the "stuff" chain; {question} is the user's claim question.
TEMPLATE = """
You are a strict healthcare policy checker for Systems Ltd.

Always begin your answer clearly:
- Say "Yes, ..." if the claim is valid
- Say "No, ..." if the claim is not valid
- Say "Partially, ..." if it's conditionally allowed

Use the following policy information to support your answer.

{context}

Question: {question}
Answer:
"""
# Wrapped as a LangChain PromptTemplate so RetrievalQA can substitute both
# variables when building the final LLM prompt.
custom_prompt = PromptTemplate(template=TEMPLATE, input_variables=["context", "question"])


# Load the policy at startup
def initialize_policy(pdf_path="healthcare_policy.pdf"):
    """Build the retrieval-QA chain from the policy PDF.

    Args:
        pdf_path: Path to the policy PDF. The default keeps the original
            hard-coded file name, so existing zero-argument callers are
            unaffected.

    Side effects:
        Sets the module-level ``qa_chain`` consumed by ask_policy_question.
    """
    global qa_chain
    docs = extract_clean_sections(pdf_path)
    texts = [doc.page_content for doc in docs]

    # TF-IDF must be fitted on the corpus before FAISS asks it to embed.
    embedder = TfidfEmbedding()
    embedder.fit(texts)
    vectordb = FAISS.from_texts(texts, embedder)
    retriever = vectordb.as_retriever()

    # temperature=0.0 for deterministic Yes/No/Partially policy verdicts.
    llm = ChatOpenAI(
        model="tngtech/deepseek-r1t2-chimera:free",
        base_url="https://openrouter.ai/api/v1",
        api_key=os.getenv("OPENAI_API_KEY"),
        default_headers={
            "HTTP-Referer": "https://huggingface.co",
            "X-Title": "PDF-RAG"
        },
        temperature=0.0
    )

    # "stuff" chain type: retrieved sections are concatenated into the
    # prompt's {context} slot in a single LLM call.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=False,
        chain_type_kwargs={"prompt": custom_prompt}
    )

# Run QA on user question
def ask_policy_question(question):
    """Answer a claim question through the RAG chain.

    Returns a loading notice while the chain is not yet built, and a
    user-friendly error string instead of raising on failure.
    """
    chain = qa_chain  # single read of the module-level global
    if chain is None:
        return "The policy is still loading. Please wait."
    try:
        answer = chain.run(question)
    except Exception as exc:
        return f"Error: {str(exc)}"
    return answer


#  Gradio Interface
qa_chain = None
status_text = "Loading..." 

with gr.Blocks() as demo:
    gr.Markdown("## SL HealthCare Claim Checker (RAG)")
    status_box = gr.Textbox(label="Status", value=status_text, interactive=False)

    with gr.Row():
        question = gr.Textbox(label="Enter your claim question")
        ask_btn = gr.Button("Ask")

    answer = gr.Textbox(label="Answer", lines=6)
    ask_btn.click(fn=ask_policy_question, inputs=question, outputs=answer)

    # Load the policy on startup
    def startup():
        global status_text
        initialize_policy()
        status_text = "Policy loaded. You may now ask questions."
        return status_text

    demo.load(fn=startup, outputs=status_box)

demo.launch()