File size: 4,050 Bytes
021cd91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26a2e48
021cd91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import os
import gradio as gr
import pdfplumber
import re

from langchain.docstore.document import Document
from langchain.vectorstores import FAISS
from langchain.embeddings.base import Embeddings
from sklearn.feature_extraction.text import TfidfVectorizer
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI



# Route the OpenAI-compatible clients through OpenRouter.
# NOTE(review): raises KeyError at import time if OPENROUTER_API_KEY is unset —
# presumably intentional fail-fast on the hosting platform; confirm.
os.environ["OPENAI_API_KEY"] = os.environ["OPENROUTER_API_KEY"]
os.environ["OPENAI_API_BASE"] = "https://openrouter.ai/api/v1"
# Attribution headers OpenRouter asks clients to send (also passed explicitly
# via default_headers when the ChatOpenAI client is built below).
os.environ["OPENAI_API_HEADERS"] = '{"HTTP-Referer":"https://huggingface.co", "X-Title":"PDF-RAG"}'


def extract_clean_sections(file_path):
    """Parse a policy PDF into one ``Document`` per titled section.

    Extracts the text of every page, strips recurring letterhead noise
    (address and e-mail lines), then splits the combined text on lines that
    look like section headings ("Capitalized Title:"). Sections whose body
    is 20 characters or shorter are discarded.

    Args:
        file_path: Path to the PDF to parse.

    Returns:
        list[Document]: one document per section, with the heading recorded
        in ``metadata["section"]``.
    """
    cleaned_pages = []
    with pdfplumber.open(file_path) as pdf:
        for page in pdf.pages:
            raw = page.extract_text()
            if not raw:
                continue
            # Remove the letterhead/footer fragments that repeat on pages.
            raw = re.sub(r'Systems Campus.*?Lahore', '', raw)
            raw = re.sub(r'E-mail:.*?systemsltd\.com', '', raw)
            cleaned_pages.append(raw + "\n")
    full_text = "".join(cleaned_pages)

    # A heading is a line starting with a capital letter, 4-51 chars long,
    # terminated by a colon. re.split with one capture group yields
    # [preamble, title, body, title, body, ...].
    parts = re.split(r"(?<=\n)([A-Z][^\n]{3,50}):", full_text)

    docs = []
    for raw_title, raw_body in zip(parts[1::2], parts[2::2]):
        title = raw_title.strip()
        body = raw_body.strip()
        if len(body) > 20:
            docs.append(
                Document(
                    page_content=f"{title}:\n{body}",
                    metadata={"section": title},
                )
            )
    return docs


#TF-IDF Embedding for RAG
class TfidfEmbedding(Embeddings):
    """LangChain-compatible embedding backed by a scikit-learn TF-IDF vectorizer.

    A lightweight, fully offline stand-in for neural embeddings. The
    vectorizer must be fit on the corpus via :meth:`fit` before any embed
    call; sklearn raises ``NotFittedError`` otherwise.
    """

    def __init__(self):
        # Default TfidfVectorizer: lowercases and tokenizes on word boundaries.
        self.vectorizer = TfidfVectorizer()

    def fit(self, texts):
        """Learn vocabulary and IDF weights from ``texts`` (iterable of str)."""
        self.vectorizer.fit(texts)

    def embed_documents(self, texts):
        """Return one dense TF-IDF vector per document.

        Returns plain ``List[List[float]]`` (not ndarrays) to honor the
        LangChain ``Embeddings`` contract, so downstream consumers that
        expect lists (e.g. serialization) keep working.
        """
        return self.vectorizer.transform(texts).toarray().tolist()

    def embed_query(self, text):
        """Return the dense TF-IDF vector for a single query as ``List[float]``."""
        return self.vectorizer.transform([text]).toarray()[0].tolist()


# Prompt forcing a Yes/No/Partially verdict grounded in retrieved policy text.
# {context} is filled by the RetrievalQA "stuff" chain with the retrieved
# sections; {question} is the user's claim question.
TEMPLATE = """
You are a strict healthcare policy checker for Systems Ltd.

Always begin your answer clearly:
- Say "Yes, ..." if the claim is valid
- Say "No, ..." if the claim is not valid
- Say "Partially, ..." if it's conditionally allowed

Use the following policy information to support your answer.

{context}

Question: {question}
Answer:
"""

custom_prompt = PromptTemplate(template=TEMPLATE, input_variables=["context", "question"])

# Module-level state populated by load_policy() and read by
# ask_policy_question(); None until the "Load Policy" button is clicked.
retriever = None
qa_chain = None


# Process the PDF once when the "Load Policy" button is clicked.
def load_policy(pdf_path="healthcare_policy.pdf"):
    """Build the retrieval-QA pipeline from the policy PDF.

    Parses the PDF into per-section Documents, fits a TF-IDF embedder on
    them, indexes them in a FAISS store, and wires a RetrievalQA chain
    around an OpenRouter-hosted chat model. The chain is stored in the
    module-level ``qa_chain`` global for ``ask_policy_question`` to use.

    Args:
        pdf_path: Path to the policy PDF. Defaults to the bundled
            ``healthcare_policy.pdf`` (preserves prior behavior; Gradio
            invokes this with no arguments).

    Returns:
        str: status message for the UI status textbox.
    """
    global retriever, qa_chain

    docs = extract_clean_sections(pdf_path)
    texts = [doc.page_content for doc in docs]

    # TF-IDF must be fit on the corpus before it can embed anything.
    embedder = TfidfEmbedding()
    embedder.fit(texts)
    vectordb = FAISS.from_texts(texts, embedder)
    retriever = vectordb.as_retriever()

    # temperature=0 for deterministic policy rulings; headers are the
    # attribution OpenRouter expects from clients.
    llm = ChatOpenAI(
        model="tngtech/deepseek-r1t2-chimera:free",
        base_url="https://openrouter.ai/api/v1",
        api_key=os.getenv("OPENAI_API_KEY"),
        default_headers={
            "HTTP-Referer": "https://huggingface.co",
            "X-Title": "PDF-RAG"
        },
        temperature=0.0
    )

    # "stuff" chain: all retrieved sections are concatenated into {context}.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=False,
        chain_type_kwargs={"prompt": custom_prompt}
    )
    return "Policy loaded. You may now ask questions."


# Answer a claim question against the loaded policy.
def ask_policy_question(question):
    """Run a claim question through the loaded QA chain.

    Never raises: errors are folded into the returned string so the Gradio
    answer textbox always receives something displayable.
    """
    # Guard: the chain only exists after load_policy() has run.
    if qa_chain is None:
        return "Please click 'Ask about claim' to load the policy first."
    try:
        answer = qa_chain.run(question)
    except Exception as e:
        return f"Error: {str(e)}"
    return answer


# Gradio UI — component creation order inside the Blocks context defines the
# page layout, so the statement order below is significant.
with gr.Blocks() as demo:
    gr.Markdown("## SL HealthCare Claim Checker (RAG)")

    # One-time setup: parse the PDF and build the QA chain.
    load_btn = gr.Button("πŸ“₯ Ask about claim (Load Policy)")
    load_status = gr.Textbox(label="Status")
    load_btn.click(fn=load_policy, outputs=load_status)

    with gr.Row():
        question = gr.Textbox(label="Enter your claim question")
        ask_btn = gr.Button("Ask")

    # Answers (and any error strings) land here.
    answer = gr.Textbox(label="Answer", lines=6)
    ask_btn.click(fn=ask_policy_question, inputs=question, outputs=answer)

demo.launch()