import os
import gradio as gr
import pdfplumber
import re

from langchain.docstore.document import Document
from langchain_community.vectorstores import FAISS  # ✅ Fixed deprecation warning
from langchain.embeddings.base import Embeddings
from sklearn.feature_extraction.text import TfidfVectorizer
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from transformers import pipeline


# Route the OpenAI-compatible client to OpenRouter (used by ChatOpenAI below).
# Fall back to "" so the assignment does not raise TypeError if the
# OPENROUTER_API_KEY secret is missing.
os.environ["OPENAI_API_KEY"] = os.environ.get("OPENROUTER_API_KEY", "")
os.environ["OPENAI_API_BASE"] = "https://openrouter.ai/api/v1"
# Attribution headers; the same values are also passed explicitly to ChatOpenAI below.
os.environ["OPENAI_API_HEADERS"] = '{"HTTP-Referer":"https://huggingface.co", "X-Title":"PDF-RAG"}'

# Global variables
qa_chain = None
translator_en2ur = None
translator_ur2en = None

# -------------------- PDF Extraction --------------------
def extract_clean_sections(file_path):
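    """Parse the policy PDF, strip repeated letterhead text, and split the body
    into one Document per "Title:" section (assumed heading format)."""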
    with pdfplumber.open(file_path) as pdf:
        full_text = ""
        for page in pdf.pages:
            text = page.extract_text()
            if text:
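                # Strip the repeated letterhead/footer lines before section splitting.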
                text = re.sub(r'Systems Campus.*?Lahore', '', text)
                text = re.sub(r'E-mail:.*?systemsltd\.com', '', text)
                full_text += text + "\n"

    pattern = r"(?<=\n)([A-Z][^\n]{3,50}):"
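    # Headings are assumed to start a line as a capitalized phrase ending in a
    # colon, e.g. "Hospitalization Limits:"; the capturing group makes re.split
    # return alternating (title, body) pieces.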
    parts = re.split(pattern, full_text)

    docs = []
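    # parts[0] is any text before the first heading; iterate the (title, body)
    # pairs and keep only sections with non-trivial content.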
    for i in range(1, len(parts), 2):
        title = parts[i].strip()
        content = parts[i + 1].strip()
        if len(content) > 20:
            docs.append(Document(page_content=f"{title}:\n{content}", metadata={"section": title}))
    return docs

# -------------------- TF-IDF Embedder --------------------
class TfidfEmbedding(Embeddings):
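    """Minimal TF-IDF embedder implementing LangChain's Embeddings interface.
    fit() must be called on the corpus before building or querying the index."""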
    def __init__(self):
        self.vectorizer = TfidfVectorizer()

    def fit(self, texts):
        self.vectorizer.fit(texts)

    def embed_documents(self, texts):
        # Return plain lists of floats, as the Embeddings interface expects.
        return self.vectorizer.transform(texts).toarray().tolist()

    def embed_query(self, text):
        return self.vectorizer.transform([text]).toarray()[0].tolist()

# -------------------- Custom Prompt --------------------
TEMPLATE = """
You are a strict healthcare policy checker for Systems Ltd.
Always begin your answer clearly:
- Say "Yes, ..." if the claim is valid
- Say "No, ..." if the claim is not valid
- Say "Partially, ..." if it's conditionally allowed
Use the following policy information to support your answer.
{context}
Question: {question}
Answer:
"""
custom_prompt = PromptTemplate(template=TEMPLATE, input_variables=["context", "question"])

# -------------------- Policy Initialization --------------------
def initialize_policy():
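    """Build the RAG pipeline once at startup: parse the policy PDF, index its
    sections with TF-IDF + FAISS, wire up the OpenRouter LLM, and load the
    English<->Urdu translation models."""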
    global qa_chain, translator_en2ur, translator_ur2en

    docs = extract_clean_sections("healthcare_policy.pdf")
    texts = [doc.page_content for doc in docs]

    embedder = TfidfEmbedding()
    embedder.fit(texts)

    vectordb = FAISS.from_texts(texts, embedder)
    retriever = vectordb.as_retriever()

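    # OpenRouter-hosted model via the OpenAI-compatible endpoint; temperature 0
    # keeps the Yes/No/Partially verdicts deterministic.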
    llm = ChatOpenAI(
        model="tngtech/deepseek-r1t2-chimera:free",
        base_url="https://openrouter.ai/api/v1",
        api_key=os.getenv("OPENAI_API_KEY"),
        default_headers={
            "HTTP-Referer": "https://huggingface.co",
            "X-Title": "PDF-RAG"
        },
        temperature=0.0
    )

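    # "stuff" chain: every retrieved policy section is packed verbatim into the
    # prompt's {context} slot of the custom verdict template.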
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=False,
        chain_type_kwargs={"prompt": custom_prompt}
    )

    # ✅ Load translation models
    translator_en2ur = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")
    translator_ur2en = pipeline("translation", model="Helsinki-NLP/opus-mt-ur-en")


# -------------------- QA with Bilingual Support --------------------
def ask_policy_question(question, language):
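    """Answer a claim question. Urdu questions are translated to English for
    retrieval, and the English answer is translated back to Urdu."""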
    if qa_chain is None:
        return "The policy is still loading. Please wait."
    try:
        if language == "Urdu":
            question_en = translator_ur2en(question)[0]['translation_text']
            answer_en = qa_chain.run(question_en)
            answer_ur = translator_en2ur(answer_en)[0]['translation_text']
            return answer_ur
        else:
            return qa_chain.run(question)
    except Exception as e:
        return f"Error: {str(e)}"

# -------------------- Gradio Interface --------------------
status_text = "Loading..."

with gr.Blocks() as demo:
    gr.Markdown("## 📋 SL HealthCare Claim Checker (Bilingual: English / اردو)")

    status_box = gr.Textbox(label="Status", value=status_text, interactive=False)

    with gr.Row():
        language = gr.Radio(choices=["English", "Urdu"], label="Select Language / زبان منتخب کریں", value="English")
        question = gr.Textbox(label="Enter your claim question / اپنا سوال درج کریں")
        ask_btn = gr.Button("Ask / پوچھیں")

    answer = gr.Textbox(label="Answer / جواب", lines=6)
    ask_btn.click(fn=ask_policy_question, inputs=[question, language], outputs=answer)

    def startup():
        # Runs once on page load; blocks until the index and translation models
        # are ready, then updates the status box.
        initialize_policy()
        return "Policy loaded. You may now ask questions."

    demo.load(fn=startup, outputs=status_box)

demo.launch()