Spaces:
Sleeping
Sleeping
File size: 5,232 Bytes
2e5ddcc 8f0f770 fc9b01a f96d7b6 2e5ddcc f96d7b6 2e5ddcc 8f0f770 fc9b01a 2e5ddcc f96d7b6 2e5ddcc f96d7b6 2e5ddcc fc9b01a f96d7b6 fc9b01a f96d7b6 fc9b01a 2e5ddcc fc9b01a 2e5ddcc fc9b01a f96d7b6 2e5ddcc fc9b01a 2e5ddcc fc9b01a 2e5ddcc fc9b01a 2e5ddcc f96d7b6 2e5ddcc fc9b01a f96d7b6 2e5ddcc fc9b01a f96d7b6 2e5ddcc fc9b01a f96d7b6 2e5ddcc fc9b01a 2e5ddcc f96d7b6 fc9b01a 2e5ddcc fc9b01a 2e5ddcc fc9b01a 8f0f770 f96d7b6 fc9b01a f96d7b6 fc9b01a f96d7b6 fc9b01a f96d7b6 fc9b01a 2e5ddcc fc9b01a 2e5ddcc fc9b01a 2e5ddcc fc9b01a 2e5ddcc fc9b01a 2e5ddcc f96d7b6 fc9b01a 2e5ddcc f96d7b6 2e5ddcc fc9b01a f96d7b6 2e5ddcc f96d7b6 2e5ddcc f96d7b6 fc9b01a 8f0f770 f96d7b6 2e5ddcc f96d7b6 2e5ddcc fc9b01a f96d7b6 fc9b01a 2e5ddcc fc9b01a 2e5ddcc fc9b01a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
import os
import gradio as gr
import pdfplumber
import re
from langchain.docstore.document import Document
from langchain_community.vectorstores import FAISS # ✅ Fixed deprecation warning
from langchain.embeddings.base import Embeddings
from sklearn.feature_extraction.text import TfidfVectorizer
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from transformers import pipeline
# -------------------- Environment / globals --------------------
# OpenRouter speaks the OpenAI wire protocol, so the OpenAI client env vars
# are pointed at it.  Fall back to "" so a missing OPENROUTER_API_KEY does
# not raise TypeError at import time (os.environ values must be str);
# authentication then fails later with a clear API error instead.
os.environ["OPENAI_API_KEY"] = os.environ.get("OPENROUTER_API_KEY", "")
os.environ["OPENAI_API_BASE"] = "https://openrouter.ai/api/v1"
# NOTE(review): "OPENAI_API_HEADERS" is not a variable langchain_openai
# reads — the effective headers are the default_headers passed to
# ChatOpenAI in initialize_policy(). Kept for backward compatibility.
os.environ["OPENAI_API_HEADERS"] = '{"HTTP-Referer":"https://huggingface.co", "X-Title":"PDF-RAG"}'

# Populated by initialize_policy(); all three stay None until the PDF and
# translation models finish loading.
qa_chain = None
translator_en2ur = None
translator_ur2en = None
# -------------------- PDF Extraction --------------------
def extract_clean_sections(file_path):
    """Parse the policy PDF at *file_path* into per-section Documents.

    Each page's text is scrubbed of address/e-mail footer fragments,
    the pages are concatenated, and the result is split on
    "Heading:"-style section titles.  Sections whose body is 20
    characters or shorter are discarded.
    """
    page_texts = []
    with pdfplumber.open(file_path) as pdf:
        for page in pdf.pages:
            raw = page.extract_text()
            if not raw:
                continue
            # Strip boilerplate footer lines before joining pages.
            cleaned = re.sub(r'Systems Campus.*?Lahore', '', raw)
            cleaned = re.sub(r'E-mail:.*?systemsltd\.com', '', cleaned)
            page_texts.append(cleaned + "\n")
    full_text = "".join(page_texts)

    # re.split with a capturing group leaves titles at odd indices and
    # their bodies at the following even indices.
    parts = re.split(r"(?<=\n)([A-Z][^\n]{3,50}):", full_text)
    sections = []
    for title, body in zip(parts[1::2], parts[2::2]):
        title = title.strip()
        body = body.strip()
        if len(body) > 20:
            sections.append(
                Document(page_content=f"{title}:\n{body}", metadata={"section": title})
            )
    return sections
# -------------------- TF-IDF Embedder --------------------
class TfidfEmbedding(Embeddings):
    """LangChain-compatible embedding backend built on scikit-learn TF-IDF.

    The vectorizer must be fitted via fit() on the corpus before any
    embed call; otherwise sklearn raises NotFittedError.
    """

    def __init__(self):
        self.vectorizer = TfidfVectorizer()

    def fit(self, texts):
        """Learn the TF-IDF vocabulary/IDF weights from the corpus."""
        self.vectorizer.fit(texts)

    def embed_documents(self, texts):
        """Return one dense TF-IDF vector per input text."""
        return self.vectorizer.transform(texts).toarray()

    def embed_query(self, text):
        """Embed a single query string with the same fitted vectorizer."""
        return self.embed_documents([text])[0]
# -------------------- Custom Prompt --------------------
# The prompt forces the model to open every answer with a Yes/No/Partially
# verdict so responses stay easy to scan; {context} is filled by the
# retriever's matched policy sections and {question} by the user's query.
TEMPLATE = """
You are a strict healthcare policy checker for Systems Ltd.
Always begin your answer clearly:
- Say "Yes, ..." if the claim is valid
- Say "No, ..." if the claim is not valid
- Say "Partially, ..." if it's conditionally allowed
Use the following policy information to support your answer.
{context}
Question: {question}
Answer:
"""
# Wrapped for use as the "stuff" chain prompt in initialize_policy().
custom_prompt = PromptTemplate(template=TEMPLATE, input_variables=["context", "question"])
# -------------------- Policy Initialization --------------------
def initialize_policy(pdf_path="healthcare_policy.pdf"):
    """Build the retrieval-QA chain and translation pipelines.

    Args:
        pdf_path: Policy PDF to index. Defaults to the bundled
            "healthcare_policy.pdf", so existing zero-argument callers
            keep working.

    Side effects:
        Sets the module globals ``qa_chain``, ``translator_en2ur`` and
        ``translator_ur2en`` consumed by ask_policy_question().
    """
    global qa_chain, translator_en2ur, translator_ur2en

    # Index the policy sections with TF-IDF embeddings in a FAISS store.
    docs = extract_clean_sections(pdf_path)
    texts = [doc.page_content for doc in docs]
    embedder = TfidfEmbedding()
    embedder.fit(texts)  # must be fitted before FAISS embeds the corpus
    vectordb = FAISS.from_texts(texts, embedder)
    retriever = vectordb.as_retriever()

    # OpenRouter endpoint through the OpenAI-compatible client;
    # temperature 0 keeps the policy verdicts deterministic.
    llm = ChatOpenAI(
        model="tngtech/deepseek-r1t2-chimera:free",
        base_url="https://openrouter.ai/api/v1",
        api_key=os.getenv("OPENAI_API_KEY"),
        default_headers={
            "HTTP-Referer": "https://huggingface.co",
            "X-Title": "PDF-RAG"
        },
        temperature=0.0
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=False,
        chain_type_kwargs={"prompt": custom_prompt}
    )

    # Helsinki-NLP MarianMT models backing the bilingual (English/Urdu) UI.
    translator_en2ur = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")
    translator_ur2en = pipeline("translation", model="Helsinki-NLP/opus-mt-ur-en")
# -------------------- QA with Bilingual Support --------------------
def ask_policy_question(question, language):
    """Answer a claim question, translating to/from Urdu when requested.

    Returns a user-facing string in all cases: the answer, a
    still-loading notice, or an error message.
    """
    if qa_chain is None:
        return "The policy is still loading. Please wait."
    try:
        if language != "Urdu":
            return qa_chain.run(question)
        # Urdu path: translate the question in, run the chain in English,
        # then translate the answer back out.
        question_en = translator_ur2en(question)[0]['translation_text']
        answer_en = qa_chain.run(question_en)
        return translator_en2ur(answer_en)[0]['translation_text']
    except Exception as e:
        # Surface failures in the UI instead of crashing the app.
        return f"Error: {str(e)}"
# -------------------- Gradio Interface --------------------
status_text = "Loading..."


def startup():
    """Blocks load-hook: build the QA chain, then report readiness."""
    global status_text
    initialize_policy()
    status_text = "Policy loaded. You may now ask questions."
    return status_text


with gr.Blocks() as demo:
    gr.Markdown("## 📋 SL HealthCare Claim Checker (Bilingual: English / اردو)")
    status_box = gr.Textbox(label="Status", value=status_text, interactive=False)
    with gr.Row():
        language = gr.Radio(choices=["English", "Urdu"], label="Select Language / زبان منتخب کریں", value="English")
        question = gr.Textbox(label="Enter your claim question / اپنا سوال درج کریں")
    ask_btn = gr.Button("Ask / پوچھیں")
    answer = gr.Textbox(label="Answer / جواب", lines=6)
    # Wire the button to the QA function; heavy model loading is deferred
    # to the page-load event so the UI renders immediately.
    ask_btn.click(fn=ask_policy_question, inputs=[question, language], outputs=answer)
    demo.load(fn=startup, outputs=status_box)

demo.launch()
|