# app.py — SL HealthCare Claim Checker: Gradio + LangChain RAG over a policy PDF.
import os
import gradio as gr
import pdfplumber
import re
from langchain.docstore.document import Document
from langchain.vectorstores import FAISS
from langchain.embeddings.base import Embeddings
from sklearn.feature_extraction.text import TfidfVectorizer
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
# Route the OpenAI-compatible client through OpenRouter.
# NOTE(review): this raises KeyError at import time if OPENROUTER_API_KEY is
# not set in the environment/Space secrets — confirm fail-fast is intended.
os.environ["OPENAI_API_KEY"] = os.environ["OPENROUTER_API_KEY"]
os.environ["OPENAI_API_BASE"] = "https://openrouter.ai/api/v1"
# NOTE(review): OPENAI_API_HEADERS does not appear to be read anywhere in this
# file; the same headers are passed explicitly via `default_headers` below.
os.environ["OPENAI_API_HEADERS"] = '{"HTTP-Referer":"https://huggingface.co", "X-Title":"PDF-RAG"}'
#Section-aware PDF extractor
def extract_clean_sections(file_path):
    """Extract section-level Documents from the policy PDF.

    Page text is concatenated (after stripping recurring address/e-mail
    boilerplate), then split on "Heading:" style lines. Sections whose
    body is 20 characters or shorter are discarded.
    """
    pages = []
    with pdfplumber.open(file_path) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            if not page_text:
                continue
            # Remove recurring header/footer noise before sectioning.
            page_text = re.sub(r'Systems Campus.*?Lahore', '', page_text)
            page_text = re.sub(r'E-mail:.*?systemsltd\.com', '', page_text)
            pages.append(page_text)
    combined = "".join(p + "\n" for p in pages)

    # A heading is a capitalized line of 4-51 chars ending in ':',
    # matched just after a newline.
    heading_re = r"(?<=\n)([A-Z][^\n]{3,50}):"
    chunks = re.split(heading_re, combined)

    # re.split with one capture group alternates [pre, title, body, title, ...],
    # so pairing odd-indexed titles with the following bodies is always safe.
    sections = []
    for title, body in zip(chunks[1::2], chunks[2::2]):
        title = title.strip()
        body = body.strip()
        if len(body) > 20:
            sections.append(
                Document(
                    page_content=f"{title}:\n{body}",
                    metadata={"section": title},
                )
            )
    return sections
#TF-IDF Embedding for RAG
class TfidfEmbedding(Embeddings):
    """LangChain Embeddings adapter backed by a scikit-learn TF-IDF vectorizer.

    `fit` must be called on the corpus before `embed_documents` or
    `embed_query`; otherwise sklearn raises NotFittedError.
    """

    def __init__(self):
        # Vocabulary is learned lazily via fit(); defaults are sklearn's.
        self.vectorizer = TfidfVectorizer()

    def fit(self, texts):
        """Learn the TF-IDF vocabulary/IDF weights from the corpus."""
        self.vectorizer.fit(texts)

    def embed_documents(self, texts):
        """Return one dense vector per text as List[List[float]].

        `.tolist()` satisfies the LangChain `Embeddings` interface contract
        (plain Python lists) instead of leaking numpy arrays to callers.
        """
        return self.vectorizer.transform(texts).toarray().tolist()

    def embed_query(self, text):
        """Return the dense vector for a single query as List[float]."""
        return self.vectorizer.transform([text]).toarray()[0].tolist()
# Prompt: forces the model to open with a Yes / No / Partially verdict,
# grounded only in the retrieved policy context.
TEMPLATE = """
You are a strict healthcare policy checker for Systems Ltd.
Always begin your answer clearly:
- Say "Yes, ..." if the claim is valid
- Say "No, ..." if the claim is not valid
- Say "Partially, ..." if it's conditionally allowed
Use the following policy information to support your answer.
{context}
Question: {question}
Answer:
"""
custom_prompt = PromptTemplate(template=TEMPLATE, input_variables=["context", "question"])
# Module-level state shared between the two Gradio callbacks:
# populated by load_policy(), read by ask_policy_question().
retriever = None
qa_chain = None
# ✅ Build the retriever + QA chain once when the load button is clicked.
def load_policy(pdf_path="healthcare_policy.pdf"):
    """Index the policy PDF and construct the RetrievalQA chain.

    Args:
        pdf_path: Path to the policy PDF. Defaults to the bundled file,
            preserving the original hard-coded behavior.

    Returns:
        A status string for the Gradio status textbox. Failures (missing
        file, no extractable sections) are reported as text rather than
        raised, so the UI callback never crashes.
    """
    global retriever, qa_chain
    try:
        docs = extract_clean_sections(pdf_path)
    except FileNotFoundError:
        return f"Error: policy file '{pdf_path}' not found."
    if not docs:
        return "Error: no policy sections could be extracted from the PDF."
    texts = [doc.page_content for doc in docs]
    # TF-IDF must be fitted on the corpus before FAISS calls embed_documents.
    embedder = TfidfEmbedding()
    embedder.fit(texts)
    vectordb = FAISS.from_texts(texts, embedder)
    retriever = vectordb.as_retriever()
    llm = ChatOpenAI(
        model="tngtech/deepseek-r1t2-chimera:free",
        base_url="https://openrouter.ai/api/v1",
        api_key=os.getenv("OPENAI_API_KEY"),
        default_headers={
            "HTTP-Referer": "https://huggingface.co",
            "X-Title": "PDF-RAG",
        },
        temperature=0.0,  # deterministic: policy checking should not be creative
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",  # concatenate all retrieved sections into one prompt
        retriever=retriever,
        return_source_documents=False,
        chain_type_kwargs={"prompt": custom_prompt},
    )
    return "Policy loaded. You may now ask questions."
# ✅ Answer a claim question against the loaded policy chain.
def ask_policy_question(question):
    """Run a claim question through the QA chain and return the answer text.

    Returns a prompt-to-load message if load_policy() has not been run yet.
    """
    if qa_chain is None:
        return "Please click 'Ask about claim' to load the policy first."
    try:
        answer_text = qa_chain.run(question)
    except Exception as exc:
        # UI boundary: surface the failure as text rather than crashing Gradio.
        return f"Error: {str(exc)}"
    return answer_text
# ✅ Gradio UI: load button + status row, then question box and answer box.
with gr.Blocks() as demo:
    gr.Markdown("## SL HealthCare Claim Checker (RAG)")
    # Fixed mojibake in the button label: "πŸ“₯" was the UTF-8 bytes of the
    # 📥 (inbox tray) emoji decoded with the wrong codec.
    load_btn = gr.Button("📥 Ask about claim (Load Policy)")
    load_status = gr.Textbox(label="Status")
    # Builds the index and QA chain; writes the status message to load_status.
    load_btn.click(fn=load_policy, outputs=load_status)
    with gr.Row():
        question = gr.Textbox(label="Enter your claim question")
        ask_btn = gr.Button("Ask")
    answer = gr.Textbox(label="Answer", lines=6)
    ask_btn.click(fn=ask_policy_question, inputs=question, outputs=answer)
demo.launch()