Spaces:

saadawaissheikh
/

SystemsHealthcareChatbot

Sleeping

App Files Files Community

SystemsHealthcareChatbot / app.py

saadawaissheikh

Update app.py

05cbe4d verified about 2 months ago

raw

history blame contribute delete

4.08 kB

	import os
	import gradio as gr
	import pdfplumber
	import re

	from langchain.docstore.document import Document
	from langchain.vectorstores import FAISS
	from langchain.embeddings.base import Embeddings
	from sklearn.feature_extraction.text import TfidfVectorizer
	from langchain.chains import RetrievalQA
	from langchain.prompts import PromptTemplate
	from langchain_openai import ChatOpenAI



	# Copy the OpenRouter key into the OPENAI_* variables and point the base URL
	# at OpenRouter. The dict is built first, so a missing OPENROUTER_API_KEY
	# raises KeyError before any variable is written.
	os.environ.update({
	    "OPENAI_API_KEY": os.environ["OPENROUTER_API_KEY"],
	    "OPENAI_API_BASE": "https://openrouter.ai/api/v1",
	    "OPENAI_API_HEADERS": '{"HTTP-Referer":"https://huggingface.co", "X-Title":"PDF-RAG"}',
	})

	#Load and clean the policy PDF
	def extract_clean_sections(file_path):
	    """Split the policy PDF into one ``Document`` per titled section.

	    Text is extracted page by page with pdfplumber. Spans matching the
	    ``Systems Campus...Lahore`` and ``E-mail:...systemsltd.com`` patterns
	    (presumably page header/footer boilerplate) are removed, and the
	    remaining text is cut at every heading of the form ``Title:`` found at
	    the start of a line.

	    Args:
	        file_path: Path of the PDF file to read.

	    Returns:
	        A list of ``Document`` objects. Each ``page_content`` is the title,
	        a colon, a newline and the section body; the title is also stored
	        in the metadata under ``"section"``. Sections whose stripped body
	        is 20 characters or shorter are dropped.
	    """
	    page_texts = []
	    with pdfplumber.open(file_path) as pdf:
	        for page in pdf.pages:
	            text = page.extract_text()
	            if not text:
	                # Pages with no extractable text contribute nothing.
	                continue
	            text = re.sub(r'Systems Campus.*?Lahore', '', text)
	            text = re.sub(r'E-mail:.*?systemsltd\.com', '', text)
	            page_texts.append(text + "\n")
	    # Join once instead of growing a string page by page.
	    full_text = "".join(page_texts)

	    # A heading starts a line with a capital letter, runs for 3-50 more
	    # characters and ends with a colon. Anchoring with ^ (MULTILINE) rather
	    # than a look-behind for "\n" also recognises a heading on the very
	    # first line of the text.
	    heading_pattern = r"^([A-Z][^\n]{3,50}):"
	    parts = re.split(heading_pattern, full_text, flags=re.MULTILINE)

	    # With one capture group, re.split yields
	    # [preamble, title1, body1, title2, body2, ...]; pair titles with bodies.
	    docs = []
	    for raw_title, raw_body in zip(parts[1::2], parts[2::2]):
	        title = raw_title.strip()
	        content = raw_body.strip()
	        if len(content) > 20:
	            docs.append(Document(page_content=f"{title}:\n{content}", metadata={"section": title}))
	    return docs

	#TF-IDF Embeddings
	class TfidfEmbedding(Embeddings):
	    """Embeddings adapter backed by a scikit-learn ``TfidfVectorizer``.

	    Both embed methods only call ``transform`` on the vectorizer, so it is
	    expected to have been fitted through ``fit`` beforehand.
	    """

	    def __init__(self):
	        self.vectorizer = TfidfVectorizer()

	    def fit(self, texts):
	        """Fit the underlying vectorizer on ``texts``."""
	        self.vectorizer.fit(texts)

	    def embed_documents(self, texts):
	        """Return the dense TF-IDF array for ``texts``, one row per text."""
	        sparse_matrix = self.vectorizer.transform(texts)
	        return sparse_matrix.toarray()

	    def embed_query(self, text):
	        """Return the dense TF-IDF row for a single query string."""
	        single_row_matrix = self.vectorizer.transform([text]).toarray()
	        return single_row_matrix[0]

	# Prompt Template
	# Tells the model to act as a policy checker and to open every answer with
	# "Yes", "No" or "Partially". {context} and {question} are the two
	# placeholders declared in input_variables below.
	TEMPLATE = """
	You are a strict healthcare policy checker for Systems Ltd.

	Always begin your answer clearly:
	- Say "Yes, ..." if the claim is valid
	- Say "No, ..." if the claim is not valid
	- Say "Partially, ..." if it's conditionally allowed

	Use the following policy information to support your answer.

	{context}

	Question: {question}
	Answer:
	"""
	# Wrap the raw template in a PromptTemplate; it is handed to the QA chain
	# as its prompt when the chain is built.
	custom_prompt = PromptTemplate(template=TEMPLATE, input_variables=["context", "question"])


	# Load the policy at startup
	def initialize_policy():
	    """Build the retrieval QA chain and store it in the global ``qa_chain``.

	    Reads ``healthcare_policy.pdf`` through ``extract_clean_sections``, fits
	    a ``TfidfEmbedding`` on the section texts, indexes the texts in FAISS,
	    and combines the resulting retriever with a chat model served through
	    OpenRouter into a "stuff" ``RetrievalQA`` chain that uses
	    ``custom_prompt``.
	    """
	    global qa_chain

	    sections = extract_clean_sections("healthcare_policy.pdf")
	    section_texts = [section.page_content for section in sections]

	    # The vectorizer has to be fitted on the corpus before FAISS asks it
	    # to embed the same texts.
	    tfidf = TfidfEmbedding()
	    tfidf.fit(section_texts)
	    retriever = FAISS.from_texts(section_texts, tfidf).as_retriever()

	    request_headers = {
	        "HTTP-Referer": "https://huggingface.co",
	        "X-Title": "PDF-RAG",
	    }
	    llm = ChatOpenAI(
	        model="tngtech/deepseek-r1t2-chimera:free",
	        base_url="https://openrouter.ai/api/v1",
	        api_key=os.getenv("OPENAI_API_KEY"),
	        default_headers=request_headers,
	        temperature=0.0,
	    )

	    qa_chain = RetrievalQA.from_chain_type(
	        llm=llm,
	        chain_type="stuff",
	        retriever=retriever,
	        return_source_documents=False,
	        chain_type_kwargs={"prompt": custom_prompt},
	    )

	# Run QA on user question
	def ask_policy_question(question):
	    """Answer a claim question with the global QA chain.

	    Returns a "still loading" notice while ``qa_chain`` is ``None``.
	    Otherwise returns the chain's answer, or an ``"Error: ..."`` string
	    carrying the exception message if the chain raises.
	    """
	    chain = qa_chain
	    if chain is None:
	        return "The policy is still loading. Please wait."

	    try:
	        result = chain.run(question)
	    except Exception as err:
	        # Return the failure as text so it appears in the answer box.
	        return f"Error: {str(err)}"
	    return result


	# Gradio Interface
	# Module-level state: qa_chain stays None until initialize_policy() has
	# built the chain; status_text holds the text shown in the status box.
	qa_chain = None
	status_text = "Loading..."

	with gr.Blocks() as demo:
	    gr.Markdown("## SL HealthCare Claim Checker (RAG)")
	    status_box = gr.Textbox(label="Status", value=status_text, interactive=False)

	    with gr.Row():
	        question = gr.Textbox(label="Enter your claim question")
	        ask_btn = gr.Button("Ask")

	    answer = gr.Textbox(label="Answer", lines=6)
	    ask_btn.click(fn=ask_policy_question, inputs=question, outputs=answer)

	    # Load the policy on startup
	    def startup():
	        """Build the QA chain if needed and return the ready status text.

	        This is wired to ``demo.load``, which runs each time the page is
	        loaded in a browser. The chain is therefore only built while
	        ``qa_chain`` is still ``None``, so later page loads do not re-parse
	        the PDF and rebuild the index.
	        """
	        global status_text
	        if qa_chain is None:
	            initialize_policy()
	        status_text = "Policy loaded. You may now ask questions."
	        return status_text

	    demo.load(fn=startup, outputs=status_box)

	demo.launch()