Spaces:

saadawaissheikh
/

SLclaimchecker

Sleeping

App Files Files Community

SLclaimchecker / app.py

saadawaissheikh

Update app.py

88ca21f verified about 2 months ago

raw

history blame contribute delete

5.01 kB



	import os
	import re
	import gradio as gr
	import pdfplumber
	import pytesseract
	from PIL import Image
	from langchain.docstore.document import Document
	from langchain.vectorstores import FAISS
	from langchain.embeddings.base import Embeddings
	from sklearn.feature_extraction.text import TfidfVectorizer
	from langchain.chains import RetrievalQA
	from langchain.prompts import PromptTemplate
	from langchain_openai import ChatOpenAI
	from langchain.text_splitter import RecursiveCharacterTextSplitter


	# The OpenRouter key and endpoint are published under the OPENAI_* variable
	# names before any chat model is constructed further down in this file.
	_openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
	if not _openrouter_api_key:
	    # os.environ only accepts strings: assigning a missing (None) key would
	    # fail with an opaque TypeError, so stop with an actionable message.
	    raise RuntimeError(
	        "OPENROUTER_API_KEY is not set; add it to the environment "
	        "(for example as a Space secret) before starting the app."
	    )
	os.environ["OPENAI_API_KEY"] = _openrouter_api_key
	os.environ["OPENAI_API_BASE"] = "https://openrouter.ai/api/v1"
	# NOTE(review): nothing in this file reads OPENAI_API_HEADERS directly;
	# confirm that the client library actually honours it.
	os.environ["OPENAI_API_HEADERS"] = '{"HTTP-Referer":"https://huggingface.co/spaces/saadawaissheikh/SystemsHealthcareChatbot", "X-Title":"PDF Chatbot"}'

	# Policy document that get_qa_chain() loads and indexes.
	PDF_PATH = "healthcare_policy.pdf"

	# Repeats the top-of-file import; kept so this setup section is unchanged
	# for anything that relies on it.
	import pytesseract

	# Force the path to the Tesseract binary (required for Hugging Face Spaces).
	pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"


	class TfidfEmbedding(Embeddings):
	    """Embeddings implementation backed by a ``TfidfVectorizer``.

	    Call ``fit`` on the corpus first; the ``embed_*`` methods only call
	    ``transform`` on the already-configured vectorizer.
	    """

	    def __init__(self):
	        self.vectorizer = TfidfVectorizer()

	    def fit(self, texts):
	        """Fit the underlying vectorizer on ``texts``."""
	        self.vectorizer.fit(texts)

	    def embed_documents(self, texts):
	        """Return the dense TF-IDF array for ``texts`` (one row per text)."""
	        tfidf_matrix = self.vectorizer.transform(texts)
	        return tfidf_matrix.toarray()

	    def embed_query(self, text):
	        """Return the dense TF-IDF row for the single string ``text``."""
	        single_row = self.vectorizer.transform([text])
	        dense_rows = single_row.toarray()
	        return dense_rows[0]

	def load_pdf_chunks(pdf_path):
	    """Read the PDF at ``pdf_path`` and return its text as ``Document`` chunks.

	    The text of all pages is joined with newlines, then split with a
	    ``RecursiveCharacterTextSplitter`` (chunk_size=1000, chunk_overlap=50).
	    Each resulting chunk is wrapped in a ``Document``.
	    """
	    with pdfplumber.open(pdf_path) as pdf:
	        # A page whose extract_text() result is falsy contributes empty text.
	        page_texts = [page.extract_text() or "" for page in pdf.pages]
	    full_text = "\n".join(page_texts)

	    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
	    pieces = splitter.split_text(full_text)
	    return [Document(page_content=piece) for piece in pieces]

	def setup_vectordb(docs):
	    """Build a FAISS vector store over ``docs`` using TF-IDF embeddings.

	    Args:
	        docs: Iterable of objects exposing a ``page_content`` string.

	    Returns:
	        The store produced by ``FAISS.from_texts`` for the document texts.

	    Raises:
	        ValueError: If ``docs`` is empty or contains only blank text, which
	            is what an image-only (scanned) PDF would produce.
	    """
	    texts = [doc.page_content for doc in docs]
	    # Fail early with a clear message instead of deep inside vectorizer
	    # fitting or index construction.
	    if not any(text and text.strip() for text in texts):
	        raise ValueError(
	            "No text was extracted from the document; cannot build the "
	            "vector index."
	        )
	    embedder = TfidfEmbedding()
	    # The vectorizer must be fitted on the corpus before FAISS asks it to
	    # embed the same texts.
	    embedder.fit(texts)
	    return FAISS.from_texts(texts, embedder)

	def get_llm():
	    """Create the ``ChatOpenAI`` model used by the QA chain."""
	    llm_settings = {
	        "model": "tngtech/deepseek-r1t2-chimera:free",
	        "temperature": 0.0,
	    }
	    return ChatOpenAI(**llm_settings)

	def get_qa_chain():
	    """Assemble the RetrievalQA chain over the policy PDF.

	    Loads and chunks ``PDF_PATH``, indexes the chunks, and wires the
	    resulting retriever, the chat model and the answer prompt into a
	    ``RetrievalQA`` chain of type "stuff".
	    """
	    policy_docs = load_pdf_chunks(PDF_PATH)
	    retriever = setup_vectordb(policy_docs).as_retriever()
	    answer_prompt = PromptTemplate.from_template(
	        "Answer with Yes or No first. Then explain: {context}\nQuestion: {question}"
	    )
	    chain_options = dict(
	        llm=get_llm(),
	        retriever=retriever,
	        chain_type="stuff",
	        return_source_documents=False,
	        chain_type_kwargs={"prompt": answer_prompt},
	    )
	    return RetrievalQA.from_chain_type(**chain_options)


	# Built once when the module is loaded and reused by the handlers below.
	qa_chain = get_qa_chain()

	# ✅ Standard PDF QA
	def ask_question(query):
	    """Answer a free-text policy question through the shared QA chain.

	    Args:
	        query: Question text submitted from the UI textbox.

	    Returns:
	        The chain's answer; a short prompt when ``query`` is empty or blank;
	        or an ``"Error: ..."`` string if the chain raises.
	    """
	    # Skip the model call entirely when there is nothing to ask.
	    if not query or not query.strip():
	        return "Please enter a question about the policy."
	    try:
	        return qa_chain.run(query)
	    except Exception as e:
	        # UI boundary: show the failure in the answer box rather than
	        # letting the event handler crash.
	        return f"Error: {e}"

	# ✅ Extract Tablets from Image
	def extract_tablet_names(text):
	    """Pull candidate medicine names out of OCR'd receipt text.

	    This is a heuristic: on each line the first run of alphabetic words is
	    taken as a name, optionally followed by a dose such as ``500mg`` or
	    ``500 mg`` (the dose is matched but not returned). Names equal to
	    "cash", "scaling" or "polish" (case-insensitive) are skipped.

	    Args:
	        text: Multi-line receipt text; ``None`` or empty yields ``[]``.

	    Returns:
	        Unique names in the order they first appear.
	    """
	    excluded = {"cash", "scaling", "polish"}
	    # Single backslashes are required inside the raw string: the previous
	    # double-escaped pattern looked for literal backslash characters and
	    # therefore never matched real receipt text.
	    name_pattern = re.compile(r"\b([A-Za-z]+(?:\s+[A-Za-z]+)*)\s*(\d+\s*mg)?\b")

	    names = []
	    for line in (text or "").splitlines():
	        match = name_pattern.search(line)
	        if match is None:
	            continue
	        name = match.group(1)
	        if name.lower() not in excluded:
	            names.append(name)
	    # dict.fromkeys de-duplicates while keeping first-seen order, so the
	    # result is stable between runs (a set would not be).
	    return list(dict.fromkeys(names))

	def extract_text_from_image(img_path):
	    """OCR the image at ``img_path`` and return the tablet names found in it.

	    Despite the name, the return value is the list produced by
	    ``extract_tablet_names``, not the raw OCR text.

	    Args:
	        img_path: Path of the image file to read.

	    Returns:
	        List of candidate medicine names extracted from the OCR text.
	    """
	    # The context manager releases the underlying file handle as soon as
	    # OCR has finished with the image.
	    with Image.open(img_path) as image:
	        raw_text = pytesseract.image_to_string(image)
	    return extract_tablet_names(raw_text)

	def check_tablets(img_path):
	    """OCR a receipt image and ask the QA chain whether each tablet is covered.

	    Args:
	        img_path: File path of the uploaded receipt image; may be ``None``
	            or empty when no image is present.

	    Returns:
	        One "💊 name → answer" entry per detected tablet, separated by blank
	        lines, or a "❌ ..." message when the path is invalid, no tablets
	        are found, or an unexpected error occurs.
	    """
	    try:
	        # Step 1: confirm an image path was received and exists on disk.
	        if not img_path or not os.path.exists(img_path):
	            return "❌ Error: Image path is invalid or file not found."

	        # Step 2: run OCR; the context manager closes the image file
	        # afterwards instead of leaving the handle open.
	        with Image.open(img_path) as image:
	            raw_text = pytesseract.image_to_string(image)

	        # Step 3: extract medicine names from the OCR text.
	        tablets = extract_tablet_names(raw_text)
	        if not tablets:
	            return "❌ No tablets found in the receipt text."

	        # Step 4: query the QA chain once per tablet. A failure for one
	        # tablet is reported inline so the remaining ones are still checked.
	        entries = []
	        for med in tablets:
	            question = f"Is the medicine {med} covered under the healthcare policy?"
	            try:
	                answer = qa_chain.run(question)
	            except Exception as e:
	                answer = f"RAG error: {str(e)}"
	            entries.append(f"💊 {med} → {answer}")

	        return "\n\n".join(entries).strip()

	    except Exception as e:
	        # UI boundary: report anything unexpected in the result box.
	        return f"❌ Critical error during tablet check: {str(e)}"


	# ✅ Gradio UI
	with gr.Blocks(title="Healthcare Chatbot") as app:
	    # Page heading and intro text.
	    gr.Markdown("# 💬 Systems Healthcare Chatbot")
	    gr.Markdown("📄 Policy document loaded. You may now ask questions or upload a medicine receipt to check claims.")

	    # Tab 1: free-text question answered by ask_question on submit.
	    with gr.Tab("Ask about Policy"):
	        with gr.Row():
	            question_box = gr.Textbox(label="Your Question")
	            answer_box = gr.Textbox(label="Answer")
	        question_box.submit(fn=ask_question, inputs=question_box, outputs=answer_box)

	    # Tab 2: receipt image handed to check_tablets whenever it changes.
	    with gr.Tab("Check Tablet Claim"):
	        with gr.Row():
	            receipt_image = gr.Image(type="filepath", label="Upload Tablet Receipt")
	            claim_result = gr.Textbox(label="Result")
	        receipt_image.change(fn=check_tablets, inputs=receipt_image, outputs=claim_result)


	# Start the Gradio app.
	app.launch()