Spaces:

saadawaissheikh
/

SLclaimchecker

Sleeping

File size: 5,009 Bytes



import os
import re
import gradio as gr
import pdfplumber
import pytesseract
from PIL import Image
from langchain.docstore.document import Document
from langchain.vectorstores import FAISS
from langchain.embeddings.base import Embeddings
from sklearn.feature_extraction.text import TfidfVectorizer
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter


os.environ["OPENAI_API_KEY"] = os.getenv("OPENROUTER_API_KEY")
os.environ["OPENAI_API_BASE"] = "https://openrouter.ai/api/v1"
os.environ["OPENAI_API_HEADERS"] = '{"HTTP-Referer":"https://huggingface.co/spaces/saadawaissheikh/SystemsHealthcareChatbot", "X-Title":"PDF Chatbot"}'

# ✅ Load PDF once at startup
PDF_PATH = "healthcare_policy.pdf"

import pytesseract

# ✅ Force path to Tesseract binary (required for Hugging Face Spaces)
pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"


class TfidfEmbedding(Embeddings):
    def __init__(self):
        self.vectorizer = TfidfVectorizer()

    def fit(self, texts):
        self.vectorizer.fit(texts)

    def embed_documents(self, texts):
        return self.vectorizer.transform(texts).toarray()

    def embed_query(self, text):
        return self.vectorizer.transform([text]).toarray()[0]

def load_pdf_chunks(pdf_path):
    with pdfplumber.open(pdf_path) as pdf:
        full_text = "\n".join([page.extract_text() or "" for page in pdf.pages])
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
    chunks = splitter.split_text(full_text)
    return [Document(page_content=chunk) for chunk in chunks]

def setup_vectordb(docs):
    texts = [doc.page_content for doc in docs]
    embedder = TfidfEmbedding()
    embedder.fit(texts)
    vectordb = FAISS.from_texts(texts, embedder)
    return vectordb

def get_llm():
    return ChatOpenAI(
        model="tngtech/deepseek-r1t2-chimera:free",
        temperature=0.0
    )

def get_qa_chain():
    docs = load_pdf_chunks(PDF_PATH)
    vectordb = setup_vectordb(docs)
    retriever = vectordb.as_retriever()
    prompt = PromptTemplate.from_template("Answer with Yes or No first. Then explain: {context}\nQuestion: {question}")
    llm = get_llm()
    return RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        return_source_documents=False,
        chain_type_kwargs={"prompt": prompt}
    )

qa_chain = get_qa_chain()

# ✅ Standard PDF QA
def ask_question(query):
    try:
        return qa_chain.run(query)
    except Exception as e:
        return f"Error: {e}"

# ✅ Extract Tablets from Image
def extract_tablet_names(text):
    medicines = []
    for line in text.splitlines():
        match = re.search(r"\\b([A-Za-z]+(?:\\s+[A-Za-z]+)*)\\s*(\\d+mg|\\d+\\s*mg)?\\b", line)
        if match:
            name = match.group(1).strip()
            if name.lower() not in ["cash", "scaling", "polish"]:
                medicines.append(name)
    return list(set(medicines))

def extract_text_from_image(img_path):
    image = Image.open(img_path)
    raw_text = pytesseract.image_to_string(image)
    return extract_tablet_names(raw_text)

def check_tablets(img_path):
    try:
        # Step 1: Confirm image path is received
        if not img_path or not os.path.exists(img_path):
            return "❌ Error: Image path is invalid or file not found."

        # Step 2: Run OCR
        image = Image.open(img_path)
        raw_text = pytesseract.image_to_string(image)

        # Step 3: Extract medicine names
        tablets = extract_tablet_names(raw_text)
        if not tablets:
            return "❌ No tablets found in the receipt text."

        # Step 4: Use RAG to check each tablet
        result = ""
        for med in tablets:
            question = f"Is the medicine {med} covered under the healthcare policy?"
            try:
                answer = qa_chain.run(question)
            except Exception as e:
                answer = f"RAG error: {str(e)}"
            result += f"💊 {med} → {answer}\n\n"

        return result.strip()

    except Exception as e:
        return f"❌ Critical error during tablet check: {str(e)}"


# ✅ Gradio UI
with gr.Blocks(title="Healthcare Chatbot") as app:
    gr.Markdown("# 💬 Systems Healthcare Chatbot")
    gr.Markdown("📄 Policy document loaded. You may now ask questions or upload a medicine receipt to check claims.")

    with gr.Tab("Ask about Policy"):
        with gr.Row():
            txt = gr.Textbox(label="Your Question")
            ans = gr.Textbox(label="Answer")
        txt.submit(fn=ask_question, inputs=txt, outputs=ans)

    with gr.Tab("Check Tablet Claim"):
        with gr.Row():
            img = gr.Image(type="filepath", label="Upload Tablet Receipt")
            out = gr.Textbox(label="Result")
        img.change(fn=check_tablets, inputs=img, outputs=out)




# ✅ Launch App
app.launch()