import os
import re

import gradio as gr
import pdfplumber
import pytesseract
from PIL import Image
from sklearn.feature_extraction.text import TfidfVectorizer

from langchain.docstore.document import Document
from langchain.vectorstores import FAISS
from langchain.embeddings.base import Embeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI

# ✅ Route OpenAI-compatible calls through OpenRouter
os.environ["OPENAI_API_KEY"] = os.getenv("OPENROUTER_API_KEY")
os.environ["OPENAI_API_BASE"] = "https://openrouter.ai/api/v1"
os.environ["OPENAI_API_HEADERS"] = '{"HTTP-Referer":"https://huggingface.co/spaces/saadawaissheikh/SystemsHealthcareChatbot", "X-Title":"PDF Chatbot"}'

# ✅ Load PDF once at startup
PDF_PATH = "healthcare_policy.pdf"

# ✅ Force path to Tesseract binary (required for Hugging Face Spaces)
pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"


class TfidfEmbedding(Embeddings):
    """TF-IDF embedding wrapper so FAISS can be used without an external embedding API."""

    def __init__(self):
        self.vectorizer = TfidfVectorizer()

    def fit(self, texts):
        self.vectorizer.fit(texts)

    def embed_documents(self, texts):
        return self.vectorizer.transform(texts).toarray()

    def embed_query(self, text):
        return self.vectorizer.transform([text]).toarray()[0]


def load_pdf_chunks(pdf_path):
    """Extract the PDF text and split it into overlapping chunks for retrieval."""
    with pdfplumber.open(pdf_path) as pdf:
        full_text = "\n".join(page.extract_text() or "" for page in pdf.pages)
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
    chunks = splitter.split_text(full_text)
    return [Document(page_content=chunk) for chunk in chunks]


def setup_vectordb(docs):
    """Fit the TF-IDF embedder on the chunks and build a FAISS index over them."""
    texts = [doc.page_content for doc in docs]
    embedder = TfidfEmbedding()
    embedder.fit(texts)
    vectordb = FAISS.from_texts(texts, embedder)
    return vectordb


def get_llm():
    return ChatOpenAI(
        model="tngtech/deepseek-r1t2-chimera:free",
        temperature=0.0,
    )


def get_qa_chain():
    docs = load_pdf_chunks(PDF_PATH)
    vectordb = setup_vectordb(docs)
    retriever = vectordb.as_retriever()
    prompt = PromptTemplate.from_template(
        "Answer with Yes or No first. Then explain: {context}\nQuestion: {question}"
    )
    llm = get_llm()
    return RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        return_source_documents=False,
        chain_type_kwargs={"prompt": prompt},
    )


qa_chain = get_qa_chain()


# ✅ Standard PDF QA
def ask_question(query):
    try:
        return qa_chain.run(query)
    except Exception as e:
        return f"Error: {e}"


# ✅ Extract tablet names from OCR text
def extract_tablet_names(text):
    medicines = []
    for line in text.splitlines():
        match = re.search(r"\b([A-Za-z]+(?:\s+[A-Za-z]+)*)\s*(\d+mg|\d+\s*mg)?\b", line)
        if match:
            name = match.group(1).strip()
            if name.lower() not in ["cash", "scaling", "polish"]:
                medicines.append(name)
    return list(set(medicines))


def extract_text_from_image(img_path):
    image = Image.open(img_path)
    raw_text = pytesseract.image_to_string(image)
    return extract_tablet_names(raw_text)


def check_tablets(img_path):
    try:
        # Step 1: Confirm image path is received
        if not img_path or not os.path.exists(img_path):
            return "❌ Error: Image path is invalid or file not found."

        # Step 2: Run OCR
        image = Image.open(img_path)
        raw_text = pytesseract.image_to_string(image)

        # Step 3: Extract medicine names
        tablets = extract_tablet_names(raw_text)
        if not tablets:
            return "❌ No tablets found in the receipt text."

        # Step 4: Use RAG to check each tablet
        result = ""
        for med in tablets:
            question = f"Is the medicine {med} covered under the healthcare policy?"
            try:
                answer = qa_chain.run(question)
            except Exception as e:
                answer = f"RAG error: {str(e)}"
            result += f"💊 {med} → {answer}\n\n"

        return result.strip()

    except Exception as e:
        return f"❌ Critical error during tablet check: {str(e)}"


# ✅ Gradio UI
with gr.Blocks(title="Healthcare Chatbot") as app:
    gr.Markdown("# 💬 Systems Healthcare Chatbot")
    gr.Markdown("📄 Policy document loaded. You may now ask questions or upload a medicine receipt to check claims.")

    with gr.Tab("Ask about Policy"):
        with gr.Row():
            txt = gr.Textbox(label="Your Question")
            ans = gr.Textbox(label="Answer")
        txt.submit(fn=ask_question, inputs=txt, outputs=ans)

    with gr.Tab("Check Tablet Claim"):
        with gr.Row():
            img = gr.Image(type="filepath", label="Upload Tablet Receipt")
            out = gr.Textbox(label="Result")
        img.change(fn=check_tablets, inputs=img, outputs=out)

# ✅ Launch App
app.launch()
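
# Optional local smoke test (a minimal sketch, not part of the deployed Space; assumes the
# same dependencies are installed and healthcare_policy.pdf is present). It exercises the
# retrieval layer without launching the Gradio UI; uncomment and run in a Python shell:
#
#     docs = load_pdf_chunks(PDF_PATH)
#     vectordb = setup_vectordb(docs)
#     for doc in vectordb.similarity_search("dental coverage", k=2):
#         print(doc.page_content[:200])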