saadawaissheikh committed on
Commit
2ca0835
·
verified ·
1 Parent(s): 561058a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -0
app.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ import os
4
+ import re
5
+ import gradio as gr
6
+ import pdfplumber
7
+ import pytesseract
8
+ from PIL import Image
9
+ from langchain.docstore.document import Document
10
+ from langchain.vectorstores import FAISS
11
+ from langchain.embeddings.base import Embeddings
12
+ from sklearn.feature_extraction.text import TfidfVectorizer
13
+ from langchain.chains import RetrievalQA
14
+ from langchain.prompts import PromptTemplate
15
+ from langchain_openai import ChatOpenAI
16
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
17
+
18
+
19
# Route the OpenAI-compatible client through OpenRouter.
# os.environ only accepts str values, so the key is copied only when it is
# actually set; assigning the None returned by os.getenv would raise
# TypeError at import time and hide the real problem (a missing secret).
_openrouter_key = os.getenv("OPENROUTER_API_KEY")
if _openrouter_key:
    os.environ["OPENAI_API_KEY"] = _openrouter_key
os.environ["OPENAI_API_BASE"] = "https://openrouter.ai/api/v1"
# NOTE(review): confirm the client library really reads OPENAI_API_HEADERS;
# if it does not, these attribution headers are never sent.
os.environ["OPENAI_API_HEADERS"] = '{"HTTP-Referer":"https://huggingface.co/spaces/saadawaissheikh/SystemsHealthcareChatbot", "X-Title":"PDF Chatbot"}'

# ✅ Policy PDF, loaded once at startup
PDF_PATH = "HealthCare Policy.pdf"
25
+
26
class TfidfEmbedding(Embeddings):
    """Embeddings implementation backed by a scikit-learn TF-IDF vectorizer.

    The vectorizer must be fitted with :meth:`fit` before any embedding
    method is called; the vector length equals the fitted vocabulary size.
    """

    def __init__(self):
        self.vectorizer = TfidfVectorizer()

    def fit(self, texts):
        """Learn the TF-IDF vocabulary and weights from ``texts``."""
        self.vectorizer.fit(texts)

    def embed_documents(self, texts):
        """Return one dense TF-IDF vector per text, as a list of float lists."""
        # The Embeddings interface is declared in terms of plain Python
        # lists, so convert the dense NumPy matrix before returning it.
        return self.vectorizer.transform(texts).toarray().tolist()

    def embed_query(self, text):
        """Return the dense TF-IDF vector for a single query, as a float list."""
        return self.vectorizer.transform([text]).toarray()[0].tolist()
38
+
39
def load_pdf_chunks(pdf_path):
    """Read a PDF and return its text as a list of chunked ``Document`` objects.

    Args:
        pdf_path: Path of the PDF file to open with pdfplumber.

    Returns:
        One ``Document`` per chunk produced by the recursive character
        splitter (chunk size 1000, overlap 50).
    """
    page_texts = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            # Pages without extractable text contribute an empty string.
            page_texts.append(page.extract_text() or "")
    full_text = "\n".join(page_texts)

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
    return [Document(page_content=piece) for piece in splitter.split_text(full_text)]
45
+
46
def setup_vectordb(docs):
    """Build a FAISS vector store over the text of ``docs``.

    A fresh ``TfidfEmbedding`` is fitted on the documents' text and then
    used to index that same text.
    """
    chunk_texts = []
    for doc in docs:
        chunk_texts.append(doc.page_content)

    tfidf = TfidfEmbedding()
    tfidf.fit(chunk_texts)
    return FAISS.from_texts(chunk_texts, tfidf)
52
+
53
def get_llm():
    """Create the chat model client used to answer questions."""
    llm_settings = {
        "model": "tngtech/deepseek-r1t2-chimera:free",
        "temperature": 0.0,
    }
    return ChatOpenAI(**llm_settings)
58
+
59
def get_qa_chain():
    """Assemble the retrieval QA chain over the policy PDF.

    Loads and chunks ``PDF_PATH``, indexes the chunks, and wires the
    resulting retriever to the chat model with a "stuff" chain whose prompt
    asks for a Yes/No answer followed by an explanation.
    """
    policy_chunks = load_pdf_chunks(PDF_PATH)
    retriever = setup_vectordb(policy_chunks).as_retriever()
    qa_prompt = PromptTemplate.from_template(
        "Answer with Yes or No first. Then explain: {context}\nQuestion: {question}"
    )
    chain_options = {
        "llm": get_llm(),
        "retriever": retriever,
        "chain_type": "stuff",
        "return_source_documents": False,
        "chain_type_kwargs": {"prompt": qa_prompt},
    }
    return RetrievalQA.from_chain_type(**chain_options)
72
+
73
# Build the retrieval chain once at import time; ask_question and
# check_tablets both reuse this single instance.
qa_chain = get_qa_chain()
74
+
75
+ # ✅ Standard PDF QA
76
def ask_question(query):
    """Answer a free-text question using the module-level QA chain.

    Any exception raised while running the chain is reported back as an
    ``"Error: ..."`` string instead of propagating to the UI.
    """
    try:
        reply = qa_chain.run(query)
    except Exception as exc:
        reply = f"Error: {exc}"
    return reply
81
+
82
+ # ✅ Extract Tablets from Image
83
# A run of alphabetic words, optionally followed by a dosage such as "500mg"
# or "500 mg". Group 1 is the name without the dosage. The pattern is a raw
# string, so each regex escape takes a single backslash.
_TABLET_LINE_RE = re.compile(
    r"\b([A-Za-z]+(?:\s+[A-Za-z]+)*)\s*(\d+mg|\d+\s*mg)?\b"
)
# Receipt words that are known not to be medicines.
_NON_MEDICINE_WORDS = frozenset({"cash", "scaling", "polish"})


def extract_tablet_names(text):
    """Extract candidate medicine names from OCR'd receipt text.

    Each line contributes at most one name: the first run of alphabetic
    words on that line, with any trailing dosage dropped. This is a
    heuristic; apart from a small exclusion list, any line containing
    letters yields a candidate.

    Args:
        text: Multi-line receipt text.

    Returns:
        Unique names in order of first appearance.
    """
    names = []
    for line in text.splitlines():
        match = _TABLET_LINE_RE.search(line)
        if match is None:
            continue
        name = match.group(1).strip()
        if name.lower() not in _NON_MEDICINE_WORDS:
            names.append(name)
    # dict.fromkeys de-duplicates while keeping a deterministic order.
    return list(dict.fromkeys(names))
92
+
93
def extract_text_from_image(img_path):
    """OCR a receipt image and return the medicine names found in it.

    Args:
        img_path: Path of the image file; ``None`` or an empty string is
            treated as "no image".

    Returns:
        The list produced by ``extract_tablet_names`` for the OCR text, or
        an empty list when no image path was given.
    """
    if not img_path:
        return []
    # Use the image as a context manager so the underlying file handle is
    # released as soon as OCR finishes.
    with Image.open(img_path) as image:
        raw_text = pytesseract.image_to_string(image)
    return extract_tablet_names(raw_text)
97
+
98
+ # ✅ Tablet Claim Checker
99
def check_tablets(img):
    """Check every medicine found on a receipt image against the policy.

    Args:
        img: Path of the uploaded receipt image. May be ``None`` or empty
            (presumably what the UI sends when the image is cleared), in
            which case an empty result is returned.

    Returns:
        One Markdown-formatted entry per medicine with the chain's answer,
        a "no tablets" message when OCR finds nothing, or an
        ``"Error: ..."`` string when OCR itself fails.
    """
    if not img:
        return ""

    # Report failures as text, in the same style as ask_question, rather
    # than letting them surface as an unhandled UI error.
    try:
        tablets = extract_text_from_image(img)
    except Exception as e:
        return f"Error: {e}"

    if not tablets:
        return "❌ No tablets found in receipt."

    entries = []
    for med in tablets:
        question = f"Is the medicine {med} covered under the healthcare policy?"
        try:
            answer = qa_chain.run(question)
        except Exception as e:
            # One failing lookup should not discard the other answers.
            answer = f"Error: {e}"
        entries.append(f"🧾 **{med}** → {answer}\n\n")
    return "".join(entries)
109
+
110
+ # ✅ Gradio UI
111
# Two-tab UI: free-text policy questions, and receipt-based claim checks.
with gr.Blocks(title="Healthcare Chatbot") as app:
    gr.Markdown("# 💬 Systems Healthcare Chatbot")
    gr.Markdown("📄 Policy document loaded. You may now ask questions or upload a medicine receipt to check claims.")

    with gr.Tab("Ask about Policy"):
        with gr.Row():
            txt = gr.Textbox(label="Your Question")
            ans = gr.Textbox(label="Answer")
        # Submitting the question box runs the QA chain and fills the answer box.
        txt.submit(fn=ask_question, inputs=txt, outputs=ans)

    with gr.Tab("Check Tablet Claim"):
        with gr.Row():
            # type="filepath" means the handler receives a path, not pixel data.
            img = gr.Image(type="filepath", label="Upload Tablet Receipt")
            out = gr.Textbox(label="Result")
        # NOTE(review): check_tablets emits Markdown bold markers (**name**);
        # a Textbox presumably shows them literally. Consider gr.Markdown.
        img.change(fn=check_tablets, inputs=img, outputs=out)

# ✅ Launch App
app.launch()