saadawaissheikh commited on
Commit
021cd91
·
verified ·
1 Parent(s): 75aaf21

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +141 -0
app.py CHANGED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import pdfplumber
4
+ import re
5
+
6
+ from langchain.docstore.document import Document
7
+ from langchain.vectorstores import FAISS
8
+ from langchain.embeddings.base import Embeddings
9
+ from sklearn.feature_extraction.text import TfidfVectorizer
10
+ from langchain.chains import RetrievalQA
11
+ from langchain.prompts import PromptTemplate
12
+ from langchain_openai import ChatOpenAI
13
+
14
+
15
+
16
# Copy the OpenRouter credentials/endpoint into the OPENAI_* environment
# variables. The dict literal is fully evaluated before anything is written,
# so a missing OPENROUTER_API_KEY still raises KeyError here, at import time.
os.environ.update(
    {
        "OPENAI_API_KEY": os.environ["OPENROUTER_API_KEY"],
        "OPENAI_API_BASE": "https://openrouter.ai/api/v1",
        "OPENAI_API_HEADERS": '{"HTTP-Referer":"https://huggingface.co", "X-Title":"PDF-RAG"}',
    }
)
19
+
20
+
21
# Section-aware PDF extractor

# Text removed from every page before sectioning (presumably the company
# address / e-mail boilerplate repeated on each page -- confirm against the PDF).
_ADDRESS_RE = re.compile(r'Systems Campus.*?Lahore')
_EMAIL_RE = re.compile(r'E-mail:.*?systemsltd\.com')

# A section heading is a line that starts with a capital letter, continues
# with 3-50 more non-newline characters, and is followed by a colon.
# MULTILINE "^" matches after every newline AND at the very start of the text,
# so a heading on the first line of the document is recognised as well.
_SECTION_HEADING_RE = re.compile(r"^([A-Z][^\n]{3,50}):", re.MULTILINE)


def extract_clean_sections(file_path):
    """Split a PDF into one Document per "Heading:" section.

    Args:
        file_path: Path of the PDF, passed to ``pdfplumber.open``.

    Returns:
        A list of ``Document`` objects. Each has ``page_content`` of the form
        ``"<title>:\\n<content>"`` and ``metadata={"section": <title>}``.
        Sections whose stripped content is 20 characters or shorter are
        dropped, as is any text that precedes the first heading.
    """
    page_texts = []
    with pdfplumber.open(file_path) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            if not text:
                # Pages without extractable text contribute nothing.
                continue
            text = _ADDRESS_RE.sub('', text)
            text = _EMAIL_RE.sub('', text)
            page_texts.append(text + "\n")
    full_text = "".join(page_texts)

    # Splitting on a pattern with one capture group yields
    # [preamble, title1, content1, title2, content2, ...].
    parts = _SECTION_HEADING_RE.split(full_text)

    docs = []
    for raw_title, raw_content in zip(parts[1::2], parts[2::2]):
        title = raw_title.strip()
        content = raw_content.strip()
        if len(content) > 20:
            docs.append(
                Document(
                    page_content=f"{title}:\n{content}",
                    metadata={"section": title},
                )
            )
    return docs
42
+
43
+
44
# TF-IDF embedding for RAG
class TfidfEmbedding(Embeddings):
    """Embeddings implementation backed by a scikit-learn TfidfVectorizer.

    ``fit`` must be called with the corpus before ``embed_documents`` or
    ``embed_query``; the vectorizer cannot transform text until it is fitted.
    """

    def __init__(self):
        self.vectorizer = TfidfVectorizer()

    def fit(self, texts):
        """Fit the underlying vectorizer on ``texts``."""
        self.vectorizer.fit(texts)

    def embed_documents(self, texts):
        """Return one TF-IDF vector per text as a list of lists of floats."""
        # .tolist() converts the NumPy matrix to plain Python lists, which is
        # the return type the Embeddings interface declares.
        return self.vectorizer.transform(texts).toarray().tolist()

    def embed_query(self, text):
        """Return the TF-IDF vector of a single query as a list of floats."""
        return self.vectorizer.transform([text]).toarray()[0].tolist()
57
+
58
+
59
# Prompt text; {context} and {question} are the two placeholders declared as
# input variables on the PromptTemplate below.
TEMPLATE = """
You are a strict healthcare policy checker for Systems Ltd.

Always begin your answer clearly:
- Say "Yes, ..." if the claim is valid
- Say "No, ..." if the claim is not valid
- Say "Partially, ..." if it's conditionally allowed

Use the following policy information to support your answer.

{context}

Question: {question}
Answer:
"""

custom_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=TEMPLATE,
)

# Global state: both names are None until load_policy() assigns them.
retriever, qa_chain = None, None
81
+
82
+
83
# ✅ Process the PDF once when button is clicked
def load_policy():
    """Build the retriever and QA chain from ``healthcare_policy.pdf``.

    On success the module-level ``retriever`` and ``qa_chain`` are replaced
    together and a confirmation message is returned. On any failure the
    globals are left untouched and an ``"Error: ..."`` string is returned,
    the same way ``ask_policy_question`` reports errors, so the message
    appears in the status textbox.

    Returns:
        A human-readable status string.
    """
    global retriever, qa_chain
    try:
        docs = extract_clean_sections("healthcare_policy.pdf")
        texts = [doc.page_content for doc in docs]
        if not texts:
            # Without this guard the vectorizer is fitted on an empty corpus
            # and fails with an error that does not mention the PDF.
            return "Error: no policy sections were found in healthcare_policy.pdf."

        embedder = TfidfEmbedding()
        embedder.fit(texts)
        vectordb = FAISS.from_texts(texts, embedder)
        new_retriever = vectordb.as_retriever()

        llm = ChatOpenAI(
            model="mistralai/mixtral-8x7b",
            base_url="https://openrouter.ai/api/v1",
            api_key=os.getenv("OPENAI_API_KEY"),
            default_headers={
                "HTTP-Referer": "https://huggingface.co",
                "X-Title": "PDF-RAG",
            },
            temperature=0.0,
        )

        new_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=new_retriever,
            return_source_documents=False,
            chain_type_kwargs={"prompt": custom_prompt},
        )
    except Exception as e:  # UI boundary: report the failure instead of raising
        return f"Error: {str(e)}"

    # Publish both objects only after everything was built successfully, so a
    # failed load never leaves a retriever without a matching chain.
    retriever = new_retriever
    qa_chain = new_chain
    return "Policy loaded. You may now ask questions."
114
+
115
+
116
# ✅ Answer a claim question
def ask_policy_question(question):
    """Answer a claim question with the loaded QA chain.

    Args:
        question: The user's question text. ``None``, empty, or
            whitespace-only input is rejected without calling the chain.

    Returns:
        The chain's answer, or a message asking the user to enter a question,
        to load the policy first, or describing the error that occurred.
    """
    if not question or not question.strip():
        # Avoid a pointless LLM call for a blank question.
        return "Please enter a claim question."
    if qa_chain is None:
        return "Please click 'Ask about claim' to load the policy first."
    try:
        return qa_chain.run(question)
    except Exception as e:  # UI boundary: surface the error in the answer box
        return f"Error: {str(e)}"
124
+
125
+
126
# ✅ Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## SL HealthCare Claim Checker (RAG)")

    # Loading is a separate, explicit step: the button runs load_policy() and
    # shows its returned status string in the textbox.
    load_btn = gr.Button("📥 Ask about claim (Load Policy)")
    load_status = gr.Textbox(label="Status")
    load_btn.click(fn=load_policy, outputs=load_status)

    with gr.Row():
        question = gr.Textbox(label="Enter your claim question")
        ask_btn = gr.Button("Ask")

    answer = gr.Textbox(label="Answer", lines=6)
    ask_btn.click(fn=ask_policy_question, inputs=question, outputs=answer)

demo.launch()