Hasnain-Ali committed on
Commit
46d5a21
·
verified ·
1 Parent(s): badd4d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -9
app.py CHANGED
@@ -1,12 +1,76 @@
1
- import pytesseract
2
- import subprocess
3
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
- # Check if Tesseract is installed
6
- tesseract_path = subprocess.run(["which", "tesseract"], capture_output=True, text=True).stdout.strip()
 
 
7
 
8
- if not tesseract_path:
9
- st.error("Tesseract is not installed. Make sure 'apt.txt' contains 'tesseract-ocr'.")
10
- else:
11
- st.success(f"Tesseract is installed at: {tesseract_path}")
12
- pytesseract.pytesseract.tesseract_cmd = tesseract_path
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import pdfplumber
3
+ import pytesseract
4
+ from PIL import Image
5
+ from transformers import pipeline
6
+ import re
7
+
8
+ # Ensure Tesseract-OCR is properly configured (Uncomment & update path if needed)
9
+ # pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
10
+
11
# Load pre-trained Hugging Face models.
# Streamlit re-executes this script on every widget interaction, so the
# pipelines are built inside cached factories: each model is constructed once
# and the same object is handed back on later reruns.
@st.cache_resource
def _load_summarizer():
    """Build the T5 summarization pipeline (cached across script reruns)."""
    return pipeline("summarization", model="t5-small")


@st.cache_resource
def _load_medical_qa():
    """Build the extractive question-answering pipeline (cached across script reruns)."""
    return pipeline("question-answering", model="deepset/bert-base-cased-squad2")


# Module-level names are kept so the helper functions can keep using them.
summarizer = _load_summarizer()
medical_qa = _load_medical_qa()
14
+
15
+ # Function to extract text from PDF
16
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        pdf_file: Whatever ``pdfplumber.open`` accepts; the UI passes the
            uploaded file object.

    Returns:
        The concatenated page text, or the string "No text found in PDF."
        when no page yields any text.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # Extract each page exactly once; the previous version called
        # extract_text() twice per page (once to filter, once for the value).
        page_texts = [page.extract_text() for page in pdf.pages]

    # Pages without extractable text give None or "" and are skipped.
    text = "\n".join(chunk for chunk in page_texts if chunk)
    return text if text else "No text found in PDF."
20
+
21
+ # Function to extract text from images (JPG, PNG)
22
def extract_text_from_image(image_file):
    """Run Tesseract OCR on an image and return the recognised text.

    Args:
        image_file: Whatever ``PIL.Image.open`` accepts; the UI passes the
            uploaded file object.

    Returns:
        The OCR text with surrounding whitespace removed, or the string
        "No text found in Image." when nothing but whitespace was recognised.
    """
    # The context manager releases the image's resources once OCR is done.
    with Image.open(image_file) as image:
        text = pytesseract.image_to_string(image)

    # Strip *before* testing for emptiness: whitespace-only OCR output is
    # truthy and previously came back as "" instead of the fallback message.
    cleaned = text.strip() if text else ""
    return cleaned if cleaned else "No text found in Image."
26
+
27
+ # Function to summarize medical report
28
def summarize_report(text):
    """Summarise the opening of a report with the module-level ``summarizer``.

    Only the first 500 characters of ``text`` are passed to the pipeline.

    Args:
        text: The report text.

    Returns:
        The ``summary_text`` field of the first result from the pipeline.
    """
    # Slicing leaves text of 500 characters or fewer unchanged.
    excerpt = text[:500]
    results = summarizer(excerpt, max_length=150, min_length=50, do_sample=False)
    first_result = results[0]
    return first_result['summary_text']
33
+
34
+ # Function to find medical terms dynamically using regex
35
def extract_medical_terms(text):
    """Collect candidate terms: capitalised words and capitalised phrases.

    A candidate is a word starting with an uppercase letter followed by
    lowercase letters, optionally extended by further such words joined with
    a single space or hyphen (e.g. "Non-Hodgkin Lymphoma").

    Args:
        text: The text to scan.

    Returns:
        A list of unique candidates in order of first appearance.
    """
    words = re.findall(r'\b[A-Z][a-z]+(?:[ -][A-Z][a-z]+)*\b', text)
    # dict.fromkeys removes duplicates while keeping first-seen order.
    # list(set(...)) gave an arbitrary order, so a caller slicing the first
    # few terms saw a different selection from run to run.
    return list(dict.fromkeys(words))
38
+
39
+ # Function to explain medical terms
40
def explain_term(term):
    """Explain a term using the built-in reference context.

    The question-answering model is extractive: it always returns some span
    of the context, even for a term the context never mentions. To avoid
    presenting an unrelated span as an explanation, the model is only
    queried when the term actually appears in the context.

    Args:
        term: The term to explain.

    Returns:
        The answer span chosen by the model, or the string
        "No explanation available for this term." when the term is blank or
        does not occur in the reference context.
    """
    context = "Hypercholesterolemia is a condition with high cholesterol in the blood. Atherosclerosis refers to artery narrowing due to fat buildup."

    needle = term.strip().lower()
    # An empty needle would match any string, so it is rejected explicitly.
    if not needle or needle not in context.lower():
        return "No explanation available for this term."

    response = medical_qa(question=f"What is {term}?", context=context)
    return response["answer"]
44
+
45
# Streamlit UI: upload a report, show its extracted text, then offer an AI
# summary and term explanations on demand.
st.title("🩺 AI Medical Report Analyzer")
st.write("Upload a medical **PDF or Image (JPG, PNG)** to get a summarized report with term explanations.")

uploaded_file = st.file_uploader("Upload a PDF or Image", type=["pdf", "jpg", "png"])

if uploaded_file:
    file_type = uploaded_file.type
    # Stays None when the MIME type matches neither branch below; previously
    # that case reached st.text_area with `text` unbound and raised NameError.
    text = None

    if file_type == "application/pdf":
        text = extract_text_from_pdf(uploaded_file)
        st.subheader("📜 Extracted Text from PDF:")
    elif file_type in ["image/png", "image/jpeg"]:
        text = extract_text_from_image(uploaded_file)
        st.subheader("🖼️ Extracted Text from Image:")
    else:
        st.error(f"Unsupported file type: {file_type}")

    if text is not None:
        st.text_area("Report Content:", text, height=200)

        if st.button("Generate AI Summary"):
            summary = summarize_report(text)
            st.subheader("📑 AI-Generated Summary:")
            st.markdown(f"**{summary}**")

        if st.button("Explain Medical Terms"):
            terms = extract_medical_terms(text)
            if terms:
                st.subheader("📖 Medical Term Explanations:")
                for term in terms[:5]:  # Limit to 5 terms for efficiency
                    explanation = explain_term(term)
                    st.markdown(f"**{term}:** {explanation}")
            else:
                st.write("No medical terms detected.")