Hasnain-Ali committed on
Commit
1cb485a
·
verified ·
1 Parent(s): 7482aad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -21
app.py CHANGED
@@ -5,6 +5,9 @@ from PIL import Image
5
  from transformers import pipeline
6
  import re
7
 
 
 
 
8
  # Load pre-trained Hugging Face models
9
  summarizer = pipeline("summarization", model="t5-small")
10
  medical_qa = pipeline("question-answering", model="deepset/bert-base-cased-squad2")
@@ -13,56 +16,61 @@ medical_qa = pipeline("question-answering", model="deepset/bert-base-cased-squad
13
  def extract_text_from_pdf(pdf_file):
14
  with pdfplumber.open(pdf_file) as pdf:
15
  text = "\n".join(page.extract_text() for page in pdf.pages if page.extract_text())
16
- return text if text else "No text found."
17
 
18
- # Function to extract text from images
19
  def extract_text_from_image(image_file):
20
  image = Image.open(image_file)
21
  text = pytesseract.image_to_string(image)
22
- return text if text.strip() else "No text found in the image."
23
 
24
  # Function to summarize medical report
25
  def summarize_report(text):
26
- text = text[:1000] # Limiting input text length for better summarization accuracy
 
27
  summary = summarizer(text, max_length=150, min_length=50, do_sample=False)
28
  return summary[0]['summary_text']
29
 
30
- # Function to explain medical terms dynamically
31
  def extract_medical_terms(text):
32
- words = re.findall(r'\b[A-Z][a-z]+(?:\s[A-Z][a-z]+)*\b', text)
33
- return list(set(words))[:5] # Extract and limit to top 5 terms
34
 
35
- # Function to get explanation for medical terms
36
  def explain_term(term):
37
- context = "Medical terminology explanation."
38
  response = medical_qa(question=f"What is {term}?", context=context)
39
  return response["answer"]
40
 
41
  # Streamlit UI
42
- st.title("🩺 AI Medical Report Summarizer & Explanation Assistant")
43
- st.write("Upload a medical report (PDF/Image) and get a simplified summary with medical term explanations.")
 
 
44
 
45
- uploaded_file = st.file_uploader("Upload Medical Report (PDF or Image)", type=["pdf", "png", "jpg"])
46
  if uploaded_file:
47
- if uploaded_file.type == "application/pdf":
 
 
48
  text = extract_text_from_pdf(uploaded_file)
49
- else:
 
50
  text = extract_text_from_image(uploaded_file)
51
-
52
- st.subheader("📜 Extracted Medical Report Text:")
53
  st.text_area("Report Content:", text, height=200)
54
-
55
  if st.button("Generate AI Summary"):
56
  summary = summarize_report(text)
57
  st.subheader("📑 AI-Generated Summary:")
58
- st.markdown(f"<div style='background-color:#f0f8ff;padding:10px;border-radius:5px;'>{summary}</div>", unsafe_allow_html=True)
59
-
60
  if st.button("Explain Medical Terms"):
61
  terms = extract_medical_terms(text)
62
  if terms:
63
  st.subheader("📖 Medical Term Explanations:")
64
- for term in terms:
65
  explanation = explain_term(term)
66
  st.markdown(f"**{term}:** {explanation}")
67
  else:
68
- st.write("No specific medical terms detected in the report.")
 
5
  from transformers import pipeline
6
  import re
7
 
8
+ # Ensure Tesseract-OCR is properly configured (Uncomment & update path if needed)
9
+ # pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
10
+
# Load pre-trained Hugging Face models.
# Streamlit re-executes this script from the top on every widget interaction,
# so the pipelines are built inside a cached factory and reused across reruns
# rather than being constructed again each time.
# NOTE(review): relies on `st` (streamlit) being imported above this point and
# on a Streamlit version that provides `st.cache_resource` - confirm.
@st.cache_resource
def _load_pipelines():
    """Build the summarization and question-answering pipelines once."""
    return (
        pipeline("summarization", model="t5-small"),
        pipeline("question-answering", model="deepset/bert-base-cased-squad2"),
    )


summarizer, medical_qa = _load_pipelines()
 
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, joined by newlines.

    Args:
        pdf_file: Whatever ``pdfplumber.open`` accepts; the UI passes the
            uploaded file object.

    Returns:
        The concatenated page text, or ``"No text found in PDF."`` when no
        page yields any text.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # Run extraction exactly once per page and keep the result, instead
        # of extracting once to filter and a second time to collect.
        page_texts = [page.extract_text() for page in pdf.pages]
    # Pages whose extraction result is empty/falsy are skipped.
    text = "\n".join(chunk for chunk in page_texts if chunk)
    return text or "No text found in PDF."
20
 
# Function to extract text from images (JPG, PNG)
def extract_text_from_image(image_file):
    """Run OCR on an image and return the recognized text, stripped.

    Args:
        image_file: Whatever ``PIL.Image.open`` accepts; the UI passes the
            uploaded file object.

    Returns:
        The OCR text without leading/trailing whitespace, or
        ``"No text found in Image."`` when nothing but whitespace was
        recognized.
    """
    image = Image.open(image_file)
    text = pytesseract.image_to_string(image)
    # Strip *before* testing for emptiness: whitespace-only OCR output is
    # truthy, so testing the raw string would return "" instead of the
    # fallback message.
    stripped = text.strip() if text else ""
    return stripped or "No text found in Image."
26
 
# Function to summarize medical report
def summarize_report(text):
    """Summarize the first 500 characters of ``text``.

    Args:
        text: Report text to summarize.

    Returns:
        The ``summary_text`` field of the first result produced by the
        module-level ``summarizer`` pipeline.
    """
    # Slicing leaves inputs of 500 characters or fewer unchanged, so no
    # explicit length check is needed.
    excerpt = text[:500]
    results = summarizer(excerpt, max_length=150, min_length=50, do_sample=False)
    first_result = results[0]
    return first_result["summary_text"]
33
 
# Function to find medical terms dynamically using regex
def extract_medical_terms(text):
    """Return the unique capitalized terms in ``text``, in order of appearance.

    A term is one Capitalized word, or several Capitalized words joined by
    single spaces or hyphens (e.g. ``"Blood Pressure"``). The pattern is
    purely lexical: it matches any such phrase, medical or not.

    Args:
        text: Text to scan.

    Returns:
        A list of distinct matches ordered by first occurrence.
    """
    words = re.findall(r'\b[A-Z][a-z]+(?:[ -][A-Z][a-z]+)*\b', text)
    # dict.fromkeys de-duplicates while keeping insertion order; a set would
    # return the terms in an arbitrary order that can differ between runs,
    # making any "first N terms" selection by the caller unstable.
    return list(dict.fromkeys(words))
38
 
# Function to explain medical terms
def explain_term(term, context=None):
    """Ask the question-answering pipeline what ``term`` is.

    Args:
        term: The term to explain; it is inserted into the question
            ``"What is {term}?"``.
        context: Optional passage the answer is taken from. When omitted,
            a built-in two-sentence passage about hypercholesterolemia and
            atherosclerosis is used, so existing one-argument calls behave
            as before.

    Returns:
        The ``"answer"`` field of the pipeline response.
    """
    if context is None:
        # Default passage only describes two conditions; callers with a
        # richer passage (e.g. the report itself) can pass it explicitly.
        context = "Hypercholesterolemia is a condition with high cholesterol in the blood. Atherosclerosis refers to artery narrowing due to fat buildup."
    response = medical_qa(question=f"What is {term}?", context=context)
    return response["answer"]
44
 
# Streamlit UI
st.title("🩺 AI Medical Report Analyzer")
st.write("Upload a medical **PDF or Image (JPG, PNG)** to get a summarized report with term explanations.")

uploaded_file = st.file_uploader("Upload a PDF or Image", type=["pdf", "jpg", "png"])

if uploaded_file:
    file_type = uploaded_file.type

    # Stays None when the reported MIME type matches no branch, so the rest
    # of the page is skipped instead of failing on an unbound `text`.
    text = None
    if file_type == "application/pdf":
        text = extract_text_from_pdf(uploaded_file)
        st.subheader("📜 Extracted Text from PDF:")
    elif file_type in ["image/png", "image/jpeg"]:
        text = extract_text_from_image(uploaded_file)
        st.subheader("🖼️ Extracted Text from Image:")
    else:
        st.error(f"Unsupported file type: {file_type}")

    if text is not None:
        st.text_area("Report Content:", text, height=200)

        if st.button("Generate AI Summary"):
            summary = summarize_report(text)
            st.subheader("📑 AI-Generated Summary:")
            st.markdown(f"**{summary}**")

        if st.button("Explain Medical Terms"):
            terms = extract_medical_terms(text)
            if terms:
                st.subheader("📖 Medical Term Explanations:")
                for term in terms[:5]:  # Limit to 5 terms for efficiency
                    explanation = explain_term(term)
                    st.markdown(f"**{term}:** {explanation}")
            else:
                st.write("No medical terms detected.")