Spaces:

Hasnain-Ali
/

MedGen-AI

Running

App Files Community

Hasnain-Ali committed on Feb 20

Commit

40ab83f

verified ·

1 Parent(s): 948f710

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -74

app.py CHANGED Viewed

@@ -1,77 +1,12 @@
-import streamlit as st
-import pdfplumber
 import pytesseract
-from PIL import Image
-from transformers import pipeline
-import re
-# Manually specify the path for Tesseract in Linux
-pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
-# Load pre-trained Hugging Face models
-summarizer = pipeline("summarization", model="t5-small")
-medical_qa = pipeline("question-answering", model="deepset/bert-base-cased-squad2")
-# Function to extract text from PDF
-def extract_text_from_pdf(pdf_file):
-    with pdfplumber.open(pdf_file) as pdf:
-        text = "\n".join(page.extract_text() for page in pdf.pages if page.extract_text())
-    return text if text else "No text found in PDF."
-# Function to extract text from images (JPG, PNG)
-def extract_text_from_image(image_file):
-    image = Image.open(image_file)
-    text = pytesseract.image_to_string(image)
-    return text.strip() if text else "No text found in Image."
-# Function to summarize medical report
-def summarize_report(text):
-    if len(text) > 500:  # Handle long text
-        text = text[:500]
-    summary = summarizer(text, max_length=150, min_length=50, do_sample=False)
-    return summary[0]['summary_text']
-# Function to find medical terms dynamically using regex
-def extract_medical_terms(text):
-    words = re.findall(r'\b[A-Z][a-z]+(?:[ -][A-Z][a-z]+)*\b', text)
-    return list(set(words))
-# Function to explain medical terms
-def explain_term(term):
-    context = "Hypercholesterolemia is a condition with high cholesterol in the blood. Atherosclerosis refers to artery narrowing due to fat buildup."
-    response = medical_qa(question=f"What is {term}?", context=context)
-    return response["answer"]
-# Streamlit UI
-st.title("🩺 AI Medical Report Analyzer")
-st.write("Upload a medical **PDF or Image (JPG, PNG)** to get a summarized report with term explanations.")
-uploaded_file = st.file_uploader("Upload a PDF or Image", type=["pdf", "jpg", "png"])
-if uploaded_file:
-    file_type = uploaded_file.type
-    if file_type == "application/pdf":
-        text = extract_text_from_pdf(uploaded_file)
-        st.subheader("📜 Extracted Text from PDF:")
-    elif file_type in ["image/png", "image/jpeg"]:
-        text = extract_text_from_image(uploaded_file)
-        st.subheader("🖼️ Extracted Text from Image:")
-    st.text_area("Report Content:", text, height=200)
-    if st.button("Generate AI Summary"):
-        summary = summarize_report(text)
-        st.subheader("📑 AI-Generated Summary:")
-        st.markdown(f"**{summary}**")
-    if st.button("Explain Medical Terms"):
-        terms = extract_medical_terms(text)
-        if terms:
-            st.subheader("📖 Medical Term Explanations:")
-            for term in terms[:5]:  # Limit to 5 terms for efficiency
-                explanation = explain_term(term)
-                st.markdown(f"**{term}:** {explanation}")
-        else:
-            st.write("No medical terms detected.")

 import pytesseract
+import subprocess
+import streamlit as st
+# Check if Tesseract is installed
+tesseract_path = subprocess.run(["which", "tesseract"], capture_output=True, text=True).stdout.strip()
+if not tesseract_path:
+    st.error("Tesseract is not installed. Make sure 'apt.txt' contains 'tesseract-ocr'.")
+else:
+    st.success(f"Tesseract is installed at: {tesseract_path}")
+    pytesseract.pytesseract.tesseract_cmd = tesseract_path