Spaces:

bhaskartripathi
/

pdfChatterSandbox

Runtime error

File size: 3,817 Bytes

1e8edc1
faa00e9
 
 
 
 
 
 
 
 
1e8edc1
 
 
b160a8b
b6c14b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1e8edc1
faa00e9
1e8edc1
faa00e9
 
 
1e8edc1
faa00e9
1e8edc1
 
 
 
 
 
 
faa00e9
 
1e8edc1
faa00e9
1e8edc1
cf0d0cb
1e8edc1
 
 
efb89c8
3aae3ba

import urllib.request 
import fitz
import re
import numpy as np
import openai
import gradio as gr
import os
from sklearn.neighbors import NearestNeighbors


# Heading and introductory blurb shown by the Markdown components of the UI.
title = "MediDiagnostix AI"
# Built from adjacent literals; the embedded "\n" and the spaces around it
# are part of the displayed text and must stay exactly as written.
description = (
    "MediDiagnostix AI allows you to upload medical reports for analysis. "
    "Just click a picture of your medical report or upload a pdf report, it will \n"
    " extract, analyze and provide you the medical interpretations of the report, "
    "potential diagnoses, and recommended follow-up actions. "
    "Furthermore, you can save diagnosis for future reference"
)

import pytesseract  # Assuming Tesseract OCR is used for image processing

def _pdf_to_text(path):
    """Return the text of every page of the PDF at *path*, one string per page."""
    # `fitz` (PyMuPDF) is imported at the top of this file.
    with fitz.open(path) as document:
        return [page.get_text("text") for page in document]


def analyze_reports(files, num_reports):
    """
    Process and analyze the uploaded reports.

    Args:
    files (list): Uploaded files (PDFs and images). May be None when nothing
        was uploaded, a single upload, or a list of uploads; each upload is
        either a path string or an object exposing the path as ``.name``.
    num_reports (int): Number of reports to analyze.

    Returns:
    str: Analysis results in a formatted text, or an explanatory message when
        the number of uploads does not match ``num_reports``.
    """
    # Normalise the upload value so an empty or single upload does not crash
    # the length check below.
    if files is None:
        uploads = []
    elif isinstance(files, (list, tuple)):
        uploads = list(files)
    else:
        uploads = [files]

    # Check if the number of files matches num_reports
    if len(uploads) != num_reports:
        return "Number of uploaded files does not match the specified number of reports."

    # Collect the extracted text of every report, in upload order.
    report_texts = []
    for upload in uploads:
        path = upload if isinstance(upload, str) else upload.name
        # Compare the extension case-insensitively so "REPORT.PDF" is not
        # mistakenly sent through OCR.
        if path.lower().endswith('.pdf'):
            # The module defines no `pdf_to_text`, so PDF pages are read by
            # the private helper above.
            report_texts.extend(_pdf_to_text(path))
        else:
            report_texts.append(image_to_text(upload))

    # Combine texts from all reports and hand them to the analysis step.
    return analyze_text(' '.join(report_texts))

def image_to_text(image_file):
    """
    Extract text from an image file using OCR.

    Args:
    image_file (file): An image upload; either a path string or an object
        whose ``.name`` attribute holds the path of the image on disk.

    Returns:
    str: Extracted text from the image, or a message starting with
        "Error in text extraction from image:" if extraction failed.
    """
    # The previous version called `Image.open`, but `Image` is never imported
    # in this file, so every call ended in the error branch. pytesseract
    # accepts a file path directly, which avoids needing PIL here at all.
    image_path = getattr(image_file, "name", image_file)
    try:
        return pytesseract.image_to_string(image_path)
    except Exception as e:
        # Deliberately best-effort: one unreadable image is reported as text
        # instead of aborting the analysis of the other reports.
        return f"Error in text extraction from image: {e}"

def analyze_text(text):
    """
    Turn the extracted report text into an analysis summary.

    Currently a stub: the input is not inspected and the same fixed message
    is returned for every call.

    Args:
    text (str): Combined text from all reports.

    Returns:
    str: Analysis results based on the text.
    """
    # Stand-in result until real analysis logic (for example a call to an
    # AI model) is plugged in here.
    placeholder_result = "Analysis results based on the processed text."
    return placeholder_result


# Build the page: heading and description on top, then a row holding the
# input controls (report count, upload, button) next to the results box.
# The CSS length now carries a unit; a bare `1200` is not a valid min-height.
with gr.Blocks(css="""#chatbot { font-size: 14px; min-height: 1200px; }""") as demo:

    gr.Markdown(f'<center><h3>{title}</h3></center>')
    gr.Markdown(description)

    with gr.Row():

        with gr.Group():
            gr.Markdown('<p style="text-align:center">Enter the number of reports to analyze</p>')
            num_reports = gr.Number(label='Number of Reports', value=1)

            with gr.Accordion("Upload Reports"):
                # `file_count="multiple"` is the Gradio option for accepting
                # several files; `allow_multiple` is not a `gr.File` argument
                # and made the component constructor fail.
                file_upload = gr.File(
                    label='Upload Reports (PDF/Image)',
                    file_types=['.pdf', '.jpg', '.png'],
                    interactive=True,
                    type="file",
                    file_count="multiple",
                )

            analyze_button = gr.Button(value='Analyze Reports')

        with gr.Group():
            analysis_results = gr.Textbox(label='Analysis Results', placeholder="Results will appear here after analysis", lines=20)

    # The click handler needs its callback: without `fn` the event
    # registration itself raises and the app never starts.
    analyze_button.click(
        fn=analyze_reports,
        inputs=[file_upload, num_reports],
        outputs=[analysis_results],
    )

demo.launch()