import os
import re
import urllib.request

import fitz  # PyMuPDF, for PDF text extraction
import gradio as gr
import numpy as np
import openai
import pytesseract  # Assuming Tesseract OCR is used for image processing
from PIL import Image
from sklearn.neighbors import NearestNeighbors

title = 'MediDiagnostix AI'
description = """MediDiagnostix AI allows you to upload medical reports for analysis. Take a picture of your medical report or upload a PDF report; the app extracts the text, analyzes it, and provides medical interpretations, potential diagnoses, and recommended follow-up actions. You can also save a diagnosis for future reference."""


def analyze_reports(files, num_reports):
    """
    Process and analyze the uploaded reports.

    Args:
        files (list): List of uploaded files (PDFs and images).
        num_reports (int): Number of reports to analyze.

    Returns:
        str: Analysis results as formatted text.
    """
    # Check that the number of uploaded files matches num_reports
    # (gr.Number returns a float, so cast before comparing).
    if len(files) != int(num_reports):
        return "Number of uploaded files does not match the specified number of reports."

    # Collect the extracted text from each report
    report_texts = []
    for file in files:
        if file.name.endswith('.pdf'):
            # PDF report: pdf_to_text returns a list of per-page strings
            report_texts.extend(pdf_to_text(file.name))
        else:
            # Image report: run OCR on the uploaded file
            report_texts.append(image_to_text(file.name))

    # Combine the text from all reports and analyze it
    combined_text = ' '.join(report_texts)
    return analyze_text(combined_text)


def image_to_text(image_path):
    """
    Extract text from an image file using OCR.

    Args:
        image_path (str): Path to an image file.

    Returns:
        str: Extracted text from the image.
    """
    try:
        image = Image.open(image_path)
        return pytesseract.image_to_string(image)
    except Exception as e:
        return f"Error in text extraction from image: {e}"


def analyze_text(text):
    """
    Analyze the extracted text and generate insights.

    Args:
        text (str): Combined text from all reports.

    Returns:
        str: Analysis results based on the text.
    """
    # Placeholder for the text analysis logic
    # (e.g. calling an AI model on the extracted report text).
    # Returns a dummy response for demonstration purposes.
    return "Analysis results based on the processed text."
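
# pdf_to_text() is called by analyze_reports() but is not defined in this
# section. The sketch below is a minimal, assumed implementation built on
# PyMuPDF (imported above as fitz), returning one text string per page so
# analyze_reports() can extend() its list; adjust it if the project defines
# this helper elsewhere.
def pdf_to_text(pdf_path):
    """Extract text from each page of a PDF using PyMuPDF."""
    doc = fitz.open(pdf_path)
    pages = [page.get_text() for page in doc]
    doc.close()
    return pages


# analyze_text() above is an intentional placeholder. Since the openai package
# is imported but never used, one plausible implementation is to send the
# combined report text to a chat model, sketched below as a separate,
# hypothetical helper (not wired into the UI). It assumes the pre-1.0 openai
# SDK, an OPENAI_API_KEY environment variable, and a placeholder model name.
def analyze_text_with_llm(text):
    """Illustrative LLM-backed analysis of the extracted report text."""
    openai.api_key = os.environ.get("OPENAI_API_KEY")  # assumed env var
    prompt = (
        "Interpret the following medical report text. Summarize key findings, "
        "potential diagnoses, and recommended follow-up actions:\n\n" + text
    )
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",  # placeholder model name
        messages=[{"role": "user", "content": prompt}],
    )
    return response["choices"][0]["message"]["content"]
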
with gr.Blocks(css="""#chatbot { font-size: 14px; min-height: 1200px; }""") as demo:
    gr.Markdown(f'# {title}')
    gr.Markdown(description)

    with gr.Row():
        with gr.Group():
            gr.Markdown('Enter the number of reports to analyze')
            num_reports = gr.Number(label='Number of Reports', value=1)
            with gr.Accordion("Upload Reports"):
                file_upload = gr.File(
                    label='Upload Reports (PDF/Image)',
                    file_types=['.pdf', '.jpg', '.png'],
                    interactive=True,
                    type="file",
                    file_count="multiple",  # gr.File uses file_count, not allow_multiple
                )
            analyze_button = gr.Button(value='Analyze Reports')
        with gr.Group():
            analysis_results = gr.Textbox(
                label='Analysis Results',
                placeholder="Results will appear here after analysis",
                lines=20,
            )

    analyze_button.click(
        fn=analyze_reports,  # gr.Button.click takes fn, not func
        inputs=[file_upload, num_reports],
        outputs=[analysis_results],
    )

demo.launch()