import urllib.request
import fitz  # PyMuPDF, used for PDF text extraction
import re
import numpy as np
import openai
import gradio as gr
import os
import pytesseract  # Tesseract OCR wrapper, used for image text extraction
from PIL import Image
from sklearn.neighbors import NearestNeighbors
title = 'MediDiagnostix AI'
description = """MediDiagnostix AI lets you upload medical reports for analysis. Take a picture of your medical report or upload a PDF; the app will
extract the text, analyze it, and provide medical interpretations, potential diagnoses, and recommended follow-up actions. You can also save a diagnosis for future reference."""
def analyze_reports(files, num_reports):
    """
    Process and analyze the uploaded reports.

    Args:
        files (list): List of uploaded files (PDFs and images).
        num_reports (int): Number of reports to analyze.

    Returns:
        str: Analysis results as formatted text.
    """
    # Check that the number of uploaded files matches num_reports
    if files is None or len(files) != int(num_reports):
        return "Number of uploaded files does not match the specified number of reports."

    # Collect the extracted text from each report
    report_texts = []
    for file in files:
        if file.name.endswith('.pdf'):
            # Process PDF file (pdf_to_text returns a list of page texts)
            pdf_text = pdf_to_text(file.name)
            report_texts.extend(pdf_text)
        else:
            # Process image file via OCR
            image_text = image_to_text(file.name)
            report_texts.append(image_text)

    # Combine the text from all reports
    combined_text = ' '.join(report_texts)

    # Analyze the combined text (placeholder for the actual analysis logic)
    analysis_results = analyze_text(combined_text)
    return analysis_results
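# pdf_to_text is called above but is not defined anywhere in this file.
# The sketch below is one possible implementation using PyMuPDF (fitz),
# which is already imported; it returns a list of per-page texts, matching
# the report_texts.extend(...) call in analyze_reports.
def pdf_to_text(path):
    """
    Extract text from a PDF file, one string per page.

    Args:
        path (str): Path to the PDF file.

    Returns:
        list[str]: Extracted text for each page.
    """
    doc = fitz.open(path)
    pages = [page.get_text("text") for page in doc]
    doc.close()
    return pages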
def image_to_text(image_file):
    """
    Extract text from an image file using OCR.

    Args:
        image_file: Path or file-like object pointing to an image.

    Returns:
        str: Extracted text from the image.
    """
    try:
        # Read the image file
        image = Image.open(image_file)
        # Extract text using Tesseract OCR
        extracted_text = pytesseract.image_to_string(image)
        return extracted_text
    except Exception as e:
        return f"Error in text extraction from image: {e}"
def analyze_text(text):
    """
    Analyze the extracted text and generate insights.

    Args:
        text (str): Combined text from all reports.

    Returns:
        str: Analysis results based on the text.
    """
    # Placeholder for the text analysis logic.
    # This could involve calling an AI model, processing the text, etc.
    # A dummy response is returned for demonstration purposes.
    return "Analysis results based on the processed text."
with gr.Blocks(css="""#chatbot { font-size: 14px; min-height: 1200px; }""") as demo:
    gr.Markdown(f'<center><h3>{title}</h3></center>')
    gr.Markdown(description)

    with gr.Row():
        with gr.Group():
            gr.Markdown('<p style="text-align:center">Enter the number of reports to analyze</p>')
            num_reports = gr.Number(label='Number of Reports', value=1)
            with gr.Accordion("Upload Reports"):
                file_upload = gr.File(label='Upload Reports (PDF/Image)', file_types=['.pdf', '.jpg', '.png'], interactive=True, type="file", file_count="multiple")
            analyze_button = gr.Button(value='Analyze Reports')

        with gr.Group():
            analysis_results = gr.Textbox(label='Analysis Results', placeholder="Results will appear here after analysis", lines=20)

    analyze_button.click(
        fn=analyze_reports,
        inputs=[file_upload, num_reports],
        outputs=[analysis_results],
    )

demo.launch()