import os
import re
import urllib.request

import fitz  # PyMuPDF, used for PDF text extraction
import gradio as gr
import numpy as np
import openai
import pytesseract  # Tesseract OCR is used for image text extraction
from PIL import Image
from sklearn.neighbors import NearestNeighbors

title = 'MediDiagnostix AI'
description = """MediDiagnostix AI allows you to upload medical reports for analysis. Take a picture of your medical report or upload a PDF report; the app will
extract and analyze the text, then provide medical interpretations of the report, potential diagnoses, and recommended follow-up actions. You can also save a diagnosis for future reference."""
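# Python dependencies inferred from the imports above (a likely requirements
# list for this Space, versions unpinned): gradio, PyMuPDF, pytesseract,
# Pillow, openai, numpy, scikit-learn.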
def analyze_reports(files, num_reports):
    """
    Process and analyze the uploaded reports.

    Args:
        files (list): List of paths to the uploaded files (PDFs and images).
        num_reports (int): Number of reports to analyze.

    Returns:
        str: Analysis results as formatted text.
    """
    # Check that the number of uploaded files matches num_reports
    if not files or len(files) != int(num_reports):
        return "Number of uploaded files does not match the specified number of reports."

    # Collect the text extracted from each report
    report_texts = []
    for file_path in files:
        # Check the file type and process accordingly
        if file_path.lower().endswith('.pdf'):
            # Process a PDF file (one text string per page)
            pdf_text = pdf_to_text(file_path)
            report_texts.extend(pdf_text)
        else:
            # Process an image file via OCR
            image_text = image_to_text(file_path)
            report_texts.append(image_text)

    # Combine the text from all reports
    combined_text = ' '.join(report_texts)

    # Analyze the combined text (analyze_text below is a placeholder)
    analysis_results = analyze_text(combined_text)
    return analysis_results
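# pdf_to_text is called above but was not defined in this file. The sketch
# below is one plausible implementation using PyMuPDF (imported as `fitz`);
# it returns a list with the text of each page so analyze_reports can extend
# report_texts with it.
def pdf_to_text(pdf_path):
    """
    Extract text from a PDF file, one string per page.

    Args:
        pdf_path (str): Path to a PDF file.

    Returns:
        list[str]: Extracted text for each page.
    """
    try:
        with fitz.open(pdf_path) as doc:
            return [page.get_text() for page in doc]
    except Exception as e:
        return [f"Error in text extraction from PDF: {e}"]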
def image_to_text(image_path):
    """
    Extract text from an image file using OCR.

    Args:
        image_path (str): Path to an image file.

    Returns:
        str: Extracted text from the image.
    """
    try:
        # Open the image file
        image = Image.open(image_path)
        # Extract text using Tesseract OCR
        extracted_text = pytesseract.image_to_string(image)
        return extracted_text
    except Exception as e:
        return f"Error in text extraction from image: {e}"
def analyze_text(text):
    """
    Analyze the extracted text and generate insights.

    Args:
        text (str): Combined text from all reports.

    Returns:
        str: Analysis results based on the text.
    """
    # Placeholder for the text analysis logic.
    # This could involve calling an AI model, post-processing the text, etc.
    # Returning a dummy response for demonstration purposes.
    return "Analysis results based on the processed text."
with gr.Blocks(css="""#chatbot { font-size: 14px; min-height: 1200px; }""") as demo:
    gr.Markdown(f'<center><h3>{title}</h3></center>')
    gr.Markdown(description)
    with gr.Row():
        with gr.Group():
            gr.Markdown('<p style="text-align:center">Enter the number of reports to analyze</p>')
            num_reports = gr.Number(label='Number of Reports', value=1, precision=0)
            with gr.Accordion("Upload Reports"):
                file_upload = gr.File(label='Upload Reports (PDF/Image)', file_types=['.pdf', '.jpg', '.png'],
                                      interactive=True, type="filepath", file_count="multiple")
            analyze_button = gr.Button(value='Analyze Reports')
        with gr.Group():
            analysis_results = gr.Textbox(label='Analysis Results', placeholder="Results will appear here after analysis", lines=20)

    analyze_button.click(
        fn=analyze_reports,
        inputs=[file_upload, num_reports],
        outputs=[analysis_results],
    )

demo.launch()