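"""Gradio app for batch image enhancement and captioning.

Uploaded images can be upscaled 2x/4x/8x with Real-ESRGAN, captioned with
BLIP (Salesforce/blip-image-captioning-base), and downloaded as a ZIP archive.
"""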
import torch
from PIL import Image
from RealESRGAN import RealESRGAN
from transformers import BlipProcessor, BlipForConditionalGeneration  # BLIP model for image captioning
import gradio as gr
import numpy as np
import io
import zipfile
import os
# Set the device to CUDA if available, otherwise CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Load the RealESRGAN models for enhancement
def load_model(scale):
    model = RealESRGAN(device, scale=scale)
    weights_path = f'weights/RealESRGAN_x{scale}.pth'
    try:
        model.load_weights(weights_path, download=True)
        print(f"Weights for scale {scale} loaded successfully.")
    except Exception as e:
        print(f"Error loading weights for scale {scale}: {e}")
    return model
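# One model is instantiated per supported upscaling factor; all three stay
# resident in memory, and enhance_image() selects the matching one per request.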
model2 = load_model(2)
model4 = load_model(4)
model8 = load_model(8)
# Load Hugging Face model and processor for image description
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
caption_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
def enhance_image(image, scale):
    try:
        image_np = np.array(image.convert('RGB'))
        model = model2 if scale == '2x' else model4 if scale == '4x' else model8
        result = model.predict(image_np)
        return Image.fromarray(np.uint8(result))
    except Exception as e:
        print(f"Error enhancing image: {e}")
        return image  # fall back to the original image if enhancement fails
def describe_image(image):
    inputs = processor(image, return_tensors="pt").to(device)
    generated_ids = caption_model.generate(**inputs)
    description = processor.decode(generated_ids[0], skip_special_tokens=True)
    return description
def process_images(image_files, enhance, scale, generate_description):
    processed_images = []
    descriptions = []
    zip_buffer = io.BytesIO()
    for image_file in image_files:
        # gr.Files may yield plain paths or tempfile objects depending on the Gradio version
        file_path = image_file if isinstance(image_file, str) else image_file.name
        image = Image.open(file_path).convert('RGB')
        if enhance:
            image = enhance_image(image, scale)
        if generate_description:
            descriptions.append(describe_image(image))
        # Save the (optionally enhanced) image and append it to the in-memory ZIP
        buffer = io.BytesIO()
        image.save(buffer, format='JPEG')
        processed_images.append(Image.open(io.BytesIO(buffer.getvalue())))
        with zipfile.ZipFile(zip_buffer, 'a') as zipf:
            zipf.writestr(os.path.basename(file_path), buffer.getvalue())
    zip_buffer.seek(0)
    # gr.File expects a path it can serve, so persist the ZIP to disk
    zip_path = 'processed_images.zip'
    with open(zip_path, 'wb') as f:
        f.write(zip_buffer.getvalue())
    return processed_images, zip_path, "\n".join(descriptions)
iface = gr.Interface(
    fn=process_images,
    inputs=[
        gr.Files(label="Upload Image Files"),
        gr.Checkbox(label="Enhance Images (Real-ESRGAN)"),
        gr.Radio(['2x', '4x', '8x'], type="value", value='2x', label='Upscaling Factor'),
        gr.Checkbox(label="Generate Image Descriptions")
    ],
    outputs=[
        gr.Gallery(label="Enhanced Images"),
        gr.File(label="Download Enhanced Images (ZIP)"),
        gr.Textbox(label="Generated Descriptions", lines=5)
    ],
    title="Image Enhancer with Description Generator",
    description="Upload multiple images, optionally enhance them with Real-ESRGAN, generate captions with BLIP, and download the results as a ZIP file."
)
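# launch() starts a local Gradio server (by default on http://127.0.0.1:7860);
# pass share=True to expose a temporary public URL if needed.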
iface.launch()