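"""Gradio app for batch image enhancement and captioning.

Uploaded images can be upscaled 2x/4x/8x with Real-ESRGAN, captioned with
BLIP (Salesforce/blip-image-captioning-base), and downloaded as a ZIP archive.
"""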
import torch
from PIL import Image
from RealESRGAN import RealESRGAN
from transformers import BlipProcessor, BlipForConditionalGeneration  # BLIP model for image captioning
import gradio as gr
import numpy as np
import io
import zipfile
import os
# Set the device to CUDA if available, otherwise CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Load the RealESRGAN models for enhancement
def load_model(scale):
    model = RealESRGAN(device, scale=scale)
    weights_path = f'weights/RealESRGAN_x{scale}.pth'
    try:
        model.load_weights(weights_path, download=True)
        print(f"Weights for scale {scale} loaded successfully.")
    except Exception as e:
        print(f"Error loading weights for scale {scale}: {e}")
    return model
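# One model is instantiated per supported upscaling factor; all three stay
# resident in memory, and enhance_image() selects the matching one per request.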
model2 = load_model(2)
model4 = load_model(4)
model8 = load_model(8)
# Load Hugging Face model and processor for image description
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
caption_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
def enhance_image(image, scale):
    try:
        image_np = np.array(image.convert('RGB'))
        model = model2 if scale == '2x' else model4 if scale == '4x' else model8
        result = model.predict(image_np)
        return Image.fromarray(np.uint8(result))
    except Exception as e:
        print(f"Error enhancing image: {e}")
        return image  # fall back to the original image if enhancement fails
def describe_image(image):
    inputs = processor(image, return_tensors="pt").to(device)
    generated_ids = caption_model.generate(**inputs)
    description = processor.decode(generated_ids[0], skip_special_tokens=True)
    return description
def process_images(image_files, enhance, scale, generate_description):
    processed_images = []
    descriptions = []
    zip_buffer = io.BytesIO()
    for image_file in image_files:
        # gr.Files may yield plain paths or tempfile objects depending on the Gradio version
        file_path = image_file if isinstance(image_file, str) else image_file.name
        image = Image.open(file_path).convert('RGB')
        if enhance:
            image = enhance_image(image, scale)
        if generate_description:
            descriptions.append(describe_image(image))
        # Save the (optionally enhanced) image and append it to the in-memory ZIP
        buffer = io.BytesIO()
        image.save(buffer, format='JPEG')
        processed_images.append(Image.open(io.BytesIO(buffer.getvalue())))
        with zipfile.ZipFile(zip_buffer, 'a') as zipf:
            zipf.writestr(os.path.basename(file_path), buffer.getvalue())
    zip_buffer.seek(0)
    # gr.File expects a path it can serve, so persist the ZIP to disk
    zip_path = 'processed_images.zip'
    with open(zip_path, 'wb') as f:
        f.write(zip_buffer.getvalue())
    return processed_images, zip_path, "\n".join(descriptions)
iface = gr.Interface(
    fn=process_images,
    inputs=[
        gr.Files(label="Upload Image Files"),
        gr.Checkbox(label="Enhance Images (Real-ESRGAN)"),
        gr.Radio(['2x', '4x', '8x'], type="value", value='2x', label='Upscaling Factor'),
        gr.Checkbox(label="Generate Image Descriptions")
    ],
    outputs=[
        gr.Gallery(label="Enhanced Images"),
        gr.File(label="Download Enhanced Images (ZIP)"),
        gr.Textbox(label="Generated Descriptions", lines=5)
    ],
    title="Image Enhancer with Description Generator",
    description="Upload multiple images, optionally enhance them with Real-ESRGAN, generate captions with BLIP, and download the results as a ZIP file."
)
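# launch() starts a local Gradio server (by default on http://127.0.0.1:7860);
# pass share=True to expose a temporary public URL if needed.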
iface.launch()