from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import gradio as gr
import tempfile
import os
import csv

# Load the BLIP captioning model and its processor once at startup. The first
# from_pretrained() call downloads the checkpoint from the Hugging Face Hub and
# caches it locally; later runs load it from the cache.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
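
# Optional sketch, not required for the app: inference runs on the CPU by default.
# If a CUDA GPU is available the model could be moved to it, but the tensors
# returned by the processor in generate_image_caption would then need a matching
# .to("cuda") call as well.
# import torch
# if torch.cuda.is_available():
#     model.to("cuda")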


def generate_image_caption(image):
    """
    Generate a caption for the given image.

    Args:
        image (PIL.Image): The image object.

    Returns:
        str: The generated caption.
    """
    # BLIP expects a 3-channel RGB image.
    image = image.convert("RGB")
    inputs = processor(images=image, return_tensors="pt")
    outputs = model.generate(**inputs)
    caption = processor.decode(outputs[0], skip_special_tokens=True)
    return caption


def generate_keywords(caption):
    """
    Generate a list of keywords from the caption, focusing on SEO relevance.

    Args:
        caption (str): The image caption.

    Returns:
        list: Up to 50 unique, lowercased single-word keywords.
    """
    words = caption.split()

    # Drop common stopwords and deduplicate; because of set(), the order of the
    # remaining keywords is not guaranteed.
    stopwords = {"a", "the", "and", "of", "in", "on", "with", "at", "for", "to", "is"}
    keywords = list(set(word.lower() for word in words if word.lower() not in stopwords))

    # Cap the list at 50 keywords; shorter captions simply yield fewer.
    return keywords[:50]
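
# Illustrative only (the caption below is made up): a short caption yields far
# fewer than 50 keywords, e.g.
#   generate_keywords("a dog running on the beach at sunset")
#   -> ['dog', 'running', 'beach', 'sunset']   # order varies because of set()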


def process_images(image_files):
    """
    Process uploaded images to generate metadata and create a CSV file.

    Args:
        image_files (list): Uploaded images, either as file-like objects or as
            file paths (gr.Files returns one or the other depending on the
            Gradio version).

    Returns:
        tuple: A list of PIL images and the path to the generated CSV file.
    """
    metadata = []
    images = []
    temp_dir = tempfile.mkdtemp()

    for image_file in image_files:
        # Accept both plain path strings and tempfile wrappers with a .name attribute.
        path = image_file if isinstance(image_file, str) else image_file.name
        filename = os.path.basename(path)
        image = Image.open(path)
        images.append(image)
        caption = generate_image_caption(image)
        if caption:
            keywords = generate_keywords(caption)
            # Use the caption as the title, truncated to at most 100 characters.
            title = caption if 70 <= len(caption) <= 100 else caption[:100]
            metadata.append({
                'filename': filename,
                'title': title,
                'keywords': keywords
            })

    # Write one CSV row per image: filename, title, comma-separated keywords.
    csv_file_path = os.path.join(temp_dir, 'images_metadata.csv')
    with open(csv_file_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Filename', 'Title', 'Keywords'])
        for data in metadata:
            writer.writerow([data['filename'], data['title'], ','.join(data['keywords'])])

    return images, csv_file_path
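
# Standalone usage sketch (the paths below are placeholders, not files shipped
# with this script): because plain paths are accepted, the pipeline can also be
# run outside the Gradio UI, e.g. to batch-process a folder.
#   gallery, csv_path = process_images(["photos/cat.jpg", "photos/dog.jpg"])
#   print(csv_path)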


iface = gr.Interface(
    fn=process_images,
    inputs=[
        gr.Files(label="Upload Image Files")
    ],
    outputs=[
        gr.Gallery(label="Processed Images"),
        gr.File(label="Download Metadata CSV")
    ],
    title="Image Captioning and Metadata Generator",
    description="Upload multiple images to generate captions and metadata. Download the metadata as a CSV file."
)

iface.launch(debug=True)