import csv
import itertools
import os
import tempfile

import gradio as gr
from PIL import Image
from transformers import BlipForConditionalGeneration, BlipProcessor

# Hugging Face checkpoint used for both the processor and the captioning model.
MODEL_NAME = "Salesforce/blip-image-captioning-base"

# Number of keywords emitted per image.
KEYWORD_COUNT = 50

# Maximum number of characters kept from the caption when used as a title.
MAX_TITLE_LENGTH = 100

# Words dropped from captions before they are turned into keywords.
STOPWORDS = frozenset(
    {"a", "the", "and", "of", "in", "on", "with", "at", "for", "to", "is"}
)

# Initialize the processor and model once at import time so every request
# reuses the same instances.
processor = BlipProcessor.from_pretrained(MODEL_NAME)
model = BlipForConditionalGeneration.from_pretrained(MODEL_NAME)


def generate_image_caption(image):
    """
    Generate a caption for the given image.

    Args:
        image (PIL.Image): The image object.

    Returns:
        str: The generated caption.
    """
    rgb_image = image.convert("RGB")
    inputs = processor(images=rgb_image, return_tensors="pt")
    outputs = model.generate(**inputs)
    return processor.decode(outputs[0], skip_special_tokens=True)


def generate_keywords(caption):
    """
    Generate a list of keywords from the caption, focusing on SEO relevance.

    The caption is split on whitespace, lower-cased, stripped of stopwords and
    de-duplicated while keeping the order in which words first appear. The
    unique words are then repeated cyclically (or truncated) so the result has
    exactly ``KEYWORD_COUNT`` entries.

    Args:
        caption (str): The image caption.

    Returns:
        list: A list of ``KEYWORD_COUNT`` single-word keywords, or an empty
        list when the caption contains no usable words.
    """
    lowered = (word.lower() for word in caption.split())
    # dict.fromkeys de-duplicates while preserving first-seen order, unlike
    # set(), whose iteration order is arbitrary.
    unique_words = list(
        dict.fromkeys(word for word in lowered if word not in STOPWORDS)
    )
    if not unique_words:
        # Nothing to repeat; cycling an empty list would yield nothing anyway.
        return []
    # Cycle through the unique words until the target length is reached; this
    # both pads short lists and truncates long ones.
    return list(itertools.islice(itertools.cycle(unique_words), KEYWORD_COUNT))


def process_images(image_files):
    """
    Process uploaded images to generate metadata and create a CSV file.

    Each file is opened once, captioned, and kept in memory for the gallery.
    Files that produce an empty caption appear in the gallery but get no CSV
    row.

    Args:
        image_files (list of file-like objects): List of uploaded image files.
            Each item must expose the file path as ``.name``. ``None`` is
            treated as an empty list.

    Returns:
        tuple: A list of PIL images, path to the CSV file.
    """
    # May be None if nothing was uploaded; treat that as "no files".
    image_files = image_files or []

    rows = []
    gallery_images = []
    for image_file in image_files:
        path = image_file.name
        # Copy the pixel data into memory so the underlying file handle can be
        # closed immediately instead of leaking until garbage collection.
        with Image.open(path) as opened:
            image = opened.copy()
        gallery_images.append(image)

        caption = generate_image_caption(image)
        if not caption:
            continue
        rows.append(
            [
                os.path.basename(path),
                caption[:MAX_TITLE_LENGTH],
                ','.join(generate_keywords(caption)),
            ]
        )

    # The directory is intentionally not removed here: the returned CSV path
    # must stay valid after this function returns.
    temp_dir = tempfile.mkdtemp()
    csv_file_path = os.path.join(temp_dir, 'images_metadata.csv')
    with open(csv_file_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Filename', 'Title', 'Keywords'])
        writer.writerows(rows)

    return gallery_images, csv_file_path


# Define Gradio interface
iface = gr.Interface(
    fn=process_images,
    inputs=[
        gr.Files(label="Upload Image Files"),  # gr.Files allows multiple uploads
    ],
    outputs=[
        gr.Gallery(label="Processed Images"),
        gr.File(label="Download Metadata CSV"),
    ],
    title="Image Captioning and Metadata Generator",
    description="Upload multiple images to generate captions and metadata. Download the metadata as a CSV file.",
)

# Launch the interface only when run as a script, so importing this module
# (e.g. to reuse process_images) does not start a server.
if __name__ == "__main__":
    iface.launch(debug=True)