"""Gradio demo that turns an uploaded image into a short task name.

Pipeline: image -> caption (currently a placeholder) -> caption passed through
Janus-Pro-7B text generation -> YAKE keyword extraction -> task name.
"""

from io import BytesIO
from typing import List, Optional

import gradio as gr
import requests
import torch
import yake
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face model id used for both the model and its tokenizer.
MODEL_ID = "deepseek-ai/Janus-Pro-7B"

# Upper bound handed to ``model.generate(max_length=...)``.
# NOTE(review): per the transformers docs ``max_length`` counts prompt tokens
# as well as generated ones; consider ``max_new_tokens`` if captions get long.
MAX_CAPTION_TOKENS = 100

# Returned when YAKE finds no keywords in the caption.
DEFAULT_TASK_NAME = "General Image Processing"

# Load Janus-Pro-7B model and tokenizer once, at import time.
# NOTE(review): Janus-Pro-7B is published as a multimodal checkpoint; confirm
# that a plain AutoModelForCausalLM load works in the target environment.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)


def process_image_for_caption(image: Optional[Image.Image]) -> str:
    """Return a text caption for *image*.

    This is a placeholder: the image is ignored and a fixed caption is
    returned. Replace the body with a real captioning model (e.g. BLIP).
    """
    return "A person holding a book in a library."


def enhance_caption_with_janus(caption: str) -> str:
    """Run *caption* through the Janus-Pro-7B model and return the decoded text.

    The caption is tokenized, passed to ``model.generate`` and the first
    returned sequence is decoded with special tokens stripped.
    """
    inputs = tokenizer(caption, return_tensors="pt")
    outputs = model.generate(**inputs, max_length=MAX_CAPTION_TOKENS)
    # presumably outputs[0] contains the prompt followed by the continuation,
    # as is usual for causal LMs -- verify against the loaded model.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


class YakeTaskGenerator:
    """Build a task name from a caption using YAKE keyword extraction."""

    def __init__(self, n: int = 2, top_k: int = 3) -> None:
        """Create the extractor.

        Args:
            n: Passed to ``yake.KeywordExtractor`` as ``n``.
            top_k: Passed to ``yake.KeywordExtractor`` as ``top``.
        """
        self.kw_extractor = yake.KeywordExtractor(n=n, top=top_k)

    def extract_keywords(self, caption: str) -> List[str]:
        """Return the keyword strings YAKE extracts from *caption*."""
        # Each result is indexable; element 0 is the keyword text.
        return [kw[0] for kw in self.kw_extractor.extract_keywords(caption)]

    def generate_task_name(self, caption: str) -> str:
        """Return ``"<top two keywords> Analysis"`` for *caption*.

        Falls back to ``"General Image Processing"`` when no keywords are
        found.
        """
        keywords = self.extract_keywords(caption)
        if not keywords:
            return DEFAULT_TASK_NAME
        # Use the first two keywords. str.capitalize() upper-cases the first
        # character and lower-cases the rest of the joined string.
        joined = " ".join(keywords[:2])
        return joined.capitalize() + " Analysis"


# One shared generator: the extractor settings never change between requests,
# so there is no need to rebuild it for every image.
_TASK_GENERATOR = YakeTaskGenerator()


def process_image_and_generate_task(image: Optional[Image.Image]) -> str:
    """Run the full pipeline and return the generated task name.

    Steps: caption the image, pass the caption through Janus-Pro-7B, then
    derive a task name from the resulting text.
    """
    caption = process_image_for_caption(image)
    enhanced_caption = enhance_caption_with_janus(caption)
    return _TASK_GENERATOR.generate_task_name(enhanced_caption)


def gradio_interface(image: Optional[Image.Image]) -> str:
    """Gradio callback: map the uploaded image to a task name.

    Returns an empty string when no image is present (with ``live=True`` the
    callback also fires when the image is cleared), so the model is not run
    for nothing.
    """
    if image is None:
        return ""
    return process_image_and_generate_task(image)


# Create Gradio interface
image_input = gr.Image(type="pil", label="Upload Image")
output = gr.Textbox(label="Generated Task Name")

demo = gr.Interface(
    fn=gradio_interface,
    inputs=image_input,
    outputs=output,
    live=True,
    title="Image Captioning and Task Name Generation with Janus-Pro-7B",
)

if __name__ == "__main__":
    # Only start the server when executed as a script, not on import.
    demo.launch(share=True, debug=True)