# Source: captionnn / app.py
# Author avatar: karouswissem's picture
# Commit message: Update app.py
# Commit: 77d0fe1 (verified)
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import torch
import yake
import requests
from io import BytesIO
# Single checkpoint identifier shared by the model and its tokenizer so the
# two are always loaded from the same repository.
_JANUS_CHECKPOINT = "deepseek-ai/Janus-Pro-7B"

# Both objects are created once, at import time, and are read as module-level
# globals by enhance_caption_with_janus.
model = AutoModelForCausalLM.from_pretrained(_JANUS_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(_JANUS_CHECKPOINT)
# Image -> caption step of the pipeline. Intended to be backed by a real
# image-captioning model (the original author suggests BLIP or similar).
def process_image_for_caption(image):
    """Return a text caption for ``image``.

    This is currently a stub: ``image`` is ignored and the same fixed
    caption is returned for every call. Swap the body for a real captioning
    model when one is available.
    """
    placeholder_caption = "A person holding a book in a library."
    return placeholder_caption
# Caption -> longer caption step, driven by the module-level Janus-Pro-7B
# ``model`` and ``tokenizer``.
def enhance_caption_with_janus(caption):
    """Feed ``caption`` to the language model and return the decoded output.

    The caption is tokenized into PyTorch tensors, passed to
    ``model.generate`` with ``max_length=100``, and the first returned
    sequence is decoded with special tokens stripped.

    NOTE(review): per the Transformers docs ``max_length`` bounds prompt plus
    generated tokens, and the decoded sequence normally starts with the
    prompt itself -- confirm that is the intended behaviour.
    """
    encoded = tokenizer(caption, return_tensors="pt")
    generated = model.generate(max_length=100, **encoded)
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)
# Turns a caption into a short task title using YAKE keyword extraction.
class YakeTaskGenerator:
    """Derive a task name from the top YAKE keywords of a caption."""

    def __init__(self, n=2, top_k=3):
        """Create the underlying YAKE extractor.

        ``n`` and ``top_k`` are forwarded to ``yake.KeywordExtractor`` as its
        ``n`` and ``top`` arguments respectively.
        """
        self.kw_extractor = yake.KeywordExtractor(top=top_k, n=n)

    def extract_keywords(self, caption):
        """Return the keyword strings YAKE finds in ``caption``.

        Only the first element of each entry returned by the extractor is
        kept; the remaining elements are discarded.
        """
        phrases = []
        for entry in self.kw_extractor.extract_keywords(caption):
            phrases.append(entry[0])
        return phrases

    def generate_task_name(self, caption):
        """Build a task name from at most the first two keywords.

        Returns ``"General Image Processing"`` when no keywords are found.
        Otherwise the keywords are joined with a space, passed through
        ``str.capitalize`` (first character upper-cased, the rest
        lower-cased) and suffixed with ``" Analysis"``.
        """
        phrases = self.extract_keywords(caption)
        if phrases:
            leading = " ".join(phrases[:2])
            return leading.capitalize() + " Analysis"
        return "General Image Processing"
# End-to-end pipeline: image -> caption -> enhanced caption -> task name.
def process_image_and_generate_task(image):
    """Run the whole pipeline on ``image`` and return the task name string.

    Steps, in order: caption the image, expand the caption with the language
    model, then derive a task name from the expanded caption using a
    default-configured ``YakeTaskGenerator`` created for this call.
    """
    raw_caption = process_image_for_caption(image)
    richer_caption = enhance_caption_with_janus(raw_caption)
    return YakeTaskGenerator().generate_task_name(richer_caption)
# Callback wired into the Gradio UI.
def gradio_interface(image):
    """Return the generated task name for the uploaded ``image``.

    Thin adapter around ``process_image_and_generate_task`` so the UI has a
    dedicated entry point.
    """
    return process_image_and_generate_task(image)
# Gradio UI: a single image upload as input, a single text box as output.
image_input = gr.Image(type="pil", label="Upload Image")
output = gr.Textbox(label="Generated Task Name")

# Build the interface first, then start the server as a separate step.
demo = gr.Interface(
    fn=gradio_interface,
    inputs=image_input,
    outputs=output,
    title="Image Captioning and Task Name Generation with Janus-Pro-7B",
    live=True,
)

# share=True and debug=True are passed straight through to Gradio's launch().
demo.launch(share=True, debug=True)