File size: 670 Bytes
e924edc
 
3b1cddb
e924edc
 
ab591a5
 
 
 
e924edc
3b1cddb
7ab2903
 
3b1cddb
 
e924edc
ab591a5
4b1e401
3b1cddb
e924edc
 
 
3b1cddb
 
e924edc
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import gradio as gr
from transformers import pipeline
from PIL import Image

# Build the high-level Hugging Face pipeline once at import time so that
# every caption request reuses the same loaded model.
# NOTE(review): device=-1 is the transformers convention for CPU — confirm
# before deploying on hardware with an accelerator.
pipe = pipeline(
    task="image-text-to-text",
    model="microsoft/kosmos-2-patch14-224",
    device=-1,
)

def get_image_caption(image):
    """Generate a text description of *image* with the module-level pipeline.

    Args:
        image: The image to describe. It must provide ``convert("RGB")``;
            the Gradio input defined in this file uses ``type="pil"``.

    Returns:
        The ``generated_text`` entry of the first result the pipeline
        returns.

    Raises:
        gr.Error: If ``image`` is falsy (for example ``None``).
    """
    if not image:
        raise gr.Error("No image provided.")

    rgb_image = image.convert("RGB")

    # max_new_tokens caps the generation length: fewer tokens means a
    # faster but less detailed caption.
    generation_args = {"text": "The person is", "max_new_tokens": 240}
    outputs = pipe(rgb_image, **generation_args)

    first_output = outputs[0]
    return first_output["generated_text"]

# Gradio front end: a single image input wired to get_image_caption, with
# the returned caption shown as plain text.
api = gr.Interface(
    get_image_caption,
    inputs=gr.Image(label="Input Image", type="pil"),
    outputs="text",
)

# Start serving the interface.
# NOTE(review): show_api=True presumably keeps the auto-generated API docs
# visible — confirm against the installed Gradio version.
api.launch(show_api=True)