import base64
from io import BytesIO

import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer

# Initialize the model and tokenizer. moondream2 ships custom modeling code,
# so trust_remote_code=True is required for from_pretrained to load it.
model_id = "vikhyatk/moondream2"
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()  # inference only: disable dropout and other training-time behavior

def preprocess_image(encoded_image):
    """Decode a base64-encoded image and convert it to RGB."""
    decoded_image = base64.b64decode(encoded_image)
    img = Image.open(BytesIO(decoded_image)).convert("RGB")
    return img

def handler(event, context):
    """Handle an incoming request carrying a base64 image and a question."""
    try:
        # Extract the base64-encoded image and question from the event
        input_image = event["body"]["image"]
        question = event["body"].get("question", "move to the red ball")

        # Preprocess the image
        img = preprocess_image(input_image)

        # Perform inference via moondream2's image-QA interface
        enc_image = model.encode_image(img).to(device)
        answer = model.answer_question(enc_image, question, tokenizer)

        # If the output is a tensor, move it back to the CPU and convert it
        # to a plain list so the response body is JSON-serializable
        if isinstance(answer, torch.Tensor):
            answer = answer.cpu().numpy().tolist()

        return {
            "statusCode": 200,
            "body": {
                "answer": answer
            }
        }
    except Exception as e:
        # Report any failure as a 500 with the error message in the body
        return {
            "statusCode": 500,
            "body": {
                "error": str(e)
            }
        }
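

# --- Usage sketch (not part of the original handler) -------------------------
# A minimal local smoke test of the handler's event shape. The image path and
# the question below are illustrative assumptions, not values from this repo.
if __name__ == "__main__":
    with open("example.jpg", "rb") as f:  # hypothetical local test image
        encoded = base64.b64encode(f.read()).decode("utf-8")

    event = {"body": {"image": encoded, "question": "What is in this image?"}}
    print(handler(event, context=None))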