File size: 1,760 Bytes
9ac9a1e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import torch
from io import BytesIO
import base64
# Initialize the model and tokenizer
model_id = "vikhyatk/moondream2"
model = AutoModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Check if CUDA (GPU support) is available and then set the device to GPU or CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
def preprocess_image(encoded_image):
"""Decode and preprocess the input image."""
decoded_image = base64.b64decode(encoded_image)
img = Image.open(BytesIO(decoded_image)).convert("RGB")
return img
def handler(event, context):
"""Handle the incoming request."""
try:
# Extract the base64-encoded image and question from the event
input_image = event['body']['image']
question = event['body'].get('question', "move to the red ball")
# Preprocess the image
img = preprocess_image(input_image)
# Perform inference
enc_image = model.encode_image(img).to(device)
answer = model.answer_question(enc_image, question, tokenizer)
# If the output is a tensor, move it back to CPU and convert to list
if isinstance(answer, torch.Tensor):
answer = answer.cpu().numpy().tolist()
# Create the response
response = {
"statusCode": 200,
"body": {
"answer": answer
}
}
return response
except Exception as e:
# Handle any errors
response = {
"statusCode": 500,
"body": {
"error": str(e)
}
}
return response |