# NOTE: Hugging Face Spaces status banner captured with the source ("Spaces: Runtime error");
# kept here as a comment so the file remains valid Python.
# Necessary imports | |
import sys | |
import gradio as gr | |
import spaces | |
from decord import VideoReader, cpu | |
# from PIL import Image | |
# Local imports | |
from src.config import ( | |
device, | |
model_name, | |
sampling, | |
stream, | |
repetition_penalty, | |
) | |
from src.minicpm.model import load_model_tokenizer_and_processor | |
from src.logger import logging | |
from src.exception import CustomExceptionHandling | |
# Model, tokenizer and processor
# Loaded once at import time so every Gradio request reuses the same
# instances instead of reloading the model per call.
model, tokenizer, processor = load_model_tokenizer_and_processor(model_name, device)

# Upper bound on video frames to sample — only referenced by the
# commented-out encode_video helper below; kept for when it is re-enabled.
MAX_NUM_FRAMES=64
# def encode_video(video_path): | |
# MAX_NUM_FRAMES=64 | |
# def uniform_sample(l, n): | |
# gap = len(l) / n | |
# idxs = [int(i * gap + gap / 2) for i in range(n)] | |
# return [l[i] for i in idxs] | |
# vr = VideoReader(video_path, ctx=cpu(0)) | |
# sample_fps = round(vr.get_avg_fps() / 1) # FPS | |
# frame_idx = [i for i in range(0, len(vr), sample_fps)] | |
# if len(frame_idx) > MAX_NUM_FRAMES: | |
# frame_idx = uniform_sample(frame_idx, MAX_NUM_FRAMES) | |
# frames = vr.get_batch(frame_idx).asnumpy() | |
# frames = [Image.fromarray(v.astype('uint8')) for v in frames] | |
# print('num frames:', len(frames)) | |
# return frames | |
def describe_image(
    image: str,
    question: str,
    temperature: float,
    top_p: float,
    top_k: int,
    max_new_tokens: int,
) -> str:
    """
    Generate an answer to a given question based on the provided image.

    Args:
    - image (str): The path to the image file.
    - question (str): The question text.
    - temperature (float): The sampling temperature for the model.
    - top_p (float): The nucleus-sampling (top_p) parameter for the model.
    - top_k (int): The top_k sampling parameter for the model.
    - max_new_tokens (int): The max tokens to be generated by the model.

    Returns:
        str: The generated answer, or "" when image/question is missing.

    Raises:
        CustomExceptionHandling: Wraps any error raised during generation.
    """
    try:
        # Guard clause: the original code warned but still fell through to
        # model.chat with a missing input — return early instead.
        if not image or not question:
            gr.Warning("Please provide an image and a question.")
            return ""

        # Message format expected by the MiniCPM chat API.
        msgs = [{"role": "user", "content": [image, question]}]

        # Generate the answer. With stream enabled the model may yield
        # chunks; "".join(...) below flattens them into one string.
        answer = model.chat(
            image=None,
            msgs=msgs,
            tokenizer=tokenizer,
            processor=processor,
            sampling=sampling,
            stream=stream,
            top_p=top_p,
            top_k=top_k,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
            max_new_tokens=max_new_tokens,
        )

        # Log the successful generation of the answer
        logging.info("Answer generated successfully.")
        return "".join(answer)

    # Wrap any failure in the project's custom exception, preserving the cause.
    except Exception as e:
        raise CustomExceptionHandling(e, sys) from e