Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -67,11 +67,14 @@ def video_to_frames(video_path):
|
|
| 67 |
return frames_png
|
| 68 |
|
| 69 |
def predict_answer(image, video, question, max_tokens=100):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
if image:
|
| 72 |
# Process as an image
|
| 73 |
image = image.convert("RGB")
|
| 74 |
-
input_ids = tokenizer(question, return_tensors='pt').input_ids.to(device)
|
| 75 |
image_tensor = model.image_preprocess(image)
|
| 76 |
|
| 77 |
#Generate the answer
|
|
@@ -89,7 +92,6 @@ def predict_answer(image, video, question, max_tokens=100):
|
|
| 89 |
answers = []
|
| 90 |
for frame in frames:
|
| 91 |
frame = Image.open(frame).convert("RGB")
|
| 92 |
-
input_ids = tokenizer(question, return_tensors='pt').input_ids.to(device)
|
| 93 |
image_tensor = model.image_preprocess(frame)
|
| 94 |
|
| 95 |
# Generate the answer
|
|
|
|
| 67 |
return frames_png
|
| 68 |
|
| 69 |
def predict_answer(image, video, question, max_tokens=100):
|
| 70 |
+
|
| 71 |
+
text = f"A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\n{question}? ASSISTANT:"
|
| 72 |
+
input_ids = tokenizer(text, return_tensors='pt').input_ids.to(device)
|
| 73 |
+
|
| 74 |
|
| 75 |
if image:
|
| 76 |
# Process as an image
|
| 77 |
image = image.convert("RGB")
|
|
|
|
| 78 |
image_tensor = model.image_preprocess(image)
|
| 79 |
|
| 80 |
#Generate the answer
|
|
|
|
| 92 |
answers = []
|
| 93 |
for frame in frames:
|
| 94 |
frame = Image.open(frame).convert("RGB")
|
|
|
|
| 95 |
image_tensor = model.image_preprocess(frame)
|
| 96 |
|
| 97 |
# Generate the answer
|