Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,9 +2,15 @@ import gradio as gr
|
|
| 2 |
import torch
|
| 3 |
from transformers import AutoProcessor, AutoModelForCausalLM
|
| 4 |
|
| 5 |
-
# Load the processor and model
|
| 6 |
-
processor = AutoProcessor.from_pretrained(
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
# Set the device (use GPU if available)
|
| 10 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
@@ -12,17 +18,12 @@ model.to(device)
|
|
| 12 |
|
| 13 |
def analyze_video(video_path):
|
| 14 |
"""
|
| 15 |
-
|
| 16 |
-
then uses the LLaVA-Video-7B-Qwen2 model to analyze the video.
|
| 17 |
-
|
| 18 |
-
The prompt instructs the model to analyze the video and return
|
| 19 |
-
the moment when the crowd is most engaged.
|
| 20 |
"""
|
| 21 |
-
# Define the prompt
|
| 22 |
prompt = "Analyze this video of a concert and determine the moment when the crowd is most engaged."
|
| 23 |
|
| 24 |
-
# Process the video and prompt
|
| 25 |
-
# Note: The processor is expected to handle the video input (e.g., by reading frames).
|
| 26 |
inputs = processor(text=prompt, video=video_path, return_tensors="pt")
|
| 27 |
|
| 28 |
# Move all tensor inputs to the selected device
|
|
|
|
| 2 |
import torch
|
| 3 |
from transformers import AutoProcessor, AutoModelForCausalLM
|
| 4 |
|
| 5 |
+
# Load the processor and model, trusting the remote code for custom implementations
|
| 6 |
+
processor = AutoProcessor.from_pretrained(
|
| 7 |
+
"lmms-lab/LLaVA-Video-7B-Qwen2",
|
| 8 |
+
trust_remote_code=True
|
| 9 |
+
)
|
| 10 |
+
model = AutoModelForCausalLM.from_pretrained(
|
| 11 |
+
"lmms-lab/LLaVA-Video-7B-Qwen2",
|
| 12 |
+
trust_remote_code=True
|
| 13 |
+
)
|
| 14 |
|
| 15 |
# Set the device (use GPU if available)
|
| 16 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
|
| 18 |
|
| 19 |
def analyze_video(video_path):
|
| 20 |
"""
|
| 21 |
+
Analyzes a concert/event video to determine the moment when the crowd is most engaged.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
"""
|
| 23 |
+
# Define the prompt instructing the model on what to do
|
| 24 |
prompt = "Analyze this video of a concert and determine the moment when the crowd is most engaged."
|
| 25 |
|
| 26 |
+
# Process the video and prompt
|
|
|
|
| 27 |
inputs = processor(text=prompt, video=video_path, return_tensors="pt")
|
| 28 |
|
| 29 |
# Move all tensor inputs to the selected device
|