Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -1,42 +1,49 @@
 import gradio as gr
 import torch
-from transformers import AutoProcessor
+from transformers import AutoProcessor
 
-#
+# Import the custom model class directly from the remote code.
+# Note: The import path here is based on the repository structure. If this fails,
+# check the model repository's files to confirm the correct import path and class name.
+from transformers.models.llava.modeling_llava import LlavaForCausalLM
+
+# Load the processor and model while trusting remote code.
 processor = AutoProcessor.from_pretrained(
     "lmms-lab/LLaVA-Video-7B-Qwen2",
     trust_remote_code=True
 )
-model =
+model = LlavaForCausalLM.from_pretrained(
     "lmms-lab/LLaVA-Video-7B-Qwen2",
     trust_remote_code=True
 )
 
-# Set
+# Set device to GPU if available.
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
 
 def analyze_video(video_path):
     """
-
+    This function accepts the path to a video file,
+    then uses the LLaVA-Video model to analyze it for the moment
+    when the crowd is most engaged.
     """
-    # Define the prompt instructing the model on what to do
     prompt = "Analyze this video of a concert and determine the moment when the crowd is most engaged."
 
-    # Process the
+    # Process the text and video input.
+    # (Make sure that the processor handles video inputs as expected.)
     inputs = processor(text=prompt, video=video_path, return_tensors="pt")
 
-    # Move
+    # Move tensors to the device.
     inputs = {key: value.to(device) for key, value in inputs.items()}
 
-    # Generate
+    # Generate a response.
     outputs = model.generate(**inputs, max_new_tokens=100)
 
-    # Decode the generated tokens to a
+    # Decode the generated tokens to a string.
     answer = processor.decode(outputs[0], skip_special_tokens=True)
     return answer
 
-# Create the Gradio
+# Create the Gradio interface.
 iface = gr.Interface(
     fn=analyze_video,
     inputs=gr.Video(label="Upload Concert/Event Video", type="filepath"),
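A note on the change above: in recent transformers releases, `transformers.models.llava.modeling_llava` exports `LlavaForConditionalGeneration` but no `LlavaForCausalLM`, so the new import line itself would raise an `ImportError`, which is consistent with the Space's Runtime error badge. Below is a minimal sketch of an alternative loading path; it assumes (unverified) that this checkpoint's remote code registers with the public `AutoModelForCausalLM` API, so check the model card for the officially supported recipe.

```python
# Sketch only: load through the public Auto* API rather than a private class path.
# Assumption (unverified): the repo's remote code registers itself for
# AutoModelForCausalLM; the model card documents the supported loading recipe.
import torch
from transformers import AutoProcessor, AutoModelForCausalLM

model_id = "lmms-lab/LLaVA-Video-7B-Qwen2"

processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    torch_dtype=torch.float16,  # half precision so the 7B model fits on one GPU
    device_map="auto",          # let accelerate place weights; no manual model.to(device)
)
```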
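Separately, `inputs = processor(text=prompt, video=video_path, ...)` hands the processor a raw file path, while multimodal processors generally expect decoded frames (NumPy arrays or PIL images). A small OpenCV helper along these lines could do the decoding first; the name `sample_frames` and the frame count are illustrative, not part of the Space.

```python
# Illustrative helper: uniformly sample frames from the uploaded clip with OpenCV,
# since video processors typically want decoded RGB frames rather than a file path.
import cv2
import numpy as np

def sample_frames(video_path: str, num_frames: int = 16) -> list:
    cap = cv2.VideoCapture(video_path)
    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    indices = np.linspace(0, max(total - 1, 0), num_frames).astype(int)
    frames = []
    for idx in indices:
        cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
        ok, frame = cap.read()
        if ok:
            # OpenCV decodes to BGR; convert to the RGB order models expect.
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    cap.release()
    return frames
```

The sampled frames would then be passed to the processor under whichever keyword its documentation actually specifies (often `videos=`), rather than as `video=video_path`.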