mfarre HF staff committed on
Commit
3680a17
·
1 Parent(s): bd08551

prompt + transformers adjustments

Browse files
Files changed (1) hide show
  1. app.py +10 -13
app.py CHANGED
@@ -5,7 +5,7 @@ import tempfile
5
  import torch
6
  import spaces
7
  from pathlib import Path
8
- from transformers import AutoProcessor, AutoModelForVision2Seq
9
  import subprocess
10
  import logging
11
 
@@ -49,7 +49,7 @@ class VideoHighlightDetector:
49
 
50
  # Initialize model and processor
51
  self.processor = AutoProcessor.from_pretrained(model_path)
52
- self.model = AutoModelForVision2Seq.from_pretrained(
53
  model_path,
54
  torch_dtype=torch.bfloat16,
55
  # _attn_implementation="flash_attention_2"
@@ -88,15 +88,11 @@ class VideoHighlightDetector:
88
  messages = [
89
  {
90
  "role": "system",
91
- "content": [{"type": "text", "text": "You are a professional video editor specializing in creating viral highlight reels. You understand that the most engaging highlights are brief and focus only on exceptional moments that are statistically rare or particularly dramatic. Moments that would make viewers say 'I can't believe that happened!"}]
92
  },
93
  {
94
  "role": "user",
95
- "content": [{"type": "text", "text": f"""Here is a description of a video:
96
-
97
- {video_description}
98
-
99
- Based on this description, list which rare segments should be included in a best of the best highlight."""}]
100
  }
101
  ]
102
 
@@ -116,18 +112,19 @@ class VideoHighlightDetector:
116
  def process_segment(self, video_path: str, highlight_types: str) -> bool:
117
  """Process a video segment and determine if it contains highlights."""
118
  messages = [
 
 
 
 
119
  {
120
  "role": "user",
121
  "content": [
122
  {"type": "video", "path": video_path},
123
- {"type": "text", "text": f"""{highlight_types}
124
-
125
-
126
- Do you see any of those elements in the video? answer yes if you do and answer no if you don't."""}
127
- ]
128
  }
129
  ]
130
 
 
131
  print(messages)
132
 
133
 
 
5
  import torch
6
  import spaces
7
  from pathlib import Path
8
+ from transformers import AutoProcessor, AutoModelForImageTextToText
9
  import subprocess
10
  import logging
11
 
 
49
 
50
  # Initialize model and processor
51
  self.processor = AutoProcessor.from_pretrained(model_path)
52
+ self.model = AutoModelForImageTextToText.from_pretrained(
53
  model_path,
54
  torch_dtype=torch.bfloat16,
55
  # _attn_implementation="flash_attention_2"
 
88
  messages = [
89
  {
90
  "role": "system",
91
+ "content": [{"type": "text", "text": "You are a highlight editor. List archetypal dramatic moments that would make compelling highlights if they appear in the video. Each moment should be specific enough to be recognizable but generic enough to potentially exist in any video of this type."}]
92
  },
93
  {
94
  "role": "user",
95
+ "content": [{"type": "text", "text": f"""Here is a description of a video:\n\n{video_description}\n\nList potential highlight moments to look for in this video:"""}]
 
 
 
 
96
  }
97
  ]
98
 
 
112
  def process_segment(self, video_path: str, highlight_types: str) -> bool:
113
  """Process a video segment and determine if it contains highlights."""
114
  messages = [
115
+ {
116
+ "role": "system",
117
+ "content": [{"type": "text", "text": "You are a video highlight analyzer. Your role is to identify moments that have high dramatic value, focusing on displays of skill, emotion, personality, or tension. Compare video segments against provided example highlights to find moments with similar emotional impact and visual interest, even if the specific actions differ."}]
118
+ },
119
  {
120
  "role": "user",
121
  "content": [
122
  {"type": "video", "path": video_path},
123
+ {"type": "text", "text": f"""Given these highlight examples:\n{highlight_types}\n\nDoes this video contain a moment that matches the core action of one of the highlights? Answer with:\n'yes' or 'no'\nIf yes, justify it"""}]
 
 
 
 
124
  }
125
  ]
126
 
127
+
128
  print(messages)
129
 
130