ParsaKhaz committed on
Commit
5f5e577
·
verified ·
1 Parent(s): 98e66b9

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +25 -6
  2. main.py +43 -1
app.py CHANGED
@@ -1,6 +1,6 @@
1
  #!/usr/bin/env python3
2
  # import spaces first
3
- import spaces
4
  import gradio as gr
5
  import os
6
  from main import load_moondream, process_video, load_sam_model
@@ -27,9 +27,9 @@ print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
27
  model, tokenizer = None, None
28
 
29
  # Uncomment for Hugging Face Spaces
30
- @spaces.GPU(duration=120)
31
  def process_video_file(
32
- video_file, target_object, box_style, ffmpeg_preset, grid_rows, grid_cols, test_mode, test_duration
33
  ):
34
  """Process a video file through the Gradio interface."""
35
  try:
@@ -326,11 +326,29 @@ with gr.Blocks(title="Promptable Content Moderation") as app:
326
 
327
  with gr.Accordion("Advanced Settings", open=False):
328
  box_style_input = gr.Radio(
329
- choices=["censor", "bounding-box", "hitmarker", "sam", "sam-fast", "fuzzy-blur", "pixelated-blur", "intense-pixelated-blur", "obfuscated-pixel"],
330
  value="obfuscated-pixel",
331
  label="Visualization Style",
332
- info="Choose how to display moderations: censor (black boxes), bounding-box (red boxes with labels), hitmarker (COD-style markers), sam (precise segmentation), sam-fast (faster but less precise segmentation), fuzzy-blur (Gaussian blur), pixelated-blur (pixelated with blur), obfuscated-pixel (advanced pixelation with neighborhood averaging)",
333
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
  preset_input = gr.Dropdown(
335
  choices=[
336
  "ultrafast",
@@ -355,7 +373,7 @@ with gr.Blocks(title="Promptable Content Moderation") as app:
355
  )
356
 
357
  test_mode_input = gr.Checkbox(
358
- label="Test Mode (Process first 3 seconds only)",
359
  value=True,
360
  info="Enable to quickly test settings on a short clip before processing the full video (recommended). If using the data visualizations, disable.",
361
  )
@@ -504,6 +522,7 @@ with gr.Blocks(title="Promptable Content Moderation") as app:
504
  cols_input,
505
  test_mode_input,
506
  test_duration_input,
 
507
  ],
508
  outputs=[video_output, json_output],
509
  )
 
1
  #!/usr/bin/env python3
2
  # import spaces first
3
+ # import spaces
4
  import gradio as gr
5
  import os
6
  from main import load_moondream, process_video, load_sam_model
 
27
  model, tokenizer = None, None
28
 
29
  # Uncomment for Hugging Face Spaces
30
+ # @spaces.GPU(duration=120)
31
  def process_video_file(
32
+ video_file, target_object, box_style, ffmpeg_preset, grid_rows, grid_cols, test_mode, test_duration, magnify_factor
33
  ):
34
  """Process a video file through the Gradio interface."""
35
  try:
 
326
 
327
  with gr.Accordion("Advanced Settings", open=False):
328
  box_style_input = gr.Radio(
329
+ choices=["censor", "bounding-box", "hitmarker", "sam", "sam-fast", "fuzzy-blur", "pixelated-blur", "intense-pixelated-blur", "obfuscated-pixel", "magnify"],
330
  value="obfuscated-pixel",
331
  label="Visualization Style",
332
+ info="Choose how to display moderations: censor (black boxes), bounding-box (red boxes with labels), hitmarker (COD-style markers), sam (precise segmentation), sam-fast (faster but less precise segmentation), fuzzy-blur (Gaussian blur), pixelated-blur (pixelated with blur), obfuscated-pixel (advanced pixelation with neighborhood averaging), magnify (enlarges detected regions)",
333
  )
334
+
335
+ magnify_factor = gr.Slider(
336
+ minimum=1.1, maximum=5.0, value=2.0, step=0.1,
337
+ label="Magnification Factor",
338
+ info="How much to enlarge detected regions (only used with magnify style)",
339
+ visible=False
340
+ )
341
+
342
+ # Show/hide magnification slider based on style selection
343
+ def update_magnify_visibility(style):
344
+ return gr.update(visible=(style == "magnify"))
345
+
346
+ box_style_input.change(
347
+ fn=update_magnify_visibility,
348
+ inputs=[box_style_input],
349
+ outputs=[magnify_factor]
350
+ )
351
+
352
  preset_input = gr.Dropdown(
353
  choices=[
354
  "ultrafast",
 
373
  )
374
 
375
  test_mode_input = gr.Checkbox(
376
+ label="Test Mode (Process first X seconds only)",
377
  value=True,
378
  info="Enable to quickly test settings on a short clip before processing the full video (recommended). If using the data visualizations, disable.",
379
  )
 
522
  cols_input,
523
  test_mode_input,
524
  test_duration_input,
525
+ magnify_factor,
526
  ],
527
  outputs=[video_output, json_output],
528
  )
main.py CHANGED
@@ -711,6 +711,43 @@ def draw_ad_boxes(frame, detected_objects, detect_keyword, model, box_style="cen
711
  except Exception as e:
712
  print(f"Error processing individual point: {str(e)}")
713
  print(f"Point data: {point}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
714
 
715
  except Exception as e:
716
  print(f"Error drawing {box_style} style box: {str(e)}")
@@ -1002,6 +1039,7 @@ def process_video(
1002
  grid_rows=1,
1003
  grid_cols=1,
1004
  box_style="censor",
 
1005
  ):
1006
  """Process a video to detect and visualize specified objects."""
1007
  try:
@@ -1011,6 +1049,9 @@ def process_video(
1011
  # Load model
1012
  print("Loading Moondream model...")
1013
  model, tokenizer = load_moondream()
 
 
 
1014
 
1015
  # Get video properties
1016
  props = get_video_properties(video_path)
@@ -1183,7 +1224,7 @@ def main():
1183
  parser.add_argument(
1184
  "--box-style",
1185
  choices=["censor", "bounding-box", "hitmarker", "sam", "sam-fast", "fuzzy-blur",
1186
- "pixelated-blur", "intense-pixelated-blur", "obfuscated-pixel"],
1187
  default="censor",
1188
  help="Style of detection visualization (default: censor)",
1189
  )
@@ -1223,6 +1264,7 @@ def main():
1223
  grid_rows=args.rows,
1224
  grid_cols=args.cols,
1225
  box_style=args.box_style,
 
1226
  )
1227
  if output_path:
1228
  success_count += 1
 
711
  except Exception as e:
712
  print(f"Error processing individual point: {str(e)}")
713
  print(f"Point data: {point}")
714
+ elif box_style == "magnify":
715
+ # Calculate the center point of the detection
716
+ center_x = (x1 + x2) // 2
717
+ center_y = (y1 + y2) // 2
718
+
719
+ # Calculate original dimensions
720
+ orig_width = x2 - x1
721
+ orig_height = y2 - y1
722
+
723
+ # Calculate new dimensions using magnify_factor parameter
724
+ magnify_factor = getattr(model, "magnify_factor", 2.0) # Default to 2x if not specified
725
+ new_width = int(orig_width * magnify_factor)
726
+ new_height = int(orig_height * magnify_factor)
727
+
728
+ # Calculate new coordinates ensuring they stay within frame bounds
729
+ new_x1 = max(0, center_x - new_width // 2)
730
+ new_y1 = max(0, center_y - new_height // 2)
731
+ new_x2 = min(width - 1, new_x1 + new_width)
732
+ new_y2 = min(height - 1, new_y1 + new_height)
733
+
734
+ # Extract the original ROI
735
+ roi = frame[y1:y2, x1:x2]
736
+
737
+ # Resize the ROI using the magnify_factor
738
+ enlarged_roi = cv2.resize(roi, (new_x2 - new_x1, new_y2 - new_y1))
739
+
740
+ # Create a mask for smooth blending (NOTE(review): GaussianBlur on an all-ones mask with the default reflective border stays ~1 everywhere, so edges may not actually feather — consider zeroing a border margin first; confirm intent)
741
+ mask = np.zeros((new_y2 - new_y1, new_x2 - new_x1), dtype=np.float32)
742
+ cv2.rectangle(mask, (0, 0), (new_x2 - new_x1, new_y2 - new_y1), 1, -1)
743
+ mask = cv2.GaussianBlur(mask, (21, 21), 11)
744
+
745
+ # Blend the enlarged ROI with the original frame
746
+ for c in range(3): # For each color channel
747
+ frame[new_y1:new_y2, new_x1:new_x2, c] = (
748
+ frame[new_y1:new_y2, new_x1:new_x2, c] * (1 - mask) +
749
+ enlarged_roi[:, :, c] * mask
750
+ )
751
 
752
  except Exception as e:
753
  print(f"Error drawing {box_style} style box: {str(e)}")
 
1039
  grid_rows=1,
1040
  grid_cols=1,
1041
  box_style="censor",
1042
+ magnify_factor=2.0,
1043
  ):
1044
  """Process a video to detect and visualize specified objects."""
1045
  try:
 
1049
  # Load model
1050
  print("Loading Moondream model...")
1051
  model, tokenizer = load_moondream()
1052
+
1053
+ # Attach magnify_factor as an attribute on the model object so draw_ad_boxes can read it
1054
+ model.magnify_factor = magnify_factor
1055
 
1056
  # Get video properties
1057
  props = get_video_properties(video_path)
 
1224
  parser.add_argument(
1225
  "--box-style",
1226
  choices=["censor", "bounding-box", "hitmarker", "sam", "sam-fast", "fuzzy-blur",
1227
+ "pixelated-blur", "intense-pixelated-blur", "obfuscated-pixel", "magnify"],
1228
  default="censor",
1229
  help="Style of detection visualization (default: censor)",
1230
  )
 
1264
  grid_rows=args.rows,
1265
  grid_cols=args.cols,
1266
  box_style=args.box_style,
1267
+ magnify_factor=args.magnify_factor,
1268
  )
1269
  if output_path:
1270
  success_count += 1