Upload folder using huggingface_hub
app.py
CHANGED
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 # import spaces first
-import spaces
+# import spaces
 import gradio as gr
 import os
 from main import load_moondream, process_video, load_sam_model
@@ -27,9 +27,9 @@ print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
 model, tokenizer = None, None
 
 # Uncomment for Hugging Face Spaces
-@spaces.GPU(duration=120)
+# @spaces.GPU(duration=120)
 def process_video_file(
-    video_file, target_object, box_style, ffmpeg_preset, grid_rows, grid_cols, test_mode, test_duration
+    video_file, target_object, box_style, ffmpeg_preset, grid_rows, grid_cols, test_mode, test_duration, magnify_factor
 ):
     """Process a video file through the Gradio interface."""
     try:
@@ -326,11 +326,29 @@ with gr.Blocks(title="Promptable Content Moderation") as app:
 
         with gr.Accordion("Advanced Settings", open=False):
             box_style_input = gr.Radio(
-                choices=["censor", "bounding-box", "hitmarker", "sam", "sam-fast", "fuzzy-blur", "pixelated-blur", "intense-pixelated-blur", "obfuscated-pixel"],
+                choices=["censor", "bounding-box", "hitmarker", "sam", "sam-fast", "fuzzy-blur", "pixelated-blur", "intense-pixelated-blur", "obfuscated-pixel", "magnify"],
                 value="obfuscated-pixel",
                 label="Visualization Style",
-                info="Choose how to display moderations: censor (black boxes), bounding-box (red boxes with labels), hitmarker (COD-style markers), sam (precise segmentation), sam-fast (faster but less precise segmentation), fuzzy-blur (Gaussian blur), pixelated-blur (pixelated with blur), obfuscated-pixel (advanced pixelation with neighborhood averaging)",
+                info="Choose how to display moderations: censor (black boxes), bounding-box (red boxes with labels), hitmarker (COD-style markers), sam (precise segmentation), sam-fast (faster but less precise segmentation), fuzzy-blur (Gaussian blur), pixelated-blur (pixelated with blur), obfuscated-pixel (advanced pixelation with neighborhood averaging), magnify (enlarges detected regions)",
             )
+
+            magnify_factor = gr.Slider(
+                minimum=1.1, maximum=5.0, value=2.0, step=0.1,
+                label="Magnification Factor",
+                info="How much to enlarge detected regions (only used with magnify style)",
+                visible=False
+            )
+
+            # Show/hide magnification slider based on style selection
+            def update_magnify_visibility(style):
+                return gr.update(visible=(style == "magnify"))
+
+            box_style_input.change(
+                fn=update_magnify_visibility,
+                inputs=[box_style_input],
+                outputs=[magnify_factor]
+            )
+
             preset_input = gr.Dropdown(
                 choices=[
                     "ultrafast",
@@ -355,7 +373,7 @@ with gr.Blocks(title="Promptable Content Moderation") as app:
             )
 
             test_mode_input = gr.Checkbox(
-                label="Test Mode (Process first
+                label="Test Mode (Process first X seconds only)",
                 value=True,
                 info="Enable to quickly test settings on a short clip before processing the full video (recommended). If using the data visualizations, disable.",
             )
@@ -504,6 +522,7 @@ with gr.Blocks(title="Promptable Content Moderation") as app:
                 cols_input,
                 test_mode_input,
                 test_duration_input,
+                magnify_factor,
             ],
             outputs=[video_output, json_output],
         )
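A note on the first two hunks: they disable the Space's ZeroGPU integration. With "import spaces" and the @spaces.GPU(duration=120) decorator commented out, process_video_file no longer requests a GPU allocation when running on Spaces hardware, even though the "Uncomment for Hugging Face Spaces" marker is left in place.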
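The Accordion hunk wires a standard Gradio show/hide pattern: a .change event on the radio returns gr.update(visible=...) targeting the slider. A minimal standalone sketch of the same pattern, with illustrative component names not taken from the Space:

import gradio as gr

with gr.Blocks() as demo:
    # The radio's value drives the visibility of the slider below it.
    style = gr.Radio(choices=["censor", "magnify"], value="censor", label="Style")
    factor = gr.Slider(minimum=1.1, maximum=5.0, value=2.0, step=0.1,
                       label="Magnification Factor", visible=False)

    def toggle(choice):
        # gr.update patches component properties without recreating the component.
        return gr.update(visible=(choice == "magnify"))

    style.change(fn=toggle, inputs=[style], outputs=[factor])

if __name__ == "__main__":
    demo.launch()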
main.py
CHANGED
@@ -711,6 +711,43 @@ def draw_ad_boxes(frame, detected_objects, detect_keyword, model, box_style="censor"
             except Exception as e:
                 print(f"Error processing individual point: {str(e)}")
                 print(f"Point data: {point}")
+        elif box_style == "magnify":
+            # Calculate the center point of the detection
+            center_x = (x1 + x2) // 2
+            center_y = (y1 + y2) // 2
+
+            # Calculate original dimensions
+            orig_width = x2 - x1
+            orig_height = y2 - y1
+
+            # Calculate new dimensions using magnify_factor parameter
+            magnify_factor = getattr(model, "magnify_factor", 2.0)  # Default to 2x if not specified
+            new_width = int(orig_width * magnify_factor)
+            new_height = int(orig_height * magnify_factor)
+
+            # Calculate new coordinates ensuring they stay within frame bounds
+            new_x1 = max(0, center_x - new_width // 2)
+            new_y1 = max(0, center_y - new_height // 2)
+            new_x2 = min(width - 1, new_x1 + new_width)
+            new_y2 = min(height - 1, new_y1 + new_height)
+
+            # Extract the original ROI
+            roi = frame[y1:y2, x1:x2]
+
+            # Resize the ROI using the magnify_factor
+            enlarged_roi = cv2.resize(roi, (new_x2 - new_x1, new_y2 - new_y1))
+
+            # Create a mask for smooth blending
+            mask = np.zeros((new_y2 - new_y1, new_x2 - new_x1), dtype=np.float32)
+            cv2.rectangle(mask, (0, 0), (new_x2 - new_x1, new_y2 - new_y1), 1, -1)
+            mask = cv2.GaussianBlur(mask, (21, 21), 11)
+
+            # Blend the enlarged ROI with the original frame
+            for c in range(3):  # For each color channel
+                frame[new_y1:new_y2, new_x1:new_x2, c] = (
+                    frame[new_y1:new_y2, new_x1:new_x2, c] * (1 - mask) +
+                    enlarged_roi[:, :, c] * mask
+                )
 
     except Exception as e:
         print(f"Error drawing {box_style} style box: {str(e)}")
@@ -1002,6 +1039,7 @@ def process_video(
     grid_rows=1,
     grid_cols=1,
     box_style="censor",
+    magnify_factor=2.0,
 ):
     """Process a video to detect and visualize specified objects."""
     try:
@@ -1011,6 +1049,9 @@
         # Load model
         print("Loading Moondream model...")
         model, tokenizer = load_moondream()
+
+        # Add magnify_factor to model dict for use in draw_ad_boxes
+        model.magnify_factor = magnify_factor
 
         # Get video properties
         props = get_video_properties(video_path)
@@ -1183,7 +1224,7 @@ def main():
     parser.add_argument(
         "--box-style",
         choices=["censor", "bounding-box", "hitmarker", "sam", "sam-fast", "fuzzy-blur",
-                 "pixelated-blur", "intense-pixelated-blur", "obfuscated-pixel"],
+                 "pixelated-blur", "intense-pixelated-blur", "obfuscated-pixel", "magnify"],
        default="censor",
        help="Style of detection visualization (default: censor)",
    )
@@ -1223,6 +1264,7 @@ def main():
         grid_rows=args.rows,
         grid_cols=args.cols,
         box_style=args.box_style,
+        magnify_factor=args.magnify_factor,
     )
     if output_path:
         success_count += 1
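One subtlety in the magnify branch: cv2.rectangle(mask, (0, 0), (w, h), 1, -1) fills the entire mask with 1.0, and Gaussian-blurring a constant image returns the same constant under OpenCV's default reflected-border handling, so the "smooth blending" mask stays at 1.0 everywhere and the per-channel blend reduces to a hard paste of the enlarged ROI. Feathered edges would need the filled rectangle inset from the borders so the blur has room to taper. A sketch of that variant, where the margin parameter is an assumption rather than something from the commit:

import cv2
import numpy as np

def feathered_mask(h: int, w: int, margin: int = 10) -> np.ndarray:
    """Alpha mask that is 1.0 in the interior and tapers toward 0 at the edges."""
    mask = np.zeros((h, w), dtype=np.float32)
    # Inset the filled rectangle so a zero border remains for the blur to roll off across.
    cv2.rectangle(mask, (margin, margin), (w - 1 - margin, h - 1 - margin), 1, -1)
    return cv2.GaussianBlur(mask, (21, 21), 11)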
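Setting magnify_factor as an attribute on the loaded model object is a side channel that avoids threading the value through draw_ad_boxes's signature; the drawing code reads it back with getattr(model, "magnify_factor", 2.0), so any call path that skips process_video silently falls back to 2x. (Despite the comment, this is an instance attribute on the model object, not a dict entry.)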
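The hunks shown here consume args.magnify_factor but never register a matching command-line option, so unless a hunk outside this view adds one, the args.magnify_factor access raises AttributeError. A hypothetical definition that would line up with the rest of the commit, with the flag name, type, and help text as assumptions:

parser.add_argument(
    "--magnify-factor",  # argparse exposes this as args.magnify_factor
    type=float,
    default=2.0,  # matches process_video's default (assumption)
    help="Enlargement factor for the magnify style (default: 2.0)",
)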