Upload folder using huggingface_hub
app.py CHANGED
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 # import spaces first
-import spaces
+# import spaces
 import gradio as gr
 import os
 from main import load_moondream, process_video, load_sam_model
@@ -27,9 +27,9 @@ print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
 model, tokenizer = None, None
 
 # Uncomment for Hugging Face Spaces
-@spaces.GPU(duration=120)
+# @spaces.GPU(duration=120)
 def process_video_file(
-    video_file, target_object, box_style, ffmpeg_preset, grid_rows, grid_cols, test_mode, test_duration
+    video_file, target_object, box_style, ffmpeg_preset, grid_rows, grid_cols, test_mode, test_duration, magnify_factor
 ):
     """Process a video file through the Gradio interface."""
     try:
@@ -326,11 +326,29 @@ with gr.Blocks(title="Promptable Content Moderation") as app:
 
     with gr.Accordion("Advanced Settings", open=False):
         box_style_input = gr.Radio(
-            choices=["censor", "bounding-box", "hitmarker", "sam", "sam-fast", "fuzzy-blur", "pixelated-blur", "intense-pixelated-blur", "obfuscated-pixel"],
+            choices=["censor", "bounding-box", "hitmarker", "sam", "sam-fast", "fuzzy-blur", "pixelated-blur", "intense-pixelated-blur", "obfuscated-pixel", "magnify"],
             value="obfuscated-pixel",
             label="Visualization Style",
-            info="Choose how to display moderations: censor (black boxes), bounding-box (red boxes with labels), hitmarker (COD-style markers), sam (precise segmentation), sam-fast (faster but less precise segmentation), fuzzy-blur (Gaussian blur), pixelated-blur (pixelated with blur), obfuscated-pixel (advanced pixelation with neighborhood averaging)",
+            info="Choose how to display moderations: censor (black boxes), bounding-box (red boxes with labels), hitmarker (COD-style markers), sam (precise segmentation), sam-fast (faster but less precise segmentation), fuzzy-blur (Gaussian blur), pixelated-blur (pixelated with blur), obfuscated-pixel (advanced pixelation with neighborhood averaging), magnify (enlarges detected regions)",
         )
+
+        magnify_factor = gr.Slider(
+            minimum=1.1, maximum=5.0, value=2.0, step=0.1,
+            label="Magnification Factor",
+            info="How much to enlarge detected regions (only used with magnify style)",
+            visible=False
+        )
+
+        # Show/hide magnification slider based on style selection
+        def update_magnify_visibility(style):
+            return gr.update(visible=(style == "magnify"))
+
+        box_style_input.change(
+            fn=update_magnify_visibility,
+            inputs=[box_style_input],
+            outputs=[magnify_factor]
+        )
+
         preset_input = gr.Dropdown(
             choices=[
                 "ultrafast",
@@ -355,7 +373,7 @@ with gr.Blocks(title="Promptable Content Moderation") as app:
         )
 
         test_mode_input = gr.Checkbox(
-            label="Test Mode (Process first
+            label="Test Mode (Process first X seconds only)",
             value=True,
             info="Enable to quickly test settings on a short clip before processing the full video (recommended). If using the data visualizations, disable.",
         )
@@ -504,6 +522,7 @@ with gr.Blocks(title="Promptable Content Moderation") as app:
             cols_input,
             test_mode_input,
             test_duration_input,
+            magnify_factor,
         ],
         outputs=[video_output, json_output],
     )
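The first two hunks disable the ZeroGPU hooks: `import spaces` and the `@spaces.GPU(duration=120)` decorator are commented out, and `process_video_file` gains a trailing `magnify_factor` parameter. For reference, a minimal sketch of the Hugging Face ZeroGPU pattern the commented-out lines implement, assuming a ZeroGPU Space (`heavy_inference` is a hypothetical function, not from this repo):

# On ZeroGPU Spaces, `spaces` must be imported before anything that initializes CUDA.
import spaces
import torch

@spaces.GPU(duration=120)  # a GPU is attached only while this call runs, for up to 120 s
def heavy_inference(x: torch.Tensor) -> torch.Tensor:
    # CUDA is only guaranteed to be available inside the decorated function.
    return (x.to("cuda") * 2).cpu()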
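The new `magnify_factor` slider starts hidden and is toggled by the `box_style_input.change` handler via `gr.update`. A self-contained sketch of the same show/hide pattern, with illustrative component names:

import gradio as gr

with gr.Blocks() as demo:
    style = gr.Radio(choices=["censor", "magnify"], value="censor", label="Style")
    factor = gr.Slider(minimum=1.1, maximum=5.0, value=2.0, step=0.1,
                       label="Magnification Factor", visible=False)

    def toggle_factor(choice):
        # gr.update changes properties of an existing component without recreating it
        return gr.update(visible=(choice == "magnify"))

    # Re-evaluate visibility whenever the radio selection changes
    style.change(fn=toggle_factor, inputs=[style], outputs=[factor])

demo.launch()

Note that in the commit the slider object itself is named `magnify_factor` and is appended to the event handler's `inputs` list, which is how its value reaches the new function parameter of the same name.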
main.py CHANGED
@@ -711,6 +711,43 @@ def draw_ad_boxes(frame, detected_objects, detect_keyword, model, box_style="censor"
             except Exception as e:
                 print(f"Error processing individual point: {str(e)}")
                 print(f"Point data: {point}")
+        elif box_style == "magnify":
+            # Calculate the center point of the detection
+            center_x = (x1 + x2) // 2
+            center_y = (y1 + y2) // 2
+
+            # Calculate original dimensions
+            orig_width = x2 - x1
+            orig_height = y2 - y1
+
+            # Calculate new dimensions using magnify_factor parameter
+            magnify_factor = getattr(model, "magnify_factor", 2.0)  # Default to 2x if not specified
+            new_width = int(orig_width * magnify_factor)
+            new_height = int(orig_height * magnify_factor)
+
+            # Calculate new coordinates ensuring they stay within frame bounds
+            new_x1 = max(0, center_x - new_width // 2)
+            new_y1 = max(0, center_y - new_height // 2)
+            new_x2 = min(width - 1, new_x1 + new_width)
+            new_y2 = min(height - 1, new_y1 + new_height)
+
+            # Extract the original ROI
+            roi = frame[y1:y2, x1:x2]
+
+            # Resize the ROI using the magnify_factor
+            enlarged_roi = cv2.resize(roi, (new_x2 - new_x1, new_y2 - new_y1))
+
+            # Create a mask for smooth blending
+            mask = np.zeros((new_y2 - new_y1, new_x2 - new_x1), dtype=np.float32)
+            cv2.rectangle(mask, (0, 0), (new_x2 - new_x1, new_y2 - new_y1), 1, -1)
+            mask = cv2.GaussianBlur(mask, (21, 21), 11)
+
+            # Blend the enlarged ROI with the original frame
+            for c in range(3):  # For each color channel
+                frame[new_y1:new_y2, new_x1:new_x2, c] = (
+                    frame[new_y1:new_y2, new_x1:new_x2, c] * (1 - mask) +
+                    enlarged_roi[:, :, c] * mask
+                )
 
     except Exception as e:
         print(f"Error drawing {box_style} style box: {str(e)}")
@@ -1002,6 +1039,7 @@ def process_video(
     grid_rows=1,
     grid_cols=1,
     box_style="censor",
+    magnify_factor=2.0,
 ):
     """Process a video to detect and visualize specified objects."""
     try:
@@ -1011,6 +1049,9 @@ def process_video(
         # Load model
         print("Loading Moondream model...")
         model, tokenizer = load_moondream()
+
+        # Add magnify_factor to model dict for use in draw_ad_boxes
+        model.magnify_factor = magnify_factor
 
         # Get video properties
         props = get_video_properties(video_path)
@@ -1183,7 +1224,7 @@ def main():
     parser.add_argument(
         "--box-style",
         choices=["censor", "bounding-box", "hitmarker", "sam", "sam-fast", "fuzzy-blur",
-                 "pixelated-blur", "intense-pixelated-blur", "obfuscated-pixel"],
+                 "pixelated-blur", "intense-pixelated-blur", "obfuscated-pixel", "magnify"],
        default="censor",
        help="Style of detection visualization (default: censor)",
    )
@@ -1223,6 +1264,7 @@ def main():
            grid_rows=args.rows,
            grid_cols=args.cols,
            box_style=args.box_style,
+           magnify_factor=args.magnify_factor,
        )
        if output_path:
            success_count += 1
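The new "magnify" branch crops the detected box, scales it up around the box centre, clips the enlarged box to the frame, and alpha-blends it back through a Gaussian-blurred mask; the factor travels from `process_video` to `draw_ad_boxes` as an attribute stashed on the model object and read back with `getattr(model, "magnify_factor", 2.0)`. A self-contained sketch of the same idea on a dummy frame. One deliberate difference: the commit fills its mask edge to edge before blurring, which leaves the mask at 1 everywhere (effectively a hard paste), so this sketch insets the fill by a margin to make the feathering actually visible:

import cv2
import numpy as np

def magnify_region(frame, x1, y1, x2, y2, factor=2.0, margin=10):
    h, w = frame.shape[:2]
    cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
    new_w, new_h = int((x2 - x1) * factor), int((y2 - y1) * factor)

    # Clip the enlarged box to the frame bounds.
    nx1, ny1 = max(0, cx - new_w // 2), max(0, cy - new_h // 2)
    nx2, ny2 = min(w - 1, nx1 + new_w), min(h - 1, ny1 + new_h)

    # Enlarge the original ROI to the clipped target size (dsize is width, height).
    roi = cv2.resize(frame[y1:y2, x1:x2], (nx2 - nx1, ny2 - ny1))

    # Feathered mask: 1 in the interior, fading toward 0 at the border.
    mask = np.zeros((ny2 - ny1, nx2 - nx1), dtype=np.float32)
    cv2.rectangle(mask, (margin, margin), (nx2 - nx1 - margin, ny2 - ny1 - margin), 1.0, -1)
    mask = cv2.GaussianBlur(mask, (21, 21), 11)[..., None]

    # Blend the enlarged ROI over the frame; cast back to uint8 at the end.
    region = frame[ny1:ny2, nx1:nx2].astype(np.float32)
    frame[ny1:ny2, nx1:nx2] = (region * (1 - mask) + roi * mask).astype(np.uint8)
    return frame

frame = np.full((240, 320, 3), 128, dtype=np.uint8)
frame = magnify_region(frame, 100, 80, 160, 140)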
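One loose end worth flagging: the final hunk passes `magnify_factor=args.magnify_factor` into `process_video`, but none of the hunks shown adds a `--magnify-factor` option to the parser, and the unchanged +41 line offset between the `--box-style` hunk and the call-site hunk suggests nothing was inserted between them. If the option really is absent, `args.magnify_factor` raises `AttributeError` at runtime, which could explain the Space's runtime-error status. A hypothetical sketch of the argument the code appears to expect, with the default taken from the new `process_video` signature:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--magnify-factor",  # hypothetical: not present in any hunk shown above
    type=float,
    default=2.0,  # matches the process_video default added in this commit
    help="Enlargement factor for the magnify style (default: 2.0)",
)
args = parser.parse_args([])
print(args.magnify_factor)  # 2.0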