ParsaKhaz committed on
Commit
5f5e577
·
verified ·
1 Parent(s): 98e66b9

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +25 -6
  2. main.py +43 -1
app.py CHANGED
@@ -1,6 +1,6 @@
1
  #!/usr/bin/env python3
2
  # import spaces first
3
- import spaces
4
  import gradio as gr
5
  import os
6
  from main import load_moondream, process_video, load_sam_model
@@ -27,9 +27,9 @@ print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
27
  model, tokenizer = None, None
28
 
29
  # Uncomment for Hugging Face Spaces
30
- @spaces.GPU(duration=120)
31
  def process_video_file(
32
- video_file, target_object, box_style, ffmpeg_preset, grid_rows, grid_cols, test_mode, test_duration
33
  ):
34
  """Process a video file through the Gradio interface."""
35
  try:
@@ -326,11 +326,29 @@ with gr.Blocks(title="Promptable Content Moderation") as app:
326
 
327
  with gr.Accordion("Advanced Settings", open=False):
328
  box_style_input = gr.Radio(
329
- choices=["censor", "bounding-box", "hitmarker", "sam", "sam-fast", "fuzzy-blur", "pixelated-blur", "intense-pixelated-blur", "obfuscated-pixel"],
330
  value="obfuscated-pixel",
331
  label="Visualization Style",
332
- info="Choose how to display moderations: censor (black boxes), bounding-box (red boxes with labels), hitmarker (COD-style markers), sam (precise segmentation), sam-fast (faster but less precise segmentation), fuzzy-blur (Gaussian blur), pixelated-blur (pixelated with blur), obfuscated-pixel (advanced pixelation with neighborhood averaging)",
333
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
  preset_input = gr.Dropdown(
335
  choices=[
336
  "ultrafast",
@@ -355,7 +373,7 @@ with gr.Blocks(title="Promptable Content Moderation") as app:
355
  )
356
 
357
  test_mode_input = gr.Checkbox(
358
- label="Test Mode (Process first 3 seconds only)",
359
  value=True,
360
  info="Enable to quickly test settings on a short clip before processing the full video (recommended). If using the data visualizations, disable.",
361
  )
@@ -504,6 +522,7 @@ with gr.Blocks(title="Promptable Content Moderation") as app:
504
  cols_input,
505
  test_mode_input,
506
  test_duration_input,
 
507
  ],
508
  outputs=[video_output, json_output],
509
  )
 
1
  #!/usr/bin/env python3
2
  # import spaces first
3
+ # import spaces
4
  import gradio as gr
5
  import os
6
  from main import load_moondream, process_video, load_sam_model
 
27
  model, tokenizer = None, None
28
 
29
  # Uncomment for Hugging Face Spaces
30
+ # @spaces.GPU(duration=120)
31
  def process_video_file(
32
+ video_file, target_object, box_style, ffmpeg_preset, grid_rows, grid_cols, test_mode, test_duration, magnify_factor
33
  ):
34
  """Process a video file through the Gradio interface."""
35
  try:
 
326
 
327
  with gr.Accordion("Advanced Settings", open=False):
328
  box_style_input = gr.Radio(
329
+ choices=["censor", "bounding-box", "hitmarker", "sam", "sam-fast", "fuzzy-blur", "pixelated-blur", "intense-pixelated-blur", "obfuscated-pixel", "magnify"],
330
  value="obfuscated-pixel",
331
  label="Visualization Style",
332
+ info="Choose how to display moderations: censor (black boxes), bounding-box (red boxes with labels), hitmarker (COD-style markers), sam (precise segmentation), sam-fast (faster but less precise segmentation), fuzzy-blur (Gaussian blur), pixelated-blur (pixelated with blur), obfuscated-pixel (advanced pixelation with neighborhood averaging), magnify (enlarges detected regions)",
333
  )
334
+
335
+ magnify_factor = gr.Slider(
336
+ minimum=1.1, maximum=5.0, value=2.0, step=0.1,
337
+ label="Magnification Factor",
338
+ info="How much to enlarge detected regions (only used with magnify style)",
339
+ visible=False
340
+ )
341
+
342
+ # Show/hide magnification slider based on style selection
343
+ def update_magnify_visibility(style):
344
+ return gr.update(visible=(style == "magnify"))
345
+
346
+ box_style_input.change(
347
+ fn=update_magnify_visibility,
348
+ inputs=[box_style_input],
349
+ outputs=[magnify_factor]
350
+ )
351
+
352
  preset_input = gr.Dropdown(
353
  choices=[
354
  "ultrafast",
 
373
  )
374
 
375
  test_mode_input = gr.Checkbox(
376
+ label="Test Mode (Process first X seconds only)",
377
  value=True,
378
  info="Enable to quickly test settings on a short clip before processing the full video (recommended). If using the data visualizations, disable.",
379
  )
 
522
  cols_input,
523
  test_mode_input,
524
  test_duration_input,
525
+ magnify_factor,
526
  ],
527
  outputs=[video_output, json_output],
528
  )
main.py CHANGED
@@ -711,6 +711,43 @@ def draw_ad_boxes(frame, detected_objects, detect_keyword, model, box_style="cen
711
  except Exception as e:
712
  print(f"Error processing individual point: {str(e)}")
713
  print(f"Point data: {point}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
714
 
715
  except Exception as e:
716
  print(f"Error drawing {box_style} style box: {str(e)}")
@@ -1002,6 +1039,7 @@ def process_video(
1002
  grid_rows=1,
1003
  grid_cols=1,
1004
  box_style="censor",
 
1005
  ):
1006
  """Process a video to detect and visualize specified objects."""
1007
  try:
@@ -1011,6 +1049,9 @@ def process_video(
1011
  # Load model
1012
  print("Loading Moondream model...")
1013
  model, tokenizer = load_moondream()
 
 
 
1014
 
1015
  # Get video properties
1016
  props = get_video_properties(video_path)
@@ -1183,7 +1224,7 @@ def main():
1183
  parser.add_argument(
1184
  "--box-style",
1185
  choices=["censor", "bounding-box", "hitmarker", "sam", "sam-fast", "fuzzy-blur",
1186
- "pixelated-blur", "intense-pixelated-blur", "obfuscated-pixel"],
1187
  default="censor",
1188
  help="Style of detection visualization (default: censor)",
1189
  )
@@ -1223,6 +1264,7 @@ def main():
1223
  grid_rows=args.rows,
1224
  grid_cols=args.cols,
1225
  box_style=args.box_style,
 
1226
  )
1227
  if output_path:
1228
  success_count += 1
 
711
  except Exception as e:
712
  print(f"Error processing individual point: {str(e)}")
713
  print(f"Point data: {point}")
714
+ elif box_style == "magnify":
715
+ # Calculate the center point of the detection
716
+ center_x = (x1 + x2) // 2
717
+ center_y = (y1 + y2) // 2
718
+
719
+ # Calculate original dimensions
720
+ orig_width = x2 - x1
721
+ orig_height = y2 - y1
722
+
723
+ # Calculate new dimensions using magnify_factor parameter
724
+ magnify_factor = getattr(model, "magnify_factor", 2.0) # Default to 2x if not specified
725
+ new_width = int(orig_width * magnify_factor)
726
+ new_height = int(orig_height * magnify_factor)
727
+
728
+ # Calculate new coordinates ensuring they stay within frame bounds
729
+ new_x1 = max(0, center_x - new_width // 2)
730
+ new_y1 = max(0, center_y - new_height // 2)
731
+ new_x2 = min(width - 1, new_x1 + new_width)
732
+ new_y2 = min(height - 1, new_y1 + new_height)
733
+
734
+ # Extract the original ROI
735
+ roi = frame[y1:y2, x1:x2]
736
+
737
+ # Resize the ROI using the magnify_factor
738
+ enlarged_roi = cv2.resize(roi, (new_x2 - new_x1, new_y2 - new_y1))
739
+
740
+ # Create a mask for smooth blending (NOTE(review): GaussianBlur on an all-ones mask with the default reflective border stays ~1 everywhere, so edges may not actually feather — consider zeroing a border margin first; confirm intent)
741
+ mask = np.zeros((new_y2 - new_y1, new_x2 - new_x1), dtype=np.float32)
742
+ cv2.rectangle(mask, (0, 0), (new_x2 - new_x1, new_y2 - new_y1), 1, -1)
743
+ mask = cv2.GaussianBlur(mask, (21, 21), 11)
744
+
745
+ # Blend the enlarged ROI with the original frame
746
+ for c in range(3): # For each color channel
747
+ frame[new_y1:new_y2, new_x1:new_x2, c] = (
748
+ frame[new_y1:new_y2, new_x1:new_x2, c] * (1 - mask) +
749
+ enlarged_roi[:, :, c] * mask
750
+ )
751
 
752
  except Exception as e:
753
  print(f"Error drawing {box_style} style box: {str(e)}")
 
1039
  grid_rows=1,
1040
  grid_cols=1,
1041
  box_style="censor",
1042
+ magnify_factor=2.0,
1043
  ):
1044
  """Process a video to detect and visualize specified objects."""
1045
  try:
 
1049
  # Load model
1050
  print("Loading Moondream model...")
1051
  model, tokenizer = load_moondream()
1052
+
1053
+ # Attach magnify_factor as an attribute on the model object so draw_ad_boxes can read it
1054
+ model.magnify_factor = magnify_factor
1055
 
1056
  # Get video properties
1057
  props = get_video_properties(video_path)
 
1224
  parser.add_argument(
1225
  "--box-style",
1226
  choices=["censor", "bounding-box", "hitmarker", "sam", "sam-fast", "fuzzy-blur",
1227
+ "pixelated-blur", "intense-pixelated-blur", "obfuscated-pixel", "magnify"],
1228
  default="censor",
1229
  help="Style of detection visualization (default: censor)",
1230
  )
 
1264
  grid_rows=args.rows,
1265
  grid_cols=args.cols,
1266
  box_style=args.box_style,
1267
+ magnify_factor=args.magnify_factor,
1268
  )
1269
  if output_path:
1270
  success_count += 1