Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	Upload folder using huggingface_hub
Browse files
    	
        app.py
    CHANGED
    
    | @@ -1,6 +1,6 @@ | |
| 1 | 
             
            #!/usr/bin/env python3
         | 
| 2 | 
             
            # import spaces first
         | 
| 3 | 
            -
            import spaces
         | 
| 4 | 
             
            import gradio as gr
         | 
| 5 | 
             
            import os
         | 
| 6 | 
             
            from main import load_moondream, process_video, load_sam_model
         | 
| @@ -27,9 +27,9 @@ print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}") | |
| 27 | 
             
            model, tokenizer = None, None
         | 
| 28 |  | 
| 29 | 
             
            # Uncomment for Hugging Face Spaces
         | 
| 30 | 
            -
            @spaces.GPU(duration=120)
         | 
| 31 | 
             
            def process_video_file(
         | 
| 32 | 
            -
                video_file, target_object, box_style, ffmpeg_preset, grid_rows, grid_cols, test_mode, test_duration
         | 
| 33 | 
             
            ):
         | 
| 34 | 
             
                """Process a video file through the Gradio interface."""
         | 
| 35 | 
             
                try:
         | 
| @@ -326,11 +326,29 @@ with gr.Blocks(title="Promptable Content Moderation") as app: | |
| 326 |  | 
| 327 | 
             
                                with gr.Accordion("Advanced Settings", open=False):
         | 
| 328 | 
             
                                    box_style_input = gr.Radio(
         | 
| 329 | 
            -
                                        choices=["censor", "bounding-box", "hitmarker", "sam", "sam-fast", "fuzzy-blur", "pixelated-blur", "intense-pixelated-blur", "obfuscated-pixel"],
         | 
| 330 | 
             
                                        value="obfuscated-pixel",
         | 
| 331 | 
             
                                        label="Visualization Style",
         | 
| 332 | 
            -
                                        info="Choose how to display moderations: censor (black boxes), bounding-box (red boxes with labels), hitmarker (COD-style markers), sam (precise segmentation), sam-fast (faster but less precise segmentation), fuzzy-blur (Gaussian blur), pixelated-blur (pixelated with blur), obfuscated-pixel (advanced pixelation with neighborhood averaging)",
         | 
| 333 | 
             
                                    )
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 334 | 
             
                                    preset_input = gr.Dropdown(
         | 
| 335 | 
             
                                        choices=[
         | 
| 336 | 
             
                                            "ultrafast",
         | 
| @@ -355,7 +373,7 @@ with gr.Blocks(title="Promptable Content Moderation") as app: | |
| 355 | 
             
                                        )
         | 
| 356 |  | 
| 357 | 
             
                                    test_mode_input = gr.Checkbox(
         | 
| 358 | 
            -
                                        label="Test Mode (Process first  | 
| 359 | 
             
                                        value=True,
         | 
| 360 | 
             
                                        info="Enable to quickly test settings on a short clip before processing the full video (recommended). If using the data visualizations, disable.",
         | 
| 361 | 
             
                                    )
         | 
| @@ -504,6 +522,7 @@ with gr.Blocks(title="Promptable Content Moderation") as app: | |
| 504 | 
             
                        cols_input,
         | 
| 505 | 
             
                        test_mode_input,
         | 
| 506 | 
             
                        test_duration_input,
         | 
|  | |
| 507 | 
             
                    ],
         | 
| 508 | 
             
                    outputs=[video_output, json_output],
         | 
| 509 | 
             
                )
         | 
|  | |
| 1 | 
             
            #!/usr/bin/env python3
         | 
| 2 | 
             
            # import spaces first
         | 
| 3 | 
            +
            # import spaces
         | 
| 4 | 
             
            import gradio as gr
         | 
| 5 | 
             
            import os
         | 
| 6 | 
             
            from main import load_moondream, process_video, load_sam_model
         | 
|  | |
| 27 | 
             
            model, tokenizer = None, None
         | 
| 28 |  | 
| 29 | 
             
            # Uncomment for Hugging Face Spaces
         | 
| 30 | 
            +
            # @spaces.GPU(duration=120)
         | 
| 31 | 
             
            def process_video_file(
         | 
| 32 | 
            +
                video_file, target_object, box_style, ffmpeg_preset, grid_rows, grid_cols, test_mode, test_duration, magnify_factor
         | 
| 33 | 
             
            ):
         | 
| 34 | 
             
                """Process a video file through the Gradio interface."""
         | 
| 35 | 
             
                try:
         | 
|  | |
| 326 |  | 
| 327 | 
             
                                with gr.Accordion("Advanced Settings", open=False):
         | 
| 328 | 
             
                                    box_style_input = gr.Radio(
         | 
| 329 | 
            +
                                        choices=["censor", "bounding-box", "hitmarker", "sam", "sam-fast", "fuzzy-blur", "pixelated-blur", "intense-pixelated-blur", "obfuscated-pixel", "magnify"],
         | 
| 330 | 
             
                                        value="obfuscated-pixel",
         | 
| 331 | 
             
                                        label="Visualization Style",
         | 
| 332 | 
            +
                                        info="Choose how to display moderations: censor (black boxes), bounding-box (red boxes with labels), hitmarker (COD-style markers), sam (precise segmentation), sam-fast (faster but less precise segmentation), fuzzy-blur (Gaussian blur), pixelated-blur (pixelated with blur), obfuscated-pixel (advanced pixelation with neighborhood averaging), magnify (enlarges detected regions)",
         | 
| 333 | 
             
                                    )
         | 
| 334 | 
            +
             | 
| 335 | 
            +
                                    magnify_factor = gr.Slider(
         | 
| 336 | 
            +
                                        minimum=1.1, maximum=5.0, value=2.0, step=0.1,
         | 
| 337 | 
            +
                                        label="Magnification Factor",
         | 
| 338 | 
            +
                                        info="How much to enlarge detected regions (only used with magnify style)",
         | 
| 339 | 
            +
                                        visible=False
         | 
| 340 | 
            +
                                    )
         | 
| 341 | 
            +
             | 
| 342 | 
            +
                                    # Show/hide magnification slider based on style selection
         | 
| 343 | 
            +
                                    def update_magnify_visibility(style):
         | 
| 344 | 
            +
                                        return gr.update(visible=(style == "magnify"))
         | 
| 345 | 
            +
                                    
         | 
| 346 | 
            +
                                    box_style_input.change(
         | 
| 347 | 
            +
                                        fn=update_magnify_visibility,
         | 
| 348 | 
            +
                                        inputs=[box_style_input],
         | 
| 349 | 
            +
                                        outputs=[magnify_factor]
         | 
| 350 | 
            +
                                    )
         | 
| 351 | 
            +
             | 
| 352 | 
             
                                    preset_input = gr.Dropdown(
         | 
| 353 | 
             
                                        choices=[
         | 
| 354 | 
             
                                            "ultrafast",
         | 
|  | |
| 373 | 
             
                                        )
         | 
| 374 |  | 
| 375 | 
             
                                    test_mode_input = gr.Checkbox(
         | 
| 376 | 
            +
                                        label="Test Mode (Process first X seconds only)",
         | 
| 377 | 
             
                                        value=True,
         | 
| 378 | 
             
                                        info="Enable to quickly test settings on a short clip before processing the full video (recommended). If using the data visualizations, disable.",
         | 
| 379 | 
             
                                    )
         | 
|  | |
| 522 | 
             
                        cols_input,
         | 
| 523 | 
             
                        test_mode_input,
         | 
| 524 | 
             
                        test_duration_input,
         | 
| 525 | 
            +
                        magnify_factor,
         | 
| 526 | 
             
                    ],
         | 
| 527 | 
             
                    outputs=[video_output, json_output],
         | 
| 528 | 
             
                )
         | 
    	
        main.py
    CHANGED
    
    | @@ -711,6 +711,43 @@ def draw_ad_boxes(frame, detected_objects, detect_keyword, model, box_style="cen | |
| 711 | 
             
                                        except Exception as e:
         | 
| 712 | 
             
                                            print(f"Error processing individual point: {str(e)}")
         | 
| 713 | 
             
                                            print(f"Point data: {point}")
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 714 |  | 
| 715 | 
             
                    except Exception as e:
         | 
| 716 | 
             
                        print(f"Error drawing {box_style} style box: {str(e)}")
         | 
| @@ -1002,6 +1039,7 @@ def process_video( | |
| 1002 | 
             
                grid_rows=1,
         | 
| 1003 | 
             
                grid_cols=1,
         | 
| 1004 | 
             
                box_style="censor",
         | 
|  | |
| 1005 | 
             
            ):
         | 
| 1006 | 
             
                """Process a video to detect and visualize specified objects."""
         | 
| 1007 | 
             
                try:
         | 
| @@ -1011,6 +1049,9 @@ def process_video( | |
| 1011 | 
             
                    # Load model
         | 
| 1012 | 
             
                    print("Loading Moondream model...")
         | 
| 1013 | 
             
                    model, tokenizer = load_moondream()
         | 
|  | |
|  | |
|  | |
| 1014 |  | 
| 1015 | 
             
                    # Get video properties
         | 
| 1016 | 
             
                    props = get_video_properties(video_path)
         | 
| @@ -1183,7 +1224,7 @@ def main(): | |
| 1183 | 
             
                parser.add_argument(
         | 
| 1184 | 
             
                    "--box-style",
         | 
| 1185 | 
             
                    choices=["censor", "bounding-box", "hitmarker", "sam", "sam-fast", "fuzzy-blur", 
         | 
| 1186 | 
            -
                            "pixelated-blur", "intense-pixelated-blur", "obfuscated-pixel"],
         | 
| 1187 | 
             
                    default="censor",
         | 
| 1188 | 
             
                    help="Style of detection visualization (default: censor)",
         | 
| 1189 | 
             
                )
         | 
| @@ -1223,6 +1264,7 @@ def main(): | |
| 1223 | 
             
                        grid_rows=args.rows,
         | 
| 1224 | 
             
                        grid_cols=args.cols,
         | 
| 1225 | 
             
                        box_style=args.box_style,
         | 
|  | |
| 1226 | 
             
                    )
         | 
| 1227 | 
             
                    if output_path:
         | 
| 1228 | 
             
                        success_count += 1
         | 
|  | |
| 711 | 
             
                                        except Exception as e:
         | 
| 712 | 
             
                                            print(f"Error processing individual point: {str(e)}")
         | 
| 713 | 
             
                                            print(f"Point data: {point}")
         | 
| 714 | 
            +
                            elif box_style == "magnify":
         | 
| 715 | 
            +
                                # Calculate the center point of the detection
         | 
| 716 | 
            +
                                center_x = (x1 + x2) // 2
         | 
| 717 | 
            +
                                center_y = (y1 + y2) // 2
         | 
| 718 | 
            +
                                
         | 
| 719 | 
            +
                                # Calculate original dimensions
         | 
| 720 | 
            +
                                orig_width = x2 - x1
         | 
| 721 | 
            +
                                orig_height = y2 - y1
         | 
| 722 | 
            +
                                
         | 
| 723 | 
            +
                                # Calculate new dimensions using magnify_factor parameter
         | 
| 724 | 
            +
                                magnify_factor = getattr(model, "magnify_factor", 2.0)  # Default to 2x if not specified
         | 
| 725 | 
            +
                                new_width = int(orig_width * magnify_factor)
         | 
| 726 | 
            +
                                new_height = int(orig_height * magnify_factor)
         | 
| 727 | 
            +
                                
         | 
| 728 | 
            +
                                # Calculate new coordinates ensuring they stay within frame bounds
         | 
| 729 | 
            +
                                new_x1 = max(0, center_x - new_width // 2)
         | 
| 730 | 
            +
                                new_y1 = max(0, center_y - new_height // 2)
         | 
| 731 | 
            +
                                new_x2 = min(width - 1, new_x1 + new_width)
         | 
| 732 | 
            +
                                new_y2 = min(height - 1, new_y1 + new_height)
         | 
| 733 | 
            +
                                
         | 
| 734 | 
            +
                                # Extract the original ROI
         | 
| 735 | 
            +
                                roi = frame[y1:y2, x1:x2]
         | 
| 736 | 
            +
                                
         | 
| 737 | 
            +
                                # Resize the ROI using the magnify_factor
         | 
| 738 | 
            +
                                enlarged_roi = cv2.resize(roi, (new_x2 - new_x1, new_y2 - new_y1))
         | 
| 739 | 
            +
                                
         | 
| 740 | 
            +
                                # Create a mask for smooth blending
         | 
| 741 | 
            +
                                mask = np.zeros((new_y2 - new_y1, new_x2 - new_x1), dtype=np.float32)
         | 
| 742 | 
            +
                                cv2.rectangle(mask, (0, 0), (new_x2 - new_x1, new_y2 - new_y1), 1, -1)
         | 
| 743 | 
            +
                                mask = cv2.GaussianBlur(mask, (21, 21), 11)
         | 
| 744 | 
            +
                                
         | 
| 745 | 
            +
                                # Blend the enlarged ROI with the original frame
         | 
| 746 | 
            +
                                for c in range(3):  # For each color channel
         | 
| 747 | 
            +
                                    frame[new_y1:new_y2, new_x1:new_x2, c] = (
         | 
| 748 | 
            +
                                        frame[new_y1:new_y2, new_x1:new_x2, c] * (1 - mask) +
         | 
| 749 | 
            +
                                        enlarged_roi[:, :, c] * mask
         | 
| 750 | 
            +
                                    )
         | 
| 751 |  | 
| 752 | 
             
                    except Exception as e:
         | 
| 753 | 
             
                        print(f"Error drawing {box_style} style box: {str(e)}")
         | 
|  | |
| 1039 | 
             
                grid_rows=1,
         | 
| 1040 | 
             
                grid_cols=1,
         | 
| 1041 | 
             
                box_style="censor",
         | 
| 1042 | 
            +
                magnify_factor=2.0,
         | 
| 1043 | 
             
            ):
         | 
| 1044 | 
             
                """Process a video to detect and visualize specified objects."""
         | 
| 1045 | 
             
                try:
         | 
|  | |
| 1049 | 
             
                    # Load model
         | 
| 1050 | 
             
                    print("Loading Moondream model...")
         | 
| 1051 | 
             
                    model, tokenizer = load_moondream()
         | 
| 1052 | 
            +
                    
         | 
| 1053 | 
            +
                    # Add magnify_factor to model dict for use in draw_ad_boxes
         | 
| 1054 | 
            +
                    model.magnify_factor = magnify_factor
         | 
| 1055 |  | 
| 1056 | 
             
                    # Get video properties
         | 
| 1057 | 
             
                    props = get_video_properties(video_path)
         | 
|  | |
| 1224 | 
             
                parser.add_argument(
         | 
| 1225 | 
             
                    "--box-style",
         | 
| 1226 | 
             
                    choices=["censor", "bounding-box", "hitmarker", "sam", "sam-fast", "fuzzy-blur", 
         | 
| 1227 | 
            +
                            "pixelated-blur", "intense-pixelated-blur", "obfuscated-pixel", "magnify"],
         | 
| 1228 | 
             
                    default="censor",
         | 
| 1229 | 
             
                    help="Style of detection visualization (default: censor)",
         | 
| 1230 | 
             
                )
         | 
|  | |
| 1264 | 
             
                        grid_rows=args.rows,
         | 
| 1265 | 
             
                        grid_cols=args.cols,
         | 
| 1266 | 
             
                        box_style=args.box_style,
         | 
| 1267 | 
            +
                        magnify_factor=args.magnify_factor,
         | 
| 1268 | 
             
                    )
         | 
| 1269 | 
             
                    if output_path:
         | 
| 1270 | 
             
                        success_count += 1
         | 
