Spaces:
Upload folder using huggingface_hub
Changed files:

- .github/workflows/update_space.yml (+28, -28)
- .gitignore (+51, -51)
- README.md (+165, -165)
- deep_sort_integration.py (+72, -72)
- packages.txt (+1, -1)
- persistence.py (+38, -38)
- requirements.txt (+25, -25)
- video_visualization.py (+329, -329)
- visualization.py (+97, -97)
.github/workflows/update_space.yml
CHANGED
```yaml
name: Run Python script

on:
  push:
    branches:
      - main

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v2

      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.9'

      - name: Install Gradio
        run: python -m pip install gradio

      - name: Log in to Hugging Face
        run: python -c 'import huggingface_hub; huggingface_hub.login(token="${{ secrets.hf_token }}")'

      - name: Deploy to Spaces
        run: gradio deploy
```
.gitignore
CHANGED
```text
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
*.dll

# Virtual Environment
venv/
env/
ENV/
.venv/

# IDE
.idea/
.vscode/
*.swp
*.swo

# Project specific
inputs/*
outputs/*
!inputs/.gitkeep
!outputs/.gitkeep
inputs/
outputs/

# Model files
*.pth
*.onnx
*.pt

# Logs
*.log

certificate.pem
```
README.md
CHANGED
````markdown
---
title: promptable-content-moderation
app_file: app.py
sdk: gradio
sdk_version: 5.16.1
---

# Promptable Content Moderation with Moondream

Welcome to the future of content moderation with Moondream 2B, a powerful and lightweight vision-language model that enables detection and moderation of video content using natural language prompts.

[Try it now.](https://huggingface.co/spaces/moondream/content-moderation)

## Features

- Content moderation through natural language prompts
- Multiple visualization styles
- Intelligent scene detection and tracking:
  - DeepSORT tracking with scene-aware reset
  - Persistent moderation across frames
  - Smart tracker reset at scene boundaries
- Optional grid-based detection for improved accuracy on complex scenes
- Frame-by-frame processing with IoU-based merging
- Web-compatible output format
- Test mode (process only the first X seconds)
- Advanced moderation analysis with multiple visualization plots

## Examples

| Prompt | Example Output |
|--------|----------------|
| "white cigarette" |  |
| "gun" |  |
| "confederate flag" |  |

## Requirements

### Python Dependencies

For Windows users, before installing other requirements, first install PyTorch with CUDA support:

```bash
pip install torch==2.5.1+cu121 torchvision==0.20.1+cu121 --index-url https://download.pytorch.org/whl/cu121
```

Then install the remaining dependencies:

```bash
pip install -r requirements.txt
```

### System Requirements

- FFmpeg (required for video processing)
- libvips (required for image processing)

Installation by platform:

- Ubuntu/Debian: `sudo apt-get install ffmpeg libvips`
- macOS: `brew install ffmpeg libvips`
- Windows:
  - Download FFmpeg from [ffmpeg.org](https://ffmpeg.org/download.html)
  - Follow the [libvips Windows installation guide](https://docs.moondream.ai/quick-start)

## Installation

1. Clone the repository, change into the recipe directory, and create a new virtual environment:

```bash
git clone https://github.com/vikhyat/moondream.git
cd moondream/recipes/promptable-video-redaction
python -m venv .venv
source .venv/bin/activate  # On Windows: .venv\Scripts\activate
```

2. Install Python dependencies:

```bash
pip install -r requirements.txt
```

3. Install FFmpeg and libvips:
   - On Ubuntu/Debian: `sudo apt-get install ffmpeg libvips`
   - On macOS: `brew install ffmpeg libvips`
   - On Windows: Download FFmpeg from [ffmpeg.org](https://ffmpeg.org/download.html)

> Installing libvips on Windows requires some additional steps; see [the quick-start guide](https://docs.moondream.ai/quick-start).

## Usage

The easiest way to use this tool is through its web interface, which provides a user-friendly experience for video content moderation.

### Web Interface

1. Start the web interface:

```bash
python app.py
```

2. Open the provided URL in your browser (typically <http://localhost:7860>).

3. Use the interface to:
   - Upload your video file
   - Specify the content to moderate (e.g., "face", "cigarette", "gun")
   - Choose a redaction style (default: obfuscated-pixel)
   - Optionally configure advanced settings:
     - Processing speed/quality
     - Grid size for detection
     - Test mode for quick validation (default: on, 3 seconds)
   - Process the video and download the results
   - Analyze detection patterns with the visualization tools

## Output Files

The tool generates two types of output files in the `outputs` directory:

1. Processed videos:
   - Format: `[style]_[content_type]_[original_filename].mp4`
   - Example: `censor_inappropriate_video.mp4`

2. Detection data:
   - Format: `[style]_[content_type]_[original_filename]_detections.json`
   - Contains frame-by-frame detection information
   - Used for visualization and analysis

## Technical Details

### Scene Detection and Tracking

The tool uses advanced scene detection and object tracking:

1. Scene detection:
   - Powered by PySceneDetect's ContentDetector
   - Automatically identifies scene changes in videos
   - Configurable detection threshold (default: 30.0)
   - Helps maintain tracking accuracy across scene boundaries

2. Object tracking:
   - DeepSORT tracking for consistent object identification
   - Automatic tracker reset at scene changes
   - Maintains object identity within scenes
   - Prevents tracking errors across scene boundaries

3. Integration benefits:
   - More accurate object tracking
   - Better handling of scene transitions
   - Reduced false positives in tracking
   - Improved tracking consistency

## Best Practices

- Use test mode for initial configuration
- Enable grid-based detection for complex scenes
- Choose the redaction style that fits the content:
  - Censor: complete content blocking
  - Blur styles: less intrusive moderation
  - Bounding Box: content review and analysis
- Monitor system resources during processing
- Use processing quality settings appropriate for your needs

## Notes

- Processing time depends on video length, resolution, GPU availability, and chosen settings
- A GPU is strongly recommended for faster processing
- Grid-based detection increases accuracy but requires more processing time (each grid cell is processed independently)
- Test mode processes only the first X seconds (default: 3 seconds) for quick validation
````
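The detection JSON described under "Output Files" above is what the visualization scripts later in this commit consume. As a rough sketch of its layout (field names taken from what `persistence.py`, `visualization.py`, and `video_visualization.py` actually read; the values below are made up, and `app.py`, which writes the file, is not part of this diff):

```python
# Sketch of the detection JSON structure, inferred from the reader code in this
# commit. Any additional fields written by app.py are unknown here.
example_detection_data = {
    "video_metadata": {
        "fps": 30,                # frames per second of the source video
        "total_frames": 90,       # number of frames analyzed
        "duration_sec": 3.0,      # used in the stats summary
        "detect_keyword": "gun",  # the moderation prompt
    },
    "frame_detections": [
        {
            "frame": 0,
            "timestamp": 0.0,
            "objects": [
                # bbox is [x1, y1, x2, y2], normalized to the 0-1 range
                {"keyword": "gun", "bbox": [0.41, 0.32, 0.58, 0.47]},
            ],
        },
        # ... one entry per processed frame
    ],
}
```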
deep_sort_integration.py
CHANGED
```python
import numpy as np
import torch
from deep_sort_realtime.deepsort_tracker import DeepSort
from datetime import datetime


class DeepSORTTracker:
    def __init__(self, max_age=5):
        """Initialize DeepSORT tracker."""
        self.max_age = max_age
        self.tracker = self._create_tracker()

    def _create_tracker(self):
        """Create a new instance of DeepSort tracker."""
        return DeepSort(
            max_age=self.max_age,
            embedder='mobilenet',  # Using default MobileNetV2 embedder
            today=datetime.now().date()  # For track naming and daily ID reset
        )

    def reset(self):
        """Reset the tracker state by creating a new instance."""
        print("Resetting DeepSORT tracker...")
        self.tracker = self._create_tracker()

    def update(self, frame, detections):
        """Update tracking with new detections.

        Args:
            frame: Current video frame (numpy array)
            detections: List of (box, keyword) tuples where box is [x1, y1, x2, y2] normalized

        Returns:
            List of (box, keyword, track_id) tuples
        """
        if not detections:
            return []

        height, width = frame.shape[:2]

        # Convert normalized coordinates to absolute and format detections
        detection_list = []
        for box, keyword in detections:
            x1 = int(box[0] * width)
            y1 = int(box[1] * height)
            x2 = int(box[2] * width)
            y2 = int(box[3] * height)
            w = x2 - x1
            h = y2 - y1

            # Format: ([left, top, w, h], confidence, detection_class)
            detection_list.append(([x1, y1, w, h], 1.0, keyword))

        # Update tracker
        tracks = self.tracker.update_tracks(detection_list, frame=frame)

        # Convert back to normalized coordinates with track IDs
        tracked_objects = []
        for track in tracks:
            if not track.is_confirmed():
                continue

            ltrb = track.to_ltrb()  # Get [left, top, right, bottom] format
            x1, y1, x2, y2 = ltrb

            # Normalize coordinates
            x1 = max(0.0, min(1.0, x1 / width))
            y1 = max(0.0, min(1.0, y1 / height))
            x2 = max(0.0, min(1.0, x2 / width))
            y2 = max(0.0, min(1.0, y2 / height))

            tracked_objects.append(([x1, y1, x2, y2], track.det_class, track.track_id))

        return tracked_objects
```
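A minimal usage sketch of the tracker above, assuming a BGR frame decoded with OpenCV and Moondream-style normalized boxes; the frame path and detections below are placeholders, not files or values from this repo:

```python
import cv2
from deep_sort_integration import DeepSORTTracker

tracker = DeepSORTTracker(max_age=5)

# Hypothetical inputs: one decoded frame plus normalized [x1, y1, x2, y2] boxes
# for the moderation keyword (in the app these come from the Moondream model).
frame = cv2.imread("example_frame.jpg")           # placeholder path
detections = [([0.40, 0.30, 0.60, 0.50], "gun")]  # (box, keyword) tuples

# DeepSORT typically needs a few consecutive frames before a track is
# confirmed, so this list may be empty when only a single frame is fed in.
tracked = tracker.update(frame, detections)
for box, keyword, track_id in tracked:
    print(f"track {track_id}: {keyword} at {box}")

# At a scene cut, the app resets the tracker so track IDs do not leak across scenes.
tracker.reset()
```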
packages.txt
CHANGED
```text
libvips
ffmpeg
```
persistence.py
CHANGED
```python
import json
import os


def save_detection_data(data, output_file):
    """
    Saves the detection data to a JSON file.

    Args:
        data (dict): The complete detection data structure.
        output_file (str): Path to the output JSON file.
    """
    try:
        # Create directory if it doesn't exist
        os.makedirs(os.path.dirname(output_file), exist_ok=True)

        with open(output_file, "w") as f:
            json.dump(data, f, indent=4)
        print(f"Detection data saved to {output_file}")
        return True
    except Exception as e:
        print(f"Error saving data: {str(e)}")
        return False


def load_detection_data(input_file):
    """
    Loads the detection data from a JSON file.

    Args:
        input_file (str): Path to the JSON file.

    Returns:
        dict: The loaded detection data, or None if there was an error.
    """
    try:
        with open(input_file, "r") as f:
            return json.load(f)
    except Exception as e:
        print(f"Error loading data: {str(e)}")
        return None
```
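For reference, a small round-trip sketch using the two helpers above; the path and payload are illustrative only:

```python
from persistence import save_detection_data, load_detection_data

# Illustrative payload following the structure the visualization scripts expect.
data = {
    "video_metadata": {"fps": 30, "total_frames": 1, "duration_sec": 0.033, "detect_keyword": "face"},
    "frame_detections": [
        {"frame": 0, "timestamp": 0.0, "objects": [{"keyword": "face", "bbox": [0.1, 0.1, 0.3, 0.4]}]}
    ],
}

if save_detection_data(data, "outputs/example_detections.json"):
    loaded = load_detection_data("outputs/example_detections.json")
    assert loaded == data  # this structure survives the JSON round trip unchanged
```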
requirements.txt
CHANGED
```text
gradio>=4.0.0
torch>=2.0.0
# if on windows: pip install torch==2.5.1+cu121 torchvision==0.20.1+cu121 --index-url https://download.pytorch.org/whl/cu121
transformers>=4.36.0
opencv-python>=4.8.0
pillow>=10.0.0
numpy>=1.24.0
tqdm>=4.66.0
ffmpeg-python
einops
pyvips-binary
pyvips
accelerate
# for spaces
--extra-index-url https://download.pytorch.org/whl/cu113
spaces
# SAM dependencies
torchvision>=0.20.1
matplotlib>=3.7.0
pandas>=2.0.0
plotly
# DeepSORT dependencies
deep-sort-realtime>=1.3.2
scikit-learn  # Required for deep-sort-realtime
# Scene detection dependencies (for intelligent scene-aware tracking)
scenedetect[opencv]>=0.6.2  # Provides scene change detection capabilities
```
video_visualization.py
CHANGED
```python
import os
import tempfile
import subprocess
import matplotlib.pyplot as plt
import pandas as pd
import cv2
import numpy as np
from tqdm import tqdm
from persistence import load_detection_data


def create_frame_data(json_path):
    """Create frame-by-frame detection data for visualization."""
    try:
        data = load_detection_data(json_path)
        if not data:
            print("No data loaded from JSON file")
            return None

        if "video_metadata" not in data or "frame_detections" not in data:
            print("Invalid JSON structure: missing required fields")
            return None

        # Extract video metadata
        metadata = data["video_metadata"]
        if "fps" not in metadata or "total_frames" not in metadata:
            print("Invalid metadata: missing fps or total_frames")
            return None

        fps = metadata["fps"]
        total_frames = metadata["total_frames"]

        # Create frame data
        frame_counts = {}
        for frame_data in data["frame_detections"]:
            if "frame" not in frame_data or "objects" not in frame_data:
                continue  # Skip invalid frame data
            frame_num = frame_data["frame"]
            frame_counts[frame_num] = len(frame_data["objects"])

        # Fill in missing frames with 0 detections
        for frame in range(total_frames):
            if frame not in frame_counts:
                frame_counts[frame] = 0

        if not frame_counts:
            print("No valid frame data found")
            return None

        # Convert to DataFrame
        df = pd.DataFrame(list(frame_counts.items()), columns=["frame", "detections"])
        df["timestamp"] = df["frame"] / fps

        return df, metadata

    except Exception as e:
        print(f"Error creating frame data: {str(e)}")
        import traceback
        traceback.print_exc()
        return None


def generate_frame_image(df, frame_num, temp_dir, max_y):
    """Generate and save a single frame of the visualization."""
    # Set the style to dark background
    plt.style.use('dark_background')

    # Set global font to monospace
    plt.rcParams['font.family'] = 'monospace'
    plt.rcParams['font.monospace'] = ['DejaVu Sans Mono']

    plt.figure(figsize=(10, 6))

    # Plot data up to current frame
    current_data = df[df['frame'] <= frame_num]
    plt.plot(df['frame'], df['detections'], color='#1a1a1a', alpha=0.5)  # Darker background line
    plt.plot(current_data['frame'], current_data['detections'], color='#00ff41')  # Matrix green

    # Add vertical line for current position
    plt.axvline(x=frame_num, color='#ff0000', linestyle='-', alpha=0.7)  # Keep red for position

    # Set consistent axes
    plt.xlim(0, len(df) - 1)
    plt.ylim(0, max_y * 1.1)  # Add 10% padding

    # Add labels with Matrix green color
    plt.title(f'FRAME {frame_num:04d} - DETECTIONS OVER TIME', color='#00ff41', pad=20)
    plt.xlabel('FRAME NUMBER', color='#00ff41')
    plt.ylabel('NUMBER OF DETECTIONS', color='#00ff41')

    # Add current stats in Matrix green with monospace formatting
    current_detections = df[df['frame'] == frame_num]['detections'].iloc[0]
    plt.text(0.02, 0.98, f'CURRENT DETECTIONS: {current_detections:02d}',
             transform=plt.gca().transAxes, verticalalignment='top',
             color='#00ff41', family='monospace')

    # Style the grid and ticks
    plt.grid(True, color='#1a1a1a', linestyle='-', alpha=0.3)
    plt.tick_params(colors='#00ff41')

    # Save frame
    frame_path = os.path.join(temp_dir, f'frame_{frame_num:05d}.png')
    plt.savefig(frame_path, bbox_inches='tight', dpi=100, facecolor='black', edgecolor='none')
    plt.close()

    return frame_path


def generate_gauge_frame(df, frame_num, temp_dir, detect_keyword="OBJECT"):
    """Generate a modern square-style binary gauge visualization frame."""
    # Set the style to dark background
    plt.style.use('dark_background')

    # Set global font to monospace
    plt.rcParams['font.family'] = 'monospace'
    plt.rcParams['font.monospace'] = ['DejaVu Sans Mono']

    # Create figure with 16:9 aspect ratio
    plt.figure(figsize=(16, 9))

    # Get current detection state
    current_detections = df[df['frame'] == frame_num]['detections'].iloc[0]
    has_detection = current_detections > 0

    # Create a simple gauge visualization
    plt.axis('off')

    # Set colors
    if has_detection:
        color = '#00ff41'  # Matrix green for YES
        status = 'YES'
        indicator_pos = 0.8  # Right position
    else:
        color = '#ff0000'  # Red for NO
        status = 'NO'
        indicator_pos = 0.2  # Left position

    # Draw background rectangle
    background = plt.Rectangle((0.1, 0.3), 0.8, 0.2,
                               facecolor='#1a1a1a',
                               edgecolor='#333333',
                               linewidth=2)
    plt.gca().add_patch(background)

    # Draw indicator
    indicator_width = 0.05
    indicator = plt.Rectangle((indicator_pos - indicator_width/2, 0.25),
                              indicator_width, 0.3,
                              facecolor=color,
                              edgecolor=None)
    plt.gca().add_patch(indicator)

    # Add tick marks
    tick_positions = [0.2, 0.5, 0.8]  # NO, CENTER, YES
    for x in tick_positions:
        plt.plot([x, x], [0.3, 0.5], color='#444444', linewidth=2)

    # Add YES/NO labels
    plt.text(0.8, 0.2, 'YES', color='#00ff41', fontsize=14,
             ha='center', va='center', family='monospace')
    plt.text(0.2, 0.2, 'NO', color='#ff0000', fontsize=14,
             ha='center', va='center', family='monospace')

    # Add status box at top with detection keyword
    plt.text(0.5, 0.8, f'{detect_keyword.upper()} DETECTED?', color=color,
             fontsize=16, ha='center', va='center', family='monospace',
             bbox=dict(facecolor='#1a1a1a',
                       edgecolor=color,
                       linewidth=2,
                       pad=10))

    # Add frame counter at bottom
    plt.text(0.5, 0.1, f'FRAME: {frame_num:04d}', color='#00ff41',
             fontsize=14, ha='center', va='center', family='monospace')

    # Add subtle grid lines for depth
    for x in np.linspace(0.2, 0.8, 7):
        plt.plot([x, x], [0.3, 0.5], color='#222222', linewidth=1, zorder=0)

    # Add glow effect to indicator
    for i in range(3):
        glow = plt.Rectangle((indicator_pos - (indicator_width + i*0.01)/2,
                              0.25 - i*0.01),
                             indicator_width + i*0.01,
                             0.3 + i*0.02,
                             facecolor=color,
                             alpha=0.1/(i+1))
        plt.gca().add_patch(glow)

    # Set consistent plot limits
    plt.xlim(0, 1)
    plt.ylim(0, 1)

    # Save frame with 16:9 aspect ratio
    frame_path = os.path.join(temp_dir, f'gauge_{frame_num:05d}.png')
    plt.savefig(frame_path,
                bbox_inches='tight',
                dpi=100,
                facecolor='black',
                edgecolor='none',
                pad_inches=0)
    plt.close()

    return frame_path


def create_video_visualization(json_path, style="timeline"):
    """Create a video visualization of the detection data."""
    try:
        if not json_path:
            return None, "No JSON file provided"

        if not os.path.exists(json_path):
            return None, f"File not found: {json_path}"

        # Load and process data
        result = create_frame_data(json_path)
        if result is None:
            return None, "Failed to load detection data from JSON file"

        frame_data, metadata = result
        if len(frame_data) == 0:
            return None, "No frame data found in JSON file"

        total_frames = metadata["total_frames"]
        detect_keyword = metadata.get("detect_keyword", "OBJECT")  # Get the detection keyword

        # Create temporary directory for frames
        with tempfile.TemporaryDirectory() as temp_dir:
            max_y = frame_data['detections'].max()

            # Generate each frame
            print("Generating frames...")
            frame_paths = []
            with tqdm(total=total_frames, desc="Generating frames") as pbar:
                for frame in range(total_frames):
                    try:
                        if style == "gauge":
                            frame_path = generate_gauge_frame(frame_data, frame, temp_dir, detect_keyword)
                        else:  # default to timeline
                            frame_path = generate_frame_image(frame_data, frame, temp_dir, max_y)
                        if frame_path and os.path.exists(frame_path):
                            frame_paths.append(frame_path)
                        else:
                            print(f"Warning: Failed to generate frame {frame}")
                        pbar.update(1)
                    except Exception as e:
                        print(f"Error generating frame {frame}: {str(e)}")
                        continue

            if not frame_paths:
                return None, "Failed to generate any frames"

            # Create output video path
            output_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "outputs")
            os.makedirs(output_dir, exist_ok=True)
            output_video = os.path.join(output_dir, f"detection_visualization_{style}.mp4")

            # Create temp output path
            base, ext = os.path.splitext(output_video)
            temp_output = f"{base}_temp{ext}"

            # First pass: Create video with OpenCV VideoWriter
            print("Creating initial video...")
            # Get frame size from first image
            first_frame = cv2.imread(frame_paths[0])
            height, width = first_frame.shape[:2]

            out = cv2.VideoWriter(
                temp_output,
                cv2.VideoWriter_fourcc(*"mp4v"),
                metadata["fps"],
                (width, height)
            )

            with tqdm(total=total_frames, desc="Creating video") as pbar:  # Use total_frames here too
                for frame_path in frame_paths:
                    frame = cv2.imread(frame_path)
                    out.write(frame)
                    pbar.update(1)

            out.release()

            # Second pass: Convert to web-compatible format
            print("Converting to web format...")
            try:
                subprocess.run(
                    [
                        "ffmpeg",
                        "-y",
                        "-i",
                        temp_output,
                        "-c:v",
                        "libx264",
                        "-preset",
                        "medium",
                        "-crf",
                        "23",
                        "-movflags",
                        "+faststart",  # Better web playback
                        "-loglevel",
                        "error",
                        output_video,
                    ],
                    check=True,
                )

                os.remove(temp_output)  # Remove the temporary file

                if not os.path.exists(output_video):
                    print(f"Warning: FFmpeg completed but output file not found at {output_video}")
                    return None, "Failed to create video"

                # Return video path and stats
                stats = f"""Video Stats:
FPS: {metadata['fps']}
Total Frames: {metadata['total_frames']}
Duration: {metadata['duration_sec']:.2f} seconds
Max Detections in a Frame: {frame_data['detections'].max()}
Average Detections per Frame: {frame_data['detections'].mean():.2f}"""

                return output_video, stats

            except subprocess.CalledProcessError as e:
                print(f"Error running FFmpeg: {str(e)}")
                if os.path.exists(temp_output):
                    os.remove(temp_output)
                return None, f"Error creating visualization: {str(e)}"

    except Exception as e:
        print(f"Error creating video visualization: {str(e)}")
        import traceback
        traceback.print_exc()
        return None, f"Error creating visualization: {str(e)}"
```
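A usage sketch for the module above; the JSON path is a placeholder standing in for a detections file produced by the app (see "Output Files" in the README):

```python
from video_visualization import create_video_visualization

# Hypothetical detections file written by the app.
video_path, stats = create_video_visualization(
    "outputs/censor_gun_video_detections.json",  # placeholder path
    style="gauge",  # or "timeline" (the default)
)
if video_path:
    print(f"Visualization written to {video_path}\n{stats}")
else:
    print(f"Visualization failed: {stats}")
```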
visualization.py
CHANGED
```python
import pandas as pd
import matplotlib.pyplot as plt
from persistence import load_detection_data
import argparse


def visualize_detections(json_path):
    """
    Visualize detection data from a JSON file.

    Args:
        json_path (str): Path to the JSON file containing detection data.
    """
    # Load the persisted JSON data
    data = load_detection_data(json_path)
    if not data:
        return

    # Convert the frame detections to a DataFrame
    rows = []
    for frame_data in data["frame_detections"]:
        frame = frame_data["frame"]
        timestamp = frame_data["timestamp"]
        for obj in frame_data["objects"]:
            rows.append({
                "frame": frame,
                "timestamp": timestamp,
                "keyword": obj["keyword"],
                "x1": obj["bbox"][0],
                "y1": obj["bbox"][1],
                "x2": obj["bbox"][2],
                "y2": obj["bbox"][3],
                "area": (obj["bbox"][2] - obj["bbox"][0]) * (obj["bbox"][3] - obj["bbox"][1])
            })

    if not rows:
        print("No detections found in the data")
        return

    df = pd.DataFrame(rows)

    # Create a figure with multiple subplots
    fig = plt.figure(figsize=(15, 10))

    # Plot 1: Number of detections per frame
    plt.subplot(2, 2, 1)
    detections_per_frame = df.groupby("frame").size()
    plt.plot(detections_per_frame.index, detections_per_frame.values)
    plt.xlabel("Frame")
    plt.ylabel("Number of Detections")
    plt.title("Detections Per Frame")

    # Plot 2: Distribution of detection areas
    plt.subplot(2, 2, 2)
    df["area"].hist(bins=30)
    plt.xlabel("Detection Area (normalized)")
    plt.ylabel("Count")
    plt.title("Distribution of Detection Areas")

    # Plot 3: Average detection area over time
    plt.subplot(2, 2, 3)
    avg_area = df.groupby("frame")["area"].mean()
    plt.plot(avg_area.index, avg_area.values)
    plt.xlabel("Frame")
    plt.ylabel("Average Detection Area")
    plt.title("Average Detection Area Over Time")

    # Plot 4: Heatmap of detection centers
    plt.subplot(2, 2, 4)
    df["center_x"] = (df["x1"] + df["x2"]) / 2
    df["center_y"] = (df["y1"] + df["y2"]) / 2
    plt.hist2d(df["center_x"], df["center_y"], bins=30)
    plt.colorbar()
    plt.xlabel("X Position")
    plt.ylabel("Y Position")
    plt.title("Detection Center Heatmap")

    # Adjust layout and display
    plt.tight_layout()
    plt.show()

    # Print summary statistics
    print("\nSummary Statistics:")
    print(f"Total frames analyzed: {len(data['frame_detections'])}")
    print(f"Total detections: {len(df)}")
    print(f"Average detections per frame: {len(df) / len(data['frame_detections']):.2f}")
    print(f"\nVideo metadata:")
    for key, value in data["video_metadata"].items():
        print(f"{key}: {value}")


def main():
    parser = argparse.ArgumentParser(description="Visualize object detection data")
    parser.add_argument("json_file", help="Path to the JSON file containing detection data")
    args = parser.parse_args()

    visualize_detections(args.json_file)


if __name__ == "__main__":
    main()
```
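The script above is also usable directly from Python; a minimal sketch, with a placeholder detections path:

```python
from visualization import visualize_detections

# Equivalent to: python visualization.py outputs/censor_gun_video_detections.json
visualize_detections("outputs/censor_gun_video_detections.json")  # placeholder path
```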