initial commit
- .idea/.gitignore +3 -0
- .idea/aws.xml +11 -0
- .idea/inspectionProfiles/Project_Default.xml +23 -0
- .idea/inspectionProfiles/profiles_settings.xml +6 -0
- .idea/misc.xml +7 -0
- .idea/modules.xml +8 -0
- .idea/vcs.xml +6 -0
- .idea/viralplay.iml +14 -0
- app.py +90 -0
- handlers/.DS_Store +0 -0
- handlers/__init__.py +0 -0
- handlers/__pycache__/__init__.cpython-311.pyc +0 -0
- handlers/__pycache__/frame_handler_resnet.cpython-311.pyc +0 -0
- handlers/__pycache__/frame_handler_yolo.cpython-311.pyc +0 -0
- handlers/__pycache__/video_handler.cpython-311.pyc +0 -0
- handlers/app_yolo.py +41 -0
- handlers/app_yolo_test.py +20 -0
- handlers/frame_handler_resnet.py +253 -0
- handlers/frame_handler_yolo.py +294 -0
- handlers/video_handler.py +171 -0
- handlers/yolov8n.pt +3 -0
- input_data/.DS_Store +0 -0
- requirements.txt +191 -0
- yolov8n.pt +3 -0
.idea/.gitignore
ADDED
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
.idea/aws.xml
ADDED
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="accountSettings">
+    <option name="activeRegion" value="us-east-1" />
+    <option name="recentlyUsedRegions">
+      <list>
+        <option value="us-east-1" />
+      </list>
+    </option>
+  </component>
+</project>
.idea/inspectionProfiles/Project_Default.xml
ADDED
@@ -0,0 +1,23 @@
+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredPackages">
+        <value>
+          <list size="3">
+            <item index="0" class="java.lang.String" itemvalue="transformers" />
+            <item index="1" class="java.lang.String" itemvalue="huggingface-hub" />
+            <item index="2" class="java.lang.String" itemvalue="datasets" />
+          </list>
+        </value>
+      </option>
+    </inspection_tool>
+    <inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
+      <option name="ignoredErrors">
+        <list>
+          <option value="E265" />
+        </list>
+      </option>
+    </inspection_tool>
+  </profile>
+</component>
.idea/inspectionProfiles/profiles_settings.xml
ADDED
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
.idea/misc.xml
ADDED
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Black">
+    <option name="sdkName" value="Python 3.11 (viralplay)" />
+  </component>
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (viralplay)" project-jdk-type="Python SDK" />
+</project>
.idea/modules.xml
ADDED
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/viralplay.iml" filepath="$PROJECT_DIR$/.idea/viralplay.iml" />
+    </modules>
+  </component>
+</project>
.idea/vcs.xml
ADDED
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>
.idea/viralplay.iml
ADDED
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <excludeFolder url="file://$MODULE_DIR$/.venv" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="PyDocumentationSettings">
+    <option name="format" value="PLAIN" />
+    <option name="myDocStringFormat" value="Plain" />
+  </component>
+</module>
app.py
ADDED
@@ -0,0 +1,90 @@
+import os
+import shutil
+import spaces
+import gradio as gr
+
+from handlers import frame_handler_yolo as fh
+from handlers import video_handler as vh
+
+model_path = "yolov8n.pt"  # YOLOv8 model path
+
+
+@spaces.GPU(duration=300)
+def process_video(video_file):
+    """
+    Processes the uploaded video file by extracting key frames, cropping them, and generating a processed video.
+    """
+    status_message = "Processing started..."
+
+    # Define output directories
+    output_folder = "output_data"
+    all_frames_folder = os.path.join(output_folder, "all_frames")
+    key_frames_folder = os.path.join(output_folder, "key_frames")
+    nonkey_frames_folder = os.path.join(output_folder, "nonkey_frames")
+    cropped_frames_folder = os.path.join(output_folder, "cropped_frames")
+    processed_video_path = os.path.join(output_folder, "processed_video.mp4")
+
+    print("Calling process_video function: Output folder:", output_folder)
+
+    # Clear output directory before processing
+    if os.path.exists(output_folder):
+        shutil.rmtree(output_folder)
+    os.makedirs(output_folder, exist_ok=True)
+
+    # Save uploaded video temporarily
+    video_path = os.path.join(output_folder, "input_video.mp4")
+    with open(video_file.name, "rb") as vf:
+        with open(video_path, "wb") as f:
+            f.write(vf.read())
+
+    status_message = "Extracting frames..."
+    yield status_message, None
+
+    # Step 1: Extract all frames
+    vh.extract_all_frames(video_path, all_frames_folder)
+
+    status_message = "Detecting key frames..."
+    yield status_message, None
+
+    # Step 2: Extract key frames
+    original_fps = 30
+    fh.extract_key_frames(all_frames_folder, key_frames_folder, original_fps, model_path)
+
+    status_message = "Cropping key frames..."
+    yield status_message, None
+
+    # Step 3: Crop key frames based on object detection
+    target_resolution = (360, 640)  # Output resolution (9:16)
+    fh.crop_preserve_key_objects(key_frames_folder, cropped_frames_folder, model_path, target_resolution)
+
+    status_message = "Generating final video..."
+    yield status_message, None
+
+    # Step 4: Generate the short video
+    target_resolution = (360, 640)  # Output resolution (9:16)
+    target_frame_rate = 30
+    vh.create_video_from_frames(cropped_frames_folder, processed_video_path, target_frame_rate, target_resolution)
+
+    status_message = "Processing complete!"
+    yield status_message, processed_video_path
+
+
+# Gradio Blocks UI
+with gr.Blocks() as demo:
+    gr.Markdown("## Generate a short video for your football match")
+    gr.Markdown("Upload a video file. The app will extract key frames, crop them to fit a 9:16 aspect ratio, "
+                "and generate a short video.")
+
+    with gr.Row():
+        with gr.Column():
+            video_input = gr.File(label="Upload Video", type="filepath", file_types=["video"], file_count="single")
+        with gr.Column():
+            process_button = gr.Button("Process Video", variant="primary")
+            status_output = gr.Textbox(label="Status", interactive=False)
+        with gr.Column():
+            video_output = gr.Video(label="Processed Video", width=360, height=640)
+
+    process_button.click(process_video, inputs=video_input, outputs=[status_output, video_output])
+
+if __name__ == "__main__":
+    demo.launch()
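Note on app.py: process_video is a generator (it yields status/video pairs), which Gradio renders as live status updates while the pipeline runs. One caveat: with gr.File(type="filepath"), recent Gradio versions pass the handler a plain path string, so the video_file.name access above would raise AttributeError there. A minimal, version-agnostic sketch (assumption: the handler may receive either a string path or a tempfile-like object):

    import shutil

    def resolve_upload_path(video_file):
        # type="filepath" yields a str in recent Gradio; older releases
        # pass a tempfile wrapper that exposes the path as .name
        return video_file if isinstance(video_file, str) else video_file.name

    # Inside process_video, instead of the manual read/write loop:
    # shutil.copyfile(resolve_upload_path(video_file), video_path)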
handlers/.DS_Store
ADDED
Binary file (8.2 kB)
handlers/__init__.py
ADDED
File without changes
handlers/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (176 Bytes)
handlers/__pycache__/frame_handler_resnet.cpython-311.pyc
ADDED
Binary file (8.39 kB)
handlers/__pycache__/frame_handler_yolo.cpython-311.pyc
ADDED
Binary file (13.6 kB)
handlers/__pycache__/video_handler.cpython-311.pyc
ADDED
Binary file (6.94 kB)
handlers/app_yolo.py
ADDED
@@ -0,0 +1,41 @@
+import video_handler as vh
+import frame_handler_yolo as fh
+
+model_path = "yolov8n.pt"  # YOLOv8 model path
+original_fps = 30  # Original FPS of the input video
+
+video_path = "../input_data/football.mp4"  # Replace with your video path
+
+output_folder = "../output_data/"  # Folder to save extracted frames
+all_frames_folder = output_folder + "all_frames"
+
+# ============= Step 1: Extract frames from the input video ========
+
+vh.extract_all_frames(video_path, all_frames_folder)
+
+#frame_rate = 2  # Extract 2 frames per second
+#vh.extract_frames_by_rate(video_path, all_frames_folder, frame_rate)
+
+
+# ============= Step 2: Extract key frames from the extracted frames ========
+# Key frames = frames containing a ball.
+# If the frame before a key frame is a non-key frame, a major movement was detected,
+# so reclassify up to 30 previous frames (~1 second) as key frames to add context to the movement.
+
+key_frames_folder = output_folder + "key_frames"  # Save key frames here
+nonkey_frames_folder = output_folder + "nonkey_frames"  # Save non-key frames here
+
+fh.extract_key_frames(all_frames_folder, key_frames_folder, original_fps, model_path)
+
+# ============= Step 3: Crop the key frames to a 9:16 aspect ratio while keeping the key object (the football)
+key_frames_9_16_folder = output_folder + "key_frames_9_16"  # Save processed frames here
+
+target_resolution = (360, 640)  # Output resolution (9:16)
+
+fh.crop_preserve_key_objects(key_frames_folder, key_frames_9_16_folder, model_path, target_resolution)
+
+# ============= Step 4: Create a video from the processed frames ========
+
+output_video_path_9_16 = output_folder + "output_video_9_16.mp4"  # Output video path
+target_frame_rate = 30  # Frames per second of the output video
+vh.create_video_from_frames(key_frames_9_16_folder, output_video_path_9_16, target_frame_rate, target_resolution)
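The Step 2 comments above describe a lookback heuristic: when the ball (re)appears after a gap, the preceding ~1 second of non-key frames is promoted to key frames for context. A minimal sketch of that buffering logic in isolation (assumption: flags is a precomputed list where flags[i] is True when frame i contains the ball):

    from collections import deque

    def select_key_indices(flags, fps=30):
        keep = set()
        lookback = deque(maxlen=fps)  # last ~1 second of non-key frame indices
        prev_was_key = False
        for i, is_key in enumerate(flags):
            if is_key:
                if not prev_was_key:  # ball just (re)appeared: promote context
                    keep.update(lookback)
                    lookback.clear()
                keep.add(i)
                prev_was_key = True
            else:
                lookback.append(i)
                prev_was_key = False
        return sorted(keep)

    # Ball visible at frames 40-45 of 60: frames 10-39 get promoted as context.
    print(select_key_indices([i in range(40, 46) for i in range(60)], fps=30))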
handlers/app_yolo_test.py
ADDED
@@ -0,0 +1,20 @@
+import video_handler as vh
+import frame_handler_yolo as fh
+
+model_path = "yolov8n.pt"  # YOLOv8 model path
+original_fps = 30  # Original FPS of the input video
+
+video_path = "../input_data/football.mp4"  # Replace with your video path
+
+output_folder = "../output_data/"  # Folder to save extracted frames
+all_frames_folder = output_folder + "all_frames"
+
+output_video_path_9_16 = output_folder + "output_video_9_16_test.mp4"  # Output video path
+target_frame_rate = 30  # Frames per second of the output video
+target_resolution = (360, 640)  # Output resolution (9:16)
+
+# FOR TESTING ONLY
+test_key_frames_folder = output_folder + "test_key_frames"
+test_key_frames_9_16_folder = output_folder + "test_key_frames_9_16"
+#fh.crop_preserve_key_objects(test_key_frames_folder, test_key_frames_9_16_folder, model_path, target_resolution)
+vh.create_video_from_frames(test_key_frames_9_16_folder, output_video_path_9_16, target_frame_rate, target_resolution)
handlers/frame_handler_resnet.py
ADDED
@@ -0,0 +1,253 @@
+import os
+import cv2
+import torch
+#from transformers import DetrImageProcessor, DetrForObjectDetection
+from transformers import AutoImageProcessor, AutoModelForObjectDetection
+from PIL import Image
+import numpy as np
+
+def crop_preserve_key_objects(input_folder, output_folder, model_name='facebook/detr-resnet-50', target_resolution=(360, 640)):
+    """
+    Preprocess frames to fit a target aspect ratio, focusing on the densest group of people
+    if a football is not detected, and extending the area until it reaches the target resolution.
+
+    Args:
+        input_folder (str): Path to the folder containing key frames.
+        output_folder (str): Path to save the processed frames.
+        model_name (str): Hugging Face model name for DETR.
+        target_resolution (tuple): Desired resolution (width, height), e.g., (1920, 1080).
+    """
+    print("Preprocessing frames using DETR to fit the target aspect ratio...")
+
+    # Load the DETR model and processor
+    #processor = DetrImageProcessor.from_pretrained(model_name)
+    #model = DetrForObjectDetection.from_pretrained(model_name)
+
+    processor = AutoImageProcessor.from_pretrained(model_name)
+    model = AutoModelForObjectDetection.from_pretrained(model_name)
+    target_aspect_ratio = target_resolution[0] / target_resolution[1]
+
+    for frame_name in os.listdir(input_folder):
+        frame_path = os.path.join(input_folder, frame_name)
+        if not frame_name.lower().endswith(('.jpg', '.png')):
+            continue  # Skip non-image files
+
+        # Read the frame
+        frame = cv2.imread(frame_path)
+        if frame is None:
+            print(f"Error reading frame: {frame_path}")
+            continue
+
+        original_height, original_width = frame.shape[:2]
+        frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+
+        # Run inference
+        inputs = processor(images=frame_pil, return_tensors="pt")
+        outputs = model(**inputs)
+
+        # Extract bounding boxes and labels
+        logits = outputs.logits.softmax(-1)[0]
+        boxes = outputs.pred_boxes[0].cpu().detach().numpy()
+        labels = logits.argmax(-1).cpu().detach().numpy()
+        scores = logits.max(-1).values.cpu().detach().numpy()
+
+        # Filter boxes with a confidence threshold
+        confidence_threshold = 0.1
+        filtered_boxes = []
+        for i, score in enumerate(scores):
+            if score >= confidence_threshold:
+                filtered_boxes.append((labels[i], score, boxes[i]))
+
+        # Separate detections into categories
+        ball_detected = False
+        people_boxes = []
+        for label, score, box in filtered_boxes:
+            # Convert box from normalized coordinates to pixel values
+            x_min, y_min, x_max, y_max = (
+                int(box[0] * original_width),
+                int(box[1] * original_height),
+                int(box[2] * original_width),
+                int(box[3] * original_height),
+            )
+            if label == 32:  # "sports ball" class in COCO
+                print("Ball is detected in the frame.")
+                x_center = (x_min + x_max) // 2
+                y_center = (y_min + y_max) // 2
+                ball_detected = True
+                break
+            elif label == 1:  # "person" class in COCO
+                print("Person is detected in the frame.")
+                people_boxes.append((x_min, y_min, x_max, y_max))
+
+        # If no ball is detected, focus on the densest group of people
+        if not ball_detected and people_boxes:
+            # Cluster the people into groups based on proximity
+            centers = np.array([(int((x1 + x2) / 2), int((y1 + y2) / 2)) for x1, y1, x2, y2 in people_boxes])
+            distances = np.linalg.norm(centers[:, None, :] - centers[None, :, :], axis=2)
+
+            # Define a distance threshold to group nearby people
+            threshold = max(original_width, original_height) * 0.1  # Adjust clustering sensitivity
+            clusters = []
+            visited = set()
+
+            for i, center in enumerate(centers):
+                if i in visited:
+                    continue
+                cluster = [i]
+                visited.add(i)
+                for j in range(len(centers)):
+                    if j not in visited and distances[i, j] < threshold:
+                        cluster.append(j)
+                        visited.add(j)
+                clusters.append(cluster)
+
+            # Find the largest cluster and calculate its bounding box
+            largest_cluster = max(clusters, key=len)
+            x_min = min(people_boxes[i][0] for i in largest_cluster)
+            y_min = min(people_boxes[i][1] for i in largest_cluster)
+            x_max = max(people_boxes[i][2] for i in largest_cluster)
+            y_max = max(people_boxes[i][3] for i in largest_cluster)
+
+            # Center the crop on the largest cluster
+            x_center = (x_min + x_max) // 2
+            y_center = (y_min + y_max) // 2
+
+        # Calculate the cropping region to fit the target resolution
+        new_width = int(original_height * target_aspect_ratio)
+        new_height = int(original_width / target_aspect_ratio)
+
+        x_start = max(0, x_center - new_width // 2)
+        y_start = max(0, y_center - new_height // 2)
+        x_end = min(original_width, x_start + new_width)
+        y_end = min(original_height, y_start + new_height)
+
+        # Adjust the crop if the size is smaller than the target resolution
+        if (x_end - x_start) < new_width:
+            x_start = max(0, x_end - new_width)
+        if (y_end - y_start) < new_height:
+            y_start = max(0, y_end - new_height)
+
+        # Crop and resize the frame
+        frame_cropped = frame[int(y_start):int(y_end), int(x_start):int(x_end)]
+        frame_resized = cv2.resize(frame_cropped, target_resolution, interpolation=cv2.INTER_LINEAR)
+
+        # Save the processed frame
+        output_path = os.path.join(output_folder, frame_name)
+        cv2.imwrite(output_path, frame_resized)
+        print(f"Processed frame saved: {output_path}")
+
+    print("Preprocessing completed.")
+
+
+#back up
+def backup_yolo_crop_preserve_key_objects(input_folder, output_folder, model_path='yolov8n.pt', target_resolution=(360, 640)):
+    """
+    Preprocess frames to fit a target aspect ratio, focusing on the densest group of people
+    if a football is not detected, and extending the area until it reaches the target resolution.
+
+    Args:
+        input_folder (str): Path to the folder containing key frames.
+        output_folder (str): Path to save the processed frames.
+        model_path (str): Path to the YOLOv8 model file.
+        target_resolution (tuple): Desired resolution (width, height), e.g., (1920, 1080).
+    """
+    print("Preprocessing frames to fit the target aspect ratio...")
+
+    model = YOLO(model_path)
+    target_aspect_ratio = target_resolution[0] / target_resolution[1]
+
+    for frame_name in os.listdir(input_folder):
+        frame_path = os.path.join(input_folder, frame_name)
+        if not frame_name.lower().endswith(('.jpg', '.png')):
+            continue  # Skip non-image files
+
+        # Read the frame
+        frame = cv2.imread(frame_path)
+        if frame is None:
+            print(f"Error reading frame: {frame_path}")
+            continue
+
+        original_height, original_width = frame.shape[:2]
+
+        # Run YOLOv8 inference
+        # TTP adjusted conf to 0.3 from 0.5 originally
+        results = model.predict(frame, conf=0.3)
+
+        # Initialize cropping region
+        x_center, y_center = original_width // 2, original_height // 2
+        ball_detected = False
+        people_boxes = []
+
+        # Process detections to find "sports ball" or "person"
+        for result in results[0].boxes:
+            label = result.cls
+            if model.names[int(label)] == "sports ball":
+                # Get the center of the detected football
+                x_min, y_min, x_max, y_max = result.xyxy[0].numpy()
+                x_center = int((x_min + x_max) / 2)
+                y_center = int((y_min + y_max) / 2)
+                ball_detected = True
+                break
+            elif model.names[int(label)] == "person":
+                # Collect bounding boxes for people
+                x_min, y_min, x_max, y_max = result.xyxy[0].numpy()
+                people_boxes.append((x_min, y_min, x_max, y_max))
+
+        # If no ball is detected, focus on the densest group of people
+        if not ball_detected and people_boxes:
+            # Cluster the people into groups based on proximity
+            centers = np.array([(int((x1 + x2) / 2), int((y1 + y2) / 2)) for x1, y1, x2, y2 in people_boxes])
+            distances = np.linalg.norm(centers[:, None, :] - centers[None, :, :], axis=2)
+
+            # Define a distance threshold to group nearby people - Adjust clustering sensitivity
+            threshold = max(original_width, original_height) * 0.2  # TTP adjusted to 0.2
+            clusters = []
+            visited = set()
+
+            for i, center in enumerate(centers):
+                if i in visited:
+                    continue
+                cluster = [i]
+                visited.add(i)
+                for j in range(len(centers)):
+                    if j not in visited and distances[i, j] < threshold:
+                        cluster.append(j)
+                        visited.add(j)
+                clusters.append(cluster)
+
+            # Find the largest cluster and calculate its bounding box
+            largest_cluster = max(clusters, key=len)
+            x_min = min(people_boxes[i][0] for i in largest_cluster)
+            y_min = min(people_boxes[i][1] for i in largest_cluster)
+            x_max = max(people_boxes[i][2] for i in largest_cluster)
+            y_max = max(people_boxes[i][3] for i in largest_cluster)
+
+            # Center the crop on the largest cluster
+            x_center = int((x_min + x_max) / 2)
+            y_center = int((y_min + y_max) / 2)
+
+        # Calculate the cropping region to fit the target resolution
+        new_width = int(original_height * target_aspect_ratio)
+        new_height = int(original_width / target_aspect_ratio)
+
+        x_start = max(0, x_center - new_width // 2)
+        y_start = max(0, y_center - new_height // 2)
+        x_end = min(original_width, x_start + new_width)
+        y_end = min(original_height, y_start + new_height)
+
+        # Adjust the crop if the size is smaller than the target resolution
+        if (x_end - x_start) < new_width:
+            x_start = max(0, x_end - new_width)
+        if (y_end - y_start) < new_height:
+            y_start = max(0, y_end - new_height)
+
+        # Crop and resize the frame
+        frame_cropped = frame[int(y_start):int(y_end), int(x_start):int(x_end)]
+        frame_resized = cv2.resize(frame_cropped, target_resolution, interpolation=cv2.INTER_LINEAR)
+
+        # Save the processed frame
+        output_path = os.path.join(output_folder, frame_name)
+        cv2.imwrite(output_path, frame_resized)
+        print(f"Processed frame saved: {output_path}")
+
+    print("Preprocessing completed.")
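One bug worth flagging in frame_handler_resnet.py: backup_yolo_crop_preserve_key_objects calls YOLO(model_path), but this module never imports YOLO, so the backup path would raise NameError if invoked. It would need the same import the YOLO handler uses:

    from ultralytics import YOLO  # required by backup_yolo_crop_preserve_key_objects

Likewise, x_center/y_center in the DETR path are only assigned when a ball or a people cluster is found, so the first frame with no qualifying detections would also raise NameError; initializing them to the frame center, as the backup function does, would avoid that.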
handlers/frame_handler_yolo.py
ADDED
@@ -0,0 +1,294 @@
+import os
+import cv2
+import shutil
+from collections import deque
+from ultralytics import YOLO  # Assuming YOLOv8 library
+import numpy as np
+import functools
+import time
+
+
+def timer_decorator(func):
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        start_time = time.time()
+        result = func(*args, **kwargs)
+        end_time = time.time()
+        execution_time = end_time - start_time
+        print(f"{func.__name__} took {execution_time:.2f} seconds to execute")
+        return result
+
+    return wrapper
+
+
+@timer_decorator
+def extract_key_frames(input_folder, key_frames_folder, original_fps=30, model_path='yolov8n.pt'):
+    """
+    Detects frames containing a football and separates them into key frames.
+    Reduces file I/O by loading frames into memory before processing.
+
+    Optimizations:
+    - Reads all frames into memory once to avoid multiple disk reads.
+    - Uses OpenCV to write frames instead of shutil.copy (faster).
+
+    Args:
+        input_folder (str): Path to the folder containing input frames.
+        key_frames_folder (str): Path to save frames containing a football.
+        original_fps: Original frames per second (default is 30).
+        model_path (str): Path to the YOLOv8 model file (default is yolov8n.pt).
+    """
+    counter = 0
+    print("Extracting key frames with reduced file I/O...")
+
+    # Ensure the output directory exists
+    os.makedirs(key_frames_folder, exist_ok=True)
+
+    # Load YOLO model once
+    model = YOLO(model_path)
+
+    # Maintain last 30 non-key frames for reclassification
+    previous_nonkey_frames = deque(maxlen=original_fps)
+    processed_key_frames = set()
+    last_frame_was_key = False
+
+    # Load frames into memory first (reduces file I/O); sort frames by file name
+    frame_names = sorted(os.listdir(input_folder))
+    frames = {}
+    for frame_name in frame_names:
+        if frame_name.lower().endswith(('.jpg', '.png')):
+            frame_path = os.path.join(input_folder, frame_name)
+            frames[frame_name] = cv2.imread(frame_path)  # Load into RAM
+
+    for frame_name, frame in frames.items():
+        if frame is None:
+            continue  # Skip invalid frames
+
+        counter += 1
+        if counter % 1000 == 0:
+            print(f"Processed {counter} frames.")
+        # Run YOLO inference
+        results = model.predict(frame, conf=0.3, verbose=False)
+
+        # Check if a football (sports ball) is detected
+        ball_detected = any(model.names[int(box.cls)] == "sports ball" for box in results[0].boxes)
+
+        if ball_detected:
+            # Reclassify up to 30 previous non-key frames
+            if not last_frame_was_key:
+                for _ in range(min(len(previous_nonkey_frames), 30)):
+                    nonkey_frame_name, nonkey_frame = previous_nonkey_frames.popleft()
+                    if nonkey_frame_name not in processed_key_frames:
+                        cv2.imwrite(os.path.join(key_frames_folder, nonkey_frame_name), nonkey_frame)
+                        processed_key_frames.add(nonkey_frame_name)
+
+                previous_nonkey_frames.clear()  # Reset after reclassification
+
+            # Save the current frame as a key frame if not already processed
+            if frame_name not in processed_key_frames:
+                cv2.imwrite(os.path.join(key_frames_folder, frame_name), frame)
+                processed_key_frames.add(frame_name)
+            last_frame_was_key = True
+        else:
+            previous_nonkey_frames.append((frame_name, frame))
+            last_frame_was_key = False
+
+    print("Key frame extraction complete (Optimized for File I/O).")
+
+@timer_decorator
+def crop_preserve_key_objects(input_folder, output_folder, model_path='yolov8n.pt', target_resolution=(360, 640)):
+    """
+    Preprocess frames to fit a target aspect ratio, focusing on key objects like a football or the densest group of people.
+
+    Reuses the cropping area if conditions are met for smoother transitions, and recalculates if objects move out of the reused cropping area.
+
+    Args:
+        input_folder (str): Path to the folder containing key frames.
+        output_folder (str): Path to save the processed frames.
+        model_path (str): Path to the YOLOv8 model file.
+        target_resolution (tuple): Desired resolution (width, height), e.g., (1920, 1080).
+    """
+    print("Preprocessing frames to fit the target aspect ratio...")
+
+    ball_counter = 0
+    counter = 0
+    x_min, y_min, x_max, y_max = 0, 0, 0, 0
+
+    model = YOLO(model_path)
+    target_aspect_ratio = target_resolution[0] / target_resolution[1]
+
+    os.makedirs(output_folder, exist_ok=True)
+
+    # Sort frames by file name
+    frame_files = sorted(
+        [f for f in os.listdir(input_folder) if f.lower().endswith(('.jpg', '.png'))]
+    )
+
+    last_cropping_area = None  # Store the last cropping area
+    last_objects_detected = None  # Track the type of object detected in the last frame ("ball" or "people")
+
+    for frame_name in frame_files:
+        frame_path = os.path.join(input_folder, frame_name)
+        if not frame_name.lower().endswith(('.jpg', '.png')):
+            continue  # Skip non-image files
+
+        frame = cv2.imread(frame_path)
+        if frame is None:
+            print(f"Error reading frame: {frame_path}")
+            continue
+
+        counter += 1
+        if counter % 100 == 0:
+            status_message = f"Preprocessing frames to fit the target aspect ratio...Processed {counter} frames."
+            #yield status_message, None
+        original_height, original_width = frame.shape[:2]
+
+        # YOLO inference
+        results = model.predict(frame, conf=0.3, verbose=False)
+
+        # Initialize variables
+        x_center, y_center = original_width // 2, original_height // 2
+        ball_detected = False
+        people_boxes = []
+        ball_box = None  # To store ball coordinates for case 1
+
+        # Process detections
+        for result in results[0].boxes:
+            label = result.cls
+            x_min, y_min, x_max, y_max = result.xyxy[0].numpy()
+            if model.names[int(label)] == "sports ball":
+                ball_detected = True
+                ball_box = (x_min, y_min, x_max, y_max)
+                ball_counter += 1
+            elif model.names[int(label)] == "person":
+                people_boxes.append((x_min, y_min, x_max, y_max))
+                # Draw red boxes around people (debug)
+                #cv2.rectangle(frame, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 0, 255), 2)
+
+        # Determine whether to reuse the last cropping area
+        reuse_last_area = False
+        if last_cropping_area:
+            if ball_detected and last_objects_detected == "ball":
+                # Check if the ball is within the last cropping area
+                x_min, y_min, x_max, y_max = last_cropping_area
+                if ball_box and (ball_box[0] >= x_min and ball_box[1] >= y_min and
+                                 ball_box[2] <= x_max and ball_box[3] <= y_max):
+                    reuse_last_area = True
+            elif people_boxes and last_objects_detected == "people":
+                # Reuse the last cropping area until the ball appears
+                reuse_last_area = True
+
+        if reuse_last_area:
+            # Reuse the last cropping area
+            x_min, y_min, x_max, y_max = last_cropping_area
+        else:
+            # Calculate a new cropping area
+            if ball_detected:
+                # Focus on the ball
+                x_min, y_min, x_max, y_max = ball_box
+                last_objects_detected = "ball"
+            elif people_boxes:
+                # Find the densest group of people
+                x_min, y_min, x_max, y_max = calculate_largest_group_box(people_boxes, original_width, original_height)
+
+                # Draw a blue box around the densest people area (debug)
+                #cv2.rectangle(frame, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (255, 0, 0), 2)
+                last_objects_detected = "people"
+
+            # Default to center cropping if nothing is detected
+            # Center the crop on the largest cluster
+            x_center = (x_min + x_max) // 2
+            y_center = (y_min + y_max) // 2
+            new_width = int(original_height * target_aspect_ratio)
+            new_height = int(original_width / target_aspect_ratio)
+            x_min = max(0, x_center - new_width // 2)
+            y_min = max(0, y_center - new_height // 2)
+            x_max = min(original_width, x_min + new_width)
+            y_max = min(original_height, y_min + new_height)
+
+            # Adjust the crop if the size is smaller than the target resolution
+            if (x_max - x_min) < new_width:
+                x_min = max(0, x_max - new_width)
+            if (y_max - y_min) < new_height:
+                y_min = max(0, y_max - new_height)
+
+            # Update the last cropping area
+            last_cropping_area = (x_min, y_min, x_max, y_max)
+
+        # Crop and resize the frame
+        frame_cropped = frame[int(y_min):int(y_max), int(x_min):int(x_max)]
+        frame_resized = cv2.resize(frame_cropped, target_resolution, interpolation=cv2.INTER_CUBIC)
+
+        # Save the processed frame
+        output_path = os.path.join(output_folder, frame_name)
+        cv2.imwrite(output_path, frame_resized)
+
+        # Save the debug frame
+        #debug_path = os.path.join(output_folder, "debug_" + frame_name)
+        #cv2.imwrite(debug_path, frame)
+
+    print("Completed preprocessing frames to fit the target aspect ratio.")
+    print(f"Total frames processed: {len(frame_files)}")
+    print(f"Total frames detected with a sports ball: {ball_counter}")
+
+
+def calculate_largest_group_box(people_boxes, original_width, original_height):
+    """
+    Calculate the bounding box for the densest group of people.
+
+    Args:
+        people_boxes (list of tuples): List of bounding boxes for detected people.
+            Each box is (x_min, y_min, x_max, y_max).
+        original_width (int): Width of the original frame.
+        original_height (int): Height of the original frame.
+
+    Returns:
+        tuple: Bounding box (x_min, y_min, x_max, y_max) for the densest group of people.
+    """
+    if not people_boxes:
+        return None  # Return None if no people boxes are provided
+
+    # Get the center points of all bounding boxes
+    centers = np.array([(int((x1 + x2) / 2), int((y1 + y2) / 2)) for x1, y1, x2, y2 in people_boxes])
+
+    # Calculate pairwise distances between all centers
+    distances = np.linalg.norm(centers[:, None, :] - centers[None, :, :], axis=2)
+
+    # Define a distance threshold for clustering. Adjust this value if needed
+    threshold = max(original_width, original_height) * 0.2  # TTP adjusted to 0.2 to allow bigger distance
+
+    # Perform clustering using a flood-fill approach
+    clusters = []
+    visited = set()
+
+    for i, center in enumerate(centers):
+        if i in visited:
+            continue
+        cluster = [i]
+        queue = [i]
+        visited.add(i)
+
+        for j in range(len(centers)):
+            if j not in visited and distances[i, j] < threshold:
+                cluster.append(j)
+                visited.add(j)
+        clusters.append(cluster)
+
+    # Find the largest cluster based on the number of people
+    largest_cluster = max(clusters, key=len)
+
+    # Calculate the bounding box for the largest cluster
+    x_min = min(people_boxes[i][0] for i in largest_cluster)
+    y_min = min(people_boxes[i][1] for i in largest_cluster)
+    x_max = max(people_boxes[i][2] for i in largest_cluster)
+    y_max = max(people_boxes[i][3] for i in largest_cluster)
+
+    # Expand the bounding box slightly to include some context
+    #padding_x = int(original_width * 0.05)  # 5% padding horizontally
+    #padding_y = int(original_height * 0.05)  # 5% padding vertically
+
+    #x_min = max(0, x_min - padding_x)
+    #y_min = max(0, y_min - padding_y)
+    #x_max = min(original_width, x_max + padding_x)
+    #y_max = min(original_height, y_max + padding_y)
+
+    return x_min, y_min, x_max, y_max
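As a sanity check of the grouping in calculate_largest_group_box (note it seeds a queue list that is never consumed, so each cluster only collects direct neighbours of its seed rather than performing a full flood fill), here is a small standalone example with hypothetical boxes on a 1920x1080 frame:

    # Two players close together on the left, one isolated player on the right.
    people_boxes = [(100, 300, 180, 500), (220, 320, 300, 520), (1600, 300, 1680, 500)]

    # threshold = max(1920, 1080) * 0.2 = 384 px; the left pair's centers are
    # ~122 px apart (grouped), the right player is ~1400 px away (separate).
    print(calculate_largest_group_box(people_boxes, 1920, 1080))
    # -> (100, 300, 300, 520), the union box of the left pair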
handlers/video_handler.py
ADDED
@@ -0,0 +1,171 @@
+import os
+import cv2
+
+import functools
+import time
+
+
+def timer_decorator(func):
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        start_time = time.time()
+        result = func(*args, **kwargs)
+        end_time = time.time()
+        execution_time = end_time - start_time
+        print(f"{func.__name__} took {execution_time:.2f} seconds to execute")
+        return result
+
+    return wrapper
+
+
+@timer_decorator
+def extract_frames_by_rate(video_path, output_folder, frame_rate=1):
+    """
+    Extracts frames from a video at a specified frame rate.
+
+    Args:
+        video_path (str): Path to the input video file.
+        output_folder (str): Directory to save the extracted frames.
+        frame_rate (int): Number of frames to extract per second of the video.
+    """
+    # Ensure the output directory exists
+    if not os.path.exists(output_folder):
+        os.makedirs(output_folder)
+
+    # Load the video
+    video = cv2.VideoCapture(video_path)
+
+    # Check if the video is opened successfully
+    if not video.isOpened():
+        print(f"Error: Cannot open video file {video_path}")
+        return
+
+    # Get video properties
+    fps = int(video.get(cv2.CAP_PROP_FPS))  # Frames per second
+    total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))  # Total number of frames
+    duration = total_frames / fps  # Duration in seconds
+
+    print(f"Video loaded: {video_path}")
+    print(f"Total Frames: {total_frames}, FPS: {fps}, Duration: {duration:.2f} seconds")
+
+    # Calculate frame interval (in terms of frame number)
+    frame_interval = fps // frame_rate
+
+    # Frame counter
+    frame_count = 0
+    saved_count = 0
+
+    while True:
+        # Read a frame
+        ret, frame = video.read()
+
+        # Break the loop if the video ends
+        if not ret:
+            break
+
+        # Save frame if it matches the frame interval
+        if frame_count % frame_interval == 0:
+            frame_filename = os.path.join(output_folder, f"frame_{saved_count:05d}.jpg")
+            cv2.imwrite(frame_filename, frame)
+            #print(f"Saved: {frame_filename}")
+            saved_count += 1
+
+        frame_count += 1
+
+    # Release video resources
+    video.release()
+    print(f"Extraction complete. Total frames saved: {saved_count}")
+
+
+@timer_decorator
+def extract_all_frames(video_path, output_folder):
+    """
+    Extracts all frames from a video.
+
+    Args:
+        video_path (str): Path to the input video file.
+        output_folder (str): Directory to save the extracted frames.
+    """
+    # Ensure the output directory exists
+    if not os.path.exists(output_folder):
+        os.makedirs(output_folder)
+
+    # Load the video
+    video = cv2.VideoCapture(video_path)
+
+    # Check if the video is opened successfully
+    if not video.isOpened():
+        print(f"Error: Cannot open video file {video_path}")
+        return
+
+    # Get video properties
+    fps = int(video.get(cv2.CAP_PROP_FPS))  # Frames per second
+    total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))  # Total number of frames
+    duration = total_frames / fps  # Duration in seconds
+
+    print(f"Video loaded: {video_path}")
+    print(f"Total Frames: {total_frames}, FPS: {fps}, Duration: {duration:.2f} seconds")
+    print("Extracting frames from the video...")
+    # Frame counter
+    frame_count = 0
+
+    while True:
+        # Read a frame
+        ret, frame = video.read()
+
+        # Break the loop if the video ends
+        if not ret:
+            break
+
+        # Save every frame
+        frame_filename = os.path.join(output_folder, f"frame_{frame_count:05d}.jpg")
+        cv2.imwrite(frame_filename, frame)
+        frame_count += 1
+
+    # Release video resources
+    video.release()
+    print(f"Extraction complete. Total frames saved: {frame_count}")
+
+
+@timer_decorator
+def create_video_from_frames(input_folder, output_video_path, frame_rate=30, resolution=(360, 640)):
+    """
+    Creates a video from preprocessed frames.
+
+    Args:
+        input_folder (str): Path to the folder containing frames.
+        output_video_path (str): Path to save the output video.
+        frame_rate (int): Frames per second for the output video.
+        resolution (tuple): Resolution of the output video (width, height).
+    """
+    # Get sorted list of image files in the folder
+    frame_files = sorted(
+        [f for f in os.listdir(input_folder) if f.lower().endswith(('.jpg', '.png'))]
+    )
+
+    # Initialize the video writer
+    fourcc = cv2.VideoWriter_fourcc(*'avc1')  # H.264 codec
+    video_writer = cv2.VideoWriter(output_video_path, fourcc, frame_rate, resolution)
+
+    for frame_file in frame_files:
+        frame_path = os.path.join(input_folder, frame_file)
+        frame = cv2.imread(frame_path)
+
+        if frame is None:
+            print(f"Error reading frame: {frame_path}")
+            continue
+
+        # Get the frame's original resolution
+        original_height, original_width = frame.shape[:2]
+
+        # Check if resizing is needed
+        if (original_width, original_height) != resolution:
+            # Ensure the frame matches the target resolution
+            frame = cv2.resize(frame, resolution, interpolation=cv2.INTER_CUBIC)
+
+        # Write the frame to the video
+        video_writer.write(frame)
+
+    # Release the video writer
+    video_writer.release()
+    print(f"Video saved to: {output_video_path}")
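A caveat on create_video_from_frames: the 'avc1' (H.264) FourCC depends on the local OpenCV build, and pip wheels of opencv-python often ship without an H.264 encoder, in which case VideoWriter silently produces an empty or unplayable file. A defensive sketch, assuming 'mp4v' is an acceptable fallback codec:

    import cv2

    def open_writer(path, fps, resolution):
        # Try H.264 first; fall back to MPEG-4 Part 2 if the build lacks it.
        for codec in ("avc1", "mp4v"):
            writer = cv2.VideoWriter(path, cv2.VideoWriter_fourcc(*codec), fps, resolution)
            if writer.isOpened():
                return writer
        raise RuntimeError(f"No usable codec for {path}")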
handlers/yolov8n.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f59b3d833e2ff32e194b5bb8e08d211dc7c5bdf144b90d2c8412c47ccfc83b36
+size 6549796
input_data/.DS_Store
ADDED
Binary file (6.15 kB)
requirements.txt
ADDED
@@ -0,0 +1,191 @@
+absl-py==2.1.0
+aiofiles==23.2.1
+aiohttp==3.8.4
+aiosignal==1.3.1
+altair==5.3.0
+annotated-types==0.6.0
+anyio==4.3.0
+appnope==0.1.4
+argon2-cffi==23.1.0
+argon2-cffi-bindings==21.2.0
+arrow==1.3.0
+asttokens==2.4.1
+async-lru==2.0.4
+async-timeout==4.0.2
+attrs==23.1.0
+babel==2.16.0
+beautifulsoup4==4.12.3
+bleach==6.1.0
+boto3==1.28.73
+botocore==1.31.73
+captum==0.7.0
+certifi==2023.5.7
+cffi==1.17.1
+charset-normalizer==3.1.0
+clarabel==0.9.0
+click==8.1.7
+comm==0.2.2
+contourpy==1.2.1
+cvxpy==1.6.0
+cycler==0.12.1
+debugpy==1.8.5
+decorator==5.1.1
+defusedxml==0.7.1
+dill==0.3.7
+dnspython==2.6.1
+email_validator==2.1.1
+executing==2.1.0
+fastapi==0.111.0
+fastapi-cli==0.0.3
+fastjsonschema==2.20.0
+ffmpy==0.3.2
+filelock==3.12.4
+fonttools==4.51.0
+fqdn==1.5.1
+frozenlist==1.3.3
+fsspec==2023.6.0
+gradio_client==0.16.2
+grpcio==1.66.2
+h11==0.14.0
+holidays==0.57
+httpcore==1.0.5
+httptools==0.6.1
+httpx==0.27.0
+huggingface-hub==0.23.0
+idna==3.4
+importlib_resources==6.4.0
+ipykernel==6.29.5
+ipython==8.27.0
+isoduration==20.11.0
+jedi==0.19.1
+Jinja2==3.1.2
+jmespath==1.0.1
+joblib==1.4.2
+json5==0.9.25
+jsonpointer==3.0.0
+jsonschema==4.22.0
+jsonschema-specifications==2023.12.1
+jupyter-events==0.10.0
+jupyter-lsp==2.2.5
+jupyter_client==8.6.2
+jupyter_core==5.7.2
+jupyter_server==2.14.2
+jupyter_server_terminals==0.5.3
+jupyterlab==4.2.5
+jupyterlab_pygments==0.3.0
+jupyterlab_server==2.27.3
+kaleido==0.2.1
+kiwisolver==1.4.5
+liac-arff==2.5.0
+lightning-utilities==0.11.7
+Markdown==3.7
+markdown-it-py==3.0.0
+MarkupSafe==2.1.3
+matplotlib==3.8.4
+matplotlib-inline==0.1.7
+mdurl==0.1.2
+minio==7.2.8
+mistune==3.0.2
+mpmath==1.3.0
+multidict==6.0.4
+multiprocess==0.70.15
+nbclient==0.10.0
+nbconvert==7.16.4
+nbformat==5.10.4
+nest-asyncio==1.6.0
+networkx==3.1
+neuralprophet==0.9.0
+notebook_shim==0.2.4
+numpy==1.26.1
+openai==0.27.7
+openml==0.14.2
+orjson==3.10.3
+osqp==0.6.7.post3
+overrides==7.7.0
+packaging==23.2
+pandas==2.1.1
+pandocfilters==1.5.1
+parso==0.8.4
+pexpect==4.9.0
+pillow==10.3.0
+platformdirs==4.3.2
+plotly==5.24.1
+prometheus_client==0.20.0
+prompt_toolkit==3.0.47
+protobuf==5.28.2
+psutil==6.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pyarrow==13.0.0
+pycparser==2.22
+pycryptodome==3.20.0
+pydantic==2.7.1
+pydantic_core==2.18.2
+pydub==0.25.1
+Pygments==2.18.0
+pyparsing==3.1.2
+python-dateutil==2.8.2
+python-dotenv==1.0.1
+python-json-logger==2.0.7
+python-multipart==0.0.9
+pytorch-lightning==2.4.0
+pytz==2023.3.post1
+PyYAML==6.0.1
+pyzmq==26.2.0
+qdldl==0.1.7.post4
+referencing==0.35.1
+regex==2023.10.3
+requests==2.31.0
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+rich==13.7.1
+rpds-py==0.18.1
+ruff==0.4.3
+s3transfer==0.7.0
+safetensors==0.4.0
+scikit-learn==1.5.2
+scipy==1.14.1
+scs==3.2.7
+semantic-version==2.10.0
+Send2Trash==1.8.3
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+soupsieve==2.6
+stack-data==0.6.3
+starlette==0.37.2
+sympy==1.12
+tenacity==9.0.0
+tensorboard==2.18.0
+tensorboard-data-server==0.7.2
+terminado==0.18.1
+threadpoolctl==3.5.0
+tinycss2==1.3.0
+tokenizers==0.14.1
+tomlkit==0.12.0
+toolz==0.12.1
+torch==2.1.0
+torchdata==0.7.0
+torchmetrics==1.4.2
+tornado==6.4.1
+tqdm==4.65.0
+traitlets==5.14.3
+typer==0.12.3
+types-python-dateutil==2.9.0.20240906
+typing_extensions==4.8.0
+tzdata==2023.3
+ujson==5.9.0
+uri-template==1.3.0
+urllib3==2.0.2
+uvicorn==0.29.0
+uvloop==0.19.0
+watchfiles==0.21.0
+wcwidth==0.2.13
+webcolors==24.8.0
+webencodings==0.5.1
+websocket-client==1.8.0
+websockets==11.0.3
+Werkzeug==3.0.4
+xmltodict==0.13.0
+xxhash==3.4.1
+yarl==1.9.2
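As pinned, requirements.txt omits several packages the code imports: gradio itself (only gradio_client is listed), plus spaces, ultralytics, opencv-python, and transformers. On a Hugging Face Space, gradio and spaces are preinstalled, but a local run would need them added, for example (hypothetical additions, versions deliberately unpinned):

    gradio
    spaces
    ultralytics
    opencv-python
    transformers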
yolov8n.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f59b3d833e2ff32e194b5bb8e08d211dc7c5bdf144b90d2c8412c47ccfc83b36
+size 6549796