|
|
import os |
|
|
import cv2 |
|
|
import shutil |
|
|
from collections import deque |
|
|
from ultralytics import YOLO |
|
|
import numpy as np |
|
|
import functools |
|
|
import time |
|
|
|
|
|
|
|
|
def timer_decorator(func):
    """Decorator that prints how long the wrapped function took to run.

    Args:
        func: The function to time.

    Returns:
        A wrapper with the same signature that forwards *args/**kwargs,
        prints the elapsed wall time, and returns the original result.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter() is monotonic and higher-resolution than time.time(),
        # so it is the right clock for measuring elapsed durations.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        execution_time = time.perf_counter() - start_time
        print(f"{func.__name__} took {execution_time:.2f} seconds to execute")
        return result

    return wrapper
|
|
|
|
|
|
|
|
@timer_decorator
def extract_key_frames(input_folder, key_frames_folder, original_fps, model_path='yolov8n.pt'):
    """
    Detects frames containing a football ("sports ball") and copies them,
    plus up to one second of preceding context frames, into key_frames_folder.
    Reduces file I/O by loading frames into memory before processing.

    Optimizations:
    - Reads all frames into memory once to avoid multiple disk reads.
    - Uses OpenCV to write frames instead of shutil.copy (faster).

    Args:
        input_folder (str): Path to the folder containing input frames.
        key_frames_folder (str): Path to save frames containing a football.
        original_fps (int | float): Original frames per second; used as the
            size of the lead-in context window. Floats (as returned by
            cv2.CAP_PROP_FPS) are truncated to int.
        model_path (str): Path to the YOLOv8 model file (default is yolov8n.pt).
    """
    counter = 0
    print("Extracting key frames with reduced file I/O...")

    os.makedirs(key_frames_folder, exist_ok=True)

    model = YOLO(model_path)

    # cv2.CAP_PROP_FPS commonly yields a float, but deque(maxlen=...) requires
    # an int — truncate to avoid a TypeError at call time.
    context_len = int(original_fps)
    # Rolling buffer of the most recent ~1 second of non-key frames, flushed
    # as lead-in context when a new key segment starts.
    previous_nonkey_frames = deque(maxlen=context_len)
    processed_key_frames = set()  # file names already written, prevents duplicates
    last_frame_was_key = False

    # Load every frame into RAM once (trades memory for fewer disk reads).
    frame_names = sorted(os.listdir(input_folder))
    frames = {}
    for frame_name in frame_names:
        if frame_name.lower().endswith(('.jpg', '.png')):
            frame_path = os.path.join(input_folder, frame_name)
            frames[frame_name] = cv2.imread(frame_path)

    for frame_name, frame in frames.items():
        if frame is None:
            # cv2.imread returns None for unreadable/corrupt images.
            continue

        counter += 1
        if counter % 1000 == 0:
            print(f"Processed {counter} frames.")

        results = model.predict(frame, conf=0.1, verbose=False)

        ball_detected = any(model.names[int(box.cls)] == "sports ball" for box in results[0].boxes)

        if ball_detected:
            if not last_frame_was_key:
                # New key segment: flush the buffered context frames first so
                # the output keeps up to one second of lead-in footage. The
                # deque's maxlen already caps the buffer, so draining it fully
                # is equivalent to the capped range-pop of the original code.
                while previous_nonkey_frames:
                    nonkey_frame_name, nonkey_frame = previous_nonkey_frames.popleft()
                    if nonkey_frame_name not in processed_key_frames:
                        cv2.imwrite(os.path.join(key_frames_folder, nonkey_frame_name), nonkey_frame)
                        processed_key_frames.add(nonkey_frame_name)

            if frame_name not in processed_key_frames:
                cv2.imwrite(os.path.join(key_frames_folder, frame_name), frame)
                processed_key_frames.add(frame_name)
            last_frame_was_key = True
        else:
            previous_nonkey_frames.append((frame_name, frame))
            last_frame_was_key = False

    print("Key frame extraction complete (Optimized for File I/O).")
|
|
|
|
|
|
|
|
@timer_decorator
def crop_preserve_key_objects(input_folder, output_folder, model_path='yolov8n.pt', target_resolution=(360, 640)):
    """
    Crops each frame around its key objects and resizes to the target resolution.

    Per frame, the crop anchor is chosen in priority order:
    1. the detected sports ball,
    2. the densest group of detected people (via calculate_largest_group_box),
    3. the frame center.
    The previous crop area is reused while the tracked object class stays the
    same (and, for the ball, while it remains inside that area) to reduce
    jitter between consecutive frames.

    Optimized version: Uses more RAM and reduces file I/O by storing frames in memory.

    Args:
        input_folder (str): Path to the folder containing key frames.
        output_folder (str): Path to save the processed frames.
        model_path (str): Path to the YOLOv8 model file.
        target_resolution (tuple): Desired resolution (width, height), e.g., (360, 640).
    """
    print("Preprocessing frames to fit the target aspect ratio (Optimized for RAM)...")

    model = YOLO(model_path)
    target_aspect_ratio = target_resolution[0] / target_resolution[1]

    os.makedirs(output_folder, exist_ok=True)

    frame_files = sorted([f for f in os.listdir(input_folder) if f.lower().endswith(('.jpg', '.png'))])

    # Load every frame into RAM once (trades memory for fewer disk reads).
    frames = {}
    for frame_name in frame_files:
        frame_path = os.path.join(input_folder, frame_name)
        frames[frame_name] = cv2.imread(frame_path)

    # State carried across frames to keep the crop stable between frames.
    last_cropping_area = None      # (x_min, y_min, x_max, y_max) of last computed crop
    last_objects_detected = None   # "ball" or "people": what anchored the last crop

    ball_counter = 0  # frames in which a sports ball was detected
    counter = 0       # progress counter

    for frame_name, frame in frames.items():
        if frame is None:
            # cv2.imread returns None for unreadable/corrupt images.
            print(f"Error reading frame: {frame_name}")
            continue

        counter += 1
        if counter % 1000 == 0:
            print(f"Processed {counter} frames...")

        original_height, original_width = frame.shape[:2]

        # Minimum crop size that matches the target aspect ratio while staying
        # within the frame; used below to pad undersized object crops.
        new_width = int(original_height * target_aspect_ratio)
        new_height = int(original_width / target_aspect_ratio)

        results = model.predict(frame, conf=0.1, verbose=False)

        ball_detected = False
        people_boxes = []
        ball_box = None

        # Collect ball and person detections from the YOLO result boxes.
        for result in results[0].boxes:
            label = result.cls
            x_min, y_min, x_max, y_max = result.xyxy[0].cpu().numpy()
            if model.names[int(label)] == "sports ball":
                ball_detected = True
                ball_box = (x_min, y_min, x_max, y_max)
                ball_counter += 1
            elif model.names[int(label)] == "person":
                people_boxes.append((x_min, y_min, x_max, y_max))

        # Decide whether the previous crop area is still valid for this frame.
        reuse_last_area = False
        if last_cropping_area:
            if ball_detected and last_objects_detected == "ball":
                # Reuse only if the ball is still fully inside the last crop.
                x_min, y_min, x_max, y_max = last_cropping_area
                if ball_box and (ball_box[0] >= x_min and ball_box[1] >= y_min and
                                 ball_box[2] <= x_max and ball_box[3] <= y_max):
                    reuse_last_area = True
            elif people_boxes and last_objects_detected == "people":
                # NOTE(review): the people crop is reused without checking that
                # the group is still inside it — presumably acceptable drift;
                # confirm against output quality.
                reuse_last_area = True

        if reuse_last_area:
            x_min, y_min, x_max, y_max = last_cropping_area
        else:
            # Compute a fresh crop area, preferring the ball over people.
            if ball_detected:
                x_min, y_min, x_max, y_max = ball_box
                last_objects_detected = "ball"
            elif people_boxes:
                x_min, y_min, x_max, y_max = calculate_largest_group_box(people_boxes, original_width, original_height)
                last_objects_detected = "people"
            else:
                # No detections: fall back to a centered crop at the target
                # aspect ratio.
                x_center, y_center = original_width // 2, original_height // 2
                new_width = int(original_height * target_aspect_ratio)
                new_height = int(original_width / target_aspect_ratio)
                x_min = max(0, x_center - new_width // 2)
                y_min = max(0, y_center - new_height // 2)
                x_max = min(original_width, x_min + new_width)
                y_max = min(original_height, y_min + new_height)

            # Pad undersized crops (e.g. a tight ball bbox) leftwards/upwards
            # so the cropped region is at least the target-aspect size.
            if (x_max - x_min) < new_width:
                x_min = max(0, x_max - new_width)
            if (y_max - y_min) < new_height:
                y_min = max(0, y_max - new_height)

            last_cropping_area = (x_min, y_min, x_max, y_max)

        # Crop (coordinates may be numpy floats, hence the int casts) and
        # resize to the exact target resolution.
        frame_cropped = frame[int(y_min):int(y_max), int(x_min):int(x_max)]
        frame_resized = cv2.resize(frame_cropped, target_resolution, interpolation=cv2.INTER_CUBIC)

        output_path = os.path.join(output_folder, frame_name)
        cv2.imwrite(output_path, frame_resized)

    print("Completed preprocessing (Optimized for RAM).")
    print(f"Total frames processed: {len(frame_files)}")
    print(f"Total frames detected with a sports ball: {ball_counter}")
|
|
|
|
|
|
|
|
def calculate_largest_group_box(people_boxes, original_width, original_height):
    """
    Calculate the bounding box for the densest group of people.

    Two detections are linked when their box centers are closer than 20% of
    the larger frame dimension; groups are the connected components of that
    graph, found with a breadth-first search. The union bounding box of the
    largest group is returned.

    (The original implementation created a BFS queue but never consumed it,
    so a "cluster" only contained direct neighbours of its seed detection and
    chains of people were split apart; this version follows links transitively.)

    Args:
        people_boxes (list of tuples): List of bounding boxes for detected people.
                                       Each box is (x_min, y_min, x_max, y_max).
        original_width (int): Width of the original frame.
        original_height (int): Height of the original frame.

    Returns:
        tuple: Bounding box (x_min, y_min, x_max, y_max) for the densest group
        of people, or None if people_boxes is empty.
    """
    if not people_boxes:
        return None

    # One (x, y) center per detection.
    centers = np.array([(int((x1 + x2) / 2), int((y1 + y2) / 2)) for x1, y1, x2, y2 in people_boxes])

    # Pairwise Euclidean distances between all centers.
    distances = np.linalg.norm(centers[:, None, :] - centers[None, :, :], axis=2)

    # Link threshold: 20% of the larger frame dimension.
    threshold = max(original_width, original_height) * 0.2

    # BFS over the "close enough" graph to collect connected components.
    clusters = []
    visited = set()
    num_people = len(centers)

    for i in range(num_people):
        if i in visited:
            continue
        cluster = []
        queue = deque([i])
        visited.add(i)

        while queue:
            current = queue.popleft()
            cluster.append(current)
            for j in range(num_people):
                if j not in visited and distances[current, j] < threshold:
                    visited.add(j)
                    queue.append(j)

        clusters.append(cluster)

    largest_cluster = max(clusters, key=len)

    # Union bounding box of the largest cluster.
    x_min = min(people_boxes[i][0] for i in largest_cluster)
    y_min = min(people_boxes[i][1] for i in largest_cluster)
    x_max = max(people_boxes[i][2] for i in largest_cluster)
    y_max = max(people_boxes[i][3] for i in largest_cluster)

    return x_min, y_min, x_max, y_max
|
|
|
|
|
|