Upload folder using huggingface_hub
Browse files- README.md +31 -3
- main.py +192 -0
- requirements.txt +5 -0
- yolov11l.pt +3 -0
README.md
CHANGED
@@ -1,3 +1,31 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
facesaver
|
2 |
+
|
3 |
+
A tool to process video files into stills for image and video AI training, using yolov11 face detection to find scenes with people in them, within a certain size and position range.
|
4 |
+
|
5 |
+
Requirements:
|
6 |
+
CUDA 12.x
|
7 |
+
A GPU with 6GB or more VRAM
|
8 |
+
Raw video rips, unless you want subtitles in your training data.
|
9 |
+
|
10 |
+
Usage:
|
11 |
+
1. create a conda env
|
12 |
+
conda create -n facesaver python=3.12
|
13 |
+
2. activate the env
|
14 |
+
conda activate facesaver
|
15 |
+
3. install the requirements
|
16 |
+
pip3 install -r requirements.txt
|
17 |
+
4. put your video files into the input directory
|
18 |
+
5. run the command
|
19 |
+
python3 main.py -I ./input -O ./output -w 200 -m 200
|
20 |
+
|
21 |
+
notes:
|
22 |
+
You can use -w and -m to specify the minimum bounding box for face detection, to avoid triggering on background faces
|
23 |
+
If you find you're getting too many false positives or not enough faces, adjust the code here:
|
24 |
+
# Perform face detection if no face has been detected in this scene
|
25 |
+
if not face_detected_in_scene:
|
26 |
+
try:
|
27 |
+
results = model.predict(frame, classes=[0], conf=0.75, device=device)
|
28 |
+
by changing conf to something bigger or smaller
|
29 |
+
|
30 |
+
You will have to do some cleanup to remove the occasional non-face and faces in credit scenes.
|
31 |
+
If you process something like a 12-episode anime, you should end up with 250-1000 usable stills after manual cleanup.
|
main.py
ADDED
@@ -0,0 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
|
3 |
+
import argparse
|
4 |
+
import os
|
5 |
+
import cv2
|
6 |
+
import numpy as np
|
7 |
+
from ultralytics import YOLO
|
8 |
+
from scenedetect import open_video, SceneManager, ContentDetector
|
9 |
+
import torch
|
10 |
+
|
11 |
+
def parse_arguments():
    """Build and evaluate the tool's command-line interface.

    Returns:
        argparse.Namespace with input_dir, output_dir, min_width, min_height.
    """
    cli = argparse.ArgumentParser(
        description="Detect full faces in videos and capture screenshots on scene changes.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    cli.add_argument(
        "--input-dir", "-I",
        required=True,
        help="Directory containing input video files.",
    )
    cli.add_argument(
        "--output-dir", "-O",
        required=True,
        help="Directory to save screenshot outputs.",
    )
    # Both size thresholds share the same type and default; declare them
    # from a small spec table instead of two near-identical calls.
    size_options = (
        ("--min-width", "-w",
         "Minimum width of face bounding box to trigger screenshot."),
        ("--min-height", "-m",
         "Minimum height of face bounding box to trigger screenshot."),
    )
    for long_flag, short_flag, help_text in size_options:
        cli.add_argument(long_flag, short_flag, type=int, default=200, help=help_text)
    return cli.parse_args()
40 |
+
|
41 |
+
def ensure_directory(directory):
    """Create *directory* (and any missing parents) if it doesn't exist.

    Uses ``exist_ok=True`` instead of a separate ``os.path.exists`` check:
    the check-then-create pattern is racy (another process could create
    the directory between the check and the ``makedirs`` call, raising
    ``FileExistsError``). ``exist_ok=True`` makes the call idempotent.
    """
    os.makedirs(directory, exist_ok=True)
45 |
+
|
46 |
+
def check_cuda():
    """Report CUDA availability and return the torch device to use."""
    cuda_available = torch.cuda.is_available()
    if not cuda_available:
        # No GPU visible to torch; everything will run on the CPU.
        print("CUDA is not available. Falling back to CPU.")
        return torch.device("cpu")
    print(f"CUDA is available! Using GPU: {torch.cuda.get_device_name(0)}")
    print(f"CUDA version: {torch.version.cuda}")
    print(f"Number of GPUs: {torch.cuda.device_count()}")
    return torch.device("cuda")
57 |
+
|
58 |
+
def is_full_face(box, frame_shape, min_width, min_height, min_proportion=0.1):
    """Return True when *box* looks like a complete, sufficiently large face.

    A box qualifies only if it lies strictly inside the frame (no edge
    contact, which would suggest a partially visible face), meets the
    absolute ``min_width``/``min_height`` pixel floor, and spans at least
    ``min_proportion`` of the frame in both dimensions.
    """
    left, top, right, bottom = box
    frame_h, frame_w = frame_shape[:2]

    box_w = right - left
    box_h = bottom - top

    # Strictly inside the frame: touching any edge disqualifies the box.
    fully_inside = left > 0 and top > 0 and right < frame_w and bottom < frame_h
    # Absolute pixel-size floor supplied by the caller.
    meets_minimum = box_w >= min_width and box_h >= min_height
    # Relative-size floor: tiny boxes are likely background, not faces.
    meets_proportion = (box_w >= frame_w * min_proportion
                        and box_h >= frame_h * min_proportion)

    return fully_inside and meets_minimum and meets_proportion
78 |
+
|
79 |
+
def process_video(video_path, output_dir, min_width, min_height, model, device):
    """Process a single video: detect scene cuts, then save at most one
    screenshot per scene containing a qualifying face.

    Args:
        video_path: Path to the input video file.
        output_dir: Directory where screenshots are written (must exist).
        min_width: Minimum face bounding-box width in pixels.
        min_height: Minimum face bounding-box height in pixels.
        model: A loaded Ultralytics YOLO model.
        device: torch device to run inference on.

    Errors (unreadable video, scene-detection failure, per-frame inference
    failure) are printed and handled best-effort rather than raised.
    """
    # Initialize PySceneDetect
    try:
        video = open_video(video_path)
        scene_manager = SceneManager()
        scene_manager.add_detector(ContentDetector(threshold=30.0))
    except Exception as e:
        print(f"Error initializing video for scene detection in {video_path}: {e}")
        return

    # Get video capture for OpenCV (scenedetect and OpenCV each decode the
    # file independently: one pass for cut detection, one for frame grabs)
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error opening video file {video_path}")
        return

    # Find scenes; scene_starts holds the frame index of each cut.
    try:
        scene_manager.detect_scenes(video=video)
        scene_list = scene_manager.get_scene_list()
        scene_starts = [scene[0].get_frames() for scene in scene_list]
    except Exception as e:
        print(f"Error detecting scenes in {video_path}: {e}")
        cap.release()
        return

    scene_index = 0
    face_detected_in_scene = False
    frame_idx = 0
    output_count = 0
    video_name = os.path.splitext(os.path.basename(video_path))[0]

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Check if current frame is start of a new scene
        if scene_index < len(scene_starts) and frame_idx >= scene_starts[scene_index]:
            face_detected_in_scene = False  # Reset face detection for new scene
            scene_index += 1
            print(f"New scene detected at frame {frame_idx}")

        # Perform face detection if no face has been detected in this scene
        if not face_detected_in_scene:
            try:
                results = model.predict(frame, classes=[0], conf=0.75, device=device)

                for result in results:
                    boxes = result.boxes.xyxy.cpu().numpy()
                    confidences = result.boxes.conf.cpu().numpy()
                    classes = result.boxes.cls.cpu().numpy()

                    for box, conf, cls in zip(boxes, confidences, classes):
                        if cls == 0:  # Class 0 is 'person' in COCO, used as proxy for face
                            if is_full_face(box, frame.shape, min_width, min_height):
                                # Save screenshot
                                output_path = os.path.join(output_dir, f"{video_name}_face_{output_count:04d}.jpg")
                                cv2.imwrite(output_path, frame)
                                print(f"Saved screenshot: {output_path}")
                                output_count += 1
                                face_detected_in_scene = True
                                break  # Stop checking boxes after first valid face
                    if face_detected_in_scene:
                        break  # Stop checking results after first valid face

            except Exception as e:
                print(f"Error during face detection in {video_path}: {e}")

        frame_idx += 1

    cap.release()
    print(f"Processed {video_path}: {output_count} screenshots saved.")
156 |
+
|
157 |
+
def main():
    """CLI entry point: validate directories, load the YOLO model once,
    and process every supported video file in the input directory."""
    args = parse_arguments()

    # Validate input directory
    if not os.path.isdir(args.input_dir):
        print(f"Error: Input directory '{args.input_dir}' does not exist.")
        return

    # Ensure output directory exists
    ensure_directory(args.output_dir)

    # Check CUDA and set device once
    device = check_cuda()

    # Load YOLO model once (model weights expected next to the script)
    try:
        model = YOLO("yolov11l.pt")
        model.to(device)
        print(f"YOLO model loaded on device: {device}")
    except Exception as e:
        print(f"Error loading YOLO model: {e}")
        return

    # Supported video extensions
    video_extensions = ('.mp4', '.avi', '.mov', '.mkv')

    # Sort the listing so runs are deterministic and reproducible:
    # os.listdir returns names in arbitrary filesystem order.
    for filename in sorted(os.listdir(args.input_dir)):
        if filename.lower().endswith(video_extensions):
            video_path = os.path.join(args.input_dir, filename)
            print(f"Processing video: {video_path}")
            process_video(video_path, args.output_dir, args.min_width, args.min_height, model, device)
190 |
+
|
191 |
+
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ultralytics
|
2 |
+
opencv-python
|
3 |
+
numpy
|
4 |
+
scenedetect
|
5 |
+
torch
|
yolov11l.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ebd0e09d59811db4b1d61e2bc6730649608b1ac47f8dd01e2da6bca7c20023f
|
3 |
+
size 51387343
|