quarterturn committed on
Commit
28ca839
·
verified ·
1 Parent(s): 907b133

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +31 -3
  2. main.py +192 -0
  3. requirements.txt +5 -0
  4. yolov11l.pt +3 -0
README.md CHANGED
@@ -1,3 +1,31 @@
1
- ---
2
- license: cc-by-nc-nd-4.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ facesaver
2
+
3
+ A tool to process video files into stills for image and video AI training, using yolov11 face detection to find scenes with people in them, within a certain size and position range.
4
+
5
+ Requirements:
6
+ CUDA 12.x
7
+ A GPU with 6GB or more VRAM
8
+ Raw video rips, unless you want subtitles in your training data.
9
+
10
+ Usage:
11
+ 1. create a conda env
12
+ conda create -n facesaver python=3.12
13
+ 2. activate the env
14
+ conda activate facesaver
15
+ 3. install the requirements
16
+ pip3 install -r requirements.txt
17
+ 4. put your video files into the input directory
18
+ 5. run the command
19
+ python3 main.py -I ./input -O ./output -w 200 -m 200
20
+
21
+ notes:
22
+ You can use -w and -m to specify the minimum bounding box for face detection, to avoid triggering on background faces
23
+ If you find you're getting too many false positives or not enough faces, adjust the code here:
24
+ # Perform face detection if no face has been detected in this scene
25
+ if not face_detected_in_scene:
26
+ try:
27
+ results = model.predict(frame, classes=[0], conf=0.75, device=device)
28
+ by changing conf to something bigger or smaller
29
+
30
+ You will have to do some cleanup to remove the occasional non-face and faces in credit scenes.
31
+ If you process something like a 12-episode anime, you should end up with 250-1000 usable stills after manual cleanup.
main.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import argparse
4
+ import os
5
+ import cv2
6
+ import numpy as np
7
+ from ultralytics import YOLO
8
+ from scenedetect import open_video, SceneManager, ContentDetector
9
+ import torch
10
+
11
+ def parse_arguments():
12
+ """Parse command-line arguments."""
13
+ parser = argparse.ArgumentParser(
14
+ description="Detect full faces in videos and capture screenshots on scene changes.",
15
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter
16
+ )
17
+ parser.add_argument(
18
+ "--input-dir", "-I",
19
+ required=True,
20
+ help="Directory containing input video files."
21
+ )
22
+ parser.add_argument(
23
+ "--output-dir", "-O",
24
+ required=True,
25
+ help="Directory to save screenshot outputs."
26
+ )
27
+ parser.add_argument(
28
+ "--min-width", "-w",
29
+ type=int,
30
+ default=200,
31
+ help="Minimum width of face bounding box to trigger screenshot."
32
+ )
33
+ parser.add_argument(
34
+ "--min-height", "-m",
35
+ type=int,
36
+ default=200,
37
+ help="Minimum height of face bounding box to trigger screenshot."
38
+ )
39
+ return parser.parse_args()
40
+
41
+ def ensure_directory(directory):
42
+ """Create directory if it doesn't exist."""
43
+ if not os.path.exists(directory):
44
+ os.makedirs(directory)
45
+
46
+ def check_cuda():
47
+ """Check CUDA availability and return device."""
48
+ if torch.cuda.is_available():
49
+ device = torch.device("cuda")
50
+ print(f"CUDA is available! Using GPU: {torch.cuda.get_device_name(0)}")
51
+ print(f"CUDA version: {torch.version.cuda}")
52
+ print(f"Number of GPUs: {torch.cuda.device_count()}")
53
+ else:
54
+ device = torch.device("cpu")
55
+ print("CUDA is not available. Falling back to CPU.")
56
+ return device
57
+
58
+ def is_full_face(box, frame_shape, min_width, min_height, min_proportion=0.1):
59
+ """Check if the bounding box represents a full face within the frame."""
60
+ x1, y1, x2, y2 = box
61
+ frame_height, frame_width = frame_shape[:2]
62
+
63
+ # Check if box is fully within frame (not touching edges)
64
+ if x1 <= 0 or y1 <= 0 or x2 >= frame_width or y2 >= frame_height:
65
+ return False
66
+
67
+ # Check minimum size
68
+ width = x2 - x1
69
+ height = y2 - y1
70
+ if width < min_width or height < min_height:
71
+ return False
72
+
73
+ # Check if box is large enough relative to frame (likely a face)
74
+ if width < frame_width * min_proportion or height < frame_height * min_proportion:
75
+ return False
76
+
77
+ return True
78
+
79
+ def process_video(video_path, output_dir, min_width, min_height, model, device):
80
+ """Process a single video for face detection and scene changes."""
81
+ # Initialize PySceneDetect
82
+ try:
83
+ video = open_video(video_path)
84
+ scene_manager = SceneManager()
85
+ scene_manager.add_detector(ContentDetector(threshold=30.0))
86
+ except Exception as e:
87
+ print(f"Error initializing video for scene detection in {video_path}: {e}")
88
+ return
89
+
90
+ # Get video capture for OpenCV
91
+ cap = cv2.VideoCapture(video_path)
92
+ if not cap.isOpened():
93
+ print(f"Error opening video file {video_path}")
94
+ return
95
+
96
+ fps = cap.get(cv2.CAP_PROP_FPS)
97
+ frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
98
+
99
+ # Find scenes
100
+ try:
101
+ scene_manager.detect_scenes(video=video)
102
+ scene_list = scene_manager.get_scene_list()
103
+ scene_starts = [scene[0].get_frames() for scene in scene_list]
104
+ except Exception as e:
105
+ print(f"Error detecting scenes in {video_path}: {e}")
106
+ cap.release()
107
+ return
108
+
109
+ scene_index = 0
110
+ face_detected_in_scene = False
111
+ frame_idx = 0
112
+ output_count = 0
113
+ video_name = os.path.splitext(os.path.basename(video_path))[0]
114
+
115
+ while cap.isOpened():
116
+ ret, frame = cap.read()
117
+ if not ret:
118
+ break
119
+
120
+ # Check if current frame is start of a new scene
121
+ if scene_index < len(scene_starts) and frame_idx >= scene_starts[scene_index]:
122
+ face_detected_in_scene = False # Reset face detection for new scene
123
+ scene_index += 1
124
+ print(f"New scene detected at frame {frame_idx}")
125
+
126
+ # Perform face detection if no face has been detected in this scene
127
+ if not face_detected_in_scene:
128
+ try:
129
+ results = model.predict(frame, classes=[0], conf=0.75, device=device)
130
+
131
+ for result in results:
132
+ boxes = result.boxes.xyxy.cpu().numpy()
133
+ confidences = result.boxes.conf.cpu().numpy()
134
+ classes = result.boxes.cls.cpu().numpy()
135
+
136
+ for box, conf, cls in zip(boxes, confidences, classes):
137
+ if cls == 0: # Class 0 is 'person' in COCO, used as proxy for face
138
+ if is_full_face(box, frame.shape, min_width, min_height):
139
+ # Save screenshot
140
+ output_path = os.path.join(output_dir, f"{video_name}_face_{output_count:04d}.jpg")
141
+ cv2.imwrite(output_path, frame)
142
+ print(f"Saved screenshot: {output_path}")
143
+ output_count += 1
144
+ face_detected_in_scene = True
145
+ break # Stop checking boxes after first valid face
146
+ if face_detected_in_scene:
147
+ break # Stop checking results after first valid face
148
+
149
+ except Exception as e:
150
+ print(f"Error during face detection in {video_path}: {e}")
151
+
152
+ frame_idx += 1
153
+
154
+ cap.release()
155
+ print(f"Processed {video_path}: {output_count} screenshots saved.")
156
+
157
+ def main():
158
+ """Main function to process videos in input directory."""
159
+ args = parse_arguments()
160
+
161
+ # Validate input directory
162
+ if not os.path.isdir(args.input_dir):
163
+ print(f"Error: Input directory '{args.input_dir}' does not exist.")
164
+ return
165
+
166
+ # Ensure output directory exists
167
+ ensure_directory(args.output_dir)
168
+
169
+ # Check CUDA and set device once
170
+ device = check_cuda()
171
+
172
+ # Load YOLO model once
173
+ try:
174
+ model = YOLO("yolov11l.pt")
175
+ model.to(device)
176
+ print(f"YOLO model loaded on device: {device}")
177
+ except Exception as e:
178
+ print(f"Error loading YOLO model: {e}")
179
+ return
180
+
181
+ # Supported video extensions
182
+ video_extensions = ('.mp4', '.avi', '.mov', '.mkv')
183
+
184
+ # Iterate over video files in input directory
185
+ for filename in os.listdir(args.input_dir):
186
+ if filename.lower().endswith(video_extensions):
187
+ video_path = os.path.join(args.input_dir, filename)
188
+ print(f"Processing video: {video_path}")
189
+ process_video(video_path, args.output_dir, args.min_width, args.min_height, model, device)
190
+
191
+ if __name__ == "__main__":
192
+ main()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ultralytics
2
+ opencv-python
3
+ numpy
4
+ scenedetect
5
+ torch
yolov11l.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ebd0e09d59811db4b1d61e2bc6730649608b1ac47f8dd01e2da6bca7c20023f
3
+ size 51387343