quarterturn committed on
Commit
28ca839
·
verified ·
1 Parent(s): 907b133

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +31 -3
  2. main.py +192 -0
  3. requirements.txt +5 -0
  4. yolov11l.pt +3 -0
README.md CHANGED
@@ -1,3 +1,31 @@
1
- ---
2
- license: cc-by-nc-nd-4.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ facesaver
2
+
3
+ A tool to process video files into stills for image and video AI training, using yolov11 face detection to find scenes with people in them, within a certain size and position range.
4
+
5
+ Requirements:
6
+ CUDA 12.x
7
+ A GPU with 6GB or more VRAM
8
+ Raw video rips, unless you want subtitles in your training data.
9
+
10
+ Usage:
11
+ 1. create a conda env
12
+ conda create -n facesaver python=3.12
13
+ 2. activate the env
14
+ conda activate facesaver
15
+ 3. install the requirements
16
+ pip3 install -r requirements.txt
17
+ 4. put your video files into the input directory
18
+ 5. run the command
19
+ python3 main.py -I ./input -O ./output -w 200 -m 200
20
+
21
+ notes:
22
+ You can use -w and -m to specify the minimum bounding box for face detection, to avoid triggering on background faces
23
+ If you find you're getting too many false positives or not enough faces, adjust the code here:
24
+ # Perform face detection if no face has been detected in this scene
25
+ if not face_detected_in_scene:
26
+ try:
27
+ results = model.predict(frame, classes=[0], conf=0.75, device=device)
28
+ by changing conf to something bigger or smaller
29
+
30
+ You will have to do some cleanup to remove the occasional non-face and faces in credit scenes.
31
+ If you process something like a 12-episode anime, you should end up with 250-1000 usable stills after manual cleanup.
main.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import argparse
4
+ import os
5
+ import cv2
6
+ import numpy as np
7
+ from ultralytics import YOLO
8
+ from scenedetect import open_video, SceneManager, ContentDetector
9
+ import torch
10
+
11
+ def parse_arguments():
12
+ """Parse command-line arguments."""
13
+ parser = argparse.ArgumentParser(
14
+ description="Detect full faces in videos and capture screenshots on scene changes.",
15
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter
16
+ )
17
+ parser.add_argument(
18
+ "--input-dir", "-I",
19
+ required=True,
20
+ help="Directory containing input video files."
21
+ )
22
+ parser.add_argument(
23
+ "--output-dir", "-O",
24
+ required=True,
25
+ help="Directory to save screenshot outputs."
26
+ )
27
+ parser.add_argument(
28
+ "--min-width", "-w",
29
+ type=int,
30
+ default=200,
31
+ help="Minimum width of face bounding box to trigger screenshot."
32
+ )
33
+ parser.add_argument(
34
+ "--min-height", "-m",
35
+ type=int,
36
+ default=200,
37
+ help="Minimum height of face bounding box to trigger screenshot."
38
+ )
39
+ return parser.parse_args()
40
+
41
+ def ensure_directory(directory):
42
+ """Create directory if it doesn't exist."""
43
+ if not os.path.exists(directory):
44
+ os.makedirs(directory)
45
+
46
+ def check_cuda():
47
+ """Check CUDA availability and return device."""
48
+ if torch.cuda.is_available():
49
+ device = torch.device("cuda")
50
+ print(f"CUDA is available! Using GPU: {torch.cuda.get_device_name(0)}")
51
+ print(f"CUDA version: {torch.version.cuda}")
52
+ print(f"Number of GPUs: {torch.cuda.device_count()}")
53
+ else:
54
+ device = torch.device("cpu")
55
+ print("CUDA is not available. Falling back to CPU.")
56
+ return device
57
+
58
+ def is_full_face(box, frame_shape, min_width, min_height, min_proportion=0.1):
59
+ """Check if the bounding box represents a full face within the frame."""
60
+ x1, y1, x2, y2 = box
61
+ frame_height, frame_width = frame_shape[:2]
62
+
63
+ # Check if box is fully within frame (not touching edges)
64
+ if x1 <= 0 or y1 <= 0 or x2 >= frame_width or y2 >= frame_height:
65
+ return False
66
+
67
+ # Check minimum size
68
+ width = x2 - x1
69
+ height = y2 - y1
70
+ if width < min_width or height < min_height:
71
+ return False
72
+
73
+ # Check if box is large enough relative to frame (likely a face)
74
+ if width < frame_width * min_proportion or height < frame_height * min_proportion:
75
+ return False
76
+
77
+ return True
78
+
79
+ def process_video(video_path, output_dir, min_width, min_height, model, device):
80
+ """Process a single video for face detection and scene changes."""
81
+ # Initialize PySceneDetect
82
+ try:
83
+ video = open_video(video_path)
84
+ scene_manager = SceneManager()
85
+ scene_manager.add_detector(ContentDetector(threshold=30.0))
86
+ except Exception as e:
87
+ print(f"Error initializing video for scene detection in {video_path}: {e}")
88
+ return
89
+
90
+ # Get video capture for OpenCV
91
+ cap = cv2.VideoCapture(video_path)
92
+ if not cap.isOpened():
93
+ print(f"Error opening video file {video_path}")
94
+ return
95
+
96
+ fps = cap.get(cv2.CAP_PROP_FPS)
97
+ frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
98
+
99
+ # Find scenes
100
+ try:
101
+ scene_manager.detect_scenes(video=video)
102
+ scene_list = scene_manager.get_scene_list()
103
+ scene_starts = [scene[0].get_frames() for scene in scene_list]
104
+ except Exception as e:
105
+ print(f"Error detecting scenes in {video_path}: {e}")
106
+ cap.release()
107
+ return
108
+
109
+ scene_index = 0
110
+ face_detected_in_scene = False
111
+ frame_idx = 0
112
+ output_count = 0
113
+ video_name = os.path.splitext(os.path.basename(video_path))[0]
114
+
115
+ while cap.isOpened():
116
+ ret, frame = cap.read()
117
+ if not ret:
118
+ break
119
+
120
+ # Check if current frame is start of a new scene
121
+ if scene_index < len(scene_starts) and frame_idx >= scene_starts[scene_index]:
122
+ face_detected_in_scene = False # Reset face detection for new scene
123
+ scene_index += 1
124
+ print(f"New scene detected at frame {frame_idx}")
125
+
126
+ # Perform face detection if no face has been detected in this scene
127
+ if not face_detected_in_scene:
128
+ try:
129
+ results = model.predict(frame, classes=[0], conf=0.75, device=device)
130
+
131
+ for result in results:
132
+ boxes = result.boxes.xyxy.cpu().numpy()
133
+ confidences = result.boxes.conf.cpu().numpy()
134
+ classes = result.boxes.cls.cpu().numpy()
135
+
136
+ for box, conf, cls in zip(boxes, confidences, classes):
137
+ if cls == 0: # Class 0 is 'person' in COCO, used as proxy for face
138
+ if is_full_face(box, frame.shape, min_width, min_height):
139
+ # Save screenshot
140
+ output_path = os.path.join(output_dir, f"{video_name}_face_{output_count:04d}.jpg")
141
+ cv2.imwrite(output_path, frame)
142
+ print(f"Saved screenshot: {output_path}")
143
+ output_count += 1
144
+ face_detected_in_scene = True
145
+ break # Stop checking boxes after first valid face
146
+ if face_detected_in_scene:
147
+ break # Stop checking results after first valid face
148
+
149
+ except Exception as e:
150
+ print(f"Error during face detection in {video_path}: {e}")
151
+
152
+ frame_idx += 1
153
+
154
+ cap.release()
155
+ print(f"Processed {video_path}: {output_count} screenshots saved.")
156
+
157
+ def main():
158
+ """Main function to process videos in input directory."""
159
+ args = parse_arguments()
160
+
161
+ # Validate input directory
162
+ if not os.path.isdir(args.input_dir):
163
+ print(f"Error: Input directory '{args.input_dir}' does not exist.")
164
+ return
165
+
166
+ # Ensure output directory exists
167
+ ensure_directory(args.output_dir)
168
+
169
+ # Check CUDA and set device once
170
+ device = check_cuda()
171
+
172
+ # Load YOLO model once
173
+ try:
174
+ model = YOLO("yolov11l.pt")
175
+ model.to(device)
176
+ print(f"YOLO model loaded on device: {device}")
177
+ except Exception as e:
178
+ print(f"Error loading YOLO model: {e}")
179
+ return
180
+
181
+ # Supported video extensions
182
+ video_extensions = ('.mp4', '.avi', '.mov', '.mkv')
183
+
184
+ # Iterate over video files in input directory
185
+ for filename in os.listdir(args.input_dir):
186
+ if filename.lower().endswith(video_extensions):
187
+ video_path = os.path.join(args.input_dir, filename)
188
+ print(f"Processing video: {video_path}")
189
+ process_video(video_path, args.output_dir, args.min_width, args.min_height, model, device)
190
+
191
+ if __name__ == "__main__":
192
+ main()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ultralytics
2
+ opencv-python
3
+ numpy
4
+ scenedetect
5
+ torch
yolov11l.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ebd0e09d59811db4b1d61e2bc6730649608b1ac47f8dd01e2da6bca7c20023f
3
+ size 51387343