mschiesser commited on
Commit
6932abb
·
1 Parent(s): 15b4859

add segment

Browse files
Files changed (5) hide show
  1. .gitignore +3 -0
  2. README.md +20 -0
  3. ai_video_cli/segment.py +62 -0
  4. poetry.lock +0 -0
  5. pyproject.toml +4 -1
.gitignore CHANGED
@@ -160,3 +160,6 @@ cython_debug/
160
  # and can be added to the global gitignore or merged into this file. For a more nuclear
161
  # option (not recommended) you can uncomment the following to ignore the entire idea folder.
162
  #.idea/
 
 
 
 
160
  # and can be added to the global gitignore or merged into this file. For a more nuclear
161
  # option (not recommended) you can uncomment the following to ignore the entire idea folder.
162
  #.idea/
163
+
164
+ # checkpoints
165
+ *.pt
README.md CHANGED
@@ -139,3 +139,23 @@ ai-video convert input_video.mp4 output_video.mp4 --video_codec libx265 --audio_
139
  This command will convert `input_video.mp4` to `output_video.mp4`, using the libx265 video codec and mp3 audio codec. It will also resize and crop the video to 1920x1080 pixels. If the input video's aspect ratio doesn't match 1920x1080, the video will be scaled to fit within these dimensions and centered.
140
 
141
  If you don't specify an output file, the tool will automatically generate one with the suffix "_converted" added to the input filename.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  This command will convert `input_video.mp4` to `output_video.mp4`, using the libx265 video codec and mp3 audio codec. It will also resize and crop the video to 1920x1080 pixels. If the input video's aspect ratio doesn't match 1920x1080, the video will be scaled to fit within these dimensions and centered.
140
 
141
  If you don't specify an output file, the tool will automatically generate one with the suffix "_converted" added to the input filename.
142
+
143
+ ### 6. AI Segment Video
144
+
145
+ Segments people in a video using AI models (YOLO and SAM2) and replaces the background with green.
146
+
147
+ ```
148
+ ai-segment <input_file> <output_file>
149
+ ```
150
+
151
+ - `<input_file>`: Path to the input video file
152
+ - `<output_file>`: Path for the output segmented video file
153
+
154
+ Example:
155
+ ```
156
+ ai-segment input_video.mp4 segmented_output.mp4
157
+ ```
158
+
159
+ This command will process `input_video.mp4` using YOLO for person detection and SAM2 for segmentation. It will create a new video `segmented_output.mp4` where all detected people are segmented, and the background is replaced with green.
160
+
161
+ Note: This command requires additional AI models (YOLO and SAM2) which will be downloaded automatically on first use. The process may take some time depending on the length of the video and your hardware capabilities.
ai_video_cli/segment.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ from ultralytics import YOLO, SAM
4
+ import argparse
5
+
6
def process_video(video_path, output_path):
    """Segment people in a video and replace the background with green.

    Runs YOLO person detection on every frame, feeds the detected boxes to
    SAM2 for pixel-level masks, and writes a new video in which everything
    outside the combined person mask is painted green (BGR ``[0, 255, 0]``).

    Args:
        video_path: Path to the input video file.
        output_path: Path for the output (mp4v-encoded) video file.

    Raises:
        IOError: If the input video cannot be opened.
    """
    # Load models - Ultralytics handles weight download/caching automatically.
    yolo_model = YOLO("yolo11n.pt")
    sam2_model = SAM("sam2_b.pt")

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Fail fast instead of silently producing an empty output file.
        raise IOError(f"Could not open video file: {video_path}")

    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Some containers report 0 fps; fall back to a sane default so the
    # output remains playable.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Detect people using the YOLO model (COCO class 0 == "person").
            yolo_results = yolo_model(frame)
            boxes = yolo_results[0].boxes
            person_boxes = boxes[boxes.cls == 0].xyxy.cpu().numpy()

            combined_mask = np.zeros(frame.shape[:2], dtype=bool)
            if len(person_boxes) > 0:
                # Use SAM2 only when there is at least one person box —
                # calling it with an empty bbox array yields no masks and
                # the original code crashed on `masks.data` in that case.
                sam_results = sam2_model(frame, bboxes=person_boxes)
                masks = sam_results[0].masks
                if masks is not None:
                    for mask in masks.data:
                        # Masks may come back as float tensors; coerce to
                        # bool so the bitwise OR is well-defined.
                        combined_mask |= mask.cpu().numpy().astype(bool)

            # Paint everything outside the person mask green (BGR order).
            segmented_frame = frame.copy()
            segmented_frame[~combined_mask] = [0, 255, 0]

            out.write(segmented_frame)
    finally:
        # Release capture and writer even if a model call throws mid-video.
        cap.release()
        out.release()
50
+
51
+
52
def main():
    """CLI entry point: wire command-line arguments to :func:`process_video`."""
    arg_parser = argparse.ArgumentParser(
        description="Process video with YOLO and SAM2"
    )
    # Declare both positional arguments from a small table to keep the
    # name/help pairs together.
    for arg_name, arg_help in (
        ("input_video", "Path to the input video file"),
        ("output_video", "Path to the output video file"),
    ):
        arg_parser.add_argument(arg_name, help=arg_help)
    cli_args = arg_parser.parse_args()

    process_video(cli_args.input_video, cli_args.output_video)


if __name__ == "__main__":
    main()
poetry.lock CHANGED
The diff for this file is too large to render. See raw diff
 
pyproject.toml CHANGED
@@ -7,13 +7,16 @@ readme = "README.md"
7
  packages = [{ include = "ai_video_cli" }]
8
 
9
  [tool.poetry.dependencies]
10
- python = ">=3.9,<3.12"
11
  moviepy = "^1.0.3"
12
  numpy = "^1.26.0"
13
  pillow = "^9"
 
 
14
 
15
  [tool.poetry.scripts]
16
  ai-video = "ai_video_cli.main:main"
 
17
 
18
  [build-system]
19
  requires = ["poetry-core"]
 
7
  packages = [{ include = "ai_video_cli" }]
8
 
9
  [tool.poetry.dependencies]
10
+ python = ">=3.10,<3.12"
11
  moviepy = "^1.0.3"
12
  numpy = "^1.26.0"
13
  pillow = "^9"
14
+ opencv-python = "^4.10.0.84"
15
+ ultralytics = "^8.3.13"
16
 
17
  [tool.poetry.scripts]
18
  ai-video = "ai_video_cli.main:main"
19
+ ai-segment = "ai_video_cli.segment:main"
20
 
21
  [build-system]
22
  requires = ["poetry-core"]