Spaces:
Runtime error
Runtime error
Commit
·
6932abb
1
Parent(s):
15b4859
add segment
Browse files- .gitignore +3 -0
- README.md +20 -0
- ai_video_cli/segment.py +62 -0
- poetry.lock +0 -0
- pyproject.toml +4 -1
.gitignore
CHANGED
@@ -160,3 +160,6 @@ cython_debug/
|
|
160 |
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
161 |
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
162 |
#.idea/
|
|
|
|
|
|
|
|
160 |
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
161 |
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
162 |
#.idea/
|
163 |
+
|
164 |
+
# checkpoints
|
165 |
+
*.pt
|
README.md
CHANGED
@@ -139,3 +139,23 @@ ai-video convert input_video.mp4 output_video.mp4 --video_codec libx265 --audio_
|
|
139 |
This command will convert `input_video.mp4` to `output_video.mp4`, using the libx265 video codec and mp3 audio codec. It will also resize and crop the video to 1920x1080 pixels. If the input video's aspect ratio doesn't match 1920x1080, the video will be scaled to fit within these dimensions and centered.
|
140 |
|
141 |
If you don't specify an output file, the tool will automatically generate one with the suffix "_converted" added to the input filename.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
This command will convert `input_video.mp4` to `output_video.mp4`, using the libx265 video codec and mp3 audio codec. It will also resize and crop the video to 1920x1080 pixels. If the input video's aspect ratio doesn't match 1920x1080, the video will be scaled to fit within these dimensions and centered.
|
140 |
|
141 |
If you don't specify an output file, the tool will automatically generate one with the suffix "_converted" added to the input filename.
|
142 |
+
|
143 |
+
### 6. AI Segment Video
|
144 |
+
|
145 |
+
Segments people in a video using AI models (YOLO and SAM2) and replaces the background with green.
|
146 |
+
|
147 |
+
```
|
148 |
+
ai-segment <input_file> <output_file>
|
149 |
+
```
|
150 |
+
|
151 |
+
- `<input_file>`: Path to the input video file
|
152 |
+
- `<output_file>`: Path for the output segmented video file
|
153 |
+
|
154 |
+
Example:
|
155 |
+
```
|
156 |
+
ai-segment input_video.mp4 segmented_output.mp4
|
157 |
+
```
|
158 |
+
|
159 |
+
This command will process `input_video.mp4` using YOLO for person detection and SAM2 for segmentation. It will create a new video `segmented_output.mp4` where all detected people are segmented, and the background is replaced with green.
|
160 |
+
|
161 |
+
Note: This command requires additional AI models (YOLO and SAM2) which will be downloaded automatically on first use. The process may take some time depending on the length of the video and your hardware capabilities.
|
ai_video_cli/segment.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cv2
|
2 |
+
import numpy as np
|
3 |
+
from ultralytics import YOLO, SAM
|
4 |
+
import argparse
|
5 |
+
|
6 |
+
def process_video(video_path, output_path):
    """Segment people in a video and replace the background with green.

    Runs YOLO person detection on each frame, passes the detected boxes to
    SAM2 for pixel-level segmentation, and writes a new video in which every
    pixel outside the combined person mask is painted green.

    Args:
        video_path: Path to the input video file.
        output_path: Path for the output video file (mp4v-encoded).

    Raises:
        IOError: If the input video cannot be opened.
    """
    # Load models - Ultralytics will handle caching automatically
    yolo_model = YOLO("yolo11n.pt")
    sam2_model = SAM("sam2_b.pt")

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Fail fast instead of silently writing an empty output video.
        raise IOError(f"Could not open video file: {video_path}")

    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Detect people using YOLO model
            yolo_results = yolo_model(frame)

            # Filter for person class (COCO class index 0; adjust if needed)
            boxes = yolo_results[0].boxes
            person_boxes = boxes[boxes.cls == 0].xyxy.cpu().numpy()

            # Start from an all-background mask; frames with no detected
            # people become fully green instead of crashing SAM2 below.
            combined_mask = np.zeros(frame.shape[:2], dtype=bool)
            if len(person_boxes) > 0:
                # Use SAM 2 for segmentation of the detected person boxes
                sam_results = sam2_model(frame, bboxes=person_boxes)
                masks = sam_results[0].masks
                if masks is not None:
                    for mask in masks.data:
                        # SAM masks come back as float tensors; cast to bool
                        # before OR-ing (bool |= float raises TypeError).
                        combined_mask |= mask.cpu().numpy().astype(bool)

            # Apply the mask to the original frame
            segmented_frame = frame.copy()
            segmented_frame[~combined_mask] = [
                0,
                255,
                0,
            ]  # Green background, you can change this

            out.write(segmented_frame)
    finally:
        # Release capture/writer even if model inference raises mid-video.
        cap.release()
        out.release()
|
50 |
+
|
51 |
+
|
52 |
+
def main():
    """CLI entry point: parse input/output paths and run segmentation."""
    arg_parser = argparse.ArgumentParser(description="Process video with YOLO and SAM2")
    arg_parser.add_argument("input_video", help="Path to the input video file")
    arg_parser.add_argument("output_video", help="Path to the output video file")
    parsed = arg_parser.parse_args()

    process_video(parsed.input_video, parsed.output_video)
|
60 |
+
|
61 |
+
# Allow running this module directly as a script (also wired up as the
# `ai-segment` console script in pyproject.toml).
if __name__ == "__main__":
    main()
|
poetry.lock
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
pyproject.toml
CHANGED
@@ -7,13 +7,16 @@ readme = "README.md"
|
|
7 |
packages = [{ include = "ai_video_cli" }]
|
8 |
|
9 |
[tool.poetry.dependencies]
|
10 |
-
python = ">=3.
|
11 |
moviepy = "^1.0.3"
|
12 |
numpy = "^1.26.0"
|
13 |
pillow = "^9"
|
|
|
|
|
14 |
|
15 |
[tool.poetry.scripts]
|
16 |
ai-video = "ai_video_cli.main:main"
|
|
|
17 |
|
18 |
[build-system]
|
19 |
requires = ["poetry-core"]
|
|
|
7 |
packages = [{ include = "ai_video_cli" }]
|
8 |
|
9 |
[tool.poetry.dependencies]
|
10 |
+
python = ">=3.10,<3.12"
|
11 |
moviepy = "^1.0.3"
|
12 |
numpy = "^1.26.0"
|
13 |
pillow = "^9"
|
14 |
+
opencv-python = "^4.10.0.84"
|
15 |
+
ultralytics = "^8.3.13"
|
16 |
|
17 |
[tool.poetry.scripts]
|
18 |
ai-video = "ai_video_cli.main:main"
|
19 |
+
ai-segment = "ai_video_cli.segment:main"
|
20 |
|
21 |
[build-system]
|
22 |
requires = ["poetry-core"]
|