|
import cv2 |
|
import numpy as np |
|
import os |
|
import tempfile |
|
from tqdm import tqdm |
|
import gradio as gr |
|
import ffmpeg |
|
|
|
|
|
|
|
|
|
def extract_frames(video_path):
    """
    Read every frame of a video file into a list.

    Logic:
    - Open the file with cv2.VideoCapture and call read() until it reports
      that no further frame is available.

    Args:
        video_path: Path of the video file handed to cv2.VideoCapture.

    Returns:
        List of the frames returned by read(), in decoding order. The list is
        empty when the very first read fails.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(frame)
    finally:
        # Release the capture handle even if decoding raises part-way through.
        cap.release()
    return frames
|
|
|
|
|
|
|
|
|
def apply_style_propagation(frames, style_image_path):
    """
    Propagate a stylized keyframe across a sequence of video frames.

    Logic:
    - Load the style image and resize it to the size of the first frame; it
      becomes the first output frame.
    - For each subsequent frame, compute dense Farneback optical flow from the
      current frame back to the previous frame.
    - Warp the previous styled frame with that flow so the style follows the
      motion.

    Args:
        frames: Non-empty sequence of frames; each is converted with
            cv2.COLOR_BGR2GRAY, so they are treated as BGR images. All frames
            are assumed to share the size of the first one.
        style_image_path: Path of the stylized keyframe, read with cv2.imread.

    Returns:
        List of styled frames with the same length as `frames`; the first
        entry is the resized style image.

    Raises:
        ValueError: If `frames` is empty or the style image cannot be read.
    """
    if len(frames) == 0:
        raise ValueError("No frames to stylize.")

    style_image = cv2.imread(style_image_path)
    if style_image is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise ValueError(f"Could not read style image: {style_image_path}")

    h, w = frames[0].shape[:2]
    style_image = cv2.resize(style_image, (w, h))

    styled_frames = [style_image]

    # Pixel coordinate grid; it is the same for every frame, so build it once.
    grid_x, grid_y = np.meshgrid(
        np.arange(w, dtype=np.float32),
        np.arange(h, dtype=np.float32),
    )

    prev_gray = cv2.cvtColor(frames[0], cv2.COLOR_BGR2GRAY)

    for frame in tqdm(frames[1:], desc="Propagating style"):
        curr_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # cv2.remap pulls pixels: dst(x, y) = src(map_x(x, y), map_y(x, y)).
        # For every pixel of the *current* frame we therefore need its
        # location in the *previous* frame, i.e. the flow current -> previous.
        flow = cv2.calcOpticalFlowFarneback(
            curr_gray, prev_gray, None,
            pyr_scale=0.5, levels=3, winsize=15,
            iterations=3, poly_n=5, poly_sigma=1.2, flags=0
        )

        map_x = (grid_x + flow[..., 0]).astype(np.float32)
        map_y = (grid_y + flow[..., 1]).astype(np.float32)

        warped_styled = cv2.remap(
            styled_frames[-1], map_x, map_y, interpolation=cv2.INTER_LINEAR
        )
        styled_frames.append(warped_styled)

        prev_gray = curr_gray

    return styled_frames
|
|
|
|
|
|
|
|
|
def save_video_cv2(frames, output_path, fps=30):
    """
    Write a list of frames to a video file.

    Logic:
    - Uses cv2.VideoWriter with codec 'mp4v'; the frame size is taken from
      the first frame.

    Args:
        frames: Non-empty sequence of 3-dimensional image arrays
            (height, width, channels).
        output_path: Destination path of the video file.
        fps: Frame rate passed to cv2.VideoWriter.

    Raises:
        ValueError: If `frames` is empty.
    """
    if len(frames) == 0:
        raise ValueError("Cannot save a video without frames.")

    h, w, _ = frames[0].shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (w, h))
    try:
        for frame in frames:
            out.write(frame)
    finally:
        # Always finalize the container, even if writing a frame raises.
        out.release()
|
|
|
|
|
|
|
|
|
def process_video(video_file, style_image_file, fps=30):
    """
    Processes the video by applying the style image via optical flow.
    Then re-encodes the video using FFmpeg (H.264, yuv420p) for web compatibility.

    Inputs:
    - video_file: Uploaded video; either a path string or a dict with a
      "name" entry holding the path.
    - style_image_file: Uploaded stylized keyframe; a path string, a dict with
      a "name" entry, or an RGB numpy array.
    - fps: Output frames per second.

    Returns:
    - Path to the re-encoded, web-playable video. The file is created outside
      the scratch directory so it still exists after this function returns;
      the caller owns it.
    - On failure, a string starting with "Error" describing the problem.
    """
    if isinstance(video_file, str):
        video_path = video_file
    elif isinstance(video_file, dict) and "name" in video_file:
        video_path = video_file["name"]
    else:
        return "Error: Unsupported video format."

    style_image_path = None
    if isinstance(style_image_file, str):
        style_image_path = style_image_file
    elif isinstance(style_image_file, dict) and "name" in style_image_file:
        style_image_path = style_image_file["name"]
    elif not isinstance(style_image_file, np.ndarray):
        return "Error: Unsupported style image format."

    # All intermediates live in a per-call scratch directory that is removed
    # on exit, so concurrent requests cannot clobber each other's files.
    with tempfile.TemporaryDirectory() as tmpdir:
        if style_image_path is None:
            # In-memory image: the array is RGB while OpenCV writes BGR.
            style_image_path = os.path.join(tmpdir, "temp_style_image.jpeg")
            cv2.imwrite(
                style_image_path,
                cv2.cvtColor(style_image_file, cv2.COLOR_RGB2BGR),
            )

        frames = extract_frames(video_path)
        if not frames:
            return "Error: No frames extracted from the video."

        styled_frames = apply_style_propagation(frames, style_image_path)

        temp_video_path = os.path.join(tmpdir, "temp_video.mp4")
        save_video_cv2(styled_frames, temp_video_path, fps=fps)

        # The final video must outlive the scratch directory, otherwise the
        # returned path would point at a file that has already been deleted.
        fd, output_video_path = tempfile.mkstemp(suffix=".mp4")
        os.close(fd)
        try:
            (
                ffmpeg
                .input(temp_video_path)
                .output(output_video_path, vcodec='libx264', pix_fmt='yuv420p', r=fps)
                .run(overwrite_output=True, quiet=True)
            )
        except ffmpeg.Error as e:
            print("FFmpeg error:", e)
            # Do not leave an empty placeholder file behind on failure.
            os.remove(output_video_path)
            return "Error during video re-encoding."

    return output_video_path
|
|
|
|
|
|
|
|
|
# Gradio front end: feeds an uploaded video, a stylized keyframe image and an
# FPS slider value into process_video and shows the result as a video.
iface = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Video(label="Input Video (v.mp4)"),
        gr.Image(label="Stylized Keyframe (a.jpeg)"),
        gr.Slider(minimum=1, maximum=60, step=1, value=30, label="Output FPS"),
    ],
    outputs=gr.Video(label="Styled Video"),
    title="Optical Flow Style Propagation",
    description=(
        "Upload a video and a stylized keyframe image. "
        "The style from the keyframe is propagated across the video using optical flow and warping. "
        "The resulting video is re-encoded to be web-friendly."
    ),
)


if __name__ == "__main__":
    # Launch only when run as a script; share=True is passed through to
    # Gradio's launch() to request a shareable link.
    iface.launch(share=True)