NihalGazi committed on
Commit
7ffd52f
·
verified ·
1 Parent(s): 83956cb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -44
app.py CHANGED
@@ -4,6 +4,7 @@ import os
4
  import tempfile
5
  from tqdm import tqdm
6
  import gradio as gr
 
7
 
8
  # -----------------------------
9
  # Function to extract frames from the video.
@@ -12,15 +13,14 @@ def extract_frames(video_path):
12
  """
13
  Opens the video file and extracts all frames into a list.
14
  Logic:
15
- - Open the video with cv2.VideoCapture.
16
- - Read frames until no frame is returned.
17
  """
18
  cap = cv2.VideoCapture(video_path)
19
  frames = []
20
  while True:
21
- ret, frame = cap.read() # ret is True if a frame is successfully read.
22
  if not ret:
23
- break # Exit loop when no more frames.
24
  frames.append(frame)
25
  cap.release()
26
  return frames
@@ -32,109 +32,122 @@ def apply_style_propagation(frames, style_image_path):
32
  """
33
  Applies the style from the provided image onto each video frame.
34
  Logic:
35
- - Load and resize the style image to match video dimensions.
36
- - Use the style image as the first styled frame.
37
- - For each subsequent frame, compute optical flow between consecutive frames.
38
- - Warp the previous styled frame using the flow so that the style follows the motion.
39
  """
40
- # Load and resize the style image.
41
  style_image = cv2.imread(style_image_path)
42
  h, w = frames[0].shape[:2]
43
  style_image = cv2.resize(style_image, (w, h))
44
 
45
- # Use the style image as the first styled frame.
46
  styled_frames = [style_image]
47
 
48
  # Convert the first frame to grayscale.
49
  prev_gray = cv2.cvtColor(frames[0], cv2.COLOR_BGR2GRAY)
50
 
51
- # Process each subsequent frame.
52
  for i in tqdm(range(1, len(frames)), desc="Propagating style"):
53
  curr_gray = cv2.cvtColor(frames[i], cv2.COLOR_BGR2GRAY)
54
 
55
- # Compute dense optical flow using the Farneback method.
56
  flow = cv2.calcOpticalFlowFarneback(
57
  prev_gray, curr_gray, None,
58
  pyr_scale=0.5, levels=3, winsize=15,
59
  iterations=3, poly_n=5, poly_sigma=1.2, flags=0
60
  )
61
 
62
- # Create a grid of (x,y) coordinates for each pixel.
63
  grid_x, grid_y = np.meshgrid(np.arange(w), np.arange(h))
64
-
65
- # Add the flow vectors to the coordinate grid.
66
  map_x = (grid_x + flow[..., 0]).astype(np.float32)
67
  map_y = (grid_y + flow[..., 1]).astype(np.float32)
68
 
69
- # Warp the last styled frame using the computed mapping.
70
  warped_styled = cv2.remap(styled_frames[-1], map_x, map_y, interpolation=cv2.INTER_LINEAR)
71
  styled_frames.append(warped_styled)
72
 
73
- # Update the previous grayscale frame.
74
  prev_gray = curr_gray
75
 
76
  return styled_frames
77
 
78
  # -----------------------------
79
- # Function to save a list of frames as a video file.
80
  # -----------------------------
81
- def save_video(frames, output_path, fps=30):
82
  """
83
- Combines frames into a video and saves it.
84
  Logic:
85
- - Create a VideoWriter with the specified FPS and frame size.
86
- - Write each frame sequentially to the video file.
87
  """
88
  h, w, _ = frames[0].shape
89
- fourcc = cv2.VideoWriter_fourcc(*'mp4v') # Use 'mp4v' codec for MP4.
90
  out = cv2.VideoWriter(output_path, fourcc, fps, (w, h))
91
-
92
  for frame in frames:
93
  out.write(frame)
94
  out.release()
95
 
96
  # -----------------------------
97
- # Main processing function for the Gradio interface.
98
  # -----------------------------
99
  def process_video(video_file, style_image_file, fps=30):
100
  """
101
- Processes the video by propagating the style from the image.
 
 
102
  Inputs:
103
  - video_file: Uploaded video file.
104
  - style_image_file: Uploaded stylized keyframe image.
105
- - fps: Frames per second for the output video.
 
106
  Returns:
107
- - Path to the generated styled video.
108
  """
109
- # For the video file, we expect a file path.
110
  video_path = video_file if isinstance(video_file, str) else video_file["name"]
111
 
112
- # For the style image, Gradio might return a numpy array.
113
  if isinstance(style_image_file, str):
114
  style_image_path = style_image_file
115
  elif isinstance(style_image_file, dict) and "name" in style_image_file:
116
  style_image_path = style_image_file["name"]
117
  elif isinstance(style_image_file, np.ndarray):
118
- # If the image is a numpy array, save it to a temporary file.
119
- tmp_path = os.path.join(tempfile.gettempdir(), "temp_style_image.jpeg")
120
- # Gradio images are usually in RGB; OpenCV uses BGR.
121
- cv2.imwrite(tmp_path, cv2.cvtColor(style_image_file, cv2.COLOR_RGB2BGR))
122
- style_image_path = tmp_path
123
  else:
124
- return "Error: Unsupported style image file format."
125
 
126
- # Extract frames from the input video.
127
  frames = extract_frames(video_path)
128
  if not frames:
129
  return "Error: No frames extracted from the video."
130
 
131
- # Propagate the style image across the frames.
132
  styled_frames = apply_style_propagation(frames, style_image_path)
133
 
134
- # Save the styled frames into a new video file in a temporary directory.
135
  with tempfile.TemporaryDirectory() as tmpdir:
136
- output_video_path = os.path.join(tmpdir, "stylized_video.mp4")
137
- save_video(styled_frames, output_video_path, fps=fps)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  return output_video_path
139
 
140
  # -----------------------------
@@ -150,9 +163,9 @@ iface = gr.Interface(
150
  outputs=gr.Video(label="Styled Video"),
151
  title="Optical Flow Style Propagation",
152
  description=(
153
- "Upload a video and a stylized keyframe image. "
154
- "The style from the keyframe is propagated across the video using optical flow and warping, "
155
- "and the result is output as a new video."
156
  )
157
  )
158
 
 
4
  import tempfile
5
  from tqdm import tqdm
6
  import gradio as gr
7
+ import ffmpeg
8
 
9
  # -----------------------------
10
  # Function to extract frames from the video.
 
def extract_frames(video_path):
    """Decode the video at *video_path* and return every frame as a list.

    Uses cv2.VideoCapture to pull frames one at a time until the stream
    is exhausted. Returns a list of BGR frames (numpy arrays); the list
    is empty if the file cannot be opened or holds no frames.
    """
    capture = cv2.VideoCapture(video_path)
    frames = []
    ok, frame = capture.read()
    while ok:
        frames.append(frame)
        ok, frame = capture.read()  # ok turns False when no frame is left.
    capture.release()
    return frames
 
def apply_style_propagation(frames, style_image_path):
    """Propagate the style of a single keyframe image across all video frames.

    Logic:
    - Load and resize the style image to match the video dimensions.
    - Use the style image as the starting point.
    - For each subsequent frame, compute the dense optical flow between
      the previous and current frame.
    - Warp the previous styled frame so that the style follows the motion.

    Parameters
    ----------
    frames : list of BGR frames (as produced by ``extract_frames``); must
        be non-empty, all frames the same size.
    style_image_path : str, path to the stylized keyframe image on disk.

    Returns
    -------
    list of BGR frames with the same length as ``frames``; element 0 is
    the resized style image, each later element is the previous styled
    frame warped along the optical flow.
    """
    # Load the style image and resize to match frame dimensions.
    style_image = cv2.imread(style_image_path)
    h, w = frames[0].shape[:2]
    style_image = cv2.resize(style_image, (w, h))

    # The first styled frame is the style image itself.
    styled_frames = [style_image]

    prev_gray = cv2.cvtColor(frames[0], cv2.COLOR_BGR2GRAY)

    # FIX: the coordinate grid depends only on the (constant) frame size,
    # so build it once here instead of rebuilding it on every iteration.
    grid_x, grid_y = np.meshgrid(np.arange(w), np.arange(h))

    for i in tqdm(range(1, len(frames)), desc="Propagating style"):
        curr_gray = cv2.cvtColor(frames[i], cv2.COLOR_BGR2GRAY)

        # Dense optical flow (Farneback) from the previous to the current frame.
        # NOTE(review): cv2.remap below performs backward sampling, which
        # strictly wants the current->previous flow; using prev->curr is an
        # approximation — confirm visually if the style drifts over time.
        flow = cv2.calcOpticalFlowFarneback(
            prev_gray, curr_gray, None,
            pyr_scale=0.5, levels=3, winsize=15,
            iterations=3, poly_n=5, poly_sigma=1.2, flags=0
        )

        # Displace every pixel coordinate by its flow vector.
        map_x = (grid_x + flow[..., 0]).astype(np.float32)
        map_y = (grid_y + flow[..., 1]).astype(np.float32)

        # Warp the most recent styled frame so the style follows the motion.
        warped_styled = cv2.remap(styled_frames[-1], map_x, map_y, interpolation=cv2.INTER_LINEAR)
        styled_frames.append(warped_styled)

        prev_gray = curr_gray

    return styled_frames
75
 
76
  # -----------------------------
77
+ # Function to save video frames using OpenCV.
78
  # -----------------------------
79
def save_video_cv2(frames, output_path, fps=30):
    """Write *frames* to *output_path* as an MP4 ('mp4v' codec) at *fps*.

    *frames* must be a non-empty list of same-sized BGR frames.
    """
    height, width, _ = frames[0].shape
    writer = cv2.VideoWriter(
        output_path,
        cv2.VideoWriter_fourcc(*'mp4v'),
        fps,
        (width, height),
    )
    for frame in frames:
        writer.write(frame)
    writer.release()
91
 
92
  # -----------------------------
93
+ # Main processing function for Gradio.
94
  # -----------------------------
95
def process_video(video_file, style_image_file, fps=30):
    """Apply the style image to the video via optical-flow propagation,
    then re-encode with FFmpeg (H.264, yuv420p) for web playback.

    Inputs:
    - video_file: uploaded video (file path string, or dict with a "name" key).
    - style_image_file: uploaded stylized keyframe (path string, dict with a
      "name" key, or an RGB numpy array as delivered by Gradio).
    - fps: output frames per second.

    Returns:
    - Path to the re-encoded, web-playable video, or an error message string.
    """
    # Resolve the video file path.
    video_path = video_file if isinstance(video_file, str) else video_file["name"]

    # Resolve the style image path; a numpy array is written to a temp file.
    if isinstance(style_image_file, str):
        style_image_path = style_image_file
    elif isinstance(style_image_file, dict) and "name" in style_image_file:
        style_image_path = style_image_file["name"]
    elif isinstance(style_image_file, np.ndarray):
        tmp_style_path = os.path.join(tempfile.gettempdir(), "temp_style_image.jpeg")
        # Gradio delivers RGB; OpenCV expects BGR.
        cv2.imwrite(tmp_style_path, cv2.cvtColor(style_image_file, cv2.COLOR_RGB2BGR))
        style_image_path = tmp_style_path
    else:
        return "Error: Unsupported style image format."

    # Extract frames from the video.
    frames = extract_frames(video_path)
    if not frames:
        return "Error: No frames extracted from the video."

    # Propagate the style across video frames.
    styled_frames = apply_style_propagation(frames, style_image_path)

    # BUG FIX: the original wrote the output inside
    # `with tempfile.TemporaryDirectory()`, which deletes the directory (and
    # the video) when the context exits — the returned path pointed at a file
    # that no longer existed by the time Gradio tried to serve it.
    # mkdtemp() persists until explicitly cleaned up.
    tmpdir = tempfile.mkdtemp()

    # Save the raw styled video using OpenCV.
    temp_video_path = os.path.join(tmpdir, "temp_video.mp4")
    save_video_cv2(styled_frames, temp_video_path, fps=fps)

    # Re-encode with FFmpeg for browser compatibility (H.264 + yuv420p).
    output_video_path = os.path.join(tmpdir, "output_video.mp4")
    try:
        (
            ffmpeg
            .input(temp_video_path)
            .output(output_video_path, vcodec='libx264', pix_fmt='yuv420p', r=fps)
            .run(overwrite_output=True, quiet=True)
        )
    except ffmpeg.Error as e:
        print("FFmpeg error:", e)
        return "Error during video re-encoding."

    return output_video_path
152
 
153
  # -----------------------------
 
163
  outputs=gr.Video(label="Styled Video"),
164
  title="Optical Flow Style Propagation",
165
  description=(
166
+ "Upload a video and a stylized keyframe image. The style from the keyframe is propagated "
167
+ "across the video using optical flow and warping. The resulting video is re-encoded to be "
168
+ "web-friendly."
169
  )
170
  )
171