Update app.py
app.py CHANGED
@@ -9,6 +9,9 @@ import ffmpeg
 def extract_frames(video_path):
     """
     Extracts all frames from the input video.
+    Logic:
+    - Open the video file using cv2.VideoCapture.
+    - Read frames until the video ends.
     """
     cap = cv2.VideoCapture(video_path)
     frames = []
@@ -23,8 +26,13 @@ def extract_frames(video_path):
 
 def apply_style_propagation(frames, style_image_path):
     """
-    Applies the style from the provided image to
-
+    Applies the style from the provided image to each video frame using optical flow.
+    Logic:
+    - Load and resize the style image to match the frame dimensions.
+    - Use the style image as the first styled frame.
+    - For each subsequent frame, compute dense optical flow between consecutive frames.
+    - Warp the previously styled frame using the computed flow.
+    - Clip mapping coordinates to avoid out-of-bound values.
     """
     style_image = cv2.imread(style_image_path)
     if style_image is None:
@@ -33,7 +41,6 @@ def apply_style_propagation(frames, style_image_path):
     h, w = frames[0].shape[:2]
     style_image = cv2.resize(style_image, (w, h))
 
-    # The first styled frame is the style image.
     styled_frames = [style_image]
     prev_gray = cv2.cvtColor(frames[0], cv2.COLOR_BGR2GRAY)
 
@@ -47,7 +54,7 @@ def apply_style_propagation(frames, style_image_path):
     grid_x, grid_y = np.meshgrid(np.arange(w), np.arange(h))
     map_x = grid_x + flow[..., 0]
     map_y = grid_y + flow[..., 1]
-    # Clip coordinates to
+    # Clip mapping coordinates to valid pixel indices.
     map_x = np.clip(map_x, 0, w - 1).astype(np.float32)
     map_y = np.clip(map_y, 0, h - 1).astype(np.float32)
 
@@ -56,7 +63,6 @@ def apply_style_propagation(frames, style_image_path):
     prev_gray = curr_gray
 
     print(f"Propagated style to {len(styled_frames)} frames.")
-    # Debug: show a sample frame's mean pixel intensity.
     sample_frame = styled_frames[len(styled_frames) // 2]
     print(f"Sample styled frame mean intensity: {np.mean(sample_frame):.2f}")
     return styled_frames
@@ -64,6 +70,8 @@ def apply_style_propagation(frames, style_image_path):
 def save_video_cv2(frames, output_path, fps=30):
     """
     Saves a list of frames as a video using OpenCV.
+    Logic:
+    - Use cv2.VideoWriter with codec 'mp4v' to create a temporary video file.
     """
     h, w, _ = frames[0].shape
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
@@ -74,13 +82,23 @@ def save_video_cv2(frames, output_path, fps=30):
     size = os.path.getsize(output_path)
     print(f"Intermediate video saved to {output_path} (size: {size} bytes)")
 
-def process_video(video_file, style_image_file, fps=30):
+def process_video(video_file, style_image_file, fps=30, target_width=0, target_height=0):
     """
-    Processes the input video
-
-
+    Processes the input video by applying the style image via optical flow,
+    optionally downscaling the video and style image to a specified resolution.
+    Then re-encodes the video with FFmpeg for web compatibility.
+
+    Inputs:
+    - video_file: The input video file.
+    - style_image_file: The stylized keyframe image.
+    - fps: Output frames per second.
+    - target_width: Target width for downscaling (0 means no downscale).
+    - target_height: Target height for downscaling (0 means no downscale).
+
+    Returns:
+    - Path to the final, web-playable video.
     """
-    #
+    # Determine video file path.
     video_path = video_file if isinstance(video_file, str) else video_file["name"]
 
     # Process the style image input.
@@ -95,17 +113,27 @@ def process_video(video_file, style_image_file, fps=30):
     else:
         return "Error: Unsupported style image format."
 
-    # Extract frames from the video.
+    # Extract frames from the input video.
     frames = extract_frames(video_path)
     if not frames:
         return "Error: No frames extracted from the video."
 
+    original_h, original_w = frames[0].shape[:2]
+    print(f"Original video resolution: {original_w}x{original_h}")
+
+    # Downscale if target dimensions are provided (non-zero).
+    if target_width > 0 and target_height > 0:
+        print(f"Downscaling frames to resolution: {target_width}x{target_height}")
+        frames = [cv2.resize(frame, (target_width, target_height)) for frame in frames]
+    else:
+        print("No downscaling applied. Using original resolution.")
+
     # Propagate style.
     styled_frames = apply_style_propagation(frames, style_image_path)
 
     # Save intermediate video using OpenCV to a named temporary file.
     temp_video_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
-    temp_video_file.close()  # Close so
+    temp_video_file.close()  # Close so OpenCV can write to this file.
     temp_video_path = temp_video_file.name
     save_video_cv2(styled_frames, temp_video_path, fps=fps)
 
@@ -140,14 +168,17 @@ iface = gr.Interface(
     inputs=[
         gr.Video(label="Input Video (v.mp4)"),
         gr.Image(label="Stylized Keyframe (a.jpeg)"),
-        gr.Slider(minimum=1, maximum=60, step=1, value=30, label="Output FPS")
+        gr.Slider(minimum=1, maximum=60, step=1, value=30, label="Output FPS"),
+        gr.Slider(minimum=0, maximum=1920, step=1, value=0, label="Target Width (0 for original)"),
+        gr.Slider(minimum=0, maximum=1080, step=1, value=0, label="Target Height (0 for original)")
     ],
     outputs=gr.Video(label="Styled Video"),
-    title="Optical Flow Style Propagation",
+    title="Optical Flow Style Propagation with Optional Downscaling",
     description=(
-        "Upload a video and a stylized keyframe image.
-        "
-        "
+        "Upload a video and a stylized keyframe image. Optionally downscale both to a target resolution "
+        "by specifying width and height (set both to 0 for original resolution). "
+        "The style from the keyframe is propagated across the video using optical flow and warping. "
+        "The output video is re-encoded for web compatibility."
     )
 )
 
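For reference: the hunks above use a dense optical flow array `flow` and warp the previously styled frame, but the flow computation and warping calls themselves sit outside the diff context. A minimal sketch of what that propagation loop typically looks like follows, assuming Farneback flow (cv2.calcOpticalFlowFarneback) and cv2.remap for the warp; the helper name and the Farneback parameters are illustrative and not taken from app.py.

import cv2
import numpy as np

def propagate_style_sketch(frames, style_image):
    """Sketch: warp the previous styled frame along dense optical flow."""
    h, w = frames[0].shape[:2]
    styled_frames = [cv2.resize(style_image, (w, h))]
    prev_gray = cv2.cvtColor(frames[0], cv2.COLOR_BGR2GRAY)
    for frame in frames[1:]:
        curr_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # Dense flow between consecutive frames (assumed Farneback parameters;
        # app.py's actual call is outside this diff's context).
        flow = cv2.calcOpticalFlowFarneback(prev_gray, curr_gray, None,
                                            0.5, 3, 15, 3, 5, 1.2, 0)
        grid_x, grid_y = np.meshgrid(np.arange(w), np.arange(h))
        # Same mapping and clipping as in the hunks above.
        map_x = np.clip(grid_x + flow[..., 0], 0, w - 1).astype(np.float32)
        map_y = np.clip(grid_y + flow[..., 1], 0, h - 1).astype(np.float32)
        # Warp the previously styled frame to follow the motion.
        styled_frames.append(cv2.remap(styled_frames[-1], map_x, map_y, cv2.INTER_LINEAR))
        prev_gray = curr_gray
    return styled_frames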
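The save_video_cv2 docstring points at cv2.VideoWriter with the 'mp4v' codec, while only the function header and fourcc lines appear in the hunks. A minimal sketch of the usual shape of such a function, with the frame loop and release added as assumptions consistent with the visible lines:

import cv2

def save_video_cv2_sketch(frames, output_path, fps=30):
    """Sketch: write BGR frames to an .mp4 container with OpenCV."""
    h, w, _ = frames[0].shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    writer = cv2.VideoWriter(output_path, fourcc, fps, (w, h))
    for frame in frames:
        writer.write(frame)  # frames are assumed to be (h, w, 3) uint8 BGR
    writer.release()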
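The updated process_video docstring says the intermediate OpenCV output is re-encoded with FFmpeg for web compatibility, but that step is outside these hunks. Given the `import ffmpeg` in the first hunk header, which suggests ffmpeg-python, a hedged sketch of such a re-encode step could look like this; the output path, codec, and flags are assumptions rather than the app's actual settings.

import ffmpeg

def reencode_for_web_sketch(temp_video_path, output_path="styled_output.mp4"):
    """Sketch: re-encode an OpenCV-written mp4 to H.264/yuv420p for browser playback."""
    (
        ffmpeg
        .input(temp_video_path)
        # Assumed codec and flags for web compatibility; not taken from app.py.
        .output(output_path, vcodec="libx264", pix_fmt="yuv420p", movflags="faststart")
        .run(overwrite_output=True, quiet=True)
    )
    return output_path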