Update app.py
app.py
CHANGED
@@ -9,9 +9,6 @@ import ffmpeg
 def extract_frames(video_path):
     """
     Extracts all frames from the input video.
-    Logic:
-    - Open the video file using cv2.VideoCapture.
-    - Read frames until the video ends.
     """
     cap = cv2.VideoCapture(video_path)
     frames = []
@@ -26,40 +23,102 @@ def extract_frames(video_path):
 
 def apply_style_propagation(frames, style_image_path):
     """
-    Applies the style from the provided image to …
-
-    - …
-    - …
-    - …
-    - …
-    - Clip mapping coordinates to avoid out-of-bound values.
+    Applies the style from the provided keyframe image to every frame using optical flow,
+    with additional corrections:
+    - Median filtering of flow components.
+    - Patch-based fallback for blocks with extreme flow.
+    - Temporal reset blending with the original style.
+    - Sharpening after warping.
     """
+    # Load and resize the style image to match video dimensions.
     style_image = cv2.imread(style_image_path)
     if style_image is None:
         raise ValueError(f"Failed to load style image from {style_image_path}")
-
     h, w = frames[0].shape[:2]
     style_image = cv2.resize(style_image, (w, h))
+    # Keep a copy for temporal re-anchoring.
+    original_styled = style_image.copy()
 
     styled_frames = [style_image]
     prev_gray = cv2.cvtColor(frames[0], cv2.COLOR_BGR2GRAY)
 
+    # Parameters for corrections:
+    reset_interval = 30   # Every 30 frames, blend with original style.
+    block_size = 16       # Size of block for patch matching.
+    patch_threshold = 10  # If mean flow magnitude in a block exceeds this, use patch matching.
+    search_margin = 10    # Margin around block for patch matching.
+
     for i in tqdm(range(1, len(frames)), desc="Propagating style"):
+        # Compute optical flow between the previous and current grayscale frames.
         curr_gray = cv2.cvtColor(frames[i], cv2.COLOR_BGR2GRAY)
         flow = cv2.calcOpticalFlowFarneback(
             prev_gray, curr_gray, None,
             pyr_scale=0.5, levels=3, winsize=15,
             iterations=3, poly_n=5, poly_sigma=1.2, flags=0
         )
+
+        # --- Method 3: Median filtering of the flow components ---
+        flow_x = flow[..., 0]
+        flow_y = flow[..., 1]
+        flow_x_filtered = cv2.medianBlur(flow_x, 3)
+        flow_y_filtered = cv2.medianBlur(flow_y, 3)
+        flow_filtered = np.dstack((flow_x_filtered, flow_y_filtered))
+
+        # --- Method 4: Patch-based fallback for extreme flow ---
+        flow_corrected = flow_filtered.copy()
+        for by in range(0, h, block_size):
+            for bx in range(0, w, block_size):
+                # Define block region (handle edges)
+                y1, y2 = by, min(by + block_size, h)
+                x1, x2 = bx, min(bx + block_size, w)
+                block_flow = flow_filtered[y1:y2, x1:x2]
+                # Compute mean magnitude in the block.
+                mag = np.sqrt(block_flow[..., 0]**2 + block_flow[..., 1]**2)
+                mean_mag = np.mean(mag)
+                if mean_mag > patch_threshold:
+                    # Use patch matching to recalc flow for this block.
+                    patch = prev_gray[y1:y2, x1:x2]
+                    # Define search region in current frame.
+                    sx1 = max(x1 - search_margin, 0)
+                    sy1 = max(by - search_margin, 0)
+                    sx2 = min(x2 + search_margin, w)
+                    sy2 = min(y2 + search_margin, h)
+                    search_region = curr_gray[sy1:sy2, sx1:sx2]
+                    if search_region.shape[0] < patch.shape[0] or search_region.shape[1] < patch.shape[1]:
+                        continue
+                    res = cv2.matchTemplate(search_region, patch, cv2.TM_SQDIFF_NORMED)
+                    _, _, min_loc, _ = cv2.minMaxLoc(res)
+                    best_x = sx1 + min_loc[0]
+                    best_y = sy1 + min_loc[1]
+                    # Calculate offset relative to block's top-left corner.
+                    offset_x = best_x - x1
+                    offset_y = best_y - by
+                    # Override flow for the entire block.
+                    flow_corrected[y1:y2, x1:x2, 0] = offset_x
+                    flow_corrected[y1:y2, x1:x2, 1] = offset_y
+
+        # Compute mapping coordinates.
         grid_x, grid_y = np.meshgrid(np.arange(w), np.arange(h))
-        map_x = grid_x + flow[..., 0]
-        map_y = grid_y + flow[..., 1]
-        # Clip mapping coordinates to valid pixel indices.
+        map_x = grid_x + flow_corrected[..., 0]
+        map_y = grid_y + flow_corrected[..., 1]
         map_x = np.clip(map_x, 0, w - 1).astype(np.float32)
         map_y = np.clip(map_y, 0, h - 1).astype(np.float32)
 
+        # Warp the previous styled frame using the computed mapping.
         warped_styled = cv2.remap(styled_frames[-1], map_x, map_y, interpolation=cv2.INTER_LINEAR)
-        styled_frames.append(warped_styled)
+
+        # --- Method 2: Temporal Reset/Re-anchoring ---
+        if i % reset_interval == 0:
+            # Blend the current warped result with the original styled keyframe.
+            warped_styled = cv2.addWeighted(warped_styled, 0.7, original_styled, 0.3, 0)
+
+        # --- Method 5: Sharpening Post-Warping ---
+        kernel = np.array([[0, -1, 0],
+                           [-1, 5, -1],
+                           [0, -1, 0]], dtype=np.float32)
+        warped_sharpened = cv2.filter2D(warped_styled, -1, kernel)
+
+        styled_frames.append(warped_sharpened)
         prev_gray = curr_gray
 
     print(f"Propagated style to {len(styled_frames)} frames.")
@@ -70,8 +129,6 @@ def apply_style_propagation(frames, style_image_path):
 def save_video_cv2(frames, output_path, fps=30):
     """
     Saves a list of frames as a video using OpenCV.
-    Logic:
-    - Use cv2.VideoWriter with codec 'mp4v' to create a temporary video file.
     """
     h, w, _ = frames[0].shape
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
@@ -84,21 +141,22 @@ def save_video_cv2(frames, output_path, fps=30):
 
 def process_video(video_file, style_image_file, fps=30, target_width=0, target_height=0):
     """
-    Processes the input video by applying the style image via optical flow,
-    …
+    Processes the input video by applying the style image via optical flow propagation,
+    with additional corrections (methods 2, 3, 4, and 5).
+    Optionally downscale the video and style image to the specified resolution.
     Then re-encodes the video with FFmpeg for web compatibility.
 
-    …
+    Parameters:
     - video_file: The input video file.
     - style_image_file: The stylized keyframe image.
     - fps: Output frames per second.
-    - target_width: Target width for downscaling (0 …
-    - target_height: Target height for downscaling (0 …
+    - target_width: Target width for downscaling (0 for original).
+    - target_height: Target height for downscaling (0 for original).
 
     Returns:
-    - Path to the final …
+    - Path to the final output video.
     """
-    # …
+    # Get the video file path.
     video_path = video_file if isinstance(video_file, str) else video_file["name"]
 
     # Process the style image input.
@@ -113,7 +171,7 @@ def process_video(video_file, style_image_file, fps=30, target_width=0, target_h
     else:
         return "Error: Unsupported style image format."
 
-    # Extract frames from the video
+    # Extract frames from the video.
     frames = extract_frames(video_path)
     if not frames:
         return "Error: No frames extracted from the video."
@@ -121,19 +179,19 @@ def process_video(video_file, style_image_file, fps=30, target_width=0, target_h
     original_h, original_w = frames[0].shape[:2]
     print(f"Original video resolution: {original_w}x{original_h}")
 
-    # Downscale if target dimensions are provided
+    # Downscale if target dimensions are provided.
     if target_width > 0 and target_height > 0:
         print(f"Downscaling frames to resolution: {target_width}x{target_height}")
         frames = [cv2.resize(frame, (target_width, target_height)) for frame in frames]
     else:
         print("No downscaling applied. Using original resolution.")
 
-    # Propagate style.
+    # Propagate the style using our enhanced method.
     styled_frames = apply_style_propagation(frames, style_image_path)
 
     # Save intermediate video using OpenCV to a named temporary file.
     temp_video_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
-    temp_video_file.close()
+    temp_video_file.close()
     temp_video_path = temp_video_file.name
     save_video_cv2(styled_frames, temp_video_path, fps=fps)
 
@@ -173,11 +231,14 @@ iface = gr.Interface(
         gr.Slider(minimum=0, maximum=1080, step=1, value=0, label="Target Height (0 for original)")
     ],
     outputs=gr.Video(label="Styled Video"),
-    title="Optical Flow Style Propagation with …
+    title="Optical Flow Style Propagation with Corrections",
     description=(
-        "Upload a video and a stylized keyframe image. Optionally downscale both to a target resolution "
-        "…
-        "…
+        "Upload a video and a stylized keyframe image. Optionally downscale both to a target resolution. "
+        "The style is propagated using optical flow with additional corrections:\n"
+        "• Temporal re-anchoring\n"
+        "• Median filtering of the flow\n"
+        "• Patch-based flow correction\n"
+        "• Post-warp sharpening\n"
        "The output video is re-encoded for web compatibility."
    )
)
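Note: the process_video docstring says the styled video is re-encoded with FFmpeg for web compatibility, but that code falls outside the changed hunks shown above. As a rough sketch only, assuming the ffmpeg-python binding that app.py imports at the top (the helper name reencode_for_web and the exact codec options are illustrative assumptions, not the commit's actual code), such a step might look like:

    import ffmpeg  # ffmpeg-python, imported at the top of app.py

    def reencode_for_web(src_path, dst_path, fps=30):
        # Hypothetical helper: re-encode to H.264 + yuv420p so browsers can play the mp4.
        (
            ffmpeg
            .input(src_path)
            .output(dst_path, vcodec="libx264", pix_fmt="yuv420p", r=fps, movflags="faststart")
            .overwrite_output()
            .run(quiet=True)
        )
        return dst_path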
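For a quick local check without launching the Gradio interface, process_video can be called directly with file paths, since it accepts a plain string for the video input (and, assuming the unshown style-image branch also accepts a path, for the keyframe too). The file names below are placeholders, not files from this repo:

    # Hypothetical smoke test using the signature shown in the diff above.
    out_path = process_video("input.mp4", "styled_keyframe.png", fps=30,
                             target_width=640, target_height=360)
    print("Styled video written to:", out_path)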