Spaces:

SpyC0der77
/

AI-Video-Stabilization

Sleeping

App Files Files Community

SpyC0der77 committed on Mar 15

Commit

3aedaee

verified ·

1 Parent(s): dec9875

Update app.py

Browse files

Files changed (1) hide show

app.py +87 -102

app.py CHANGED Viewed

@@ -7,24 +7,14 @@ import tempfile
 import os
 import gradio as gr
 import time
-import threading
-# Global status and result dictionaries.
-status = {
-    "logs": "",
-    "progress": 0,  # from 0 to 100
-    "finished": False
-}
-result = {
-    "original_video": None,
-    "stabilized_video": None
-}
-# Set up device for torch.
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"[INFO] Using device: {device}")
-# Try to load the RAFT model. If it fails, fall back to OpenCV Farneback.
 try:
     print("[INFO] Attempting to load RAFT model from torch.hub...")
     raft_model = torch.hub.load("princeton-vl/RAFT", "raft_small", pretrained=True, trust_repo=True)
@@ -36,70 +26,76 @@ except Exception as e:
     print("[INFO] Falling back to OpenCV Farneback optical flow.")
     raft_model = None
-def append_log(msg):
-    """Append a log message to the global status and print it."""
-    global status
-    status["logs"] += msg + "\n"
-    print(msg)
-def background_process(video_file, zoom):
     """
-    Runs the full processing: generates a motion CSV using RAFT (or Farneback)
-    and then stabilizes the video. Updates global status and result.
     """
-    global status, result
-    status["logs"] = ""
-    status["progress"] = 0
-    status["finished"] = False
-    result["original_video"] = None
-    result["stabilized_video"] = None
-    append_log("[INFO] Starting AI-powered video processing...")
     # === CSV Generation Phase ===
-    append_log("[INFO] Starting motion CSV generation...")
     cap = cv2.VideoCapture(video_file)
     if not cap.isOpened():
-        append_log("[ERROR] Could not open video file for CSV generation.")
-        status["finished"] = True
         return
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    append_log(f"[INFO] Total frames in video: {total_frames}")
     csv_file = tempfile.NamedTemporaryFile(delete=False, suffix='.csv').name
     with open(csv_file, 'w', newline='') as csvfile:
         fieldnames = ['frame', 'mag', 'ang', 'zoom']
         writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
         writer.writeheader()
         ret, first_frame = cap.read()
         if not ret:
-            append_log("[ERROR] Cannot read first frame from video.")
-            status["finished"] = True
-            cap.release()
             return
         if raft_model is not None:
             first_frame_rgb = cv2.cvtColor(first_frame, cv2.COLOR_BGR2RGB)
             prev_tensor = torch.from_numpy(first_frame_rgb).permute(2, 0, 1).float().unsqueeze(0) / 255.0
             prev_tensor = prev_tensor.to(device)
-            append_log("[INFO] Using RAFT model for optical flow computation.")
         else:
             prev_gray = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)
-            append_log("[INFO] Using Farneback optical flow for computation.")
         frame_idx = 1
         while True:
             ret, frame = cap.read()
             if not ret:
                 break
             if raft_model is not None:
                 curr_frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                 curr_tensor = torch.from_numpy(curr_frame_rgb).permute(2, 0, 1).float().unsqueeze(0) / 255.0
                 curr_tensor = curr_tensor.to(device)
                 with torch.no_grad():
-                    _, flow_up = raft_model(prev_tensor, curr_tensor, iters=20, test_mode=True)
                 flow = flow_up[0].permute(1, 2, 0).cpu().numpy()
                 prev_tensor = curr_tensor.clone()
             else:
@@ -108,12 +104,12 @@ def background_process(video_file, zoom):
                                                     pyr_scale=0.5, levels=3, winsize=15,
                                                     iterations=3, poly_n=5, poly_sigma=1.2, flags=0)
                 prev_gray = curr_gray
-            # Compute median magnitude and angle.
-            mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1], angleInDegrees=True)
             median_mag = np.median(mag)
             median_ang = np.median(ang)
-            # Compute zoom factor: fraction of pixels moving away from the center.
             h, w = flow.shape[:2]
             center_x, center_y = w / 2, h / 2
             x_coords, y_coords = np.meshgrid(np.arange(w), np.arange(h))
@@ -121,25 +117,28 @@ def background_process(video_file, zoom):
             y_offset = y_coords - center_y
             dot = flow[..., 0] * x_offset + flow[..., 1] * y_offset
             zoom_factor = np.count_nonzero(dot > 0) / (w * h)
             writer.writerow({
                 'frame': frame_idx,
                 'mag': median_mag,
                 'ang': median_ang,
                 'zoom': zoom_factor
             })
             if frame_idx % 10 == 0 or frame_idx == total_frames:
-                progress_csv = (frame_idx / total_frames) * 50  # CSV phase: 0-50%
-                append_log(f"[INFO] CSV: Processed frame {frame_idx}/{total_frames}")
-                status["progress"] = progress_csv
             frame_idx += 1
     cap.release()
-    append_log("[INFO] CSV generation complete.")
-    status["progress"] = 50
     # === Stabilization Phase ===
-    append_log("[INFO] Starting video stabilization...")
-    # Read the CSV and compute cumulative motion data.
     motion_data = {}
     cumulative_dx = 0.0
     cumulative_dy = 0.0
@@ -155,10 +154,10 @@ def background_process(video_file, zoom):
             cumulative_dx += dx
             cumulative_dy += dy
             motion_data[frame_num] = (-cumulative_dx, -cumulative_dy)
-    append_log("[INFO] Motion CSV read complete.")
-    status["progress"] = 55
-    # Re-open video for stabilization.
     cap = cv2.VideoCapture(video_file)
     fps = cap.get(cv2.CAP_PROP_FPS)
     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
@@ -168,7 +167,7 @@ def background_process(video_file, zoom):
     temp_file.close()
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     out = cv2.VideoWriter(output_file, fourcc, fps, (width, height))
     frame_idx = 1
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
     while True:
@@ -181,59 +180,45 @@ def background_process(video_file, zoom):
             start_x = max((zoomed_w - width) // 2, 0)
             start_y = max((zoomed_h - height) // 2, 0)
             frame = zoomed_frame[start_y:start_y+height, start_x:start_x+width]
         dx, dy = motion_data.get(frame_idx, (0, 0))
-        transform = np.array([[1, 0, dx],
-                              [0, 1, dy]], dtype=np.float32)
         stabilized_frame = cv2.warpAffine(frame, transform, (width, height))
         out.write(stabilized_frame)
         if frame_idx % 10 == 0 or frame_idx == total_frames:
-            progress_stab = 50 + (frame_idx / total_frames) * 50  # Stabilization phase: 50-100%
-            append_log(f"[INFO] Stabilization: Processed frame {frame_idx}/{total_frames}")
-            status["progress"] = progress_stab
         frame_idx += 1
     cap.release()
     out.release()
-    append_log("[INFO] Stabilization complete.")
-    status["progress"] = 100
-    status["finished"] = True
-    result["original_video"] = video_file
-    result["stabilized_video"] = output_file
-def start_processing(video_file, zoom):
-    """Starts background processing in a new thread."""
-    thread = threading.Thread(target=background_process, args=(video_file, zoom), daemon=True)
-    thread.start()
-    return "[INFO] Processing started..."
-def poll_status():
-    """
-    Returns the current processing status:
-      - original_video: path if finished (else None)
-      - stabilized_video: path if finished (else None)
-      - logs: current logs string
-      - progress: current progress value (0 to 100)
-    """
-    return result["original_video"], result["stabilized_video"], status["logs"], status["progress"]
-# Build the Gradio UI.
 with gr.Blocks() as demo:
     gr.Markdown("# AI-Powered Video Stabilization")
-    gr.Markdown("Upload a video and select a zoom factor. Processing will start automatically and the UI will update every 2 seconds.")
     with gr.Row():
         with gr.Column():
             video_input = gr.Video(label="Input Video")
             zoom_slider = gr.Slider(minimum=1.0, maximum=2.0, step=0.1, value=1.0, label="Zoom Factor")
-            start_button = gr.Button("Process Video")
         with gr.Column():
             original_video = gr.Video(label="Original Video")
             stabilized_video = gr.Video(label="Stabilized Video")
             logs_output = gr.Textbox(label="Logs", lines=15)
             progress_bar = gr.Slider(label="Progress", minimum=0, maximum=100, value=0, interactive=False)
-    # When "Process Video" is clicked, start processing in the background.
-    start_button.click(fn=start_processing, inputs=[video_input, zoom_slider], outputs=[logs_output])
-    # Automatically poll status every 2 seconds using Blocks.load().
-    demo.load(fn=poll_status, inputs=[], outputs=[original_video, stabilized_video, logs_output, progress_bar], every=2)
-demo.launch()

 import os
 import gradio as gr
 import time
+import io
+# Set up device for torch
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"[INFO] Using device: {device}")
+# Try to load the RAFT model from torch.hub.
+# If it fails, fall back to OpenCV's Farneback optical flow.
 try:
     print("[INFO] Attempting to load RAFT model from torch.hub...")
     raft_model = torch.hub.load("princeton-vl/RAFT", "raft_small", pretrained=True, trust_repo=True)
     print("[INFO] Falling back to OpenCV Farneback optical flow.")
     raft_model = None
+def process_video_ai(video_file, zoom):
     """
+    Generator function for Gradio:
+      - Generates motion data (CSV) from the input video using an AI model (RAFT if available, else Farneback)
+      - Stabilizes the video using the generated motion data.
+    Yields:
+      A tuple of (original_video, stabilized_video, logs, progress)
+      During processing, original_video and stabilized_video are None.
+      The final yield returns the video file paths along with final logs and progress=100.
     """
+    logs = []
+    def add_log(msg):
+        logs.append(msg)
+        return "\n".join(logs)
+    # Check and extract the file path
+    if isinstance(video_file, dict):
+        video_file = video_file.get("name", None)
+    if video_file is None:
+        yield (None, None, "[ERROR] Please upload a video file.", 0)
+        return
+    add_log("[INFO] Starting AI-powered video processing...")
+    yield (None, None, add_log("Starting processing..."), 0)
     # === CSV Generation Phase ===
+    add_log("[INFO] Starting motion CSV generation...")
+    yield (None, None, add_log("Starting CSV generation..."), 0)
     cap = cv2.VideoCapture(video_file)
     if not cap.isOpened():
+        yield (None, None, add_log("[ERROR] Could not open video file for CSV generation."), 0)
         return
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    add_log(f"[INFO] Total frames in video: {total_frames}")
+    # Create temporary CSV file
     csv_file = tempfile.NamedTemporaryFile(delete=False, suffix='.csv').name
     with open(csv_file, 'w', newline='') as csvfile:
         fieldnames = ['frame', 'mag', 'ang', 'zoom']
         writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
         writer.writeheader()
         ret, first_frame = cap.read()
         if not ret:
+            yield (None, None, add_log("[ERROR] Cannot read first frame from video."), 0)
             return
         if raft_model is not None:
             first_frame_rgb = cv2.cvtColor(first_frame, cv2.COLOR_BGR2RGB)
             prev_tensor = torch.from_numpy(first_frame_rgb).permute(2, 0, 1).float().unsqueeze(0) / 255.0
             prev_tensor = prev_tensor.to(device)
+            add_log("[INFO] Using RAFT model for optical flow computation.")
         else:
             prev_gray = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)
+            add_log("[INFO] Using Farneback optical flow for computation.")
         frame_idx = 1
+        # Process each frame for CSV generation
         while True:
             ret, frame = cap.read()
             if not ret:
                 break
             if raft_model is not None:
                 curr_frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                 curr_tensor = torch.from_numpy(curr_frame_rgb).permute(2, 0, 1).float().unsqueeze(0) / 255.0
                 curr_tensor = curr_tensor.to(device)
                 with torch.no_grad():
+                    flow_low, flow_up = raft_model(prev_tensor, curr_tensor, iters=20, test_mode=True)
                 flow = flow_up[0].permute(1, 2, 0).cpu().numpy()
                 prev_tensor = curr_tensor.clone()
             else:
                                                     pyr_scale=0.5, levels=3, winsize=15,
                                                     iterations=3, poly_n=5, poly_sigma=1.2, flags=0)
                 prev_gray = curr_gray
+            # Compute median magnitude and angle
+            mag, ang = cv2.cartToPolar(flow[...,0], flow[...,1], angleInDegrees=True)
             median_mag = np.median(mag)
             median_ang = np.median(ang)
+            # Compute zoom factor: fraction of pixels moving away from center
             h, w = flow.shape[:2]
             center_x, center_y = w / 2, h / 2
             x_coords, y_coords = np.meshgrid(np.arange(w), np.arange(h))
             y_offset = y_coords - center_y
             dot = flow[..., 0] * x_offset + flow[..., 1] * y_offset
             zoom_factor = np.count_nonzero(dot > 0) / (w * h)
             writer.writerow({
                 'frame': frame_idx,
                 'mag': median_mag,
                 'ang': median_ang,
                 'zoom': zoom_factor
             })
             if frame_idx % 10 == 0 or frame_idx == total_frames:
+                progress_csv = (frame_idx / total_frames) * 50  # CSV phase is 0-50%
+                add_log(f"[INFO] CSV: Processed frame {frame_idx}/{total_frames}")
+                yield (None, None, add_log(""), progress_csv)
             frame_idx += 1
     cap.release()
+    add_log("[INFO] CSV generation complete.")
+    yield (None, None, add_log(""), 50)
     # === Stabilization Phase ===
+    add_log("[INFO] Starting video stabilization...")
+    yield (None, None, add_log("Starting stabilization..."), 51)
+    # Read the CSV and compute cumulative motion data
     motion_data = {}
     cumulative_dx = 0.0
     cumulative_dy = 0.0
             cumulative_dx += dx
             cumulative_dy += dy
             motion_data[frame_num] = (-cumulative_dx, -cumulative_dy)
+    add_log("[INFO] Motion CSV read complete.")
+    yield (None, None, add_log(""), 55)
+    # Re-open video for stabilization
     cap = cv2.VideoCapture(video_file)
     fps = cap.get(cv2.CAP_PROP_FPS)
     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     temp_file.close()
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     out = cv2.VideoWriter(output_file, fourcc, fps, (width, height))
     frame_idx = 1
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
     while True:
             start_x = max((zoomed_w - width) // 2, 0)
             start_y = max((zoomed_h - height) // 2, 0)
             frame = zoomed_frame[start_y:start_y+height, start_x:start_x+width]
         dx, dy = motion_data.get(frame_idx, (0, 0))
+        transform = np.array([[1, 0, dx], [0, 1, dy]], dtype=np.float32)
         stabilized_frame = cv2.warpAffine(frame, transform, (width, height))
         out.write(stabilized_frame)
         if frame_idx % 10 == 0 or frame_idx == total_frames:
+            progress_stab = 50 + (frame_idx / total_frames) * 50  # Stabilization phase is 50-100%
+            add_log(f"[INFO] Stabilization: Processed frame {frame_idx}/{total_frames}")
+            yield (None, None, add_log(""), progress_stab)
         frame_idx += 1
     cap.release()
     out.release()
+    add_log("[INFO] Stabilization complete.")
+    yield (video_file, output_file, add_log(""), 100)
+# Build the Gradio UI with streaming enabled.
 with gr.Blocks() as demo:
     gr.Markdown("# AI-Powered Video Stabilization")
+    gr.Markdown("Upload a video and select a zoom factor. The system will generate motion data using an AI model (RAFT if available, else Farneback) and then stabilize the video. Logs and progress will update during processing.")
     with gr.Row():
         with gr.Column():
             video_input = gr.Video(label="Input Video")
             zoom_slider = gr.Slider(minimum=1.0, maximum=2.0, step=0.1, value=1.0, label="Zoom Factor")
+            process_button = gr.Button("Process Video")
         with gr.Column():
             original_video = gr.Video(label="Original Video")
             stabilized_video = gr.Video(label="Stabilized Video")
             logs_output = gr.Textbox(label="Logs", lines=15)
             progress_bar = gr.Slider(label="Progress", minimum=0, maximum=100, value=0, interactive=False)
+    demo.queue()  # enable streaming
+    process_button.click(
+        fn=process_video_ai,
+        inputs=[video_input, zoom_slider],
+        outputs=[original_video, stabilized_video, logs_output, progress_bar],
+        stream=True  # enable streaming updates
+    )
+demo.launch()