import gradio as gr
import cv2
import numpy as np

EXAMPLE_VIDEO = "example.mp4"
EXAMPLE_IMAGE = "target (2).png"


def process_video_and_image(video_path, target_image_path):
    try:
        # Load the target image and precompute its SIFT features once.
        target_img = cv2.imread(target_image_path)
        if target_img is None:
            raise ValueError(f"Could not read target image: {target_image_path}")
        target_gray = cv2.cvtColor(target_img, cv2.COLOR_BGR2GRAY)

        cap = cv2.VideoCapture(video_path)
        sift = cv2.SIFT_create()
        keypoints_target, descriptors_target = sift.detectAndCompute(target_gray, None)

        # FLANN with a KD-tree index (algorithm=1) suits SIFT's float descriptors.
        flann = cv2.FlannBasedMatcher(dict(algorithm=1, trees=5), dict(checks=50))

        output_frames = []
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            keypoints_frame, descriptors_frame = sift.detectAndCompute(frame_gray, None)
            if descriptors_frame is None:
                continue

            # Lowe's ratio test. knnMatch can return pairs with fewer than
            # two entries, so guard the unpacking before comparing distances.
            matches = flann.knnMatch(descriptors_target, descriptors_frame, k=2)
            good_matches = [pair[0] for pair in matches
                            if len(pair) == 2 and pair[0].distance < 0.70 * pair[1].distance]

            if len(good_matches) > 10:
                src_pts = np.float32(
                    [keypoints_target[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
                dst_pts = np.float32(
                    [keypoints_frame[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)
                # Robustly estimate the target -> frame homography with RANSAC.
                matrix, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
                if matrix is not None:
                    # Project the target's corners into the frame and outline the match.
                    h, w = target_gray.shape
                    pts = np.float32([[0, 0], [0, h], [w, h], [w, 0]]).reshape(-1, 1, 2)
                    dst = cv2.perspectiveTransform(pts, matrix)
                    frame = cv2.polylines(frame, [np.int32(dst)], isClosed=True,
                                          color=(0, 255, 0), thickness=3)

            match_frame = cv2.drawMatches(target_img, keypoints_target,
                                          frame, keypoints_frame, good_matches, None)
            output_frames.append(cv2.resize(match_frame, (1200, 600)))
        cap.release()

        if not output_frames:
            raise ValueError("No frames with usable features were found.")

        height, width, _ = output_frames[0].shape
        # mp4v/.mp4 instead of XVID/.avi so the result plays in the
        # browser-based gr.Video component.
        out_video_path = "output_video.mp4"
        out = cv2.VideoWriter(out_video_path, cv2.VideoWriter_fourcc(*"mp4v"),
                              10, (width, height))
        for frame in output_frames:
            out.write(frame)
        out.release()
        return out_video_path
    except Exception as e:
        # Surface failures in the UI; returning an error string to a
        # gr.Video output would make Gradio treat it as a file path.
        raise gr.Error(str(e))


with gr.Blocks() as demo:
    gr.Markdown("## Video and Target Image Matcher")
    with gr.Row():
        video_input = gr.File(label="Upload Video File", file_types=[".mp4", ".avi", ".mov"])
        image_input = gr.File(label="Upload Target Image", file_types=[".png", ".jpg", ".jpeg"])
    example_button = gr.Button("Process Example")
    output_video = gr.Video(label="Matched Video Output")
    process_button = gr.Button("Process")

    def process_example():
        return process_video_and_image(EXAMPLE_VIDEO, EXAMPLE_IMAGE)

    process_button.click(
        fn=process_video_and_image,
        inputs=[video_input, image_input],
        outputs=output_video,
    )
    example_button.click(
        fn=process_example,
        inputs=[],
        outputs=output_video,
    )

demo.launch()
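

# A minimal alternative sketch, assuming an opencv-python build without SIFT
# (cv2.SIFT_create only joined the main package in OpenCV 4.4). ORB produces
# binary descriptors, so it pairs with a Hamming-distance brute-force matcher
# instead of the KD-tree FLANN index used above; the ratio test, homography
# estimation, and drawing steps are unchanged. The helper name match_with_orb
# is hypothetical, not part of the app above.
def match_with_orb(target_gray, frame_gray):
    # ORB detector; nfeatures=2000 is an assumed tuning value.
    orb = cv2.ORB_create(nfeatures=2000)
    kp_t, des_t = orb.detectAndCompute(target_gray, None)
    kp_f, des_f = orb.detectAndCompute(frame_gray, None)
    if des_t is None or des_f is None:
        return [], kp_t, kp_f
    # Hamming norm matches ORB's binary descriptors.
    matcher = cv2.BFMatcher(cv2.NORM_HAMMING)
    matches = matcher.knnMatch(des_t, des_f, k=2)
    good = [p[0] for p in matches
            if len(p) == 2 and p[0].distance < 0.75 * p[1].distance]
    return good, kp_t, kp_f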