Spaces:
Sleeping
Sleeping
File size: 4,759 Bytes
f53adeb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
import cv2
import os
import glob
import numpy as np
from datetime import datetime
def add_transparent_image(
    background, foreground, alpha_factor=1.0, x_offset=None, y_offset=None
):
    """Alpha-blend a 4-channel foreground onto a 3-channel background in place.

    Adapted from StackOverflow contributor Ben. We are thankful for Ben's
    assistance by providing this useful method.
    Original source:
    https://stackoverflow.com/questions/40895785/
    using-opencv-to-overlay-transparent-image-onto-another-image

    Args:
        background: HxWx3 uint8 image; mutated in place where the overlay lands.
        foreground: hxwx4 uint8 image whose 4th channel is alpha (0-255).
        alpha_factor: extra multiplier applied to the foreground alpha
            (clipped to [0, 1] after scaling).
        x_offset, y_offset: top-left position of the foreground within the
            background; default is centered. May be negative — the foreground
            is clipped to the overlapping region.

    Returns:
        The (mutated) background array. If the images do not overlap at all,
        the background is returned unchanged.

    Raises:
        ValueError: if channel counts are not 3 (background) / 4 (foreground).
    """
    bg_h, bg_w, bg_channels = background.shape
    fg_h, fg_w, fg_channels = foreground.shape

    # Validate with explicit exceptions: `assert` is stripped under `python -O`.
    if bg_channels != 3:
        raise ValueError(
            f"background image should have exactly 3 channels (RGB). found:{bg_channels}"
        )
    if fg_channels != 4:
        raise ValueError(
            f"foreground image should have exactly 4 channels (RGBA). found:{fg_channels}"
        )

    # center by default
    if x_offset is None:
        x_offset = (bg_w - fg_w) // 2
    if y_offset is None:
        y_offset = (bg_h - fg_h) // 2

    w = min(fg_w, bg_w, fg_w + x_offset, bg_w - x_offset)
    h = min(fg_h, bg_h, fg_h + y_offset, bg_h - y_offset)
    if w < 1 or h < 1:
        # No overlap. Previously this returned None, which broke callers
        # chaining on the result; return the untouched background instead.
        return background

    # clip foreground and background images to the overlapping regions
    bg_x = max(0, x_offset)
    bg_y = max(0, y_offset)
    fg_x = max(0, -x_offset)
    fg_y = max(0, -y_offset)
    foreground = foreground[fg_y : fg_y + h, fg_x : fg_x + w]
    background_subsection = background[bg_y : bg_y + h, bg_x : bg_x + w]

    # Separate alpha and color channels. BGR -> RGB is a plain channel
    # reversal, so no cv2.cvtColor call is needed.
    foreground_colors = foreground[:, :, 2::-1]
    # 0-255 => 0.0-1.0; clip so alpha_factor > 1 cannot overshoot the blend.
    alpha_channel = np.clip(foreground[:, :, 3] / 255 * alpha_factor, 0.0, 1.0)
    # Broadcast the alpha over the three color channels.
    alpha_mask = alpha_channel[:, :, np.newaxis]

    # combine the background with the overlay image weighted by alpha
    composite = (
        background_subsection * (1 - alpha_mask) + foreground_colors * alpha_mask
    )
    # overwrite the section of the background image that has been updated
    background[bg_y : bg_y + h, bg_x : bg_x + w] = composite
    return background
def convert_tensor_to_rgba_image(tensor):
    """Turn a saliency tensor into a JET-colormapped BGRA heatmap image.

    Args:
        tensor: torch tensor of saliency scores (any device); non-uint8
            values are rescaled so the maximum maps to 255.

    Returns:
        HxWx4 uint8 array: colormapped saliency with an alpha channel that
        is 0 wherever the colormap produced its zero-saliency color
        (transparent background).
    """
    saliency_array = tensor.cpu().numpy()
    # Normalize to the 0-255 uint8 range expected by cv2.applyColorMap.
    if saliency_array.dtype != np.uint8:
        peak = saliency_array.max()
        if peak > 0:
            saliency_array = (255 * saliency_array / peak).astype(np.uint8)
        else:
            # All-zero saliency: dividing by max() would yield NaNs, and
            # casting NaNs to uint8 is undefined. Emit an all-zero map.
            saliency_array = np.zeros_like(saliency_array, dtype=np.uint8)
    heatmap = cv2.applyColorMap(saliency_array, cv2.COLORMAP_JET)
    # Make zero-saliency pixels transparent: [128, 0, 0] (BGR) is the color
    # COLORMAP_JET assigns to input value 0.
    alpha_channel = np.ones(heatmap.shape[:2], dtype=heatmap.dtype) * 255
    black_pixels_mask = np.all(heatmap == [128, 0, 0], axis=-1)
    alpha_channel[black_pixels_mask] = 0
    # Merge the BGR channels and the alpha channel into a BGRA image.
    saliency_rgba = cv2.merge((heatmap, alpha_channel))
    return saliency_rgba
def convert_rgb_to_rgba_image(image):
    """Append a fully-opaque alpha channel to an RGB image, returning it
    with channels reordered to BGRA (OpenCV convention)."""
    # RGB -> BGR is just a channel reversal; stack the opaque alpha on top.
    opaque = np.full(image.shape[:2], 255, dtype=image.dtype)
    return np.dstack((image[:, :, ::-1], opaque))
def label_frame(image, token):
    """Draw `token` in white over a filled black box in the top-left
    corner of `image` (mutated in place) and return the image."""
    font_face = cv2.FONT_HERSHEY_SIMPLEX
    scale = 0.7
    thickness = 1
    (text_w, text_h), _ = cv2.getTextSize(token, font_face, scale, thickness)

    # Filled (thickness -1) black box anchored at (10, 10) with a 5px pad.
    box_top_left = (10, 10)
    box_bottom_right = (10 + text_w + 5, 10 + text_h + 5)
    cv2.rectangle(image, box_top_left, box_bottom_right, (0, 0, 0), -1)

    # putText anchors at the text baseline, text_h below the box's top edge.
    cv2.putText(
        image,
        token,
        (10, 10 + text_h),
        font_face,
        scale,
        (255, 255, 255),
        thickness,
    )
    return image
def saliency_video(path, sequence):
    """Assemble the saliency PNGs in `path` into ``saliency.mp4`` at 5 fps.

    Args:
        path: directory containing the saliency frames as ``*.png``; the
            video is written into the same directory.
        sequence: iterable of token strings, one per frame, drawn onto each
            frame as a label; ``zip`` drops unmatched frames/tokens.
    """
    # Sort by creation time so frames appear in generation order.
    # NOTE(review): getctime ordering is filesystem-dependent — assumes the
    # frames were written sequentially by this run.
    image_files = sorted(glob.glob(os.path.join(path, "*.png")), key=os.path.getctime)
    first_frame = cv2.imread(image_files[0])
    height, width = first_frame.shape[:2]  # was misspelled `widht`

    # Create a VideoWriter object to save the video
    video_name = os.path.join(path, "saliency.mp4")
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    video = cv2.VideoWriter(video_name, fourcc, 5, (width, height))
    for image_file, token in zip(image_files, sequence):
        image = cv2.imread(image_file)
        # Previously `token` was read but never used, leaving `sequence` a
        # dead parameter; label each frame with its token as intended.
        image = label_frame(image, token)
        # Write the image to the video
        video.write(image)
    # Release the VideoWriter object so the container is finalized
    video.release()
    print(f"Video saved as {video_name}")