File size: 4,759 Bytes
f53adeb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import cv2
import os
import glob
import numpy as np
from datetime import datetime


def add_transparent_image(
    background, foreground, alpha_factor=1.0, x_offset=None, y_offset=None
):
    """
    Alpha-blend a 4-channel foreground onto a 3-channel background in place.

    Adapted from StackOverflow contributor Ben:
    https://stackoverflow.com/questions/40895785/
    using-opencv-to-overlay-transparent-image-onto-another-image

    Parameters
    ----------
    background : np.ndarray
        H x W x 3 image. Mutated in place where the foreground overlaps.
    foreground : np.ndarray
        h x w x 4 image whose 4th channel is the alpha mask (0-255).
    alpha_factor : float, optional
        Extra multiplier applied to the foreground alpha (1.0 = opaque mask
        used as-is).
    x_offset, y_offset : int or None, optional
        Top-left placement of the foreground on the background. ``None``
        centers the foreground; negative offsets are allowed and clipped.

    Returns
    -------
    np.ndarray
        The (possibly mutated) background. Returned even when the overlap
        region is empty, so callers always get the image back.
    """

    bg_h, bg_w, bg_channels = background.shape
    fg_h, fg_w, fg_channels = foreground.shape

    assert (
        bg_channels == 3
    ), f"background image should have exactly 3 channels (RGB). found:{bg_channels}"
    assert (
        fg_channels == 4
    ), f"foreground image should have exactly 4 channels (RGBA). found:{fg_channels}"

    # center by default
    if x_offset is None:
        x_offset = (bg_w - fg_w) // 2
    if y_offset is None:
        y_offset = (bg_h - fg_h) // 2

    w = min(fg_w, bg_w, fg_w + x_offset, bg_w - x_offset)
    h = min(fg_h, bg_h, fg_h + y_offset, bg_h - y_offset)

    # No overlap: nothing to blend. BUGFIX: the original returned None here
    # while every other path returns the background, making the return
    # contract inconsistent for callers that reuse the result.
    if w < 1 or h < 1:
        return background

    # clip foreground and background images to the overlapping regions
    bg_x = max(0, x_offset)
    bg_y = max(0, y_offset)
    fg_x = max(0, -x_offset)
    fg_y = max(0, -y_offset)
    foreground = foreground[fg_y : fg_y + h, fg_x : fg_x + w]
    background_subsection = background[bg_y : bg_y + h, bg_x : bg_x + w]

    # separate alpha and color channels from the foreground image
    foreground_colors = foreground[:, :, :3]
    # NOTE(review): this R<->B swap is not in the cited StackOverflow
    # original; presumably it compensates for a channel-order mismatch
    # elsewhere in this pipeline — confirm against the callers.
    foreground_colors = cv2.cvtColor(foreground_colors, cv2.COLOR_BGR2RGB)
    alpha_channel = foreground[:, :, 3] / 255 * alpha_factor  # 0-255 => 0.0-1.0

    # construct an alpha_mask that matches the image shape
    alpha_mask = np.dstack((alpha_channel, alpha_channel, alpha_channel))

    # combine the background with the overlay image weighted by alpha
    composite = (
        background_subsection * (1 - alpha_mask) + foreground_colors * alpha_mask
    )

    # overwrite the section of the background image that has been updated;
    # cast explicitly back to the background dtype (same truncating cast
    # numpy performed implicitly on assignment).
    background[bg_y : bg_y + h, bg_x : bg_x + w] = composite.astype(background.dtype)

    return background


def convert_tensor_to_rgba_image(tensor):
    """
    Convert a saliency tensor into a 4-channel JET heat-map image.

    Parameters
    ----------
    tensor : torch.Tensor
        2-D saliency map (any device; moved to CPU here). Values are
        rescaled so the maximum maps to 255 unless already uint8.

    Returns
    -------
    np.ndarray
        H x W x 4 image: COLORMAP_JET colors plus an alpha channel that is
        0 wherever the heat map is the JET zero-value color and 255
        elsewhere.
    """

    saliency_array = tensor.cpu().numpy()

    # Normalize to the 0-255 uint8 range expected by applyColorMap.
    if saliency_array.dtype != np.uint8:
        peak = saliency_array.max()
        if peak > 0:
            saliency_array = (255 * saliency_array / peak).astype(np.uint8)
        else:
            # BUGFIX: the original divided by max() unconditionally, which
            # breaks (divide-by-zero / NaN) on an all-zero saliency map.
            saliency_array = np.zeros_like(saliency_array, dtype=np.uint8)

    heatmap = cv2.applyColorMap(saliency_array, cv2.COLORMAP_JET)

    # Pixels are transparent where no saliency: [128, 0, 0] (BGR) is the
    # COLORMAP_JET color produced for input value 0.
    alpha_channel = np.ones(heatmap.shape[:2], dtype=heatmap.dtype) * 255
    black_pixels_mask = np.all(heatmap == [128, 0, 0], axis=-1)
    alpha_channel[black_pixels_mask] = 0

    # Merge the color and alpha channels into one BGRA image.
    saliency_rgba = cv2.merge((heatmap, alpha_channel))

    return saliency_rgba


def convert_rgb_to_rgba_image(image):
    """
    Add a fully opaque alpha channel to an RGB image.

    The color channels are reordered RGB -> BGR (OpenCV's native order)
    and an all-255 alpha channel is appended, yielding an H x W x 4 image.
    """
    opaque_alpha = np.full(image.shape[:2], 255, dtype=image.dtype)
    bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return cv2.merge((bgr_image, opaque_alpha))


def label_frame(image, token):
    """
    Draw ``token`` in the top-left corner of ``image`` over a black box.

    The image is modified in place and also returned for convenience.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    scale = 0.7
    thickness = 1

    (text_w, text_h), _ = cv2.getTextSize(token, font, scale, thickness)

    # Filled black rectangle slightly larger than the text it backs.
    box_top_left = (10, 10)
    box_bottom_right = (10 + text_w + 5, 10 + text_h + 5)
    cv2.rectangle(image, box_top_left, box_bottom_right, (0, 0, 0), -1)

    # White text; the baseline sits text_h below the top margin.
    cv2.putText(
        image, token, (10, 10 + text_h), font, scale, (255, 255, 255), thickness
    )

    return image


def saliency_video(path, sequence):
    """
    Assemble the PNG frames in ``path`` into ``saliency.mp4`` at 5 fps.

    Frames are ordered by file creation time and paired with ``sequence``;
    because of ``zip`` truncation, at most ``len(sequence)`` frames are
    written.

    Parameters
    ----------
    path : str
        Directory containing the ``*.png`` frames; the video is written
        into the same directory.
    sequence : iterable
        One entry per frame (e.g. decoded tokens). Currently it only
        bounds the number of frames written — see the NOTE below.

    Raises
    ------
    FileNotFoundError
        If the directory contains no ``*.png`` files (the original code
        crashed here with a bare IndexError).
    """

    image_files = sorted(glob.glob(os.path.join(path, "*.png")), key=os.path.getctime)
    if not image_files:
        raise FileNotFoundError(f"No .png frames found in {path}")

    # Frame geometry is taken from the first image; all frames are assumed
    # to share it (VideoWriter silently drops mismatched frames).
    first_frame = cv2.imread(image_files[0])
    height, width = first_frame.shape[:2]

    # Create a VideoWriter object to save the video.
    video_name = os.path.join(path, "saliency.mp4")
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    video = cv2.VideoWriter(video_name, fourcc, 5, (width, height))

    # NOTE(review): ``token`` is never used — perhaps
    # ``label_frame(image, token)`` was intended before each write;
    # confirm with the author before relying on labeled frames.
    for image_file, token in zip(image_files, sequence):
        video.write(cv2.imread(image_file))

    # Release the VideoWriter object so the file is finalized on disk.
    video.release()

    print(f"Video saved as {video_name}")