|
|
|
import numpy as np |
|
import torch |
|
|
|
from ..utils import ext_loader |
|
|
|
# Handle to the '_ext' extension, requested with the 'pixel_group' op.
# NOTE(review): presumably a compiled C++/CUDA module; the semantics of
# ext_loader.load_ext are not visible in this file -- confirm.
ext_module = ext_loader.load_ext('_ext', ['pixel_group'])
|
|
|
|
|
def pixel_group(score, mask, embedding, kernel_label, kernel_contour,
                kernel_region_num, distance_threshold):
    """Group pixels into text instances, which is widely used in text
    detection methods.

    NumPy inputs are converted to tensors with ``torch.from_numpy`` before
    being handed to the extension op.

    Arguments:
        score (np.array or Tensor): The foreground score with size hxw.
        mask (np.array or Tensor): The foreground mask with size hxw.
        embedding (np.array or Tensor): The embedding with size hxwxc to
            distinguish instances.
        kernel_label (np.array or Tensor): The instance kernel index with
            size hxw.
        kernel_contour (np.array or Tensor): The kernel contour with size hxw.
        kernel_region_num (int): The instance kernel region number.
        distance_threshold (float): The embedding distance threshold between
            kernel and pixel in one instance.

    Returns:
        pixel_assignment (List[List[float]]): The instance coordinate list.
            Each element consists of averaged confidence, pixel number, and
            coordinates (x_i, y_i for all pixels) in order.

    Raises:
        AssertionError: If an argument does not have the expected type.
    """
    assert isinstance(score, (torch.Tensor, np.ndarray))
    assert isinstance(mask, (torch.Tensor, np.ndarray))
    assert isinstance(embedding, (torch.Tensor, np.ndarray))
    assert isinstance(kernel_label, (torch.Tensor, np.ndarray))
    assert isinstance(kernel_contour, (torch.Tensor, np.ndarray))
    assert isinstance(kernel_region_num, int)
    assert isinstance(distance_threshold, float)

    # The extension op is always called with tensors.
    if isinstance(score, np.ndarray):
        score = torch.from_numpy(score)
    if isinstance(mask, np.ndarray):
        mask = torch.from_numpy(mask)
    if isinstance(embedding, np.ndarray):
        embedding = torch.from_numpy(embedding)
    if isinstance(kernel_label, np.ndarray):
        kernel_label = torch.from_numpy(kernel_label)
    if isinstance(kernel_contour, np.ndarray):
        kernel_contour = torch.from_numpy(kernel_contour)

    if torch.__version__ == 'parrots':
        label = ext_module.pixel_group(
            score,
            mask,
            embedding,
            kernel_label,
            kernel_contour,
            kernel_region_num=kernel_region_num,
            distance_threshold=distance_threshold)
        label = label.tolist()
        label = label[0]
        # The flat buffer is decoded as: the first ``kernel_region_num``
        # entries are per-region lengths, followed by the regions' data
        # stored back to back.
        list_index = kernel_region_num
        pixel_assignment = []
        for x in range(kernel_region_num):
            region_len = int(label[x])
            pixel_assignment.append(
                np.array(
                    label[list_index:list_index + region_len],
                    # ``np.float`` was removed in NumPy 1.24; it was an alias
                    # of the builtin ``float``, i.e. float64.
                    dtype=np.float64))
            list_index = list_index + region_len
    else:
        # Outside parrots the op's return value is passed through unchanged.
        pixel_assignment = ext_module.pixel_group(score, mask, embedding,
                                                  kernel_label, kernel_contour,
                                                  kernel_region_num,
                                                  distance_threshold)
    return pixel_assignment
|
|