import math
import warnings

import torch
import torch.nn as nn


def _no_grad_trunc_normal_(tensor, mean, std, a, b):
    def norm_cdf(x):
        # Standard normal cumulative distribution function.
        return (1. + math.erf(x / math.sqrt(2.))) / 2.

    if (mean < a - 2 * std) or (mean > b + 2 * std):
        warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. "
                      "The distribution of values may be incorrect.",
                      stacklevel=2)

    with torch.no_grad():
        # Get lower and upper cdf values.
        l = norm_cdf((a - mean) / std)
        u = norm_cdf((b - mean) / std)

        # Uniformly fill the tensor with values in [2l - 1, 2u - 1].
        tensor.uniform_(2 * l - 1, 2 * u - 1)

        # Apply the inverse cdf transform to obtain a truncated standard normal.
        tensor.erfinv_()

        # Transform to the requested mean and std.
        tensor.mul_(std * math.sqrt(2.))
        tensor.add_(mean)

        # Clamp to make sure the values are strictly within [a, b].
        tensor.clamp_(min=a, max=b)
        return tensor


def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.):
    r"""Fills the input Tensor with values drawn from a truncated
    normal distribution. The values are effectively drawn from the
    normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
    with values outside :math:`[a, b]` redrawn until they are within
    the bounds. The method used for generating the random values works
    best when :math:`a \leq \text{mean} \leq b`.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        mean: the mean of the normal distribution
        std: the standard deviation of the normal distribution
        a: the minimum cutoff value
        b: the maximum cutoff value

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.trunc_normal_(w)
    """
    try:
        return _no_grad_trunc_normal_(tensor, mean, std, a, b)
    except Exception:
        # Fall back to returning the tensor unchanged if the in-place init fails.
        return tensor


def drop_path(x, drop_prob: float = 0., training: bool = False):
    """Drop paths (Stochastic Depth) per sample (when applied in the main path of residual blocks)."""
    if drop_prob == 0. or not training:
        return x
    keep_prob = 1 - drop_prob
    # One random value per sample, broadcast over all remaining dimensions.
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)
    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
    random_tensor.floor_()  # binarize: 1 with probability keep_prob, else 0
    # Rescale the surviving paths so the expected value is unchanged.
    output = x.div(keep_prob) * random_tensor
    return output


class DropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""

    def __init__(self, drop_prob=None):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        return drop_path(x, self.drop_prob, self.training)


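# Illustrative usage of drop_path / DropPath (a sketch, not part of the module API;
# shapes and probabilities are arbitrary):
#
#     x = torch.randn(8, 196, 256)      # (batch, tokens, channels)
#     layer = DropPath(drop_prob=0.1)
#     layer.train()
#     y = layer(x)                      # ~10% of samples zeroed, the rest scaled by 1/0.9
#     layer.eval()
#     assert torch.equal(layer(x), x)   # identity at inference time

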
def index_points(points, idx):
    """Sample features following the index.

    Args:
        points: input points data, [B, N, C]
        idx: sample index data, [B, S]

    Returns:
        new_points: indexed points data, [B, S, C]
    """
    device = points.device
    B = points.shape[0]
    view_shape = list(idx.shape)
    view_shape[1:] = [1] * (len(view_shape) - 1)
    repeat_shape = list(idx.shape)
    repeat_shape[0] = 1
    # Build batch indices with the same shape as idx so advanced indexing
    # picks entry idx[b, s] from batch b.
    batch_indices = torch.arange(B, dtype=torch.long).to(device).view(view_shape).repeat(repeat_shape)
    new_points = points[batch_indices, idx, :]
    return new_points


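# Illustrative behaviour of index_points (a sketch; the values are made up):
#
#     points = torch.arange(2 * 4 * 3, dtype=torch.float).reshape(2, 4, 3)  # [B=2, N=4, C=3]
#     idx = torch.tensor([[0, 2], [3, 1]])                                  # [B=2, S=2]
#     out = index_points(points, idx)                                       # [B=2, S=2, C=3]
#     # out[0] == points[0, [0, 2]] and out[1] == points[1, [3, 1]]

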
def cluster_dpc_knn(token_dict, cluster_num, k=5, token_mask=None):
    """Cluster tokens with the DPC-KNN algorithm.

    Args:
        token_dict (dict): dict of token information
        cluster_num (int): number of clusters
        k (int): number of nearest neighbors used to estimate the local density.
        token_mask (Tensor[B, N]): mask indicating whether a token is a padded
            empty token. A non-zero value means the token is meaningful, zero
            means it is an empty token. If None, all tokens are regarded as
            meaningful.

    Returns:
        idx_cluster (Tensor[B, N]): cluster index of each token.
        cluster_num (int): actual cluster number, the same as the input
            cluster number.
    """
    with torch.no_grad():
        x = token_dict["x"]
        B, N, C = x.shape

        dist_matrix = torch.cdist(x.float(), x.float()) / (C ** 0.5)

        if token_mask is not None:
            token_mask = token_mask > 0
            # Push distances to empty tokens above the maximum so empty tokens
            # are never picked as nearest neighbors or cluster centers.
            dist_matrix = dist_matrix * token_mask[:, None, :] + \
                          (dist_matrix.max() + 1) * (~token_mask[:, None, :])

        # Estimate the local density of each token from its k nearest neighbors.
        dist_nearest, index_nearest = torch.topk(dist_matrix, k=k, dim=-1, largest=False)
        density = (-(dist_nearest ** 2).mean(dim=-1)).exp()
        # Add slight noise so that no two tokens have exactly the same density.
        density = density + torch.rand(
            density.shape, device=density.device, dtype=density.dtype) * 1e-6

        if token_mask is not None:
            # Padded empty tokens get the lowest density.
            density = density * token_mask

        # Distance indicator: for each token, the distance to the nearest token
        # with a higher density.
        mask = density[:, None, :] > density[:, :, None]
        mask = mask.type(x.dtype)
        dist_max = dist_matrix.flatten(1).max(dim=-1)[0][:, None, None]
        dist, index_parent = (dist_matrix * mask + dist_max * (1 - mask)).min(dim=-1)

        # Select the cluster centers according to the score (distance * density).
        score = dist * density
        _, index_down = torch.topk(score, k=cluster_num, dim=-1)

        # Assign each token to its nearest cluster center.
        dist_matrix = index_points(dist_matrix, index_down)
        idx_cluster = dist_matrix.argmin(dim=1)

        # Make sure each cluster center is assigned to its own cluster.
        idx_batch = torch.arange(B, device=x.device)[:, None].expand(B, cluster_num)
        idx_tmp = torch.arange(cluster_num, device=x.device)[None, :].expand(B, cluster_num)
        idx_cluster[idx_batch.reshape(-1), index_down.reshape(-1)] = idx_tmp.reshape(-1)

    return idx_cluster, cluster_num


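# Illustrative call of cluster_dpc_knn (a sketch; only the "x" entry of token_dict
# is read by this function, and the shapes are arbitrary):
#
#     B, N, C = 2, 64, 32
#     token_dict = {"x": torch.randn(B, N, C)}
#     idx_cluster, cluster_num = cluster_dpc_knn(token_dict, cluster_num=16, k=5)
#     # idx_cluster: [B, N] with values in [0, 16); cluster_num == 16

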
def merge_tokens(token_dict, idx_cluster, cluster_num, token_weight=None):
    """Merge all tokens in the same cluster into a single token.

    Implemented with torch.index_add(). Flops: B*N*(C+2).

    Args:
        token_dict (dict): dict of input token information
        idx_cluster (Tensor[B, N]): cluster index of each token.
        cluster_num (int): number of clusters
        token_weight (Tensor[B, N, 1]): weight of each token.

    Returns:
        out_dict (dict): dict of output token information
    """
    x = token_dict['x']
    idx_token = token_dict['idx_token']
    agg_weight = token_dict['agg_weight']

    B, N, C = x.shape
    if token_weight is None:
        token_weight = x.new_ones(B, N, 1)

    # Flatten the (batch, cluster) index so index_add_ can operate on dim 0.
    idx_batch = torch.arange(B, device=x.device)[:, None]
    idx = idx_cluster + idx_batch * cluster_num

    # Total weight of each cluster, used to normalize the token weights.
    all_weight = token_weight.new_zeros(B * cluster_num, 1)
    all_weight.index_add_(dim=0, index=idx.reshape(B * N),
                          source=token_weight.reshape(B * N, 1))
    all_weight = all_weight + 1e-6
    norm_weight = token_weight / all_weight[idx]

    # Weighted average of the token features within each cluster.
    x_merged = x.new_zeros(B * cluster_num, C)
    source = x * norm_weight
    x_merged.index_add_(dim=0, index=idx.reshape(B * N),
                        source=source.reshape(B * N, C).type(x.dtype))
    x_merged = x_merged.reshape(B, cluster_num, C)

    # Update the token-to-cluster map and the aggregation weight of the
    # original (finest-level) tokens.
    idx_token_new = index_points(idx_cluster[..., None], idx_token).squeeze(-1)
    weight_t = index_points(norm_weight, idx_token)
    agg_weight_new = agg_weight * weight_t
    agg_weight_new = agg_weight_new / agg_weight_new.max(dim=1, keepdim=True)[0]

    out_dict = {}
    out_dict['x'] = x_merged
    out_dict['token_num'] = cluster_num
    out_dict['idx_token'] = idx_token_new
    out_dict['agg_weight'] = agg_weight_new
    out_dict['mask'] = None
    return out_dict


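# Illustrative continuation of the clustering example above (a sketch; the extra
# token_dict entries are the ones merge_tokens expects):
#
#     token_dict = {
#         "x": torch.randn(B, N, C),
#         "idx_token": torch.arange(N)[None, :].repeat(B, 1),
#         "agg_weight": torch.ones(B, N, 1),
#     }
#     idx_cluster, cluster_num = cluster_dpc_knn(token_dict, cluster_num=16, k=5)
#     merged = merge_tokens(token_dict, idx_cluster, cluster_num)
#     # merged["x"]: [B, 16, C], merged["idx_token"]: [B, N], merged["agg_weight"]: [B, N, 1]

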
class CTM(nn.Module):
    """Clustering-based token merging: clusters the input tokens with DPC-KNN
    and merges every cluster into a single token."""

    def __init__(self, sample_ratio, embed_dim, dim_out, k=5):
        super().__init__()
        self.sample_ratio = sample_ratio
        self.dim_out = dim_out
        self.k = k

    def forward(self, token_dict, sample_ratio=None):
        x = token_dict["x"]
        B, N, C = x.shape

        token_weight = x.new_ones(B, N)

        if token_dict["mask"] is not None:
            # Padded tokens (mask == 0) receive a weight of -inf.
            token_weight.masked_fill_((1 - token_dict["mask"]).to(torch.bool), float("-inf"))
        token_weight = token_weight.unsqueeze(2)
        token_dict['x'] = x

        # Determine the number of clusters from the (optional) sample ratio.
        if sample_ratio is not None:
            cluster_num = max(math.ceil(N * sample_ratio), 1)
        elif self.sample_ratio > 1:
            cluster_num = max(math.ceil(self.sample_ratio), 1)
        else:
            cluster_num = max(math.ceil(N * self.sample_ratio), 1)

        # Shrink k when the requested number of clusters is smaller than self.k.
        k = min(3, max(cluster_num // 2, 1)) if self.k > cluster_num else self.k
        idx_cluster, cluster_num = cluster_dpc_knn(
            token_dict, cluster_num, k, token_mask=token_dict["mask"])

        down_dict = merge_tokens(token_dict, idx_cluster, cluster_num, token_weight)
        return down_dict, token_dict


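# Note on sample_ratio (as implemented above, not a documented contract): a value
# in (0, 1] is treated as a fraction of the input tokens, a value > 1 as an
# absolute number of clusters, and a per-call override is always a fraction, e.g.:
#
#     CTM(sample_ratio=0.25, embed_dim=C, dim_out=C)   # keep ceil(N / 4) tokens
#     CTM(sample_ratio=49, embed_dim=C, dim_out=C)     # keep 49 tokens regardless of N
#     ctm(token_dict, sample_ratio=0.5)                # per-call override: ceil(N / 2) tokens

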
class TCBlock(nn.Module):
    def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0.,
                 drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, sr_ratio=1, use_sr_layer=False):
        super().__init__()
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            fan_out //= m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    def forward(self, inputs):
        if isinstance(inputs, (tuple, list)):
            q_dict, kv_dict = inputs
        else:
            q_dict, kv_dict = inputs, None

        x = q_dict['x']
        return q_dict
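

# The following self-check is a sketch added for illustration: it exercises
# trunc_normal_ and the CTM down-sampling path end to end with made-up shapes
# and hyper-parameters (not taken from any particular model configuration).
if __name__ == "__main__":
    torch.manual_seed(0)

    w = torch.empty(3, 5)
    trunc_normal_(w, std=.02)

    B, N, C = 2, 64, 32
    token_dict = {
        "x": torch.randn(B, N, C),
        "token_num": N,
        "idx_token": torch.arange(N)[None, :].repeat(B, 1),
        "agg_weight": torch.ones(B, N, 1),
        "mask": None,
    }
    ctm = CTM(sample_ratio=0.25, embed_dim=C, dim_out=C, k=5)
    down_dict, token_dict = ctm(token_dict)
    print("merged tokens:", down_dict["x"].shape)  # torch.Size([2, 16, 32])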