diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/config.yaml b/v2xverse_late_multiclass_2025_01_28_08_49_56/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d33b18ba973a87f2fcfe6d6ccd879a8c104c54a4 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/config.yaml @@ -0,0 +1,213 @@ +comm_range: 200 +data_augment: +- ALONG_AXIS_LIST: + - x + NAME: random_world_flip +- NAME: random_world_rotation + WORLD_ROT_ANGLE: + - -0.78539816 + - 0.78539816 +- NAME: random_world_scaling + WORLD_SCALE_RANGE: + - 0.95 + - 1.05 +fusion: + args: + clip_pc: false + proj_first: false + core_method: intermediatemulticlass + dataset: v2xverse +input_source: +- lidar +label_type: lidar +loss: + args: + cls_weight: 5.0 + code_weights: + - 1.0 + - 1.0 + - 1.0 + - 1.0 + - 1.0 + - 1.0 + - 5.0 + - 5.0 + loc_weight: 1.0 + target_assigner_config: + box_coder: ResidualCoder + cav_lidar_range: &id004 + - -36 + - -12 + - -22 + - 36 + - 12 + - 14 + gaussian_overlap: 0.1 + max_objs: 40 + min_radius: 2 + out_size_factor: 2 + voxel_size: &id001 + - 0.125 + - 0.125 + - 36 + core_method: center_point_loss_multiclass +lr_scheduler: + core_method: multistep + gamma: 0.1 + step_size: + - 8 + - 15 +model: + args: + anchor_number: 3 + att: + feat_dim: 64 + base_bev_backbone: + compression: 0 + layer_nums: &id002 + - 3 + - 4 + - 5 + layer_strides: + - 2 + - 2 + - 2 + num_filters: &id003 + - 64 + - 128 + - 256 + num_upsample_filter: + - 128 + - 128 + - 128 + resnet: true + upsample_strides: + - 1 + - 2 + - 4 + voxel_size: *id001 + fusion_args: + agg_operator: + feature_dim: 256 + mode: MAX + downsample_rate: 2 + dropout_rate: 0 + in_channels: 256 + layer_nums: *id002 + multi_scale: false + n_head: 8 + num_filters: *id003 + only_attention: true + voxel_size: *id001 + fusion_method: max + lidar_range: *id004 + max_cav: 5 + multi_class: true + out_size_factor: 2 + pillar_vfe: + num_filters: + - 64 + use_absolute_xyz: true + use_norm: true + with_distance: false + point_pillar_scatter: + grid_size: !!python/object/apply:numpy.core.multiarray._reconstruct + args: + - !!python/name:numpy.ndarray '' + - !!python/tuple + - 0 + - !!binary | + Yg== + state: !!python/tuple + - 1 + - !!python/tuple + - 3 + - !!python/object/apply:numpy.dtype + args: + - i8 + - 0 + - 1 + state: !!python/tuple + - 3 + - < + - null + - null + - null + - -1 + - -1 + - 0 + - false + - !!binary | + QAIAAAAAAADAAAAAAAAAAAEAAAAAAAAA + num_features: 64 + shrink_header: + dim: + - 128 + input_dim: 384 + kernal_size: + - 3 + padding: + - 1 + stride: + - 1 + supervise_fusion: false + supervise_single: true + voxel_size: *id001 + core_method: point_pillar_single_multiclass +name: v2xverse_late_multiclass +noise_setting: !!python/object/apply:collections.OrderedDict +- - - add_noise + - false +optimizer: + args: + eps: 1.0e-10 + weight_decay: 0.0001 + core_method: Adam + lr: 0.002 +postprocess: + anchor_args: + D: 1 + H: 192 + W: 576 + cav_lidar_range: *id004 + feature_stride: 2 + h: 1.56 + l: 3.9 + num: 1 + r: &id005 + - 0 + vd: 36 + vh: 0.125 + vw: 0.125 + w: 1.6 + core_method: VoxelPostprocessor + dir_args: + anchor_yaw: *id005 + dir_offset: 0.7853 + num_bins: 1 + gt_range: *id004 + max_num: 100 + nms_thresh: 0.15 + order: hwl + target_args: + neg_threshold: 0.45 + pos_threshold: 0.6 + score_threshold: 0.2 +preprocess: + args: + max_points_per_voxel: 32 + max_voxel_test: 70000 + max_voxel_train: 32000 + voxel_size: *id001 + cav_lidar_range: *id004 + core_method: SpVoxelPreprocessor +root_dir: external_paths/data_root 
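+# Note: point_pillar_scatter.grid_size above serializes a numpy array that
+# appears to decode to [576, 192, 1], i.e. (cav_lidar_range extent) / voxel_size
+# = [(36 - -36) / 0.125, (12 - -12) / 0.125, (14 - -22) / 36], consistent with
+# postprocess.anchor_args (W: 576, H: 192, D: 1, vw/vh: 0.125, vd: 36).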
+test_dir: external_paths/data_root +train_params: + batch_size: 4 + epoches: 40 + eval_freq: 1 + max_cav: 5 + save_freq: 1 +validate_dir: external_paths/data_root +yaml_parser: load_point_pillar_params diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/events.out.tfevents.1738072197.poliwag.engin.umich.edu b/v2xverse_late_multiclass_2025_01_28_08_49_56/events.out.tfevents.1738072197.poliwag.engin.umich.edu new file mode 100644 index 0000000000000000000000000000000000000000..369f7e00f6e94ac83726248cdc23478f4f096ef1 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/events.out.tfevents.1738072197.poliwag.engin.umich.edu @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac3b8a28e7fba347631b57fb22d403037b9f1fa244f0b566d60222d5c9bf5756 +size 498679515 diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/net_epoch_bestval_at14.pth b/v2xverse_late_multiclass_2025_01_28_08_49_56/net_epoch_bestval_at14.pth new file mode 100644 index 0000000000000000000000000000000000000000..08ff64964d58d62e93590dff809cb59a9c65735e --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/net_epoch_bestval_at14.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba3fef03956eb6da6eb9721db6baf142f81f85ac84cd95324c1e37065d387b50 +size 32820345 diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/__init__.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/__pycache__/__init__.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..072108341c943d97d9ccd526e1aec39dccdb9836 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/__pycache__/__init__.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__init__.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__pycache__/__init__.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1a3d17bf3b63d117b7c030907001c890fa2da586 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__pycache__/__init__.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__pycache__/augment_utils.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__pycache__/augment_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2cbd6b4b39372cbe9bde9e480fa45d4ea6a7068c Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__pycache__/augment_utils.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__pycache__/data_augmentor.cpython-37.pyc 
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__pycache__/data_augmentor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1e549f4c1aadf71f8fe17d8364b04b61ba22d6eb Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__pycache__/data_augmentor.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/augment_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/augment_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..af12bedcf1111e9ea4db37ac10395cdce566960e --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/augment_utils.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +# Author: OpenPCDet + +import numpy as np + +from opencood.utils import common_utils + + +def random_flip_along_x(gt_boxes, points): + """ + Args: + gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]] + points: (M, 3 + C) + Returns: + """ + enable = np.random.choice([False, True], replace=False, p=[0.5, 0.5]) + if enable: + gt_boxes[:, 1] = -gt_boxes[:, 1] + gt_boxes[:, 6] = -gt_boxes[:, 6] + points[:, 1] = -points[:, 1] + + if gt_boxes.shape[1] > 7: + gt_boxes[:, 8] = -gt_boxes[:, 8] + + return gt_boxes, points + + +def random_flip_along_y(gt_boxes, points): + """ + Args: + gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]] + points: (M, 3 + C) + Returns: + """ + enable = np.random.choice([False, True], replace=False, p=[0.5, 0.5]) + if enable: + gt_boxes[:, 0] = -gt_boxes[:, 0] + gt_boxes[:, 6] = -(gt_boxes[:, 6] + np.pi) + points[:, 0] = -points[:, 0] + + if gt_boxes.shape[1] > 7: + gt_boxes[:, 7] = -gt_boxes[:, 7] + + return gt_boxes, points + + +def global_rotation(gt_boxes, points, rot_range): + """ + Args: + gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]] + points: (M, 3 + C), + rot_range: [min, max] + Returns: + """ + noise_rotation = np.random.uniform(rot_range[0], + rot_range[1]) + points = common_utils.rotate_points_along_z(points[np.newaxis, :, :], + np.array([noise_rotation]))[0] + + gt_boxes[:, 0:3] = \ + common_utils.rotate_points_along_z(gt_boxes[np.newaxis, :, 0:3], + np.array([noise_rotation]))[0] + gt_boxes[:, 6] += noise_rotation + + if gt_boxes.shape[1] > 7: + gt_boxes[:, 7:9] = common_utils.rotate_points_along_z( + np.hstack((gt_boxes[:, 7:9], np.zeros((gt_boxes.shape[0], 1))))[ + np.newaxis, :, :], + np.array([noise_rotation]))[0][:, 0:2] + + return gt_boxes, points + + +def global_scaling(gt_boxes, points, scale_range): + """ + Args: + gt_boxes: (N, 7), [x, y, z, dx, dy, dz, heading] + points: (M, 3 + C), + scale_range: [min, max] + Returns: + """ + if scale_range[1] - scale_range[0] < 1e-3: + return gt_boxes, points + noise_scale = np.random.uniform(scale_range[0], scale_range[1]) + points[:, :3] *= noise_scale + gt_boxes[:, :6] *= noise_scale + + return gt_boxes, points diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/data_augmentor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/data_augmentor.py new file mode 100644 index 0000000000000000000000000000000000000000..82e5533c1d4857c39bac0272b597d5ecd14d3956 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/data_augmentor.py @@ -0,0 +1,120 @@ +# -*- coding: utf-8 -*- +""" +Class for data augmentation +""" +# Author: Runsheng Xu +# 
License: TDG-Attribution-NonCommercial-NoDistrib + +from functools import partial + +import numpy as np + +from opencood.data_utils.augmentor import augment_utils + + +class DataAugmentor(object): + """ + Data Augmentor. + + Parameters + ---------- + augment_config : list + A list of augmentation configuration. + + Attributes + ---------- + data_augmentor_queue : list + The list of data augmented functions. + """ + + def __init__(self, augment_config, train=True): + self.data_augmentor_queue = [] + self.train = train + + for cur_cfg in augment_config: + cur_augmentor = getattr(self, cur_cfg['NAME'])(config=cur_cfg) + self.data_augmentor_queue.append(cur_augmentor) + + def random_world_flip(self, data_dict=None, config=None): + if data_dict is None: + return partial(self.random_world_flip, config=config) + + gt_boxes, gt_mask, points = data_dict['object_bbx_center'], \ + data_dict['object_bbx_mask'], \ + data_dict['lidar_np'] + gt_boxes_valid = gt_boxes[gt_mask == 1] + + for cur_axis in config['ALONG_AXIS_LIST']: + assert cur_axis in ['x', 'y'] + gt_boxes_valid, points = getattr(augment_utils, + 'random_flip_along_%s' % cur_axis)( + gt_boxes_valid, points, + ) + + gt_boxes[:gt_boxes_valid.shape[0], :] = gt_boxes_valid + + data_dict['object_bbx_center'] = gt_boxes + data_dict['object_bbx_mask'] = gt_mask + data_dict['lidar_np'] = points + + return data_dict + + def random_world_rotation(self, data_dict=None, config=None): + if data_dict is None: + return partial(self.random_world_rotation, config=config) + + rot_range = config['WORLD_ROT_ANGLE'] + if not isinstance(rot_range, list): + rot_range = [-rot_range, rot_range] + + gt_boxes, gt_mask, points = data_dict['object_bbx_center'], \ + data_dict['object_bbx_mask'], \ + data_dict['lidar_np'] + gt_boxes_valid = gt_boxes[gt_mask == 1] + gt_boxes_valid, points = augment_utils.global_rotation( + gt_boxes_valid, points, rot_range=rot_range + ) + gt_boxes[:gt_boxes_valid.shape[0], :] = gt_boxes_valid + + data_dict['object_bbx_center'] = gt_boxes + data_dict['object_bbx_mask'] = gt_mask + data_dict['lidar_np'] = points + + return data_dict + + def random_world_scaling(self, data_dict=None, config=None): + if data_dict is None: + return partial(self.random_world_scaling, config=config) + + gt_boxes, gt_mask, points = data_dict['object_bbx_center'], \ + data_dict['object_bbx_mask'], \ + data_dict['lidar_np'] + gt_boxes_valid = gt_boxes[gt_mask == 1] + + gt_boxes_valid, points = augment_utils.global_scaling( + gt_boxes_valid, points, config['WORLD_SCALE_RANGE'] + ) + gt_boxes[:gt_boxes_valid.shape[0], :] = gt_boxes_valid + + data_dict['object_bbx_center'] = gt_boxes + data_dict['object_bbx_mask'] = gt_mask + data_dict['lidar_np'] = points + + return data_dict + + def forward(self, data_dict): + """ + Args: + data_dict: + points: (N, 3 + C_in) + gt_boxes: optional, (N, 7) [x, y, z, dx, dy, dz, heading] + gt_names: optional, (N), string + ... 
+ + Returns: + """ + if self.train: + for cur_augmentor in self.data_augmentor_queue: + data_dict = cur_augmentor(data_dict=data_dict) + + return data_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__init__.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d0fbdc4333c7b7cad70e442b811ceda71a8a9373 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__init__.py @@ -0,0 +1,35 @@ +from opencood.data_utils.datasets.late_fusion_dataset import getLateFusionDataset +from opencood.data_utils.datasets.late_heter_fusion_dataset import getLateheterFusionDataset +from opencood.data_utils.datasets.late_multiclass_fusion_dataset import getLatemulticlassFusionDataset +from opencood.data_utils.datasets.early_fusion_dataset import getEarlyFusionDataset +from opencood.data_utils.datasets.intermediate_fusion_dataset import getIntermediateFusionDataset +from opencood.data_utils.datasets.intermediate_multiclass_fusion_dataset import getIntermediatemulticlassFusionDataset +from opencood.data_utils.datasets.intermediate_2stage_fusion_dataset import getIntermediate2stageFusionDataset +from opencood.data_utils.datasets.intermediate_heter_fusion_dataset import getIntermediateheterFusionDataset +from opencood.data_utils.datasets.basedataset.opv2v_basedataset import OPV2VBaseDataset +from opencood.data_utils.datasets.basedataset.v2xsim_basedataset import V2XSIMBaseDataset +from opencood.data_utils.datasets.basedataset.dairv2x_basedataset import DAIRV2XBaseDataset +from opencood.data_utils.datasets.basedataset.v2xset_basedataset import V2XSETBaseDataset +from opencood.data_utils.datasets.basedataset.v2xverse_basedataset import V2XVERSEBaseDataset +from opencood.data_utils.datasets.late_multiclass_fusion_dataset import getLatemulticlassFusionDataset +from opencood.data_utils.datasets.early_multiclass_fusion_dataset import getEarlymulticlassFusionDataset + +def build_dataset(dataset_cfg, visualize=False, train=True): + fusion_name = dataset_cfg['fusion']['core_method'] + dataset_name = dataset_cfg['fusion']['dataset'] + + assert fusion_name in ['late', 'lateheter', 'intermediate', 'intermediate2stage', 'intermediateheter', 'intermediatemulticlass', 'early', 'latemulticlass', 'earlymulticlass'] + assert dataset_name in ['opv2v', 'v2xsim', 'dairv2x', 'v2xset', 'v2xverse'] + + fusion_dataset_func = "get" + fusion_name.capitalize() + "FusionDataset" + fusion_dataset_func = eval(fusion_dataset_func) + base_dataset_cls = dataset_name.upper() + "BaseDataset" + base_dataset_cls = eval(base_dataset_cls) + + dataset = fusion_dataset_func(base_dataset_cls)( + params=dataset_cfg, + visualize=visualize, + train=train + ) + + return dataset diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/__init__.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c97ffb53483f77c963cfa88928cf4ee217152248 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/__init__.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/early_fusion_dataset.cpython-37.pyc 
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/early_fusion_dataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a636d44ab6cf8d26897791599e383f68fa56ea55 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/early_fusion_dataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/early_multiclass_fusion_dataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/early_multiclass_fusion_dataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0de6a681dad9df9943a012a13a3a95f4b129dc05 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/early_multiclass_fusion_dataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_2stage_fusion_dataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_2stage_fusion_dataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8dfa895b8f48fe755dde61b409a1e3ce1477cfea Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_2stage_fusion_dataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_fusion_dataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_fusion_dataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2150a33bc33feef273fb9a702609df74f740510a Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_fusion_dataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_heter_fusion_dataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_heter_fusion_dataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..286561c63570b9b338cc7512289f1a4a38e7291f Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_heter_fusion_dataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_multiclass_fusion_dataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_multiclass_fusion_dataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..355a4bb6e683fd9a6756662a05659bfdc9550216 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_multiclass_fusion_dataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/late_fusion_dataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/late_fusion_dataset.cpython-37.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..e25f0750f45864c0eeecdd8cbc6dcbaabfeb23a1 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/late_fusion_dataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/late_heter_fusion_dataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/late_heter_fusion_dataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d2d63c6a5f0d650a81a6e6b28e40d6d6b8dfb258 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/late_heter_fusion_dataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/late_multiclass_fusion_dataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/late_multiclass_fusion_dataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..725417113b70f712c3a3f7d025af36da5f438053 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/late_multiclass_fusion_dataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/dairv2x_basedataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/dairv2x_basedataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3fe14c4aef2efa77fcb296e7c3134b76e50e2225 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/dairv2x_basedataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/opv2v_basedataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/opv2v_basedataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..80aa5cc0c8e851a06562c9a671ff808a835939a3 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/opv2v_basedataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/v2xset_basedataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/v2xset_basedataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8ba1765ac57e011e45d2e64479e9a046921cb8c4 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/v2xset_basedataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/v2xsim_basedataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/v2xsim_basedataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ab1a85a5ac0b6f14520c7952aad357b630cd841d Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/v2xsim_basedataset.cpython-37.pyc differ diff 
--git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/v2xverse_basedataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/v2xverse_basedataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b69bab71d254e0dfd9ea6de4212cd670abf6fef8 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/v2xverse_basedataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/dairv2x_basedataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/dairv2x_basedataset.py new file mode 100644 index 0000000000000000000000000000000000000000..35d6641f358e7813adeb492680571ae7066eeebf --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/dairv2x_basedataset.py @@ -0,0 +1,285 @@ +import os +from collections import OrderedDict +import cv2 +import h5py +import torch +import numpy as np +from functools import partial +from torch.utils.data import Dataset +from PIL import Image +import random +import opencood.utils.pcd_utils as pcd_utils +from opencood.data_utils.augmentor.data_augmentor import DataAugmentor +from opencood.hypes_yaml.yaml_utils import load_yaml +from opencood.utils.pcd_utils import downsample_lidar_minimum +from opencood.utils.camera_utils import load_camera_data, load_intrinsic_DAIR_V2X +from opencood.utils.common_utils import read_json +from opencood.utils.transformation_utils import tfm_to_pose, rot_and_trans_to_trasnformation_matrix +from opencood.utils.transformation_utils import veh_side_rot_and_trans_to_trasnformation_matrix +from opencood.utils.transformation_utils import inf_side_rot_and_trans_to_trasnformation_matrix +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.data_utils.post_processor import build_postprocessor + +class DAIRV2XBaseDataset(Dataset): + def __init__(self, params, visualize, train=True): + self.params = params + self.visualize = visualize + self.train = train + + self.pre_processor = build_preprocessor(params["preprocess"], train) + self.post_processor = build_postprocessor(params["postprocess"], train) + self.post_processor.generate_gt_bbx = self.post_processor.generate_gt_bbx_by_iou + if 'data_augment' in params: # late and early + self.data_augmentor = DataAugmentor(params['data_augment'], train) + else: # intermediate + self.data_augmentor = None + + if 'clip_pc' in params['fusion']['args'] and params['fusion']['args']['clip_pc']: + self.clip_pc = True + else: + self.clip_pc = False + + if 'train_params' not in params or 'max_cav' not in params['train_params']: + self.max_cav = 2 + else: + self.max_cav = params['train_params']['max_cav'] + + self.load_lidar_file = True if 'lidar' in params['input_source'] or self.visualize else False + self.load_camera_file = True if 'camera' in params['input_source'] else False + self.load_depth_file = True if 'depth' in params['input_source'] else False + + assert self.load_depth_file is False + + self.label_type = params['label_type'] # 'lidar' or 'camera' + self.generate_object_center = self.generate_object_center_lidar if self.label_type == "lidar" \ + else self.generate_object_center_camera + + if self.load_camera_file: + self.data_aug_conf = params["fusion"]["args"]["data_aug_conf"] + + if self.train: + split_dir = 
params['root_dir'] + else: + split_dir = params['validate_dir'] + + self.root_dir = params['data_dir'] + + self.split_info = read_json(split_dir) + co_datainfo = read_json(os.path.join(self.root_dir, 'cooperative/data_info.json')) + self.co_data = OrderedDict() + for frame_info in co_datainfo: + veh_frame_id = frame_info['vehicle_image_path'].split("/")[-1].replace(".jpg", "") + self.co_data[veh_frame_id] = frame_info + + if "noise_setting" not in self.params: + self.params['noise_setting'] = OrderedDict() + self.params['noise_setting']['add_noise'] = False + + def reinitialize(self): + pass + + def retrieve_base_data(self, idx): + """ + Given the index, return the corresponding data. + NOTICE! + It is different from Intermediate Fusion and Early Fusion + Label is not cooperative and loaded for both veh side and inf side. + Parameters + ---------- + idx : int + Index given by dataloader. + Returns + ------- + data : dict + The dictionary contains loaded yaml params and lidar data for + each cav. + """ + veh_frame_id = self.split_info[idx] + frame_info = self.co_data[veh_frame_id] + system_error_offset = frame_info["system_error_offset"] + data = OrderedDict() + + data[0] = OrderedDict() + data[0]['ego'] = True + data[1] = OrderedDict() + data[1]['ego'] = False + + data[0]['params'] = OrderedDict() + data[1]['params'] = OrderedDict() + + # pose of agent + lidar_to_novatel = read_json(os.path.join(self.root_dir,'vehicle-side/calib/lidar_to_novatel/'+str(veh_frame_id)+'.json')) + novatel_to_world = read_json(os.path.join(self.root_dir,'vehicle-side/calib/novatel_to_world/'+str(veh_frame_id)+'.json')) + transformation_matrix = veh_side_rot_and_trans_to_trasnformation_matrix(lidar_to_novatel, novatel_to_world) + data[0]['params']['lidar_pose'] = tfm_to_pose(transformation_matrix) + + inf_frame_id = frame_info['infrastructure_image_path'].split("/")[-1].replace(".jpg", "") + virtuallidar_to_world = read_json(os.path.join(self.root_dir,'infrastructure-side/calib/virtuallidar_to_world/'+str(inf_frame_id)+'.json')) + transformation_matrix = inf_side_rot_and_trans_to_trasnformation_matrix(virtuallidar_to_world, system_error_offset) + data[1]['params']['lidar_pose'] = tfm_to_pose(transformation_matrix) + + data[0]['params']['vehicles_front'] = read_json(os.path.join(self.root_dir,frame_info['cooperative_label_path'].replace("label_world", "label_world_backup"))) + data[0]['params']['vehicles_all'] = read_json(os.path.join(self.root_dir,frame_info['cooperative_label_path'])) + + data[1]['params']['vehicles_front'] = [] # we only load cooperative label in vehicle side + data[1]['params']['vehicles_all'] = [] # we only load cooperative label in vehicle side + + if self.load_camera_file: + data[0]['camera_data'] = load_camera_data([os.path.join(self.root_dir, frame_info["vehicle_image_path"])]) + data[0]['params']['camera0'] = OrderedDict() + data[0]['params']['camera0']['extrinsic'] = rot_and_trans_to_trasnformation_matrix( \ + read_json(os.path.join(self.root_dir, 'vehicle-side/calib/lidar_to_camera/'+str(veh_frame_id)+'.json'))) + data[0]['params']['camera0']['intrinsic'] = load_intrinsic_DAIR_V2X( \ + read_json(os.path.join(self.root_dir, 'vehicle-side/calib/camera_intrinsic/'+str(veh_frame_id)+'.json'))) + + data[1]['camera_data']= load_camera_data([os.path.join(self.root_dir,frame_info["infrastructure_image_path"])]) + data[1]['params']['camera0'] = OrderedDict() + data[1]['params']['camera0']['extrinsic'] = rot_and_trans_to_trasnformation_matrix( \ + read_json(os.path.join(self.root_dir, 
'infrastructure-side/calib/virtuallidar_to_camera/'+str(inf_frame_id)+'.json'))) + data[1]['params']['camera0']['intrinsic'] = load_intrinsic_DAIR_V2X( \ + read_json(os.path.join(self.root_dir, 'infrastructure-side/calib/camera_intrinsic/'+str(inf_frame_id)+'.json'))) + + + if self.load_lidar_file or self.visualize: + data[0]['lidar_np'], _ = pcd_utils.read_pcd(os.path.join(self.root_dir,frame_info["vehicle_pointcloud_path"])) + data[1]['lidar_np'], _ = pcd_utils.read_pcd(os.path.join(self.root_dir,frame_info["infrastructure_pointcloud_path"])) + + + # Label for single side + data[0]['params']['vehicles_single_front'] = read_json(os.path.join(self.root_dir, \ + 'vehicle-side/label/lidar_backup/{}.json'.format(veh_frame_id))) + data[0]['params']['vehicles_single_all'] = read_json(os.path.join(self.root_dir, \ + 'vehicle-side/label/lidar/{}.json'.format(veh_frame_id))) + data[1]['params']['vehicles_single_front'] = read_json(os.path.join(self.root_dir, \ + 'infrastructure-side/label/virtuallidar/{}.json'.format(inf_frame_id))) + data[1]['params']['vehicles_single_all'] = read_json(os.path.join(self.root_dir, \ + 'infrastructure-side/label/virtuallidar/{}.json'.format(inf_frame_id))) + + if getattr(self, "heterogeneous", False): + self.generate_object_center_lidar = \ + partial(self.generate_object_center_single_hetero, modality='lidar') + self.generate_object_center_camera = \ + partial(self.generate_object_center_single_hetero, modality='camera') + + # by default + data[0]['modality_name'] = 'm1' + data[1]['modality_name'] = 'm2' + # veh cam inf lidar + data[0]['modality_name'] = 'm2' + data[1]['modality_name'] = 'm1' + + if self.train: # randomly choose LiDAR or Camera to be Ego + p = np.random.rand() + if p > 0.5: + data[0], data[1] = data[1], data[0] + data[0]['ego'] = True + data[1]['ego'] = False + else: + # evaluate, the agent of ego modality should be ego + if self.adaptor.mapping_dict[data[0]['modality_name']] not in self.ego_modality and \ + self.adaptor.mapping_dict[data[1]['modality_name']] in self.ego_modality: + data[0], data[1] = data[1], data[0] + data[0]['ego'] = True + data[1]['ego'] = False + + data[0]['modality_name'] = self.adaptor.reassign_cav_modality(data[0]['modality_name'], 0) + data[1]['modality_name'] = self.adaptor.reassign_cav_modality(data[1]['modality_name'], 1) + + + return data + + + def __len__(self): + return len(self.split_info) + + def __getitem__(self, idx): + pass + + + def generate_object_center_lidar(self, + cav_contents, + reference_lidar_pose): + """ + reference lidar 's coordinate + """ + for cav_content in cav_contents: + cav_content['params']['vehicles'] = cav_content['params']['vehicles_all'] + return self.post_processor.generate_object_center_dairv2x(cav_contents, + reference_lidar_pose) + + def generate_object_center_camera(self, + cav_contents, + reference_lidar_pose): + """ + reference lidar 's coordinate + """ + for cav_content in cav_contents: + cav_content['params']['vehicles'] = cav_content['params']['vehicles_front'] + return self.post_processor.generate_object_center_dairv2x(cav_contents, + reference_lidar_pose) + + ### Add new func for single side + def generate_object_center_single(self, + cav_contents, + reference_lidar_pose, + **kwargs): + """ + veh or inf 's coordinate. + + reference_lidar_pose is of no use. 
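+        Uses 'vehicles_single_front' when self.label_type == 'camera',
+        otherwise 'vehicles_single_all', then delegates to
+        post_processor.generate_object_center_dairv2x_single.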
+ """ + suffix = "_single" + for cav_content in cav_contents: + cav_content['params']['vehicles_single'] = \ + cav_content['params']['vehicles_single_front'] if self.label_type == 'camera' else \ + cav_content['params']['vehicles_single_all'] + return self.post_processor.generate_object_center_dairv2x_single(cav_contents, suffix) + + ### Add for heterogeneous, transforming the single label from self coord. to ego coord. + def generate_object_center_single_hetero(self, + cav_contents, + reference_lidar_pose, + modality): + """ + loading the object from single agent. + + The same as *generate_object_center_single*, but it will transform the object to reference(ego) coordinate, + using reference_lidar_pose. + """ + suffix = "_single" + for cav_content in cav_contents: + cav_content['params']['vehicles_single'] = \ + cav_content['params']['vehicles_single_front'] if modality == 'camera' else \ + cav_content['params']['vehicles_single_all'] + return self.post_processor.generate_object_center_dairv2x_single_hetero(cav_contents, reference_lidar_pose, suffix) + + + def get_ext_int(self, params, camera_id): + lidar_to_camera = params["camera%d" % camera_id]['extrinsic'].astype(np.float32) # R_cw + camera_to_lidar = np.linalg.inv(lidar_to_camera) # R_wc + camera_intrinsic = params["camera%d" % camera_id]['intrinsic'].astype(np.float32 + ) + return camera_to_lidar, camera_intrinsic + + def augment(self, lidar_np, object_bbx_center, object_bbx_mask): + """ + Given the raw point cloud, augment by flipping and rotation. + Parameters + ---------- + lidar_np : np.ndarray + (n, 4) shape + object_bbx_center : np.ndarray + (n, 7) shape to represent bbx's x, y, z, h, w, l, yaw + object_bbx_mask : np.ndarray + Indicate which elements in object_bbx_center are padded. + """ + tmp_dict = {'lidar_np': lidar_np, + 'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask} + tmp_dict = self.data_augmentor.forward(tmp_dict) + + lidar_np = tmp_dict['lidar_np'] + object_bbx_center = tmp_dict['object_bbx_center'] + object_bbx_mask = tmp_dict['object_bbx_mask'] + + return lidar_np, object_bbx_center, object_bbx_mask \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/opv2v_basedataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/opv2v_basedataset.py new file mode 100644 index 0000000000000000000000000000000000000000..8bf0662325c49fdbe7ee4c375873ab9632e1c5ac --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/opv2v_basedataset.py @@ -0,0 +1,479 @@ + +import os +from collections import OrderedDict +import cv2 +import h5py +import torch +import numpy as np +from torch.utils.data import Dataset +from PIL import Image +import json +import random +import opencood.utils.pcd_utils as pcd_utils +from opencood.data_utils.augmentor.data_augmentor import DataAugmentor +from opencood.hypes_yaml.yaml_utils import load_yaml +from opencood.utils.camera_utils import load_camera_data +from opencood.utils.transformation_utils import x1_to_x2 +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.data_utils.post_processor import build_postprocessor + +class OPV2VBaseDataset(Dataset): + def __init__(self, params, visualize, train=True): + self.params = params + self.visualize = visualize + self.train = train + + self.pre_processor = build_preprocessor(params["preprocess"], train) + self.post_processor = 
build_postprocessor(params["postprocess"], train) + if 'data_augment' in params: # late and early + self.data_augmentor = DataAugmentor(params['data_augment'], train) + else: # intermediate + self.data_augmentor = None + + if self.train: + root_dir = params['root_dir'] + else: + root_dir = params['validate_dir'] + self.root_dir = root_dir + + print("Dataset dir:", root_dir) + + if 'train_params' not in params or \ + 'max_cav' not in params['train_params']: + self.max_cav = 5 + else: + self.max_cav = params['train_params']['max_cav'] + + self.load_lidar_file = True if 'lidar' in params['input_source'] or self.visualize else False + self.load_camera_file = True if 'camera' in params['input_source'] else False + self.load_depth_file = True if 'depth' in params['input_source'] else False + + self.label_type = params['label_type'] # 'lidar' or 'camera' + self.generate_object_center = self.generate_object_center_lidar if self.label_type == "lidar" \ + else self.generate_object_center_camera + self.generate_object_center_single = self.generate_object_center # will it follows 'self.generate_object_center' when 'self.generate_object_center' change? + + if self.load_camera_file: + self.data_aug_conf = params["fusion"]["args"]["data_aug_conf"] + + # by default, we load lidar, camera and metadata. But users may + # define additional inputs/tasks + self.add_data_extension = \ + params['add_data_extension'] if 'add_data_extension' \ + in params else [] + + if "noise_setting" not in self.params: + self.params['noise_setting'] = OrderedDict() + self.params['noise_setting']['add_noise'] = False + + # first load all paths of different scenarios + scenario_folders = sorted([os.path.join(root_dir, x) + for x in os.listdir(root_dir) if + os.path.isdir(os.path.join(root_dir, x))]) + + self.scenario_folders = scenario_folders + + self.reinitialize() + + + def reinitialize(self): + # Structure: {scenario_id : {cav_1 : {timestamp1 : {yaml: path, + # lidar: path, cameras:list of path}}}} + self.scenario_database = OrderedDict() + self.len_record = [] + + # loop over all scenarios + for (i, scenario_folder) in enumerate(self.scenario_folders): + self.scenario_database.update({i: OrderedDict()}) + + # at least 1 cav should show up + if self.train: + cav_list = [x for x in os.listdir(scenario_folder) + if os.path.isdir( + os.path.join(scenario_folder, x))] + # cav_list = sorted(cav_list) + random.shuffle(cav_list) + else: + cav_list = sorted([x for x in os.listdir(scenario_folder) + if os.path.isdir( + os.path.join(scenario_folder, x))]) + assert len(cav_list) > 0 + + """ + roadside unit data's id is always negative, so here we want to + make sure they will be in the end of the list as they shouldn't + be ego vehicle. 
+ """ + if int(cav_list[0]) < 0: + cav_list = cav_list[1:] + [cav_list[0]] + + """ + make the first cav to be ego modality + """ + if getattr(self, "heterogeneous", False): + scenario_name = scenario_folder.split("/")[-1] + cav_list = self.adaptor.reorder_cav_list(cav_list, scenario_name) + + + # loop over all CAV data + for (j, cav_id) in enumerate(cav_list): + if j > self.max_cav - 1: + print('too many cavs reinitialize') + break + self.scenario_database[i][cav_id] = OrderedDict() + + # save all yaml files to the dictionary + cav_path = os.path.join(scenario_folder, cav_id) + + yaml_files = \ + sorted([os.path.join(cav_path, x) + for x in os.listdir(cav_path) if + x.endswith('.yaml') and 'additional' not in x]) + + # this timestamp is not ready + yaml_files = [x for x in yaml_files if not ("2021_08_20_21_10_24" in x and "000265" in x)] + + timestamps = self.extract_timestamps(yaml_files) + + for timestamp in timestamps: + self.scenario_database[i][cav_id][timestamp] = \ + OrderedDict() + yaml_file = os.path.join(cav_path, + timestamp + '.yaml') + lidar_file = os.path.join(cav_path, + timestamp + '.pcd') + camera_files = self.find_camera_files(cav_path, + timestamp) + depth_files = self.find_camera_files(cav_path, + timestamp, sensor="depth") + + self.scenario_database[i][cav_id][timestamp]['yaml'] = \ + yaml_file + self.scenario_database[i][cav_id][timestamp]['lidar'] = \ + lidar_file + self.scenario_database[i][cav_id][timestamp]['cameras'] = \ + camera_files + self.scenario_database[i][cav_id][timestamp]['depths'] = \ + depth_files + + if getattr(self, "heterogeneous", False): + scenario_name = scenario_folder.split("/")[-1] + + cav_modality = self.adaptor.reassign_cav_modality(self.modality_assignment[scenario_name][cav_id] , j) + + self.scenario_database[i][cav_id][timestamp]['modality_name'] = cav_modality + + self.scenario_database[i][cav_id][timestamp]['lidar'] = \ + self.adaptor.switch_lidar_channels(cav_modality, lidar_file) + + + # load extra data + for file_extension in self.add_data_extension: + file_name = \ + os.path.join(cav_path, + timestamp + '_' + file_extension) + + self.scenario_database[i][cav_id][timestamp][ + file_extension] = file_name + + # Assume all cavs will have the same timestamps length. Thus + # we only need to calculate for the first vehicle in the + # scene. + if j == 0: + # we regard the agent with the minimum id as the ego + self.scenario_database[i][cav_id]['ego'] = True + if not self.len_record: + self.len_record.append(len(timestamps)) + else: + prev_last = self.len_record[-1] + self.len_record.append(prev_last + len(timestamps)) + else: + self.scenario_database[i][cav_id]['ego'] = False + + + def retrieve_base_data(self, idx): + """ + Given the index, return the corresponding data. + + Parameters + ---------- + idx : int + Index given by dataloader. + + Returns + ------- + data : dict + The dictionary contains loaded yaml params and lidar data for + each cav. 
+ """ + # we loop the accumulated length list to see get the scenario index + scenario_index = 0 + for i, ele in enumerate(self.len_record): + if idx < ele: + scenario_index = i + break + scenario_database = self.scenario_database[scenario_index] + + # check the timestamp index + timestamp_index = idx if scenario_index == 0 else \ + idx - self.len_record[scenario_index - 1] + # retrieve the corresponding timestamp key + timestamp_key = self.return_timestamp_key(scenario_database, + timestamp_index) + data = OrderedDict() + # load files for all CAVs + for cav_id, cav_content in scenario_database.items(): + data[cav_id] = OrderedDict() + data[cav_id]['ego'] = cav_content['ego'] + + # load param file: json is faster than yaml + json_file = cav_content[timestamp_key]['yaml'].replace("yaml", "json") + if os.path.exists(json_file): + with open(json_file, "r") as f: + data[cav_id]['params'] = json.load(f) + else: + data[cav_id]['params'] = \ + load_yaml(cav_content[timestamp_key]['yaml']) + + # load camera file: hdf5 is faster than png + hdf5_file = cav_content[timestamp_key]['cameras'][0].replace("camera0.png", "imgs.hdf5") + + if os.path.exists(hdf5_file): + with h5py.File(hdf5_file, "r") as f: + data[cav_id]['camera_data'] = [] + data[cav_id]['depth_data'] = [] + for i in range(4): + data[cav_id]['camera_data'].append(Image.fromarray(f[f'camera{i}'][()])) + data[cav_id]['depth_data'].append(Image.fromarray(f[f'depth{i}'][()])) + else: + if self.load_camera_file: + data[cav_id]['camera_data'] = \ + load_camera_data(cav_content[timestamp_key]['cameras']) + if self.load_depth_file: + data[cav_id]['depth_data'] = \ + load_camera_data(cav_content[timestamp_key]['depths']) + + # load lidar file + if self.load_lidar_file or self.visualize: + data[cav_id]['lidar_np'] = \ + pcd_utils.pcd_to_np(cav_content[timestamp_key]['lidar']) + + if getattr(self, "heterogeneous", False): + data[cav_id]['modality_name'] = cav_content[timestamp_key]['modality_name'] + + for file_extension in self.add_data_extension: + # if not find in the current directory + # go to additional folder + if not os.path.exists(cav_content[timestamp_key][file_extension]): + cav_content[timestamp_key][file_extension] = cav_content[timestamp_key][file_extension].replace("train","additional/train") + cav_content[timestamp_key][file_extension] = cav_content[timestamp_key][file_extension].replace("validate","additional/validate") + cav_content[timestamp_key][file_extension] = cav_content[timestamp_key][file_extension].replace("test","additional/test") + + if '.yaml' in file_extension: + data[cav_id][file_extension] = \ + load_yaml(cav_content[timestamp_key][file_extension]) + else: + data[cav_id][file_extension] = \ + cv2.imread(cav_content[timestamp_key][file_extension]) + + + return data + + def __len__(self): + return self.len_record[-1] + + def __getitem__(self, idx): + """ + Abstract method, needs to be define by the children class. + """ + pass + + @staticmethod + def extract_timestamps(yaml_files): + """ + Given the list of the yaml files, extract the mocked timestamps. + + Parameters + ---------- + yaml_files : list + The full path of all yaml files of ego vehicle + + Returns + ------- + timestamps : list + The list containing timestamps only. 
+ """ + timestamps = [] + + for file in yaml_files: + res = file.split('/')[-1] + + timestamp = res.replace('.yaml', '') + timestamps.append(timestamp) + + return timestamps + + @staticmethod + def return_timestamp_key(scenario_database, timestamp_index): + """ + Given the timestamp index, return the correct timestamp key, e.g. + 2 --> '000078'. + + Parameters + ---------- + scenario_database : OrderedDict + The dictionary contains all contents in the current scenario. + + timestamp_index : int + The index for timestamp. + + Returns + ------- + timestamp_key : str + The timestamp key saved in the cav dictionary. + """ + # get all timestamp keys + timestamp_keys = list(scenario_database.items())[0][1] + # retrieve the correct index + timestamp_key = list(timestamp_keys.items())[timestamp_index][0] + + return timestamp_key + + @staticmethod + def find_camera_files(cav_path, timestamp, sensor="camera"): + """ + Retrieve the paths to all camera files. + + Parameters + ---------- + cav_path : str + The full file path of current cav. + + timestamp : str + Current timestamp + + sensor : str + "camera" or "depth" + + Returns + ------- + camera_files : list + The list containing all camera png file paths. + """ + camera0_file = os.path.join(cav_path, + timestamp + f'_{sensor}0.png') + camera1_file = os.path.join(cav_path, + timestamp + f'_{sensor}1.png') + camera2_file = os.path.join(cav_path, + timestamp + f'_{sensor}2.png') + camera3_file = os.path.join(cav_path, + timestamp + f'_{sensor}3.png') + return [camera0_file, camera1_file, camera2_file, camera3_file] + + + def augment(self, lidar_np, object_bbx_center, object_bbx_mask): + """ + Given the raw point cloud, augment by flipping and rotation. + + Parameters + ---------- + lidar_np : np.ndarray + (n, 4) shape + + object_bbx_center : np.ndarray + (n, 7) shape to represent bbx's x, y, z, h, w, l, yaw + + object_bbx_mask : np.ndarray + Indicate which elements in object_bbx_center are padded. + """ + tmp_dict = {'lidar_np': lidar_np, + 'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask} + tmp_dict = self.data_augmentor.forward(tmp_dict) + + lidar_np = tmp_dict['lidar_np'] + object_bbx_center = tmp_dict['object_bbx_center'] + object_bbx_mask = tmp_dict['object_bbx_mask'] + + return lidar_np, object_bbx_center, object_bbx_mask + + + def generate_object_center_lidar(self, + cav_contents, + reference_lidar_pose): + """ + Retrieve all objects in a format of (n, 7), where 7 represents + x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw. + The object_bbx_center is in ego coordinate. + + Notice: it is a wrap of postprocessor + + Parameters + ---------- + cav_contents : list + List of dictionary, save all cavs' information. + in fact it is used in get_item_single_car, so the list length is 1 + + reference_lidar_pose : list + The final target lidar pose with length 6. + + Returns + ------- + object_np : np.ndarray + Shape is (max_num, 7). + mask : np.ndarray + Shape is (max_num,). + object_ids : list + Length is number of bbx in current sample. + """ + return self.post_processor.generate_object_center(cav_contents, + reference_lidar_pose) + + def generate_object_center_camera(self, + cav_contents, + reference_lidar_pose): + """ + Retrieve all objects in a format of (n, 7), where 7 represents + x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw. + The object_bbx_center is in ego coordinate. + + Notice: it is a wrap of postprocessor + + Parameters + ---------- + cav_contents : list + List of dictionary, save all cavs' information. 
+ in fact it is used in get_item_single_car, so the list length is 1 + + reference_lidar_pose : list + The final target lidar pose with length 6. + + visibility_map : np.ndarray + for OPV2V, its 256*256 resolution. 0.39m per pixel. heading up. + + Returns + ------- + object_np : np.ndarray + Shape is (max_num, 7). + mask : np.ndarray + Shape is (max_num,). + object_ids : list + Length is number of bbx in current sample. + """ + return self.post_processor.generate_visible_object_center( + cav_contents, reference_lidar_pose + ) + + def get_ext_int(self, params, camera_id): + camera_coords = np.array(params["camera%d" % camera_id]["cords"]).astype( + np.float32) + camera_to_lidar = x1_to_x2( + camera_coords, params["lidar_pose_clean"] + ).astype(np.float32) # T_LiDAR_camera + camera_to_lidar = camera_to_lidar @ np.array( + [[0, 0, 1, 0], [1, 0, 0, 0], [0, -1, 0, 0], [0, 0, 0, 1]], + dtype=np.float32) # UE4 coord to opencv coord + camera_intrinsic = np.array(params["camera%d" % camera_id]["intrinsic"]).astype( + np.float32 + ) + return camera_to_lidar, camera_intrinsic \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/v2xset_basedataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/v2xset_basedataset.py new file mode 100644 index 0000000000000000000000000000000000000000..52804df0ecde048154e0259457a0fb5df896a2d3 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/v2xset_basedataset.py @@ -0,0 +1,24 @@ +from opencood.data_utils.datasets.basedataset.opv2v_basedataset import OPV2VBaseDataset + +# All the same as OPV2V +class V2XSETBaseDataset(OPV2VBaseDataset): + def __init__(self, params, visulize, train=True): + super().__init__(params, visulize, train) + + if self.load_camera_file is True: # '2021_09_09_13_20_58'. This scenario has only 3 camera files? + scenario_folders_new = [x for x in self.scenario_folders if '2021_09_09_13_20_58' not in x] + self.scenario_folders = scenario_folders_new + self.reinitialize() + + + def generate_object_center_camera(self, + cav_contents, + reference_lidar_pose): + """ + Since V2XSet has not release bev_visiblity map, we can only filter object by range. 
+ + Suppose the detection range of camera is within 50m + """ + return self.post_processor.generate_object_center_v2xset_camera( + cav_contents, reference_lidar_pose + ) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/v2xsim_basedataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/v2xsim_basedataset.py new file mode 100644 index 0000000000000000000000000000000000000000..0ca114ffab6fa684bd25458bd16775bdb08a487f --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/v2xsim_basedataset.py @@ -0,0 +1,238 @@ +# Author: Yangheng Zhao +import os +import pickle +from collections import OrderedDict +from typing import Dict +from abc import abstractmethod +import numpy as np +import torch +from torch.utils.data import Dataset + +from opencood.data_utils.augmentor.data_augmentor import DataAugmentor +from opencood.utils.common_utils import read_json +from opencood.utils.transformation_utils import tfm_to_pose +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.data_utils.post_processor import build_postprocessor + +class V2XSIMBaseDataset(Dataset): + """ + First version. + Load V2X-sim 2.0 using yifan lu's pickle file. + Only support LiDAR data. + """ + + def __init__(self, + params: Dict, + visualize: bool = False, + train: bool = True): + self.params = params + self.visualize = visualize + self.train = train + + self.pre_processor = build_preprocessor(params["preprocess"], train) + self.post_processor = build_postprocessor(params["postprocess"], train) + if 'data_augment' in params: # late and early + self.data_augmentor = DataAugmentor(params['data_augment'], train) + else: # intermediate + self.data_augmentor = None + + if self.train: + root_dir = params['root_dir'] + else: + root_dir = params['validate_dir'] + self.root_dir = root_dir + + print("Dataset dir:", root_dir) + + if 'train_params' not in params or \ + 'max_cav' not in params['train_params']: + self.max_cav = 5 + else: + self.max_cav = params['train_params']['max_cav'] + + self.load_lidar_file = True if 'lidar' in params['input_source'] or self.visualize else False + self.load_camera_file = True if 'camera' in params['input_source'] else False + self.load_depth_file = True if 'depth' in params['input_source'] else False + + self.label_type = params['label_type'] # 'lidar' or 'camera' + assert self.label_type in ['lidar', 'camera'] + + self.generate_object_center = self.generate_object_center_lidar if self.label_type == "lidar" \ + else self.generate_object_center_camera + self.generate_object_center_single = self.generate_object_center + + self.add_data_extension = \ + params['add_data_extension'] if 'add_data_extension' \ + in params else [] + + if "noise_setting" not in self.params: + self.params['noise_setting'] = OrderedDict() + self.params['noise_setting']['add_noise'] = False + + with open(self.root_dir, 'rb') as f: + dataset_info = pickle.load(f) + self.dataset_info_pkl = dataset_info + + # TODO param: one as ego or all as ego? 
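+        # 'one': exactly one agent (the first in the per-sample permutation,
+        # i.e. j == 0 below) is marked as ego; any other ego_mode currently
+        # raises NotImplementedError in reinitialize().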
+ self.ego_mode = 'one' # "all" + + self.reinitialize() + + def reinitialize(self): + self.scene_database = OrderedDict() + if self.ego_mode == 'one': + self.len_record = len(self.dataset_info_pkl) + else: + raise NotImplementedError(self.ego_mode) + + for i, scene_info in enumerate(self.dataset_info_pkl): + self.scene_database.update({i: OrderedDict()}) + cav_num = scene_info['agent_num'] + assert cav_num > 0 + + if self.train: + cav_ids = 1 + np.random.permutation(cav_num) + else: + cav_ids = list(range(1, cav_num + 1)) + + + for j, cav_id in enumerate(cav_ids): + if j > self.max_cav - 1: + print('too many cavs reinitialize') + break + + self.scene_database[i][cav_id] = OrderedDict() + + self.scene_database[i][cav_id]['ego'] = j==0 + + self.scene_database[i][cav_id]['lidar'] = scene_info[f'lidar_path_{cav_id}'] + # need to delete this line is running in /GPFS + self.scene_database[i][cav_id]['lidar'] = \ + self.scene_database[i][cav_id]['lidar'].replace("/GPFS/rhome/yifanlu/workspace/dataset/v2xsim2-complete", "dataset/V2X-Sim-2.0") + + self.scene_database[i][cav_id]['params'] = OrderedDict() + self.scene_database[i][cav_id][ + 'params']['lidar_pose'] = tfm_to_pose( + scene_info[f"lidar_pose_{cav_id}"] + ) # [x, y, z, roll, pitch, yaw] + self.scene_database[i][cav_id]['params'][ + 'vehicles'] = scene_info[f'labels_{cav_id}'][ + 'gt_boxes_global'] + self.scene_database[i][cav_id]['params'][ + 'object_ids'] = scene_info[f'labels_{cav_id}'][ + 'gt_object_ids'].tolist() + + def __len__(self) -> int: + return self.len_record + + @abstractmethod + def __getitem__(self, index): + pass + + def retrieve_base_data(self, idx): + """ + Given the index, return the corresponding data. + + Parameters + ---------- + idx : int + Index given by dataloader. + + Returns + ------- + data : dict + The dictionary contains loaded yaml params and lidar data for + each cav. + """ + + data = OrderedDict() + # { + # 'cav_id0':{ + # 'ego': bool, + # 'params': { + # 'lidar_pose': [x, y, z, roll, pitch, yaw], + # 'vehicles':{ + # 'id': {'angle', 'center', 'extent', 'location'}, + # ... + # } + # },# 包含agent位置信息和object信息 + # 'camera_data':, + # 'depth_data':, + # 'lidar_np':, + # ... + # } + # 'cav_id1': , + # ... + # } + scene = self.scene_database[idx] + for cav_id, cav_content in scene.items(): + data[f'{cav_id}'] = OrderedDict() + data[f'{cav_id}']['ego'] = cav_content['ego'] + + data[f'{cav_id}']['params'] = cav_content['params'] + + # load the corresponding data into the dictionary + nbr_dims = 4 # x,y,z,intensity + scan = np.fromfile(cav_content['lidar'], dtype='float32') + points = scan.reshape((-1, 5))[:, :nbr_dims] + data[f'{cav_id}']['lidar_np'] = points + + return data + + def generate_object_center_lidar(self, cav_contents, reference_lidar_pose): + """ + Retrieve all objects in a format of (n, 7), where 7 represents + x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw. + + Notice: it is a wrap of postprocessor function + + Parameters + ---------- + cav_contents : list + List of dictionary, save all cavs' information. + in fact it is used in get_item_single_car, so the list length is 1 + + reference_lidar_pose : list + The final target lidar pose with length 6. + + Returns + ------- + object_np : np.ndarray + Shape is (max_num, 7). + mask : np.ndarray + Shape is (max_num,). + object_ids : list + Length is number of bbx in current sample. 
+ """ + + return self.post_processor.generate_object_center_v2x( + cav_contents, reference_lidar_pose) + + def generate_object_center_camera(self, cav_contents, reference_lidar_pose): + raise NotImplementedError() + + def augment(self, lidar_np, object_bbx_center, object_bbx_mask): + """ + Given the raw point cloud, augment by flipping and rotation. + + Parameters + ---------- + lidar_np : np.ndarray + (n, 4) shape + + object_bbx_center : np.ndarray + (n, 7) shape to represent bbx's x, y, z, h, w, l, yaw + + object_bbx_mask : np.ndarray + Indicate which elements in object_bbx_center are padded. + """ + tmp_dict = {'lidar_np': lidar_np, + 'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask} + tmp_dict = self.data_augmentor.forward(tmp_dict) + + lidar_np = tmp_dict['lidar_np'] + object_bbx_center = tmp_dict['object_bbx_center'] + object_bbx_mask = tmp_dict['object_bbx_mask'] + + return lidar_np, object_bbx_center, object_bbx_mask \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/v2xverse_basedataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/v2xverse_basedataset.py new file mode 100644 index 0000000000000000000000000000000000000000..9ef2a524169daab2f87c3e210921d8a89990689c --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/v2xverse_basedataset.py @@ -0,0 +1,1118 @@ + +import os +from collections import OrderedDict +import cv2 +import h5py +import torch +import torchvision +import numpy as np +from torch.utils.data import Dataset +from PIL import Image +import json +import random +import re +import math + +import logging +_logger = logging.getLogger(__name__) + +import opencood.utils.pcd_utils as pcd_utils +from opencood.data_utils.augmentor.data_augmentor import DataAugmentor +from opencood.hypes_yaml.yaml_utils import load_yaml +from opencood.utils.camera_utils import load_camera_data +from opencood.utils.transformation_utils import x1_to_x2 +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.data_utils.post_processor import build_postprocessor + + +class V2XVERSEBaseDataset(Dataset): + def __init__(self, params, visualize, train=True): + self.params = params + self.visualize = visualize + self.train = train + + self.pre_processor = build_preprocessor(params["preprocess"], train) + self.post_processor = build_postprocessor(params["postprocess"], train) + self.data_augmentor = DataAugmentor(params['data_augment'], + train) + + self.frame_gap = params.get('frame_gap',200) + self.time_delay = params.get('time_delay',0) + + if 'target_assigner_config' in self.params['loss']['args']: + self.det_range = self.params['loss']['args']['target_assigner_config']['cav_lidar_range'] # [-36, -36, -22, 36, 36, 14] + else: + self.det_range = [-36, -36, -22, 36, 36, 14] + + if self.time_delay % self.frame_gap != 0: + print("Time delay of v2xverse dataset should be a multiple of frame_gap !") + self.frame_delay = int(self.time_delay / self.frame_gap) + print(f'*** time_delay = {self.time_delay} ***') + + self.test_flag = False + if self.train: + root_dir = params['root_dir'] + towns = [1,2,3,4,6] + elif not visualize: + root_dir = params['validate_dir'] + towns = [7,10] # [6,7,8,9,10] + else: + root_dir = params['test_dir'] + towns = [5] + self.test_flag = True + self.root_dir = root_dir + self.clock = 0 + + print("Dataset dir:", root_dir) + + if 'train_params' not in params or \ + 
'max_cav' not in params['train_params']: + self.max_cav = 5 + else: + self.max_cav = params['train_params']['max_cav'] + + self.load_lidar_file = True if 'lidar' in params['input_source'] or self.visualize else False + self.load_camera_file = True if 'camera' in params['input_source'] else False + self.load_depth_file = True if 'depth' in params['input_source'] else False + + self.label_type = params['label_type'] # 'lidar' or 'camera' + self.generate_object_center = self.generate_object_center_lidar if self.label_type == "lidar" \ + else self.generate_object_center_camera + self.generate_object_center_single = self.generate_object_center # will it follows 'self.generate_object_center' when 'self.generate_object_center' change? + + if self.load_camera_file: + self.data_aug_conf = params["fusion"]["args"]["data_aug_conf"] + + # by default, we load lidar, camera and metadata. But users may + # define additional inputs/tasks + self.add_data_extension = \ + params['add_data_extension'] if 'add_data_extension' \ + in params else [] + + if "noise_setting" not in self.params: + self.params['noise_setting'] = OrderedDict() + self.params['noise_setting']['add_noise'] = False + + if root_dir is None: + print('Not loading from an existing dataset!') + return + if not os.path.exists(root_dir): + print('Dataset path do not exists!') + return + + # first load all paths of different scenarios + scenario_folders = sorted([os.path.join(root_dir, x) + for x in os.listdir(root_dir) if + os.path.isdir(os.path.join(root_dir, x))]) + self.scenario_folders = scenario_folders + + ################################# + ## v2xverse data load + ################################# + + self.rsu_change_frame = 25 + self.route_frames = [] + + data_index_name = 'dataset_index.txt' + if 'index_file' in self.params: + data_index_name = self.params['index_file'] + '.txt' + print('data_index_name:', data_index_name) + dataset_indexs = self._load_text(data_index_name).split('\n') + + filter_file = None + if 'filte_danger' in self.params: + if os.path.exists(os.path.join(self.root_dir,self.params['filte_danger'])): + filter_file = self._load_json(self.params['filte_danger']) + + weathers = [0,1,2,3,4,5,6,7,8,9,10] + pattern = re.compile('weather-(\d+).*town(\d\d)') + for line in dataset_indexs: + if len(line.split()) != 3: + continue + path, frames, egos = line.split() + route_path = os.path.join(self.root_dir, path) + frames = int(frames) + res = pattern.findall(path) + if len(res) != 1: + continue + weather = int(res[0][0]) + town = int(res[0][1]) + if weather not in weathers or town not in towns: + continue + + files = os.listdir(route_path) + ego_files = [file for file in files if file.startswith('ego')] + rsu_files = [file for file in files if file.startswith('rsu')] + + # recompute rsu change frames + file_len_list = [] + if len(rsu_files) > 0: + for rsu_file in ['rsu_1000', 'rsu_1001']: + if rsu_file in rsu_files: + rsu_frame_len = len(os.listdir(os.path.join(route_path,rsu_file,'measurements'))) + file_len_list.append(rsu_frame_len) + self.rsu_change_frame = max(file_len_list) + 1 + + for j, file in enumerate(ego_files): + ego_path = os.path.join(path, file) + others_list = ego_files[:j]+ego_files[j+1:] + others_path_list = [] + for others in others_list: + others_path_list.append(os.path.join(path, others)) + + for i in range(frames): + # reduce the ratio of frames not at junction + if filter_file is not None: + danger_frame_flag = False + for route_id in filter_file: + if 
route_path.endswith(filter_file[route_id]['sub_path']): + for junction_range in filter_file[route_id]['selected_frames'][file]: + if i > junction_range[0] and i < junction_range[1]+15: + danger_frame_flag = True + if (not danger_frame_flag): + continue + scene_dict = {} + scene_dict['ego'] = ego_path + scene_dict['other_egos'] = others_path_list + scene_dict['num_car'] = len(ego_files) + scene_dict['rsu'] = [] + # order of rsu + if i%self.rsu_change_frame != 0 and len(rsu_files)>0: + order = int(i/self.rsu_change_frame)+1 # int(i/10)+1 + rsu_path = 'rsu_{}00{}'.format(order, ego_path[-1]) + if True: # os.path.exists(os.path.join(route_path, rsu_path,'measurements','{}.json'.format(str(i).zfill(4)))): + scene_dict['rsu'].append(os.path.join(path, rsu_path)) + + self.route_frames.append((scene_dict, i)) # (scene_dict, i) + self.label_mode = self.params.get('label_mode', 'v2xverse') + self.first_det = False + print("Sub route dir nums: %d" % len(self.route_frames)) + + def _load_text(self, path): + text = open(os.path.join(self.root_dir,path), 'r').read() + return text + + def _load_image(self, path): + trans_totensor = torchvision.transforms.ToTensor() + trans_toPIL = torchvision.transforms.ToPILImage() + try: + img = Image.open(os.path.join(self.root_dir,path)) + img_tensor = trans_totensor(img) + img_PIL = trans_toPIL(img_tensor) + except Exception as e: + _logger.info(path) + n = path[-8:-4] + new_path = path[:-8] + "%04d.jpg" % (int(n) - 1) + img = Image.open(os.path.join(self.root_dir,new_path)) + img_tensor = trans_totensor(img) + img_PIL = trans_toPIL(img_tensor) + return img_PIL + + def _load_json(self, path): + try: + json_value = json.load(open(os.path.join(self.root_dir,path))) + except Exception as e: + _logger.info(path) + n = path[-9:-5] + new_path = path[:-9] + "%04d.json" % (int(n) - 1) + json_value = json.load(open(os.path.join(self.root_dir,new_path))) + return json_value + + def _load_npy(self, path): + try: + array = np.load(os.path.join(self.root_dir,path), allow_pickle=True) + except Exception as e: + _logger.info(path) + n = path[-8:-4] + new_path = path[:-8] + "%04d.npy" % (int(n) - 1) + array = np.load(os.path.join(self.root_dir,new_path), allow_pickle=True) + return array + + def get_one_record(self, route_dir, frame_id, agent='ego', visible_actors=None, tpe='all', extra_source=None): + ''' + Parameters + ---------- + scene_dict: str, index given by dataloader. + frame_id: int, frame id. 
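+ agent: str, one of 'ego', 'other_ego' or 'rsu'; rsu records skip the
+ BEV image and receive a few rsu-specific label fixes.
+ visible_actors: list of actor ids or None; when given, actors_data is
+ filtered down to these ids.
+ tpe: 'all' loads the full sensor suite; an int class id (0/1/3) keeps
+ only actors of that type and skips sensor loading.
+ extra_source: dict or None; when set, measurements/lidar/camera are
+ taken from this dict instead of being read from disk.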
+ + Returns + ------- + data: + structure: dict{ + #################### + # input to the model + #################### + 'agent': 'ego' or 'other_ego', # whether it is the ego car + 'rgb_[direction]': torch.Tenser, # direction in [left, right, center], shape (3, 128, 128) + 'rgb': torch.Tensor, front rgb image , # shape (3, 224, 224) + 'measurements': torch.Tensor, size [7]: the first 6 dims is the onehot vector of command, and the last dim is car speed + 'command': int, 0-5, discrete command signal 0:left, 1:right, 2:straight, + # 3: lane follow, 4:lane change left, 5: lane change right + 'pose': np.array, shape(3,), lidar pose[gps_x, gps_y, theta] + 'detmap_pose': pose for density map + 'target_point': torch.Tensor, size[2], (x,y) coordinate in the left hand coordinate system, + where X-axis towards right side of the car + 'lidar': np.ndarray, # shape (3, 224, 224), 2D projection of lidar, range x:[-28m, 28m], y:[-28m,28m] + in the right hand coordinate system with X-axis towards left of car + #################### + # target of model + #################### + 'img_traffic': not yet used in model + 'command_waypoints': torch.Tensor, size[10,2], 10 (x,y) coordinates in the same coordinate system with target point + 'is_junction': int, 0 or 1, 1 means the car is at junction + 'traffic_light_state': int, 0 or 1 + 'det_data': np.array, (400,7), flattened density map, 7 feature dims corresponds to + [prob_obj, box bias_X, box bias_Y, box_orientation, l, w, speed] + 'img_traj': not yet used in model + 'stop_sign': int, 0 or 1, exist of stop sign + }, + ''' + + output_record = OrderedDict() + + if agent == 'ego': + output_record['ego'] = True + else: + output_record['ego'] = False + + BEV = None + + if route_dir is not None: + measurements = self._load_json(os.path.join(route_dir, "measurements", "%04d.json" % frame_id)) + actors_data = self._load_json(os.path.join(route_dir, "actors_data", "%04d.json" % frame_id)) + elif extra_source is not None: + if 'actors_data' in extra_source: + actors_data = extra_source['actors_data'] + else: + actors_data = {} + measurements = extra_source['measurements'] + + ego_loc = np.array([measurements['x'], measurements['y']]) + output_record['params'] = {} + + cam_list = ['front','right','left','rear'] + cam_angle_list = [0, 60, -60, 180] + for cam_id in range(4): + output_record['params']['camera{}'.format(cam_id)] = {} + output_record['params']['camera{}'.format(cam_id)]['cords'] = [measurements['x'], measurements['y'], 1.0,\ + 0,measurements['theta']/np.pi*180+cam_angle_list[cam_id],0] + output_record['params']['camera{}'.format(cam_id)]['extrinsic'] = measurements['camera_{}_extrinsics'.format(cam_list[cam_id])] + output_record['params']['camera{}'.format(cam_id)]['intrinsic'] = measurements['camera_{}_intrinsics'.format(cam_list[cam_id])] + + if 'speed' in measurements: + output_record['params']['ego_speed'] = measurements['speed']*3.6 + else: + output_record['params']['ego_speed'] = 0 + + output_record['params']['lidar_pose'] = \ + [measurements['lidar_pose_x'], measurements['lidar_pose_y'], 0, \ + 0,measurements['theta']/np.pi*180-90,0] + self.distance_to_map_center = (self.det_range[3]-self.det_range[0])/2+self.det_range[0] + output_record['params']['map_pose'] = \ + [measurements['lidar_pose_x'] + self.distance_to_map_center*np.cos(measurements["theta"]-np.pi/2), + measurements['lidar_pose_y'] + self.distance_to_map_center*np.sin(measurements["theta"]-np.pi/2), 0, \ + 0,measurements['theta']/np.pi*180-90,0] + detmap_pose_x = measurements['lidar_pose_x'] + 
self.distance_to_map_center*np.cos(measurements["theta"]-np.pi/2) + detmap_pose_y = measurements['lidar_pose_y'] + self.distance_to_map_center*np.sin(measurements["theta"]-np.pi/2) + detmap_theta = measurements["theta"] + np.pi/2 + output_record['detmap_pose'] = np.array([-detmap_pose_y, detmap_pose_x, detmap_theta]) + output_record['params']['lidar_pose_clean'] = output_record['params']['lidar_pose'] + output_record['params']['plan_trajectory'] = [] + output_record['params']['true_ego_pos'] = \ + [measurements['lidar_pose_x'], measurements['lidar_pose_y'], 0, \ + 0,measurements['theta']/np.pi*180,0] + output_record['params']['predicted_ego_pos'] = \ + [measurements['lidar_pose_x'], measurements['lidar_pose_y'], 0, \ + 0,measurements['theta']/np.pi*180,0] + + if tpe == 'all': + if route_dir is not None: + lidar = self._load_npy(os.path.join(route_dir, "lidar", "%04d.npy" % frame_id)) + output_record['rgb_front'] = self._load_image(os.path.join(route_dir, "rgb_front", "%04d.jpg" % frame_id)) + output_record['rgb_left'] = self._load_image(os.path.join(route_dir, "rgb_left", "%04d.jpg" % frame_id)) + output_record['rgb_right'] = self._load_image(os.path.join(route_dir, "rgb_right", "%04d.jpg" % frame_id)) + output_record['rgb_rear'] = self._load_image(os.path.join(route_dir, "rgb_rear", "%04d.jpg" % frame_id)) + if agent != 'rsu': + BEV = self._load_image(os.path.join(route_dir, "birdview", "%04d.jpg" % frame_id)) + elif extra_source is not None: + lidar = extra_source['lidar'] + if 'rgb_front' in extra_source: + output_record['rgb_front'] = extra_source['rgb_front'] + output_record['rgb_left'] = extra_source['rgb_left'] + output_record['rgb_right'] = extra_source['rgb_right'] + output_record['rgb_rear'] = extra_source['rgb_rear'] + else: + output_record['rgb_front'] = None + output_record['rgb_left'] = None + output_record['rgb_right'] = None + output_record['rgb_rear'] = None + BEV = None + + output_record['lidar_np'] = lidar + lidar_transformed = np.zeros((output_record['lidar_np'].shape)) + lidar_transformed[:,0] = output_record['lidar_np'][:,1] + lidar_transformed[:,1] = -output_record['lidar_np'][:,0] + lidar_transformed[:,2:] = output_record['lidar_np'][:,2:] + output_record['lidar_np'] = lidar_transformed.astype(np.float32) + output_record['lidar_np'][:, 2] += measurements['lidar_pose_z'] + + if visible_actors is not None: + actors_data = self.filter_actors_data_according_to_visible(actors_data, visible_actors) + + ################ LSS debug TODO: clean up this function ##################### + if not self.first_det: + import copy + if True: # agent=='rsu': + measurements["affected_light_id"] = -1 + measurements["is_vehicle_present"] = [] + measurements["is_bike_present"] = [] + measurements["is_junction_vehicle_present"] = [] + measurements["is_pedestrian_present"] = [] + measurements["future_waypoints"] = [] + cop3_range = [36,12,12,12, 0.25] + heatmap = generate_heatmap_multiclass( + copy.deepcopy(measurements), copy.deepcopy(actors_data), max_distance=36 + ) + self.det_data = ( + generate_det_data_multiclass( + heatmap, copy.deepcopy(measurements), copy.deepcopy(actors_data), cop3_range + ) + .reshape(3, int((cop3_range[0]+cop3_range[1])/cop3_range[4] + *(cop3_range[2]+cop3_range[3])/cop3_range[4]), -1) #(2, H*W,7) + .astype(np.float32) + ) + self.first_det = True + if self.label_mode == 'cop3': + self.first_det = False + output_record['det_data'] = self.det_data + ############################################################## + if agent == 'rsu' : + for actor_id in 
actors_data.keys(): + if actors_data[actor_id]['tpe'] == 0: + box = actors_data[actor_id]['box'] + if abs(box[0]-0.8214) < 0.01 and abs(box[1]-0.18625) < 0.01 : + actors_data[actor_id]['tpe'] = 3 + + output_record['params']['vehicles'] = {} + for actor_id in actors_data.keys(): + + ###################### + ## debug + ###################### + # if agent == 'ego': + # continue + + if tpe in [0, 1, 3]: + if actors_data[actor_id]['tpe'] != tpe: + continue + + # exclude ego car + loc_actor = np.array(actors_data[actor_id]['loc'][0:2]) + dis = np.linalg.norm(ego_loc - loc_actor) + if dis < 0.1: + continue + + if not ('box' in actors_data[actor_id].keys() and 'ori' in actors_data[actor_id].keys() and 'loc' in actors_data[actor_id].keys()): + continue + output_record['params']['vehicles'][actor_id] = {} + output_record['params']['vehicles'][actor_id]['tpe'] = actors_data[actor_id]['tpe'] + yaw = math.degrees(math.atan(actors_data[actor_id]['ori'][1]/actors_data[actor_id]['ori'][0])) + pitch = math.degrees(math.asin(actors_data[actor_id]['ori'][2])) + output_record['params']['vehicles'][actor_id]['angle'] = [0,yaw,pitch] + output_record['params']['vehicles'][actor_id]['center'] = [0,0,actors_data[actor_id]['box'][2]] + output_record['params']['vehicles'][actor_id]['extent'] = actors_data[actor_id]['box'] + output_record['params']['vehicles'][actor_id]['location'] = [actors_data[actor_id]['loc'][0],actors_data[actor_id]['loc'][1],0] + output_record['params']['vehicles'][actor_id]['speed'] = 3.6 * math.sqrt(actors_data[actor_id]['vel'][0]**2+actors_data[actor_id]['vel'][1]**2 ) + + direction_list = ['front','left','right','rear'] + theta_list = [0,-60,60,180] + dis_list = [0,0,0,-2.6] + camera_data_list = [] + for i, direction in enumerate(direction_list): + if 'rgb_{}'.format(direction) in output_record: + camera_data_list.append(output_record['rgb_{}'.format(direction)]) + dis_to_lidar = dis_list[i] + output_record['params']['camera{}'.format(i)]['cords'] = \ + [measurements['x'] + dis_to_lidar*np.sin(measurements['theta']), measurements['y'] - dis_to_lidar*np.cos(measurements['theta']), 2.3,\ + 0,measurements['theta']/np.pi*180 - 90 + theta_list[i],0] + output_record['params']['camera{}'.format(i)]['extrinsic'] = measurements['camera_{}_extrinsics'.format(direction_list[i])] + output_record['params']['camera{}'.format(i)]['intrinsic'] = measurements['camera_{}_intrinsics'.format(direction_list[i])] + output_record['camera_data'] = camera_data_list + bev_visibility_np = 255*np.ones((256,256,3), dtype=np.uint8) + output_record['bev_visibility.png'] = bev_visibility_np + + if agent != 'rsu': + output_record['BEV'] = BEV + else: + output_record['BEV'] = None + return output_record + + def filter_actors_data_according_to_visible(self, actors_data, visible_actors): + to_del_id = [] + for actors_id in actors_data.keys(): + if actors_id in visible_actors: + continue + to_del_id.append(actors_id) + for actors_id in to_del_id: + del actors_data[actors_id] + return actors_data + + def get_visible_actors_one_term(self, route_dir, frame_id): + cur_visible_actors = [] + actors_data = self._load_json(os.path.join(route_dir, "actors_data", "%04d.json" % frame_id)) + + for actors_id in actors_data: + if actors_data[actors_id]['tpe']==2: + continue + if not 'lidar_visible' in actors_data[actors_id]: + cur_visible_actors.append(actors_id) + print('Lose of lidar_visible!') + continue + if actors_data[actors_id]['lidar_visible']==1: + cur_visible_actors.append(actors_id) + return cur_visible_actors + + def 
get_visible_actors(self, scene_dict, frame_id): + visible_actors = {} # id only + if self.test_flag: + visible_actors['car_0'] = None + for i, route_dir in enumerate(scene_dict['other_egos']): + visible_actors['car_{}'.format(i+1)] = None + for i, rsu_dir in enumerate(scene_dict['rsu']): + visible_actors['rsu_{}'.format(i)] = None + else: + visible_actors['car_0'] = self.get_visible_actors_one_term(scene_dict['ego'], frame_id) + if self.params['train_params']['max_cav'] > 1: + for i, route_dir in enumerate(scene_dict['other_egos']): + visible_actors['car_{}'.format(i+1)] = self.get_visible_actors_one_term(route_dir, frame_id) + for i, rsu_dir in enumerate(scene_dict['rsu']): + visible_actors['rsu_{}'.format(i)] = self.get_visible_actors_one_term(rsu_dir, frame_id) + for keys in visible_actors: + visible_actors[keys] = list(set(visible_actors[keys])) + return visible_actors + + def retrieve_base_data(self, idx, tpe='all', extra_source=None, data_dir=None): + if extra_source is None: + if data_dir is not None: + scene_dict, frame_id = data_dir + else: + scene_dict, frame_id = self.route_frames[idx] + frame_id_latency = frame_id - self.frame_delay + visible_actors = None + visible_actors = self.get_visible_actors(scene_dict, frame_id) + data = OrderedDict() + data['car_0'] = self.get_one_record(scene_dict['ego'], frame_id , agent='ego', visible_actors=visible_actors['car_0'], tpe=tpe) + if self.params['train_params']['max_cav'] > 1: + for i, route_dir in enumerate(scene_dict['other_egos']): + try: + data['car_{}'.format(i+1)] = self.get_one_record(route_dir, frame_id_latency , agent='other_ego', visible_actors=visible_actors['car_{}'.format(i+1)], tpe=tpe) + except: + print('load other ego failed') + continue + if self.params['train_params']['max_cav'] > 2: + for i, rsu_dir in enumerate(scene_dict['rsu']): + try: + data['rsu_{}'.format(i)] = self.get_one_record(rsu_dir, frame_id_latency, agent='rsu', visible_actors=visible_actors['rsu_{}'.format(i)], tpe=tpe) + except: + print('load rsu failed') + continue + else: + data = OrderedDict() + scene_dict = None + frame_id = None + data['car_0'] = self.get_one_record(route_dir=None, frame_id=None , agent='ego', visible_actors=None, tpe=tpe, extra_source=extra_source['car_data'][0]) + if self.params['train_params']['max_cav'] > 1: + if len(extra_source['car_data']) > 1: + for i in range(len(extra_source['car_data'])-1): + data['car_{}'.format(i+1)] = self.get_one_record(route_dir=None, frame_id=None , agent='other_ego', visible_actors=None, tpe=tpe, extra_source=extra_source['car_data'][i+1]) + for i in range(len(extra_source['rsu_data'])): + data['rsu_{}'.format(i)] = self.get_one_record(route_dir=None, frame_id=None , agent='rsu', visible_actors=None, tpe=tpe, extra_source=extra_source['rsu_data'][i]) + data['car_0']['scene_dict'] = scene_dict + data['car_0']['frame_id'] = frame_id + return data + + + def __len__(self): + return len(self.route_frames) + + def __getitem__(self, idx): + """ + Abstract method, needs to be define by the children class. + """ + pass + + @staticmethod + def extract_timestamps(yaml_files): + """ + Given the list of the yaml files, extract the mocked timestamps. + + Parameters + ---------- + yaml_files : list + The full path of all yaml files of ego vehicle + + Returns + ------- + timestamps : list + The list containing timestamps only. 
+ """ + timestamps = [] + + for file in yaml_files: + res = file.split('/')[-1] + + timestamp = res.replace('.yaml', '') + timestamps.append(timestamp) + + return timestamps + + @staticmethod + def return_timestamp_key(scenario_database, timestamp_index): + """ + Given the timestamp index, return the correct timestamp key, e.g. + 2 --> '000078'. + + Parameters + ---------- + scenario_database : OrderedDict + The dictionary contains all contents in the current scenario. + + timestamp_index : int + The index for timestamp. + + Returns + ------- + timestamp_key : str + The timestamp key saved in the cav dictionary. + """ + # get all timestamp keys + timestamp_keys = list(scenario_database.items())[0][1] + # retrieve the correct index + timestamp_key = list(timestamp_keys.items())[timestamp_index][0] + + return timestamp_key + + @staticmethod + def find_camera_files(cav_path, timestamp, sensor="camera"): + """ + Retrieve the paths to all camera files. + + Parameters + ---------- + cav_path : str + The full file path of current cav. + + timestamp : str + Current timestamp + + sensor : str + "camera" or "depth" + + Returns + ------- + camera_files : list + The list containing all camera png file paths. + """ + camera0_file = os.path.join(cav_path, + timestamp + f'_{sensor}0.png') + camera1_file = os.path.join(cav_path, + timestamp + f'_{sensor}1.png') + camera2_file = os.path.join(cav_path, + timestamp + f'_{sensor}2.png') + camera3_file = os.path.join(cav_path, + timestamp + f'_{sensor}3.png') + return [camera0_file, camera1_file, camera2_file, camera3_file] + + + def augment(self, lidar_np, object_bbx_center, object_bbx_mask): + """ + Given the raw point cloud, augment by flipping and rotation. + + Parameters + ---------- + lidar_np : np.ndarray + (n, 4) shape + + object_bbx_center : np.ndarray + (n, 7) shape to represent bbx's x, y, z, h, w, l, yaw + + object_bbx_mask : np.ndarray + Indicate which elements in object_bbx_center are padded. + """ + tmp_dict = {'lidar_np': lidar_np, + 'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask} + tmp_dict = self.data_augmentor.forward(tmp_dict) + + lidar_np = tmp_dict['lidar_np'] + object_bbx_center = tmp_dict['object_bbx_center'] + object_bbx_mask = tmp_dict['object_bbx_mask'] + + return lidar_np, object_bbx_center, object_bbx_mask + + + def generate_object_center_lidar(self, + cav_contents, + reference_lidar_pose): + """ + Retrieve all objects in a format of (n, 7), where 7 represents + x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw. + The object_bbx_center is in ego coordinate. + + Notice: it is a wrap of postprocessor + + Parameters + ---------- + cav_contents : list + List of dictionary, save all cavs' information. + in fact it is used in get_item_single_car, so the list length is 1 + + reference_lidar_pose : list + The final target lidar pose with length 6. + + Returns + ------- + object_np : np.ndarray + Shape is (max_num, 7). + mask : np.ndarray + Shape is (max_num,). + object_ids : list + Length is number of bbx in current sample. + """ + return self.post_processor.generate_object_center(cav_contents, + reference_lidar_pose) + + def generate_object_center_camera(self, + cav_contents, + reference_lidar_pose): + """ + Retrieve all objects in a format of (n, 7), where 7 represents + x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw. + The object_bbx_center is in ego coordinate. + + Notice: it is a wrap of postprocessor + + Parameters + ---------- + cav_contents : list + List of dictionary, save all cavs' information. 
+ in fact it is used in get_item_single_car, so the list length is 1 + + reference_lidar_pose : list + The final target lidar pose with length 6. + + visibility_map : np.ndarray + for OPV2V, its 256*256 resolution. 0.39m per pixel. heading up. + + Returns + ------- + object_np : np.ndarray + Shape is (max_num, 7). + mask : np.ndarray + Shape is (max_num,). + object_ids : list + Length is number of bbx in current sample. + """ + return self.post_processor.generate_visible_object_center( + cav_contents, reference_lidar_pose + ) + + def get_ext_int(self, params, camera_id): + if self.params['extrinsic'] == 1: + return self.get_ext_int_1(params, camera_id) + elif self.params['extrinsic'] == 2: + return self.get_ext_int_2(params, camera_id) + def get_ext_int_1(self, params, camera_id): + camera_coords = np.array(params["camera%d" % camera_id]["cords"]).astype( + np.float32) + camera_to_lidar = x1_to_x2( + camera_coords, params["lidar_pose_clean"] + ).astype(np.float32) # T_LiDAR_camera + camera_to_lidar = camera_to_lidar @ np.array( + [[0, 0, 1, 0], [1, 0, 0, 0], [0, -1, 0, 0], [0, 0, 0, 1]], + dtype=np.float32) # UE4 coord to opencv coord + camera_intrinsic = np.array(params["camera%d" % camera_id]["intrinsic"]).astype( + np.float32 + ) + return camera_to_lidar, camera_intrinsic + def get_ext_int_2(self, params, camera_id): + camera_extrinsic = np.array(params["camera%d" % camera_id]["extrinsic"]).astype( + np.float32) + camera_extrinsic = camera_extrinsic @ np.array( + [[0, 0, 1, 0], [1, 0, 0, 0], [0, -1, 0, 0], [0, 0, 0, 1]], + dtype=np.float32) # UE4 coord to opencv coord + camera_intrinsic = np.array(params["camera%d" % camera_id]["intrinsic"]).astype( + np.float32 + ) + return camera_extrinsic, camera_intrinsic +VALUES = [255] +EXTENT = [0] +def generate_heatmap_multiclass(measurements, actors_data, max_distance=30, pixels_per_meter=8): + actors_data_multiclass = { + 0: {}, 1: {}, 2:{}, 3:{} + } + for _id in actors_data.keys(): + actors_data_multiclass[actors_data[_id]['tpe']][_id] = actors_data[_id] + heatmap_0 = generate_heatmap(measurements, actors_data_multiclass[0], max_distance, pixels_per_meter) + heatmap_1 = generate_heatmap(measurements, actors_data_multiclass[1], max_distance, pixels_per_meter) + # heatmap_2 = generate_heatmap(measurements, actors_data_multiclass[2], max_distance, pixels_per_meter) # traffic light, not used + heatmap_3 = generate_heatmap(measurements, actors_data_multiclass[3], max_distance, pixels_per_meter) + return {0: heatmap_0, 1: heatmap_1, 3: heatmap_3} + +def get_yaw_angle(forward_vector): + forward_vector = forward_vector / np.linalg.norm(forward_vector) + yaw = math.acos(forward_vector[0]) + if forward_vector[1] < 0: + yaw = 2 * np.pi - yaw + return yaw + +def generate_heatmap(measurements, actors_data, max_distance=30, pixels_per_meter=8): + img_size = max_distance * pixels_per_meter * 2 + img = np.zeros((img_size, img_size, 3), np.int) + ego_x = measurements["lidar_pose_x"] + ego_y = measurements["lidar_pose_y"] + ego_theta = measurements["theta"] + R = np.array( + [ + [np.cos(ego_theta), -np.sin(ego_theta)], + [np.sin(ego_theta), np.cos(ego_theta)], + ] + ) + ego_id = None + for _id in actors_data: + color = np.array([1, 1, 1]) + if actors_data[_id]["tpe"] == 2: + if int(_id) == int(measurements["affected_light_id"]): + if actors_data[_id]["sta"] == 0: + color = np.array([1, 1, 1]) + else: + color = np.array([0, 0, 0]) + yaw = get_yaw_angle(actors_data[_id]["ori"]) + TR = np.array([[np.cos(yaw), np.sin(yaw)], [-np.sin(yaw), np.cos(yaw)]]) + 
actors_data[_id]["loc"] = np.array( + actors_data[_id]["loc"][:2] + ) + TR.T.dot(np.array(actors_data[_id]["taigger_loc"])[:2]) + actors_data[_id]["ori"] = np.array(actors_data[_id]["ori"]) + actors_data[_id]["box"] = np.array(actors_data[_id]["trigger_box"]) * 2 + else: + continue + raw_loc = actors_data[_id]["loc"] + if (raw_loc[0] - ego_x) ** 2 + (raw_loc[1] - ego_y) ** 2 <= 2: + ego_id = _id + color = np.array([0, 1, 1]) + new_loc = R.T.dot(np.array([raw_loc[0] - ego_x, raw_loc[1] - ego_y])) + actors_data[_id]["loc"] = np.array(new_loc) + raw_ori = actors_data[_id]["ori"] + new_ori = R.T.dot(np.array([raw_ori[0], raw_ori[1]])) + actors_data[_id]["ori"] = np.array(new_ori) + actors_data[_id]["box"] = np.array(actors_data[_id]["box"]) + if int(_id) in measurements["is_vehicle_present"]: + color = np.array([1, 1, 1]) + elif int(_id) in measurements["is_bike_present"]: + color = np.array([1, 1, 1]) + elif int(_id) in measurements["is_junction_vehicle_present"]: + color = np.array([1, 1, 1]) + elif int(_id) in measurements["is_pedestrian_present"]: + color = np.array([1, 1, 1]) + actors_data[_id]["color"] = color + + if ego_id is not None and ego_id in actors_data: + del actors_data[ego_id] # Do not show ego car + for _id in actors_data: + if actors_data[_id]["tpe"] == 2: + continue # FIXME donot add traffix light + if int(_id) != int(measurements["affected_light_id"]): + continue + if actors_data[_id]["sta"] != 0: + continue + act_img = np.zeros((img_size, img_size, 3), np.uint8) + loc = actors_data[_id]["loc"][:2] + ori = actors_data[_id]["ori"][:2] + box = actors_data[_id]["box"] + if box[0] < 1.5: + box = box * 1.5 # FIXME enlarge the size of pedstrian and bike + color = actors_data[_id]["color"] + for i in range(len(VALUES)): + act_img = add_rect( + act_img, + loc, + ori, + box + EXTENT[i], + VALUES[i], + pixels_per_meter, + max_distance, + color, + ) + act_img = np.clip(act_img, 0, 255) + img = img + act_img + img = np.clip(img, 0, 255) + img = img.astype(np.uint8) + img = img[:, :, 0] + return img + +def add_rect(img, loc, ori, box, value, pixels_per_meter, max_distance, color): + img_size = max_distance * pixels_per_meter * 2 + vet_ori = np.array([-ori[1], ori[0]]) + hor_offset = box[0] * ori + vet_offset = box[1] * vet_ori + left_up = (loc + hor_offset + vet_offset + max_distance) * pixels_per_meter + left_down = (loc + hor_offset - vet_offset + max_distance) * pixels_per_meter + right_up = (loc - hor_offset + vet_offset + max_distance) * pixels_per_meter + right_down = (loc - hor_offset - vet_offset + max_distance) * pixels_per_meter + left_up = np.around(left_up).astype(np.int) + left_down = np.around(left_down).astype(np.int) + right_down = np.around(right_down).astype(np.int) + right_up = np.around(right_up).astype(np.int) + left_up = list(left_up) + left_down = list(left_down) + right_up = list(right_up) + right_down = list(right_down) + color = [int(x) for x in value * color] + cv2.fillConvexPoly(img, np.array([left_up, left_down, right_down, right_up]), color) + return img + +def generate_det_data_multiclass( + heatmap, measurements, actors_data, det_range=[30,10,10,10, 0.8] +): + actors_data_multiclass = { + 0: {}, 1: {}, 2: {}, 3:{} + } + for _id in actors_data.keys(): + actors_data_multiclass[actors_data[_id]['tpe']][_id] = actors_data[_id] + det_data = [] + for _class in range(4): + if _class != 2: + det_data.append(generate_det_data(heatmap[_class], measurements, actors_data_multiclass[_class], det_range)) + + return np.array(det_data) + +from skimage.measure import 
block_reduce + +def generate_det_data( + heatmap, measurements, actors_data, det_range=[30,10,10,10, 0.8] +): + res = det_range[4] + max_distance = max(det_range) + traffic_heatmap = block_reduce(heatmap, block_size=(int(8*res), int(8*res)), func=np.mean) + traffic_heatmap = np.clip(traffic_heatmap, 0.0, 255.0) + traffic_heatmap = traffic_heatmap[:int((det_range[0]+det_range[1])/res), int((max_distance-det_range[2])/res):int((max_distance+det_range[3])/res)] + det_data = np.zeros((int((det_range[0]+det_range[1])/res), int((det_range[2]+det_range[3])/res), 7)) # (50,25,7) + vertical, horizontal = det_data.shape[:2] + + ego_x = measurements["lidar_pose_x"] + ego_y = measurements["lidar_pose_y"] + ego_theta = measurements["theta"] + R = np.array( + [ + [np.cos(ego_theta), -np.sin(ego_theta)], + [np.sin(ego_theta), np.cos(ego_theta)], + ] + ) + need_deleted_ids = [] + for _id in actors_data: + raw_loc = actors_data[_id]["loc"] + new_loc = R.T.dot(np.array([raw_loc[0] - ego_x, raw_loc[1] - ego_y])) + new_loc[1] = -new_loc[1] + actors_data[_id]["loc"] = np.array(new_loc) + raw_ori = actors_data[_id]["ori"] + new_ori = R.T.dot(np.array([raw_ori[0], raw_ori[1]])) + dis = new_loc[0] ** 2 + new_loc[1] ** 2 + if ( + dis <= 2 + or dis >= (max_distance) ** 2 * 2 + or "box" not in actors_data[_id] + or actors_data[_id]['tpe'] == 2 + ): + need_deleted_ids.append(_id) + continue + actors_data[_id]["ori"] = np.array(new_ori) + actors_data[_id]["box"] = np.array(actors_data[_id]["box"]) + + for _id in need_deleted_ids: + del actors_data[_id] + + for i in range(vertical): # 50 + for j in range(horizontal): # 25 + if traffic_heatmap[i][j] < 0.05 * 255.0: + continue + center_x, center_y = convert_grid_to_xy(i, j, det_range) + min_dis = 1000 + min_id = None + for _id in actors_data: + loc = actors_data[_id]["loc"][:2] + ori = actors_data[_id]["ori"][:2] + box = actors_data[_id]["box"] + dis = (loc[0] - center_x) ** 2 + (loc[1] - center_y) ** 2 + if dis < min_dis: + min_dis = dis + min_id = _id + + if min_id is None: + continue + + loc = actors_data[min_id]["loc"][:2] + ori = actors_data[min_id]["ori"][:2] + box = actors_data[min_id]["box"] + theta = (get_yaw_angle(ori) / np.pi + 2) % 2 + speed = np.linalg.norm(actors_data[min_id]["vel"]) + + # prob = np.power(0.5 / max(0.5, np.sqrt(min_dis)), 0.5) + + det_data[i][j] = np.array( + [ + 0, + (loc[0] - center_x) * 3.0, + (loc[1] - center_y) * 3.0, + theta / 2.0, + box[0] / 7.0, + box[1] / 4.0, + 0, + ] + ) + + heatmap = np.zeros((int((det_range[0]+det_range[1])/res), int((det_range[2]+det_range[3])/res))) # (50,25) + for _id in actors_data: + loc = actors_data[_id]["loc"][:2] + ori = actors_data[_id]["ori"][:2] + box = actors_data[_id]["box"] + try: + x,y = loc + i,j = convert_xy_to_grid(x,y,det_range) + i = int(np.around(i)) + j = int(np.around(j)) + + if i < vertical and i > 0 and j > 0 and j < horizontal: + det_data[i][j][-1] = 1.0 + + ################## Gaussian Heatmap ##################### + w, h = box[:2]/det_range[4] + heatmap = draw_heatmap(heatmap, h, w, j, i) + ######################################################### + + # theta = (get_yaw_angle(ori) / np.pi + 2) % 2 + # center_x, center_y = convert_grid_to_xy(i, j, det_range) + + # det_data[i][j] = np.array( + # [ + # 0, + # (loc[0] - center_x) * 3.0, + # (loc[1] - center_y) * 3.0, + # theta / 2.0, + # box[0] / 7.0, + # box[1] / 4.0, + # 0, + # ] + # ) + + except: + print('actor data error, skip!') + det_data[:,:,0] = heatmap + return det_data + +def convert_grid_to_xy(i, j, det_range): + x = 
det_range[4]*(j + 0.5) - det_range[2] + y = det_range[0] - det_range[4]*(i+0.5) + return x, y + +def convert_xy_to_grid(x, y, det_range): + j = (x + det_range[2]) / det_range[4] - 0.5 + i = (det_range[0] - y) / det_range[4] - 0.5 + return i, j + +def draw_heatmap(heatmap, h, w, x, y): + feature_map_size = heatmap.shape + radius = gaussian_radius( + (h, w), + min_overlap=0.1) + radius = max(2, int(radius)) + + # throw out not in range objects to avoid out of array + # area when creating the heatmap + if not (0 <= y < feature_map_size[0] + and 0 <= x < feature_map_size[1]): + return heatmap + + heatmap = draw_gaussian(heatmap, (x,y), radius) + return heatmap + +def draw_gaussian(heatmap, center, radius, k=1): + """Get gaussian masked heatmap. + + Args: + heatmap (torch.Tensor): Heatmap to be masked. + center (torch.Tensor): Center coord of the heatmap. + radius (int): Radius of gausian. + K (int): Multiple of masked_gaussian. Defaults to 1. + + Returns: + torch.Tensor: Masked heatmap. + """ + diameter = 2 * radius + 1 + gaussian = gaussian_2d((diameter, diameter), sigma=diameter / 6) + + x, y = int(center[0]), int(center[1]) + + height, width = heatmap.shape[0:2] + + left, right = min(x, radius), min(width - x, radius + 1) + top, bottom = min(y, radius), min(height - y, radius + 1) + + masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] + masked_gaussian = gaussian[radius - top:radius + bottom, + radius - left:radius + right] + + if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: + # torch.max(masked_heatmap, masked_gaussian * k, out=masked_heatmap) + np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) + # masked_heatmap = np.max([masked_heatmap[None,], (masked_gaussian * k)[None,]], axis=0)[0] + # heatmap[y - top:y + bottom, x - left:x + right] = masked_heatmap + return heatmap + +def gaussian_2d(shape, sigma=1): + """Generate gaussian map. + + Args: + shape (list[int]): Shape of the map. + sigma (float): Sigma to generate gaussian map. + Defaults to 1. + + Returns: + np.ndarray: Generated gaussian map. + """ + m, n = [(ss - 1.) / 2. for ss in shape] + y, x = np.ogrid[-m:m + 1, -n:n + 1] + + h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) + h[h < np.finfo(h.dtype).eps * h.max()] = 0 + return h + +def gaussian_radius(det_size, min_overlap=0.5): + """Get radius of gaussian. + + Args: + det_size (tuple[torch.Tensor]): Size of the detection result. + min_overlap (float): Gaussian_overlap. Defaults to 0.5. + + Returns: + torch.Tensor: Computed radius. 
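+
+ Note: this is the CornerNet/CenterNet radius heuristic: three quadratic
+ cases bound how far a box may shift while still keeping at least
+ `min_overlap` IoU with the ground truth, and the smallest root is
+ returned (here a plain NumPy float rather than a torch.Tensor).
+
+ Illustrative value (not from the source): with the min_overlap=0.1 used
+ by draw_heatmap, gaussian_radius((10.0, 20.0), min_overlap=0.1)
+ evaluates to roughly 10.9.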
+ """ + height, width = det_size + + a1 = 1 + b1 = (height + width) + c1 = width * height * (1 - min_overlap) / (1 + min_overlap) + sq1 = np.sqrt(b1**2 - 4 * a1 * c1) + r1 = (b1 + sq1) / (2 * a1) + + a2 = 4 + b2 = 2 * (height + width) + c2 = (1 - min_overlap) * width * height + sq2 = np.sqrt(b2**2 - 4 * a2 * c2) + r2 = (b2 + sq2) / (2 * a2) + + a3 = 4 * min_overlap + b3 = -2 * min_overlap * (height + width) + c3 = (min_overlap - 1) * width * height + sq3 = np.sqrt(b3**2 - 4 * a3 * c3) + r3 = (b3 + sq3) / (2 * a3) + return min(r1, r2, r3) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/early_fusion_dataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/early_fusion_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..ddac124dcebaa55b65940062d5f013580b5e234a --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/early_fusion_dataset.py @@ -0,0 +1,414 @@ +# early fusion dataset +import torch +import numpy as np +from opencood.utils.pcd_utils import downsample_lidar_minimum +import math +from collections import OrderedDict + +from opencood.utils import box_utils +from opencood.utils.common_utils import merge_features_to_dict +from opencood.data_utils.post_processor import build_postprocessor +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.hypes_yaml.yaml_utils import load_yaml +from opencood.utils.pcd_utils import \ + mask_points_by_range, mask_ego_points, shuffle_points, \ + downsample_lidar_minimum +from opencood.utils.transformation_utils import x1_to_x2 + + +def getEarlyFusionDataset(cls): + class EarlyFusionDataset(cls): + """ + This dataset is used for early fusion, where each CAV transmit the raw + point cloud to the ego vehicle. 
+ """ + def __init__(self, params, visualize, train=True): + super(EarlyFusionDataset, self).__init__(params, visualize, train) + self.supervise_single = True if ('supervise_single' in params['model']['args'] and params['model']['args']['supervise_single']) \ + else False + assert self.supervise_single is False + self.proj_first = False if 'proj_first' not in params['fusion']['args']\ + else params['fusion']['args']['proj_first'] + self.anchor_box = self.post_processor.generate_anchor_box() + self.anchor_box_torch = torch.from_numpy(self.anchor_box) + + self.heterogeneous = False + if 'heter' in params: + self.heterogeneous = True + + def __getitem__(self, idx): + base_data_dict = self.retrieve_base_data(idx) + + processed_data_dict = OrderedDict() + processed_data_dict['ego'] = {} + + ego_id = -1 + ego_lidar_pose = [] + + # first find the ego vehicle's lidar pose + for cav_id, cav_content in base_data_dict.items(): + if cav_content['ego']: + ego_id = cav_id + ego_lidar_pose = cav_content['params']['lidar_pose'] + break + + assert ego_id != -1 + assert len(ego_lidar_pose) > 0 + + projected_lidar_stack = [] + object_stack = [] + object_id_stack = [] + + # loop over all CAVs to process information + for cav_id, selected_cav_base in base_data_dict.items(): + # check if the cav is within the communication range with ego + distance = \ + math.sqrt((selected_cav_base['params']['lidar_pose'][0] - + ego_lidar_pose[0]) ** 2 + ( + selected_cav_base['params'][ + 'lidar_pose'][1] - ego_lidar_pose[ + 1]) ** 2) + if distance > self.params['comm_range']: + continue + + selected_cav_processed = self.get_item_single_car( + selected_cav_base, + ego_lidar_pose) + # all these lidar and object coordinates are projected to ego + # already. + projected_lidar_stack.append( + selected_cav_processed['projected_lidar']) + object_stack.append(selected_cav_processed['object_bbx_center']) + object_id_stack += selected_cav_processed['object_ids'] + + # exclude all repetitive objects + unique_indices = \ + [object_id_stack.index(x) for x in set(object_id_stack)] + object_stack = np.vstack(object_stack) + object_stack = object_stack[unique_indices] + + # make sure bounding boxes across all frames have the same number + object_bbx_center = \ + np.zeros((self.params['postprocess']['max_num'], 7)) + mask = np.zeros(self.params['postprocess']['max_num']) + object_bbx_center[:object_stack.shape[0], :] = object_stack + mask[:object_stack.shape[0]] = 1 + + # convert list to numpy array, (N, 4) + projected_lidar_stack = np.vstack(projected_lidar_stack) + + # data augmentation + projected_lidar_stack, object_bbx_center, mask = \ + self.augment(projected_lidar_stack, object_bbx_center, mask) + + # we do lidar filtering in the stacked lidar + projected_lidar_stack = mask_points_by_range(projected_lidar_stack, + self.params['preprocess'][ + 'cav_lidar_range']) + # augmentation may remove some of the bbx out of range + object_bbx_center_valid = object_bbx_center[mask == 1] + object_bbx_center_valid, range_mask = \ + box_utils.mask_boxes_outside_range_numpy(object_bbx_center_valid, + self.params['preprocess'][ + 'cav_lidar_range'], + self.params['postprocess'][ + 'order'], + return_mask=True + ) + mask[object_bbx_center_valid.shape[0]:] = 0 + object_bbx_center[:object_bbx_center_valid.shape[0]] = \ + object_bbx_center_valid + object_bbx_center[object_bbx_center_valid.shape[0]:] = 0 + unique_indices = list(np.array(unique_indices)[range_mask]) + + # pre-process the lidar to voxel/bev/downsampled lidar + lidar_dict = 
self.pre_processor.preprocess(projected_lidar_stack) + + # generate the anchor boxes + anchor_box = self.post_processor.generate_anchor_box() + + # generate targets label + label_dict = \ + self.post_processor.generate_label( + gt_box_center=object_bbx_center, + anchors=anchor_box, + mask=mask) + + processed_data_dict['ego'].update( + {'object_bbx_center': object_bbx_center, + 'object_bbx_mask': mask, + 'object_ids': [object_id_stack[i] for i in unique_indices], + 'anchor_box': anchor_box, + 'processed_lidar': lidar_dict, + 'label_dict': label_dict}) + + if self.visualize: + processed_data_dict['ego'].update({'origin_lidar': + projected_lidar_stack}) + + return processed_data_dict + + def get_item_single_car(self, selected_cav_base, ego_pose): + """ + Project the lidar and bbx to ego space first, and then do clipping. + + Parameters + ---------- + selected_cav_base : dict + The dictionary contains a single CAV's raw information. + ego_pose : list + The ego vehicle lidar pose under world coordinate. + + Returns + ------- + selected_cav_processed : dict + The dictionary contains the cav's processed information. + """ + selected_cav_processed = {} + + # calculate the transformation matrix + transformation_matrix = \ + x1_to_x2(selected_cav_base['params']['lidar_pose'], + ego_pose) + + # retrieve objects under ego coordinates + object_bbx_center, object_bbx_mask, object_ids = \ + self.generate_object_center([selected_cav_base], + ego_pose) + + # filter lidar + lidar_np = selected_cav_base['lidar_np'] + lidar_np = shuffle_points(lidar_np) + # remove points that hit itself + lidar_np = mask_ego_points(lidar_np) + # project the lidar to ego space + lidar_np[:, :3] = \ + box_utils.project_points_by_matrix_torch(lidar_np[:, :3], + transformation_matrix) + + selected_cav_processed.update( + {'object_bbx_center': object_bbx_center[object_bbx_mask == 1], + 'object_ids': object_ids, + 'projected_lidar': lidar_np}) + + return selected_cav_processed + + def collate_batch_test(self, batch): + """ + Customized collate function for pytorch dataloader during testing + for late fusion dataset. + + Parameters + ---------- + batch : dict + + Returns + ------- + batch : dict + Reformatted batch. + """ + # currently, we only support batch size of 1 during testing + assert len(batch) <= 1, "Batch size 1 is required during testing!" + batch = batch[0] # only ego + + output_dict = {} + + for cav_id, cav_content in batch.items(): + output_dict.update({cav_id: {}}) + # shape: (1, max_num, 7) + object_bbx_center = \ + torch.from_numpy(np.array([cav_content['object_bbx_center']])) + object_bbx_mask = \ + torch.from_numpy(np.array([cav_content['object_bbx_mask']])) + object_ids = cav_content['object_ids'] + + # the anchor box is the same for all bounding boxes usually, thus + # we don't need the batch dimension. 
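+ # NOTE: for this early-fusion dataset the batch dict only carries the
+ # 'ego' entry, and the transformation matrices filled in below are
+ # identity because every cloud was already projected to the ego frame
+ # in __getitem__.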
+ if cav_content['anchor_box'] is not None: + output_dict[cav_id].update({'anchor_box': + torch.from_numpy(np.array( + cav_content[ + 'anchor_box']))}) + if self.visualize: + origin_lidar = [cav_content['origin_lidar']] + + # processed lidar dictionary + processed_lidar_torch_dict = \ + self.pre_processor.collate_batch( + [cav_content['processed_lidar']]) + # label dictionary + label_torch_dict = \ + self.post_processor.collate_batch([cav_content['label_dict']]) + + # save the transformation matrix (4, 4) to ego vehicle + transformation_matrix_torch = \ + torch.from_numpy(np.identity(4)).float() + transformation_matrix_clean_torch = \ + torch.from_numpy(np.identity(4)).float() + + output_dict[cav_id].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'processed_lidar': processed_lidar_torch_dict, + 'label_dict': label_torch_dict, + 'object_ids': object_ids, + 'transformation_matrix': transformation_matrix_torch, + 'transformation_matrix_clean': transformation_matrix_clean_torch}) + + if self.visualize: + origin_lidar = \ + np.array( + downsample_lidar_minimum(pcd_np_list=origin_lidar)) + origin_lidar = torch.from_numpy(origin_lidar) + output_dict[cav_id].update({'origin_lidar': origin_lidar}) + + return output_dict + + def collate_batch_train(self, batch): + # Intermediate fusion is different the other two + output_dict = {'ego': {}} + + object_bbx_center = [] + object_bbx_mask = [] + object_ids = [] + processed_lidar_list = [] + image_inputs_list = [] + # used to record different scenario + label_dict_list = [] + origin_lidar = [] + + # heterogeneous + lidar_agent_list = [] + + # pairwise transformation matrix + pairwise_t_matrix_list = [] + + ### 2022.10.10 single gt #### + if self.supervise_single: + pos_equal_one_single = [] + neg_equal_one_single = [] + targets_single = [] + + for i in range(len(batch)): + ego_dict = batch[i]['ego'] + object_bbx_center.append(ego_dict['object_bbx_center']) + object_bbx_mask.append(ego_dict['object_bbx_mask']) + object_ids.append(ego_dict['object_ids']) + if self.load_lidar_file: + processed_lidar_list.append(ego_dict['processed_lidar']) + if self.load_camera_file: + image_inputs_list.append(ego_dict['image_inputs']) # different cav_num, ego_dict['image_inputs'] is dict. 
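+ # each 'processed_lidar' entry holds voxel features of the already
+ # fused cloud for one sample; they are merged across the batch below
+ # and then collated by the pre-processor in a single call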
+ + label_dict_list.append(ego_dict['label_dict']) + + if self.visualize: + origin_lidar.append(ego_dict['origin_lidar']) + + ### 2022.10.10 single gt #### + if self.supervise_single: + pos_equal_one_single.append(ego_dict['single_label_dict_torch']['pos_equal_one']) + neg_equal_one_single.append(ego_dict['single_label_dict_torch']['neg_equal_one']) + targets_single.append(ego_dict['single_label_dict_torch']['targets']) + + # heterogeneous + if self.heterogeneous: + lidar_agent_list.append(ego_dict['lidar_agent']) + + # convert to numpy, (B, max_num, 7) + object_bbx_center = torch.from_numpy(np.array(object_bbx_center)) + object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask)) + + if self.load_lidar_file: + merged_feature_dict = merge_features_to_dict(processed_lidar_list) + + if self.heterogeneous: + lidar_agent = np.concatenate(lidar_agent_list) + lidar_agent_idx = lidar_agent.nonzero()[0].tolist() + for k, v in merged_feature_dict.items(): # 'voxel_features' 'voxel_num_points' 'voxel_coords' + merged_feature_dict[k] = [v[index] for index in lidar_agent_idx] + + if not self.heterogeneous or (self.heterogeneous and sum(lidar_agent) != 0): + processed_lidar_torch_dict = \ + self.pre_processor.collate_batch(merged_feature_dict) + output_dict['ego'].update({'processed_lidar': processed_lidar_torch_dict}) + + if self.load_camera_file: + merged_image_inputs_dict = merge_features_to_dict(image_inputs_list, merge='cat') + + if self.heterogeneous: + camera_agent = 1 - lidar_agent + camera_agent_idx = camera_agent.nonzero()[0].tolist() + if sum(camera_agent) != 0: + for k, v in merged_image_inputs_dict.items(): # 'imgs' 'rots' 'trans' ... + merged_image_inputs_dict[k] = torch.stack([v[index] for index in camera_agent_idx]) + + if not self.heterogeneous or (self.heterogeneous and sum(camera_agent) != 0): + output_dict['ego'].update({'image_inputs': merged_image_inputs_dict}) + + label_torch_dict = \ + self.post_processor.collate_batch(label_dict_list) + + # for centerpoint + label_torch_dict.update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask}) + + # (B, max_cav) + pairwise_t_matrix = torch.from_numpy(np.array(pairwise_t_matrix_list)) + + # add pairwise_t_matrix to label dict + + # object id is only used during inference, where batch size is 1. + # so here we only get the first element. + output_dict['ego'].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'label_dict': label_torch_dict, + 'object_ids': object_ids[0]}) + + + if self.visualize: + origin_lidar = \ + np.array(downsample_lidar_minimum(pcd_np_list=origin_lidar)) + origin_lidar = torch.from_numpy(origin_lidar) + output_dict['ego'].update({'origin_lidar': origin_lidar}) + + if self.supervise_single: + output_dict['ego'].update({ + "label_dict_single" : + {"pos_equal_one": torch.cat(pos_equal_one_single, dim=0), + "neg_equal_one": torch.cat(neg_equal_one_single, dim=0), + "targets": torch.cat(targets_single, dim=0)} + }) + + if self.heterogeneous: + output_dict['ego'].update({ + "lidar_agent_record": torch.from_numpy(np.concatenate(lidar_agent_list)) # [0,1,1,0,1...] + }) + + return output_dict + + def post_process(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. 
+ + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. + gt_box_tensor : torch.Tensor + The tensor of gt bounding box. + """ + pred_box_tensor, pred_score = \ + self.post_processor.post_process(data_dict, output_dict) + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict) + + return pred_box_tensor, pred_score, gt_box_tensor + + return EarlyFusionDataset + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/early_multiclass_fusion_dataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/early_multiclass_fusion_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..e6043ef809e8e4c71cf6a8e2347d93ef33c58dd5 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/early_multiclass_fusion_dataset.py @@ -0,0 +1,899 @@ +# early fusion dataset +import random +import math +from collections import OrderedDict +import numpy as np +import torch +import copy +from icecream import ic +from PIL import Image +import pickle as pkl +from opencood.utils import box_utils as box_utils +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.data_utils.post_processor import build_postprocessor +from opencood.utils.camera_utils import ( + sample_augmentation, + img_transform, + normalize_img, + img_to_tensor, +) +# from opencood.utils.heter_utils import AgentSelector +from opencood.utils.common_utils import merge_features_to_dict +from opencood.utils.transformation_utils import x1_to_x2, x_to_world, get_pairwise_transformation +from opencood.utils.pose_utils import add_noise_data_dict, add_noise_data_dict_asymmetric +from opencood.utils.pcd_utils import ( + mask_points_by_range, + mask_ego_points, + mask_ego_points_v2, + shuffle_points, + downsample_lidar_minimum, +) +from opencood.utils.common_utils import read_json + + +def getEarlymulticlassFusionDataset(cls): + """ + cls: the Basedataset. + """ + class EarlymulticlassFusionDataset(cls): + def __init__(self, params, visualize, train=True): + super().__init__(params, visualize, train) + # supervise single + self.supervise_single = True if ('supervise_single' in params['model']['args'] and params['model']['args']['supervise_single']) \ + else False + self.proj_first = False if 'proj_first' not in params['fusion']['args']\ + else params['fusion']['args']['proj_first'] + + self.anchor_box = self.post_processor.generate_anchor_box() + self.anchor_box_torch = torch.from_numpy(self.anchor_box) + + self.heterogeneous = False + if 'heter' in params: + self.heterogeneous = True + self.selector = AgentSelector(params['heter'], self.max_cav) + self.kd_flag = params.get('kd_flag', False) + self.box_align = False + if "box_align" in params: + self.box_align = True + self.stage1_result_path = params['box_align']['train_result'] if train else params['box_align']['val_result'] + self.stage1_result = read_json(self.stage1_result_path) + self.box_align_args = params['box_align']['args'] + self.multiclass = params['model']['args']['multi_class'] + self.online_eval_only = False + + def get_item_single_car(self, selected_cav_base, ego_cav_base, base_data_dict, tpe='all', cav_id='car_0', online_eval=False): + """ + Process a single CAV's information for the train/test pipeline. + + + Parameters + ---------- + selected_cav_base : dict + The dictionary contains a single CAV's raw information. 
+ including 'params', 'camera_data' + ego_pose : list, length 6 + The ego vehicle lidar pose under world coordinate. + ego_pose_clean : list, length 6 + only used for gt box generation + + Returns + ------- + selected_cav_processed : dict + The dictionary contains the cav's processed information. + """ + selected_cav_processed = {} + ego_pose, ego_pose_clean = ego_cav_base['params']['lidar_pose'], ego_cav_base['params']['lidar_pose_clean'] + selected_pose, selected_pose_clean = selected_cav_base['params']['lidar_pose'], selected_cav_base['params']['lidar_pose_clean'] + + # calculate the transformation matrix + transformation_matrix = \ + x1_to_x2(selected_cav_base['params']['lidar_pose'], + ego_pose) # T_ego_cav + transformation_matrix_clean = \ + x1_to_x2(selected_cav_base['params']['lidar_pose_clean'], + ego_pose_clean) + + # lidar + if tpe == 'all': + if self.load_lidar_file or self.visualize: + # process lidar + lidar_np = selected_cav_base['lidar_np'] + lidar_np = shuffle_points(lidar_np) + # remove points that hit itself + if not cav_id.startswith('rsu'): + lidar_np = mask_ego_points_v2(lidar_np) + # project the lidar to ego space + # x,y,z in ego space + + project_lidar_bank = [] + lidar_bank = [] + for agent_id in base_data_dict: + collab_cav_base = base_data_dict[agent_id] + collab_lidar_np = collab_cav_base['lidar_np'] + collab_lidar_np = shuffle_points(collab_lidar_np) + # remove points that hit itself + if not agent_id.startswith('rsu'): + collab_lidar_np = mask_ego_points_v2(collab_lidar_np) + # project the lidar to ego space + # x,y,z in ego space + + # calculate the transformation matrix + transformation_matrix_for_selected = \ + x1_to_x2(collab_cav_base['params']['lidar_pose'], + selected_pose) # T_ego_cav + + projected_collab_lidar = \ + box_utils.project_points_by_matrix_torch(collab_lidar_np[:, :3], + transformation_matrix_for_selected) + project_lidar_bank.append(projected_collab_lidar) + lidar_bank.append(collab_lidar_np) + + projected_lidar = np.concatenate(project_lidar_bank, axis=0) + lidar_np = np.concatenate(lidar_bank, axis=0) + + # if self.proj_first: + lidar_np[:, :3] = projected_lidar + if self.visualize: + # filter lidar + if not selected_cav_base['ego']: + projected_lidar *= 0 + selected_cav_processed.update({'projected_lidar': projected_lidar}) + + if self.kd_flag: + lidar_proj_np = copy.deepcopy(lidar_np) + lidar_proj_np[:,:3] = projected_lidar + + selected_cav_processed.update({'projected_lidar': lidar_proj_np}) + + processed_lidar = self.pre_processor.preprocess(lidar_np) + selected_cav_processed.update({'processed_features': processed_lidar}) + + if not online_eval: + # generate targets label single GT, note the reference pose is itself. 
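+                # Shape sketch (an assumption based on the padded arrays built further
+                # below, not a guarantee): object_bbx_center is padded to
+                # params['postprocess']['max_num'] rows of 7 values (x, y, z, the three
+                # box extents, yaw), object_bbx_mask is a 0/1 vector of the same length
+                # marking the valid rows, and object_ids lists the ids of those boxes.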
+ object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center( + [selected_cav_base], selected_cav_base['params']['lidar_pose'] + ) + + label_dict = {} + if tpe == 'all': + # unused label + if False: + label_dict = self.post_processor.generate_label( + gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask + ) + selected_cav_processed.update({ + "single_label_dict": label_dict, + "single_object_bbx_center": object_bbx_center, + "single_object_bbx_mask": object_bbx_mask}) + + if tpe == 'all': + # camera + if self.load_camera_file: + camera_data_list = selected_cav_base["camera_data"] + + params = selected_cav_base["params"] + imgs = [] + rots = [] + trans = [] + intrins = [] + extrinsics = [] + post_rots = [] + post_trans = [] + + for idx, img in enumerate(camera_data_list): + camera_to_lidar, camera_intrinsic = self.get_ext_int(params, idx) + + intrin = torch.from_numpy(camera_intrinsic) + rot = torch.from_numpy( + camera_to_lidar[:3, :3] + ) # R_wc, we consider world-coord is the lidar-coord + tran = torch.from_numpy(camera_to_lidar[:3, 3]) # T_wc + + post_rot = torch.eye(2) + post_tran = torch.zeros(2) + + img_src = [img] + + # depth + if self.load_depth_file: + depth_img = selected_cav_base["depth_data"][idx] + img_src.append(depth_img) + else: + depth_img = None + + # data augmentation + resize, resize_dims, crop, flip, rotate = sample_augmentation( + self.data_aug_conf, self.train + ) + img_src, post_rot2, post_tran2 = img_transform( + img_src, + post_rot, + post_tran, + resize=resize, + resize_dims=resize_dims, + crop=crop, + flip=flip, + rotate=rotate, + ) + # for convenience, make augmentation matrices 3x3 + post_tran = torch.zeros(3) + post_rot = torch.eye(3) + post_tran[:2] = post_tran2 + post_rot[:2, :2] = post_rot2 + + # decouple RGB and Depth + + img_src[0] = normalize_img(img_src[0]) + if self.load_depth_file: + img_src[1] = img_to_tensor(img_src[1]) * 255 + + imgs.append(torch.cat(img_src, dim=0)) + intrins.append(intrin) + extrinsics.append(torch.from_numpy(camera_to_lidar)) + rots.append(rot) + trans.append(tran) + post_rots.append(post_rot) + post_trans.append(post_tran) + + selected_cav_processed.update( + { + "image_inputs": + { + "imgs": torch.stack(imgs), # [Ncam, 3or4, H, W] + "intrins": torch.stack(intrins), + "extrinsics": torch.stack(extrinsics), + "rots": torch.stack(rots), + "trans": torch.stack(trans), + "post_rots": torch.stack(post_rots), + "post_trans": torch.stack(post_trans), + } + } + ) + + # anchor box + selected_cav_processed.update({"anchor_box": self.anchor_box}) + + + if not online_eval: + # note the reference pose ego + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center([selected_cav_base], + ego_pose_clean) + selected_cav_processed.update( + { + "object_bbx_center": object_bbx_center[object_bbx_mask == 1], + "object_bbx_mask": object_bbx_mask, + "object_ids": object_ids, + } + ) + + selected_cav_processed.update( + { + 'transformation_matrix': transformation_matrix, + 'transformation_matrix_clean': transformation_matrix_clean + } + ) + + return selected_cav_processed + + def __getitem__(self, idx, extra_source=None, data_dir=None): + + if data_dir is not None: + extra_source=1 + + object_bbx_center_list = [] + object_bbx_mask_list = [] + object_id_dict = {} + + object_bbx_center_list_single = [] + object_bbx_mask_list_single = [] + + + output_dict = {} + for tpe in ['all', 0, 1, 3]: + output_single_class = self.__getitem_single_class__(idx, tpe, extra_source, data_dir) + 
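+                # Assembly sketch for the multiclass case: each tpe in (0, 1, 3) produces a
+                # single-class sample whose box array and mask are collected below; after
+                # the loop they are stacked along a new class axis, so the 'all' entry ends
+                # up holding roughly (num_class, max_num, 7) centers while the shared
+                # lidar/camera inputs live only in output_dict['all'].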
output_dict[tpe] = output_single_class + if tpe == 'all' and extra_source==None: + continue + elif tpe == 'all' and extra_source!=None: + break + object_bbx_center_list.append(output_single_class['ego']['object_bbx_center']) + object_bbx_mask_list.append(output_single_class['ego']['object_bbx_mask']) + if self.supervise_single: + object_bbx_center_list_single.append(output_single_class['ego']['single_object_bbx_center_torch']) + object_bbx_mask_list_single.append(output_single_class['ego']['single_object_bbx_mask_torch']) + + object_id_dict[tpe] = output_single_class['ego']['object_ids'] + + if self.multiclass and extra_source==None: + output_dict['all']['ego']['object_bbx_center'] = np.stack(object_bbx_center_list, axis=0) + output_dict['all']['ego']['object_bbx_mask'] = np.stack(object_bbx_mask_list, axis=0) + if self.supervise_single: + output_dict['all']['ego']['single_object_bbx_center_torch'] = torch.stack(object_bbx_center_list_single, axis=1) + output_dict['all']['ego']['single_object_bbx_mask_torch'] = torch.stack(object_bbx_mask_list_single, axis=1) + + output_dict['all']['ego']['object_ids'] = object_id_dict + # print('finish get item') + return output_dict['all'] + + def __getitem_single_class__(self, idx, tpe=None, extra_source=None, data_dir=None): + + if extra_source is None and data_dir is None: + base_data_dict = self.retrieve_base_data(idx, tpe) ## {id:{'ego':True/False, 'params': {'lidar_pose','speed','vehicles','ego_pos',...}, 'lidar_np': array (N,4)}} + elif data_dir is not None: + base_data_dict = self.retrieve_base_data(idx=None, tpe=tpe, data_dir=data_dir) + elif extra_source is not None: + base_data_dict = self.retrieve_base_data(idx=None, tpe=tpe, extra_source=extra_source) + + # base_data_dict = add_noise_data_dict(base_data_dict,self.params['noise_setting']) + base_data_dict = add_noise_data_dict_asymmetric(base_data_dict,self.params['noise_setting']) + processed_data_dict = OrderedDict() + processed_data_dict['ego'] = {} + + ego_id = -1 + ego_lidar_pose = [] + ego_cav_base = None + + # first find the ego vehicle's lidar pose + for cav_id, cav_content in base_data_dict.items(): + if cav_content['ego']: + ego_id = cav_id + ego_lidar_pose = cav_content['params']['lidar_pose'] + ego_cav_base = cav_content + break + + assert cav_id == list(base_data_dict.keys())[ + 0], "The first element in the OrderedDict must be ego" + assert ego_id != -1 + assert len(ego_lidar_pose) > 0 + + agents_image_inputs = [] + processed_features = [] + object_stack = [] + object_id_stack = [] + single_label_list = [] + single_object_bbx_center_list = [] + single_object_bbx_mask_list = [] + too_far = [] + lidar_pose_list = [] + lidar_pose_clean_list = [] + cav_id_list = [] + projected_lidar_clean_list = [] # disconet + + if self.visualize or self.kd_flag: + projected_lidar_stack = [] + + # loop over all CAVs to process information + for cav_id, selected_cav_base in base_data_dict.items(): + # check if the cav is within the communication range with ego + distance = \ + math.sqrt((selected_cav_base['params']['lidar_pose'][0] - + ego_lidar_pose[0]) ** 2 + ( + selected_cav_base['params'][ + 'lidar_pose'][1] - ego_lidar_pose[ + 1]) ** 2) + + # if distance is too far, we will just skip this agent + if distance > self.params['comm_range']: + too_far.append(cav_id) + continue + + lidar_pose_clean_list.append(selected_cav_base['params']['lidar_pose_clean']) + lidar_pose_list.append(selected_cav_base['params']['lidar_pose']) # 6dof pose + cav_id_list.append(cav_id) + + for cav_id in too_far: + 
base_data_dict.pop(cav_id) + + if self.box_align and str(idx) in self.stage1_result.keys(): # False + from opencood.models.sub_modules.box_align_v2 import box_alignment_relative_sample_np + stage1_content = self.stage1_result[str(idx)] + if stage1_content is not None: + all_agent_id_list = stage1_content['cav_id_list'] # include those out of range + all_agent_corners_list = stage1_content['pred_corner3d_np_list'] + all_agent_uncertainty_list = stage1_content['uncertainty_np_list'] + + cur_agent_id_list = cav_id_list + cur_agent_pose = [base_data_dict[cav_id]['params']['lidar_pose'] for cav_id in cav_id_list] + cur_agnet_pose = np.array(cur_agent_pose) + cur_agent_in_all_agent = [all_agent_id_list.index(cur_agent) for cur_agent in cur_agent_id_list] # indexing current agent in `all_agent_id_list` + + pred_corners_list = [np.array(all_agent_corners_list[cur_in_all_ind], dtype=np.float64) + for cur_in_all_ind in cur_agent_in_all_agent] + uncertainty_list = [np.array(all_agent_uncertainty_list[cur_in_all_ind], dtype=np.float64) + for cur_in_all_ind in cur_agent_in_all_agent] + + if sum([len(pred_corners) for pred_corners in pred_corners_list]) != 0: + refined_pose = box_alignment_relative_sample_np(pred_corners_list, + cur_agnet_pose, + uncertainty_list=uncertainty_list, + **self.box_align_args) + cur_agnet_pose[:,[0,1,4]] = refined_pose + + for i, cav_id in enumerate(cav_id_list): + lidar_pose_list[i] = cur_agnet_pose[i].tolist() + base_data_dict[cav_id]['params']['lidar_pose'] = cur_agnet_pose[i].tolist() + + pairwise_t_matrix = \ + get_pairwise_transformation(base_data_dict, + self.max_cav, + self.proj_first) + + lidar_poses = np.array(lidar_pose_list).reshape(-1, 6) # [N_cav, 6] + lidar_poses_clean = np.array(lidar_pose_clean_list).reshape(-1, 6) # [N_cav, 6] + + # merge preprocessed features from different cavs into the same dict + cav_num = len(cav_id_list) + + # heterogeneous + if self.heterogeneous: + lidar_agent, camera_agent = self.selector.select_agent(idx) + lidar_agent = lidar_agent[:cav_num] + processed_data_dict['ego'].update({"lidar_agent": lidar_agent}) + + for _i, cav_id in enumerate(cav_id_list): + selected_cav_base = base_data_dict[cav_id] + + # dynamic object center generator! 
for heterogeneous input + if (not self.visualize) and self.heterogeneous and lidar_agent[_i]: + self.generate_object_center = self.generate_object_center_lidar + elif (not self.visualize) and self.heterogeneous and (not lidar_agent[_i]): + self.generate_object_center = self.generate_object_center_camera + + selected_cav_processed = self.get_item_single_car( + selected_cav_base, + ego_cav_base, + base_data_dict, + tpe, + cav_id, + extra_source!=None) + + if extra_source==None: + object_stack.append(selected_cav_processed['object_bbx_center']) + object_id_stack += selected_cav_processed['object_ids'] + if tpe == 'all': + if self.load_lidar_file: + processed_features.append( + selected_cav_processed['processed_features']) + if self.load_camera_file: + agents_image_inputs.append( + selected_cav_processed['image_inputs']) + + if self.visualize or self.kd_flag: + projected_lidar_stack.append( + selected_cav_processed['projected_lidar']) + + if self.supervise_single and extra_source==None: + single_label_list.append(selected_cav_processed['single_label_dict']) + single_object_bbx_center_list.append(selected_cav_processed['single_object_bbx_center']) + single_object_bbx_mask_list.append(selected_cav_processed['single_object_bbx_mask']) + + # generate single view GT label + if self.supervise_single and extra_source==None: + single_label_dicts = {} + if tpe == 'all': + # unused label + if False: + single_label_dicts = self.post_processor.collate_batch(single_label_list) + single_object_bbx_center = torch.from_numpy(np.array(single_object_bbx_center_list)) + single_object_bbx_mask = torch.from_numpy(np.array(single_object_bbx_mask_list)) + processed_data_dict['ego'].update({ + "single_label_dict_torch": single_label_dicts, + "single_object_bbx_center_torch": single_object_bbx_center, + "single_object_bbx_mask_torch": single_object_bbx_mask, + }) + + if self.kd_flag: + stack_lidar_np = np.vstack(projected_lidar_stack) + stack_lidar_np = mask_points_by_range(stack_lidar_np, + self.params['preprocess'][ + 'cav_lidar_range']) + stack_feature_processed = self.pre_processor.preprocess(stack_lidar_np) + processed_data_dict['ego'].update({'teacher_processed_lidar': + stack_feature_processed}) + + if extra_source is None: + # exclude all repetitive objects + unique_indices = \ + [object_id_stack.index(x) for x in set(object_id_stack)] + object_stack = np.vstack(object_stack) + object_stack = object_stack[unique_indices] + + # make sure bounding boxes across all frames have the same number + object_bbx_center = \ + np.zeros((self.params['postprocess']['max_num'], 7)) + mask = np.zeros(self.params['postprocess']['max_num']) + object_bbx_center[:object_stack.shape[0], :] = object_stack + mask[:object_stack.shape[0]] = 1 + + processed_data_dict['ego'].update( + {'object_bbx_center': object_bbx_center, # (100,7) + 'object_bbx_mask': mask, # (100,) + 'object_ids': [object_id_stack[i] for i in unique_indices], + } + ) + + + # generate targets label + label_dict = {} + if tpe == 'all': + # unused label + if False: + label_dict = \ + self.post_processor.generate_label( + gt_box_center=object_bbx_center, + anchors=self.anchor_box, + mask=mask) + + processed_data_dict['ego'].update( + { + 'anchor_box': self.anchor_box, + 'label_dict': label_dict, + 'cav_num': cav_num, + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_poses_clean': lidar_poses_clean, + 'lidar_poses': lidar_poses}) + + if tpe == 'all': + if self.load_lidar_file: + merged_feature_dict = merge_features_to_dict(processed_features) + 
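+                    # Structure sketch (assuming the voxel preprocessor is used): each entry
+                    # of processed_features is a per-CAV dict with keys such as
+                    # 'voxel_features', 'voxel_coords' and 'voxel_num_points', and
+                    # merge_features_to_dict regroups them into one dict of lists keyed by
+                    # the same names so the collate step can concatenate across CAVs.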
processed_data_dict['ego'].update({'processed_lidar': merged_feature_dict}) + if self.load_camera_file: + merged_image_inputs_dict = merge_features_to_dict(agents_image_inputs, merge='stack') + processed_data_dict['ego'].update({'image_inputs': merged_image_inputs_dict}) + + if self.visualize: + processed_data_dict['ego'].update({'origin_lidar': + # projected_lidar_stack}) + np.vstack( + projected_lidar_stack)}) + processed_data_dict['ego'].update({'lidar_len': [len(projected_lidar_stack[i]) for i in range(len(projected_lidar_stack))]}) + + + processed_data_dict['ego'].update({'sample_idx': idx, + 'cav_id_list': cav_id_list}) + + img_front_list = [] + img_left_list = [] + img_right_list = [] + BEV_list = [] + + if self.visualize: + for car_id in base_data_dict: + if not base_data_dict[car_id]['ego'] == True: + continue + if 'rgb_front' in base_data_dict[car_id] and 'rgb_left' in base_data_dict[car_id] and 'rgb_right' in base_data_dict[car_id] and 'BEV' in base_data_dict[car_id] : + img_front_list.append(base_data_dict[car_id]['rgb_front']) + img_left_list.append(base_data_dict[car_id]['rgb_left']) + img_right_list.append(base_data_dict[car_id]['rgb_right']) + BEV_list.append(base_data_dict[car_id]['BEV']) + processed_data_dict['ego'].update({'img_front': img_front_list, + 'img_left': img_left_list, + 'img_right': img_right_list, + 'BEV': BEV_list}) + processed_data_dict['ego'].update({'scene_dict': base_data_dict['car_0']['scene_dict'], + 'frame_id': base_data_dict['car_0']['frame_id'], + }) + + + return processed_data_dict + + + def collate_batch_train(self, batch, online_eval_only=False): + # Intermediate fusion is different the other two + output_dict = {'ego': {}} + + object_bbx_center = [] + object_bbx_mask = [] + object_ids = [] + processed_lidar_list = [] + image_inputs_list = [] + # used to record different scenario + record_len = [] + label_dict_list = [] + lidar_pose_list = [] + origin_lidar = [] + lidar_len = [] + lidar_pose_clean_list = [] + + # heterogeneous + lidar_agent_list = [] + + # pairwise transformation matrix + pairwise_t_matrix_list = [] + + # disconet + teacher_processed_lidar_list = [] + + # image + img_front = [] + img_left = [] + img_right = [] + BEV = [] + + dict_list = [] + + ### 2022.10.10 single gt #### + if self.supervise_single: + pos_equal_one_single = [] + neg_equal_one_single = [] + targets_single = [] + object_bbx_center_single = [] + object_bbx_mask_single = [] + + for i in range(len(batch)): + ego_dict = batch[i]['ego'] + if not online_eval_only: + object_bbx_center.append(ego_dict['object_bbx_center']) + object_bbx_mask.append(ego_dict['object_bbx_mask']) + object_ids.append(ego_dict['object_ids']) + else: + object_ids.append(None) + lidar_pose_list.append(ego_dict['lidar_poses']) # ego_dict['lidar_pose'] is np.ndarray [N,6] + lidar_pose_clean_list.append(ego_dict['lidar_poses_clean']) + if self.load_lidar_file: + processed_lidar_list.append(ego_dict['processed_lidar']) + if self.load_camera_file: + image_inputs_list.append(ego_dict['image_inputs']) # different cav_num, ego_dict['image_inputs'] is dict. 
+ + record_len.append(ego_dict['cav_num']) + label_dict_list.append(ego_dict['label_dict']) + pairwise_t_matrix_list.append(ego_dict['pairwise_t_matrix']) + + dict_list.append([ego_dict['scene_dict'], ego_dict['frame_id']]) + + if self.visualize: + origin_lidar.append(ego_dict['origin_lidar']) + lidar_len.append(ego_dict['lidar_len']) + if len(ego_dict['img_front']) > 0 and len(ego_dict['img_right']) > 0 and len(ego_dict['img_left']) > 0 and len(ego_dict['BEV']) > 0: + img_front.append(ego_dict['img_front'][0]) + img_left.append(ego_dict['img_left'][0]) + img_right.append(ego_dict['img_right'][0]) + BEV.append(ego_dict['BEV'][0]) + + + if self.kd_flag: + teacher_processed_lidar_list.append(ego_dict['teacher_processed_lidar']) + + ### 2022.10.10 single gt #### + if self.supervise_single and not online_eval_only: + # unused label + if False: + pos_equal_one_single.append(ego_dict['single_label_dict_torch']['pos_equal_one']) + neg_equal_one_single.append(ego_dict['single_label_dict_torch']['neg_equal_one']) + targets_single.append(ego_dict['single_label_dict_torch']['targets']) + object_bbx_center_single.append(ego_dict['single_object_bbx_center_torch']) + object_bbx_mask_single.append(ego_dict['single_object_bbx_mask_torch']) + + # heterogeneous + if self.heterogeneous: + lidar_agent_list.append(ego_dict['lidar_agent']) + + # convert to numpy, (B, max_num, 7) + if not online_eval_only: + object_bbx_center = torch.from_numpy(np.array(object_bbx_center)) + object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask)) + else: + object_bbx_center = None + object_bbx_mask = None + + if self.load_lidar_file: + merged_feature_dict = merge_features_to_dict(processed_lidar_list) + + if self.heterogeneous: + lidar_agent = np.concatenate(lidar_agent_list) + lidar_agent_idx = lidar_agent.nonzero()[0].tolist() + for k, v in merged_feature_dict.items(): # 'voxel_features' 'voxel_num_points' 'voxel_coords' + merged_feature_dict[k] = [v[index] for index in lidar_agent_idx] + + if not self.heterogeneous or (self.heterogeneous and sum(lidar_agent) != 0): + processed_lidar_torch_dict = \ + self.pre_processor.collate_batch(merged_feature_dict) + output_dict['ego'].update({'processed_lidar': processed_lidar_torch_dict}) + + if self.load_camera_file: + merged_image_inputs_dict = merge_features_to_dict(image_inputs_list, merge='cat') + + if self.heterogeneous: + lidar_agent = np.concatenate(lidar_agent_list) + camera_agent = 1 - lidar_agent + camera_agent_idx = camera_agent.nonzero()[0].tolist() + if sum(camera_agent) != 0: + for k, v in merged_image_inputs_dict.items(): # 'imgs' 'rots' 'trans' ... 
+ merged_image_inputs_dict[k] = torch.stack([v[index] for index in camera_agent_idx]) + + if not self.heterogeneous or (self.heterogeneous and sum(camera_agent) != 0): + output_dict['ego'].update({'image_inputs': merged_image_inputs_dict}) + + record_len = torch.from_numpy(np.array(record_len, dtype=int)) + lidar_pose = torch.from_numpy(np.concatenate(lidar_pose_list, axis=0)) + lidar_pose_clean = torch.from_numpy(np.concatenate(lidar_pose_clean_list, axis=0)) + + # unused label + label_torch_dict = {} + if False: + label_torch_dict = \ + self.post_processor.collate_batch(label_dict_list) + # for centerpoint + label_torch_dict.update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask}) + + # (B, max_cav) + pairwise_t_matrix = torch.from_numpy(np.array(pairwise_t_matrix_list)) + + # add pairwise_t_matrix to label dict + label_torch_dict['pairwise_t_matrix'] = pairwise_t_matrix + label_torch_dict['record_len'] = record_len + + + # object id is only used during inference, where batch size is 1. + # so here we only get the first element. + output_dict['ego'].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'record_len': record_len, + 'label_dict': label_torch_dict, + 'object_ids': object_ids[0], + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_pose_clean': lidar_pose_clean, + 'lidar_pose': lidar_pose, + 'anchor_box': self.anchor_box_torch}) + + + output_dict['ego'].update({'dict_list': dict_list}) + + if self.visualize: + origin_lidar = torch.from_numpy(np.array(origin_lidar)) + output_dict['ego'].update({'origin_lidar': origin_lidar}) + lidar_len = np.array(lidar_len) + output_dict['ego'].update({'lidar_len': lidar_len}) + output_dict['ego'].update({'img_front': img_front}) + output_dict['ego'].update({'img_right': img_right}) + output_dict['ego'].update({'img_left': img_left}) + output_dict['ego'].update({'BEV': BEV}) + + if self.kd_flag: + teacher_processed_lidar_torch_dict = \ + self.pre_processor.collate_batch(teacher_processed_lidar_list) + output_dict['ego'].update({'teacher_processed_lidar':teacher_processed_lidar_torch_dict}) + + + if self.supervise_single and not online_eval_only: + output_dict['ego'].update({ + "label_dict_single":{ + # "pos_equal_one": torch.cat(pos_equal_one_single, dim=0), + # "neg_equal_one": torch.cat(neg_equal_one_single, dim=0), + # "targets": torch.cat(targets_single, dim=0), + # for centerpoint + "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0), + "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0) + }, + "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0), + "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0) + }) + + if self.heterogeneous: + output_dict['ego'].update({ + "lidar_agent_record": torch.from_numpy(np.concatenate(lidar_agent_list)) # [0,1,1,0,1...] + }) + + return output_dict + + def collate_batch_test(self, batch, online_eval_only=False): + + self.online_eval_only = online_eval_only + + assert len(batch) <= 1, "Batch size 1 is required during testing!" + output_dict = self.collate_batch_train(batch, online_eval_only) + if output_dict is None: + return None + + # check if anchor box in the batch + if batch[0]['ego']['anchor_box'] is not None: + output_dict['ego'].update({'anchor_box': + self.anchor_box_torch}) + + # save the transformation matrix (4, 4) to ego vehicle + # transformation is only used in post process (no use.) + # we all predict boxes in ego coord. 
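+            # With batch size 1 at test time every predicted box is already expressed in
+            # the ego frame, so an identity 4x4 suffices here; a real CAV-to-ego transform
+            # would only matter if predictions were kept in each CAV's own frame.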
+ transformation_matrix_torch = \ + torch.from_numpy(np.identity(4)).float() + transformation_matrix_clean_torch = \ + torch.from_numpy(np.identity(4)).float() + + output_dict['ego'].update({'transformation_matrix': + transformation_matrix_torch, + 'transformation_matrix_clean': + transformation_matrix_clean_torch,}) + + output_dict['ego'].update({ + "sample_idx": batch[0]['ego']['sample_idx'], + "cav_id_list": batch[0]['ego']['cav_id_list'] + }) + + return output_dict + + + def post_process(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. + gt_box_tensor : torch.Tensor + The tensor of gt bounding box. + """ + pred_box_tensor, pred_score = \ + self.post_processor.post_process(data_dict, output_dict) + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict) + + return pred_box_tensor, pred_score, gt_box_tensor + + def post_process_multiclass(self, data_dict, output_dict, online_eval_only=False): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. + gt_box_tensor : torch.Tensor + The tensor of gt bounding box. + """ + + if online_eval_only == False: + online_eval_only = self.online_eval_only + + num_class = output_dict['ego']['cls_preds'].shape[1] + + + pred_box_tensor_list = [] + pred_score_list = [] + gt_box_tensor_list = [] + + num_list = [0,1,3] + + for i in range(num_class): + data_dict_single = copy.deepcopy(data_dict) + output_dict_single = copy.deepcopy(output_dict) + if not online_eval_only: + data_dict_single['ego']['object_bbx_center'] = data_dict['ego']['object_bbx_center'][:,i,:,:] + data_dict_single['ego']['object_bbx_mask'] = data_dict['ego']['object_bbx_mask'][:,i,:] + data_dict_single['ego']['object_ids'] = data_dict['ego']['object_ids'][num_list[i]] + + output_dict_single['ego']['cls_preds'] = output_dict['ego']['cls_preds'][:,i:i+1,:,:] + output_dict_single['ego']['reg_preds'] = output_dict['ego']['reg_preds_multiclass'][:,i,:,:] + + pred_box_tensor, pred_score = \ + self.post_processor.post_process(data_dict_single, output_dict_single) + + if not online_eval_only: + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict_single) + else: + gt_box_tensor = None + + pred_box_tensor_list.append(pred_box_tensor) + pred_score_list.append(pred_score) + gt_box_tensor_list.append(gt_box_tensor) + + return pred_box_tensor_list, pred_score_list, gt_box_tensor_list + + return EarlymulticlassFusionDataset + + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_2stage_fusion_dataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_2stage_fusion_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..5e65f96b42d512b63b5ffb8c2fd5d68f178f1edb --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_2stage_fusion_dataset.py @@ -0,0 +1,603 @@ +# intermediate fusion dataset +import random +import 
math +from collections import OrderedDict +import numpy as np +import torch +import copy +from icecream import ic +from PIL import Image +import pickle as pkl +from opencood.utils import box_utils as box_utils +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.data_utils.post_processor import build_postprocessor +from opencood.utils.camera_utils import ( + sample_augmentation, + img_transform, + normalize_img, + img_to_tensor, +) +from opencood.utils.common_utils import merge_features_to_dict +from opencood.utils.transformation_utils import x1_to_x2, x_to_world, get_pairwise_transformation +from opencood.utils.pose_utils import add_noise_data_dict +from opencood.utils.pcd_utils import ( + mask_points_by_range, + mask_ego_points, + shuffle_points, + downsample_lidar_minimum, +) + +def getIntermediate2stageFusionDataset(cls): + """ + cls: the Basedataset. + """ + class Intermediate2stageFusionDataset(cls): + def __init__(self, params, visualize, train=True): + super().__init__(params, visualize, train) + # intermediate and supervise single + self.supervise_single = True if ('supervise_single' in params['model']['args'] and params['model']['args']['supervise_single']) \ + else False + # it is assert to be False but by default it will load single label for 1-stage training. + assert self.supervise_single is False + + self.proj_first = False if 'proj_first' not in params['fusion']['args']\ + else params['fusion']['args']['proj_first'] + + self.anchor_box = self.post_processor.generate_anchor_box() + self.anchor_box_torch = torch.from_numpy(self.anchor_box) + + self.heterogeneous = False + if 'heter' in params: + self.heterogeneous = True + + def get_item_single_car(self, selected_cav_base, ego_cav_base): + """ + Process a single CAV's information for the train/test pipeline. + + + Parameters + ---------- + selected_cav_base : dict + The dictionary contains a single CAV's raw information. + including 'params', 'camera_data' + ego_pose : list, length 6 + The ego vehicle lidar pose under world coordinate. + ego_pose_clean : list, length 6 + only used for gt box generation + + Returns + ------- + selected_cav_processed : dict + The dictionary contains the cav's processed information. 
+ """ + selected_cav_processed = {} + ego_pose, ego_pose_clean = ego_cav_base['params']['lidar_pose'], ego_cav_base['params']['lidar_pose_clean'] + + # calculate the transformation matrix + transformation_matrix = \ + x1_to_x2(selected_cav_base['params']['lidar_pose'], + ego_pose) # T_ego_cav + transformation_matrix_clean = \ + x1_to_x2(selected_cav_base['params']['lidar_pose_clean'], + ego_pose_clean) + + # lidar + if self.load_lidar_file or self.visualize: + # process lidar + lidar_np = selected_cav_base['lidar_np'] + lidar_np = shuffle_points(lidar_np) + # remove points that hit itself + lidar_np = mask_ego_points(lidar_np) + + # no projected lidar + no_project_lidar = copy.deepcopy(lidar_np) + + # project the lidar to ego space + # x,y,z in ego space + projected_lidar = \ + box_utils.project_points_by_matrix_torch(lidar_np[:, :3], + transformation_matrix) + if self.proj_first: # + lidar_np[:, :3] = projected_lidar + + if self.visualize: + # filter lidar + selected_cav_processed.update({'projected_lidar': projected_lidar}) + + processed_lidar = self.pre_processor.preprocess(lidar_np) + selected_cav_processed.update({'projected_lidar': projected_lidar, + 'no_projected_lidar': no_project_lidar, + 'processed_features': processed_lidar}) + + # generate targets label single GT, note the reference pose is itself. + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center( + [selected_cav_base], selected_cav_base['params']['lidar_pose'] + ) + label_dict = self.post_processor.generate_label( + gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask + ) + selected_cav_processed.update({"object_bbx_center_no_coop": object_bbx_center[object_bbx_mask==1], + "single_label_dict": label_dict}) + + # camera + if self.load_camera_file: + camera_data_list = selected_cav_base["camera_data"] + + params = selected_cav_base["params"] + imgs = [] + rots = [] + trans = [] + intrins = [] + post_rots = [] + post_trans = [] + + for idx, img in enumerate(camera_data_list): + camera_to_lidar, camera_intrinsic = self.get_ext_int(params, idx) + + intrin = torch.from_numpy(camera_intrinsic) + rot = torch.from_numpy( + camera_to_lidar[:3, :3] + ) # R_wc, we consider world-coord is the lidar-coord + tran = torch.from_numpy(camera_to_lidar[:3, 3]) # T_wc + + post_rot = torch.eye(2) + post_tran = torch.zeros(2) + + img_src = [img] + + # depth + if self.load_depth_file: + depth_img = selected_cav_base["depth_data"][idx] + img_src.append(depth_img) + else: + depth_img = None + + # data augmentation + resize, resize_dims, crop, flip, rotate = sample_augmentation( + self.data_aug_conf, self.train + ) + img_src, post_rot2, post_tran2 = img_transform( + img_src, + post_rot, + post_tran, + resize=resize, + resize_dims=resize_dims, + crop=crop, + flip=flip, + rotate=rotate, + ) + # for convenience, make augmentation matrices 3x3 + post_tran = torch.zeros(3) + post_rot = torch.eye(3) + post_tran[:2] = post_tran2 + post_rot[:2, :2] = post_rot2 + + # decouple RGB and Depth + + img_src[0] = normalize_img(img_src[0]) + if self.load_depth_file: + img_src[1] = img_to_tensor(img_src[1]) * 255 + + imgs.append(torch.cat(img_src, dim=0)) + intrins.append(intrin) + rots.append(rot) + trans.append(tran) + post_rots.append(post_rot) + post_trans.append(post_tran) + + selected_cav_processed.update( + { + "image_inputs": + { + "imgs": torch.stack(imgs), # [Ncam, 3or4, H, W] + "intrins": torch.stack(intrins), + "rots": torch.stack(rots), + "trans": torch.stack(trans), + "post_rots": 
torch.stack(post_rots), + "post_trans": torch.stack(post_trans), + } + } + ) + + # anchor box + selected_cav_processed.update({"anchor_box": self.anchor_box}) + + # note the reference pose ego + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center([selected_cav_base], + ego_pose_clean) + + selected_cav_processed.update( + { + "object_bbx_center": object_bbx_center[object_bbx_mask == 1], + "object_bbx_mask": object_bbx_mask, + "object_ids": object_ids, + 'transformation_matrix': transformation_matrix, + 'transformation_matrix_clean': transformation_matrix_clean + } + ) + + + return selected_cav_processed + + def __getitem__(self, idx): + base_data_dict = self.retrieve_base_data(idx) + base_data_dict = add_noise_data_dict(base_data_dict,self.params['noise_setting']) + + processed_data_dict = OrderedDict() + processed_data_dict['ego'] = {} + + ego_id = -1 + ego_lidar_pose = [] + ego_cav_base = None + + # first find the ego vehicle's lidar pose + for cav_id, cav_content in base_data_dict.items(): + if cav_content['ego']: + ego_id = cav_id + ego_lidar_pose = cav_content['params']['lidar_pose'] + ego_cav_base = cav_content + break + + assert cav_id == list(base_data_dict.keys())[ + 0], "The first element in the OrderedDict must be ego" + assert ego_id != -1 + assert len(ego_lidar_pose) > 0 + + agents_image_inputs = [] + processed_features = [] + object_stack = [] + object_id_stack = [] + single_label_list = [] + too_far = [] + lidar_pose_list = [] + lidar_pose_clean_list = [] + cav_id_list = [] + + projected_lidar_stack = [] + no_projected_lidar_stack = [] + + vsa_lidar_stack = [] + + if self.visualize: + projected_lidar_stack = [] + + # loop over all CAVs to process information + for cav_id, selected_cav_base in base_data_dict.items(): + # check if the cav is within the communication range with ego + distance = \ + math.sqrt((selected_cav_base['params']['lidar_pose'][0] - + ego_lidar_pose[0]) ** 2 + ( + selected_cav_base['params'][ + 'lidar_pose'][1] - ego_lidar_pose[ + 1]) ** 2) + + # if distance is too far, we will just skip this agent + if distance > self.params['comm_range']: + too_far.append(cav_id) + continue + + lidar_pose_clean_list.append(selected_cav_base['params']['lidar_pose_clean']) + lidar_pose_list.append(selected_cav_base['params']['lidar_pose']) # 6dof pose + cav_id_list.append(cav_id) + + for cav_id in too_far: + base_data_dict.pop(cav_id) + + + pairwise_t_matrix = \ + get_pairwise_transformation(base_data_dict, + self.max_cav, + self.proj_first) + + lidar_poses = np.array(lidar_pose_list).reshape(-1, 6) # [N_cav, 6] + lidar_poses_clean = np.array(lidar_pose_clean_list).reshape(-1, 6) # [N_cav, 6] + + # merge preprocessed features from different cavs into the same dict + cav_num = len(cav_id_list) + + # heterogeneous + if self.heterogeneous: + lidar_agent, camera_agent = self.selector.select_agent(idx) + lidar_agent = lidar_agent[:cav_num] + processed_data_dict['ego'].update({"lidar_agent": lidar_agent}) + + + for _i, cav_id in enumerate(cav_id_list): + selected_cav_base = base_data_dict[cav_id] + + # dynamic object center generator! for heterogeneous input. 
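+                # Heterogeneous-only switch (a sketch of the intent): CAVs flagged as lidar
+                # agents build their GT centers with the lidar-based generator, the
+                # remaining camera agents use the camera-based one, and when visualizing
+                # the default generator is kept unchanged.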
+ if (not self.visualize) and self.heterogeneous and lidar_agent[_i]: + self.generate_object_center = self.generate_object_center_lidar + elif (not self.visualize) and self.heterogeneous and (not lidar_agent[_i]): + self.generate_object_center = self.generate_object_center_camera + + selected_cav_processed = self.get_item_single_car( + selected_cav_base, + ego_cav_base) + + object_stack.append(selected_cav_processed['object_bbx_center']) + object_id_stack += selected_cav_processed['object_ids'] + + if self.load_lidar_file: + processed_features.append( + selected_cav_processed['processed_features']) + if self.proj_first: + vsa_lidar_stack.append(selected_cav_processed['projected_lidar']) + else: + vsa_lidar_stack.append(selected_cav_processed['no_projected_lidar']) + + if self.load_camera_file: + agents_image_inputs.append( + selected_cav_processed['image_inputs']) + + if self.visualize: + projected_lidar_stack.append( + selected_cav_processed['projected_lidar']) + + single_label_list.append(selected_cav_processed['single_label_dict']) + + # generate single view label (no coop) label + label_dict_no_coop = single_label_list # [{cav1_label}, {cav2_label}...] + + + # exclude all repetitive objects + unique_indices = \ + [object_id_stack.index(x) for x in set(object_id_stack)] + object_stack = np.vstack(object_stack) + object_stack = object_stack[unique_indices] + + # make sure bounding boxes across all frames have the same number + object_bbx_center = \ + np.zeros((self.params['postprocess']['max_num'], 7)) + mask = np.zeros(self.params['postprocess']['max_num']) + object_bbx_center[:object_stack.shape[0], :] = object_stack + mask[:object_stack.shape[0]] = 1 + + if self.load_lidar_file: + merged_feature_dict = merge_features_to_dict(processed_features) + processed_data_dict['ego'].update({'processed_lidar': merged_feature_dict, + 'vsa_lidar': vsa_lidar_stack}) + if self.load_camera_file: + merged_image_inputs_dict = merge_features_to_dict(agents_image_inputs, merge='stack') + processed_data_dict['ego'].update({'image_inputs': merged_image_inputs_dict}) + + # generate targets label + label_dict_coop = \ + self.post_processor.generate_label( + gt_box_center=object_bbx_center, + anchors=self.anchor_box, + mask=mask) + + label_dict = { + 'stage1': label_dict_no_coop, # list + 'stage2': label_dict_coop # dict + } + + processed_data_dict['ego'].update( + {'object_bbx_center': object_bbx_center, + 'object_bbx_mask': mask, + 'object_ids': [object_id_stack[i] for i in unique_indices], + 'anchor_box': self.anchor_box, + 'label_dict': label_dict, + 'cav_num': cav_num, + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_poses_clean': lidar_poses_clean, + 'lidar_poses': lidar_poses}) + + + if self.visualize: + processed_data_dict['ego'].update({'origin_lidar': + np.vstack( + projected_lidar_stack)}) + + + processed_data_dict['ego'].update({'sample_idx': idx, + 'cav_id_list': cav_id_list}) + + return processed_data_dict + + + def collate_batch_train(self, batch): + # Intermediate fusion is different the other two + output_dict = {'ego': {}} + + object_bbx_center = [] + object_bbx_mask = [] + object_ids = [] + processed_lidar_list = [] + image_inputs_list = [] + # used to record different scenario + record_len = [] + label_dict_no_coop_batch_list = [] + label_dict_list = [] + lidar_pose_list = [] + origin_lidar = [] + vsa_lidar = [] + lidar_pose_clean_list = [] + + # pairwise transformation matrix + pairwise_t_matrix_list = [] + + # heterogeneous + lidar_agent_list = [] + + for i in range(len(batch)): + 
ego_dict = batch[i]['ego'] + object_bbx_center.append(ego_dict['object_bbx_center']) + object_bbx_mask.append(ego_dict['object_bbx_mask']) + object_ids.append(ego_dict['object_ids']) + lidar_pose_list.append(ego_dict['lidar_poses']) # ego_dict['lidar_pose'] is np.ndarray [N,6] + lidar_pose_clean_list.append(ego_dict['lidar_poses_clean']) + if self.load_lidar_file: + processed_lidar_list.append(ego_dict['processed_lidar']) + vsa_lidar.append(ego_dict['vsa_lidar']) + if self.load_camera_file: + image_inputs_list.append(ego_dict['image_inputs']) # different cav_num, ego_dict['image_inputs'] is dict. + + record_len.append(ego_dict['cav_num']) + label_dict_no_coop_batch_list.append(ego_dict['label_dict']['stage1']) + label_dict_list.append(ego_dict['label_dict']['stage2']) + + pairwise_t_matrix_list.append(ego_dict['pairwise_t_matrix']) + + if self.visualize: + origin_lidar.append(ego_dict['origin_lidar']) + + # heterogeneous + if self.heterogeneous: + lidar_agent_list.append(ego_dict['lidar_agent']) + + + # convert to numpy, (B, max_num, 7) + object_bbx_center = torch.from_numpy(np.array(object_bbx_center)) + object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask)) + + # example: {'voxel_features':[np.array([1,2,3]]), + # np.array([3,5,6]), ...]} + if self.load_lidar_file: + merged_feature_dict = merge_features_to_dict(processed_lidar_list) + # [sum(record_len), C, H, W] + if self.heterogeneous: + lidar_agent = np.concatenate(lidar_agent_list) + lidar_agent_idx = lidar_agent.nonzero()[0].tolist() + for k, v in merged_feature_dict.items(): # 'voxel_features' 'voxel_num_points' 'voxel_coords' + merged_feature_dict[k] = [v[index] for index in lidar_agent_idx] + + if not self.heterogeneous or (self.heterogeneous and sum(lidar_agent) != 0): + processed_lidar_torch_dict = \ + self.pre_processor.collate_batch(merged_feature_dict) + output_dict['ego'].update({'processed_lidar': processed_lidar_torch_dict}) + + if self.load_camera_file: + merged_image_inputs_dict = merge_features_to_dict(image_inputs_list, merge='cat') + + if self.heterogeneous: + lidar_agent = np.concatenate(lidar_agent_list) + camera_agent = 1 - lidar_agent + camera_agent_idx = camera_agent.nonzero()[0].tolist() + if sum(camera_agent) != 0: + for k, v in merged_image_inputs_dict.items(): # 'imgs' 'rots' 'trans' ... + merged_image_inputs_dict[k] = torch.stack([v[index] for index in camera_agent_idx]) + + if not self.heterogeneous or (self.heterogeneous and sum(camera_agent) != 0): + output_dict['ego'].update({'image_inputs': merged_image_inputs_dict}) + + record_len = torch.from_numpy(np.array(record_len, dtype=int)) + lidar_pose = torch.from_numpy(np.concatenate(lidar_pose_list, axis=0)) + lidar_pose_clean = torch.from_numpy(np.concatenate(lidar_pose_clean_list, axis=0)) + label_dict_no_coop_cavs_batch_list = [label_dict for label_dict_cavs_list in + label_dict_no_coop_batch_list for label_dict in + label_dict_cavs_list] + label_no_coop_torch_dict = \ + self.post_processor.collate_batch(label_dict_no_coop_cavs_batch_list) + + label_torch_dict = \ + self.post_processor.collate_batch(label_dict_list) + + # (B, max_cav) + pairwise_t_matrix = torch.from_numpy(np.array(pairwise_t_matrix_list)) + + # add pairwise_t_matrix to label dict + label_torch_dict['pairwise_t_matrix'] = pairwise_t_matrix + label_torch_dict['record_len'] = record_len + + # object id is only used during inference, where batch size is 1. + # so here we only get the first element. 
+ output_dict['ego'].update({ 'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'record_len': record_len, + 'label_dict': { + 'stage1': label_no_coop_torch_dict, + 'stage2': label_torch_dict, + }, + 'object_ids': object_ids[0], + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_pose_clean': lidar_pose_clean, + 'lidar_pose': lidar_pose, + 'proj_first': self.proj_first, + 'anchor_box': self.anchor_box_torch}) + + if self.load_lidar_file: + coords = [] + idx = 0 + for b in range(len(batch)): + for points in vsa_lidar[b]: + assert len(points) != 0 + coor_pad = np.pad(points, ((0, 0), (1, 0)), + mode="constant", constant_values=idx) + coords.append(coor_pad) + idx += 1 + origin_lidar_for_vsa = np.concatenate(coords, axis=0) + origin_lidar_for_vsa = torch.from_numpy(origin_lidar_for_vsa) + output_dict['ego'].update({'origin_lidar_for_vsa': origin_lidar_for_vsa}) + + if self.visualize: + origin_lidar = \ + np.array(downsample_lidar_minimum(pcd_np_list=origin_lidar)) + origin_lidar = torch.from_numpy(origin_lidar) + output_dict['ego'].update({'origin_lidar': origin_lidar}) + + if self.heterogeneous: + output_dict['ego'].update({ + "lidar_agent_record": torch.from_numpy(np.concatenate(lidar_agent_list)) # [0,1,1,0,1...] + }) + + return output_dict + + def collate_batch_test(self, batch): + assert len(batch) <= 1, "Batch size 1 is required during testing!" + output_dict = self.collate_batch_train(batch) + if output_dict is None: + return None + + # check if anchor box in the batch + output_dict['ego'].update({'anchor_box': self.anchor_box_torch}) + + # save the transformation matrix (4, 4) to ego vehicle + # transformation is only used in post process (no use.) + # we all predict boxes in ego coord. + transformation_matrix_torch = \ + torch.from_numpy(np.identity(4)).float() + transformation_matrix_clean_torch = \ + torch.from_numpy(np.identity(4)).float() + + output_dict['ego'].update({'transformation_matrix': + transformation_matrix_torch, + 'transformation_matrix_clean': + transformation_matrix_clean_torch,}) + + output_dict['ego'].update({ + "sample_idx": batch[0]['ego']['sample_idx'], + "cav_id_list": batch[0]['ego']['cav_id_list'] + }) + + return output_dict + + + def post_process(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. + gt_box_tensor : torch.Tensor + The tensor of gt bounding box. 
+ """ + pred_box_tensor, pred_score = \ + self.post_processor.post_process(data_dict, output_dict) + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict) + + return pred_box_tensor, pred_score, gt_box_tensor + + + return Intermediate2stageFusionDataset \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_fusion_dataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_fusion_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..0e720e16ea91b61435f2bb147013b44b6e5bf94d --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_fusion_dataset.py @@ -0,0 +1,679 @@ +# intermediate fusion dataset +import random +import math +from collections import OrderedDict +import numpy as np +import torch +import copy +from icecream import ic +from PIL import Image +import pickle as pkl +from opencood.utils import box_utils as box_utils +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.data_utils.post_processor import build_postprocessor +from opencood.utils.camera_utils import ( + sample_augmentation, + img_transform, + normalize_img, + img_to_tensor, +) +from opencood.utils.common_utils import merge_features_to_dict +from opencood.utils.transformation_utils import x1_to_x2, x_to_world, get_pairwise_transformation +from opencood.utils.pose_utils import add_noise_data_dict +from opencood.utils.pcd_utils import ( + mask_points_by_range, + mask_ego_points, + shuffle_points, + downsample_lidar_minimum, +) +from opencood.utils.common_utils import read_json + + +def getIntermediateFusionDataset(cls): + """ + cls: the Basedataset. + """ + class IntermediateFusionDataset(cls): + def __init__(self, params, visualize, train=True): + super().__init__(params, visualize, train) + # intermediate and supervise single + self.supervise_single = True if ('supervise_single' in params['model']['args'] and params['model']['args']['supervise_single']) \ + else False + self.proj_first = False if 'proj_first' not in params['fusion']['args']\ + else params['fusion']['args']['proj_first'] + + self.anchor_box = self.post_processor.generate_anchor_box() + self.anchor_box_torch = torch.from_numpy(self.anchor_box) + + self.heterogeneous = False + if 'heter' in params: + self.heterogeneous = True + + self.kd_flag = params.get('kd_flag', False) + + self.box_align = False + if "box_align" in params: + self.box_align = True + self.stage1_result_path = params['box_align']['train_result'] if train else params['box_align']['val_result'] + self.stage1_result = read_json(self.stage1_result_path) + self.box_align_args = params['box_align']['args'] + + + + + def get_item_single_car(self, selected_cav_base, ego_cav_base): + """ + Process a single CAV's information for the train/test pipeline. + + + Parameters + ---------- + selected_cav_base : dict + The dictionary contains a single CAV's raw information. + including 'params', 'camera_data' + ego_pose : list, length 6 + The ego vehicle lidar pose under world coordinate. + ego_pose_clean : list, length 6 + only used for gt box generation + + Returns + ------- + selected_cav_processed : dict + The dictionary contains the cav's processed information. 
+ """ + selected_cav_processed = {} + ego_pose, ego_pose_clean = ego_cav_base['params']['lidar_pose'], ego_cav_base['params']['lidar_pose_clean'] + + # calculate the transformation matrix + transformation_matrix = \ + x1_to_x2(selected_cav_base['params']['lidar_pose'], + ego_pose) # T_ego_cav + transformation_matrix_clean = \ + x1_to_x2(selected_cav_base['params']['lidar_pose_clean'], + ego_pose_clean) + + # lidar + if self.load_lidar_file or self.visualize: + # process lidar + lidar_np = selected_cav_base['lidar_np'] + lidar_np = shuffle_points(lidar_np) + # remove points that hit itself + lidar_np = mask_ego_points(lidar_np) + # project the lidar to ego space + # x,y,z in ego space + projected_lidar = \ + box_utils.project_points_by_matrix_torch(lidar_np[:, :3], + transformation_matrix) + if self.proj_first: + lidar_np[:, :3] = projected_lidar + + if self.visualize: + # filter lidar + selected_cav_processed.update({'projected_lidar': projected_lidar}) + + if self.kd_flag: + lidar_proj_np = copy.deepcopy(lidar_np) + lidar_proj_np[:,:3] = projected_lidar + + selected_cav_processed.update({'projected_lidar': lidar_proj_np}) + + processed_lidar = self.pre_processor.preprocess(lidar_np) + selected_cav_processed.update({'processed_features': processed_lidar}) + + # generate targets label single GT, note the reference pose is itself. + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center( + [selected_cav_base], selected_cav_base['params']['lidar_pose'] + ) + label_dict = self.post_processor.generate_label( + gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask + ) + selected_cav_processed.update({ + "single_label_dict": label_dict, + "single_object_bbx_center": object_bbx_center, + "single_object_bbx_mask": object_bbx_mask}) + + # camera + if self.load_camera_file: + camera_data_list = selected_cav_base["camera_data"] + + params = selected_cav_base["params"] + imgs = [] + rots = [] + trans = [] + intrins = [] + extrinsics = [] + post_rots = [] + post_trans = [] + + for idx, img in enumerate(camera_data_list): + camera_to_lidar, camera_intrinsic = self.get_ext_int(params, idx) + + intrin = torch.from_numpy(camera_intrinsic) + rot = torch.from_numpy( + camera_to_lidar[:3, :3] + ) # R_wc, we consider world-coord is the lidar-coord + tran = torch.from_numpy(camera_to_lidar[:3, 3]) # T_wc + + post_rot = torch.eye(2) + post_tran = torch.zeros(2) + + img_src = [img] + + # depth + if self.load_depth_file: + depth_img = selected_cav_base["depth_data"][idx] + img_src.append(depth_img) + else: + depth_img = None + + # data augmentation + resize, resize_dims, crop, flip, rotate = sample_augmentation( + self.data_aug_conf, self.train + ) + img_src, post_rot2, post_tran2 = img_transform( + img_src, + post_rot, + post_tran, + resize=resize, + resize_dims=resize_dims, + crop=crop, + flip=flip, + rotate=rotate, + ) + # for convenience, make augmentation matrices 3x3 + post_tran = torch.zeros(3) + post_rot = torch.eye(3) + post_tran[:2] = post_tran2 + post_rot[:2, :2] = post_rot2 + + # decouple RGB and Depth + + img_src[0] = normalize_img(img_src[0]) + if self.load_depth_file: + img_src[1] = img_to_tensor(img_src[1]) * 255 + + imgs.append(torch.cat(img_src, dim=0)) + intrins.append(intrin) + extrinsics.append(torch.from_numpy(camera_to_lidar)) + rots.append(rot) + trans.append(tran) + post_rots.append(post_rot) + post_trans.append(post_tran) + + + selected_cav_processed.update( + { + "image_inputs": + { + "imgs": torch.stack(imgs), # [Ncam, 3or4, H, W] + 
"intrins": torch.stack(intrins), + "extrinsics": torch.stack(extrinsics), + "rots": torch.stack(rots), + "trans": torch.stack(trans), + "post_rots": torch.stack(post_rots), + "post_trans": torch.stack(post_trans), + } + } + ) + + # anchor box + selected_cav_processed.update({"anchor_box": self.anchor_box}) + + # note the reference pose ego + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center([selected_cav_base], + ego_pose_clean) + + selected_cav_processed.update( + { + "object_bbx_center": object_bbx_center[object_bbx_mask == 1], + "object_bbx_mask": object_bbx_mask, + "object_ids": object_ids, + 'transformation_matrix': transformation_matrix, + 'transformation_matrix_clean': transformation_matrix_clean + } + ) + + + return selected_cav_processed + + def __getitem__(self, idx): + base_data_dict = self.retrieve_base_data(idx) + base_data_dict = add_noise_data_dict(base_data_dict,self.params['noise_setting']) + + processed_data_dict = OrderedDict() + processed_data_dict['ego'] = {} + + ego_id = -1 + ego_lidar_pose = [] + ego_cav_base = None + + # first find the ego vehicle's lidar pose + for cav_id, cav_content in base_data_dict.items(): + if cav_content['ego']: + ego_id = cav_id + ego_lidar_pose = cav_content['params']['lidar_pose'] + ego_cav_base = cav_content + break + + assert cav_id == list(base_data_dict.keys())[ + 0], "The first element in the OrderedDict must be ego" + assert ego_id != -1 + assert len(ego_lidar_pose) > 0 + + agents_image_inputs = [] + processed_features = [] + object_stack = [] + object_id_stack = [] + single_label_list = [] + single_object_bbx_center_list = [] + single_object_bbx_mask_list = [] + too_far = [] + lidar_pose_list = [] + lidar_pose_clean_list = [] + cav_id_list = [] + projected_lidar_clean_list = [] # disconet + + if self.visualize or self.kd_flag: + projected_lidar_stack = [] + + # loop over all CAVs to process information + for cav_id, selected_cav_base in base_data_dict.items(): + # check if the cav is within the communication range with ego + distance = \ + math.sqrt((selected_cav_base['params']['lidar_pose'][0] - + ego_lidar_pose[0]) ** 2 + ( + selected_cav_base['params'][ + 'lidar_pose'][1] - ego_lidar_pose[ + 1]) ** 2) + + # if distance is too far, we will just skip this agent + if distance > self.params['comm_range']: + too_far.append(cav_id) + continue + + lidar_pose_clean_list.append(selected_cav_base['params']['lidar_pose_clean']) + lidar_pose_list.append(selected_cav_base['params']['lidar_pose']) # 6dof pose + cav_id_list.append(cav_id) + + for cav_id in too_far: + base_data_dict.pop(cav_id) + + ########## Updated by Yifan Lu 2022.1.26 ############ + # box align to correct pose. + # stage1_content contains all agent. Even out of comm range. 
+ if self.box_align and str(idx) in self.stage1_result.keys(): + from opencood.models.sub_modules.box_align_v2 import box_alignment_relative_sample_np + stage1_content = self.stage1_result[str(idx)] + if stage1_content is not None: + all_agent_id_list = stage1_content['cav_id_list'] # include those out of range + all_agent_corners_list = stage1_content['pred_corner3d_np_list'] + all_agent_uncertainty_list = stage1_content['uncertainty_np_list'] + + cur_agent_id_list = cav_id_list + cur_agent_pose = [base_data_dict[cav_id]['params']['lidar_pose'] for cav_id in cav_id_list] + cur_agnet_pose = np.array(cur_agent_pose) + cur_agent_in_all_agent = [all_agent_id_list.index(cur_agent) for cur_agent in cur_agent_id_list] # indexing current agent in `all_agent_id_list` + + pred_corners_list = [np.array(all_agent_corners_list[cur_in_all_ind], dtype=np.float64) + for cur_in_all_ind in cur_agent_in_all_agent] + uncertainty_list = [np.array(all_agent_uncertainty_list[cur_in_all_ind], dtype=np.float64) + for cur_in_all_ind in cur_agent_in_all_agent] + + if sum([len(pred_corners) for pred_corners in pred_corners_list]) != 0: + refined_pose = box_alignment_relative_sample_np(pred_corners_list, + cur_agnet_pose, + uncertainty_list=uncertainty_list, + **self.box_align_args) + cur_agnet_pose[:,[0,1,4]] = refined_pose + + for i, cav_id in enumerate(cav_id_list): + lidar_pose_list[i] = cur_agnet_pose[i].tolist() + base_data_dict[cav_id]['params']['lidar_pose'] = cur_agnet_pose[i].tolist() + + + + pairwise_t_matrix = \ + get_pairwise_transformation(base_data_dict, + self.max_cav, + self.proj_first) + + lidar_poses = np.array(lidar_pose_list).reshape(-1, 6) # [N_cav, 6] + lidar_poses_clean = np.array(lidar_pose_clean_list).reshape(-1, 6) # [N_cav, 6] + + # merge preprocessed features from different cavs into the same dict + cav_num = len(cav_id_list) + + # heterogeneous + if self.heterogeneous: + lidar_agent, camera_agent = self.selector.select_agent(idx) + lidar_agent = lidar_agent[:cav_num] + processed_data_dict['ego'].update({"lidar_agent": lidar_agent}) + + for _i, cav_id in enumerate(cav_id_list): + selected_cav_base = base_data_dict[cav_id] + + # dynamic object center generator! 
for heterogeneous input + if (not self.visualize) and self.heterogeneous and lidar_agent[_i]: + self.generate_object_center = self.generate_object_center_lidar + elif (not self.visualize) and self.heterogeneous and (not lidar_agent[_i]): + self.generate_object_center = self.generate_object_center_camera + + selected_cav_processed = self.get_item_single_car( + selected_cav_base, + ego_cav_base) + + object_stack.append(selected_cav_processed['object_bbx_center']) + object_id_stack += selected_cav_processed['object_ids'] + if self.load_lidar_file: + processed_features.append( + selected_cav_processed['processed_features']) + if self.load_camera_file: + agents_image_inputs.append( + selected_cav_processed['image_inputs']) + + if self.visualize or self.kd_flag: + projected_lidar_stack.append( + selected_cav_processed['projected_lidar']) + + if self.supervise_single: + single_label_list.append(selected_cav_processed['single_label_dict']) + single_object_bbx_center_list.append(selected_cav_processed['single_object_bbx_center']) + single_object_bbx_mask_list.append(selected_cav_processed['single_object_bbx_mask']) + + # generate single view GT label + if self.supervise_single: + single_label_dicts = self.post_processor.collate_batch(single_label_list) + single_object_bbx_center = torch.from_numpy(np.array(single_object_bbx_center_list)) + single_object_bbx_mask = torch.from_numpy(np.array(single_object_bbx_mask_list)) + processed_data_dict['ego'].update({ + "single_label_dict_torch": single_label_dicts, + "single_object_bbx_center_torch": single_object_bbx_center, + "single_object_bbx_mask_torch": single_object_bbx_mask, + }) + + if self.kd_flag: + stack_lidar_np = np.vstack(projected_lidar_stack) + stack_lidar_np = mask_points_by_range(stack_lidar_np, + self.params['preprocess'][ + 'cav_lidar_range']) + stack_feature_processed = self.pre_processor.preprocess(stack_lidar_np) + processed_data_dict['ego'].update({'teacher_processed_lidar': + stack_feature_processed}) + + + # exclude all repetitive objects + unique_indices = \ + [object_id_stack.index(x) for x in set(object_id_stack)] + object_stack = np.vstack(object_stack) + object_stack = object_stack[unique_indices] + + # make sure bounding boxes across all frames have the same number + object_bbx_center = \ + np.zeros((self.params['postprocess']['max_num'], 7)) + mask = np.zeros(self.params['postprocess']['max_num']) + object_bbx_center[:object_stack.shape[0], :] = object_stack + mask[:object_stack.shape[0]] = 1 + + if self.load_lidar_file: + merged_feature_dict = merge_features_to_dict(processed_features) + processed_data_dict['ego'].update({'processed_lidar': merged_feature_dict}) + if self.load_camera_file: + merged_image_inputs_dict = merge_features_to_dict(agents_image_inputs, merge='stack') + processed_data_dict['ego'].update({'image_inputs': merged_image_inputs_dict}) + + + # generate targets label + label_dict = \ + self.post_processor.generate_label( + gt_box_center=object_bbx_center, + anchors=self.anchor_box, + mask=mask) + + processed_data_dict['ego'].update( + {'object_bbx_center': object_bbx_center, + 'object_bbx_mask': mask, + 'object_ids': [object_id_stack[i] for i in unique_indices], + 'anchor_box': self.anchor_box, + 'label_dict': label_dict, + 'cav_num': cav_num, + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_poses_clean': lidar_poses_clean, + 'lidar_poses': lidar_poses}) + + + if self.visualize: + processed_data_dict['ego'].update({'origin_lidar': + np.vstack( + projected_lidar_stack)}) + + + 
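+            # Layout of pairwise_t_matrix stored above (assuming the usual OpenCOOD
+            # implementation of get_pairwise_transformation): shape (max_cav, max_cav, 4, 4),
+            # where entry [i, j] maps agent i's lidar frame into agent j's frame and unused
+            # slots are padded with np.eye(4).  With proj_first=True it degenerates to all
+            # identities, since each CAV's points were already projected into the ego frame.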
processed_data_dict['ego'].update({'sample_idx': idx, + 'cav_id_list': cav_id_list}) + + return processed_data_dict + + + def collate_batch_train(self, batch): + # Intermediate fusion is different the other two + output_dict = {'ego': {}} + + object_bbx_center = [] + object_bbx_mask = [] + object_ids = [] + processed_lidar_list = [] + image_inputs_list = [] + # used to record different scenario + record_len = [] + label_dict_list = [] + lidar_pose_list = [] + origin_lidar = [] + lidar_pose_clean_list = [] + + # heterogeneous + lidar_agent_list = [] + + # pairwise transformation matrix + pairwise_t_matrix_list = [] + + # disconet + teacher_processed_lidar_list = [] + + ### 2022.10.10 single gt #### + if self.supervise_single: + pos_equal_one_single = [] + neg_equal_one_single = [] + targets_single = [] + object_bbx_center_single = [] + object_bbx_mask_single = [] + + for i in range(len(batch)): + ego_dict = batch[i]['ego'] + object_bbx_center.append(ego_dict['object_bbx_center']) + object_bbx_mask.append(ego_dict['object_bbx_mask']) + object_ids.append(ego_dict['object_ids']) + lidar_pose_list.append(ego_dict['lidar_poses']) # ego_dict['lidar_pose'] is np.ndarray [N,6] + lidar_pose_clean_list.append(ego_dict['lidar_poses_clean']) + if self.load_lidar_file: + processed_lidar_list.append(ego_dict['processed_lidar']) + if self.load_camera_file: + image_inputs_list.append(ego_dict['image_inputs']) # different cav_num, ego_dict['image_inputs'] is dict. + + record_len.append(ego_dict['cav_num']) + label_dict_list.append(ego_dict['label_dict']) + pairwise_t_matrix_list.append(ego_dict['pairwise_t_matrix']) + + if self.visualize: + origin_lidar.append(ego_dict['origin_lidar']) + + if self.kd_flag: + teacher_processed_lidar_list.append(ego_dict['teacher_processed_lidar']) + + ### 2022.10.10 single gt #### + if self.supervise_single: + pos_equal_one_single.append(ego_dict['single_label_dict_torch']['pos_equal_one']) + neg_equal_one_single.append(ego_dict['single_label_dict_torch']['neg_equal_one']) + targets_single.append(ego_dict['single_label_dict_torch']['targets']) + object_bbx_center_single.append(ego_dict['single_object_bbx_center_torch']) + object_bbx_mask_single.append(ego_dict['single_object_bbx_mask_torch']) + + # heterogeneous + if self.heterogeneous: + lidar_agent_list.append(ego_dict['lidar_agent']) + + # convert to numpy, (B, max_num, 7) + object_bbx_center = torch.from_numpy(np.array(object_bbx_center)) + object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask)) + + if self.load_lidar_file: + merged_feature_dict = merge_features_to_dict(processed_lidar_list) + + if self.heterogeneous: + lidar_agent = np.concatenate(lidar_agent_list) + lidar_agent_idx = lidar_agent.nonzero()[0].tolist() + for k, v in merged_feature_dict.items(): # 'voxel_features' 'voxel_num_points' 'voxel_coords' + merged_feature_dict[k] = [v[index] for index in lidar_agent_idx] + + if not self.heterogeneous or (self.heterogeneous and sum(lidar_agent) != 0): + processed_lidar_torch_dict = \ + self.pre_processor.collate_batch(merged_feature_dict) + output_dict['ego'].update({'processed_lidar': processed_lidar_torch_dict}) + + if self.load_camera_file: + merged_image_inputs_dict = merge_features_to_dict(image_inputs_list, merge='cat') + + if self.heterogeneous: + lidar_agent = np.concatenate(lidar_agent_list) + camera_agent = 1 - lidar_agent + camera_agent_idx = camera_agent.nonzero()[0].tolist() + if sum(camera_agent) != 0: + for k, v in merged_image_inputs_dict.items(): # 'imgs' 'rots' 'trans' ... 
+ merged_image_inputs_dict[k] = torch.stack([v[index] for index in camera_agent_idx]) + + if not self.heterogeneous or (self.heterogeneous and sum(camera_agent) != 0): + output_dict['ego'].update({'image_inputs': merged_image_inputs_dict}) + + record_len = torch.from_numpy(np.array(record_len, dtype=int)) + lidar_pose = torch.from_numpy(np.concatenate(lidar_pose_list, axis=0)) + lidar_pose_clean = torch.from_numpy(np.concatenate(lidar_pose_clean_list, axis=0)) + label_torch_dict = \ + self.post_processor.collate_batch(label_dict_list) + + # for centerpoint + label_torch_dict.update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask}) + + # (B, max_cav) + pairwise_t_matrix = torch.from_numpy(np.array(pairwise_t_matrix_list)) + + # add pairwise_t_matrix to label dict + label_torch_dict['pairwise_t_matrix'] = pairwise_t_matrix + label_torch_dict['record_len'] = record_len + + + # object id is only used during inference, where batch size is 1. + # so here we only get the first element. + output_dict['ego'].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'record_len': record_len, + 'label_dict': label_torch_dict, + 'object_ids': object_ids[0], + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_pose_clean': lidar_pose_clean, + 'lidar_pose': lidar_pose, + 'anchor_box': self.anchor_box_torch}) + + + if self.visualize: + origin_lidar = \ + np.array(downsample_lidar_minimum(pcd_np_list=origin_lidar)) + origin_lidar = torch.from_numpy(origin_lidar) + output_dict['ego'].update({'origin_lidar': origin_lidar}) + + if self.kd_flag: + teacher_processed_lidar_torch_dict = \ + self.pre_processor.collate_batch(teacher_processed_lidar_list) + output_dict['ego'].update({'teacher_processed_lidar':teacher_processed_lidar_torch_dict}) + + + if self.supervise_single: + output_dict['ego'].update({ + "label_dict_single":{ + "pos_equal_one": torch.cat(pos_equal_one_single, dim=0), + "neg_equal_one": torch.cat(neg_equal_one_single, dim=0), + "targets": torch.cat(targets_single, dim=0), + # for centerpoint + "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0), + "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0) + }, + "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0), + "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0) + }) + + if self.heterogeneous: + output_dict['ego'].update({ + "lidar_agent_record": torch.from_numpy(np.concatenate(lidar_agent_list)) # [0,1,1,0,1...] + }) + + return output_dict + + def collate_batch_test(self, batch): + assert len(batch) <= 1, "Batch size 1 is required during testing!" + output_dict = self.collate_batch_train(batch) + if output_dict is None: + return None + + # check if anchor box in the batch + if batch[0]['ego']['anchor_box'] is not None: + output_dict['ego'].update({'anchor_box': + self.anchor_box_torch}) + + # save the transformation matrix (4, 4) to ego vehicle + # transformation is only used in post process (no use.) + # we all predict boxes in ego coord. 
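+        # Since every box is already predicted in the ego frame, the two matrices stored
+        # below are identity placeholders; an equivalent one-liner (a sketch, not the
+        # original code) would be:
+        #   transformation_matrix_torch = torch.eye(4, dtype=torch.float32)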
+ transformation_matrix_torch = \ + torch.from_numpy(np.identity(4)).float() + transformation_matrix_clean_torch = \ + torch.from_numpy(np.identity(4)).float() + + output_dict['ego'].update({'transformation_matrix': + transformation_matrix_torch, + 'transformation_matrix_clean': + transformation_matrix_clean_torch,}) + + output_dict['ego'].update({ + "sample_idx": batch[0]['ego']['sample_idx'], + "cav_id_list": batch[0]['ego']['cav_id_list'] + }) + + return output_dict + + + def post_process(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. + gt_box_tensor : torch.Tensor + The tensor of gt bounding box. + """ + pred_box_tensor, pred_score = \ + self.post_processor.post_process(data_dict, output_dict) + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict) + + return pred_box_tensor, pred_score, gt_box_tensor + + + return IntermediateFusionDataset + + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_heter_fusion_dataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_heter_fusion_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..c4796807cff709d31cba726db34207962addb417 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_heter_fusion_dataset.py @@ -0,0 +1,752 @@ +''' +intermediate heter fusion dataset + +Note that for DAIR-V2X dataset, +Each agent should retrieve the objects itself, and merge them by iou, +instead of using the cooperative label. +''' + +import random +import math +from collections import OrderedDict +import numpy as np +import torch +import copy +from icecream import ic +from PIL import Image +import pickle as pkl +from opencood.utils import box_utils as box_utils +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.data_utils.post_processor import build_postprocessor +from opencood.utils.camera_utils import ( + sample_augmentation, + img_transform, + normalize_img, + img_to_tensor, +) +from opencood.utils.common_utils import merge_features_to_dict, compute_iou, convert_format +from opencood.utils.transformation_utils import x1_to_x2, x_to_world, get_pairwise_transformation +from opencood.utils.pose_utils import add_noise_data_dict +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.utils.pcd_utils import ( + mask_points_by_range, + mask_ego_points, + shuffle_points, + downsample_lidar_minimum, +) +from opencood.utils.common_utils import read_json +from opencood.utils.heter_utils import Adaptor + + +def getIntermediateheterFusionDataset(cls): + """ + cls: the Basedataset. 
+ """ + class IntermediateheterFusionDataset(cls): + def __init__(self, params, visualize, train=True): + super().__init__(params, visualize, train) + # intermediate and supervise single + self.supervise_single = True if ('supervise_single' in params['model']['args'] and params['model']['args']['supervise_single']) \ + else False + self.proj_first = False if 'proj_first' not in params['fusion']['args']\ + else params['fusion']['args']['proj_first'] + + self.anchor_box = self.post_processor.generate_anchor_box() + self.anchor_box_torch = torch.from_numpy(self.anchor_box) + + self.heterogeneous = True + self.modality_assignment = read_json(params['heter']['assignment_path']) + self.ego_modality = params['heter']['ego_modality'] # "m1" or "m1&m2" or "m3" + + self.modality_name_list = list(params['heter']['modality_setting'].keys()) + self.sensor_type_dict = OrderedDict() + + lidar_channels_dict = params['heter'].get('lidar_channels_dict', OrderedDict()) + mapping_dict = params['heter']['mapping_dict'] + cav_preference = params['heter'].get("cav_preference", None) + + self.adaptor = Adaptor(self.ego_modality, + self.modality_name_list, + self.modality_assignment, + lidar_channels_dict, + mapping_dict, + cav_preference, + train) + + for modality_name, modal_setting in params['heter']['modality_setting'].items(): + self.sensor_type_dict[modality_name] = modal_setting['sensor_type'] + if modal_setting['sensor_type'] == 'lidar': + setattr(self, f"pre_processor_{modality_name}", build_preprocessor(modal_setting['preprocess'], train)) + + elif modal_setting['sensor_type'] == 'camera': + setattr(self, f"data_aug_conf_{modality_name}", modal_setting['data_aug_conf']) + + else: + raise("Not support this type of sensor") + + self.reinitialize() + + + self.kd_flag = params.get('kd_flag', False) + + self.box_align = False + if "box_align" in params: + self.box_align = True + self.stage1_result_path = params['box_align']['train_result'] if train else params['box_align']['val_result'] + self.stage1_result = read_json(self.stage1_result_path) + self.box_align_args = params['box_align']['args'] + + + + def get_item_single_car(self, selected_cav_base, ego_cav_base): + """ + Process a single CAV's information for the train/test pipeline. + + + Parameters + ---------- + selected_cav_base : dict + The dictionary contains a single CAV's raw information. + including 'params', 'camera_data' + ego_pose : list, length 6 + The ego vehicle lidar pose under world coordinate. + ego_pose_clean : list, length 6 + only used for gt box generation + + Returns + ------- + selected_cav_processed : dict + The dictionary contains the cav's processed information. 
+ """ + selected_cav_processed = {} + ego_pose, ego_pose_clean = ego_cav_base['params']['lidar_pose'], ego_cav_base['params']['lidar_pose_clean'] + + # calculate the transformation matrix + transformation_matrix = \ + x1_to_x2(selected_cav_base['params']['lidar_pose'], + ego_pose) # T_ego_cav + transformation_matrix_clean = \ + x1_to_x2(selected_cav_base['params']['lidar_pose_clean'], + ego_pose_clean) + + modality_name = selected_cav_base['modality_name'] + sensor_type = self.sensor_type_dict[modality_name] + + # lidar + if sensor_type == "lidar" or self.visualize: + # process lidar + lidar_np = selected_cav_base['lidar_np'] + lidar_np = shuffle_points(lidar_np) + # remove points that hit itself + lidar_np = mask_ego_points(lidar_np) + # project the lidar to ego space + # x,y,z in ego space + projected_lidar = \ + box_utils.project_points_by_matrix_torch(lidar_np[:, :3], + transformation_matrix) + if self.proj_first: + lidar_np[:, :3] = projected_lidar + + if self.visualize: + # filter lidar + selected_cav_processed.update({'projected_lidar': projected_lidar}) + + if self.kd_flag: + lidar_proj_np = copy.deepcopy(lidar_np) + lidar_proj_np[:,:3] = projected_lidar + + selected_cav_processed.update({'projected_lidar': lidar_proj_np}) + + if sensor_type == "lidar": + processed_lidar = eval(f"self.pre_processor_{modality_name}").preprocess(lidar_np) + selected_cav_processed.update({f'processed_features_{modality_name}': processed_lidar}) + + # generate targets label single GT, note the reference pose is itself. + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center( + [selected_cav_base], selected_cav_base['params']['lidar_pose'] + ) + label_dict = self.post_processor.generate_label( + gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask + ) + selected_cav_processed.update({ + "single_label_dict": label_dict, + "single_object_bbx_center": object_bbx_center, + "single_object_bbx_mask": object_bbx_mask}) + + # camera + if sensor_type == "camera": + camera_data_list = selected_cav_base["camera_data"] + params = selected_cav_base["params"] + imgs = [] + rots = [] + trans = [] + intrins = [] + extrinsics = [] + post_rots = [] + post_trans = [] + + for idx, img in enumerate(camera_data_list): + camera_to_lidar, camera_intrinsic = self.get_ext_int(params, idx) + + intrin = torch.from_numpy(camera_intrinsic) + rot = torch.from_numpy( + camera_to_lidar[:3, :3] + ) # R_wc, we consider world-coord is the lidar-coord + tran = torch.from_numpy(camera_to_lidar[:3, 3]) # T_wc + + post_rot = torch.eye(2) + post_tran = torch.zeros(2) + + img_src = [img] + + # depth + if self.load_depth_file: + depth_img = selected_cav_base["depth_data"][idx] + img_src.append(depth_img) + else: + depth_img = None + + # data augmentation + resize, resize_dims, crop, flip, rotate = sample_augmentation( + eval(f"self.data_aug_conf_{modality_name}"), self.train + ) + img_src, post_rot2, post_tran2 = img_transform( + img_src, + post_rot, + post_tran, + resize=resize, + resize_dims=resize_dims, + crop=crop, + flip=flip, + rotate=rotate, + ) + # for convenience, make augmentation matrices 3x3 + post_tran = torch.zeros(3) + post_rot = torch.eye(3) + post_tran[:2] = post_tran2 + post_rot[:2, :2] = post_rot2 + + # decouple RGB and Depth + + img_src[0] = normalize_img(img_src[0]) + if self.load_depth_file: + img_src[1] = img_to_tensor(img_src[1]) * 255 + + imgs.append(torch.cat(img_src, dim=0)) + intrins.append(intrin) + extrinsics.append(torch.from_numpy(camera_to_lidar)) + 
rots.append(rot) + trans.append(tran) + post_rots.append(post_rot) + post_trans.append(post_tran) + + + selected_cav_processed.update( + { + f"image_inputs_{modality_name}": + { + "imgs": torch.stack(imgs), # [Ncam, 3or4, H, W] + "intrins": torch.stack(intrins), + "extrinsics": torch.stack(extrinsics), + "rots": torch.stack(rots), + "trans": torch.stack(trans), + "post_rots": torch.stack(post_rots), + "post_trans": torch.stack(post_trans), + } + } + ) + + # anchor box + selected_cav_processed.update({"anchor_box": self.anchor_box}) + + # note the reference pose ego + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center([selected_cav_base], + ego_pose_clean) + + selected_cav_processed.update( + { + "object_bbx_center": object_bbx_center[object_bbx_mask == 1], + "object_bbx_mask": object_bbx_mask, + "object_ids": object_ids, + 'transformation_matrix': transformation_matrix, + 'transformation_matrix_clean': transformation_matrix_clean + } + ) + + + return selected_cav_processed + + def __getitem__(self, idx): + base_data_dict = self.retrieve_base_data(idx) + base_data_dict = add_noise_data_dict(base_data_dict,self.params['noise_setting']) + + processed_data_dict = OrderedDict() + processed_data_dict['ego'] = {} + + ego_id = -1 + ego_lidar_pose = [] + ego_cav_base = None + + # first find the ego vehicle's lidar pose + for cav_id, cav_content in base_data_dict.items(): + if cav_content['ego']: + ego_id = cav_id + ego_lidar_pose = cav_content['params']['lidar_pose'] + ego_cav_base = cav_content + break + + assert cav_id == list(base_data_dict.keys())[ + 0], "The first element in the OrderedDict must be ego" + assert ego_id != -1 + assert len(ego_lidar_pose) > 0 + + + input_list_m1 = [] # can contain lidar or camera + input_list_m2 = [] + input_list_m3 = [] + input_list_m4 = [] + + agent_modality_list = [] + object_stack = [] + object_id_stack = [] + single_label_list = [] + single_object_bbx_center_list = [] + single_object_bbx_mask_list = [] + exclude_agent = [] + lidar_pose_list = [] + lidar_pose_clean_list = [] + cav_id_list = [] + projected_lidar_clean_list = [] # disconet + + if self.visualize or self.kd_flag: + projected_lidar_stack = [] + + # loop over all CAVs to process information + for cav_id, selected_cav_base in base_data_dict.items(): + # check if the cav is within the communication range with ego + distance = \ + math.sqrt((selected_cav_base['params']['lidar_pose'][0] - + ego_lidar_pose[0]) ** 2 + ( + selected_cav_base['params'][ + 'lidar_pose'][1] - ego_lidar_pose[ + 1]) ** 2) + + # if distance is too far, we will just skip this agent + if distance > self.params['comm_range']: + exclude_agent.append(cav_id) + continue + + # if modality not match + if self.adaptor.unmatched_modality(selected_cav_base['modality_name']): + exclude_agent.append(cav_id) + continue + + lidar_pose_clean_list.append(selected_cav_base['params']['lidar_pose_clean']) + lidar_pose_list.append(selected_cav_base['params']['lidar_pose']) # 6dof pose + cav_id_list.append(cav_id) + + if len(cav_id_list) == 0: + return None + + for cav_id in exclude_agent: + base_data_dict.pop(cav_id) + + ########## Updated by Yifan Lu 2022.1.26 ############ + # box align to correct pose. + # stage1_content contains all agent. Even out of comm range. 
+ if self.box_align and str(idx) in self.stage1_result.keys(): + from opencood.models.sub_modules.box_align_v2 import box_alignment_relative_sample_np + stage1_content = self.stage1_result[str(idx)] + if stage1_content is not None: + all_agent_id_list = stage1_content['cav_id_list'] # include those out of range + all_agent_corners_list = stage1_content['pred_corner3d_np_list'] + all_agent_uncertainty_list = stage1_content['uncertainty_np_list'] + + cur_agent_id_list = cav_id_list + cur_agent_pose = [base_data_dict[cav_id]['params']['lidar_pose'] for cav_id in cav_id_list] + cur_agnet_pose = np.array(cur_agent_pose) + cur_agent_in_all_agent = [all_agent_id_list.index(cur_agent) for cur_agent in cur_agent_id_list] # indexing current agent in `all_agent_id_list` + + pred_corners_list = [np.array(all_agent_corners_list[cur_in_all_ind], dtype=np.float64) + for cur_in_all_ind in cur_agent_in_all_agent] + uncertainty_list = [np.array(all_agent_uncertainty_list[cur_in_all_ind], dtype=np.float64) + for cur_in_all_ind in cur_agent_in_all_agent] + + if sum([len(pred_corners) for pred_corners in pred_corners_list]) != 0: + refined_pose = box_alignment_relative_sample_np(pred_corners_list, + cur_agnet_pose, + uncertainty_list=uncertainty_list, + **self.box_align_args) + cur_agnet_pose[:,[0,1,4]] = refined_pose + + for i, cav_id in enumerate(cav_id_list): + lidar_pose_list[i] = cur_agnet_pose[i].tolist() + base_data_dict[cav_id]['params']['lidar_pose'] = cur_agnet_pose[i].tolist() + + + + pairwise_t_matrix = \ + get_pairwise_transformation(base_data_dict, + self.max_cav, + self.proj_first) + + lidar_poses = np.array(lidar_pose_list).reshape(-1, 6) # [N_cav, 6] + lidar_poses_clean = np.array(lidar_pose_clean_list).reshape(-1, 6) # [N_cav, 6] + + # merge preprocessed features from different cavs into the same dict + cav_num = len(cav_id_list) + + for _i, cav_id in enumerate(cav_id_list): + selected_cav_base = base_data_dict[cav_id] + modality_name = selected_cav_base['modality_name'] + sensor_type = self.sensor_type_dict[selected_cav_base['modality_name']] + + # dynamic object center generator! for heterogeneous input + if not self.visualize: + self.generate_object_center = eval(f"self.generate_object_center_{sensor_type}") + # need discussion. In test phase, use lidar label. 
+ else: + self.generate_object_center = self.generate_object_center_lidar + + selected_cav_processed = self.get_item_single_car( + selected_cav_base, + ego_cav_base) + + object_stack.append(selected_cav_processed['object_bbx_center']) + object_id_stack += selected_cav_processed['object_ids'] + + + if sensor_type == "lidar": + eval(f"input_list_{modality_name}").append(selected_cav_processed[f"processed_features_{modality_name}"]) + elif sensor_type == "camera": + eval(f"input_list_{modality_name}").append(selected_cav_processed[f"image_inputs_{modality_name}"]) + else: + raise + + agent_modality_list.append(modality_name) + + if self.visualize or self.kd_flag: + projected_lidar_stack.append( + selected_cav_processed['projected_lidar']) + + if self.supervise_single or self.heterogeneous: + single_label_list.append(selected_cav_processed['single_label_dict']) + single_object_bbx_center_list.append(selected_cav_processed['single_object_bbx_center']) + single_object_bbx_mask_list.append(selected_cav_processed['single_object_bbx_mask']) + + # generate single view GT label + if self.supervise_single or self.heterogeneous: + single_label_dicts = self.post_processor.collate_batch(single_label_list) + single_object_bbx_center = torch.from_numpy(np.array(single_object_bbx_center_list)) + single_object_bbx_mask = torch.from_numpy(np.array(single_object_bbx_mask_list)) + processed_data_dict['ego'].update({ + "single_label_dict_torch": single_label_dicts, + "single_object_bbx_center_torch": single_object_bbx_center, + "single_object_bbx_mask_torch": single_object_bbx_mask, + }) + + if self.kd_flag: + stack_lidar_np = np.vstack(projected_lidar_stack) + stack_lidar_np = mask_points_by_range(stack_lidar_np, + self.params['preprocess'][ + 'cav_lidar_range']) + stack_feature_processed = self.pre_processor.preprocess(stack_lidar_np) + processed_data_dict['ego'].update({'teacher_processed_lidar': + stack_feature_processed}) + + + # exculude all repetitve objects, DAIR-V2X + if self.params['fusion']['dataset'] == 'dairv2x': + if len(object_stack) == 1: + object_stack = object_stack[0] + else: + ego_boxes_np = object_stack[0] + cav_boxes_np = object_stack[1] + order = self.params['postprocess']['order'] + ego_corners_np = box_utils.boxes_to_corners_3d(ego_boxes_np, order) + cav_corners_np = box_utils.boxes_to_corners_3d(cav_boxes_np, order) + ego_polygon_list = list(convert_format(ego_corners_np)) + cav_polygon_list = list(convert_format(cav_corners_np)) + iou_thresh = 0.05 + + + gt_boxes_from_cav = [] + for i in range(len(cav_polygon_list)): + cav_polygon = cav_polygon_list[i] + ious = compute_iou(cav_polygon, ego_polygon_list) + if (ious > iou_thresh).any(): + continue + gt_boxes_from_cav.append(cav_boxes_np[i]) + + if len(gt_boxes_from_cav): + object_stack_from_cav = np.stack(gt_boxes_from_cav) + object_stack = np.vstack([ego_boxes_np, object_stack_from_cav]) + else: + object_stack = ego_boxes_np + + unique_indices = np.arange(object_stack.shape[0]) + object_id_stack = np.arange(object_stack.shape[0]) + else: + # exclude all repetitive objects, OPV2V-H + unique_indices = \ + [object_id_stack.index(x) for x in set(object_id_stack)] + object_stack = np.vstack(object_stack) + object_stack = object_stack[unique_indices] + + # make sure bounding boxes across all frames have the same number + object_bbx_center = \ + np.zeros((self.params['postprocess']['max_num'], 7)) + mask = np.zeros(self.params['postprocess']['max_num']) + object_bbx_center[:object_stack.shape[0], :] = object_stack + 
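+            # Together with the next line this pads the ground truth to a fixed size.
+            # Example: with max_num = 100 and 7 retained boxes, rows 0..6 of
+            # object_bbx_center hold the boxes, rows 7..99 stay zero, and
+            # mask = [1, 1, 1, 1, 1, 1, 1, 0, ..., 0].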
mask[:object_stack.shape[0]] = 1 + + for modality_name in self.modality_name_list: + if self.sensor_type_dict[modality_name] == "lidar": + merged_feature_dict = merge_features_to_dict(eval(f"input_list_{modality_name}")) + processed_data_dict['ego'].update({f'input_{modality_name}': merged_feature_dict}) # maybe None + elif self.sensor_type_dict[modality_name] == "camera": + merged_image_inputs_dict = merge_features_to_dict(eval(f"input_list_{modality_name}"), merge='stack') + processed_data_dict['ego'].update({f'input_{modality_name}': merged_image_inputs_dict}) # maybe None + + processed_data_dict['ego'].update({'agent_modality_list': agent_modality_list}) + + # generate targets label + label_dict = \ + self.post_processor.generate_label( + gt_box_center=object_bbx_center, + anchors=self.anchor_box, + mask=mask) + + processed_data_dict['ego'].update( + {'object_bbx_center': object_bbx_center, + 'object_bbx_mask': mask, + 'object_ids': [object_id_stack[i] for i in unique_indices], + 'anchor_box': self.anchor_box, + 'label_dict': label_dict, + 'cav_num': cav_num, + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_poses_clean': lidar_poses_clean, + 'lidar_poses': lidar_poses}) + + + if self.visualize: + processed_data_dict['ego'].update({'origin_lidar': + np.vstack( + projected_lidar_stack)}) + + + processed_data_dict['ego'].update({'sample_idx': idx, + 'cav_id_list': cav_id_list}) + + return processed_data_dict + + + def collate_batch_train(self, batch): + # Intermediate fusion is different the other two + output_dict = {'ego': {}} + + object_bbx_center = [] + object_bbx_mask = [] + object_ids = [] + inputs_list_m1 = [] + inputs_list_m2 = [] + inputs_list_m3 = [] + inputs_list_m4 = [] + agent_modality_list = [] + # used to record different scenario + record_len = [] + label_dict_list = [] + lidar_pose_list = [] + origin_lidar = [] + lidar_pose_clean_list = [] + + # pairwise transformation matrix + pairwise_t_matrix_list = [] + + # disconet + teacher_processed_lidar_list = [] + + ### 2022.10.10 single gt #### + if self.supervise_single or self.heterogeneous: + pos_equal_one_single = [] + neg_equal_one_single = [] + targets_single = [] + object_bbx_center_single = [] + object_bbx_mask_single = [] + + for i in range(len(batch)): + ego_dict = batch[i]['ego'] + object_bbx_center.append(ego_dict['object_bbx_center']) + object_bbx_mask.append(ego_dict['object_bbx_mask']) + object_ids.append(ego_dict['object_ids']) + lidar_pose_list.append(ego_dict['lidar_poses']) # ego_dict['lidar_pose'] is np.ndarray [N,6] + lidar_pose_clean_list.append(ego_dict['lidar_poses_clean']) + + for modality_name in self.modality_name_list: + if ego_dict[f'input_{modality_name}'] is not None: + eval(f"inputs_list_{modality_name}").append(ego_dict[f'input_{modality_name}']) # OrderedDict() if empty? 
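+                # The eval(f"inputs_list_{modality_name}") indirection works because the lists
+                # inputs_list_m1 .. inputs_list_m4 are declared above.  An equivalent eval-free
+                # sketch (hypothetical variable name) would keep a dict keyed by modality:
+                #   inputs_by_modality = {m: [] for m in self.modality_name_list}
+                #   inputs_by_modality[modality_name].append(ego_dict[f'input_{modality_name}'])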
+ + agent_modality_list.extend(ego_dict['agent_modality_list']) + + record_len.append(ego_dict['cav_num']) + label_dict_list.append(ego_dict['label_dict']) + pairwise_t_matrix_list.append(ego_dict['pairwise_t_matrix']) + + if self.visualize: + origin_lidar.append(ego_dict['origin_lidar']) + + if self.kd_flag: + teacher_processed_lidar_list.append(ego_dict['teacher_processed_lidar']) + + ### 2022.10.10 single gt #### + if self.supervise_single or self.heterogeneous: + pos_equal_one_single.append(ego_dict['single_label_dict_torch']['pos_equal_one']) + neg_equal_one_single.append(ego_dict['single_label_dict_torch']['neg_equal_one']) + targets_single.append(ego_dict['single_label_dict_torch']['targets']) + object_bbx_center_single.append(ego_dict['single_object_bbx_center_torch']) + object_bbx_mask_single.append(ego_dict['single_object_bbx_mask_torch']) + + + # convert to numpy, (B, max_num, 7) + object_bbx_center = torch.from_numpy(np.array(object_bbx_center)) + object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask)) + + + # 2023.2.5 + for modality_name in self.modality_name_list: + if len(eval(f"inputs_list_{modality_name}")) != 0: + if self.sensor_type_dict[modality_name] == "lidar": + merged_feature_dict = merge_features_to_dict(eval(f"inputs_list_{modality_name}")) + processed_lidar_torch_dict = eval(f"self.pre_processor_{modality_name}").collate_batch(merged_feature_dict) + output_dict['ego'].update({f'inputs_{modality_name}': processed_lidar_torch_dict}) + + elif self.sensor_type_dict[modality_name] == "camera": + merged_image_inputs_dict = merge_features_to_dict(eval(f"inputs_list_{modality_name}"), merge='cat') + output_dict['ego'].update({f'inputs_{modality_name}': merged_image_inputs_dict}) + + + output_dict['ego'].update({"agent_modality_list": agent_modality_list}) + + record_len = torch.from_numpy(np.array(record_len, dtype=int)) + lidar_pose = torch.from_numpy(np.concatenate(lidar_pose_list, axis=0)) + lidar_pose_clean = torch.from_numpy(np.concatenate(lidar_pose_clean_list, axis=0)) + label_torch_dict = \ + self.post_processor.collate_batch(label_dict_list) + + # for centerpoint + label_torch_dict.update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask}) + + # (B, max_cav) + pairwise_t_matrix = torch.from_numpy(np.array(pairwise_t_matrix_list)) + + # add pairwise_t_matrix to label dict + label_torch_dict['pairwise_t_matrix'] = pairwise_t_matrix + label_torch_dict['record_len'] = record_len + + + # object id is only used during inference, where batch size is 1. + # so here we only get the first element. 
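+            # Taking object_ids[0] below is safe because collate_batch_test asserts
+            # len(batch) <= 1, so at inference time there is exactly one sample in the batch.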
+ output_dict['ego'].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'record_len': record_len, + 'label_dict': label_torch_dict, + 'object_ids': object_ids[0], + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_pose_clean': lidar_pose_clean, + 'lidar_pose': lidar_pose, + 'anchor_box': self.anchor_box_torch}) + + + if self.visualize: + origin_lidar = \ + np.array(downsample_lidar_minimum(pcd_np_list=origin_lidar)) + origin_lidar = torch.from_numpy(origin_lidar) + output_dict['ego'].update({'origin_lidar': origin_lidar}) + + if self.kd_flag: + teacher_processed_lidar_torch_dict = \ + self.pre_processor.collate_batch(teacher_processed_lidar_list) + output_dict['ego'].update({'teacher_processed_lidar':teacher_processed_lidar_torch_dict}) + + + if self.supervise_single or self.heterogeneous: + output_dict['ego'].update({ + "label_dict_single":{ + "pos_equal_one": torch.cat(pos_equal_one_single, dim=0), + "neg_equal_one": torch.cat(neg_equal_one_single, dim=0), + "targets": torch.cat(targets_single, dim=0), + # for centerpoint + "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0), + "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0) + }, + "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0), + "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0) + }) + + return output_dict + + def collate_batch_test(self, batch): + assert len(batch) <= 1, "Batch size 1 is required during testing!" + if batch[0] is None: + return None + output_dict = self.collate_batch_train(batch) + if output_dict is None: + return None + + # check if anchor box in the batch + if batch[0]['ego']['anchor_box'] is not None: + output_dict['ego'].update({'anchor_box': + self.anchor_box_torch}) + + # save the transformation matrix (4, 4) to ego vehicle + # transformation is only used in post process (no use.) + # we all predict boxes in ego coord. + transformation_matrix_torch = \ + torch.from_numpy(np.identity(4)).float() + transformation_matrix_clean_torch = \ + torch.from_numpy(np.identity(4)).float() + + output_dict['ego'].update({'transformation_matrix': + transformation_matrix_torch, + 'transformation_matrix_clean': + transformation_matrix_clean_torch,}) + + output_dict['ego'].update({ + "sample_idx": batch[0]['ego']['sample_idx'], + "cav_id_list": batch[0]['ego']['cav_id_list'], + "agent_modality_list": batch[0]['ego']['agent_modality_list'] + }) + + return output_dict + + + def post_process(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. + gt_box_tensor : torch.Tensor + The tensor of gt bounding box. 
+ """ + pred_box_tensor, pred_score = \ + self.post_processor.post_process(data_dict, output_dict) + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict) + + return pred_box_tensor, pred_score, gt_box_tensor + + + return IntermediateheterFusionDataset + + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_multiclass_fusion_dataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_multiclass_fusion_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..2a120724ef30eedc9b80202b0d25a8ff8af7be4d --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_multiclass_fusion_dataset.py @@ -0,0 +1,892 @@ +# intermediate fusion dataset +import random +import math +from collections import OrderedDict +import numpy as np +import torch +import copy +from icecream import ic +from PIL import Image +import pickle as pkl +from opencood.utils import box_utils as box_utils +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.data_utils.post_processor import build_postprocessor +from opencood.utils.camera_utils import ( + sample_augmentation, + img_transform, + normalize_img, + img_to_tensor, +) +# from opencood.utils.heter_utils import AgentSelector +from opencood.utils.common_utils import merge_features_to_dict +from opencood.utils.transformation_utils import x1_to_x2, x_to_world, get_pairwise_transformation, get_pairwise_transformation_asymmetric +from opencood.utils.pose_utils import add_noise_data_dict, add_noise_data_dict_asymmetric +from opencood.utils.pcd_utils import ( + mask_points_by_range, + mask_ego_points, + mask_ego_points_v2, + shuffle_points, + downsample_lidar_minimum, +) +from opencood.utils.common_utils import read_json + + +def getIntermediatemulticlassFusionDataset(cls): + """ + cls: the Basedataset. + """ + class IntermediatemulticlassFusionDataset(cls): + def __init__(self, params, visualize, train=True): + super().__init__(params, visualize, train) + # intermediate and supervise single + self.supervise_single = True if ('supervise_single' in params['model']['args'] and params['model']['args']['supervise_single']) \ + else False + self.proj_first = False if 'proj_first' not in params['fusion']['args']\ + else params['fusion']['args']['proj_first'] + + self.anchor_box = self.post_processor.generate_anchor_box() + self.anchor_box_torch = torch.from_numpy(self.anchor_box) + + self.heterogeneous = False + if 'heter' in params: + self.heterogeneous = True + self.selector = AgentSelector(params['heter'], self.max_cav) + + self.kd_flag = params.get('kd_flag', False) + + self.box_align = False + if "box_align" in params: + self.box_align = True + self.stage1_result_path = params['box_align']['train_result'] if train else params['box_align']['val_result'] + self.stage1_result = read_json(self.stage1_result_path) + self.box_align_args = params['box_align']['args'] + + self.multiclass = params['model']['args']['multi_class'] + self.online_eval_only = False + + def get_item_single_car(self, selected_cav_base, ego_cav_base, tpe='all', cav_id='car_0', online_eval=False): + """ + Process a single CAV's information for the train/test pipeline. + + + Parameters + ---------- + selected_cav_base : dict + The dictionary contains a single CAV's raw information. + including 'params', 'camera_data' + ego_pose : list, length 6 + The ego vehicle lidar pose under world coordinate. 
+ ego_pose_clean : list, length 6 + only used for gt box generation + + Returns + ------- + selected_cav_processed : dict + The dictionary contains the cav's processed information. + """ + selected_cav_processed = {} + ego_pose, ego_pose_clean = ego_cav_base['params']['lidar_pose'], ego_cav_base['params']['lidar_pose_clean'] + + # calculate the transformation matrix + transformation_matrix = \ + x1_to_x2(selected_cav_base['params']['lidar_pose'], + ego_pose) # T_ego_cav + transformation_matrix_clean = \ + x1_to_x2(selected_cav_base['params']['lidar_pose_clean'], + ego_pose_clean) + + # lidar + if tpe == 'all': + if self.load_lidar_file or self.visualize: + # process lidar + lidar_np = selected_cav_base['lidar_np'] + lidar_np = shuffle_points(lidar_np) + # remove points that hit itself + if not cav_id.startswith('rsu'): + lidar_np = mask_ego_points_v2(lidar_np) + # project the lidar to ego space + # x,y,z in ego space + projected_lidar = \ + box_utils.project_points_by_matrix_torch(lidar_np[:, :3], + transformation_matrix) + if self.proj_first: + lidar_np[:, :3] = projected_lidar + + if self.visualize: + # filter lidar + selected_cav_processed.update({'projected_lidar': projected_lidar}) + + if self.kd_flag: + lidar_proj_np = copy.deepcopy(lidar_np) + lidar_proj_np[:,:3] = projected_lidar + + selected_cav_processed.update({'projected_lidar': lidar_proj_np}) + + processed_lidar = self.pre_processor.preprocess(lidar_np) + selected_cav_processed.update({'processed_features': processed_lidar}) + + if True: # not online_eval: + # generate targets label single GT, note the reference pose is itself. + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center( + [selected_cav_base], selected_cav_base['params']['lidar_pose'] + ) + label_dict = {} + if tpe == 'all': + # unused label + if False: + label_dict = self.post_processor.generate_label( + gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask + ) + selected_cav_processed.update({ + "single_label_dict": label_dict, + "single_object_bbx_center": object_bbx_center, + "single_object_bbx_mask": object_bbx_mask}) + + if tpe == 'all': + # camera + if self.load_camera_file: + camera_data_list = selected_cav_base["camera_data"] + + params = selected_cav_base["params"] + imgs = [] + rots = [] + trans = [] + intrins = [] + extrinsics = [] + post_rots = [] + post_trans = [] + + for idx, img in enumerate(camera_data_list): + camera_to_lidar, camera_intrinsic = self.get_ext_int(params, idx) + + intrin = torch.from_numpy(camera_intrinsic) + rot = torch.from_numpy( + camera_to_lidar[:3, :3] + ) # R_wc, we consider world-coord is the lidar-coord + tran = torch.from_numpy(camera_to_lidar[:3, 3]) # T_wc + + post_rot = torch.eye(2) + post_tran = torch.zeros(2) + + img_src = [img] + + # depth + if self.load_depth_file: + depth_img = selected_cav_base["depth_data"][idx] + img_src.append(depth_img) + else: + depth_img = None + + # data augmentation + resize, resize_dims, crop, flip, rotate = sample_augmentation( + self.data_aug_conf, self.train + ) + img_src, post_rot2, post_tran2 = img_transform( + img_src, + post_rot, + post_tran, + resize=resize, + resize_dims=resize_dims, + crop=crop, + flip=flip, + rotate=rotate, + ) + # for convenience, make augmentation matrices 3x3 + post_tran = torch.zeros(3) + post_rot = torch.eye(3) + post_tran[:2] = post_tran2 + post_rot[:2, :2] = post_rot2 + + # decouple RGB and Depth + + img_src[0] = normalize_img(img_src[0]) + if self.load_depth_file: + img_src[1] = img_to_tensor(img_src[1]) 
* 255 + + imgs.append(torch.cat(img_src, dim=0)) + intrins.append(intrin) + extrinsics.append(torch.from_numpy(camera_to_lidar)) + rots.append(rot) + trans.append(tran) + post_rots.append(post_rot) + post_trans.append(post_tran) + + + selected_cav_processed.update( + { + "image_inputs": + { + "imgs": torch.stack(imgs), # [Ncam, 3or4, H, W] + "intrins": torch.stack(intrins), + "extrinsics": torch.stack(extrinsics), + "rots": torch.stack(rots), + "trans": torch.stack(trans), + "post_rots": torch.stack(post_rots), + "post_trans": torch.stack(post_trans), + } + } + ) + + # anchor box + selected_cav_processed.update({"anchor_box": self.anchor_box}) + + if True: # not online_eval: + # note the reference pose ego + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center([selected_cav_base], + ego_pose_clean) + selected_cav_processed.update( + { + "object_bbx_center": object_bbx_center[object_bbx_mask == 1], + "object_bbx_mask": object_bbx_mask, + "object_ids": object_ids, + } + ) + selected_cav_processed.update( + { + 'transformation_matrix': transformation_matrix, + 'transformation_matrix_clean': transformation_matrix_clean + } + ) + + + return selected_cav_processed + + def __getitem__(self, idx, extra_source=None, data_dir=None, plan_without_perception_gt=True): + if (data_dir is not None) and (plan_without_perception_gt): + extra_source=1 + object_bbx_center_list = [] + object_bbx_mask_list = [] + object_id_dict = {} + + object_bbx_center_list_single = [] + object_bbx_mask_list_single = [] + + + output_dict = {} + for tpe in ['all', 0, 1, 3]: + output_single_class = self.__getitem_single_class__(idx, tpe, extra_source, data_dir) + output_dict[tpe] = output_single_class + if tpe == 'all': + continue + elif tpe == 'all' and extra_source!=None: + break + object_bbx_center_list.append(output_single_class['ego']['object_bbx_center']) + object_bbx_mask_list.append(output_single_class['ego']['object_bbx_mask']) + if self.supervise_single: + object_bbx_center_list_single.append(output_single_class['ego']['single_object_bbx_center_torch']) + object_bbx_mask_list_single.append(output_single_class['ego']['single_object_bbx_mask_torch']) + + object_id_dict[tpe] = output_single_class['ego']['object_ids'] + + if True: # self.multiclass and extra_source==None: + output_dict['all']['ego']['object_bbx_center'] = np.stack(object_bbx_center_list, axis=0) + output_dict['all']['ego']['object_bbx_mask'] = np.stack(object_bbx_mask_list, axis=0) + if self.supervise_single: + output_dict['all']['ego']['single_object_bbx_center_torch'] = torch.stack(object_bbx_center_list_single, axis=1) + output_dict['all']['ego']['single_object_bbx_mask_torch'] = torch.stack(object_bbx_mask_list_single, axis=1) + + output_dict['all']['ego']['object_ids'] = object_id_dict + # print('finish get item') + return output_dict['all'] + + def __getitem_single_class__(self, idx, tpe=None, extra_source=None, data_dir=None): + + if extra_source is None and data_dir is None: + base_data_dict = self.retrieve_base_data(idx, tpe) + elif data_dir is not None: + base_data_dict = self.retrieve_base_data(idx=None, tpe=tpe, data_dir=data_dir) + elif extra_source is not None: + base_data_dict = self.retrieve_base_data(idx=None, tpe=tpe, extra_source=extra_source) + + base_data_dict = add_noise_data_dict_asymmetric(base_data_dict,self.params['noise_setting']) + processed_data_dict = OrderedDict() + processed_data_dict['ego'] = {} + + ego_id = -1 + ego_lidar_pose = [] + ego_cav_base = None + + # first find the ego vehicle's lidar pose 
+ for cav_id, cav_content in base_data_dict.items(): + if cav_content['ego']: + ego_id = cav_id + ego_lidar_pose = cav_content['params']['lidar_pose'] + ego_cav_base = cav_content + break + + assert cav_id == list(base_data_dict.keys())[ + 0], "The first element in the OrderedDict must be ego" + assert ego_id != -1 + assert len(ego_lidar_pose) > 0 + + agents_image_inputs = [] + processed_features = [] + object_stack = [] + object_id_stack = [] + single_label_list = [] + single_object_bbx_center_list = [] + single_object_bbx_mask_list = [] + too_far = [] + lidar_pose_list = [] + lidar_pose_clean_list = [] + cav_id_list = [] + projected_lidar_clean_list = [] # disconet + + if self.visualize or self.kd_flag: + projected_lidar_stack = [] + + # loop over all CAVs to process information + for cav_id, selected_cav_base in base_data_dict.items(): + # check if the cav is within the communication range with ego + distance = \ + math.sqrt((selected_cav_base['params']['lidar_pose'][0] - + ego_lidar_pose[0]) ** 2 + ( + selected_cav_base['params'][ + 'lidar_pose'][1] - ego_lidar_pose[ + 1]) ** 2) + + # if distance is too far, we will just skip this agent + if distance > self.params['comm_range']: + too_far.append(cav_id) + continue + + lidar_pose_clean_list.append(selected_cav_base['params']['lidar_pose_clean']) + lidar_pose_list.append(selected_cav_base['params']['lidar_pose']) # 6dof pose + cav_id_list.append(cav_id) + + for cav_id in too_far: + base_data_dict.pop(cav_id) + + ########## Updated by Yifan Lu 2022.1.26 ############ + # box align to correct pose. + # stage1_content contains all agent. Even out of comm range. + if self.box_align and str(idx) in self.stage1_result.keys(): # False + from opencood.models.sub_modules.box_align_v2 import box_alignment_relative_sample_np + stage1_content = self.stage1_result[str(idx)] + if stage1_content is not None: + all_agent_id_list = stage1_content['cav_id_list'] # include those out of range + all_agent_corners_list = stage1_content['pred_corner3d_np_list'] + all_agent_uncertainty_list = stage1_content['uncertainty_np_list'] + + cur_agent_id_list = cav_id_list + cur_agent_pose = [base_data_dict[cav_id]['params']['lidar_pose'] for cav_id in cav_id_list] + cur_agnet_pose = np.array(cur_agent_pose) + cur_agent_in_all_agent = [all_agent_id_list.index(cur_agent) for cur_agent in cur_agent_id_list] # indexing current agent in `all_agent_id_list` + + pred_corners_list = [np.array(all_agent_corners_list[cur_in_all_ind], dtype=np.float64) + for cur_in_all_ind in cur_agent_in_all_agent] + uncertainty_list = [np.array(all_agent_uncertainty_list[cur_in_all_ind], dtype=np.float64) + for cur_in_all_ind in cur_agent_in_all_agent] + + if sum([len(pred_corners) for pred_corners in pred_corners_list]) != 0: + refined_pose = box_alignment_relative_sample_np(pred_corners_list, + cur_agnet_pose, + uncertainty_list=uncertainty_list, + **self.box_align_args) + cur_agnet_pose[:,[0,1,4]] = refined_pose + + for i, cav_id in enumerate(cav_id_list): + lidar_pose_list[i] = cur_agnet_pose[i].tolist() + base_data_dict[cav_id]['params']['lidar_pose'] = cur_agnet_pose[i].tolist() + + + + pairwise_t_matrix = \ + get_pairwise_transformation_asymmetric(base_data_dict, + self.max_cav, + self.proj_first) + + lidar_poses = np.array(lidar_pose_list).reshape(-1, 6) # [N_cav, 6] + lidar_poses_clean = np.array(lidar_pose_clean_list).reshape(-1, 6) # [N_cav, 6] + + # merge preprocessed features from different cavs into the same dict + cav_num = len(cav_id_list) + + # heterogeneous + if 
self.heterogeneous: + lidar_agent, camera_agent = self.selector.select_agent(idx) + lidar_agent = lidar_agent[:cav_num] + processed_data_dict['ego'].update({"lidar_agent": lidar_agent}) + + for _i, cav_id in enumerate(cav_id_list): + selected_cav_base = base_data_dict[cav_id] + + # dynamic object center generator! for heterogeneous input + if (not self.visualize) and self.heterogeneous and lidar_agent[_i]: + self.generate_object_center = self.generate_object_center_lidar + elif (not self.visualize) and self.heterogeneous and (not lidar_agent[_i]): + self.generate_object_center = self.generate_object_center_camera + + selected_cav_processed = self.get_item_single_car( + selected_cav_base, + ego_cav_base, + tpe, + cav_id, + extra_source!=None) + + if True: #extra_source==None: + object_stack.append(selected_cav_processed['object_bbx_center']) + object_id_stack += selected_cav_processed['object_ids'] + if tpe == 'all': + if self.load_lidar_file: + processed_features.append( + selected_cav_processed['processed_features']) + if self.load_camera_file: + agents_image_inputs.append( + selected_cav_processed['image_inputs']) + + if self.visualize or self.kd_flag: + projected_lidar_stack.append( + selected_cav_processed['projected_lidar']) + + if True: #self.supervise_single and extra_source==None: + single_label_list.append(selected_cav_processed['single_label_dict']) + single_object_bbx_center_list.append(selected_cav_processed['single_object_bbx_center']) + single_object_bbx_mask_list.append(selected_cav_processed['single_object_bbx_mask']) + + # generate single view GT label + if True: # self.supervise_single and extra_source==None: + single_label_dicts = {} + if tpe == 'all': + # unused label + if False: + single_label_dicts = self.post_processor.collate_batch(single_label_list) + single_object_bbx_center = torch.from_numpy(np.array(single_object_bbx_center_list)) + single_object_bbx_mask = torch.from_numpy(np.array(single_object_bbx_mask_list)) + processed_data_dict['ego'].update({ + "single_label_dict_torch": single_label_dicts, + "single_object_bbx_center_torch": single_object_bbx_center, + "single_object_bbx_mask_torch": single_object_bbx_mask, + }) + + if self.kd_flag: + stack_lidar_np = np.vstack(projected_lidar_stack) + stack_lidar_np = mask_points_by_range(stack_lidar_np, + self.params['preprocess'][ + 'cav_lidar_range']) + stack_feature_processed = self.pre_processor.preprocess(stack_lidar_np) + processed_data_dict['ego'].update({'teacher_processed_lidar': + stack_feature_processed}) + + if True: # extra_source is None: + # exclude all repetitive objects + unique_indices = \ + [object_id_stack.index(x) for x in set(object_id_stack)] + object_stack = np.vstack(object_stack) + object_stack = object_stack[unique_indices] + + # make sure bounding boxes across all frames have the same number + object_bbx_center = \ + np.zeros((self.params['postprocess']['max_num'], 7)) + mask = np.zeros(self.params['postprocess']['max_num']) + object_bbx_center[:object_stack.shape[0], :] = object_stack + mask[:object_stack.shape[0]] = 1 + + processed_data_dict['ego'].update( + {'object_bbx_center': object_bbx_center, # (100,7) + 'object_bbx_mask': mask, # (100,) + 'object_ids': [object_id_stack[i] for i in unique_indices], + } + ) + + # generate targets label + label_dict = {} + if tpe == 'all': + # unused label + if False: + label_dict = \ + self.post_processor.generate_label( + gt_box_center=object_bbx_center, + anchors=self.anchor_box, + mask=mask) + + processed_data_dict['ego'].update( + { + 
'anchor_box': self.anchor_box, + 'label_dict': label_dict, + 'cav_num': cav_num, + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_poses_clean': lidar_poses_clean, + 'lidar_poses': lidar_poses}) + + if tpe == 'all': + if self.load_lidar_file: + merged_feature_dict = merge_features_to_dict(processed_features) + processed_data_dict['ego'].update({'processed_lidar': merged_feature_dict}) + if self.load_camera_file: + merged_image_inputs_dict = merge_features_to_dict(agents_image_inputs, merge='stack') + processed_data_dict['ego'].update({'image_inputs': merged_image_inputs_dict}) + + if self.visualize: + processed_data_dict['ego'].update({'origin_lidar': + # projected_lidar_stack}) + np.vstack( + projected_lidar_stack)}) + processed_data_dict['ego'].update({'lidar_len': [len(projected_lidar_stack[i]) for i in range(len(projected_lidar_stack))]}) + + + processed_data_dict['ego'].update({'sample_idx': idx, + 'cav_id_list': cav_id_list}) + + img_front_list = [] + img_left_list = [] + img_right_list = [] + BEV_list = [] + + if self.visualize: + for car_id in base_data_dict: + if not base_data_dict[car_id]['ego'] == True: + continue + if 'rgb_front' in base_data_dict[car_id] and 'rgb_left' in base_data_dict[car_id] and 'rgb_right' in base_data_dict[car_id] and 'BEV' in base_data_dict[car_id] : + img_front_list.append(base_data_dict[car_id]['rgb_front']) + img_left_list.append(base_data_dict[car_id]['rgb_left']) + img_right_list.append(base_data_dict[car_id]['rgb_right']) + BEV_list.append(base_data_dict[car_id]['BEV']) + processed_data_dict['ego'].update({'img_front': img_front_list, + 'img_left': img_left_list, + 'img_right': img_right_list, + 'BEV': BEV_list}) + processed_data_dict['ego'].update({'scene_dict': base_data_dict['car_0']['scene_dict'], + 'frame_id': base_data_dict['car_0']['frame_id'], + }) + + + # TODO: LSS debug + processed_data_dict['ego'].update({"det_data": base_data_dict['car_0']['det_data']}) + detmap_pose_list = [] + for car_id in base_data_dict: + detmap_pose_list.append(base_data_dict[car_id]['detmap_pose']) + detmap_pose_list = torch.from_numpy(np.array(detmap_pose_list)) + processed_data_dict['ego'].update({"detmap_pose": detmap_pose_list}) + ## + + return processed_data_dict + + + def collate_batch_train(self, batch, online_eval_only=False): + # Intermediate fusion is different the other two + output_dict = {'ego': {}} + + object_bbx_center = [] + object_bbx_mask = [] + object_ids = [] + processed_lidar_list = [] + image_inputs_list = [] + # used to record different scenario + record_len = [] + label_dict_list = [] + lidar_pose_list = [] + origin_lidar = [] + lidar_len = [] + lidar_pose_clean_list = [] + + # heterogeneous + lidar_agent_list = [] + + # pairwise transformation matrix + pairwise_t_matrix_list = [] + + # disconet + teacher_processed_lidar_list = [] + + # image + img_front = [] + img_left = [] + img_right = [] + BEV = [] + + dict_list = [] + + # TODO: LSS debug + det_data = [] + detmap_pose = [] + + ### 2022.10.10 single gt #### + if self.supervise_single: + pos_equal_one_single = [] + neg_equal_one_single = [] + targets_single = [] + object_bbx_center_single = [] + object_bbx_mask_single = [] + + for i in range(len(batch)): + ego_dict = batch[i]['ego'] + det_data.append(torch.from_numpy(ego_dict['det_data']).unsqueeze(0)) + detmap_pose.append(ego_dict['detmap_pose']) + if not online_eval_only: + object_bbx_center.append(ego_dict['object_bbx_center']) + object_bbx_mask.append(ego_dict['object_bbx_mask']) + object_ids.append(ego_dict['object_ids']) + else: + 
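+                    # online_eval_only: no ground-truth labels are collated here, so a None
+                    # placeholder keeps object_ids aligned with the batch; object_bbx_center
+                    # and object_bbx_mask are likewise set to None further below.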
object_ids.append(None) + lidar_pose_list.append(ego_dict['lidar_poses']) # ego_dict['lidar_pose'] is np.ndarray [N,6] + lidar_pose_clean_list.append(ego_dict['lidar_poses_clean']) + if self.load_lidar_file: + processed_lidar_list.append(ego_dict['processed_lidar']) + if self.load_camera_file: + image_inputs_list.append(ego_dict['image_inputs']) # different cav_num, ego_dict['image_inputs'] is dict. + + record_len.append(ego_dict['cav_num']) + label_dict_list.append(ego_dict['label_dict']) + pairwise_t_matrix_list.append(ego_dict['pairwise_t_matrix']) + + dict_list.append([ego_dict['scene_dict'], ego_dict['frame_id']]) + + if self.visualize: + origin_lidar.append(ego_dict['origin_lidar']) + lidar_len.append(ego_dict['lidar_len']) + if len(ego_dict['img_front']) > 0 and len(ego_dict['img_right']) > 0 and len(ego_dict['img_left']) > 0 and len(ego_dict['BEV']) > 0: + img_front.append(ego_dict['img_front'][0]) + img_left.append(ego_dict['img_left'][0]) + img_right.append(ego_dict['img_right'][0]) + BEV.append(ego_dict['BEV'][0]) + + + if self.kd_flag: + teacher_processed_lidar_list.append(ego_dict['teacher_processed_lidar']) + + ### 2022.10.10 single gt #### + if self.supervise_single and not online_eval_only: + # unused label + if False: + pos_equal_one_single.append(ego_dict['single_label_dict_torch']['pos_equal_one']) + neg_equal_one_single.append(ego_dict['single_label_dict_torch']['neg_equal_one']) + targets_single.append(ego_dict['single_label_dict_torch']['targets']) + object_bbx_center_single.append(ego_dict['single_object_bbx_center_torch']) + object_bbx_mask_single.append(ego_dict['single_object_bbx_mask_torch']) + + # heterogeneous + if self.heterogeneous: + lidar_agent_list.append(ego_dict['lidar_agent']) + + # convert to numpy, (B, max_num, 7) + if not online_eval_only: + object_bbx_center = torch.from_numpy(np.array(object_bbx_center)) + object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask)) + else: + object_bbx_center = None + object_bbx_mask = None + + if self.load_lidar_file: + merged_feature_dict = merge_features_to_dict(processed_lidar_list) + + if self.heterogeneous: + lidar_agent = np.concatenate(lidar_agent_list) + lidar_agent_idx = lidar_agent.nonzero()[0].tolist() + for k, v in merged_feature_dict.items(): # 'voxel_features' 'voxel_num_points' 'voxel_coords' + merged_feature_dict[k] = [v[index] for index in lidar_agent_idx] + + if not self.heterogeneous or (self.heterogeneous and sum(lidar_agent) != 0): + processed_lidar_torch_dict = \ + self.pre_processor.collate_batch(merged_feature_dict) + output_dict['ego'].update({'processed_lidar': processed_lidar_torch_dict}) + + if self.load_camera_file: + merged_image_inputs_dict = merge_features_to_dict(image_inputs_list, merge='cat') + + if self.heterogeneous: + lidar_agent = np.concatenate(lidar_agent_list) + camera_agent = 1 - lidar_agent + camera_agent_idx = camera_agent.nonzero()[0].tolist() + if sum(camera_agent) != 0: + for k, v in merged_image_inputs_dict.items(): # 'imgs' 'rots' 'trans' ... 
+ merged_image_inputs_dict[k] = torch.stack([v[index] for index in camera_agent_idx]) + + if not self.heterogeneous or (self.heterogeneous and sum(camera_agent) != 0): + output_dict['ego'].update({'image_inputs': merged_image_inputs_dict}) + + record_len = torch.from_numpy(np.array(record_len, dtype=int)) + lidar_pose = torch.from_numpy(np.concatenate(lidar_pose_list, axis=0)) + lidar_pose_clean = torch.from_numpy(np.concatenate(lidar_pose_clean_list, axis=0)) + # unused label + label_torch_dict = {} + if False: + label_torch_dict = \ + self.post_processor.collate_batch(label_dict_list) + + # for centerpoint + label_torch_dict.update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask}) + + # (B, max_cav) + pairwise_t_matrix = torch.from_numpy(np.array(pairwise_t_matrix_list)) + + # add pairwise_t_matrix to label dict + label_torch_dict['pairwise_t_matrix'] = pairwise_t_matrix + label_torch_dict['record_len'] = record_len + + + # object id is only used during inference, where batch size is 1. + # so here we only get the first element. + output_dict['ego'].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'record_len': record_len, + 'label_dict': label_torch_dict, + 'object_ids': object_ids[0], + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_pose_clean': lidar_pose_clean, + 'lidar_pose': lidar_pose, + 'anchor_box': self.anchor_box_torch}) + + + output_dict['ego'].update({'dict_list': dict_list}) + + if self.visualize: + origin_lidar = torch.from_numpy(np.array(origin_lidar)) + output_dict['ego'].update({'origin_lidar': origin_lidar}) + lidar_len = np.array(lidar_len) + output_dict['ego'].update({'lidar_len': lidar_len}) + output_dict['ego'].update({'img_front': img_front}) + output_dict['ego'].update({'img_right': img_right}) + output_dict['ego'].update({'img_left': img_left}) + output_dict['ego'].update({'BEV': BEV}) + + if self.kd_flag: + teacher_processed_lidar_torch_dict = \ + self.pre_processor.collate_batch(teacher_processed_lidar_list) + output_dict['ego'].update({'teacher_processed_lidar':teacher_processed_lidar_torch_dict}) + + + if self.supervise_single and not online_eval_only: + output_dict['ego'].update({ + "label_dict_single":{ + # for centerpoint + "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0), + "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0) + }, + "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0), + "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0) + }) + + if self.heterogeneous: + output_dict['ego'].update({ + "lidar_agent_record": torch.from_numpy(np.concatenate(lidar_agent_list)) # [0,1,1,0,1...] + }) + + # TODO: LSS debug + det_data = torch.cat(det_data, dim=0) + detmap_pose = torch.cat(detmap_pose, dim=0) + output_dict['ego'].update({'detmap_pose': detmap_pose}) + + output_dict['ego']['label_dict'].update({ + 'det_data': det_data}) + return output_dict + + def collate_batch_test(self, batch, online_eval_only=False): + + self.online_eval_only = online_eval_only + assert len(batch) <= 1, "Batch size 1 is required during testing!" + output_dict = self.collate_batch_train(batch, online_eval_only) + if output_dict is None: + return None + + # check if anchor box in the batch + if batch[0]['ego']['anchor_box'] is not None: + output_dict['ego'].update({'anchor_box': + self.anchor_box_torch}) + + # save the transformation matrix (4, 4) to ego vehicle + # transformation is only used in post process (no use.) 
+ # we all predict boxes in ego coord. + transformation_matrix_torch = \ + torch.from_numpy(np.identity(4)).float() + transformation_matrix_clean_torch = \ + torch.from_numpy(np.identity(4)).float() + + output_dict['ego'].update({'transformation_matrix': + transformation_matrix_torch, + 'transformation_matrix_clean': + transformation_matrix_clean_torch,}) + + output_dict['ego'].update({ + "sample_idx": batch[0]['ego']['sample_idx'], + "cav_id_list": batch[0]['ego']['cav_id_list'] + }) + + return output_dict + + + def post_process(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. + gt_box_tensor : torch.Tensor + The tensor of gt bounding box. + """ + pred_box_tensor, pred_score = \ + self.post_processor.post_process(data_dict, output_dict) + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict) + + return pred_box_tensor, pred_score, gt_box_tensor + + def post_process_multiclass(self, data_dict, output_dict, online_eval_only=False): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. + gt_box_tensor : torch.Tensor + The tensor of gt bounding box. + """ + + if online_eval_only == False: + online_eval_only = self.online_eval_only + + num_class = output_dict['ego']['cls_preds'].shape[1] + + + pred_box_tensor_list = [] + pred_score_list = [] + gt_box_tensor_list = [] + + num_list = [0,1,3] + + for i in range(num_class): + data_dict_single = copy.deepcopy(data_dict) + output_dict_single = copy.deepcopy(output_dict) + if not online_eval_only: + data_dict_single['ego']['object_bbx_center'] = data_dict['ego']['object_bbx_center'][:,i,:,:] + data_dict_single['ego']['object_bbx_mask'] = data_dict['ego']['object_bbx_mask'][:,i,:] + data_dict_single['ego']['object_ids'] = data_dict['ego']['object_ids'][num_list[i]] + + output_dict_single['ego']['cls_preds'] = output_dict['ego']['cls_preds'][:,i:i+1,:,:] + output_dict_single['ego']['reg_preds'] = output_dict['ego']['reg_preds_multiclass'][:,i,:,:] + + pred_box_tensor, pred_score = \ + self.post_processor.post_process(data_dict_single, output_dict_single) + if not online_eval_only: + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict_single) + else: + gt_box_tensor = None + + pred_box_tensor_list.append(pred_box_tensor) + pred_score_list.append(pred_score) + gt_box_tensor_list.append(gt_box_tensor) + + return pred_box_tensor_list, pred_score_list, gt_box_tensor_list + + return IntermediatemulticlassFusionDataset + + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_fusion_dataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_fusion_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..2fd9095e5e566dcfd7b945cdacd9331e1838c29e --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_fusion_dataset.py @@ -0,0 +1,564 @@ +# late fusion dataset +import random +import math +from 
collections import OrderedDict +import cv2 +import numpy as np +import torch +import copy +from icecream import ic +from PIL import Image +import pickle as pkl +from opencood.utils import box_utils as box_utils +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.data_utils.post_processor import build_postprocessor +from opencood.utils.camera_utils import ( + sample_augmentation, + img_transform, + normalize_img, + img_to_tensor, +) +from opencood.data_utils.augmentor.data_augmentor import DataAugmentor +from opencood.utils.transformation_utils import x1_to_x2 +from opencood.utils.pose_utils import add_noise_data_dict +from opencood.utils.pcd_utils import ( + mask_points_by_range, + mask_ego_points, + shuffle_points, + downsample_lidar_minimum, +) + + +def getLateFusionDataset(cls): + """ + cls: the Basedataset. + """ + class LateFusionDataset(cls): + def __init__(self, params, visualize, train=True): + super().__init__(params, visualize, train) + self.anchor_box = self.post_processor.generate_anchor_box() + self.anchor_box_torch = torch.from_numpy(self.anchor_box) + + self.heterogeneous = False + if 'heter' in params: + self.heterogeneous = True + + def __getitem__(self, idx): + base_data_dict = self.retrieve_base_data(idx) + if self.train: + reformat_data_dict = self.get_item_train(base_data_dict) + else: + reformat_data_dict = self.get_item_test(base_data_dict, idx) + + return reformat_data_dict + + def get_item_train(self, base_data_dict): + processed_data_dict = OrderedDict() + base_data_dict = add_noise_data_dict( + base_data_dict, self.params["noise_setting"] + ) + # during training, we return a random cav's data + # only one vehicle is in processed_data_dict + if not self.visualize: + selected_cav_id, selected_cav_base = random.choice( + list(base_data_dict.items()) + ) + else: + selected_cav_id, selected_cav_base = list(base_data_dict.items())[0] + + selected_cav_processed = self.get_item_single_car(selected_cav_base) + processed_data_dict.update({"ego": selected_cav_processed}) + + return processed_data_dict + + + def get_item_test(self, base_data_dict, idx): + """ + processed_data_dict.keys() = ['ego', "650", "659", ...] + """ + base_data_dict = add_noise_data_dict(base_data_dict,self.params['noise_setting']) + + processed_data_dict = OrderedDict() + ego_id = -1 + ego_lidar_pose = [] + cav_id_list = [] + lidar_pose_list = [] + + # first find the ego vehicle's lidar pose + for cav_id, cav_content in base_data_dict.items(): + if cav_content['ego']: + ego_id = cav_id + ego_lidar_pose = cav_content['params']['lidar_pose'] + ego_lidar_pose_clean = cav_content['params']['lidar_pose_clean'] + break + + assert ego_id != -1 + assert len(ego_lidar_pose) > 0 + + # loop over all CAVs to process information + for cav_id, selected_cav_base in base_data_dict.items(): + distance = \ + math.sqrt((selected_cav_base['params']['lidar_pose'][0] - + ego_lidar_pose[0]) ** 2 + ( + selected_cav_base['params'][ + 'lidar_pose'][1] - ego_lidar_pose[ + 1]) ** 2) + if distance > self.params['comm_range']: + continue + cav_id_list.append(cav_id) + lidar_pose_list.append(selected_cav_base['params']['lidar_pose']) + + cav_id_list_newname = [] + for cav_id in cav_id_list: + selected_cav_base = base_data_dict[cav_id] + # find the transformation matrix from current cav to ego. 
+ cav_lidar_pose = selected_cav_base['params']['lidar_pose'] + transformation_matrix = x1_to_x2(cav_lidar_pose, ego_lidar_pose) + cav_lidar_pose_clean = selected_cav_base['params']['lidar_pose_clean'] + transformation_matrix_clean = x1_to_x2(cav_lidar_pose_clean, ego_lidar_pose_clean) + + selected_cav_processed = \ + self.get_item_single_car(selected_cav_base) + selected_cav_processed.update({'transformation_matrix': transformation_matrix, + 'transformation_matrix_clean': transformation_matrix_clean}) + update_cav = "ego" if cav_id == ego_id else cav_id + processed_data_dict.update({update_cav: selected_cav_processed}) + cav_id_list_newname.append(update_cav) + + # heterogeneous + if self.heterogeneous: + processed_data_dict['ego']['idx'] = idx + processed_data_dict['ego']['cav_list'] = cav_id_list_newname + + return processed_data_dict + + + def get_item_single_car(self, selected_cav_base): + """ + Process a single CAV's information for the train/test pipeline. + + + Parameters + ---------- + selected_cav_base : dict + The dictionary contains a single CAV's raw information. + including 'params', 'camera_data' + + Returns + ------- + selected_cav_processed : dict + The dictionary contains the cav's processed information. + """ + selected_cav_processed = {} + + # label + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center_single( + [selected_cav_base], selected_cav_base["params"]["lidar_pose_clean"] + ) + + # lidar + if self.load_lidar_file or self.visualize: + lidar_np = selected_cav_base['lidar_np'] + lidar_np = shuffle_points(lidar_np) + lidar_np = mask_points_by_range(lidar_np, + self.params['preprocess'][ + 'cav_lidar_range']) + # remove points that hit ego vehicle + lidar_np = mask_ego_points(lidar_np) + + # data augmentation, seems very important for single agent training, because lack of data diversity. + # only work for lidar modality in training. 
+ if not self.heterogeneous: + lidar_np, object_bbx_center, object_bbx_mask = \ + self.augment(lidar_np, object_bbx_center, object_bbx_mask) + + lidar_dict = self.pre_processor.preprocess(lidar_np) + selected_cav_processed.update({'processed_lidar': lidar_dict}) + + + + + if self.visualize: + selected_cav_processed.update({'origin_lidar': lidar_np}) + + # camera + if self.load_camera_file: + # adapted from https://github.com/nv-tlabs/lift-splat-shoot/blob/master/src/data.py + camera_data_list = selected_cav_base["camera_data"] + + params = selected_cav_base["params"] + imgs = [] + rots = [] + trans = [] + intrins = [] + extrinsics = [] # cam_to_lidar + post_rots = [] + post_trans = [] + + for idx, img in enumerate(camera_data_list): + camera_to_lidar, camera_intrinsic = self.get_ext_int(params, idx) + + intrin = torch.from_numpy(camera_intrinsic) + rot = torch.from_numpy( + camera_to_lidar[:3, :3] + ) # R_wc, we consider world-coord is the lidar-coord + tran = torch.from_numpy(camera_to_lidar[:3, 3]) # T_wc + + post_rot = torch.eye(2) + post_tran = torch.zeros(2) + + img_src = [img] + + # depth + if self.load_depth_file: + depth_img = selected_cav_base["depth_data"][idx] + img_src.append(depth_img) + else: + depth_img = None + + # data augmentation + resize, resize_dims, crop, flip, rotate = sample_augmentation( + self.data_aug_conf, self.train + ) + img_src, post_rot2, post_tran2 = img_transform( + img_src, + post_rot, + post_tran, + resize=resize, + resize_dims=resize_dims, + crop=crop, + flip=flip, + rotate=rotate, + ) + # for convenience, make augmentation matrices 3x3 + post_tran = torch.zeros(3) + post_rot = torch.eye(3) + post_tran[:2] = post_tran2 + post_rot[:2, :2] = post_rot2 + + img_src[0] = normalize_img(img_src[0]) + if self.load_depth_file: + img_src[1] = img_to_tensor(img_src[1]) * 255 + + imgs.append(torch.cat(img_src, dim=0)) + intrins.append(intrin) + extrinsics.append(torch.from_numpy(camera_to_lidar)) + rots.append(rot) + trans.append(tran) + post_rots.append(post_rot) + post_trans.append(post_tran) + + selected_cav_processed.update( + { + "image_inputs": + { + "imgs": torch.stack(imgs), # [N, 3or4, H, W] + "intrins": torch.stack(intrins), + "extrinsics": torch.stack(extrinsics), + "rots": torch.stack(rots), + "trans": torch.stack(trans), + "post_rots": torch.stack(post_rots), + "post_trans": torch.stack(post_trans), + } + } + ) + + + selected_cav_processed.update( + { + "object_bbx_center": object_bbx_center, + "object_bbx_mask": object_bbx_mask, + "object_ids": object_ids, + } + ) + + # generate targets label + label_dict = self.post_processor.generate_label( + gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask + ) + selected_cav_processed.update({"label_dict": label_dict}) + + return selected_cav_processed + + + def collate_batch_train(self, batch): + """ + Customized collate function for pytorch dataloader during training + for early and late fusion dataset. + + Parameters + ---------- + batch : dict + + Returns + ------- + batch : dict + Reformatted batch. + """ + # during training, we only care about ego. 
+ output_dict = {'ego': {}} + + object_bbx_center = [] + object_bbx_mask = [] + processed_lidar_list = [] + label_dict_list = [] + origin_lidar = [] + + for i in range(len(batch)): + ego_dict = batch[i]['ego'] + object_bbx_center.append(ego_dict['object_bbx_center']) + object_bbx_mask.append(ego_dict['object_bbx_mask']) + label_dict_list.append(ego_dict['label_dict']) + + if self.visualize: + origin_lidar.append(ego_dict['origin_lidar']) + + # convert to numpy, (B, max_num, 7) + object_bbx_center = torch.from_numpy(np.array(object_bbx_center)) + object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask)) + label_torch_dict = \ + self.post_processor.collate_batch(label_dict_list) + + # for centerpoint + label_torch_dict.update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask}) + + output_dict['ego'].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'anchor_box': torch.from_numpy(self.anchor_box), + 'label_dict': label_torch_dict}) + if self.visualize: + origin_lidar = \ + np.array(downsample_lidar_minimum(pcd_np_list=origin_lidar)) + origin_lidar = torch.from_numpy(origin_lidar) + output_dict['ego'].update({'origin_lidar': origin_lidar}) + + if self.load_lidar_file: + for i in range(len(batch)): + processed_lidar_list.append(batch[i]['ego']['processed_lidar']) + processed_lidar_torch_dict = \ + self.pre_processor.collate_batch(processed_lidar_list) + output_dict['ego'].update({'processed_lidar': processed_lidar_torch_dict}) + + if self.load_camera_file: + # collate ego camera information + imgs_batch = [] + rots_batch = [] + trans_batch = [] + intrins_batch = [] + extrinsics_batch = [] + post_trans_batch = [] + post_rots_batch = [] + for i in range(len(batch)): + ego_dict = batch[i]["ego"]["image_inputs"] + imgs_batch.append(ego_dict["imgs"]) + rots_batch.append(ego_dict["rots"]) + trans_batch.append(ego_dict["trans"]) + intrins_batch.append(ego_dict["intrins"]) + extrinsics_batch.append(ego_dict["extrinsics"]) + post_trans_batch.append(ego_dict["post_trans"]) + post_rots_batch.append(ego_dict["post_rots"]) + + output_dict["ego"].update({ + "image_inputs": + { + "imgs": torch.stack(imgs_batch), # [B, N, C, H, W] + "rots": torch.stack(rots_batch), + "trans": torch.stack(trans_batch), + "intrins": torch.stack(intrins_batch), + "post_trans": torch.stack(post_trans_batch), + "post_rots": torch.stack(post_rots_batch), + } + } + ) + + + return output_dict + + def collate_batch_test(self, batch): + """ + Customized collate function for pytorch dataloader during testing + for late fusion dataset. + + Parameters + ---------- + batch : dict + + Returns + ------- + batch : dict + Reformatted batch. + """ + # currently, we only support batch size of 1 during testing + assert len(batch) <= 1, "Batch size 1 is required during testing!" + batch = batch[0] + + output_dict = {} + + # heterogeneous + if self.heterogeneous: + idx = batch['ego']['idx'] + cav_list = batch['ego']['cav_list'] # ['ego', '650' ..] + cav_num = len(batch) + lidar_agent, camera_agent = self.selector.select_agent(idx) + lidar_agent = lidar_agent[:cav_num] # [1,0,0,1,0] + lidar_agent_idx = lidar_agent.nonzero()[0].tolist() + lidar_agent_cav_id = [cav_list[index] for index in lidar_agent_idx] # ['ego', ...] 
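The index bookkeeping in the heterogeneous branch above is easier to see in a minimal, self-contained sketch; the agent assignment values below are made up and not taken from the repo:

import numpy as np

cav_list = ['ego', '650', '659']          # cav ids after renaming, ego first
lidar_agent = np.array([1, 0, 1])         # 1 -> lidar agent, 0 -> camera agent (invented)
lidar_agent_idx = lidar_agent.nonzero()[0].tolist()            # [0, 2]
lidar_agent_cav_id = [cav_list[i] for i in lidar_agent_idx]    # ['ego', '659']
camera_agent_cav_id = [cav_list[i]
                       for i in (1 - lidar_agent).nonzero()[0].tolist()]  # ['650']
print(lidar_agent_cav_id, camera_agent_cav_id)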
+ + + # for late fusion, we also need to stack the lidar for better + # visualization + if self.visualize: + projected_lidar_list = [] + origin_lidar = [] + + for cav_id, cav_content in batch.items(): + output_dict.update({cav_id: {}}) + # shape: (1, max_num, 7) + object_bbx_center = \ + torch.from_numpy(np.array([cav_content['object_bbx_center']])) + object_bbx_mask = \ + torch.from_numpy(np.array([cav_content['object_bbx_mask']])) + object_ids = cav_content['object_ids'] + + # the anchor box is the same for all bounding boxes usually, thus + # we don't need the batch dimension. + output_dict[cav_id].update( + {"anchor_box": self.anchor_box_torch} + ) + + transformation_matrix = cav_content['transformation_matrix'] + if self.visualize: + origin_lidar = [cav_content['origin_lidar']] + if (self.params['only_vis_ego'] is False) or (cav_id=='ego'): + projected_lidar = copy.deepcopy(cav_content['origin_lidar']) + projected_lidar[:, :3] = \ + box_utils.project_points_by_matrix_torch( + projected_lidar[:, :3], + transformation_matrix) + projected_lidar_list.append(projected_lidar) + + if self.load_lidar_file: + # processed lidar dictionary + processed_lidar_torch_dict = \ + self.pre_processor.collate_batch( + [cav_content['processed_lidar']]) + output_dict[cav_id].update({'processed_lidar': processed_lidar_torch_dict}) + + if self.load_camera_file: + imgs_batch = [cav_content["image_inputs"]["imgs"]] + rots_batch = [cav_content["image_inputs"]["rots"]] + trans_batch = [cav_content["image_inputs"]["trans"]] + intrins_batch = [cav_content["image_inputs"]["intrins"]] + extrinsics_batch = [cav_content["image_inputs"]["extrinsics"]] + post_trans_batch = [cav_content["image_inputs"]["post_trans"]] + post_rots_batch = [cav_content["image_inputs"]["post_rots"]] + + output_dict[cav_id].update({ + "image_inputs": + { + "imgs": torch.stack(imgs_batch), + "rots": torch.stack(rots_batch), + "trans": torch.stack(trans_batch), + "intrins": torch.stack(intrins_batch), + "extrinsics": torch.stack(extrinsics_batch), + "post_trans": torch.stack(post_trans_batch), + "post_rots": torch.stack(post_rots_batch), + } + } + ) + + # heterogeneous + if self.heterogeneous: + if cav_id in lidar_agent_cav_id: + output_dict[cav_id].pop('image_inputs') + else: + output_dict[cav_id].pop('processed_lidar') + + # label dictionary + label_torch_dict = \ + self.post_processor.collate_batch([cav_content['label_dict']]) + + # for centerpoint + label_torch_dict.update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask}) + + # save the transformation matrix (4, 4) to ego vehicle + transformation_matrix_torch = \ + torch.from_numpy( + np.array(cav_content['transformation_matrix'])).float() + + # late fusion training, no noise + transformation_matrix_clean_torch = \ + torch.from_numpy( + np.array(cav_content['transformation_matrix_clean'])).float() + + output_dict[cav_id].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'label_dict': label_torch_dict, + 'object_ids': object_ids, + 'transformation_matrix': transformation_matrix_torch, + 'transformation_matrix_clean': transformation_matrix_clean_torch}) + + if self.visualize: + origin_lidar = \ + np.array( + downsample_lidar_minimum(pcd_np_list=origin_lidar)) + origin_lidar = torch.from_numpy(origin_lidar) + output_dict[cav_id].update({'origin_lidar': origin_lidar}) + + if self.visualize: + projected_lidar_stack = [torch.from_numpy( + np.vstack(projected_lidar_list))] + output_dict['ego'].update({'origin_lidar': 
projected_lidar_stack}) + # output_dict['ego'].update({'projected_lidar_list': projected_lidar_list}) + + return output_dict + + + def post_process(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. + gt_box_tensor : torch.Tensor + The tensor of gt bounding box. + """ + pred_box_tensor, pred_score = self.post_processor.post_process( + data_dict, output_dict + ) + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict) + + return pred_box_tensor, pred_score, gt_box_tensor + + def post_process_no_fusion(self, data_dict, output_dict_ego): + data_dict_ego = OrderedDict() + data_dict_ego["ego"] = data_dict["ego"] + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict) + + pred_box_tensor, pred_score = self.post_processor.post_process( + data_dict_ego, output_dict_ego + ) + return pred_box_tensor, pred_score, gt_box_tensor + + def post_process_no_fusion_uncertainty(self, data_dict, output_dict_ego): + data_dict_ego = OrderedDict() + data_dict_ego['ego'] = data_dict['ego'] + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict) + + pred_box_tensor, pred_score, uncertainty = \ + self.post_processor.post_process(data_dict_ego, output_dict_ego, return_uncertainty=True) + return pred_box_tensor, pred_score, gt_box_tensor, uncertainty + + return LateFusionDataset \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_heter_fusion_dataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_heter_fusion_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..c518bc7e23b2b1fde59b38c052969c3c30e16b2c --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_heter_fusion_dataset.py @@ -0,0 +1,565 @@ +# late fusion dataset +import random +import math +from collections import OrderedDict +import cv2 +import numpy as np +import torch +import copy +from icecream import ic +from PIL import Image +import pickle as pkl +from opencood.utils import box_utils as box_utils +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.data_utils.post_processor import build_postprocessor +from opencood.utils.camera_utils import ( + sample_augmentation, + img_transform, + normalize_img, + img_to_tensor, +) +from opencood.data_utils.augmentor.data_augmentor import DataAugmentor +from opencood.utils.transformation_utils import x1_to_x2 +from opencood.utils.pose_utils import add_noise_data_dict +from opencood.utils.pcd_utils import ( + mask_points_by_range, + mask_ego_points, + shuffle_points, + downsample_lidar_minimum, +) +from opencood.utils.common_utils import read_json +from opencood.utils.common_utils import merge_features_to_dict +from opencood.utils.heter_utils import Adaptor + +def getLateheterFusionDataset(cls): + """ + cls: the Basedataset. 
+ """ + class LateheterFusionDataset(cls): + def __init__(self, params, visualize, train=True): + super().__init__(params, visualize, train) + self.anchor_box = self.post_processor.generate_anchor_box() + self.anchor_box_torch = torch.from_numpy(self.anchor_box) + + self.heterogeneous = True + self.modality_assignment = read_json(params['heter']['assignment_path']) + self.ego_modality = params['heter']['ego_modality'] # "m1" or "m1&m2" or "m3" + + self.modality_name_list = list(params['heter']['modality_setting'].keys()) + self.sensor_type_dict = OrderedDict() + + lidar_channels_dict = params['heter'].get('lidar_channels_dict', OrderedDict()) + mapping_dict = params['heter']['mapping_dict'] + + self.adaptor = Adaptor(self.ego_modality, + self.modality_name_list, + self.modality_assignment, + lidar_channels_dict, + mapping_dict, + None, + train) + + for modality_name, modal_setting in params['heter']['modality_setting'].items(): + self.sensor_type_dict[modality_name] = modal_setting['sensor_type'] + if modal_setting['sensor_type'] == 'lidar': + setattr(self, f"pre_processor_{modality_name}", build_preprocessor(modal_setting['preprocess'], train)) + + elif modal_setting['sensor_type'] == 'camera': + setattr(self, f"data_aug_conf_{modality_name}", modal_setting['data_aug_conf']) + + else: + raise("Not support this type of sensor") + + self.reinitialize() + + def __getitem__(self, idx): + base_data_dict = self.retrieve_base_data(idx) + if self.train: + reformat_data_dict = self.get_item_train(base_data_dict) + else: + reformat_data_dict = self.get_item_test(base_data_dict, idx) + return reformat_data_dict + + def get_item_train(self, base_data_dict): + processed_data_dict = OrderedDict() + base_data_dict = add_noise_data_dict( + base_data_dict, self.params["noise_setting"] + ) + # during training, we return a random cav's data + # only one vehicle is in processed_data_dict + if not self.visualize: + options = [] + for cav_id, cav_content in base_data_dict.items(): + if cav_content['modality_name'] in self.ego_modality: + options.append(cav_id) + selected_cav_base = base_data_dict[random.choice(options)] + else: + selected_cav_id, selected_cav_base = list(base_data_dict.items())[0] + + selected_cav_processed = self.get_item_single_car(selected_cav_base) + processed_data_dict.update({"ego": selected_cav_processed}) + + return processed_data_dict + + + def get_item_test(self, base_data_dict, idx): + """ + processed_data_dict.keys() = ['ego', "650", "659", ...] 
+ """ + base_data_dict = add_noise_data_dict(base_data_dict,self.params['noise_setting']) + + processed_data_dict = OrderedDict() + ego_id = -1 + ego_lidar_pose = [] + cav_id_list = [] + lidar_pose_list = [] + + # first find the ego vehicle's lidar pose + for cav_id, cav_content in base_data_dict.items(): + if cav_content['ego']: + ego_id = cav_id + ego_lidar_pose = cav_content['params']['lidar_pose'] + ego_lidar_pose_clean = cav_content['params']['lidar_pose_clean'] + break + + assert ego_id != -1 + assert len(ego_lidar_pose) > 0 + + # loop over all CAVs to process information + for cav_id, selected_cav_base in base_data_dict.items(): + distance = \ + math.sqrt((selected_cav_base['params']['lidar_pose'][0] - + ego_lidar_pose[0]) ** 2 + ( + selected_cav_base['params'][ + 'lidar_pose'][1] - ego_lidar_pose[ + 1]) ** 2) + if distance > self.params['comm_range']: + continue + + if self.adaptor.unmatched_modality(selected_cav_base['modality_name']): + continue + + cav_id_list.append(cav_id) + lidar_pose_list.append(selected_cav_base['params']['lidar_pose']) + + cav_id_list_newname = [] + for cav_id in cav_id_list: + selected_cav_base = base_data_dict[cav_id] + # find the transformation matrix from current cav to ego. + cav_lidar_pose = selected_cav_base['params']['lidar_pose'] + transformation_matrix = x1_to_x2(cav_lidar_pose, ego_lidar_pose) + cav_lidar_pose_clean = selected_cav_base['params']['lidar_pose_clean'] + transformation_matrix_clean = x1_to_x2(cav_lidar_pose_clean, ego_lidar_pose_clean) + + # In test phase, we all use lidar label for fair comparison. (need discussion) + self.label_type = 'lidar' # DAIRV2X + self.generate_object_center = self.generate_object_center_lidar # OPV2V, V2XSET + + selected_cav_processed = \ + self.get_item_single_car(selected_cav_base) + selected_cav_processed.update({'transformation_matrix': transformation_matrix, + 'transformation_matrix_clean': transformation_matrix_clean}) + update_cav = "ego" if cav_id == ego_id else cav_id + processed_data_dict.update({update_cav: selected_cav_processed}) + cav_id_list_newname.append(update_cav) + + + return processed_data_dict + + + def get_item_single_car(self, selected_cav_base): + """ + Process a single CAV's information for the train/test pipeline. + + + Parameters + ---------- + selected_cav_base : dict + The dictionary contains a single CAV's raw information. + including 'params', 'camera_data' + + Returns + ------- + selected_cav_processed : dict + The dictionary contains the cav's processed information. + """ + selected_cav_processed = {} + modality_name = selected_cav_base['modality_name'] + sensor_type = self.sensor_type_dict[modality_name] + + # label + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center_single( + [selected_cav_base], selected_cav_base["params"]["lidar_pose_clean"] + ) + + # lidar + if sensor_type == "lidar" or self.visualize: + lidar_np = selected_cav_base['lidar_np'] + lidar_np = shuffle_points(lidar_np) + lidar_np = mask_points_by_range(lidar_np, + self.params['preprocess'][ + 'cav_lidar_range']) + # remove points that hit ego vehicle + lidar_np = mask_ego_points(lidar_np) + + # data augmentation, seems very important for single agent training, because lack of data diversity. + # only work for lidar modality in training. 
+ lidar_np, object_bbx_center, object_bbx_mask = \ + self.augment(lidar_np, object_bbx_center, object_bbx_mask) + if sensor_type == "lidar": + processed_lidar = eval(f"self.pre_processor_{modality_name}").preprocess(lidar_np) + selected_cav_processed.update({f'processed_features_{modality_name}': processed_lidar}) + + + if self.visualize: + selected_cav_processed.update({'origin_lidar': lidar_np}) + + # camera + if sensor_type == "camera": + # adapted from https://github.com/nv-tlabs/lift-splat-shoot/blob/master/src/data.py + camera_data_list = selected_cav_base["camera_data"] + + params = selected_cav_base["params"] + imgs = [] + rots = [] + trans = [] + intrins = [] + extrinsics = [] # cam_to_lidar + post_rots = [] + post_trans = [] + + for idx, img in enumerate(camera_data_list): + camera_to_lidar, camera_intrinsic = self.get_ext_int(params, idx) + + intrin = torch.from_numpy(camera_intrinsic) + rot = torch.from_numpy( + camera_to_lidar[:3, :3] + ) # R_wc, we consider world-coord is the lidar-coord + tran = torch.from_numpy(camera_to_lidar[:3, 3]) # T_wc + + post_rot = torch.eye(2) + post_tran = torch.zeros(2) + + img_src = [img] + + # depth + if self.load_depth_file: + depth_img = selected_cav_base["depth_data"][idx] + img_src.append(depth_img) + else: + depth_img = None + + # data augmentation + resize, resize_dims, crop, flip, rotate = sample_augmentation( + eval(f"self.data_aug_conf_{modality_name}"), self.train + ) + img_src, post_rot2, post_tran2 = img_transform( + img_src, + post_rot, + post_tran, + resize=resize, + resize_dims=resize_dims, + crop=crop, + flip=flip, + rotate=rotate, + ) + # for convenience, make augmentation matrices 3x3 + post_tran = torch.zeros(3) + post_rot = torch.eye(3) + post_tran[:2] = post_tran2 + post_rot[:2, :2] = post_rot2 + + img_src[0] = normalize_img(img_src[0]) + if self.load_depth_file: + img_src[1] = img_to_tensor(img_src[1]) * 255 + + imgs.append(torch.cat(img_src, dim=0)) + intrins.append(intrin) + extrinsics.append(torch.from_numpy(camera_to_lidar)) + rots.append(rot) + trans.append(tran) + post_rots.append(post_rot) + post_trans.append(post_tran) + + selected_cav_processed.update( + { + f"image_inputs_{modality_name}": + { + "imgs": torch.stack(imgs), # [N, 3or4, H, W] + "intrins": torch.stack(intrins), + "extrinsics": torch.stack(extrinsics), + "rots": torch.stack(rots), + "trans": torch.stack(trans), + "post_rots": torch.stack(post_rots), + "post_trans": torch.stack(post_trans), + } + } + ) + + + selected_cav_processed.update( + { + "object_bbx_center": object_bbx_center, + "object_bbx_mask": object_bbx_mask, + "object_ids": object_ids, + "modality_name": modality_name + } + ) + + # generate targets label + label_dict = self.post_processor.generate_label( + gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask + ) + selected_cav_processed.update({"label_dict": label_dict}) + + return selected_cav_processed + + + def collate_batch_train(self, batch): + """ + Customized collate function for pytorch dataloader during training + for early and late fusion dataset. + + Parameters + ---------- + batch : dict + + Returns + ------- + batch : dict + Reformatted batch. + """ + # during training, we only care about ego. 
+ output_dict = {'ego': {}} + + object_bbx_center = [] + object_bbx_mask = [] + label_dict_list = [] + origin_lidar = [] + inputs_list_m1 = [] + inputs_list_m2 = [] + inputs_list_m3 = [] + inputs_list_m4 = [] + for i in range(len(batch)): + ego_dict = batch[i]['ego'] + object_bbx_center.append(ego_dict['object_bbx_center']) + object_bbx_mask.append(ego_dict['object_bbx_mask']) + label_dict_list.append(ego_dict['label_dict']) + + if self.visualize: + origin_lidar.append(ego_dict['origin_lidar']) + + # convert to numpy, (B, max_num, 7) + object_bbx_center = torch.from_numpy(np.array(object_bbx_center)) + object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask)) + label_torch_dict = \ + self.post_processor.collate_batch(label_dict_list) + + # for centerpoint + label_torch_dict.update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask}) + + output_dict['ego'].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'anchor_box': torch.from_numpy(self.anchor_box), + 'label_dict': label_torch_dict}) + if self.visualize: + origin_lidar = \ + np.array(downsample_lidar_minimum(pcd_np_list=origin_lidar)) + origin_lidar = torch.from_numpy(origin_lidar) + output_dict['ego'].update({'origin_lidar': origin_lidar}) + + + + + for modality_name in self.modality_name_list: + sensor_type = self.sensor_type_dict[modality_name] + for i in range(len(batch)): + ego_dict = batch[i]['ego'] + if f'processed_features_{modality_name}' in ego_dict: + eval(f"inputs_list_{modality_name}").append(ego_dict[f'processed_features_{modality_name}']) + elif f'image_inputs_{modality_name}' in ego_dict: + eval(f"inputs_list_{modality_name}").append(ego_dict[f'image_inputs_{modality_name}']) + + if self.sensor_type_dict[modality_name] == "lidar": + processed_lidar_torch_dict = eval(f"self.pre_processor_{modality_name}").collate_batch(eval(f"inputs_list_{modality_name}")) + output_dict['ego'].update({f'inputs_{modality_name}': processed_lidar_torch_dict}) + elif self.sensor_type_dict[modality_name] == "camera": + merged_image_inputs_dict = merge_features_to_dict(eval(f"inputs_list_{modality_name}"), merge='stack') + output_dict['ego'].update({f'inputs_{modality_name}': merged_image_inputs_dict}) + + return output_dict + + def collate_batch_test(self, batch): + """ + Customized collate function for pytorch dataloader during testing + for late fusion dataset. + + Parameters + ---------- + batch : dict + + Returns + ------- + batch : dict + Reformatted batch. + """ + # currently, we only support batch size of 1 during testing + assert len(batch) <= 1, "Batch size 1 is required during testing!" + batch = batch[0] + + output_dict = {} + + # for late fusion, we also need to stack the lidar for better + # visualization + if self.visualize: + projected_lidar_list = [] + origin_lidar = [] + + for cav_id, cav_content in batch.items(): + modality_name = cav_content['modality_name'] + sensor_type = self.sensor_type_dict[modality_name] + + output_dict.update({cav_id: {}}) + # shape: (1, max_num, 7) + object_bbx_center = \ + torch.from_numpy(np.array([cav_content['object_bbx_center']])) + object_bbx_mask = \ + torch.from_numpy(np.array([cav_content['object_bbx_mask']])) + object_ids = cav_content['object_ids'] + + # the anchor box is the same for all bounding boxes usually, thus + # we don't need the batch dimension. 
+ output_dict[cav_id].update( + {"anchor_box": self.anchor_box_torch} + ) + + transformation_matrix = cav_content['transformation_matrix'] + if self.visualize: + origin_lidar = [cav_content['origin_lidar']] + if (self.params.get('only_vis_ego', True) is False) or (cav_id=='ego'): + projected_lidar = copy.deepcopy(cav_content['origin_lidar']) + projected_lidar[:, :3] = \ + box_utils.project_points_by_matrix_torch( + projected_lidar[:, :3], + transformation_matrix) + projected_lidar_list.append(projected_lidar) + + if sensor_type == "lidar": + # processed lidar dictionary + processed_lidar_torch_dict = \ + eval(f"self.pre_processor_{modality_name}").collate_batch([cav_content[f'processed_features_{modality_name}']]) + output_dict[cav_id].update({f'inputs_{modality_name}': processed_lidar_torch_dict}) + + if sensor_type == 'camera': + imgs_batch = [cav_content[f"image_inputs_{modality_name}"]["imgs"]] + rots_batch = [cav_content[f"image_inputs_{modality_name}"]["rots"]] + trans_batch = [cav_content[f"image_inputs_{modality_name}"]["trans"]] + intrins_batch = [cav_content[f"image_inputs_{modality_name}"]["intrins"]] + extrinsics_batch = [cav_content[f"image_inputs_{modality_name}"]["extrinsics"]] + post_trans_batch = [cav_content[f"image_inputs_{modality_name}"]["post_trans"]] + post_rots_batch = [cav_content[f"image_inputs_{modality_name}"]["post_rots"]] + + output_dict[cav_id].update({ + f"inputs_{modality_name}": + { + "imgs": torch.stack(imgs_batch), + "rots": torch.stack(rots_batch), + "trans": torch.stack(trans_batch), + "intrins": torch.stack(intrins_batch), + "extrinsics": torch.stack(extrinsics_batch), + "post_trans": torch.stack(post_trans_batch), + "post_rots": torch.stack(post_rots_batch), + } + } + ) + + + # label dictionary + label_torch_dict = \ + self.post_processor.collate_batch([cav_content['label_dict']]) + + # for centerpoint + label_torch_dict.update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask}) + + # save the transformation matrix (4, 4) to ego vehicle + transformation_matrix_torch = \ + torch.from_numpy( + np.array(cav_content['transformation_matrix'])).float() + + # late fusion training, no noise + transformation_matrix_clean_torch = \ + torch.from_numpy( + np.array(cav_content['transformation_matrix_clean'])).float() + + output_dict[cav_id].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'label_dict': label_torch_dict, + 'object_ids': object_ids, + 'transformation_matrix': transformation_matrix_torch, + 'transformation_matrix_clean': transformation_matrix_clean_torch, + 'modality_name': modality_name}) + + if self.visualize: + origin_lidar = \ + np.array( + downsample_lidar_minimum(pcd_np_list=origin_lidar)) + origin_lidar = torch.from_numpy(origin_lidar) + output_dict[cav_id].update({'origin_lidar': origin_lidar}) + + if self.visualize: + projected_lidar_stack = [torch.from_numpy( + np.vstack(projected_lidar_list))] + output_dict['ego'].update({'origin_lidar': projected_lidar_stack}) + # output_dict['ego'].update({'projected_lidar_list': projected_lidar_list}) + + return output_dict + + + def post_process(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. 
+            gt_box_tensor : torch.Tensor
+                The tensor of gt bounding box.
+            """
+            pred_box_tensor, pred_score = self.post_processor.post_process(
+                data_dict, output_dict
+            )
+            gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict)
+
+            return pred_box_tensor, pred_score, gt_box_tensor
+
+        def post_process_no_fusion(self, data_dict, output_dict_ego):
+            data_dict_ego = OrderedDict()
+            data_dict_ego["ego"] = data_dict["ego"]
+            gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict)
+
+            pred_box_tensor, pred_score = self.post_processor.post_process(
+                data_dict_ego, output_dict_ego
+            )
+            return pred_box_tensor, pred_score, gt_box_tensor
+
+        def post_process_no_fusion_uncertainty(self, data_dict, output_dict_ego):
+            data_dict_ego = OrderedDict()
+            data_dict_ego['ego'] = data_dict['ego']
+            gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict)
+
+            pred_box_tensor, pred_score, uncertainty = \
+                self.post_processor.post_process(data_dict_ego, output_dict_ego, return_uncertainty=True)
+            return pred_box_tensor, pred_score, gt_box_tensor, uncertainty
+
+    return LateheterFusionDataset
\ No newline at end of file
diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_multi_fusion_dataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_multi_fusion_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..71dbbe07a881bc356351d78e6a4cc683a547e97f
--- /dev/null
+++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_multi_fusion_dataset.py
@@ -0,0 +1,631 @@
+# late fusion dataset
+import random
+import math
+from collections import OrderedDict
+import cv2
+import numpy as np
+import torch
+import copy
+from icecream import ic
+from PIL import Image
+import pickle as pkl
+from opencood.utils import box_utils as box_utils
+from opencood.data_utils.pre_processor import build_preprocessor
+from opencood.data_utils.post_processor import build_postprocessor
+from opencood.utils.camera_utils import (
+    sample_augmentation,
+    img_transform,
+    normalize_img,
+    img_to_tensor,
+)
+from opencood.data_utils.augmentor.data_augmentor import DataAugmentor
+from opencood.utils.transformation_utils import x1_to_x2
+from opencood.utils.pose_utils import add_noise_data_dict
+from opencood.utils.pcd_utils import (
+    mask_points_by_range,
+    mask_ego_points,
+    shuffle_points,
+    downsample_lidar_minimum,
+)
+
+
+
+def getLateclassFusionDataset(cls):
+    """
+    cls: the BaseDataset (or another parent dataset class) providing the basic
+    interfaces, e.g.:
+        - retrieve_base_data()
+        - generate_object_center_single()
+        - self.post_processor
+        - self.pre_processor
+        - self.selector (when the heterogeneous config is used)
+    and so on.
+    """
+    class LateclassFusionDataset(cls):
+        def __init__(self, params, visualize, train=True):
+            super().__init__(params, visualize, train)
+            self.anchor_box = self.post_processor.generate_anchor_box()
+            self.anchor_box_torch = torch.from_numpy(self.anchor_box)
+
+            # whether heterogeneous training is enabled (e.g. some agents use
+            # lidar while others use camera)
+            self.heterogeneous = False
+            if "heter" in params:
+                self.heterogeneous = True
+
+            # whether multi-class detection is enabled
+            self.multiclass = params["model"]["args"].get("multi_class", False)
+
+            # class ID list for the multi-class setting, if needed;
+            # e.g. [0, 1, 3] could correspond to car / pedestrian / cyclist
+            self.class_list = params.get("class_list", [0, 1, 3])
+            # adjust this if the project distinguishes classes as ['all', 0, 1, 3] instead
+
+            # used for visualization
+            self.visualize = visualize
+            self.train = train
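For orientation, a sketch of the configuration keys the constructor above reads; the values here are placeholders, not a real experiment config:

params_sketch = {
    "model": {"args": {"multi_class": True}},  # switches on the multi-class branch
    "class_list": [0, 1, 3],                   # optional, defaults to [0, 1, 3]
    # "heter": {...},                          # mere presence enables heterogeneous mode
}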
+        def __getitem__(self, idx):
+            """
+            Training: randomly pick one CAV for late-fusion supervision
+            (consistent with LateFusionDataset);
+            test / validation: keep every CAV within communication range.
+            """
+            base_data_dict = self.retrieve_base_data(idx)
+            if self.train:
+                reformat_data_dict = self.get_item_train(base_data_dict)
+            else:
+                reformat_data_dict = self.get_item_test(base_data_dict, idx)
+            return reformat_data_dict
+
+        def get_item_train(self, base_data_dict):
+            """
+            Training-time processing: usually only one CAV (with its label) is
+            sampled, which keeps the memory footprint low and stays close to
+            single-vehicle training.
+            """
+            from collections import OrderedDict
+            processed_data_dict = OrderedDict()
+
+            # pose noise (if configured)
+            base_data_dict = self.add_noise_data_if_needed(base_data_dict)
+
+            # randomly pick a single CAV
+            if not self.visualize:
+                selected_cav_id, selected_cav_base = random.choice(
+                    list(base_data_dict.items())
+                )
+            else:
+                # for visualization, the ego vehicle is usually chosen
+                selected_cav_id, selected_cav_base = list(base_data_dict.items())[0]
+
+            # process the single vehicle (including the multi-class bboxes)
+            cav_processed = self.get_item_single_car(selected_cav_base)
+            processed_data_dict["ego"] = cav_processed
+            return processed_data_dict
+
+        def get_item_test(self, base_data_dict, idx):
+            """
+            Test / validation: keep every CAV within comm_range; each of them
+            needs its own late-fusion label.
+            """
+            from collections import OrderedDict
+            import math
+
+            base_data_dict = self.add_noise_data_if_needed(base_data_dict)
+
+            processed_data_dict = OrderedDict()
+            ego_id, ego_pose = -1, None
+            # first find the ego vehicle
+            for cav_id, cav_content in base_data_dict.items():
+                if cav_content["ego"]:
+                    ego_id = cav_id
+                    ego_pose = cav_content["params"]["lidar_pose"]
+                    ego_pose_clean = cav_content["params"]["lidar_pose_clean"]
+                    break
+            assert ego_id != -1
+
+            cav_id_list = []
+            for cav_id, cav_content in base_data_dict.items():
+                distance = math.sqrt(
+                    (cav_content["params"]["lidar_pose"][0] - ego_pose[0]) ** 2
+                    + (cav_content["params"]["lidar_pose"][1] - ego_pose[1]) ** 2
+                )
+                if distance <= self.params["comm_range"]:
+                    cav_id_list.append(cav_id)
+
+            cav_id_list_newname = []
+            for cav_id in cav_id_list:
+                selected_cav_base = base_data_dict[cav_id]
+                transformation_matrix = self.x1_to_x2(
+                    selected_cav_base["params"]["lidar_pose"], ego_pose
+                )
+                transformation_matrix_clean = self.x1_to_x2(
+                    selected_cav_base["params"]["lidar_pose_clean"], ego_pose_clean
+                )
+                cav_processed = self.get_item_single_car(selected_cav_base)
+                cav_processed.update(
+                    {
+                        "transformation_matrix": transformation_matrix,
+                        "transformation_matrix_clean": transformation_matrix_clean,
+                    }
+                )
+                # rename the ego vehicle to "ego", keep the other cav_ids as-is
+                update_cav_key = "ego" if cav_id == ego_id else cav_id
+                processed_data_dict[update_cav_key] = cav_processed
+                cav_id_list_newname.append(update_cav_key)
+
+            # extra information for the heterogeneous setting
+            if self.heterogeneous:
+                processed_data_dict["ego"]["idx"] = idx
+                processed_data_dict["ego"]["cav_list"] = cav_id_list_newname
+
+            return processed_data_dict
+
+        def get_item_single_car(self, cav_base):
+            """
+            Process a single vehicle: generate its (multi-class) labels, lidar
+            data, camera data, etc.
+            """
+            selected_cav_processed = {}
+
+            # 1) generate multi-class or single-class object boxes.
+            #    In the multi-class case, the boxes of each class are stored
+            #    separately, or stacked at once as [num_class, max_box, 7].
+            if self.multiclass:
+                # example: parse the three classes in class_list = [0, 1, 3]
+                # separately; the simplest way is to call
+                # generate_object_center_single once per class on
+                # cav_base["params"]["lidar_pose_clean"] and stack the results
+                all_box_list, all_mask_list, all_ids_list = [], [], []
+                for cls_id in self.class_list:
+                    box_c, mask_c, ids_c = self.generate_object_center_single(
+                        [cav_base],
+                        cav_base["params"]["lidar_pose_clean"],
+                        class_type=cls_id,  # generate_object_center_single can filter by class_type
+                    )
+                    all_box_list.append(box_c)
+                    all_mask_list.append(mask_c)
+                    all_ids_list.append(ids_c)
+
+                # stack into [num_class, max_box, 7] / [num_class, max_box].
+                # Note that each call to generate_object_center_single may return
+                # a different max_box, so the per-class results have to be
+                # zero-padded (or sliced) to a common size, as in the existing
+                # Late/Intermediate fusion implementations; see the sketch below.
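The zero-padding described in the comment above (and implemented by stack_multiclass_label further down this file) can be illustrated with a toy example; the box counts are invented:

import numpy as np

# per-class outputs with 2, 5 and 0 boxes respectively
box_list = [np.ones((2, 7)), np.ones((5, 7)), np.zeros((0, 7))]
mask_list = [np.ones(2), np.ones(5), np.zeros(0)]

M = max(b.shape[0] for b in box_list)                                  # common box count: 5
boxes = np.stack([np.pad(b, ((0, M - b.shape[0]), (0, 0))) for b in box_list])
masks = np.stack([np.pad(m, (0, M - m.shape[0])) for m in mask_list])
print(boxes.shape, masks.shape)                                        # (3, 5, 7) (3, 5)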
+                object_bbx_center, object_bbx_mask = self.stack_multiclass_label(
+                    all_box_list, all_mask_list
+                )
+                # object_ids can be kept as one list per class, or stored as a
+                # single [num_class, ...] structure
+                object_ids = all_ids_list  # special handling is also possible
+            else:
+                # single-class case: one call is enough
+                object_bbx_center, object_bbx_mask, object_ids = (
+                    self.generate_object_center_single(
+                        [cav_base], cav_base["params"]["lidar_pose_clean"]
+                    )
+                )
+
+            # 2) lidar (or camera) processing;
+            #    if lidar is needed, voxelize it via self.pre_processor
+            if self.load_lidar_file or self.visualize:
+                lidar_np = cav_base["lidar_np"]
+                # basic processing such as shuffle_points, mask_points_by_range, mask_ego_points, etc.
+                lidar_np = self.basic_lidar_preprocess(lidar_np)
+                # data augmentation (if needed)
+                lidar_np, object_bbx_center, object_bbx_mask = self.augment_if_needed(
+                    lidar_np, object_bbx_center, object_bbx_mask
+                )
+                # the actual processing, e.g. voxelization / BEV projection
+                processed_lidar = self.pre_processor.preprocess(lidar_np)
+                selected_cav_processed["processed_lidar"] = processed_lidar
+
+                if self.visualize:
+                    selected_cav_processed["origin_lidar"] = lidar_np
+
+            # 3) camera processing
+            if self.load_camera_file:
+                # same logic as in LateFusionDataset
+                camera_inputs = self.process_camera_data(cav_base)
+                selected_cav_processed["image_inputs"] = camera_inputs
+
+            # 4) save the (multi-class) boxes
+            selected_cav_processed.update(
+                {
+                    "object_bbx_center": object_bbx_center,
+                    "object_bbx_mask": object_bbx_mask,
+                    "object_ids": object_ids,
+                }
+            )
+
+            # 5) generate the label; in the multi-class case the label has to be
+            #    multi-class as well
+            if self.multiclass:
+                # wrap post_processor.generate_label(...) so it supports
+                # multi-class, or call it once per class
+                label_dict = self.post_processor.generate_label_multiclass(
+                    object_bbx_center,  # [num_class, max_box, 7]
+                    self.anchor_box,
+                    object_bbx_mask,  # [num_class, max_box]
+                )
+            else:
+                label_dict = self.post_processor.generate_label(
+                    object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask
+                )
+
+            selected_cav_processed["label_dict"] = label_dict
+            return selected_cav_processed
+
+        ############################
+        #  collate_batch helpers   #
+        ############################
+        def collate_batch_train(self, batch):
+            """
+            Collate for the training set:
+            since the train stage of this example only samples one CAV, the
+            items can simply be concatenated along the batch dimension.
+            For true multi-CAV late supervision during training, follow the
+            idea of the test collate instead.
+            """
+            import torch
+            from collections import OrderedDict
+            output_dict = {"ego": {}}
+
+            object_bbx_center_list = []
+            object_bbx_mask_list = []
+            label_dict_list = []
+            origin_lidar_list = []
+
+            processed_lidar_list = []
+
+            for item in batch:
+                ego_data = item["ego"]
+                object_bbx_center_list.append(ego_data["object_bbx_center"])
+                object_bbx_mask_list.append(ego_data["object_bbx_mask"])
+                label_dict_list.append(ego_data["label_dict"])
+
+                if self.visualize and "origin_lidar" in ego_data:
+                    origin_lidar_list.append(ego_data["origin_lidar"])
+
+                if "processed_lidar" in ego_data:
+                    processed_lidar_list.append(ego_data["processed_lidar"])
+
+            # convert to tensors
+            object_bbx_center_torch = self.list_to_tensor(object_bbx_center_list)
+            object_bbx_mask_torch = self.list_to_tensor(object_bbx_mask_list)
+
+            # collate of the multi-class (or single-class) labels
+            label_torch_dict = self.post_processor.collate_batch(label_dict_list)
+            # for centerpoint, object_bbx_center_torch etc. also have to be merged
+            # into label_torch_dict
+            label_torch_dict.update(
+                {
+                    "object_bbx_center": object_bbx_center_torch,
+                    "object_bbx_mask": object_bbx_mask_torch,
+                }
+            )
+
+            output_dict["ego"].update(
+                {
+                    "object_bbx_center": object_bbx_center_torch,
+                    "object_bbx_mask": object_bbx_mask_torch,
+                    "anchor_box": torch.from_numpy(self.anchor_box),
+                    "label_dict": label_torch_dict,
+                }
+            )
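list_to_tensor is defined outside this excerpt; assuming it mirrors the torch.from_numpy(np.array(...)) pattern used by the sibling datasets in this patch, the batching above amounts to the following shape-only sketch (sizes invented):

import numpy as np
import torch

# two batch items, each holding [num_class=3, max_box=5, 7] boxes
items = [np.zeros((3, 5, 7), dtype=np.float32), np.zeros((3, 5, 7), dtype=np.float32)]
object_bbx_center = torch.from_numpy(np.stack(items))   # [B, num_class, max_box, 7]
print(object_bbx_center.shape)                          # torch.Size([2, 3, 5, 7])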
+            # lidar
+            if len(processed_lidar_list) > 0:
+                processed_lidar_torch_dict = self.pre_processor.collate_batch(
+                    processed_lidar_list
+                )
+                output_dict["ego"]["processed_lidar"] = processed_lidar_torch_dict
+
+            # camera
+            if self.load_camera_file:
+                # like LateFusionDataset: concatenate the camera information of
+                # the batch along the proper dimensions
+                camera_inputs = self.collate_camera_inputs_train(batch)
+                output_dict["ego"]["image_inputs"] = camera_inputs
+
+            # visualization
+            if self.visualize and len(origin_lidar_list) > 0:
+                # downsample here if needed
+                origin_lidar_torch = self.list_to_tensor(origin_lidar_list)
+                output_dict["ego"]["origin_lidar"] = origin_lidar_torch
+
+            return output_dict
+
+        def collate_batch_test(self, batch):
+            """
+            Collate for the test (or validation) set:
+            usually only batch_size=1 is supported (especially with multiple
+            CAVs); every CAV is then taken out and collated individually for
+            late fusion.
+            """
+            assert len(batch) == 1, "Test time batch_size must be 1 for late fusion!"
+            batch = batch[0]
+
+            output_dict = {}
+            # heterogeneous
+            if self.heterogeneous and "idx" in batch["ego"]:
+                idx = batch["ego"]["idx"]
+                cav_list = batch["ego"]["cav_list"]
+                # decide which cavs use lidar / camera
+                # lidar_agent, camera_agent = self.selector.select_agent(idx)
+                # ...
+
+            # collect and collate
+            if self.visualize:
+                import copy
+                projected_lidar_list = []
+
+            for cav_id, cav_content in batch.items():
+                output_dict[cav_id] = {}
+                # turn object_bbx_center/mask into [1, ...]
+                object_bbx_center = self.unsqueeze_to_batch(cav_content["object_bbx_center"])
+                object_bbx_mask = self.unsqueeze_to_batch(cav_content["object_bbx_mask"])
+
+                label_dict = self.post_processor.collate_batch([cav_content["label_dict"]])
+                # centerpoint needs object_bbx_center/mask inside label_dict as well
+                label_dict.update(
+                    {
+                        "object_bbx_center": object_bbx_center,
+                        "object_bbx_mask": object_bbx_mask,
+                    }
+                )
+
+                # lidar
+                if "processed_lidar" in cav_content:
+                    # only a single cav's processed_lidar
+                    processed_lidar_torch = self.pre_processor.collate_batch(
+                        [cav_content["processed_lidar"]]
+                    )
+                    output_dict[cav_id]["processed_lidar"] = processed_lidar_torch
+
+                # camera
+                if self.load_camera_file and "image_inputs" in cav_content:
+                    # likewise, only a single cav is collated
+                    cam_torch = self.collate_camera_inputs_test(cav_content)
+                    output_dict[cav_id]["image_inputs"] = cam_torch
+
+                # heterogeneous: keep or drop entries depending on cav_id
+                # if self.heterogeneous:
+                #     pass
+
+                # save the transformation matrices
+                output_dict[cav_id]["transformation_matrix"] = torch.from_numpy(
+                    cav_content["transformation_matrix"]
+                ).float()
+                output_dict[cav_id]["transformation_matrix_clean"] = torch.from_numpy(
+                    cav_content["transformation_matrix_clean"]
+                ).float()
+
+                # label + other information
+                output_dict[cav_id].update(
+                    {
+                        "object_bbx_center": object_bbx_center,
+                        "object_bbx_mask": object_bbx_mask,
+                        "label_dict": label_dict,
+                        "anchor_box": self.anchor_box_torch,
+                        "object_ids": cav_content["object_ids"],
+                    }
+                )
+
+                if self.visualize and "origin_lidar" in cav_content:
+                    output_dict[cav_id]["origin_lidar"] = torch.from_numpy(
+                        cav_content["origin_lidar"]
+                    )
+
+            # to stack the point clouds of several cavs onto the ego for
+            # visualization, do the concatenation here
+            return output_dict
+
+        ######################################
+        #    multi-class post-processing     #
+        ######################################
+        def post_process(self, data_dict, output_dict):
+            """
+            For multi-class detection, dispatch to self.post_process_multiclass;
+            otherwise this is identical to ordinary late fusion.
+            """
+            if self.multiclass:
+                # returns [list of pred_box], [list of score], [list of gt_box],
+                # one entry per class
+                return self.post_process_multiclass(data_dict, output_dict)
+            else:
+                pred_box, pred_score = self.post_processor.post_process(data_dict, output_dict)
+                gt_box = self.post_processor.generate_gt_bbx(data_dict)
+                return pred_box, pred_score, gt_box
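post_process_multiclass (next) delegates the per-class slicing of the head outputs to split_single_class; a shape-only sketch with invented dimensions shows the indexing pattern:

import torch

cls_preds = torch.rand(1, 3, 4, 4)              # [B, num_class, H, W]      (invented shapes)
reg_preds_multiclass = torch.rand(1, 3, 4, 14)  # [B, num_class, ..., Nreg]  (invented shapes)

class_index = 1
cls_preds_single = cls_preds[:, class_index:class_index + 1, :, :]  # keeps the class dim: [1, 1, 4, 4]
reg_preds_single = reg_preds_multiclass[:, class_index, :, :]       # drops it: [1, 4, 14]
print(cls_preds_single.shape, reg_preds_single.shape)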
+            """
+            Multi-class post-processing: run NMS (or similar) once per class,
+            then return the per-class results together.
+            """
+            import copy
+
+            # num_class = len(self.class_list)
+            pred_box_tensor_list = []
+            pred_score_list = []
+            gt_box_tensor_list = []
+
+            # post-process each class independently
+            for i, cls_id in enumerate(self.class_list):
+                # 1) copy out the data that belongs to this class only
+                data_dict_single, output_dict_single = self.split_single_class(
+                    data_dict, output_dict, class_index=i
+                )
+                # 2) run the post-processing
+                pred_box_tensor, pred_score = self.post_processor.post_process(
+                    data_dict_single, output_dict_single
+                )
+                gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict_single)
+
+                pred_box_tensor_list.append(pred_box_tensor)
+                pred_score_list.append(pred_score)
+                gt_box_tensor_list.append(gt_box_tensor)
+
+            return pred_box_tensor_list, pred_score_list, gt_box_tensor_list
+
+        ############################################
+        # reusable / simplified helpers below (adapt to your project) #
+        ############################################
+        def add_noise_data_if_needed(self, base_data_dict):
+            """
+            Decide from self.params["noise_setting"] whether to perturb the poses
+            with noise. Simply calls the existing add_noise_data_dict or
+            add_noise_data_dict_asymmetric.
+            """
+            from opencood.utils.pose_utils import add_noise_data_dict
+            # swap in the asymmetric variant if asymmetric noise is desired
+            return add_noise_data_dict(base_data_dict, self.params["noise_setting"])
+
+        def basic_lidar_preprocess(self, lidar_np):
+            """
+            Common point-cloud preprocessing such as range cropping, shuffling and
+            removing the ego-vehicle points.
+            """
+            from opencood.utils.pcd_utils import (
+                shuffle_points,
+                mask_points_by_range,
+                mask_ego_points,
+            )
+            lidar_np = shuffle_points(lidar_np)
+            lidar_np = mask_points_by_range(lidar_np, self.params["preprocess"]["cav_lidar_range"])
+            lidar_np = mask_ego_points(lidar_np)
+            return lidar_np
+
+        def augment_if_needed(self, lidar_np, object_bbx_center, object_bbx_mask):
+            """
+            If self.train and no heterogeneous setup is used, augment the
+            point cloud / labels.
+            """
+            if self.train and not self.heterogeneous:
+                lidar_np, object_bbx_center, object_bbx_mask = self.augment(
+                    lidar_np, object_bbx_center, object_bbx_mask
+                )
+            return lidar_np, object_bbx_center, object_bbx_mask
+
+        def process_camera_data(self, cav_base):
+            """
+            Augment the camera images according to the parameters (resizing,
+            cropping, flipping, etc.) and return them as a dict.
+            See the LateFusionDataset / LSS pipeline for reference.
+            """
+            # simplified example only; see get_item_single_car -> process_camera_data
+            # in the original LateFusionDataset for the full implementation
+            camera_data_list = cav_base["camera_data"]
+            # ... apply augmentation and transforms ...
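+            # A sketch of the structure this stub is expected to return (assuming an
+            # LSS-style camera pipeline): every value is stacked over the N cameras,
+            # e.g. imgs [N, 3, H, W], intrins [N, 3, 3], rots/trans from camera to
+            # lidar, and post_rots/post_trans recording the image augmentation.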
+ camera_inputs = {"imgs": None, "rots": None, ...} + return camera_inputs + + def collate_camera_inputs_train(self, batch): + """ + 将 train batch 里多帧图像按维度拼接,比如 [B, N, C, H, W] + """ + # 略,参考 LateFusionDataset 的 collate_batch_train + return {} + + def collate_camera_inputs_test(self, cav_content): + """ + 测试阶段只 collate 单个 cav + """ + # 参考 LateFusionDataset 的 collate_batch_test + return {} + + def stack_multiclass_label(self, box_list, mask_list): + """ + 输入是一个 list,每个元素是 (max_box, 7)/(max_box,), + 最终拼成 [num_class, max_box, 7] / [num_class, max_box]。 + 若每个类别分配的 max_box 不同,需要先找最大值再做 padding。 + """ + import numpy as np + num_class = len(box_list) + max_box_counts = [b.shape[0] for b in box_list] + M = max(max_box_counts) if max_box_counts else 0 + + # 组合 + box_array = [] + mask_array = [] + for i in range(num_class): + cur_box = box_list[i] + cur_mask = mask_list[i] + pad_size = M - cur_box.shape[0] + if pad_size > 0: + # 在 0 处 padding + cur_box = np.concatenate( + [cur_box, np.zeros((pad_size, 7), dtype=cur_box.dtype)], axis=0 + ) + cur_mask = np.concatenate( + [cur_mask, np.zeros(pad_size, dtype=cur_mask.dtype)], axis=0 + ) + box_array.append(cur_box[None, ...]) # [1, M, 7] + mask_array.append(cur_mask[None, ...]) # [1, M] + + if len(box_array) == 0: + # 说明没对象 + return np.zeros((0, 0, 7)), np.zeros((0, 0)) + + box_array = np.concatenate(box_array, axis=0) # [num_class, M, 7] + mask_array = np.concatenate(mask_array, axis=0) # [num_class, M] + return box_array, mask_array + + def split_single_class(self, data_dict, output_dict, class_index): + """ + post_process_multiclass 用到: + 将 data_dict/output_dict 中多类别的 object_bbx_center/mask + 拆分出第 class_index 个类别的子数据,以便单独跑 NMS。 + """ + import copy + data_dict_single = {"ego": {}} + output_dict_single = {} + + # 遍历所有 cav (late fusion) + for cav_id in data_dict.keys(): + cav_content = data_dict[cav_id] + cav_output = output_dict[cav_id] + + # 如果 object_bbx_center 是 [num_class, M, 7],mask 是 [num_class, M] + # 拆分出 cav_idx = class_index 这一路 + single_box_center = cav_content["object_bbx_center"][class_index, ...] + single_mask = cav_content["object_bbx_mask"][class_index, ...] + # object_ids 如果是按类别存储的list,可按 class_index 取即可 + # 如果合并一起,需要自己额外做记录 + if isinstance(cav_content["object_ids"], list): + single_ids = cav_content["object_ids"][class_index] + else: + single_ids = cav_content["object_ids"] # 或者看具体储存方式 + + # 类似地,对网络输出 cls_preds, reg_preds_multiclass 都要取第 class_index 路 + # 具体看原网络 forward 的输出 shape + cls_preds_single = cav_output["cls_preds"][ + :, class_index : class_index + 1, :, : + ] # e.g. [B,1,H,W] + reg_preds_single = cav_output["reg_preds_multiclass"][ + :, class_index, :, : + ] # [B,H,W,Nreg] + + # 构造新的 data_dict_single / output_dict_single + data_dict_single[cav_id] = copy.deepcopy(cav_content) + data_dict_single[cav_id]["object_bbx_center"] = single_box_center[None, ...] # 保留一个 batch 维 + data_dict_single[cav_id]["object_bbx_mask"] = single_mask[None, ...] 
+                data_dict_single[cav_id]["object_ids"] = single_ids
+
+                output_dict_single[cav_id] = copy.deepcopy(cav_output)
+                output_dict_single[cav_id]["cls_preds"] = cls_preds_single
+                output_dict_single[cav_id]["reg_preds"] = reg_preds_single
+
+            return data_dict_single, output_dict_single
+
+        ###################################################
+        # utility functions (same as in the original LateFusionDataset / intermediate classes) #
+        ###################################################
+        def x1_to_x2(self, lidar_pose1, lidar_pose2):
+            """
+            Pose transformation matrix, identical to
+            opencood.utils.transformation_utils.x1_to_x2.
+            """
+            return x1_to_x2(lidar_pose1, lidar_pose2)
+
+        def list_to_tensor(self, data_list):
+            """
+            Simple helper that turns a list of np.array into a torch.Tensor
+            for batch stacking.
+            """
+            import numpy as np
+            import torch
+            if len(data_list) == 0:
+                return None
+            arr = np.stack(data_list, axis=0)
+            return torch.from_numpy(arr)
+
+        def unsqueeze_to_batch(self, arr):
+            """
+            If arr is an np.ndarray, expand it to [1, ...] and convert it to torch.
+            """
+            import numpy as np
+            import torch
+            if isinstance(arr, np.ndarray):
+                arr = arr[None, ...]  # prepend a batch dimension
+                arr = torch.from_numpy(arr)
+            elif isinstance(arr, torch.Tensor) and arr.dim() == 2:
+                # [M,7] -> [1,M,7]
+                arr = arr.unsqueeze(0)
+            return arr
+
+    return LateMultiFusionDataset
\ No newline at end of file
diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_multiclass_fusion_dataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_multiclass_fusion_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..7cf62fac764d4722b466a966965820d638b7cbdc
--- /dev/null
+++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_multiclass_fusion_dataset.py
@@ -0,0 +1,1233 @@
+# late fusion dataset
+import random
+import math
+from collections import OrderedDict
+import cv2
+import numpy as np
+import torch
+import copy
+from icecream import ic
+from PIL import Image
+import pickle as pkl
+from opencood.utils import box_utils as box_utils
+from opencood.data_utils.pre_processor import build_preprocessor
+from opencood.data_utils.post_processor import build_postprocessor
+from opencood.utils.camera_utils import (
+    sample_augmentation,
+    img_transform,
+    normalize_img,
+    img_to_tensor,
+)
+from opencood.data_utils.augmentor.data_augmentor import DataAugmentor
+from opencood.utils.transformation_utils import x1_to_x2, x_to_world, get_pairwise_transformation
+from opencood.utils.pose_utils import add_noise_data_dict, add_noise_data_dict_asymmetric
+from opencood.utils.pcd_utils import (
+    mask_points_by_range,
+    mask_ego_points,
+    mask_ego_points_v2,
+    shuffle_points,
+    downsample_lidar_minimum,
+)
+from opencood.utils.common_utils import merge_features_to_dict
+
+def getLatemulticlassFusionDataset(cls):
+    """
+    cls: the Basedataset.
+ """ + class LatemulticlassFusionDataset(cls): + def __init__(self, params, visualize, train=True): + super().__init__(params, visualize, train) + self.anchor_box = self.post_processor.generate_anchor_box() + self.anchor_box_torch = torch.from_numpy(self.anchor_box) + + self.heterogeneous = False + if 'heter' in params: + self.heterogeneous = True + + self.multiclass = params['model']['args']['multi_class'] + + self.proj_first = False if 'proj_first' not in params['fusion']['args']\ + else params['fusion']['args']['proj_first'] + + # self.proj_first = False + self.supervise_single = True if ('supervise_single' in params['model']['args'] and params['model']['args']['supervise_single']) \ + else False + # self.supervise_single = False + self.online_eval_only = False + + + def __getitem__(self, idx, extra_source=None, data_dir=None): + + if data_dir is not None: + extra_source=1 + + object_bbx_center_list = [] + object_bbx_mask_list = [] + object_id_dict = {} + + object_bbx_center_list_single = [] + object_bbx_mask_list_single = [] + + gt_object_bbx_center_list = [] + gt_object_bbx_mask_list = [] + gt_object_id_dict = {} + + gt_object_bbx_center_list_single = [] + gt_object_bbx_mask_list_single = [] + + output_dict = {} + for tpe in ['all', 0, 1, 3]: + output_single_class = self.__getitem_single_class__(idx, tpe, extra_source, data_dir) + output_dict[tpe] = output_single_class + if tpe == 'all' and extra_source is None: + continue + elif tpe == 'all' and extra_source is not None: + break + object_bbx_center_list.append(output_single_class['ego']['object_bbx_center']) + object_bbx_mask_list.append(output_single_class['ego']['object_bbx_mask']) + object_id_dict[tpe] = output_single_class['ego']['object_ids'] + + gt_object_bbx_center_list.append(output_single_class['ego']['gt_object_bbx_center']) + gt_object_bbx_mask_list.append(output_single_class['ego']['gt_object_bbx_mask']) + gt_object_id_dict[tpe] = output_single_class['ego']['gt_object_ids'] + + if self.multiclass and extra_source is None: + output_dict['all']['ego']['object_bbx_center'] = np.stack(object_bbx_center_list, axis=0) + output_dict['all']['ego']['object_bbx_mask'] = np.stack(object_bbx_mask_list, axis=0) + output_dict['all']['ego']['object_ids'] = object_id_dict + + output_dict['all']['ego']['gt_object_bbx_center'] = np.stack(gt_object_bbx_center_list, axis=0) + output_dict['all']['ego']['gt_object_bbx_mask'] = np.stack(gt_object_bbx_mask_list, axis=0) + output_dict['all']['ego']['gt_object_ids'] = gt_object_id_dict + + + return output_dict['all'] + + def __getitem_single_class__(self, idx, tpe=None, extra_source=None, data_dir=None): + + if extra_source is None and data_dir is None: + base_data_dict = self.retrieve_base_data(idx, tpe) ## {id:{'ego':True/False, 'params': {'lidar_pose','speed','vehicles','ego_pos',...}, 'lidar_np': array (N,4)}} + elif data_dir is not None: + base_data_dict = self.retrieve_base_data(idx=None, tpe=tpe, data_dir=data_dir) + elif extra_source is not None: + base_data_dict = self.retrieve_base_data(idx=None, tpe=tpe, extra_source=extra_source) + + # base_data_dict = add_noise_data_dict(base_data_dict,self.params['noise_setting']) + base_data_dict = add_noise_data_dict_asymmetric(base_data_dict,self.params['noise_setting']) + processed_data_dict = OrderedDict() + processed_data_dict['ego'] = {} + ego_id = -1 + ego_lidar_pose = [] + ego_cav_base = None + cav_id_list = [] + lidar_pose_list = [] + too_far = [] + # first find the ego vehicle's lidar pose + for cav_id, cav_content in 
base_data_dict.items(): + + if cav_content['ego']: + ego_id = cav_id + ego_lidar_pose = cav_content['params']['lidar_pose'] + ego_lidar_pose_clean = cav_content['params']['lidar_pose_clean'] + ego_cav_base = cav_content + break + + assert ego_id != -1 + assert len(ego_lidar_pose) > 0 + + agents_image_inputs = [] + processed_features = [] + object_stack = [] + object_mask_stack = [] + object_id_stack = [] + + gt_object_stack = [] + gt_object_mask_stack = [] + gt_object_id_stack = [] + + single_label_list = [] + single_object_bbx_center_list = [] + single_object_bbx_mask_list = [] + too_far = [] + lidar_pose_list = [] + lidar_pose_clean_list = [] + cav_id_list = [] + projected_lidar_clean_list = [] # disconet + + if self.visualize: + projected_lidar_stack = [] + + # loop over all CAVs to process information + for cav_id, selected_cav_base in base_data_dict.items(): + distance = \ + math.sqrt((selected_cav_base['params']['lidar_pose'][0] - + ego_lidar_pose[0]) ** 2 + ( + selected_cav_base['params'][ + 'lidar_pose'][1] - ego_lidar_pose[ + 1]) ** 2) + if distance > self.params['comm_range']: + too_far.append(cav_id) + continue + cav_id_list.append(cav_id) + lidar_pose_list.append(selected_cav_base['params']['lidar_pose']) + lidar_pose_clean_list.append(selected_cav_base['params']['lidar_pose_clean']) + + for cav_id in too_far: + base_data_dict.pop(cav_id) + + pairwise_t_matrix = \ + get_pairwise_transformation(base_data_dict, + self.max_cav, + self.proj_first) + cav_num = len(cav_id_list) + cav_id_list_newname = [] + + lidar_poses = np.array(lidar_pose_list).reshape(-1, 6) # [N_cav, 6] + lidar_poses_clean = np.array(lidar_pose_clean_list).reshape(-1, 6) # [N_cav, 6] + + for cav_id in cav_id_list: + selected_cav_base = base_data_dict[cav_id] + # find the transformation matrix from current cav to ego. 
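+                # x1_to_x2 builds the 4x4 homogeneous transform that maps points from this
+                # CAV's lidar frame into the ego lidar frame; the "clean" variant below uses
+                # the noise-free poses so ground-truth boxes can still be projected exactly.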
+ cav_lidar_pose = selected_cav_base['params']['lidar_pose'] + transformation_matrix = x1_to_x2(cav_lidar_pose, ego_lidar_pose) + cav_lidar_pose_clean = selected_cav_base['params']['lidar_pose_clean'] + transformation_matrix_clean = x1_to_x2(cav_lidar_pose_clean, ego_lidar_pose_clean) + + selected_cav_processed = \ + self.get_item_single_car(selected_cav_base, + ego_cav_base, + tpe, + extra_source!=None) + selected_cav_processed.update({'transformation_matrix': transformation_matrix, + 'transformation_matrix_clean': transformation_matrix_clean}) + if extra_source is None: + object_stack.append(selected_cav_processed['object_bbx_center']) + object_mask_stack.append(selected_cav_processed['object_bbx_mask']) + object_id_stack += selected_cav_processed['object_ids'] + + + gt_object_stack.append(selected_cav_processed['gt_object_bbx_center']) + gt_object_mask_stack.append(selected_cav_processed['gt_object_bbx_mask']) + gt_object_id_stack += selected_cav_processed['gt_object_ids'] + + if tpe == 'all': + + if self.load_lidar_file: + processed_features.append( + selected_cav_processed['processed_lidar']) + + if self.load_camera_file: + agents_image_inputs.append( + selected_cav_processed['image_inputs']) + + if self.visualize: + projected_lidar_stack.append( + selected_cav_processed['projected_lidar']) + + + if self.supervise_single and extra_source is None : + single_label_list.append(selected_cav_processed['single_label_dict']) + single_object_bbx_center_list.append(selected_cav_processed['single_object_bbx_center']) + single_object_bbx_mask_list.append(selected_cav_processed['single_object_bbx_mask']) + + update_cav = "ego" if cav_id == ego_id else cav_id + processed_data_dict.update({update_cav: selected_cav_processed}) + cav_id_list_newname.append(update_cav) + + if self.supervise_single and extra_source is None: + single_label_dicts = {} + if tpe == 'all': + # unused label + if False: + single_label_dicts = self.post_processor.collate_batch(single_label_list) + single_object_bbx_center = torch.from_numpy(np.array(single_object_bbx_center_list)) + single_object_bbx_mask = torch.from_numpy(np.array(single_object_bbx_mask_list)) + processed_data_dict['ego'].update({ + "single_label_dict_torch": single_label_dicts, + "single_object_bbx_center_torch": single_object_bbx_center, + "single_object_bbx_mask_torch": single_object_bbx_mask, + }) + + # heterogeneous + if self.heterogeneous: + processed_data_dict['ego']['idx'] = idx + processed_data_dict['ego']['cav_list'] = cav_id_list_newname + + if extra_source is None: + unique_indices = \ + [object_id_stack.index(x) for x in set(object_id_stack)] + object_stack = np.vstack(object_stack) + object_mask_stack = np.concatenate(object_mask_stack) + object_stack = object_stack[unique_indices] + object_mask_stack = object_mask_stack[unique_indices] + + # make sure bounding boxes across all frames have the same number + object_bbx_center = \ + np.zeros((self.params['postprocess']['max_num'], 7)) + mask = np.zeros(self.params['postprocess']['max_num']) + object_bbx_center[:object_stack.shape[0], :] = object_stack + mask[:object_mask_stack.shape[0]] = object_mask_stack + # mask[:object_mask_stack.shape[0]] = 1 + + gt_unique_indices = \ + [gt_object_id_stack.index(x) for x in set(gt_object_id_stack)] + gt_object_stack = np.vstack(gt_object_stack) + gt_object_mask_stack = np.concatenate(gt_object_mask_stack) + gt_object_stack = gt_object_stack[gt_unique_indices] + gt_object_mask_stack = gt_object_mask_stack[unique_indices] + + # make sure bounding boxes across 
all frames have the same number + gt_object_bbx_center = \ + np.zeros((self.params['postprocess']['max_num'], 7)) + gt_mask = np.zeros(self.params['postprocess']['max_num']) + gt_object_bbx_center[:gt_object_stack.shape[0], :] = gt_object_stack + gt_mask[:gt_object_mask_stack.shape[0]] = gt_object_mask_stack + # gt_mask[:gt_object_mask_stack.shape[0]] = 1 + + processed_data_dict['ego'].update( + {'object_bbx_center': object_bbx_center, # (100,7) + 'object_bbx_mask': mask, # (100,) + 'object_ids': [object_id_stack[i] for i in unique_indices], + } + ) + + # generate targets label + label_dict = {} + # if tpe == 'all': + # unused label + if extra_source is None: + label_dict = \ + self.post_processor.generate_label( + gt_box_center=object_bbx_center, + anchors=self.anchor_box, + mask=mask) + gt_label_dict = \ + self.post_processor.generate_label( + gt_box_center=gt_object_bbx_center, + anchors=self.anchor_box, + mask=gt_mask) + + + processed_data_dict['ego'].update( + {'gt_object_bbx_center': gt_object_bbx_center, # (100,7) + 'gt_object_bbx_mask': gt_mask, # (100,) + 'gt_object_ids': [gt_object_id_stack[i] for i in gt_unique_indices], + 'gt_label_dict': gt_label_dict}) + + processed_data_dict['ego'].update( + { + 'anchor_box': self.anchor_box, + 'label_dict': label_dict, + 'cav_num': cav_num, + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_poses_clean': lidar_poses_clean, + 'lidar_poses': lidar_poses}) + + if tpe == 'all': + if self.load_lidar_file: + merged_feature_dict = merge_features_to_dict(processed_features) + processed_data_dict['ego'].update({'processed_lidar': merged_feature_dict}) + + if self.load_camera_file: + merged_image_inputs_dict = merge_features_to_dict(agents_image_inputs, merge='stack') + processed_data_dict['ego'].update({'image_inputs': merged_image_inputs_dict}) + + if self.visualize: + processed_data_dict['ego'].update({'origin_lidar': + # projected_lidar_stack}) + np.vstack( + projected_lidar_stack)}) + processed_data_dict['ego'].update({'lidar_len': [len(projected_lidar_stack[i]) for i in range(len(projected_lidar_stack))]}) + + + processed_data_dict['ego'].update({'sample_idx': idx, + 'cav_id_list': cav_id_list}) + + img_front_list = [] + img_left_list = [] + img_right_list = [] + BEV_list = [] + + if self.visualize: + for car_id in base_data_dict: + if not base_data_dict[car_id]['ego'] == True: + continue + if 'rgb_front' in base_data_dict[car_id] and 'rgb_left' in base_data_dict[car_id] and 'rgb_right' in base_data_dict[car_id] and 'BEV' in base_data_dict[car_id] : + img_front_list.append(base_data_dict[car_id]['rgb_front']) + img_left_list.append(base_data_dict[car_id]['rgb_left']) + img_right_list.append(base_data_dict[car_id]['rgb_right']) + BEV_list.append(base_data_dict[car_id]['BEV']) + processed_data_dict['ego'].update({'img_front': img_front_list, + 'img_left': img_left_list, + 'img_right': img_right_list, + 'BEV': BEV_list}) + processed_data_dict['ego'].update({'scene_dict': base_data_dict['car_0']['scene_dict'], + 'frame_id': base_data_dict['car_0']['frame_id'], + }) + + return processed_data_dict + + def get_item_single_car(self, selected_cav_base, ego_cav_base, tpe, online_eval=False): + """ + Process a single CAV's information for the train/test pipeline. + + + Parameters + ---------- + selected_cav_base : dict + The dictionary contains a single CAV's raw information. + including 'params', 'camera_data' + + Returns + ------- + selected_cav_processed : dict + The dictionary contains the cav's processed information. 
+ """ + selected_cav_processed = {} + + if not online_eval: + # label + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center_single( + [selected_cav_base], selected_cav_base["params"]["lidar_pose_clean"] + ) + + ego_pose, ego_pose_clean = ego_cav_base['params']['lidar_pose'], ego_cav_base['params']['lidar_pose_clean'] + + + # calculate the transformation matrix + transformation_matrix = \ + x1_to_x2(selected_cav_base['params']['lidar_pose'], + ego_pose) # T_ego_cav + transformation_matrix_clean = \ + x1_to_x2(selected_cav_base['params']['lidar_pose_clean'], + ego_pose_clean) + + # lidar + if tpe == 'all': + if self.load_lidar_file or self.visualize: + lidar_np = selected_cav_base['lidar_np'] + lidar_np = shuffle_points(lidar_np) + lidar_np = mask_points_by_range(lidar_np, + self.params['preprocess'][ + 'cav_lidar_range']) + # remove points that hit ego vehicle + lidar_np = mask_ego_points_v2(lidar_np) + + # data augmentation, seems very important for single agent training, because lack of data diversity. + # only work for lidar modality in training. + if not self.heterogeneous and not online_eval: + lidar_np, object_bbx_center, object_bbx_mask = \ + self.augment(lidar_np, object_bbx_center, object_bbx_mask) + + projected_lidar = \ + box_utils.project_points_by_matrix_torch(lidar_np[:, :3], transformation_matrix) + + if self.proj_first: + lidar_np[:, :3] = projected_lidar + + if self.visualize: + # filter lidar + selected_cav_processed.update({'projected_lidar': projected_lidar}) + + lidar_dict = self.pre_processor.preprocess(lidar_np) + selected_cav_processed.update({'processed_lidar': lidar_dict}) + + if self.visualize: + selected_cav_processed.update({'origin_lidar': lidar_np}) + + if not online_eval: + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center( + [selected_cav_base], selected_cav_base['params']['lidar_pose'] + ) + + gt_object_bbx_center, gt_object_bbx_mask, gt_object_ids = self.generate_object_center( + [selected_cav_base], selected_cav_base['params']['lidar_pose'] + ) + + label_dict = self.post_processor.generate_label( + gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask + ) + + gt_label_dict = self.post_processor.generate_label( + gt_box_center=gt_object_bbx_center, anchors=self.anchor_box, mask=gt_object_bbx_mask + ) + + selected_cav_processed.update({ + "single_label_dict": label_dict, + "single_object_bbx_center": object_bbx_center, + "single_object_bbx_mask": object_bbx_mask}) + + # camera + if tpe == 'all': + if self.load_camera_file: + # adapted from https://github.com/nv-tlabs/lift-splat-shoot/blob/master/src/data.py + camera_data_list = selected_cav_base["camera_data"] + + params = selected_cav_base["params"] + imgs = [] + rots = [] + trans = [] + intrins = [] + extrinsics = [] # cam_to_lidar + post_rots = [] + post_trans = [] + + for idx, img in enumerate(camera_data_list): + camera_to_lidar, camera_intrinsic = self.get_ext_int(params, idx) + + intrin = torch.from_numpy(camera_intrinsic) + rot = torch.from_numpy( + camera_to_lidar[:3, :3] + ) # R_wc, we consider world-coord is the lidar-coord + tran = torch.from_numpy(camera_to_lidar[:3, 3]) # T_wc + + post_rot = torch.eye(2) + post_tran = torch.zeros(2) + + img_src = [img] + + # depth + if self.load_depth_file: + depth_img = selected_cav_base["depth_data"][idx] + img_src.append(depth_img) + else: + depth_img = None + + # data augmentation + resize, resize_dims, crop, flip, rotate = sample_augmentation( + self.data_aug_conf, self.train + ) 
+ img_src, post_rot2, post_tran2 = img_transform( + img_src, + post_rot, + post_tran, + resize=resize, + resize_dims=resize_dims, + crop=crop, + flip=flip, + rotate=rotate, + ) + # for convenience, make augmentation matrices 3x3 + post_tran = torch.zeros(3) + post_rot = torch.eye(3) + post_tran[:2] = post_tran2 + post_rot[:2, :2] = post_rot2 + + img_src[0] = normalize_img(img_src[0]) + if self.load_depth_file: + img_src[1] = img_to_tensor(img_src[1]) * 255 + + imgs.append(torch.cat(img_src, dim=0)) + intrins.append(intrin) + extrinsics.append(torch.from_numpy(camera_to_lidar)) + rots.append(rot) + trans.append(tran) + post_rots.append(post_rot) + post_trans.append(post_tran) + + selected_cav_processed.update( + { + "image_inputs": + { + "imgs": torch.stack(imgs), # [N, 3or4, H, W] + "intrins": torch.stack(intrins), + "extrinsics": torch.stack(extrinsics), + "rots": torch.stack(rots), + "trans": torch.stack(trans), + "post_rots": torch.stack(post_rots), + "post_trans": torch.stack(post_trans), + } + } + ) + + selected_cav_processed.update({"anchor_box": self.anchor_box}) + + if not online_eval: + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center([selected_cav_base], + ego_pose_clean) + + gt_object_bbx_center, gt_object_bbx_mask, gt_object_ids = self.generate_object_center([selected_cav_base], + ego_pose_clean) + selected_cav_processed.update( + { + "object_bbx_center": object_bbx_center, + "object_bbx_mask": object_bbx_mask, + "object_ids": object_ids, + } + ) + + selected_cav_processed.update( + { + "gt_object_bbx_center": gt_object_bbx_center[gt_object_bbx_mask == 1], + "gt_object_bbx_mask": gt_object_bbx_mask, + "gt_object_ids": gt_object_ids + } + ) + + # generate targets label + label_dict = self.post_processor.generate_label( + gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask + ) + selected_cav_processed.update({"label_dict": label_dict}) + + selected_cav_processed.update( + { + 'transformation_matrix': transformation_matrix, + 'transformation_matrix_clean': transformation_matrix_clean + } + ) + + return selected_cav_processed + + + def collate_batch_train(self, batch, online_eval_only=False): + """ + Customized collate function for pytorch dataloader during training + for early and late fusion dataset. + + Parameters + ---------- + batch : dict + + Returns + ------- + batch : dict + Reformatted batch. + """ + # during training, we only care about ego. 
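+            # Shapes after this collate (B = batch size): object_bbx_center is
+            # [B, num_class, max_num, 7] in the multi-class case, object_bbx_mask is
+            # [B, num_class, max_num], and record_len holds the number of CAVs per sample.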
+ output_dict = {'ego': {}} + + object_bbx_center = [] + object_bbx_mask = [] + processed_lidar_list = [] + label_dict_list = [] + origin_lidar = [] + + gt_object_bbx_center = [] + gt_object_bbx_mask = [] + gt_object_ids = [] + gt_label_dict_list = [] + record_len = [] + + object_ids = [] + image_inputs_list = [] + # used to record different scenario + record_len = [] + label_dict_list = [] + lidar_pose_list = [] + origin_lidar = [] + lidar_len = [] + lidar_pose_clean_list = [] + + # heterogeneous + lidar_agent_list = [] + + # pairwise transformation matrix + pairwise_t_matrix_list = [] + + # disconet + teacher_processed_lidar_list = [] + + # image + img_front = [] + img_left = [] + img_right = [] + BEV = [] + + dict_list = [] + + if self.supervise_single: + pos_equal_one_single = [] + neg_equal_one_single = [] + targets_single = [] + object_bbx_center_single = [] + object_bbx_mask_single = [] + + for i in range(len(batch)): + ego_dict = batch[i]['ego'] + + if not online_eval_only: + object_bbx_center.append(ego_dict['object_bbx_center']) + object_bbx_mask.append(ego_dict['object_bbx_mask']) + object_ids.append(ego_dict['object_ids']) + + gt_object_bbx_center.append(ego_dict['gt_object_bbx_center']) + gt_object_bbx_mask.append(ego_dict['gt_object_bbx_mask']) + + gt_object_ids.append(ego_dict['gt_object_ids']) + + label_dict_list.append(ego_dict['label_dict']) + + gt_label_dict_list.append(ego_dict['gt_label_dict']) + + else: + object_ids.append(None) + gt_object_ids.append(None) + + lidar_pose_list.append(ego_dict['lidar_poses']) # ego_dict['lidar_pose'] is np.ndarray [N,6] + lidar_pose_clean_list.append(ego_dict['lidar_poses_clean']) + + if self.load_lidar_file: + processed_lidar_list.append(ego_dict['processed_lidar']) + if self.load_camera_file: + image_inputs_list.append(ego_dict['image_inputs']) # different cav_num, ego_dict['image_inputs'] is dict. 
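+                # record_len stores how many CAVs contributed to each sample so the model can
+                # split flattened per-agent features back into per-sample groups; pairwise_t_matrix
+                # holds the (max_cav, max_cav, 4, 4) relative transforms between agents.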
+ + record_len.append(ego_dict['cav_num']) + pairwise_t_matrix_list.append(ego_dict['pairwise_t_matrix']) + + dict_list.append([ego_dict['scene_dict'], ego_dict['frame_id']]) + + if self.visualize: + origin_lidar.append(ego_dict['origin_lidar']) + # lidar_len.append(ego_dict['lidar_len']) + if len(ego_dict['img_front']) > 0 and len(ego_dict['img_right']) > 0 and len(ego_dict['img_left']) > 0 and len(ego_dict['BEV']) > 0: + img_front.append(ego_dict['img_front'][0]) + img_left.append(ego_dict['img_left'][0]) + img_right.append(ego_dict['img_right'][0]) + BEV.append(ego_dict['BEV'][0]) + + if self.supervise_single and not online_eval_only: + # unused label + if False: + pos_equal_one_single.append(ego_dict['single_label_dict_torch']['pos_equal_one']) + neg_equal_one_single.append(ego_dict['single_label_dict_torch']['neg_equal_one']) + targets_single.append(ego_dict['single_label_dict_torch']['targets']) + object_bbx_center_single.append(ego_dict['single_object_bbx_center_torch']) + object_bbx_mask_single.append(ego_dict['single_object_bbx_mask_torch']) + + # heterogeneous + if self.heterogeneous: + lidar_agent_list.append(ego_dict['lidar_agent']) + + # convert to numpy, (B, max_num, 7) + if not online_eval_only: + object_bbx_center = torch.from_numpy(np.array(object_bbx_center)) + object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask)) + gt_object_bbx_center = torch.from_numpy(np.array(gt_object_bbx_center)) + gt_object_bbx_mask = torch.from_numpy(np.array(gt_object_bbx_mask)) + else: + object_bbx_center = None + object_bbx_mask = None + gt_object_bbx_center = None + gt_object_bbx_mask = None + + + # unused label + label_torch_dict = {} + if False: + label_torch_dict = \ + self.post_processor.collate_batch(label_dict_list) + + record_len = torch.from_numpy(np.array(record_len)) + record_len = torch.from_numpy(np.array(record_len, dtype=int)) + pairwise_t_matrix = torch.from_numpy(np.array(pairwise_t_matrix_list)) + label_torch_dict['record_len'] = record_len + label_torch_dict['pairwise_t_matrix'] = pairwise_t_matrix + # for centerpoint + if not online_eval_only: + label_torch_dict.update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask}) + output_dict['ego'].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask,}) + output_dict['ego'].update({ + 'anchor_box': torch.from_numpy(self.anchor_box), + 'label_dict': label_torch_dict, + 'record_len': record_len, + 'pairwise_t_matrix': pairwise_t_matrix}) + if self.visualize: + origin_lidar = \ + np.array(downsample_lidar_minimum(pcd_np_list=origin_lidar)) + origin_lidar = torch.from_numpy(origin_lidar) + output_dict['ego'].update({'origin_lidar': origin_lidar}) + + if self.load_lidar_file: + merged_feature_dict = merge_features_to_dict(processed_lidar_list) + if self.heterogeneous: + lidar_agent = np.concatenate(lidar_agent_list) + lidar_agent_idx = lidar_agent.nonzero()[0].tolist() + for k, v in merged_feature_dict.items(): # 'voxel_features' 'voxel_num_points' 'voxel_coords' + merged_feature_dict[k] = [v[index] for index in lidar_agent_idx] + + if not self.heterogeneous or (self.heterogeneous and sum(lidar_agent) != 0): + processed_lidar_torch_dict = \ + self.pre_processor.collate_batch(merged_feature_dict) + output_dict['ego'].update({'processed_lidar': processed_lidar_torch_dict}) + + if self.load_camera_file: + # collate ego camera information + imgs_batch = [] + rots_batch = [] + trans_batch = [] + intrins_batch = [] + extrinsics_batch = [] + post_trans_batch = [] + 
post_rots_batch = [] + for i in range(len(batch)): + ego_dict = batch[i]["ego"]["image_inputs"] + imgs_batch.append(ego_dict["imgs"]) + rots_batch.append(ego_dict["rots"]) + trans_batch.append(ego_dict["trans"]) + intrins_batch.append(ego_dict["intrins"]) + extrinsics_batch.append(ego_dict["extrinsics"]) + post_trans_batch.append(ego_dict["post_trans"]) + post_rots_batch.append(ego_dict["post_rots"]) + + output_dict["ego"].update({ + "image_inputs": + { + "imgs": torch.stack(imgs_batch), # [B, N, C, H, W] + "rots": torch.stack(rots_batch), + "trans": torch.stack(trans_batch), + "intrins": torch.stack(intrins_batch), + "post_trans": torch.stack(post_trans_batch), + "post_rots": torch.stack(post_rots_batch), + } + } + ) + + merged_image_inputs_dict = merge_features_to_dict(image_inputs_list, merge='cat') + + if self.heterogeneous: + lidar_agent = np.concatenate(lidar_agent_list) + camera_agent = 1 - lidar_agent + camera_agent_idx = camera_agent.nonzero()[0].tolist() + if sum(camera_agent) != 0: + for k, v in merged_image_inputs_dict.items(): # 'imgs' 'rots' 'trans' ... + merged_image_inputs_dict[k] = torch.stack([v[index] for index in camera_agent_idx]) + + if not self.heterogeneous or (self.heterogeneous and sum(camera_agent) != 0): + output_dict['ego'].update({'image_inputs': merged_image_inputs_dict}) + + record_len = torch.from_numpy(np.array(record_len, dtype=int)) + pairwise_t_matrix = torch.from_numpy(np.array(pairwise_t_matrix_list)) + label_torch_dict['record_len'] = record_len + label_torch_dict['pairwise_t_matrix'] = pairwise_t_matrix + lidar_pose = torch.from_numpy(np.concatenate(lidar_pose_list, axis=0)) + lidar_pose_clean = torch.from_numpy(np.concatenate(lidar_pose_clean_list, axis=0)) + + if not online_eval_only: + label_torch_dict = \ + self.post_processor.collate_batch(label_dict_list) + + gt_label_torch_dict = \ + self.post_processor.collate_batch(gt_label_dict_list) + + # for centerpoint + label_torch_dict.update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask}) + + gt_label_torch_dict.update({'gt_object_bbx_center': gt_object_bbx_center, + 'gt_object_bbx_mask': gt_object_bbx_mask}) + else: + gt_label_torch_dict = {} + + gt_label_torch_dict['pairwise_t_matrix'] = pairwise_t_matrix + gt_label_torch_dict['record_len'] = record_len + + # object id is only used during inference, where batch size is 1. + # so here we only get the first element. 
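+            # During training these object ids are not used; at inference the batch size is 1,
+            # so object_ids[0] below is exactly the id list of that single sample.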
+ output_dict['ego'].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'record_len': record_len, + 'label_dict': label_torch_dict, + 'object_ids': object_ids[0], + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_pose_clean': lidar_pose_clean, + 'lidar_pose': lidar_pose, + 'anchor_box': self.anchor_box_torch}) + + output_dict['ego'].update({'gt_object_bbx_center': gt_object_bbx_center, + 'gt_object_bbx_mask': gt_object_bbx_mask, + 'gt_label_dict': gt_label_torch_dict, + 'gt_object_ids': gt_object_ids[0]}) + + output_dict['ego'].update({'dict_list': dict_list}) + output_dict['ego'].update({'record_len': record_len, + 'pairwise_t_matrix': pairwise_t_matrix + }) + + if self.visualize: + origin_lidar = torch.from_numpy(np.array(origin_lidar)) + output_dict['ego'].update({'origin_lidar': origin_lidar}) + output_dict['ego'].update({'img_front': img_front}) + output_dict['ego'].update({'img_right': img_right}) + output_dict['ego'].update({'img_left': img_left}) + output_dict['ego'].update({'BEV': BEV}) + + if self.supervise_single and not online_eval_only: + output_dict['ego'].update({ + "label_dict_single":{ + # "pos_equal_one": torch.cat(pos_equal_one_single, dim=0), + # "neg_equal_one": torch.cat(neg_equal_one_single, dim=0), + # "targets": torch.cat(targets_single, dim=0), + # for centerpoint + "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0), + "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0) + }, + "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0), + "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0) + }) + + if self.heterogeneous: + output_dict['ego'].update({ + "lidar_agent_record": torch.from_numpy(np.concatenate(lidar_agent_list)) # [0,1,1,0,1...] + }) + + + return output_dict + + def collate_batch_test(self, batch, online_eval_only=False): + """ + Customized collate function for pytorch dataloader during testing + for late fusion dataset. + + Parameters + ---------- + batch : dict + + Returns + ------- + batch : dicn + Reformatted batch. + """ + # currently, we only support batch size of 1 during testing + assert len(batch) <= 1, "Batch size 1 is required during testing!" + + self.online_eval_only = online_eval_only + + output_dict = self.collate_batch_train(batch, online_eval_only) + if output_dict is None: + return None + + batch = batch[0] + + if batch['ego']['anchor_box'] is not None: + output_dict['ego'].update({'anchor_box': + self.anchor_box_torch}) + + record_len = torch.from_numpy(np.array([batch['ego']['cav_num']])) + pairwise_t_matrix = torch.from_numpy(np.array([batch['ego']['pairwise_t_matrix']])) + + output_dict['ego'].update({'record_len': record_len, + 'pairwise_t_matrix': pairwise_t_matrix + }) + + # heterogeneous + if self.heterogeneous: + idx = batch['ego']['idx'] + cav_list = batch['ego']['cav_list'] # ['ego', '650' ..] + cav_num = len(batch) + lidar_agent, camera_agent = self.selector.select_agent(idx) + lidar_agent = lidar_agent[:cav_num] # [1,0,0,1,0] + lidar_agent_idx = lidar_agent.nonzero()[0].tolist() + lidar_agent_cav_id = [cav_list[index] for index in lidar_agent_idx] # ['ego', ...] 
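+            # lidar_agent_cav_id now lists the CAVs that keep their lidar branch; the remaining
+            # CAVs fall back to the camera branch when the per-CAV dicts are built below.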
+ + + # for late fusion, we also need to stack the lidar for better + # visualization + if self.visualize: + projected_lidar_list = [] + origin_lidar = [] + + for cav_id, cav_content in batch.items(): + if cav_id != 'ego': + output_dict.update({cav_id: {}}) + # output_dict.update({cav_id: {}}) + + if not online_eval_only: + object_bbx_center = \ + torch.from_numpy(np.array([cav_content['object_bbx_center']])) + object_bbx_mask = \ + torch.from_numpy(np.array([cav_content['object_bbx_mask']])) + object_ids = cav_content['object_ids'] + + # the anchor box is the same for all bounding boxes usually, thus + # we don't need the batch dimension. + output_dict[cav_id].update( + {"anchor_box": self.anchor_box_torch} + ) + + transformation_matrix = cav_content['transformation_matrix'] + + if self.visualize: + origin_lidar = [cav_content['origin_lidar']] + if (self.params['only_vis_ego'] is False) or (cav_id=='ego'): + projected_lidar = copy.deepcopy(cav_content['origin_lidar']) + projected_lidar[:, :3] = \ + box_utils.project_points_by_matrix_torch( + projected_lidar[:, :3], + transformation_matrix) + projected_lidar_list.append(projected_lidar) + + + if self.load_lidar_file: + # processed lidar dictionary + #if 'processed_features' in cav_content.keys(): + + merged_feature_dict = merge_features_to_dict([cav_content['processed_lidar']]) + processed_lidar_torch_dict = \ + self.pre_processor.collate_batch(merged_feature_dict) + output_dict[cav_id].update({'processed_lidar': processed_lidar_torch_dict}) + + if self.load_camera_file: + imgs_batch = [cav_content["image_inputs"]["imgs"]] + rots_batch = [cav_content["image_inputs"]["rots"]] + trans_batch = [cav_content["image_inputs"]["trans"]] + intrins_batch = [cav_content["image_inputs"]["intrins"]] + extrinsics_batch = [cav_content["image_inputs"]["extrinsics"]] + post_trans_batch = [cav_content["image_inputs"]["post_trans"]] + post_rots_batch = [cav_content["image_inputs"]["post_rots"]] + + output_dict[cav_id].update({ + "image_inputs": + { + "imgs": torch.stack(imgs_batch), + "rots": torch.stack(rots_batch), + "trans": torch.stack(trans_batch), + "intrins": torch.stack(intrins_batch), + "extrinsics": torch.stack(extrinsics_batch), + "post_trans": torch.stack(post_trans_batch), + "post_rots": torch.stack(post_rots_batch), + } + } + ) + + # heterogeneous + if self.heterogeneous: + if cav_id in lidar_agent_cav_id: + output_dict[cav_id].pop('image_inputs') + else: + output_dict[cav_id].pop('processed_lidar') + + if not online_eval_only: + # label dictionary + label_torch_dict = \ + self.post_processor.collate_batch([cav_content['label_dict']]) + + # for centerpoint + label_torch_dict.update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask}) + + # save the transformation matrix (4, 4) to ego vehicle + transformation_matrix_torch = \ + torch.from_numpy( + np.array(cav_content['transformation_matrix'])).float() + + # late fusion training, no noise + transformation_matrix_clean_torch = \ + torch.from_numpy( + np.array(cav_content['transformation_matrix_clean'])).float() + + if not online_eval_only: + output_dict[cav_id].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'label_dict': label_torch_dict, + # 'record_len': record_len, + 'object_ids': object_ids,}) + output_dict[cav_id].update({ + 'transformation_matrix': transformation_matrix_torch, + 'transformation_matrix_clean': transformation_matrix_clean_torch}) + + + if 'cav_num' in cav_content.keys(): + record_len = 
torch.from_numpy(np.array([cav_content['cav_num']])) + output_dict[cav_id].update({'record_len': record_len}) + + if 'pairwise_t_matrix' in cav_content.keys(): + pairwise_t_matrix = torch.from_numpy(np.array([cav_content['pairwise_t_matrix']])) + output_dict[cav_id].update({'pairwise_t_matrix': pairwise_t_matrix}) + + + + if self.visualize: + origin_lidar = \ + np.array( + downsample_lidar_minimum(pcd_np_list=origin_lidar)) + origin_lidar = torch.from_numpy(origin_lidar) + output_dict[cav_id].update({'origin_lidar': origin_lidar}) + + if self.visualize: + projected_lidar_stack = [torch.from_numpy( + np.vstack(projected_lidar_list))] + output_dict['ego'].update({'origin_lidar': projected_lidar_stack}) + + output_dict['ego'].update({ + "sample_idx": batch['ego']['sample_idx'], + "cav_id_list": batch['ego']['cav_id_list'] + }) + batch_record_len = output_dict['ego']['record_len'] + + for cav_id in output_dict.keys(): + if 'record_len' in output_dict[cav_id].keys(): + continue + output_dict[cav_id].update({'record_len': batch_record_len}) + + + return output_dict + + + def post_process(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. + gt_box_tensor : torch.Tensor + The tensor of gt bounding box. + """ + pred_box_tensor, pred_score = self.post_processor.post_process( + data_dict, output_dict + ) + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict) + + return pred_box_tensor, pred_score, gt_box_tensor + + def post_process_no_fusion(self, data_dict, output_dict_ego): + data_dict_ego = OrderedDict() + data_dict_ego["ego"] = data_dict["ego"] + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict) + + pred_box_tensor, pred_score = self.post_processor.post_process( + data_dict_ego, output_dict_ego + ) + return pred_box_tensor, pred_score, gt_box_tensor + + def post_process_multiclass(self, data_dict, output_dict, online_eval_only=False): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. + gt_box_tensor : torch.Tensor + The tensor of gt bounding box. 
+ """ + + if online_eval_only == False: + online_eval_only = self.online_eval_only + + num_class = output_dict['ego']['cls_preds'].shape[1] + pred_box_tensor_list = [] + pred_score_list = [] + gt_box_tensor_list = [] + + num_list = [0,1,3] + + for i in range(num_class): + data_dict_single = copy.deepcopy(data_dict) + gt_dict_single = {'ego': {}} + gt_dict_single['ego'] = copy.deepcopy(data_dict['ego']) + output_dict_single = copy.deepcopy(output_dict) + if not online_eval_only: + data_dict_single['ego']['object_bbx_center'] = data_dict['ego']['object_bbx_center'][:,i,:,:] + data_dict_single['ego']['object_bbx_mask'] = data_dict['ego']['object_bbx_mask'][:,i,:] + data_dict_single['ego']['object_ids'] = data_dict['ego']['object_ids'][num_list[i]] + gt_dict_single['ego']['object_bbx_center'] = data_dict['ego']['gt_object_bbx_center'][:,i,:,:] + gt_dict_single['ego']['object_bbx_mask'] = data_dict['ego']['gt_object_bbx_mask'][:,i,:] + gt_dict_single['ego']['object_ids'] = data_dict['ego']['gt_object_ids'][num_list[i]] + + + for cav in output_dict_single.keys(): + output_dict_single[cav]['cls_preds'] = output_dict[cav]['cls_preds'][:,i:i+1,:,:] + output_dict_single[cav]['reg_preds'] = output_dict[cav]['reg_preds_multiclass'][:,i,:,:] + + pred_box_tensor, pred_score = \ + self.post_processor.post_process(data_dict_single, output_dict_single) + + if not online_eval_only: + gt_box_tensor = self.post_processor.generate_gt_bbx(gt_dict_single) + else: + gt_box_tensor = None + + pred_box_tensor_list.append(pred_box_tensor) + pred_score_list.append(pred_score) + gt_box_tensor_list.append(gt_box_tensor) + + return pred_box_tensor_list, pred_score_list, gt_box_tensor_list + + def post_process_multiclass_no_fusion(self, data_dict, output_dict_ego, online_eval_only=False): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. + gt_box_tensor : torch.Tensor + The tensor of gt bounding box. 
+ """ + + online_eval_only = self.online_eval_only + + num_class = data_dict['ego']['object_bbx_center'].shape[1] + + + pred_box_tensor_list = [] + pred_score_list = [] + gt_box_tensor_list = [] + + num_list = [0,1,3] + + for i in range(num_class): + data_dict_single = copy.deepcopy(data_dict) + gt_dict_single = {'ego': {}} + gt_dict_single['ego'] = copy.deepcopy(data_dict['ego']) + output_dict_single = copy.deepcopy(output_dict_ego) + data_dict_single['ego']['object_bbx_center'] = data_dict['ego']['object_bbx_center'][:,i,:,:] + data_dict_single['ego']['object_bbx_mask'] = data_dict['ego']['object_bbx_mask'][:,i,:] + data_dict_single['ego']['object_ids'] = data_dict['ego']['object_ids'][num_list[i]] + gt_dict_single['ego']['object_bbx_center'] = data_dict['ego']['gt_object_bbx_center'][:,i,:,:] + gt_dict_single['ego']['object_bbx_mask'] = data_dict['ego']['gt_object_bbx_mask'][:,i,:] + gt_dict_single['ego']['object_ids'] = data_dict['ego']['gt_object_ids'][num_list[i]] + output_dict_single['ego']['cls_preds'] = output_dict_ego['ego']['cls_preds'][:,i:i+1,:,:] + output_dict_single['ego']['reg_preds'] = output_dict_ego['ego']['reg_preds_multiclass'][:,i,:,:] + data_dict_single_ego = OrderedDict() + data_dict_single_ego["ego"] = data_dict_single["ego"] + pred_box_tensor, pred_score = \ + self.post_processor.post_process(data_dict_single_ego, output_dict_single) + gt_box_tensor = self.post_processor.generate_gt_bbx(gt_dict_single) + + + pred_box_tensor_list.append(pred_box_tensor) + pred_score_list.append(pred_score) + gt_box_tensor_list.append(gt_box_tensor) + + return pred_box_tensor_list, pred_score_list, gt_box_tensor_list + + def post_process_no_fusion_uncertainty(self, data_dict, output_dict_ego): + data_dict_ego = OrderedDict() + data_dict_ego['ego'] = data_dict['ego'] + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict) + + pred_box_tensor, pred_score, uncertainty = \ + self.post_processor.post_process(data_dict_ego, output_dict_ego, return_uncertainty=True) + return pred_box_tensor, pred_score, gt_box_tensor, uncertainty + + return LatemulticlassFusionDataset diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__init__.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..33f9e917b452cfb68436e8f04d1b0a348dbda5ea --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__init__.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + +from opencood.data_utils.post_processor.voxel_postprocessor import VoxelPostprocessor +from opencood.data_utils.post_processor.bev_postprocessor import BevPostprocessor +from opencood.data_utils.post_processor.ciassd_postprocessor import CiassdPostprocessor +from opencood.data_utils.post_processor.fpvrcnn_postprocessor import FpvrcnnPostprocessor +from opencood.data_utils.post_processor.uncertainty_voxel_postprocessor import UncertaintyVoxelPostprocessor + +__all__ = { + 'VoxelPostprocessor': VoxelPostprocessor, + 'BevPostprocessor': BevPostprocessor, + 'CiassdPostprocessor': CiassdPostprocessor, + 'FpvrcnnPostprocessor': FpvrcnnPostprocessor, + 'UncertaintyVoxelPostprocessor': UncertaintyVoxelPostprocessor, +} + + +def build_postprocessor(anchor_cfg, train): + process_method_name = anchor_cfg['core_method'] + anchor_generator = __all__[process_method_name]( + anchor_params=anchor_cfg, + 
train=train + ) + + return anchor_generator \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/__init__.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2c6a9a3b26c0a683efd6841280e1e0ca7f35f780 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/__init__.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/base_postprocessor.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/base_postprocessor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c8a1a222ffbe82e9cb9d0fd5ceadce13429ff633 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/base_postprocessor.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/bev_postprocessor.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/bev_postprocessor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..560200cb57f4fc2819e9eb71dc359b67f60e3fc7 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/bev_postprocessor.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/ciassd_postprocessor.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/ciassd_postprocessor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..258fd8aa88cffb50439ac207f19f2a560f9030e1 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/ciassd_postprocessor.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/fpvrcnn_postprocessor.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/fpvrcnn_postprocessor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..01c5be1e2e8fffb4b64bb4041ec1305b6a015400 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/fpvrcnn_postprocessor.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/uncertainty_voxel_postprocessor.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/uncertainty_voxel_postprocessor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a7d4596eda63e5a7df33b46f8b3c3cd43c4e667a Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/uncertainty_voxel_postprocessor.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/voxel_postprocessor.cpython-37.pyc 
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/voxel_postprocessor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b5be15343ef0b980ab264879719bbc710770f130 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/voxel_postprocessor.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/base_postprocessor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/base_postprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..80a3833e61c007d767a286d8f13a26bcdef5cf24 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/base_postprocessor.py @@ -0,0 +1,594 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + +""" +Template for AnchorGenerator +""" + +import numpy as np +import torch +import cv2 + +from opencood.utils import box_utils +from opencood.utils import common_utils +from opencood.utils.transformation_utils import x1_to_x2 + +class BasePostprocessor(object): + """ + Template for Anchor generator. + + Parameters + ---------- + anchor_params : dict + The dictionary containing all anchor-related parameters. + train : bool + Indicate train or test mode. + + Attributes + ---------- + bbx_dict : dictionary + Contain all objects information across the cav, key: id, value: bbx + coordinates (1, 7) + """ + + def __init__(self, anchor_params, train=True): + self.params = anchor_params + self.bbx_dict = {} + self.train = train + + def generate_anchor_box(self): + # needs to be overloaded + return None + + def generate_label(self, *argv): + return None + + def generate_gt_bbx(self, data_dict): + """ + The base postprocessor will generate 3d groundtruth bounding box. + + For early and intermediate fusion, + data_dict only contains ego. + + For late fusion, + data_dcit contains all cavs, so we need transformation matrix. + To generate gt boxes, transformation_matrix should be clean + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + Returns + ------- + gt_box3d_tensor : torch.Tensor + The groundtruth bounding box tensor, shape (N, 8, 3). + """ + gt_box3d_list = [] + # used to avoid repetitive bounding box + object_id_list = [] + + for cav_id, cav_content in data_dict.items(): + # used to project gt bounding box to ego space + # object_bbx_center is clean. 
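+            # Every CAV carries its own clean boxes in late fusion; each set is projected into
+            # the ego frame here and duplicates are later removed via the shared object ids
+            # before the (N, 8, 3) corner tensor is returned.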
+ transformation_matrix = cav_content['transformation_matrix_clean'] + + object_bbx_center = cav_content['object_bbx_center'] + object_bbx_mask = cav_content['object_bbx_mask'] + object_ids = cav_content['object_ids'] + object_bbx_center = object_bbx_center[object_bbx_mask == 1] + + # convert center to corner + object_bbx_corner = \ + box_utils.boxes_to_corners_3d(object_bbx_center, + self.params['order']) + projected_object_bbx_corner = \ + box_utils.project_box3d(object_bbx_corner.float(), + transformation_matrix) + gt_box3d_list.append(projected_object_bbx_corner) + # append the corresponding ids + object_id_list += object_ids + + # gt bbx 3d + gt_box3d_list = torch.vstack(gt_box3d_list) + # some of the bbx may be repetitive, use the id list to filter + gt_box3d_selected_indices = \ + [object_id_list.index(x) for x in set(object_id_list)] + gt_box3d_tensor = gt_box3d_list[gt_box3d_selected_indices] + + # filter the gt_box to make sure all bbx are in the range. with z dim + gt_box3d_np = gt_box3d_tensor.cpu().numpy() + gt_box3d_np = box_utils.mask_boxes_outside_range_numpy(gt_box3d_np, + self.params['gt_range'], + order=None) + try: + gt_box3d_tensor = torch.from_numpy(gt_box3d_np).to(device=gt_box3d_list.device) + except: + print('load gt_box3d_tensor failed') + if len(gt_box3d_list)>0: + gt_box3d_tensor = torch.from_numpy(gt_box3d_np).to(device=gt_box3d_list[0].device) + else: + gt_box3d_tensor = None + + return gt_box3d_tensor + + + def generate_gt_bbx_by_iou(self, data_dict): + """ + This function is only used by DAIR-V2X + late fusion dataset + + DAIR-V2X + late fusion dataset's label are from veh-side and inf-side + and do not have unique object id. + + So we will filter the same object by IoU + + The base postprocessor will generate 3d groundtruth bounding box. + + For early and intermediate fusion, + data_dict only contains ego. + + For late fusion, + data_dcit contains all cavs, so we need transformation matrix. + To generate gt boxes, transformation_matrix should be clean + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + Returns + ------- + gt_box3d_tensor : torch.Tensor + The groundtruth bounding box tensor, shape (N, 8, 3). + """ + gt_box3d_list = [] + + for cav_id, cav_content in data_dict.items(): + # used to project gt bounding box to ego space + # object_bbx_center is clean. 
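+            # DAIR-V2X labels have no shared object ids across agents, so instead of id-based
+            # deduplication the projected infrastructure boxes are filtered against the
+            # vehicle-side boxes by IoU (threshold 0.05) further below.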
+ transformation_matrix = cav_content['transformation_matrix_clean'] + + object_bbx_center = cav_content['object_bbx_center'] + object_bbx_mask = cav_content['object_bbx_mask'] + object_ids = cav_content['object_ids'] + object_bbx_center = object_bbx_center[object_bbx_mask == 1] + + # convert center to corner + object_bbx_corner = \ + box_utils.boxes_to_corners_3d(object_bbx_center, + self.params['order']) + projected_object_bbx_corner = \ + box_utils.project_box3d(object_bbx_corner.float(), + transformation_matrix) + gt_box3d_list.append(projected_object_bbx_corner) + + # if only ego agent + if len(data_dict) == 1: + gt_box3d_tensor = torch.vstack(gt_box3d_list) + # both veh-side and inf-side label + else: + veh_corners_np = gt_box3d_list[0].cpu().numpy() + inf_corners_np = gt_box3d_list[1].cpu().numpy() + inf_polygon_list = list(common_utils.convert_format(inf_corners_np)) + veh_polygon_list = list(common_utils.convert_format(veh_corners_np)) + iou_thresh = 0.05 + + + gt_from_inf = [] + for i in range(len(inf_polygon_list)): + inf_polygon = inf_polygon_list[i] + ious = common_utils.compute_iou(inf_polygon, veh_polygon_list) + if (ious > iou_thresh).any(): + continue + gt_from_inf.append(inf_corners_np[i]) + + if len(gt_from_inf): + gt_from_inf = np.stack(gt_from_inf) + gt_box3d = np.vstack([veh_corners_np, gt_from_inf]) + else: + gt_box3d = veh_corners_np + + gt_box3d_tensor = torch.from_numpy(gt_box3d).to(device=gt_box3d_list[0].device) + + # mask_boxes_outside_range_numpy has filtering of z-dim + # gt_box3d_np = gt_box3d_tensor.cpu().numpy() + # gt_box3d_np = box_utils.mask_boxes_outside_range_numpy(gt_box3d_np, + # self.params['gt_range'], + # self.params['order']) + # gt_box3d_tensor = torch.from_numpy(gt_box3d_np).to(device=gt_box3d_list[0].device) + + # need discussion. not filter z-dim. + mask = \ + box_utils.get_mask_for_boxes_within_range_torch(gt_box3d_tensor, self.params['gt_range']) + gt_box3d_tensor = gt_box3d_tensor[mask, :, :] + + + return gt_box3d_tensor + + def generate_object_center(self, + cav_contents, + reference_lidar_pose, + enlarge_z=False): + """ + Retrieve all objects in a format of (n, 7), where 7 represents + x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw. + + Parameters + ---------- + cav_contents : list + List of dictionary, save all cavs' information. + in fact it is used in get_item_single_car, so the list length is 1 + + reference_lidar_pose : list + The final target lidar pose with length 6. + + enlarge_z : + if True, enlarge the z axis range to include more object + + Returns + ------- + object_np : np.ndarray + Shape is (max_num, 7). + mask : np.ndarray + Shape is (max_num,). + object_ids : list + Length is number of bbx in current sample. 
+ """ + tmp_object_dict = {} + for cav_content in cav_contents: + tmp_object_dict.update(cav_content['params']['vehicles']) + + output_dict = {} + filter_range = self.params['anchor_args']['cav_lidar_range'] \ + if self.train else self.params['gt_range'] + + box_utils.project_world_objects(tmp_object_dict, + output_dict, + reference_lidar_pose, + filter_range, + self.params['order'], + enlarge_z) + + object_np = np.zeros((self.params['max_num'], 7)) + mask = np.zeros(self.params['max_num']) + object_ids = [] + + for i, (object_id, object_bbx) in enumerate(output_dict.items()): + object_np[i] = object_bbx[0, :] + mask[i] = 1 + object_ids.append(object_id) + return object_np, mask, object_ids + + + def generate_object_center_v2x(self, + cav_contents, + reference_lidar_pose): + """ + Retrieve all objects in a format of (n, 7), where 7 represents + x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw. + + Parameters + ---------- + cav_contents : list + List of dictionary, save all cavs' information. + In fact, only the ego vehile needs to generate object center + + reference_lidar_pose : list + The final target lidar pose with length 6. + + Returns + ------- + object_np : np.ndarray + Shape is (max_num, 7). + mask : np.ndarray + Shape is (max_num,). + object_ids : list + Length is number of bbx in current sample. + """ + # from opencood.data_utils.datasets import GT_RANGE + + assert len(cav_contents) == 1 + + """ + In old version, we only let ego agent return gt box. + Other agent return empty. + + But it's not suitable for late fusion. + Also, we should filter out boxes that don't have any lidar point hits. + + Thankfully, 'lidar_np' is in cav_contents[0].keys() + """ + + + gt_boxes = cav_contents[0]['params']['vehicles'] # notice [N,10], 10 includes [x,y,z,dx,dy,dz,w,a,b,c] + object_ids = cav_contents[0]['params']['object_ids'] + lidar_np = cav_contents[0]['lidar_np'] + + tmp_object_dict = {"gt_boxes": gt_boxes, "object_ids":object_ids} + + output_dict = {} + filter_range = self.params['anchor_args']['cav_lidar_range'] # v2x we don't use GT_RANGE. + + box_utils.project_world_objects_v2x(tmp_object_dict, + output_dict, + reference_lidar_pose, + filter_range, + self.params['order'], + lidar_np=lidar_np) + + object_np = np.zeros((self.params['max_num'], 7)) + mask = np.zeros(self.params['max_num']) + object_ids = [] + + + for i, (object_id, object_bbx) in enumerate(output_dict.items()): + object_np[i] = object_bbx[0, :] + mask[i] = 1 + object_ids.append(object_id) + + return object_np, mask, object_ids + + def generate_object_center_dairv2x(self, + cav_contents, + reference_lidar_pose): + """ + Retrieve all objects in a format of (n, 7), where 7 represents + x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw. + + Parameters + ---------- + cav_contents : list + List of dictionary, save all cavs' information. + + reference_lidar_pose : list + The final target lidar pose with length 6. + + Returns + ------- + object_np : np.ndarray + Shape is (max_num, 7). + mask : np.ndarray + Shape is (max_num,). + object_ids : list + Length is number of bbx in current sample. 
+ """ + + # tmp_object_dict = {} + tmp_object_list = [] + cav_content = cav_contents[0] + tmp_object_list = cav_content['params']['vehicles'] #世界坐标系下 + + output_dict = {} + filter_range = self.params['anchor_args']['cav_lidar_range'] + + + box_utils.project_world_objects_dairv2x(tmp_object_list, + output_dict, + reference_lidar_pose, + filter_range, + self.params['order']) + + object_np = np.zeros((self.params['max_num'], 7)) + mask = np.zeros(self.params['max_num']) + object_ids = [] + + for i, (object_id, object_bbx) in enumerate(output_dict.items()): + object_np[i] = object_bbx[0, :] + mask[i] = 1 + object_ids.append(object_id) + + return object_np, mask, object_ids + + + def generate_object_center_dairv2x_single(self, + cav_contents, + suffix=""): + """ + Retrieve all objects in a format of (n, 7), where 7 represents + x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw. + + Parameters + ---------- + cav_contents : list + List of dictionary, save all cavs' information. + + Returns + ------- + object_np : np.ndarray + Shape is (max_num, 7). + mask : np.ndarray + Shape is (max_num,). + object_ids : list + Length is number of bbx in current sample. + """ + + # tmp_object_dict = {} + tmp_object_list = [] + cav_content = cav_contents[0] + tmp_object_list = cav_content['params'][f'vehicles{suffix}'] # ego 坐标系下 + + output_dict = {} + filter_range = self.params['anchor_args']['cav_lidar_range'] + + + box_utils.load_single_objects_dairv2x(tmp_object_list, + output_dict, + filter_range, + self.params['order']) + + object_np = np.zeros((self.params['max_num'], 7)) + mask = np.zeros(self.params['max_num']) + object_ids = [] + + for i, (object_id, object_bbx) in enumerate(output_dict.items()): + object_np[i] = object_bbx[0, :] + mask[i] = 1 + object_ids.append(object_id) + + return object_np, mask, object_ids + + + + def generate_object_center_dairv2x_single_hetero(self, + cav_contents, + reference_lidar_pose, + suffix, + ): + """ + Retrieve all objects in a format of (n, 7), where 7 represents + x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw. + + Parameters + ---------- + cav_contents : list + List of dictionary, save all cavs' information. + + Returns + ------- + object_np : np.ndarray + Shape is (max_num, 7). + mask : np.ndarray + Shape is (max_num,). + object_ids : list + Length is number of bbx in current sample. + """ + + # tmp_object_dict = {} + tmp_object_list = [] + cav_content = cav_contents[0] + tmp_object_list = cav_content['params'][f'vehicles{suffix}'] # ego 坐标系下 + + output_dict = {} + filter_range = self.params['anchor_args']['cav_lidar_range'] + + cav_coor = cav_content['params']['lidar_pose'] # T_world_cav + ego_coor = reference_lidar_pose # T_world_ego + T_ego_cav = x1_to_x2(cav_coor, ego_coor) # T_ego_cav + + box_utils.load_single_objects_dairv2x_hetero(tmp_object_list, + output_dict, + filter_range, + T_ego_cav, + self.params['order']) + + object_np = np.zeros((self.params['max_num'], 7)) + mask = np.zeros(self.params['max_num']) + object_ids = [] + + for i, (object_id, object_bbx) in enumerate(output_dict.items()): + object_np[i] = object_bbx[0, :] + mask[i] = 1 + object_ids.append(object_id) + + return object_np, mask, object_ids + + + + + + def generate_visible_object_center(self, + cav_contents, + reference_lidar_pose, + enlarge_z=False): + """ + Retrieve all objects in a format of (n, 7), where 7 represents + x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw. + + Parameters + ---------- + cav_contents : list + List of dictionary, save all cavs' information. 
+ in fact it is used in get_item_single_car, so the list length is 1 + + reference_lidar_pose : list + The final target lidar pose with length 6. + + visibility_map : np.ndarray, uint8 + for OPV2V, its 256*256 resolution. 0.39m per pixel. heading up. + + enlarge_z : + if True, enlarge the z axis range to include more object + + Returns + ------- + object_np : np.ndarray + Shape is (max_num, 7). + mask : np.ndarray + Shape is (max_num,). + object_ids : list + Length is number of bbx in current sample. + """ + + tmp_object_dict = {} + for cav_content in cav_contents: + tmp_object_dict.update(cav_content['params']['vehicles']) + + output_dict = {} + filter_range = self.params['anchor_args']['cav_lidar_range'] # if self.train else GT_RANGE_OPV2V + inf_filter_range = [-1e5, -1e5, -1e5, 1e5, 1e5, 1e5] + visibility_map = np.asarray(cv2.cvtColor(cav_contents[0]["bev_visibility.png"], cv2.COLOR_BGR2GRAY)) + ego_lidar_pose = cav_contents[0]["params"]["lidar_pose_clean"] + + # 1-time filter: in ego coordinate, use visibility map to filter. + box_utils.project_world_visible_objects(tmp_object_dict, + output_dict, + ego_lidar_pose, + inf_filter_range, + self.params['order'], + visibility_map, + enlarge_z) + + updated_tmp_object_dict = {} + for k, v in tmp_object_dict.items(): + if k in output_dict: + updated_tmp_object_dict[k] = v # not visible + output_dict = {} + + # 2-time filter: use reference_lidar_pose + box_utils.project_world_objects(updated_tmp_object_dict, + output_dict, + reference_lidar_pose, + filter_range, + self.params['order'], + enlarge_z) + + object_np = np.zeros((self.params['max_num'], 7)) + mask = np.zeros(self.params['max_num']) + object_ids = [] + + for i, (object_id, object_bbx) in enumerate(output_dict.items()): + object_np[i] = object_bbx[0, :] + mask[i] = 1 + object_ids.append(object_id) + + return object_np, mask, object_ids + + def generate_object_center_v2xset_camera(self, + cav_contents, + reference_lidar_pose, + enlarge_z=False): + + tmp_object_dict = {} + for cav_content in cav_contents: + tmp_object_dict.update(cav_content['params']['vehicles']) + + output_dict = {} + filter_range = [-45, -45, -3, 45, 45, 1] + + box_utils.project_world_objects(tmp_object_dict, + output_dict, + reference_lidar_pose, + filter_range, + self.params['order'], + enlarge_z) + + object_np = np.zeros((self.params['max_num'], 7)) + mask = np.zeros(self.params['max_num']) + object_ids = [] + + for i, (object_id, object_bbx) in enumerate(output_dict.items()): + object_np[i] = object_bbx[0, :] + mask[i] = 1 + object_ids.append(object_id) + return object_np, mask, object_ids \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/bev_postprocessor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/bev_postprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..b08b189314f5307437235a3d33acd8179dc1a513 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/bev_postprocessor.py @@ -0,0 +1,451 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang haxiang@g.ucla.edu +# License: TDG-Attribution-NonCommercial-NoDistrib + +""" +Anchor-free 2d Generator +""" + +import numpy as np +import torch +import torch.nn.functional as F + +from opencood.utils.transformation_utils import dist_to_continuous +from opencood.data_utils.post_processor.base_postprocessor \ + import BasePostprocessor +from opencood.utils import box_utils +from opencood.visualization 
import vis_utils + + +class BevPostprocessor(BasePostprocessor): + def __init__(self, anchor_params, train): + super(BevPostprocessor, self).__init__(anchor_params, train) + # self.geometry_param = anchor_params["geometry"] + self.geometry_param = anchor_params["geometry_param"] + + # TODO + # Hard coded for now. Need to calculate for our own training dataset + self.target_mean = np.array([0.008, 0.001, 0.202, 0.2, 0.43, 1.368]) + self.target_std_dev = np.array([0.866, 0.5, 0.954, 0.668, 0.09, 0.111]) + + def generate_anchor_box(self): + return None + + def generate_label(self, **kwargs): + """ + Generate targets for training. + + Parameters + ---------- + kwargs : list + gt_box_center:(max_num, 7) + + Returns + ------- + label_dict : dict + Dictionary that contains all target related info. + """ + assert self.params['order'] == 'lwh', \ + 'Currently BEV only support lwh bbx order.' + # (max_num, 7) + gt_box_center = kwargs['gt_box_center'] + + # (max_num) + masks = kwargs['mask'] + + # (n, 7) + gt_box_center_valid = gt_box_center[masks == 1] + # (n, 4, 3) + bev_corners = box_utils.boxes_to_corners2d(gt_box_center_valid, + self.params['order']) + + n = gt_box_center_valid.shape[0] + # (n, 4, 2) + bev_corners = bev_corners[:, :, :2] + yaw = gt_box_center_valid[:, -1] + x, y = gt_box_center_valid[:, 0], gt_box_center_valid[:, 1] + dx, dy = gt_box_center_valid[:, 3], gt_box_center_valid[:, 4] + # (n, 6) + reg_targets = np.column_stack([np.cos(yaw), np.sin(yaw), x, y, dx, dy]) + + # target label map including classification and regression targets + label_map = np.zeros(self.geometry_param["label_shape"]) + self.update_label_map(label_map, bev_corners, reg_targets) + label_map = self.normalize_targets(label_map) + label_dict = { + # (7, label_shape[0], label_shape[1]) + "label_map": np.transpose(label_map, (2, 0, 1)).astype(np.float32), + "bev_corners": bev_corners + } + return label_dict + + def update_label_map(self, label_map, bev_corners, reg_targets): + """ + Update label_map based on bbx and regression targets. + + Parameters + ---------- + label_map : numpy.array + Targets array for classification and regression tasks with + the shape of label_shape. (H, W, 7). + + bev_corners : numpy.array + The bbx corners in lidar frame with shape (n, 4, 2) + + reg_targets : numpy.array + Array containing the regression targets information. It need to be + further processed. + + """ + res = self.geometry_param["res"] + downsample_rate = self.geometry_param["downsample_rate"] + + bev_origin = np.array([self.geometry_param["L1"], + self.geometry_param["W1"]]).reshape(1, -1) + + # discretized bbx corner representations -- (n, 4, 2) + # bev_corners is real coordinate + # bev_corners_dist is pixel coordinate + bev_corners_dist = (bev_corners - bev_origin) / res / downsample_rate + # generate the coordinates of m + x = np.arange(self.geometry_param["label_shape"][0]) # H (x in lidar coordinate) + y = np.arange(self.geometry_param["label_shape"][1]) # W (y in lidar coordinate) + xx, yy = np.meshgrid(x, y) + + # (label_shape[0]*label_shape[1], 2) + points = np.concatenate([xx.reshape(-1, 1), yy.reshape(-1, 1)], + axis=-1) # pixel + bev_origin_dist = bev_origin / res / downsample_rate + + # loop over each bbx, find the points within the bbx. 
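+        # for every pixel falling inside a rotated box, the classification channel
+        # is set to 1 and the regression channels store
+        #   [cos(yaw), sin(yaw), x - px, y - py, log(dx), log(dy)],
+        # where (px, py) is the pixel centre mapped back to continuous coordinates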
+ for i in range(bev_corners.shape[0]): + reg_target = reg_targets[i, :] + + # find discredited points in bbx + points_in_box = \ + box_utils.get_points_in_rotated_box(points, + bev_corners_dist[i, ...]) + # convert points to continuous space + points_continuous = dist_to_continuous(points_in_box, + bev_origin_dist, + res, + downsample_rate) + actual_reg_target = np.repeat(reg_target.reshape(1, -1), + points_continuous.shape[0], + axis=0) + # build learning targets + actual_reg_target[:, 2:4] = \ + actual_reg_target[:, 2:4] - points_continuous + actual_reg_target[:, 4:] = np.log(actual_reg_target[:, 4:]) + + # update label map + label_map[points_in_box[:, 0], points_in_box[:, 1], 0] = 1.0 + label_map[points_in_box[:, 0], points_in_box[:, 1], 1:] = \ + actual_reg_target + + def normalize_targets(self, label_map): + """ + Normalize label_map + + Parameters + ---------- + label_map : numpy.array + Targets array for classification and regression tasks with the + shape of label_shape. + + Returns + ------- + label_map: numpy.array + Nromalized label_map. + + """ + label_map[..., 1:] = \ + (label_map[..., 1:] - self.target_mean) / self.target_std_dev + return label_map + + def denormalize_reg_map(self, reg_map): + """ + Denormalize the regression map + + Parameters + ---------- + reg_map : np.ndarray / torch.Tensor + Regression output mapwith the shape of (label_shape[0], + label_shape[1], 6). + + Returns + ------- + reg_map : np.ndarray / torch.Tensor + Denormalized regression map. + + """ + if isinstance(reg_map, np.ndarray): + target_mean = self.target_mean + target_std_dev = self.target_std_dev + + else: + target_mean = \ + torch.from_numpy(self.target_mean).to(reg_map.device) + target_std_dev = \ + torch.from_numpy(self.target_std_dev).to(reg_map.device) + reg_map = reg_map * target_std_dev + target_mean + return reg_map + + @staticmethod + def collate_batch(label_batch_list): + """ + Customized collate function for target label generation. + + Parameters + ---------- + label_batch_list : list + The list of dictionary that contains all labels for several + frames. + + Returns + ------- + processed_batch : dict + Reformatted labels in torch tensor. + """ + label_map_list = [x["label_map"][np.newaxis, ...] for x in + label_batch_list] + processed_batch = { + # (batch_size, 7, label_shape[0], label_shape[1]) + "label_map": torch.from_numpy(np.concatenate(label_map_list, + axis=0)), + "bev_corners": [torch.from_numpy(x["bev_corners"]) for x in + label_batch_list] + } + return processed_batch + + def post_process(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D bounding box. + Step1: convert each cav's output to bounding box format + Step2: project the bounding boxes to ego space. + Step:3 NMS + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box2d_tensor : torch.Tensor + The prediction bounding box tensor after NMS. + + gt_box2d_tensor : torch.Tensor + The groundtruth bounding box tensor. 
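+
+        Note: the second value actually returned by this implementation is the
+        score tensor of the kept predictions (or None when no box exceeds the
+        score threshold), not a ground-truth tensor.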
+ """ + + # the final bounding box list + pred_box2d_list = [] + pred_score_list = [] + + for cav_id, cav_content in data_dict.items(): + assert cav_id in output_dict + # the transformation matrix to ego space + transformation_matrix = cav_content['transformation_matrix'] + + # classification probability -- (label_shape[0], label_shape[1]) + prob = output_dict[cav_id]['cls'].squeeze(0).squeeze(0) + prob = torch.sigmoid(prob) + # regression map -- (label_shape[0], label_shape[1], 6) + reg_map = output_dict[cav_id]['reg'].squeeze(0).permute(1, 2, 0) + reg_map = self.denormalize_reg_map(reg_map) + threshold = self.params['target_args']['score_threshold'] + mask = torch.gt(prob, threshold) + + if mask.sum() > 0: + # (number of high confidence bbx, 4, 2) + corners2d = self.reg_map_to_bbx_corners(reg_map, mask) + # assume the z-diviation in transformation_matrix is small, + # thus we can pad zeros to simulate the 3d transformation. + # (number of high confidence bbx, 4, 3) + box3d = F.pad(corners2d, (0, 1)) + # (number of high confidence bbx, 4, 2) + projected_boxes2d = \ + box_utils.project_points_by_matrix_torch(box3d.view(-1, 3), + transformation_matrix)[ + :, :2] + + projected_boxes2d = projected_boxes2d.view(-1, 4, 2) + scores = prob[mask] + pred_box2d_list.append(projected_boxes2d) + pred_score_list.append(scores) + + if len(pred_box2d_list): + pred_box2ds = torch.cat(pred_box2d_list, dim=0) + pred_scores = torch.cat(pred_score_list, dim=0) + else: + return None, None + + keep_index = box_utils.nms_rotated(pred_box2ds, pred_scores, + self.params['nms_thresh']) + if len(keep_index): + pred_box2ds = pred_box2ds[keep_index] + pred_scores = pred_scores[keep_index] + + # filter out the prediction out of the range. + mask = box_utils.get_mask_for_boxes_within_range_torch(pred_box2ds, self.params['gt_range']) + pred_box2ds = pred_box2ds[mask, :, :] + pred_scores = pred_scores[mask] + assert pred_scores.shape[0] == pred_box2ds.shape[0] + return pred_box2ds, pred_scores + + def reg_map_to_bbx_corners(self, reg_map, mask): + """ + Construct bbx from the regression output of the model. + + Parameters + ---------- + reg_map : torch.Tensor + Regression output of neural networks. + + mask : torch.Tensor + Masks used to filter bbx. + + Returns + ------- + corners : torch.Tensor + Bbx output with shape (N, 4, 2). + + """ + + assert len(reg_map.shape) == 3, \ + "only support shape of label_shape i.e. (*, *, 6)" + device = reg_map.device + + cos_t, sin_t, x, y, log_dx, log_dy = \ + [tt.squeeze(-1) for tt in torch.chunk(reg_map, 6, dim=-1)] + yaw = torch.atan2(sin_t, cos_t) + dx, dy = log_dx.exp(), log_dy.exp() + + grid_size = self.geometry_param["res"] * \ + self.geometry_param["downsample_rate"] + grid_x = torch.arange(self.geometry_param["L1"], + self.geometry_param["L2"], + grid_size, dtype=torch.float32, device=device) + grid_y = torch.arange(self.geometry_param["W1"], + self.geometry_param["W2"], + grid_size, + dtype=torch.float32, + device=device) + + xx, yy = torch.meshgrid([grid_x, grid_y]) + center_x = xx + x + center_y = yy + y + + bbx2d = torch.stack([center_x, center_y, dx, dy, yaw], dim=-1) + bbx2d = bbx2d[mask, :] + corners = box_utils.boxes2d_to_corners2d(bbx2d) + + return corners + + def post_process_debug(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D bounding box for debug purpose. + Step1: convert each cav's output to bounding box format + Step2: project the bounding boxes to ego space. 
+ Step:3 NMS + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box2d_tensor : torch.Tensor + The prediction bounding box tensor after NMS. + gt_box2d_tensor : torch.Tensor + The groundtruth bounding box tensor. + """ + # the final bounding box list + pred_box2d_list = [] + pred_score_list = [] + + # the transformation matrix to ego space + transformation_matrix = data_dict['transformation_matrix'] + + # classification probability -- (label_shape[0], label_shape[1]) + prob = output_dict['cls'].squeeze(0).squeeze(0) + prob = torch.sigmoid(prob) + + # regression map -- (label_shape[0], label_shape[1], 6) + reg_map = output_dict['reg'].squeeze(0).permute(1, 2, 0) + reg_map = self.denormalize_reg_map(reg_map) + + threshold = 0.5 + mask = torch.gt(prob, threshold) + + if mask.sum() > 0: + # (number of high confidence bbx, 4, 2) + corners2d = self.reg_map_to_bbx_corners(reg_map, mask) + # assume the z-diviation in transformation_matrix is small, + # thus we can pad zeros to simulate the 3d transformation. + # (number of high confidence bbx, 4, 3) + box3d = F.pad(corners2d, (0, 1)) + + # (number of high confidence bbx, 4, 2) + projected_boxes2d = \ + box_utils.project_points_by_matrix_torch(box3d.view(-1, 3), + transformation_matrix)[:, :2] + projected_boxes2d = projected_boxes2d.view(-1, 4, 2) + scores = prob[mask] + pred_box2d_list.append(projected_boxes2d) + pred_score_list.append(scores) + + pred_box2ds = torch.cat(pred_box2d_list, dim=0) + pred_scores = torch.cat(pred_score_list, dim=0) + + keep_index = box_utils.nms_rotated(pred_box2ds, + pred_scores, + self.params['nms_thresh']) + pred_box2ds = pred_box2ds[keep_index] + + # filter out the prediction out of the range. + mask = box_utils.get_mask_for_boxes_within_range_torch(pred_box2ds, self.params['gt_range']) + pred_box2ds = pred_box2ds[mask, :, :] + return pred_box2ds + + @staticmethod + def visualize(pred_box_tensor, gt_tensor, pcd, show_vis, save_path, + dataset=None): + """ + Visualize the BEV 2D prediction, ground truth with point cloud together. + + Parameters + ---------- + pred_box_tensor : torch.Tensor + (N, 8, 3) prediction. + + gt_tensor : torch.Tensor + (N, 8, 3) groundtruth bbx + + pcd : torch.Tensor + PointCloud, (N, 4). + + show_vis : bool + Whether to show visualization. + + save_path : str + Save the visualization results to given path. + + dataset : BaseDataset + opencood dataset object. 
+ """ + assert dataset is not None, "dataset argument can't be None" + vis_utils.visualize_single_sample_output_bev(pred_box_tensor, + gt_tensor, + pcd, + dataset, + show_vis, + save_path) diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/ciassd_postprocessor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/ciassd_postprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..9a5abd8176d3819d3323ab2b620d5a58acfb6527 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/ciassd_postprocessor.py @@ -0,0 +1,168 @@ +""" +3D Anchor Generator for Voxel +""" +import math +import sys + +import numpy as np +import torch +import torch.nn.functional as F + +from opencood.data_utils.post_processor.voxel_postprocessor \ + import VoxelPostprocessor +from opencood.utils import box_utils + + +class CiassdPostprocessor(VoxelPostprocessor): + def __init__(self, anchor_params, train): + super(CiassdPostprocessor, self).__init__(anchor_params, train) + self.train = train + self.anchor_num = self.params['anchor_args']['num'] + + def post_process(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D/3D bounding box. + Step1: convert each cav's output to bounding box format + Step2: project the bounding boxes to ego space. + Step:3 NMS + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box3d_tensor : torch.Tensor + The prediction bounding box tensor after NMS. + gt_box3d_tensor : torch.Tensor + The groundtruth bounding box tensor. + """ + # the final bounding box list + global batch_num_box_count + pred_box3d_original_list = [] + pred_box3d_list = [] + pred_box2d_list = [] + + for cav_id, cav_content in data_dict.items(): + assert cav_id in output_dict + # the transformation matrix to ego space + if 'transformation_matrix' in cav_content: + transformation_matrix = cav_content['transformation_matrix'] + else: + transformation_matrix = torch.from_numpy(np.identity(4)).float().\ + to(cav_content['anchor_box'].device) + + # (H, W, anchor_num, 7) + anchor_box = cav_content['anchor_box'] + + # prediction result + preds_dict = output_dict[cav_id]['preds_dict_stage1'] + + # preds + prob = preds_dict['cls_preds'] + prob = torch.sigmoid(prob.permute(0, 2, 3, 1).contiguous()) + reg = preds_dict['box_preds'].permute(0, 2, 3, 1).contiguous() + iou = preds_dict['iou_preds'].permute(0, 2, 3, 1).contiguous().reshape(1, -1) + dir = preds_dict['dir_cls_preds'].permute(0, 2, 3, 1).contiguous().reshape(1, -1, 2) # [N, H*W*2, 2] + + # convert regression map back to bounding box + # (N, W*L*anchor_num, 7) + batch_box3d = self.delta_to_boxes3d(reg, anchor_box) + mask = torch.gt(prob, self.params['target_args']['score_threshold']) # [N, H, W, 2] + batch_num_box_count = [int(m.sum()) for m in mask] + mask = mask.view(1, -1) # [1,N*H*W*2] + mask_reg = mask.unsqueeze(2).repeat(1, 1, 7) + + # during validation/testing, the batch size should be 1 + if not self.train: + assert batch_box3d.shape[0] == 1 + + boxes3d = torch.masked_select(batch_box3d.view(-1, 7), mask_reg[0]).view(-1, 7) + scores = torch.masked_select(prob.view(-1), mask[0]) + + dir_labels = torch.max(dir, dim=-1)[1] # indices. shape [N, H*W*2]. 
value 0 or 1 + dir_labels = dir_labels[mask] # sum(mask==1) + # top_labels = torch.zeros([scores.shape[0]], dtype=torch.long).cuda() + if scores.shape[0] != 0: + iou = (iou + 1) * 0.5 + scores = scores * torch.pow(iou.masked_select(mask), 4) + # correct_direction + top_labels = (boxes3d[..., -1] > 0) ^ (dir_labels.byte() == 1) + boxes3d[..., -1] += torch.where(top_labels, torch.tensor(np.pi).type_as(boxes3d), + torch.tensor(0.0).type_as(boxes3d)) + pred_box3d_original_list.append(boxes3d.detach()) + + # convert output to bounding box + if len(boxes3d) != 0: + # (N, 8, 3) + boxes3d_corner = box_utils.boxes_to_corners_3d(boxes3d, order=self.params['order']) + # (N, 8, 3) + projected_boxes3d = box_utils.project_box3d(boxes3d_corner, transformation_matrix) + # convert 3d bbx to 2d, (N,4) + projected_boxes2d = box_utils.corner_to_standup_box_torch(projected_boxes3d) + # (N, 5) + boxes2d_score = torch.cat((projected_boxes2d, scores.unsqueeze(1)), dim=1) + + pred_box2d_list.append(boxes2d_score) + pred_box3d_list.append(projected_boxes3d) + + if len(pred_box2d_list) ==0 or len(pred_box3d_list) == 0: + return None, None + # shape: (N, 5) + pred_box2d_list = torch.vstack(pred_box2d_list) + # scores + scores = pred_box2d_list[:, -1] + # predicted 3d bbx + pred_box3d_tensor = torch.vstack(pred_box3d_list) + pred_box3d_original = torch.vstack(pred_box3d_original_list) + + if not self.train: + # remove large bbx + keep_index_1 = box_utils.remove_large_pred_bbx(pred_box3d_tensor) + keep_index_2 = box_utils.remove_bbx_abnormal_z(pred_box3d_tensor) + keep_index = torch.logical_and(keep_index_1, keep_index_2) + + pred_box3d_tensor = pred_box3d_tensor[keep_index] + scores = scores[keep_index] + + # nms + keep_index = box_utils.nms_rotated(pred_box3d_tensor, + scores, + self.params['nms_thresh'] + ) + + pred_box3d_tensor = pred_box3d_tensor[keep_index] + + # select cooresponding score + scores = scores[keep_index] + + # filter out the prediction out of the range. 
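+            # keep only boxes whose corners fall inside the detection range so
+            # predictions and ground truth are compared over the same region
+            # (no explicit range is passed here, unlike the gt_range used by
+            # the other postprocessors in this folder)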
+ mask = \ + box_utils.get_mask_for_boxes_within_range_torch(pred_box3d_tensor) + pred_box3d_tensor = pred_box3d_tensor[mask, :, :] + scores = scores[mask] + + assert scores.shape[0] == pred_box3d_tensor.shape[0] + return pred_box3d_tensor, scores + else: + cur_idx = 0 + batch_pred_boxes3d = [] + batch_scores = [] + for n in batch_num_box_count: + cur_boxes = pred_box3d_tensor[cur_idx:cur_idx+n] + cur_scores = scores[cur_idx:cur_idx+n] + # nms + keep_index = box_utils.nms_rotated(cur_boxes, + cur_scores, + self.params['nms_thresh'] + ) + cur_boxes = pred_box3d_original[cur_idx:cur_idx+n] # [:, [0, 1, 2, 5, 4, 3, 6]] # hwl -> lwh + batch_pred_boxes3d.append(cur_boxes[keep_index]) + batch_scores.append(cur_scores[keep_index]) + cur_idx += n + + return batch_pred_boxes3d, batch_scores diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/fpvrcnn_postprocessor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/fpvrcnn_postprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..e5e7dbde30f4578d7574411b56701ceae39de735 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/fpvrcnn_postprocessor.py @@ -0,0 +1,247 @@ +""" +3D Anchor Generator for Voxel +""" +import numpy as np +import torch + +from opencood.data_utils.post_processor.voxel_postprocessor \ + import VoxelPostprocessor +from opencood.utils import box_utils +from opencood.utils import common_utils +from opencood.utils.common_utils import limit_period +from icecream import ic + +class FpvrcnnPostprocessor(VoxelPostprocessor): + def __init__(self, anchor_params, train): + super(FpvrcnnPostprocessor, self).__init__(anchor_params, train) + # redetect box in stage2 + self.redet = True if 'redet' in anchor_params and anchor_params['redet'] else False + print("Postprocessor Stage2 ReDetect: ", self.redet) + + def post_process(self, data_dict, output_dict, stage1=False): + if stage1: + return self.post_process_stage1(data_dict, output_dict) + elif not self.redet: # stage2 refinement + return self.post_process_stage2(data_dict) + else: # stage2 redetect + return self.post_process_stage2_redet(data_dict, output_dict) + + def post_process_stage1(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D/3D bounding box. + No NMS + + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box3d_tensor : torch.Tensor + The prediction bounding box tensor after NMS. + gt_box3d_tensor : torch.Tensor + The groundtruth bounding box tensor. 
+ """ + # the final bounding box list + pred_corners_list = [] + pred_box3d_list = [] + score_list = [] + + for cav_id, cav_content in data_dict.items(): + assert cav_id in output_dict + + # (H, W, anchor_num, 7) + anchor_box = cav_content['anchor_box'] + + # prediction result + preds_dict = output_dict[cav_id]['stage1_out'] + + # preds + prob = preds_dict['cls_preds'] + prob = torch.sigmoid(prob.permute(0, 2, 3, 1).contiguous()) + reg = preds_dict['reg_preds'] # .permute(0, 2, 3, 1).contiguous() + dir = preds_dict['dir_preds'].permute(0, 2, 3, 1).contiguous().reshape(1, -1, 2) + + batch_box3d = self.delta_to_boxes3d(reg, anchor_box) # hwl + mask = torch.gt(prob, self.params['target_args']['score_threshold']) + batch_num_box_count = [int(m.sum()) for m in mask] + mask = mask.view(1, -1) + mask_reg = mask.unsqueeze(2).repeat(1, 1, 7) + + boxes3d = torch.masked_select(batch_box3d.view(-1, 7), mask_reg[0]).view(-1, 7) # hwl. right + scores = torch.masked_select(prob.view(-1), mask[0]) + + dir_labels = torch.max(dir, dim=-1)[1] + dir_labels = dir_labels[mask] + + if scores.shape[0] != 0: + if 'iou_preds' in preds_dict: + iou = torch.sigmoid(preds_dict['iou_preds'].permute(0, 2, 3, 1).contiguous()).reshape(1, -1) + iou = torch.clamp(iou, min=0.0, max=1.0) + iou = (iou + 1) * 0.5 + scores = scores * torch.pow(iou.masked_select(mask), 4) + + # correct_direction + dir_offset = self.params['dir_args']['dir_offset'] + num_bins = self.params['dir_args']['num_bins'] + + dir = preds_dict['dir_preds'].permute(0, 2, 3, 1).contiguous().reshape(1, -1, 2) + dir_cls_preds = dir[mask] + # if rot_gt > 0, then the label is 1, then the regression target is [0, 1] + dir_labels = torch.max(dir_cls_preds, dim=-1)[1] # indices. shape [1, N*H*W*2]. value 0 or 1. If value is 1, then rot_gt > 0 + + period = (2 * np.pi / num_bins) # pi + dir_rot = limit_period( + boxes3d[..., 6] - dir_offset, 0, period + ) # 限制在0到pi之间 + boxes3d[..., 6] = dir_rot + dir_offset + period * dir_labels.to(dir_cls_preds.dtype) # 转化0.25pi到2.5pi + boxes3d[..., 6] = limit_period(boxes3d[..., 6], 0.5, 2 * np.pi) # limit to [-pi, pi] + + + # filter invalid boxes + keep_idx = torch.logical_and((boxes3d[:, 3:6] > 1).all(dim=1), (boxes3d[:, 3:6] < 10).all(dim=1)) + idx_start = 0 + count = [] + for i, n in enumerate(batch_num_box_count): + count.append(int(keep_idx[idx_start:idx_start+n].sum())) + batch_num_box_count = count + boxes3d = boxes3d[keep_idx] # hwl + scores = scores[keep_idx] + + # if the number of boxes is too huge, this would consume a lot of memory in the second stage + # therefore, randomly select some boxes if the box number is too big at the beginning of the training + + # if len(boxes3d) > 300: + # keep_idx = torch.multinomial(scores, 300) + # idx_start = 0 + # count = [] + # for i, n in enumerate(batch_num_box_count): + # count.append(int(torch.logical_and(keep_idx>=idx_start, keep_idx= 0 + + detections = detections[mask] + scores = rcnn_score[mask] + # gt_boxes = label_dict['gt_of_rois_src'][mask] + mask = nms_gpu(detections, scores, thresh=0.01)[0] + boxes3d = detections[mask] # keep hwl + + projected_boxes3d = None + if len(boxes3d) != 0: + # (N, 8, 3) + boxes3d_corner = \ + box_utils.boxes_to_corners_3d(boxes3d, + order="lwh") # in stage 2, box encoding is dxdydz order + # (N, 8, 3) + projected_boxes3d = \ + box_utils.project_box3d(boxes3d_corner, + data_dict['ego']['transformation_matrix']) + + ## Added by Yifan Lu, filter box outside of GT range + if projected_boxes3d is None: + return None, None + scores = scores[mask] + 
cav_range = self.params['gt_range'] + mask = box_utils.get_mask_for_boxes_within_range_torch(projected_boxes3d, cav_range) + projected_boxes3d = projected_boxes3d[mask] + scores = scores[mask] + + + return projected_boxes3d, scores + + # def post_process_stage2(self, data_dict): + # """ + # it's a pseduo stage2 process, but only output the stage1 rpn result. + # """ + # from opencood.pcdet_utils.iou3d_nms.iou3d_nms_utils import nms_gpu + # if 'stage2_out' not in data_dict['ego'].keys(): + # return None, None + # output_dict = data_dict['ego']['stage2_out'] + # label_dict = data_dict['ego']['rcnn_label_dict'] + # rcnn_score = label_dict['rois_scores_stage1'] + # rois = label_dict['rois'][:,[0,1,2,5,4,3,6]] + + # boxes3d_corner = \ + # box_utils.boxes_to_corners_3d(rois, + # order=self.params['order']) + # mask = box_utils.get_mask_for_boxes_within_range_torch(boxes3d_corner, self.params['gt_range']) + # boxes3d_corner = boxes3d_corner[mask] + # rcnn_score = rcnn_score[mask] + + # return boxes3d_corner, rcnn_score.flatten() + + + def post_process_stage2_redet(self, data_dict, output_dict): + return super().post_process(data_dict, output_dict) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/uncertainty_voxel_postprocessor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/uncertainty_voxel_postprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..c18e1f77b8a2ca2449591b86b54028262b809e27 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/uncertainty_voxel_postprocessor.py @@ -0,0 +1,251 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , OpenPCDet +# License: TDG-Attribution-NonCommercial-NoDistrib + + +""" +3D Anchor Generator for Voxel +""" +import math +import sys + +import numpy as np +import torch +from torch.nn.functional import sigmoid +import torch.nn.functional as F + +from opencood.data_utils.post_processor.base_postprocessor \ + import BasePostprocessor +from opencood.data_utils.post_processor.voxel_postprocessor \ + import VoxelPostprocessor +from opencood.utils import box_utils +from opencood.utils.box_overlaps import bbox_overlaps +from opencood.visualization import vis_utils +from opencood.utils.common_utils import limit_period + + +class UncertaintyVoxelPostprocessor(VoxelPostprocessor): + def __init__(self, anchor_params, train): + super(UncertaintyVoxelPostprocessor, self).__init__(anchor_params, train) + + def post_process_stage1(self, stage1_output_dict, anchor_box): + """ + This function is used to calculate the detections in advance + and save them(after return) for CoAlign box alignment. 
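+
+        Returns
+        -------
+        batch_pred_corners3d : list of torch.Tensor
+            Per-CAV box corners after NMS, each of shape (Ni, 8, 3).
+        batch_pred_boxes3d : list of torch.Tensor
+            Per-CAV boxes in the original (Ni, 7) encoding, aligned with the corners.
+        batch_uncertainty : list of torch.Tensor
+            Per-CAV regression uncertainties for the kept boxes.
+            All three are None when no anchor exceeds the score threshold.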
+ """ + cls_preds = stage1_output_dict['cls_preds'] + reg_preds = stage1_output_dict['reg_preds'] + unc_preds = stage1_output_dict['unc_preds'] + + # the final bounding box list + uncertainty_dim = unc_preds.shape[1] // cls_preds.shape[1] + cls_preds = F.sigmoid(cls_preds.permute(0, 2, 3, 1).contiguous()) # [N, H, W, anchor_num] + unc_preds = unc_preds.permute(0,2,3,1).contiguous() #[N, H, W, anchor_num * 2] + + # convert regression map back to bounding box + batch_box3d = self.delta_to_boxes3d(reg_preds, anchor_box) # (N, W*L*2, 7) + mask = torch.gt(cls_preds, self.params['target_args']['score_threshold']) + batch_num_box_count = [int(m.sum()) for m in mask] + mask = mask.view(1, -1) + mask_reg = mask.unsqueeze(2).repeat(1, 1, 7) + mask_sm = mask.unsqueeze(2).repeat(1, 1, uncertainty_dim) + + + boxes3d = torch.masked_select(batch_box3d.view(-1, 7), mask_reg[0]).view(-1, 7) + uncertainty = torch.masked_select(unc_preds.view(-1,uncertainty_dim), mask_sm[0]).view(-1,uncertainty_dim) # [N*H*W*#anchor_num, 2] -> [num_select, 2] + scores = torch.masked_select(cls_preds.view(-1), mask[0]) + if 'dir_preds' in stage1_output_dict and len(boxes3d) != 0: + dir_preds = stage1_output_dict['dir_preds'] + dir_offset = self.params['dir_args']['dir_offset'] + num_bins = self.params['dir_args']['num_bins'] + + dir_cls_preds = dir_preds.permute(0, 2, 3, 1).contiguous().reshape(1, -1, num_bins) # [1, N*H*W*2, 2] + dir_cls_preds = dir_cls_preds[mask] + # if rot_gt > 0, then the label is 1, then the regression target is [0, 1] + dir_labels = torch.max(dir_cls_preds, dim=-1)[1] # indices. shape [1, N*H*W*2]. value 0 or 1. If value is 1, then rot_gt > 0 + + period = (2 * np.pi / num_bins) # pi + dir_rot = limit_period( + boxes3d[..., 6] - dir_offset, 0, period + ) # 限制在0到pi之间 + boxes3d[..., 6] = dir_rot + dir_offset + period * dir_labels.to(boxes3d.dtype) # 转化0.25pi到2.5pi + boxes3d[..., 6] = limit_period(boxes3d[..., 6], 0.5, 2 * np.pi) # limit to [-pi, pi] + + # convert output to bounding box + if len(boxes3d) != 0: + # save origianl format box. [N, 7] + pred_box3d_original = boxes3d.detach() + # (N, 8, 3) + boxes3d_corner = box_utils.boxes_to_corners_3d(boxes3d, order=self.params['order']) + # (N, 8, 3) + pred_corners_tensor = boxes3d_corner # box_utils.project_box3d(boxes3d_corner, transformation_matrix) + # convert 3d bbx to 2d, (N,4) + projected_boxes2d = box_utils.corner_to_standup_box_torch(pred_corners_tensor) + # (N, 5) + pred_box2d_score_tensor = torch.cat((projected_boxes2d, scores.unsqueeze(1)), dim=1) + scores = pred_box2d_score_tensor[:, -1] + + else: + return None, None, None + + # divide boxes to each cav + + cur_idx = 0 + batch_pred_corners3d = [] # [[N1, 8, 3], [N2, 8, 3], ...] + batch_pred_boxes3d = [] # [[N1, 7], [N2, 7], ...] + batch_uncertainty = [] # [[N1, 2], [N2, 2], ...] 
+ batch_scores = [] + for n in batch_num_box_count: + cur_corners = pred_corners_tensor[cur_idx: cur_idx+n] + cur_boxes = pred_box3d_original[cur_idx: cur_idx+n] + cur_scores = scores[cur_idx:cur_idx+n] + cur_uncertainty = uncertainty[cur_idx: cur_idx+n] + # nms + keep_index = box_utils.nms_rotated(cur_corners, + cur_scores, + self.params['nms_thresh'] + ) + batch_pred_corners3d.append(cur_corners[keep_index]) + batch_pred_boxes3d.append(cur_boxes[keep_index]) + batch_scores.append(cur_scores[keep_index]) + batch_uncertainty.append(cur_uncertainty[keep_index]) + cur_idx += n + + return batch_pred_corners3d, batch_pred_boxes3d, batch_uncertainty + + + def post_process(self, data_dict, output_dict, return_uncertainty=False): + """ + For fusion_method: no_w_uncertainty + """ + # the final bounding box list + pred_box3d_list = [] + pred_box2d_list = [] + uncertainty_list = [] + for cav_id, cav_content in data_dict.items(): + if cav_id not in output_dict: + continue + # the transformation matrix to ego space + transformation_matrix = cav_content['transformation_matrix'] # no clean + + # (H, W, anchor_num, 7) + anchor_box = cav_content['anchor_box'] + + # classification probability + uncertainty_dim = output_dict[cav_id]['unc_preds'].shape[1] // output_dict[cav_id]['cls_preds'].shape[1] + prob = output_dict[cav_id]['cls_preds'] + prob = F.sigmoid(prob.permute(0, 2, 3, 1)) + prob = prob.reshape(1, -1) + + # regression map + reg = output_dict[cav_id]['reg_preds'] + + # uncertainty map + unc_preds = output_dict[cav_id]['unc_preds'].permute(0, 2, 3, 1).contiguous() + unc_preds = unc_preds.view(unc_preds.shape[0], -1, uncertainty_dim) # [N, H*W*#anchor_num, 2] + + # convert regression map back to bounding box + batch_box3d = self.delta_to_boxes3d(reg, anchor_box) # (N, H*W*#anchor_num, 7) + mask = \ + torch.gt(prob, self.params['target_args']['score_threshold']) + mask = mask.view(1, -1) + mask_reg = mask.unsqueeze(2).repeat(1, 1, 7) + mask_sm = mask.unsqueeze(2).repeat(1, 1, uncertainty_dim) + + # during validation/testing, the batch size should be 1 + assert batch_box3d.shape[0] == 1 + boxes3d = torch.masked_select(batch_box3d[0], + mask_reg[0]).view(-1, 7) + scores = torch.masked_select(prob[0], mask[0]) + uncertainty = torch.masked_select(unc_preds[0], mask_sm[0]).view(-1, uncertainty_dim) + + + # adding dir classifier + if 'dir_preds' in output_dict[cav_id].keys() and len(boxes3d) != 0: + dir_offset = self.params['dir_args']['dir_offset'] + num_bins = self.params['dir_args']['num_bins'] + + + dir_preds = output_dict[cav_id]['dir_preds'] # [N, H, W, 4] + dir_cls_preds = dir_preds.permute(0, 2, 3, 1).contiguous().reshape(1, -1, num_bins) # [1, N*H*W*2, 2] + dir_cls_preds = dir_cls_preds[mask] + # if rot_gt > 0, then the label is 1, then the regression target is [0, 1] + dir_labels = torch.max(dir_cls_preds, dim=-1)[1] # indices. shape [1, N*H*W*2]. value 0 or 1. 
If value is 1, then rot_gt > 0 + + period = (2 * np.pi / num_bins) # pi + dir_rot = limit_period( + boxes3d[..., 6] - dir_offset, 0, period + ) # 限制在0到pi之间 + boxes3d[..., 6] = dir_rot + dir_offset + period * dir_labels.to(dir_cls_preds.dtype) # 转化0.25pi到2.5pi + boxes3d[..., 6] = limit_period(boxes3d[..., 6], 0.5, 2 * np.pi) # limit to [-pi, pi] + + + # convert output to bounding box + if len(boxes3d) != 0: + # (N, 8, 3) + boxes3d_corner = \ + box_utils.boxes_to_corners_3d(boxes3d, + order=self.params['order']) + # (N, 8, 3) + projected_boxes3d = \ + box_utils.project_box3d(boxes3d_corner, + transformation_matrix) + # convert 3d bbx to 2d, (N,4) + projected_boxes2d = \ + box_utils.corner_to_standup_box_torch(projected_boxes3d) + # (N, 5) + boxes2d_score = \ + torch.cat((projected_boxes2d, scores.unsqueeze(1)), dim=1) + + pred_box2d_list.append(boxes2d_score) + pred_box3d_list.append(projected_boxes3d) + uncertainty_list.append(uncertainty) + + + if len(pred_box2d_list) ==0 or len(pred_box3d_list) == 0: + if return_uncertainty: + return None, None, None + return None, None + # shape: (N, 5) + pred_box2d_list = torch.vstack(pred_box2d_list) + uncertainty_list = torch.vstack(uncertainty_list) + uncertainty = uncertainty_list + # scores + scores = pred_box2d_list[:, -1] + # predicted 3d bbx + pred_box3d_tensor = torch.vstack(pred_box3d_list) + # remove large bbx + keep_index_1 = box_utils.remove_large_pred_bbx(pred_box3d_tensor) + keep_index_2 = box_utils.remove_bbx_abnormal_z(pred_box3d_tensor) + keep_index = torch.logical_and(keep_index_1, keep_index_2) + + pred_box3d_tensor = pred_box3d_tensor[keep_index] + scores = scores[keep_index] + uncertainty = uncertainty[keep_index] + + # nms + keep_index = box_utils.nms_rotated(pred_box3d_tensor, + scores, + self.params['nms_thresh'] + ) + + pred_box3d_tensor = pred_box3d_tensor[keep_index] + + # select cooresponding score + scores = scores[keep_index] + uncertainty = uncertainty[keep_index] + + # filter out the prediction out of the range. 
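+        # the per-box uncertainty is kept aligned with the surviving boxes: it is
+        # indexed with the same keep_index / mask as the boxes and scores at every
+        # filtering step (large-box removal, NMS, and the range filter below)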
+ mask = \ + box_utils.get_mask_for_boxes_within_range_torch(pred_box3d_tensor, self.params['gt_range']) + pred_box3d_tensor = pred_box3d_tensor[mask, :, :] + scores = scores[mask] + uncertainty = uncertainty[mask] + + assert scores.shape[0] == pred_box3d_tensor.shape[0] + + if return_uncertainty: + return pred_box3d_tensor, scores, uncertainty + + return pred_box3d_tensor, scores + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/voxel_postprocessor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/voxel_postprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..479dc274d15ed3a3ec4f6c53aa2df4468e855e6b --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/voxel_postprocessor.py @@ -0,0 +1,484 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , OpenPCDet +# License: TDG-Attribution-NonCommercial-NoDistrib + + +""" +3D Anchor Generator for Voxel +""" +import math +import sys + +import numpy as np +import torch +from torch.nn.functional import sigmoid +import torch.nn.functional as F + +from opencood.data_utils.post_processor.base_postprocessor \ + import BasePostprocessor +from opencood.utils import box_utils +from opencood.utils.box_overlaps import bbox_overlaps +from opencood.visualization import vis_utils +from opencood.utils.common_utils import limit_period + + +class VoxelPostprocessor(BasePostprocessor): + def __init__(self, anchor_params, train): + super(VoxelPostprocessor, self).__init__(anchor_params, train) + self.anchor_num = self.params['anchor_args']['num'] + + def generate_anchor_box(self): + # load_voxel_params and load_point_pillar_params leads to the same anchor + # if voxel_size * feature stride is the same. + W = self.params['anchor_args']['W'] + H = self.params['anchor_args']['H'] + + l = self.params['anchor_args']['l'] + w = self.params['anchor_args']['w'] + h = self.params['anchor_args']['h'] + r = self.params['anchor_args']['r'] + + assert self.anchor_num == len(r) + r = [math.radians(ele) for ele in r] + + vh = self.params['anchor_args']['vh'] # voxel_size + vw = self.params['anchor_args']['vw'] + + xrange = [self.params['anchor_args']['cav_lidar_range'][0], + self.params['anchor_args']['cav_lidar_range'][3]] + yrange = [self.params['anchor_args']['cav_lidar_range'][1], + self.params['anchor_args']['cav_lidar_range'][4]] + + if 'feature_stride' in self.params['anchor_args']: + feature_stride = self.params['anchor_args']['feature_stride'] + else: + feature_stride = 2 + + + x = np.linspace(xrange[0] + vw, xrange[1] - vw, W // feature_stride) # vw is not precise, vw * feature_stride / 2 should be better? + y = np.linspace(yrange[0] + vh, yrange[1] - vh, H // feature_stride) + + + cx, cy = np.meshgrid(x, y) + cx = np.tile(cx[..., np.newaxis], self.anchor_num) # center + cy = np.tile(cy[..., np.newaxis], self.anchor_num) + cz = np.ones_like(cx) * -1.0 + + w = np.ones_like(cx) * w + l = np.ones_like(cx) * l + h = np.ones_like(cx) * h + + r_ = np.ones_like(cx) + for i in range(self.anchor_num): + r_[..., i] = r[i] + + if self.params['order'] == 'hwl': # pointpillar + anchors = np.stack([cx, cy, cz, h, w, l, r_], axis=-1) # (50, 176, 2, 7) + + elif self.params['order'] == 'lhw': + anchors = np.stack([cx, cy, cz, l, h, w, r_], axis=-1) + else: + sys.exit('Unknown bbx order.') + + return anchors + + def generate_label(self, **kwargs): + """ + Generate targets for training. 
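+
+        Positive anchors are those whose 2D IoU with a ground-truth box exceeds
+        target_args.pos_threshold (plus the highest-IoU anchor of each ground-truth
+        box); anchors below neg_threshold for every ground-truth box are negatives.
+        Regression targets are the usual normalized residuals: xy offsets divided
+        by the anchor's 2D diagonal, z offset divided by the anchor height, log
+        ratios for the dimensions, and the raw yaw difference.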
+ + Parameters + ---------- + argv : list + gt_box_center:(max_num, 7), anchor:(H, W, anchor_num, 7) + + Returns + ------- + label_dict : dict + Dictionary that contains all target related info. + """ + assert self.params['order'] == 'hwl', 'Currently Voxel only support' \ + 'hwl bbx order.' + # (max_num, 7) + gt_box_center = kwargs['gt_box_center'] + # (H, W, anchor_num, 7) + anchors = kwargs['anchors'] + # (max_num) + masks = kwargs['mask'] + + # (H, W) + feature_map_shape = anchors.shape[:2] + + # (H*W*anchor_num, 7) + anchors = anchors.reshape(-1, 7) + # normalization factor, (H * W * anchor_num) + anchors_d = np.sqrt(anchors[:, 4] ** 2 + anchors[:, 5] ** 2) + + # (H, W, 2) + pos_equal_one = np.zeros((*feature_map_shape, self.anchor_num)) + neg_equal_one = np.zeros((*feature_map_shape, self.anchor_num)) + # (H, W, self.anchor_num * 7) + targets = np.zeros((*feature_map_shape, self.anchor_num * 7)) + + # (n, 7) + gt_box_center_valid = gt_box_center[masks == 1] + # (n, 8, 3) + gt_box_corner_valid = \ + box_utils.boxes_to_corners_3d(gt_box_center_valid, + self.params['order']) + # (H*W*anchor_num, 8, 3) + anchors_corner = \ + box_utils.boxes_to_corners_3d(anchors, + order=self.params['order']) + # (H*W*anchor_num, 4) + anchors_standup_2d = \ + box_utils.corner2d_to_standup_box(anchors_corner) + # (n, 4) + gt_standup_2d = \ + box_utils.corner2d_to_standup_box(gt_box_corner_valid) + + # (H*W*anchor_n) + iou = bbox_overlaps( + np.ascontiguousarray(anchors_standup_2d).astype(np.float32), + np.ascontiguousarray(gt_standup_2d).astype(np.float32), + ) + + # the anchor boxes has the largest iou across + # shape: (n) + id_highest = np.argmax(iou.T, axis=1) + # [0, 1, 2, ..., n-1] + id_highest_gt = np.arange(iou.T.shape[0]) + # make sure all highest iou is larger than 0 + mask = iou.T[id_highest_gt, id_highest] > 0 + id_highest, id_highest_gt = id_highest[mask], id_highest_gt[mask] + + + # find anchors iou > params['pos_iou'] + id_pos, id_pos_gt = \ + np.where(iou > + self.params['target_args']['pos_threshold']) + # find anchors iou params['neg_iou'] + id_neg = np.where(np.sum(iou < + self.params['target_args']['neg_threshold'], + axis=1) == iou.shape[1])[0] + id_pos = np.concatenate([id_pos, id_highest]) + id_pos_gt = np.concatenate([id_pos_gt, id_highest_gt]) + id_pos, index = np.unique(id_pos, return_index=True) + id_pos_gt = id_pos_gt[index] + id_neg.sort() + + # cal the target and set the equal one + index_x, index_y, index_z = np.unravel_index( + id_pos, (*feature_map_shape, self.anchor_num)) + pos_equal_one[index_x, index_y, index_z] = 1 + + # calculate the targets + targets[index_x, index_y, np.array(index_z) * 7] = \ + (gt_box_center_valid[id_pos_gt, 0] - anchors[id_pos, 0]) / anchors_d[id_pos] + targets[index_x, index_y, np.array(index_z) * 7 + 1] = \ + (gt_box_center_valid[id_pos_gt, 1] - anchors[id_pos, 1]) / anchors_d[id_pos] + targets[index_x, index_y, np.array(index_z) * 7 + 2] = \ + (gt_box_center_valid[id_pos_gt, 2] - anchors[id_pos, 2]) / anchors[id_pos, 3] + targets[index_x, index_y, np.array(index_z) * 7 + 3] = np.log( + gt_box_center_valid[id_pos_gt, 3] / anchors[id_pos, 3]) + targets[index_x, index_y, np.array(index_z) * 7 + 4] = np.log( + gt_box_center_valid[id_pos_gt, 4] / anchors[id_pos, 4]) + targets[index_x, index_y, np.array(index_z) * 7 + 5] = np.log( + gt_box_center_valid[id_pos_gt, 5] / anchors[id_pos, 5]) + targets[index_x, index_y, np.array(index_z) * 7 + 6] = ( + gt_box_center_valid[id_pos_gt, 6] - anchors[id_pos, 6]) + + index_x, index_y, index_z = 
np.unravel_index( + id_neg, (*feature_map_shape, self.anchor_num)) + neg_equal_one[index_x, index_y, index_z] = 1 + + # to avoid a box be pos/neg in the same time + index_x, index_y, index_z = np.unravel_index( + id_highest, (*feature_map_shape, self.anchor_num)) + neg_equal_one[index_x, index_y, index_z] = 0 + + + label_dict = {'pos_equal_one': pos_equal_one, + 'neg_equal_one': neg_equal_one, + 'targets': targets} + + return label_dict + + @staticmethod + def collate_batch(label_batch_list): + """ + Customized collate function for target label generation. + + Parameters + ---------- + label_batch_list : list + The list of dictionary that contains all labels for several + frames. + + Returns + ------- + target_batch : dict + Reformatted labels in torch tensor. + """ + pos_equal_one = [] + neg_equal_one = [] + targets = [] + + for i in range(len(label_batch_list)): + pos_equal_one.append(label_batch_list[i]['pos_equal_one']) + neg_equal_one.append(label_batch_list[i]['neg_equal_one']) + targets.append(label_batch_list[i]['targets']) + + pos_equal_one = \ + torch.from_numpy(np.array(pos_equal_one)) + neg_equal_one = \ + torch.from_numpy(np.array(neg_equal_one)) + targets = \ + torch.from_numpy(np.array(targets)) + + return {'targets': targets, + 'pos_equal_one': pos_equal_one, + 'neg_equal_one': neg_equal_one} + + def post_process(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D/3D bounding box. + Step1: convert each cav's output to bounding box format + Step2: project the bounding boxes to ego space. + Step:3 NMS + + For early and intermediate fusion, + data_dict only contains ego. + + For late fusion, + data_dcit contains all cavs, so we need transformation matrix. + + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box3d_tensor : torch.Tensor + The prediction bounding box tensor after NMS. + gt_box3d_tensor : torch.Tensor + The groundtruth bounding box tensor. + """ + # the final bounding box list + pred_box3d_list = [] + pred_box2d_list = [] + for cav_id, cav_content in data_dict.items(): + assert cav_id in output_dict + # the transformation matrix to ego space + transformation_matrix = cav_content['transformation_matrix'] # no clean + + # rename variable + if 'psm' in output_dict[cav_id]: + output_dict[cav_id]['cls_preds'] = output_dict[cav_id]['psm'] + if 'rm' in output_dict: + output_dict[cav_id]['reg_preds'] = output_dict[cav_id]['rm'] + if 'dm' in output_dict: + output_dict[cav_id]['dir_preds'] = output_dict[cav_id]['dm'] + + # (H, W, anchor_num, 7) + anchor_box = cav_content['anchor_box'] + + # classification probability + prob = output_dict[cav_id]['cls_preds'] + prob = F.sigmoid(prob.permute(0, 2, 3, 1)) + # for multi-class, we need to select the class with the highest prob + if prob.shape[-1] > 1: + prob = torch.max(prob, dim=-1)[0] + prob = prob.reshape(1, -1) + + # regression map + reg = output_dict[cav_id]['reg_preds'] + + # convert regression map back to bounding box + if len(reg.shape) == 4: # anchor-based. PointPillars, SECOND + batch_box3d = self.delta_to_boxes3d(reg, anchor_box) + else: # anchor-free. 
CenterPoint
+                batch_box3d = reg.view(1, -1, 7)
+
+            mask = \
+                torch.gt(prob, self.params['target_args']['score_threshold'])
+            mask = mask.view(1, -1)
+            mask_reg = mask.unsqueeze(2).repeat(1, 1, 7)
+
+            # during validation/testing, the batch size should be 1
+            assert batch_box3d.shape[0] == 1
+            boxes3d = torch.masked_select(batch_box3d[0],
+                                          mask_reg[0]).view(-1, 7)
+            scores = torch.masked_select(prob[0], mask[0])
+
+            # adding dir classifier
+            if 'dir_preds' in output_dict[cav_id].keys() and len(boxes3d) != 0:
+                dir_offset = self.params['dir_args']['dir_offset']
+                num_bins = self.params['dir_args']['num_bins']
+
+                dm = output_dict[cav_id]['dir_preds'] # [N, H, W, 4]
+                dir_cls_preds = dm.permute(0, 2, 3, 1).contiguous().reshape(1, -1, num_bins) # [1, N*H*W*2, 2]
+                dir_cls_preds = dir_cls_preds[mask]
+                # if rot_gt > 0, then the label is 1, then the regression target is [0, 1]
+                dir_labels = torch.max(dir_cls_preds, dim=-1)[1] # indices. shape [1, N*H*W*2]. value 0 or 1. If value is 1, then rot_gt > 0
+
+                period = (2 * np.pi / num_bins) # pi
+                dir_rot = limit_period(
+                    boxes3d[..., 6] - dir_offset, 0, period
+                ) # limit to the range [0, period)
+                boxes3d[..., 6] = dir_rot + dir_offset + period * dir_labels.to(dir_cls_preds.dtype) # map back to roughly [0.25*pi, 2.5*pi]
+                boxes3d[..., 6] = limit_period(boxes3d[..., 6], 0.5, 2 * np.pi) # limit to [-pi, pi]
+
+            if 'iou_preds' in output_dict[cav_id].keys() and len(boxes3d) != 0:
+                iou = torch.sigmoid(output_dict[cav_id]['iou_preds'].permute(0, 2, 3, 1).contiguous()).reshape(1, -1)
+                iou = torch.clamp(iou, min=0.0, max=1.0)
+                iou = (iou + 1) * 0.5
+                scores = scores * torch.pow(iou.masked_select(mask), 4)
+
+            # convert output to bounding box
+            if len(boxes3d) != 0:
+                # (N, 8, 3)
+                boxes3d_corner = \
+                    box_utils.boxes_to_corners_3d(boxes3d,
+                                                  order=self.params['order'])
+
+                # STEP 2
+                # (N, 8, 3)
+                projected_boxes3d = \
+                    box_utils.project_box3d(boxes3d_corner,
+                                            transformation_matrix)
+                # convert 3d bbx to 2d, (N,4)
+                projected_boxes2d = \
+                    box_utils.corner_to_standup_box_torch(projected_boxes3d)
+                # (N, 5)
+                boxes2d_score = \
+                    torch.cat((projected_boxes2d, scores.unsqueeze(1)), dim=1)
+
+                pred_box2d_list.append(boxes2d_score)
+                pred_box3d_list.append(projected_boxes3d)
+
+        if len(pred_box2d_list) == 0 or len(pred_box3d_list) == 0:
+            return None, None
+        # shape: (N, 5)
+        pred_box2d_list = torch.vstack(pred_box2d_list)
+        # scores
+        scores = pred_box2d_list[:, -1]
+        # predicted 3d bbx
+        pred_box3d_tensor = torch.vstack(pred_box3d_list)
+        # remove large bbx
+        keep_index_1 = box_utils.remove_large_pred_bbx(pred_box3d_tensor)
+        keep_index_2 = box_utils.remove_bbx_abnormal_z(pred_box3d_tensor)
+        keep_index = torch.logical_and(keep_index_1, keep_index_2)
+
+        pred_box3d_tensor = pred_box3d_tensor[keep_index]
+        scores = scores[keep_index]
+
+        # STEP3
+        # nms
+        keep_index = box_utils.nms_rotated(pred_box3d_tensor,
+                                           scores,
+                                           self.params['nms_thresh']
+                                           )
+
+        pred_box3d_tensor = pred_box3d_tensor[keep_index]
+
+        # select corresponding score
+        scores = scores[keep_index]
+
+        # filter out the prediction out of the range.
with z-dim + pred_box3d_np = pred_box3d_tensor.cpu().numpy() + pred_box3d_np, mask = box_utils.mask_boxes_outside_range_numpy(pred_box3d_np, + self.params['gt_range'], + order=None, + return_mask=True) + pred_box3d_tensor = torch.from_numpy(pred_box3d_np).to(device=pred_box3d_tensor.device) + scores = scores[mask] + + assert scores.shape[0] == pred_box3d_tensor.shape[0] + + return pred_box3d_tensor, scores + + @staticmethod + def delta_to_boxes3d(deltas, anchors): + """ + Convert the output delta to 3d bbx. + + Parameters + ---------- + deltas : torch.Tensor + (N, 14, H, W) + anchors : torch.Tensor + (W, L, 2, 7) -> xyzhwlr + + Returns + ------- + box3d : torch.Tensor + (N, W*L*2, 7) + """ + # batch size + N = deltas.shape[0] + deltas = deltas.permute(0, 2, 3, 1).contiguous().view(N, -1, 7) + boxes3d = torch.zeros_like(deltas) + + if deltas.is_cuda: + anchors = anchors.cuda() + boxes3d = boxes3d.cuda() + + # (W*L*2, 7) + anchors_reshaped = anchors.view(-1, 7).float() + # the diagonal of the anchor 2d box, (W*L*2) + anchors_d = torch.sqrt( + anchors_reshaped[:, 4] ** 2 + anchors_reshaped[:, 5] ** 2) + anchors_d = anchors_d.repeat(N, 2, 1).transpose(1, 2) + anchors_reshaped = anchors_reshaped.repeat(N, 1, 1) + + # Inv-normalize to get xyz + boxes3d[..., [0, 1]] = torch.mul(deltas[..., [0, 1]], anchors_d) + \ + anchors_reshaped[..., [0, 1]] + boxes3d[..., [2]] = torch.mul(deltas[..., [2]], + anchors_reshaped[..., [3]]) + \ + anchors_reshaped[..., [2]] + # hwl + boxes3d[..., [3, 4, 5]] = torch.exp( + deltas[..., [3, 4, 5]]) * anchors_reshaped[..., [3, 4, 5]] + # yaw angle + boxes3d[..., 6] = deltas[..., 6] + anchors_reshaped[..., 6] + + return boxes3d + + @staticmethod + def visualize(pred_box_tensor, gt_tensor, pcd, show_vis, save_path, dataset=None): + """ + Visualize the prediction, ground truth with point cloud together. + + Parameters + ---------- + pred_box_tensor : torch.Tensor + (N, 8, 3) prediction. + + gt_tensor : torch.Tensor + (N, 8, 3) groundtruth bbx + + pcd : torch.Tensor + PointCloud, (N, 4). + + show_vis : bool + Whether to show visualization. + + save_path : str + Save the visualization results to given path. + + dataset : BaseDataset + opencood dataset object. + + """ + vis_utils.visualize_single_sample_output_gt(pred_box_tensor, + gt_tensor, + pcd, + show_vis, + save_path) diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__init__.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4096f05d0d07d4adc11e232097768d0d4f169a24 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__init__.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + +from opencood.data_utils.pre_processor.base_preprocessor import BasePreprocessor +from opencood.data_utils.pre_processor.voxel_preprocessor import VoxelPreprocessor +from opencood.data_utils.pre_processor.bev_preprocessor import BevPreprocessor +from opencood.data_utils.pre_processor.sp_voxel_preprocessor import SpVoxelPreprocessor + +__all__ = { + 'BasePreprocessor': BasePreprocessor, + 'VoxelPreprocessor': VoxelPreprocessor, + 'BevPreprocessor': BevPreprocessor, + 'SpVoxelPreprocessor': SpVoxelPreprocessor +} + + +def build_preprocessor(preprocess_cfg, train): + process_method_name = preprocess_cfg['core_method'] + error_message = f"{process_method_name} is not found. 
" \ + f"Please add your processor file's name in opencood/" \ + f"data_utils/processor/init.py" + assert process_method_name in ['BasePreprocessor', 'VoxelPreprocessor', + 'BevPreprocessor', 'SpVoxelPreprocessor'], \ + error_message + + processor = __all__[process_method_name]( + preprocess_params=preprocess_cfg, + train=train + ) + + return processor diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/__init__.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..681acc1dc7d70e93a280a6c31384f7bc509e4047 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/__init__.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/base_preprocessor.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/base_preprocessor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3639e6c5518f5cd7c3ce14029804896303729ecd Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/base_preprocessor.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/bev_preprocessor.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/bev_preprocessor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..45ff28b233651916134c0052dfc7c98d646d3657 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/bev_preprocessor.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/sp_voxel_preprocessor.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/sp_voxel_preprocessor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..526a8985aa1ba1252d6040b4de3cccef98d44b52 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/sp_voxel_preprocessor.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/voxel_preprocessor.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/voxel_preprocessor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..397b3ac2789fde44dc5053a7511028ab1a033508 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/voxel_preprocessor.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/base_preprocessor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/base_preprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..34215183a3f67bbb4c3801f0e072e004734b2264 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/base_preprocessor.py @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + 
+import numpy as np + +from opencood.utils import pcd_utils + + +class BasePreprocessor(object): + """ + Basic Lidar pre-processor. + + Parameters + ---------- + preprocess_params : dict + The dictionary containing all parameters of the preprocessing. + + train : bool + Train or test mode. + """ + + def __init__(self, preprocess_params, train): + self.params = preprocess_params + self.train = train + + def preprocess(self, pcd_np): + """ + Preprocess the lidar points by simple sampling. + + Parameters + ---------- + pcd_np : np.ndarray + The raw lidar. + + Returns + ------- + data_dict : the output dictionary. + """ + data_dict = {} + sample_num = self.params['args']['sample_num'] + + pcd_np = pcd_utils.downsample_lidar(pcd_np, sample_num) + data_dict['downsample_lidar'] = pcd_np + + return data_dict + + def project_points_to_bev_map(self, points, ratio=0.1): + """ + Project points to BEV occupancy map with default ratio=0.1. + + Parameters + ---------- + points : np.ndarray + (N, 3) / (N, 4) + + ratio : float + Discretization parameters. Default is 0.1. + + Returns + ------- + bev_map : np.ndarray + BEV occupancy map including projected points with shape + (img_row, img_col). + + """ + L1, W1, H1, L2, W2, H2 = self.params["cav_lidar_range"] + img_row = int((L2 - L1) / ratio) + img_col = int((W2 - W1) / ratio) + bev_map = np.zeros((img_row, img_col)) + bev_origin = np.array([L1, W1, H1]).reshape(1, -1) + # (N, 3) + indices = ((points[:, :3] - bev_origin) / ratio).astype(int) + mask = np.logical_and(indices[:, 0] > 0, indices[:, 0] < img_row) + mask = np.logical_and(mask, np.logical_and(indices[:, 1] > 0, + indices[:, 1] < img_col)) + indices = indices[mask, :] + bev_map[indices[:, 0], indices[:, 1]] = 1 + return bev_map diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/bev_preprocessor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/bev_preprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..8b80ee04b24c69513a5e13964939e28b360584b8 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/bev_preprocessor.py @@ -0,0 +1,125 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang +# License: TDG-Attribution-NonCommercial-NoDistrib + + +""" +Convert lidar to bev +""" + +import numpy as np +import torch +from opencood.data_utils.pre_processor.base_preprocessor import \ + BasePreprocessor + + +class BevPreprocessor(BasePreprocessor): + def __init__(self, preprocess_params, train): + super(BevPreprocessor, self).__init__(preprocess_params, train) + self.lidar_range = self.params['cav_lidar_range'] + self.geometry_param = preprocess_params["geometry_param"] + + def preprocess(self, pcd_raw): + """ + Preprocess the lidar points to BEV representations. + + Parameters + ---------- + pcd_raw : np.ndarray + The raw lidar. + + Returns + ------- + data_dict : the structured output dictionary. 
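+
+        Examples
+        --------
+        Illustrative sketch only; the actual shape of ``bev_input`` is set by
+        ``geometry_param['input_shape']`` in the yaml config, and
+        ``bev_preprocessor`` is assumed to be an already-built instance::
+
+            >>> pcd = np.random.rand(1000, 4).astype(np.float32)
+            >>> out = bev_preprocessor.preprocess(pcd)
+            >>> out['bev_input'].shape   # (C, H, W), channels first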
+ """ + bev = np.zeros(self.geometry_param['input_shape'], dtype=np.float32) + intensity_map_count = np.zeros((bev.shape[0], bev.shape[1]), + dtype=np.int) + bev_origin = np.array( + [self.geometry_param["L1"], self.geometry_param["W1"], + self.geometry_param["H1"]]).reshape(1, -1) + + indices = ((pcd_raw[:, :3] - bev_origin) / self.geometry_param[ + "res"]).astype(int) + + # if any point hit this voxel, set the voxel to 1 + for i in range(indices.shape[0]): + bev[indices[i, 0], indices[i, 1], indices[i, 2]] = 1 + bev[indices[i, 0], indices[i, 1], -1] += pcd_raw[i, 3] # intensity + intensity_map_count[indices[i, 0], indices[i, 1]] += 1 + divide_mask = intensity_map_count != 0 + bev[divide_mask, -1] = np.divide(bev[divide_mask, -1], + intensity_map_count[divide_mask]) + + data_dict = { + "bev_input": np.transpose(bev, (2, 0, 1)) # (C,H,W) + } + return data_dict + + @staticmethod + def collate_batch_list(batch): + """ + Customized pytorch data loader collate function. + + Parameters + ---------- + batch : list + List of dictionary. Each dictionary represent a single frame. + + Returns + ------- + processed_batch : dict + Updated lidar batch. + """ + bev_input_list = [ + x["bev_input"][np.newaxis, ...] for x in batch + ] + processed_batch = { + "bev_input": torch.from_numpy( + np.concatenate(bev_input_list, axis=0)) + } + return processed_batch + + @staticmethod + def collate_batch_dict(batch): + """ + Customized pytorch data loader collate function. + + Parameters + ---------- + batch : dict + Dict of list. Each element represents a CAV. + + Returns + ------- + processed_batch : dict + Updated lidar batch. + """ + bev_input_list = [ + x[np.newaxis, ...] for x in batch["bev_input"] + ] + processed_batch = { + "bev_input": torch.from_numpy( + np.concatenate(bev_input_list, axis=0)) + } + return processed_batch + + def collate_batch(self, batch): + """ + Customized pytorch data loader collate function. + + Parameters + ---------- + batch : list / dict + Batched data. + Returns + ------- + processed_batch : dict + Updated lidar batch. 
+ """ + if isinstance(batch, list): + return self.collate_batch_list(batch) + elif isinstance(batch, dict): + return self.collate_batch_dict(batch) + else: + raise NotImplemented diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/sp_voxel_preprocessor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/sp_voxel_preprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..068965ee3300cb5d3d320dce881ac4ea5f03170f --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/sp_voxel_preprocessor.py @@ -0,0 +1,174 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , OpenPCDet +# License: TDG-Attribution-NonCommercial-NoDistrib + +""" +Transform points to voxels using sparse conv library +""" +import sys + +import numpy as np +import torch +from icecream import ic + +from opencood.data_utils.pre_processor.base_preprocessor import \ + BasePreprocessor + + +class SpVoxelPreprocessor(BasePreprocessor): + def __init__(self, preprocess_params, train): + super(SpVoxelPreprocessor, self).__init__(preprocess_params, + train) + self.spconv = 1 + try: + # spconv v1.x + from spconv.utils import VoxelGeneratorV2 as VoxelGenerator + except: + # spconv v2.x + from cumm import tensorview as tv + from spconv.utils import Point2VoxelCPU3d as VoxelGenerator + self.tv = tv + self.spconv = 2 + self.lidar_range = self.params['cav_lidar_range'] + self.voxel_size = self.params['args']['voxel_size'] + self.max_points_per_voxel = self.params['args']['max_points_per_voxel'] + + if train: + self.max_voxels = self.params['args']['max_voxel_train'] + else: + self.max_voxels = self.params['args']['max_voxel_test'] + + grid_size = (np.array(self.lidar_range[3:6]) - + np.array(self.lidar_range[0:3])) / np.array(self.voxel_size) + self.grid_size = np.round(grid_size).astype(np.int64) + + # use sparse conv library to generate voxel + if self.spconv == 1: + self.voxel_generator = VoxelGenerator( + voxel_size=self.voxel_size, + point_cloud_range=self.lidar_range, + max_num_points=self.max_points_per_voxel, + max_voxels=self.max_voxels + ) + else: + self.voxel_generator = VoxelGenerator( + vsize_xyz=self.voxel_size, + coors_range_xyz=self.lidar_range, + max_num_points_per_voxel=self.max_points_per_voxel, + num_point_features=4, + max_num_voxels=self.max_voxels + ) + + def preprocess(self, pcd_np): + data_dict = {} + if self.spconv == 1: + voxel_output = self.voxel_generator.generate(pcd_np) + else: + pcd_tv = self.tv.from_numpy(pcd_np) + voxel_output = self.voxel_generator.point_to_voxel(pcd_tv) + if isinstance(voxel_output, dict): + voxels, coordinates, num_points = \ + voxel_output['voxels'], voxel_output['coordinates'], \ + voxel_output['num_points_per_voxel'] + else: + voxels, coordinates, num_points = voxel_output + + if self.spconv == 2: + voxels = voxels.numpy() + coordinates = coordinates.numpy() + num_points = num_points.numpy() + + data_dict['voxel_features'] = voxels + data_dict['voxel_coords'] = coordinates + data_dict['voxel_num_points'] = num_points + + return data_dict + + def collate_batch(self, batch): + """ + Customized pytorch data loader collate function. + + Parameters + ---------- + batch : list or dict + List or dictionary. + + Returns + ------- + processed_batch : dict + Updated lidar batch. 
+ """ + + if isinstance(batch, list): + return self.collate_batch_list(batch) + elif isinstance(batch, dict): + return self.collate_batch_dict(batch) + else: + sys.exit('Batch has too be a list or a dictionarn') + + @staticmethod + def collate_batch_list(batch): + """ + Customized pytorch data loader collate function. + + Parameters + ---------- + batch : list + List of dictionary. Each dictionary represent a single frame. + + Returns + ------- + processed_batch : dict + Updated lidar batch. + """ + voxel_features = [] + voxel_num_points = [] + voxel_coords = [] + + for i in range(len(batch)): + voxel_features.append(batch[i]['voxel_features']) + voxel_num_points.append(batch[i]['voxel_num_points']) + coords = batch[i]['voxel_coords'] + voxel_coords.append( + np.pad(coords, ((0, 0), (1, 0)), + mode='constant', constant_values=i)) + + voxel_num_points = torch.from_numpy(np.concatenate(voxel_num_points)) + voxel_features = torch.from_numpy(np.concatenate(voxel_features)) + voxel_coords = torch.from_numpy(np.concatenate(voxel_coords)) + + return {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points} + + @staticmethod + def collate_batch_dict(batch: dict): + """ + Collate batch if the batch is a dictionary, + eg: {'voxel_features': [feature1, feature2...., feature n]} + + Parameters + ---------- + batch : dict + + Returns + ------- + processed_batch : dict + Updated lidar batch. + """ + voxel_features = \ + torch.from_numpy(np.concatenate(batch['voxel_features'])) + voxel_num_points = \ + torch.from_numpy(np.concatenate(batch['voxel_num_points'])) + coords = batch['voxel_coords'] + voxel_coords = [] + + for i in range(len(coords)): + voxel_coords.append( + np.pad(coords[i], ((0, 0), (1, 0)), + mode='constant', constant_values=i)) + voxel_coords = torch.from_numpy(np.concatenate(voxel_coords)) + + return {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points} diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/voxel_preprocessor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/voxel_preprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..2149ce39bb8c72fe4d8287ec5956a4817e111917 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/voxel_preprocessor.py @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +""" +Convert lidar to voxel. This class was manually designed, and we recommend +to use sp_voxel_preprocessor. +""" +import sys + +import numpy as np +import torch + +from opencood.data_utils.pre_processor.base_preprocessor import \ + BasePreprocessor + + +class VoxelPreprocessor(BasePreprocessor): + def __init__(self, preprocess_params, train): + super(VoxelPreprocessor, self).__init__(preprocess_params, train) + # TODO: add intermediate lidar range later + self.lidar_range = self.params['cav_lidar_range'] + + self.vw = self.params['args']['vw'] + self.vh = self.params['args']['vh'] + self.vd = self.params['args']['vd'] + self.T = self.params['args']['T'] + + def preprocess(self, pcd_np): + """ + Preprocess the lidar points by voxelization. + + Parameters + ---------- + pcd_np : np.ndarray + The raw lidar. + + Returns + ------- + data_dict : the structured output dictionary. 
+ """ + data_dict = {} + + # calculate the voxel coordinates + voxel_coords = ((pcd_np[:, :3] - + np.floor(np.array([self.lidar_range[0], + self.lidar_range[1], + self.lidar_range[2]])) / ( + self.vw, self.vh, self.vd))).astype(np.int32) + + # convert to (D, H, W) as the paper + voxel_coords = voxel_coords[:, [2, 1, 0]] + voxel_coords, inv_ind, voxel_counts = np.unique(voxel_coords, axis=0, + return_inverse=True, + return_counts=True) + + voxel_features = [] + + for i in range(len(voxel_coords)): + voxel = np.zeros((self.T, 7), dtype=np.float32) + pts = pcd_np[inv_ind == i] + if voxel_counts[i] > self.T: + pts = pts[:self.T, :] + voxel_counts[i] = self.T + + # augment the points + voxel[:pts.shape[0], :] = np.concatenate((pts, pts[:, :3] - + np.mean(pts[:, :3], 0)), + axis=1) + voxel_features.append(voxel) + + data_dict['voxel_features'] = np.array(voxel_features) + data_dict['voxel_coords'] = voxel_coords + + return data_dict + + def collate_batch(self, batch): + """ + Customized pytorch data loader collate function. + + Parameters + ---------- + batch : list or dict + List or dictionary. + + Returns + ------- + processed_batch : dict + Updated lidar batch. + """ + + if isinstance(batch, list): + return self.collate_batch_list(batch) + elif isinstance(batch, dict): + return self.collate_batch_dict(batch) + else: + sys.exit('Batch has too be a list or a dictionarn') + + @staticmethod + def collate_batch_list(batch): + """ + Customized pytorch data loader collate function. + + Parameters + ---------- + batch : list + List of dictionary. Each dictionary represent a single frame. + + Returns + ------- + processed_batch : dict + Updated lidar batch. + """ + voxel_features = [] + voxel_coords = [] + + for i in range(len(batch)): + voxel_features.append(batch[i]['voxel_features']) + coords = batch[i]['voxel_coords'] + voxel_coords.append( + np.pad(coords, ((0, 0), (1, 0)), + mode='constant', constant_values=i)) + + voxel_features = torch.from_numpy(np.concatenate(voxel_features)) + voxel_coords = torch.from_numpy(np.concatenate(voxel_coords)) + + return {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords} + + @staticmethod + def collate_batch_dict(batch: dict): + """ + Collate batch if the batch is a dictionary, + eg: {'voxel_features': [feature1, feature2...., feature n]} + + Parameters + ---------- + batch : dict + + Returns + ------- + processed_batch : dict + Updated lidar batch. 
+ """ + voxel_features = \ + torch.from_numpy(np.concatenate(batch['voxel_features'])) + coords = batch['voxel_coords'] + voxel_coords = [] + + for i in range(len(coords)): + voxel_coords.append( + np.pad(coords[i], ((0, 0), (1, 0)), + mode='constant', constant_values=i)) + voxel_coords = torch.from_numpy(np.concatenate(voxel_coords)) + + return {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords} diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/__init__.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/__pycache__/__init__.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f7f2a0f9b3496c783b4ac624cf0c3fc2fa294ce1 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/__pycache__/__init__.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/__pycache__/center_point_loss_multiclass.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/__pycache__/center_point_loss_multiclass.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..676be070edf9eef84df7f22f8c639c49d8cba455 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/__pycache__/center_point_loss_multiclass.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/heterception_loss.py.2stage b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/heterception_loss.py.2stage new file mode 100644 index 0000000000000000000000000000000000000000..e4cec2839134406692d0f5575e1c2bb7c39a8385 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/heterception_loss.py.2stage @@ -0,0 +1,251 @@ +import torch +from torch import nn +import numpy as np +from opencood.loss.ciassd_loss import CiassdLoss, weighted_smooth_l1_loss +from icecream import ic + +class HeterceptionLoss(nn.Module): + def __init__(self, args): + super(HeterceptionLoss, self).__init__() + # self.ciassd_loss = CiassdLoss(args['stage1']) + self.ciassd_loss = CiassdLoss(args['shared_head_out'], keyname='shared_head_out') + + self.cls = args['stage2']['cls'] + self.reg = args['stage2']['reg'] + self.iou = args['stage2']['iou'] + self.kd = args['stage2']['kd'] + self.cons = args['stage2']['cons'] + self.kd_fn = nn.MSELoss(reduce='mean') + + self.loss_dict = {} + + def forward(self, output_dict, label_dict): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + ciassd_loss = self.ciassd_loss(output_dict, label_dict) + + # only update ciassd if no bbox is detected in the first stage + if 'stage2_out' not in output_dict: + self.loss_dict = { + 'loss': ciassd_loss, + } + return ciassd_loss + + loss = 0 + self.loss_dict = {} + + # rcnn out + rcnn_cls = output_dict['stage2_out']['rcnn_cls'].view(1, -1, 1) + rcnn_iou = output_dict['stage2_out']['rcnn_iou'].view(1, -1, 1) + rcnn_reg = output_dict['stage2_out']['rcnn_reg'].view(1, -1, 7) + + tgt_cls = output_dict['rcnn_label_dict']['cls_tgt'].view(1, -1, 1) + tgt_iou = output_dict['rcnn_label_dict']['iou_tgt'].view(1, -1, 1) + tgt_reg = output_dict['rcnn_label_dict']['reg_tgt'].view(1, -1, 7) + + pos_norm = tgt_cls.sum() 
+ # cls loss + loss_cls = weighted_sigmoid_binary_cross_entropy(rcnn_cls, tgt_cls) + + + # iou loss + # TODO: also count the negative samples + tgt_iou = 2 * (tgt_iou - 0.5) # normalize to -1, 1 + loss_iou = weighted_smooth_l1_loss(rcnn_iou, tgt_iou, + weights=tgt_cls).mean() + + # regression loss + # [deprecated by Yifan Lu] Target resampling : Generate a weights mask to force the regressor concentrate on low iou predictions + # sample 50% with iou>0.7 and 50% < 0.7 + weights = torch.ones(tgt_iou.shape, device=tgt_iou.device) + weights[tgt_cls == 0] = 0 + # neg = torch.logical_and(tgt_iou < 0.7, tgt_cls != 0) + # pos = torch.logical_and(tgt_iou >= 0.7, tgt_cls != 0) + # num_neg = int(neg.sum(dim=1)) + # num_pos = int(pos.sum(dim=1)) + # num_pos_smps = max(num_neg, 2) + # pos_indices = torch.where(pos)[1] + # not_selsected = torch.randperm(num_pos)[:num_pos - num_pos_smps] + # # not_selsected_indices = pos_indices[not_selsected] + # weights[:, pos_indices[not_selsected]] = 0 + loss_reg = weighted_smooth_l1_loss(rcnn_reg, tgt_reg, + weights=weights / max(weights.sum(), + 1)).sum() + + loss_cls_reduced = loss_cls * self.cls['weight'] + loss_iou_reduced = loss_iou * self.iou['weight'] + loss_reg_reduced = loss_reg * self.reg['weight'] + + # if torch.isnan(loss_reg_reduced): + # print('debug') + + rcnn_loss = loss_cls_reduced + loss_iou_reduced + loss_reg_reduced + + # knowledge distillation + if 'kd_items' in output_dict: + lidar_features = output_dict['kd_items']["lidar_roi_features"] # [C, sum(bev_grids)] + camera_features = output_dict['kd_items']["camera_roi_features"] # [C, sum(bev_grids)] + kd_loss_reduced = self.kd_fn(lidar_features, camera_features) * self.kd['weight'] + loss += kd_loss_reduced + self.loss_dict.update({'kd_loss': kd_loss_reduced}) + + + # transformer-based consistency + if 'cons_items' in output_dict: + random_cav_mask = output_dict['cons_items']['random_cav_mask'] # [sum(RoI_Hi*RoI_Wi), max_cav] + valid_mask = random_cav_mask[:, 0] == 0 + ego_feature = output_dict['cons_items']['fused_roi_feature'][:,0,:][valid_mask] # [sum(RoI_Hi*RoI_Wi), C] + cav_feature = output_dict['cons_items']['fused_roi_feature'][random_cav_mask==1][valid_mask] + cons_loss_reduced = self.kd_fn(ego_feature, cav_feature) * self.cons['weight'] + loss += cons_loss_reduced + self.loss_dict.update({'cons_loss': cons_loss_reduced}) + + loss += rcnn_loss + ciassd_loss + + self.loss_dict.update({ + 'loss': loss, + 'rcnn_loss': rcnn_loss, + 'cls_loss': loss_cls_reduced, + 'iou_loss': loss_iou_reduced, + 'reg_loss': loss_reg_reduced, + }) + + return loss + + def logging(self, epoch, batch_id, batch_len, writer=None): + """ + Print out the loss function for current iteration. + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. 
+ batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + ciassd_loss_dict = self.ciassd_loss.loss_dict + ciassd_total_loss = ciassd_loss_dict['total_loss'] + reg_loss = ciassd_loss_dict['reg_loss'] + cls_loss = ciassd_loss_dict['cls_loss'] + dir_loss = ciassd_loss_dict['dir_loss'] + if 'iou_loss' in ciassd_loss_dict: + iou_loss = ciassd_loss_dict['iou_loss'].item() + else: + iou_loss = 0 + + + if (batch_id + 1) % 1 == 0: + str_to_print = "[epoch %d][%d/%d], || Loss: %.4f || Ciassd: %.4f " \ + "|| Cls1: %.4f || Loc1: %.4f || Dir1: %.4f || Iou1: %.4f" % ( + epoch, batch_id + 1, batch_len, self.loss_dict['loss'], + ciassd_total_loss.item(), cls_loss.item(), reg_loss.item(), + dir_loss.item(), iou_loss, + ) + if 'rcnn_loss' in self.loss_dict: + str_to_print += " || Rcnn: %.4f || Cls2: %.4f || Loc2: %.4f || Iou2: %.4f" % ( + self.loss_dict['rcnn_loss'], + self.loss_dict['cls_loss'].item(), + self.loss_dict['reg_loss'].item(), + self.loss_dict['iou_loss'].item(), + ) + if 'kd_loss' in self.loss_dict: + str_to_print += " || Heter kd: %.4f " % ( + self.loss_dict['kd_loss'].item(), + ) + if 'cons_loss' in self.loss_dict: + str_to_print += " || Heter cons: %.4f " % ( + self.loss_dict['cons_loss'].item(), + ) + if 'sh_loss' in self.loss_dict: + str_to_print += " || Shared Head ciassd: %.4f " % ( + self.loss_dict['sh_loss'].item(), + ) + print(str_to_print) + + if writer: + writer.add_scalar('Ciassd_regression_loss', reg_loss.item(), + epoch * batch_len + batch_id) + writer.add_scalar('Ciassd_Confidence_loss', cls_loss.item(), + epoch * batch_len + batch_id) + writer.add_scalar('Ciassd_Direction_loss', dir_loss.item(), + epoch * batch_len + batch_id) + writer.add_scalar('Ciassd_Iou_loss', iou_loss, + epoch * batch_len + batch_id) + writer.add_scalar('Ciassd_loss', ciassd_total_loss.item(), + epoch * batch_len + batch_id) + + if 'rcnn_loss' in self.loss_dict: + writer.add_scalar('Rcnn_regression_loss', + self.loss_dict['reg_loss'].item(), + epoch * batch_len + batch_id) + writer.add_scalar('Rcnn_Confidence_loss', + self.loss_dict['cls_loss'].item(), + epoch * batch_len + batch_id) + writer.add_scalar('Rcnn_Iou_loss', + self.loss_dict['iou_loss'].item(), + epoch * batch_len + batch_id) + writer.add_scalar('Rcnn_loss', self.loss_dict['rcnn_loss'].item(), + epoch * batch_len + batch_id) + writer.add_scalar('Total_loss', self.loss_dict['loss'].item(), + epoch * batch_len + batch_id) + + if 'kd_loss' in self.loss_dict: + writer.add_scalar('Heter_kd_loss', + self.loss_dict['kd_loss'].item(), + epoch * batch_len + batch_id) + if 'cons_loss' in self.loss_dict: + writer.add_scalar('Heter_cons_loss', + self.loss_dict['cons_loss'].item(), + epoch * batch_len + batch_id) + if 'sh_loss' in self.loss_dict: + writer.add_scalar('shared head ciassd_loss', + self.loss_dict['sh_loss'].item(), + epoch * batch_len + batch_id) + + +def weighted_sigmoid_binary_cross_entropy(preds, tgts, weights=None, + class_indices=None): + if weights is not None: + weights = weights.unsqueeze(-1) + if class_indices is not None: + weights *= ( + indices_to_dense_vector(class_indices, preds.shape[2]) + .view(1, 1, -1) + .type_as(preds) + ) + per_entry_cross_ent = nn.functional.binary_cross_entropy_with_logits(preds, + tgts, + weights) + return per_entry_cross_ent + + +def indices_to_dense_vector( + indices, size, indices_value=1.0, default_value=0, dtype=np.float32 +): + """Creates dense vector with indices set to specific value and rest to zeros. 
+ This function exists because it is unclear if it is safe to use + tf.sparse_to_dense(indices, [size], 1, validate_indices=False) + with indices which are not ordered. + This function accepts a dynamic size (e.g. tf.shape(tensor)[0]) + Args: + indices: 1d Tensor with integer indices which are to be set to + indices_values. + size: scalar with size (integer) of output Tensor. + indices_value: values of elements specified by indices in the output vector + default_value: values of other elements in the output vector. + dtype: data type. + Returns: + dense 1D Tensor of shape [size] with indices set to indices_values and the + rest set to default_value. + """ + dense = torch.zeros(size).fill_(default_value) + dense[indices] = indices_value + + return dense \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/heterception_redet_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/heterception_redet_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..b0fd06567cabb2ec8cbcb5591208c649622fa2a0 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/heterception_redet_loss.py @@ -0,0 +1,171 @@ +import torch +from torch import nn +import numpy as np +from opencood.loss.ciassd_loss import CiassdLoss, weighted_smooth_l1_loss +from icecream import ic + +class HeterceptionReDetLoss(nn.Module): + def __init__(self, args): + super(HeterceptionReDetLoss, self).__init__() + # self.ciassd_loss = CiassdLoss(args['stage1']) + self.ciassd_loss = CiassdLoss(args['shared_head_out'], keyname='shared_head_out') + self.redet_loss = CiassdLoss(args['stage2'], keyname='stage2_out') + + + self.kd = args['stage2']['kd'] + self.kd_fn = nn.MSELoss(reduce='mean') + + self.loss_dict = {} + + def forward(self, output_dict, label_dict): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + ciassd_loss = self.ciassd_loss(output_dict, label_dict['stage1']) + + # only update ciassd if no bbox is detected in the first stage + if 'stage2_out' not in output_dict: + self.loss_dict = { + 'loss': ciassd_loss, + } + return ciassd_loss + + output_dict['batch_size'] = len(output_dict['record_len']) + output_dict.pop('record_len') + + redet_loss = self.redet_loss(output_dict, label_dict['stage2']) + loss = redet_loss + ciassd_loss + + # knowledge distillation + if 'kd_items' in output_dict: + lidar_features = output_dict['kd_items']["lidar_roi_features"] # [C, sum(bev_grids)] + camera_features = output_dict['kd_items']["camera_roi_features"] # [C, sum(bev_grids)] + kd_loss_reduced = self.kd_fn(lidar_features, camera_features) * self.kd['weight'] + loss += kd_loss_reduced + self.loss_dict.update({'kd_loss': kd_loss_reduced}) + + self.loss_dict.update({ + 'loss': loss, + 'redet_loss': redet_loss, + }) + + return loss + + def logging(self, epoch, batch_id, batch_len, writer=None): + """ + Print out the loss function for current iteration. + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. 
+ batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + ciassd_loss_dict = self.ciassd_loss.loss_dict + ciassd_total_loss = ciassd_loss_dict['total_loss'] + reg_loss = ciassd_loss_dict['reg_loss'] + cls_loss = ciassd_loss_dict['cls_loss'] + dir_loss = ciassd_loss_dict['dir_loss'] + + + if (batch_id + 1) % 1 == 0: + str_to_print = "[epoch %d][%d/%d], || Loss: %.4f || Ciassd: %.4f " \ + "|| Cls1: %.4f || Loc1: %.4f || Dir1: %.4f " % ( + epoch, batch_id + 1, batch_len, self.loss_dict['loss'], + ciassd_total_loss.item(), cls_loss.item(), reg_loss.item(), + dir_loss.item() + ) + if 'redet_loss' in self.loss_dict: + str_to_print += " || redet_loss: %.4f || Cls2: %.4f || Loc2: %.4f || Dir2: %.4f" % ( + self.loss_dict['redet_loss'].item(), + self.redet_loss.loss_dict['cls_loss'].item(), + self.redet_loss.loss_dict['reg_loss'].item(), + self.redet_loss.loss_dict['dir_loss'].item(), + ) + if 'kd_loss' in self.loss_dict: + str_to_print += " || Heter kd: %.4f " % ( + self.loss_dict['kd_loss'].item(), + ) + + + print(str_to_print) + + if writer: + writer.add_scalar('Ciassd_regression_loss', reg_loss.item(), + epoch * batch_len + batch_id) + writer.add_scalar('Ciassd_Confidence_loss', cls_loss.item(), + epoch * batch_len + batch_id) + writer.add_scalar('Ciassd_Direction_loss', dir_loss.item(), + epoch * batch_len + batch_id) + writer.add_scalar('Ciassd_loss', ciassd_total_loss.item(), + epoch * batch_len + batch_id) + + if 'redet_loss' in self.loss_dict: + writer.add_scalar('ReDet_loss', + self.loss_dict['redet_loss'].item(), + epoch * batch_len + batch_id) + writer.add_scalar('ReDet_Confidence_loss', + self.redet_loss.loss_dict['cls_loss'].item(), + epoch * batch_len + batch_id) + writer.add_scalar('ReDet_regression_loss', + self.redet_loss.loss_dict['reg_loss'].item(), + epoch * batch_len + batch_id) + writer.add_scalar('ReDet_direction_loss', + self.redet_loss.loss_dict['dir_loss'].item(), + epoch * batch_len + batch_id) + writer.add_scalar('Total_loss', self.loss_dict['loss'].item(), + epoch * batch_len + batch_id) + + if 'kd_loss' in self.loss_dict: + writer.add_scalar('Heter_kd_loss', + self.loss_dict['kd_loss'].item(), + epoch * batch_len + batch_id) + + + +def weighted_sigmoid_binary_cross_entropy(preds, tgts, weights=None, + class_indices=None): + if weights is not None: + weights = weights.unsqueeze(-1) + if class_indices is not None: + weights *= ( + indices_to_dense_vector(class_indices, preds.shape[2]) + .view(1, 1, -1) + .type_as(preds) + ) + per_entry_cross_ent = nn.functional.binary_cross_entropy_with_logits(preds, + tgts, + weights) + return per_entry_cross_ent + + +def indices_to_dense_vector( + indices, size, indices_value=1.0, default_value=0, dtype=np.float32 +): + """Creates dense vector with indices set to specific value and rest to zeros. + This function exists because it is unclear if it is safe to use + tf.sparse_to_dense(indices, [size], 1, validate_indices=False) + with indices which are not ordered. + This function accepts a dynamic size (e.g. tf.shape(tensor)[0]) + Args: + indices: 1d Tensor with integer indices which are to be set to + indices_values. + size: scalar with size (integer) of output Tensor. + indices_value: values of elements specified by indices in the output vector + default_value: values of other elements in the output vector. + dtype: data type. + Returns: + dense 1D Tensor of shape [size] with indices set to indices_values and the + rest set to default_value. 
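+    Example (illustrative):
+        indices_to_dense_vector(torch.tensor([1, 3]), 5)
+        # -> tensor([0., 1., 0., 1., 0.])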
+ """ + dense = torch.zeros(size).fill_(default_value) + dense[indices] = indices_value + + return dense \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/point_pillar_dir_depth_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/point_pillar_dir_depth_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..6f136edfd23c7b77d026194716d5c051471c864f --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/point_pillar_dir_depth_loss.py @@ -0,0 +1,423 @@ +# -*- coding: utf-8 -*- +# Author: Yifan Lu +# Add direction classification loss +# The originally point_pillar_loss.py, can not determine if the box heading is opposite to the GT. +# Add depth loss (optional for camera based perception) + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +from opencood.utils.common_utils import limit_period +from icecream import ic + +class FocalLoss(nn.Module): + r"""Criterion that computes Focal loss. + + According to :cite:`lin2018focal`, the Focal loss is computed as follows: + + .. math:: + + \text{FL}(p_t) = -\alpha_t (1 - p_t)^{\gamma} \, \text{log}(p_t) + + Where: + - :math:`p_t` is the model's estimated probability for each class. + + Args: + alpha: Weighting factor :math:`\alpha \in [0, 1]`. + gamma: Focusing parameter :math:`\gamma >= 0`. + reduction: Specifies the reduction to apply to the + output: ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction + will be applied, ``'mean'``: the sum of the output will be divided by + the number of elements in the output, ``'sum'``: the output will be + summed. + eps: Deprecated: scalar to enforce numerical stability. This is no longer + used. + + Shape: + - Input: :math:`(N, C, *)` where C = number of classes. + - Target: :math:`(N, *)` where each value is + :math:`0 ≤ targets[i] ≤ C−1`. 
+ + Example: + >>> N = 5 # num_classes + >>> kwargs = {"alpha": 0.5, "gamma": 2.0, "reduction": 'mean'} + >>> criterion = FocalLoss(**kwargs) + >>> input = torch.randn(1, N, 3, 5, requires_grad=True) + >>> target = torch.empty(1, 3, 5, dtype=torch.long).random_(N) + >>> output = criterion(input, target) + >>> output.backward() + """ + + def __init__(self, alpha, gamma = 2.0, reduction= 'none', smooth_target = False , eps = None) -> None: + super().__init__() + self.alpha = alpha + self.gamma = gamma + self.reduction = reduction + self.smooth_target = smooth_target + self.eps = eps + if self.smooth_target: + self.smooth_kernel = nn.Conv1d(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=1, bias=False) + self.smooth_kernel.weight = torch.nn.Parameter(torch.tensor([[[0.2, 0.9, 0.2]]]), requires_grad=False) + self.smooth_kernel = self.smooth_kernel.to(torch.device("cuda")) + + def forward(self, input, target): + n = input.shape[0] + out_size = (n,) + input.shape[2:] + + # compute softmax over the classes axis + input_soft = input.softmax(1) + log_input_soft = input.log_softmax(1) + + # create the labels one hot tensor + D = input.shape[1] + if self.smooth_target: + target_one_hot = F.one_hot(target, num_classes=D).to(input).view(-1, D) # [N*H*W, D] + target_one_hot = self.smooth_kernel(target_one_hot.float().unsqueeze(1)).squeeze(1) # [N*H*W, D] + target_one_hot = target_one_hot.view(*target.shape, D).permute(0, 3, 1, 2) + else: + target_one_hot = F.one_hot(target, num_classes=D).to(input).permute(0, 3, 1, 2) + # compute the actual focal loss + weight = torch.pow(-input_soft + 1.0, self.gamma) + + focal = -self.alpha * weight * log_input_soft + loss_tmp = torch.einsum('bc...,bc...->b...', (target_one_hot, focal)) + + if self.reduction == 'none': + loss = loss_tmp + elif self.reduction == 'mean': + loss = torch.mean(loss_tmp) + elif self.reduction == 'sum': + loss = torch.sum(loss_tmp) + else: + raise NotImplementedError(f"Invalid reduction mode: {self.reduction}") + return loss + +class WeightedSmoothL1Loss(nn.Module): + """ + Code-wise Weighted Smooth L1 Loss modified based on fvcore.nn.smooth_l1_loss + https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/smooth_l1_loss.py + | 0.5 * x ** 2 / beta if abs(x) < beta + smoothl1(x) = | + | abs(x) - 0.5 * beta otherwise, + where x = input - target. + """ + def __init__(self, beta: float = 1.0 / 9.0, code_weights: list = None): + """ + Args: + beta: Scalar float. + L1 to L2 change point. + For beta values < 1e-5, L1 loss is computed. + code_weights: (#codes) float list if not None. + Code-wise weights. + """ + super(WeightedSmoothL1Loss, self).__init__() + self.beta = beta + if code_weights is not None: + self.code_weights = np.array(code_weights, dtype=np.float32) + self.code_weights = torch.from_numpy(self.code_weights).cuda() + + @staticmethod + def smooth_l1_loss(diff, beta): + if beta < 1e-5: + loss = torch.abs(diff) + else: + n = torch.abs(diff) + loss = torch.where(n < beta, 0.5 * n ** 2 / beta, n - 0.5 * beta) + + return loss + + def forward(self, input: torch.Tensor, + target: torch.Tensor, weights: torch.Tensor = None): + """ + Args: + input: (B, #anchors, #codes) float tensor. + Ecoded predicted locations of objects. + target: (B, #anchors, #codes) float tensor. + Regression targets. + weights: (B, #anchors) float tensor if not None. + + #anchors = H * W * anchor_num + + Returns: + loss: (B, #anchors) float tensor. + Weighted smooth l1 loss without reduction. 
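+
+        Example (illustrative; shapes are hypothetical):
+            loss_fn = WeightedSmoothL1Loss()
+            pred, tgt = torch.zeros(2, 100, 7), torch.zeros(2, 100, 7)
+            loss_fn(pred, tgt).shape   # torch.Size([2, 100, 7]); element-wise, no reduction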
+ """ + target = torch.where(torch.isnan(target), input, target) # ignore nan targets + + diff = input - target + loss = self.smooth_l1_loss(diff, self.beta) + + # anchor-wise weighting + if weights is not None: + assert weights.shape[0] == loss.shape[0] and weights.shape[1] == loss.shape[1] + loss = loss * weights.unsqueeze(-1) + + return loss + + + +class PointPillarDirDepthLoss(nn.Module): + def __init__(self, args): + super(PointPillarDirDepthLoss, self).__init__() + self.reg_loss_func = WeightedSmoothL1Loss() + self.alpha = 0.25 + self.gamma = 2.0 + + self.cls_weight = args['cls_weight'] + self.reg_coe = args['reg'] + + self.dir_weight = args['dir_args']['dir_weight'] + self.dir_offset = args['dir_args']['args']['dir_offset'] + self.num_bins = args['dir_args']['args']['num_bins'] + anchor_yaw = np.deg2rad(np.array(args['dir_args']['anchor_yaw'])) # for direction classification + self.anchor_yaw_map = torch.from_numpy(anchor_yaw).view(1,-1,1) # [1,2,1] + self.anchor_num = self.anchor_yaw_map.shape[1] + + self.depth_weight = args['depth_weight'] + self.smooth_target = True if 'smooth_target' in args and args['smooth_target'] else False + self.use_fg_mask = True if 'use_fg_mask' in args and args['use_fg_mask'] else False + self.fg_weight = 3.25 + self.bg_weight = 0.25 + if self.smooth_target: + self.depth_loss_func = FocalLoss(alpha=self.alpha, gamma=self.gamma, reduction="none", smooth_target=True) + else: + self.depth_loss_func = FocalLoss(alpha=self.alpha, gamma=self.gamma, reduction="none") + + self.loss_dict = {} + + def forward(self, output_dict, target_dict, suffix=""): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + self.loss_dict = {} + rm = output_dict[f'rm{suffix}'] # [B, 14, 50, 176] + psm = output_dict[f'psm{suffix}'] # [B, 2, 50, 176] + targets = target_dict['targets'] + + cls_preds = psm.permute(0, 2, 3, 1).contiguous() # N, C, H, W -> N, H, W, C + + box_cls_labels = target_dict['pos_equal_one'] # [B, 50, 176, 2] + """ + Visualize + """ + box_cls_labels = box_cls_labels.view(psm.shape[0], -1).contiguous() # [B, 50*176*2] + + positives = box_cls_labels > 0 + negatives = box_cls_labels == 0 + negative_cls_weights = negatives * 1.0 + cls_weights = (negative_cls_weights + 1.0 * positives).float() + reg_weights = positives.float() + + pos_normalizer = positives.sum(1, keepdim=True).float() + reg_weights /= torch.clamp(pos_normalizer, min=1.0) # [N, H*W*anchor_num] + cls_weights /= torch.clamp(pos_normalizer, min=1.0) + cls_targets = box_cls_labels + cls_targets = cls_targets.unsqueeze(dim=-1) + + cls_targets = cls_targets.squeeze(dim=-1) + one_hot_targets = torch.zeros( + *list(cls_targets.shape), 2, + dtype=cls_preds.dtype, device=cls_targets.device + ) # [B, H*W*C, 2], C=#anchor=2 + one_hot_targets.scatter_(-1, cls_targets.unsqueeze(dim=-1).long(), 1.0) + cls_preds = cls_preds.view(psm.shape[0], -1, 1) # [B, H*W*C, 1], C=#anchor=2 + one_hot_targets = one_hot_targets[..., 1:] + + cls_loss_src = self.cls_loss_func(cls_preds, + one_hot_targets, + weights=cls_weights) # [N, M] + cls_loss = cls_loss_src.sum() / psm.shape[0] + conf_loss = cls_loss * self.cls_weight + + # regression + rm = rm.permute(0, 2, 3, 1).contiguous() + rm = rm.view(rm.size(0), -1, 7) + targets = targets.view(targets.size(0), -1, 7) + box_preds_sin, reg_targets_sin = self.add_sin_difference(rm, + targets) + loc_loss_src =\ + self.reg_loss_func(box_preds_sin, + reg_targets_sin, + weights=reg_weights) + reg_loss = loc_loss_src.sum() / rm.shape[0] + reg_loss *= self.reg_coe + + 
######## direction ########## + dir_targets = self.get_direction_target(targets) + N = output_dict[f"dm{suffix}"].shape[0] + dir_logits = output_dict[f"dm{suffix}"].permute(0, 2, 3, 1).contiguous().view(N, -1, 2) # [N, H*W*#anchor, 2] + + + dir_loss = softmax_cross_entropy_with_logits(dir_logits.view(-1, self.anchor_num), dir_targets.view(-1, self.anchor_num)) + dir_loss = dir_loss.view(dir_logits.shape[:2]) * reg_weights # [N, H*W*anchor_num] + dir_loss = dir_loss.sum() * self.dir_weight / N + + total_loss = reg_loss + conf_loss + dir_loss + + ########## depth ############# + if f"depth_items{suffix}" in output_dict and output_dict[f'depth_items{suffix}'] is not None: + # depth logdit: [N, D, H, W] + # depth gt indices: [N, H, W] + # fg_mask: [N, H, W] + depth_logit, depth_gt_indices = output_dict[f'depth_items{suffix}'][0], output_dict[f'depth_items{suffix}'][1] + depth_loss = self.depth_loss_func(depth_logit, depth_gt_indices) + if self.use_fg_mask: + fg_mask = output_dict[f'depth_items{suffix}'][-1] + weight_mask = (fg_mask > 0) * self.fg_weight + (fg_mask == 0) * self.bg_weight + depth_loss *= weight_mask + + depth_loss = depth_loss.mean() * self.depth_weight + + total_loss += depth_loss + self.loss_dict.update({'depth_loss': depth_loss}) + + self.loss_dict.update({'total_loss': total_loss, + 'reg_loss': reg_loss, + 'conf_loss': conf_loss, + 'dir_loss': dir_loss}) + + return total_loss + + def cls_loss_func(self, input: torch.Tensor, + target: torch.Tensor, + weights: torch.Tensor): + """ + Args: + input: (B, #anchors, #classes) float tensor. + Predicted logits for each class + target: (B, #anchors, #classes) float tensor. + One-hot encoded classification targets + weights: (B, #anchors) float tensor. + Anchor-wise weights. + + Returns: + weighted_loss: (B, #anchors, #classes) float tensor after weighting. + """ + pred_sigmoid = torch.sigmoid(input) + alpha_weight = target * self.alpha + (1 - target) * (1 - self.alpha) + pt = target * (1.0 - pred_sigmoid) + (1.0 - target) * pred_sigmoid + focal_weight = alpha_weight * torch.pow(pt, self.gamma) + + bce_loss = self.sigmoid_cross_entropy_with_logits(input, target) + + loss = focal_weight * bce_loss + + if weights.shape.__len__() == 2 or \ + (weights.shape.__len__() == 1 and target.shape.__len__() == 2): + weights = weights.unsqueeze(-1) + + assert weights.shape.__len__() == loss.shape.__len__() + + return loss * weights + + @staticmethod + def sigmoid_cross_entropy_with_logits(input: torch.Tensor, target: torch.Tensor): + """ PyTorch Implementation for tf.nn.sigmoid_cross_entropy_with_logits: + max(x, 0) - x * z + log(1 + exp(-abs(x))) in + https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits + + Args: + input: (B, #anchors, #classes) float tensor. + Predicted logits for each class + target: (B, #anchors, #classes) float tensor. + One-hot encoded classification targets + + Returns: + loss: (B, #anchors, #classes) float tensor. 
+ Sigmoid cross entropy loss without reduction + """ + loss = torch.clamp(input, min=0) - input * target + \ + torch.log1p(torch.exp(-torch.abs(input))) + return loss + + @staticmethod + def add_sin_difference(boxes1, boxes2, dim=6): + assert dim != -1 + rad_pred_encoding = torch.sin(boxes1[..., dim:dim + 1]) * \ + torch.cos(boxes2[..., dim:dim + 1]) + rad_tg_encoding = torch.cos(boxes1[..., dim:dim + 1]) * \ + torch.sin(boxes2[..., dim:dim + 1]) + + boxes1 = torch.cat([boxes1[..., :dim], rad_pred_encoding, + boxes1[..., dim + 1:]], dim=-1) + boxes2 = torch.cat([boxes2[..., :dim], rad_tg_encoding, + boxes2[..., dim + 1:]], dim=-1) + return boxes1, boxes2 + + def get_direction_target(self, reg_targets): + """ + Args: + reg_targets: [N, H * W * #anchor_num, 7] + The last term is (theta_gt - theta_a) + + Returns: + dir_targets: + theta_gt: [N, H * W * #anchor_num, NUM_BIN] + NUM_BIN = 2 + """ + # (1, 2, 1) + H_times_W_times_anchor_num = reg_targets.shape[1] + anchor_map = self.anchor_yaw_map.repeat(1, H_times_W_times_anchor_num//self.anchor_num, 1).to(reg_targets.device) # [1, H * W * #anchor_num, 1] + rot_gt = reg_targets[..., -1] + anchor_map[..., -1] # [N, H*W*anchornum] + offset_rot = limit_period(rot_gt - self.dir_offset, 0, 2 * np.pi) + dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / self.num_bins)).long() # [N, H*W*anchornum] + dir_cls_targets = torch.clamp(dir_cls_targets, min=0, max=self.num_bins - 1) + # one_hot: + # if rot_gt > 0, then the label is 1, then the regression target is [0, 1] + dir_cls_targets = one_hot_f(dir_cls_targets, self.num_bins) + return dir_cls_targets + + + + def logging(self, epoch, batch_id, batch_len, writer = None, suffix=""): + """ + Print out the loss function for current iteration. + + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. 
+ batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict['total_loss'] + reg_loss = self.loss_dict['reg_loss'] + conf_loss = self.loss_dict['conf_loss'] + dir_loss = self.loss_dict['dir_loss'] + depth_loss = 0 if 'depth_loss' not in self.loss_dict else self.loss_dict['depth_loss'].item() + + print("[epoch %d][%d/%d]%s || Loss: %.4f || Conf Loss: %.4f" + " || Loc Loss: %.4f || Dir Loss: %.4f || depth loss %.4f" % ( + epoch, batch_id + 1, batch_len, suffix, + total_loss.item(), conf_loss.item(), reg_loss.item(), dir_loss.item(), depth_loss)) + + if not writer is None: + writer.add_scalar('Regression_loss'+suffix, reg_loss.item(), + epoch*batch_len + batch_id) + writer.add_scalar('Confidence_loss'+suffix, conf_loss.item(), + epoch*batch_len + batch_id) + writer.add_scalar('Dir_loss'+suffix, dir_loss.item(), + epoch*batch_len + batch_id) + writer.add_scalar('depth_loss'+suffix, depth_loss, + epoch*batch_len + batch_id) + +def one_hot_f(tensor, num_bins, dim=-1, on_value=1.0, dtype=torch.float32): + tensor_onehot = torch.zeros(*list(tensor.shape), num_bins, dtype=dtype, device=tensor.device) + tensor_onehot.scatter_(dim, tensor.unsqueeze(dim).long(), on_value) + return tensor_onehot + +def softmax_cross_entropy_with_logits(logits, labels): + param = list(range(len(logits.shape))) + transpose_param = [0] + [param[-1]] + param[1:-1] + logits = logits.permute(*transpose_param) + loss_ftor = torch.nn.CrossEntropyLoss(reduction="none") + loss = loss_ftor(logits, labels.max(dim=-1)[1]) + return loss diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/point_pillar_mash_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/point_pillar_mash_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..ed2ec6fdc872b1d35153f14964571b947d3db1c2 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/point_pillar_mash_loss.py @@ -0,0 +1,298 @@ +# -*- coding: utf-8 -*- +# Author: OpenPCDet, Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + + +class WeightedSmoothL1Loss(nn.Module): + """ + Code-wise Weighted Smooth L1 Loss modified based on fvcore.nn.smooth_l1_loss + https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/smooth_l1_loss.py + | 0.5 * x ** 2 / beta if abs(x) < beta + smoothl1(x) = | + | abs(x) - 0.5 * beta otherwise, + where x = input - target. + """ + def __init__(self, beta: float = 1.0 / 9.0, code_weights: list = None): + """ + Args: + beta: Scalar float. + L1 to L2 change point. + For beta values < 1e-5, L1 loss is computed. + code_weights: (#codes) float list if not None. + Code-wise weights. + """ + super(WeightedSmoothL1Loss, self).__init__() + self.beta = beta + if code_weights is not None: + self.code_weights = np.array(code_weights, dtype=np.float32) + self.code_weights = torch.from_numpy(self.code_weights).cuda() + + @staticmethod + def smooth_l1_loss(diff, beta): + if beta < 1e-5: + loss = torch.abs(diff) + else: + n = torch.abs(diff) + loss = torch.where(n < beta, 0.5 * n ** 2 / beta, n - 0.5 * beta) + + return loss + + def forward(self, input: torch.Tensor, + target: torch.Tensor, weights: torch.Tensor = None): + """ + Args: + input: (B, #anchors, #codes) float tensor. + Ecoded predicted locations of objects. + target: (B, #anchors, #codes) float tensor. 
+ Regression targets. + weights: (B, #anchors) float tensor if not None. + + Returns: + loss: (B, #anchors) float tensor. + Weighted smooth l1 loss without reduction. + """ + target = torch.where(torch.isnan(target), input, target) # ignore nan targets + + diff = input - target + loss = self.smooth_l1_loss(diff, self.beta) + + # anchor-wise weighting + if weights is not None: + assert weights.shape[0] == loss.shape[0] and weights.shape[1] == loss.shape[1] + loss = loss * weights.unsqueeze(-1) + + return loss + + +class PointPillarMashLoss(nn.Module): + def __init__(self, args): + super(PointPillarMashLoss, self).__init__() + self.reg_loss_func = WeightedSmoothL1Loss() + self.grid_loss_func = nn.CrossEntropyLoss() + self.alpha = 0.25 + self.gamma = 2.0 + + self.cls_weight = args['cls_weight'] + self.grid_weight = args['grid_weight'] + self.reg_coe = args['reg'] + self.H = args['H'] + self.W = args['W'] + self.downsample_rate = args['downsample_rate'] + self.discrete_ratio = args['voxel_size'][0] + + self.loss_dict = {} + + def forward(self, output_dict, target_dict): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + rm = output_dict['rm'] # [B, 14, 50, 176] + psm = output_dict['psm'] # [B, 2, 50, 176] + targets = target_dict['targets'] + + cls_preds = psm.permute(0, 2, 3, 1).contiguous() # N, C, H, W -> N, H, W, C + + box_cls_labels = target_dict['pos_equal_one'] # [B, 50, 176, 2] + box_cls_labels = box_cls_labels.view(psm.shape[0], -1).contiguous() + + positives = box_cls_labels > 0 + negatives = box_cls_labels == 0 + negative_cls_weights = negatives * 1.0 + cls_weights = (negative_cls_weights + 1.0 * positives).float() + reg_weights = positives.float() + + pos_normalizer = positives.sum(1, keepdim=True).float() + reg_weights /= torch.clamp(pos_normalizer, min=1.0) + cls_weights /= torch.clamp(pos_normalizer, min=1.0) + cls_targets = box_cls_labels + cls_targets = cls_targets.unsqueeze(dim=-1) + + cls_targets = cls_targets.squeeze(dim=-1) + one_hot_targets = torch.zeros( + *list(cls_targets.shape), 2, + dtype=cls_preds.dtype, device=cls_targets.device + ) + one_hot_targets.scatter_(-1, cls_targets.unsqueeze(dim=-1).long(), 1.0) + cls_preds = cls_preds.view(psm.shape[0], -1, 1) + one_hot_targets = one_hot_targets[..., 1:] + + cls_loss_src = self.cls_loss_func(cls_preds, + one_hot_targets, + weights=cls_weights) # [N, M] + cls_loss = cls_loss_src.sum() / psm.shape[0] + conf_loss = cls_loss * self.cls_weight + + # regression + rm = rm.permute(0, 2, 3, 1).contiguous() + rm = rm.view(rm.size(0), -1, 7) + targets = targets.view(targets.size(0), -1, 7) + box_preds_sin, reg_targets_sin = self.add_sin_difference(rm, + targets) + loc_loss_src =\ + self.reg_loss_func(box_preds_sin, + reg_targets_sin, + weights=reg_weights) + reg_loss = loc_loss_src.sum() / rm.shape[0] + reg_loss *= self.reg_coe + + total_loss = reg_loss + conf_loss + self.loss_dict.update({'total_loss': total_loss.item(), + 'reg_loss': reg_loss.item(), + 'conf_loss': conf_loss.item()}) + + corr_vol = output_dict['corr_vol'] + # grid loss + if corr_vol is not None: + pairwise_t_matrix = target_dict['pairwise_t_matrix'] + record_len = target_dict['record_len'] + + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * self.H / self.W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * self.W / self.H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio 
* self.W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * self.H) * 2 + + B = len(record_len) + t_matrix_list = [] + for b in range(B): + if(record_len[b] == 1): + continue + t_matrix_list.append(pairwise_t_matrix[b, 0, 1:record_len[b]]) # [N-1, 2, 3] + + t_matrix = torch.cat(t_matrix_list, 0) + N_ = t_matrix.shape[0] # N_ is sum_i{Ni-1} + + grid_gt = F.affine_grid(t_matrix, (N_, 1, self.H, self.W)) # (N_, H, W, 2) + X = grid_gt[...,0] # (N_, H, W) + Y = grid_gt[...,1] # (N_, H, W) + X_idx = ((X / 2 + 0.5) * self.W).to(torch.long) # (N_, H, W) + Y_idx = ((Y / 2 + 0.5) * self.H).to(torch.long) # (N_, H, W) + + idx = (Y_idx * self.W + X_idx) # (N_, H, W) + + # out of boundary + mask = torch.gt(grid_gt, -1) * torch.lt(grid_gt, 1) # (N_, H, W,2) + mask = mask[...,0] * mask[...,1] # (N_, H, W) + mask = ~ mask + idx[mask] = self.H * self.W # No matching, set to empty, last dimension + corr_vol_gt = idx + + grid_loss = self.grid_loss_func(corr_vol, corr_vol_gt) + grid_loss *= self.grid_weight + + total_loss += grid_loss + self.loss_dict.update({"total_loss": total_loss.item(), + "grid_loss": grid_loss.item()}) + + + + return total_loss + + def cls_loss_func(self, input: torch.Tensor, + target: torch.Tensor, + weights: torch.Tensor): + """ + Args: + input: (B, #anchors, #classes) float tensor. + Predicted logits for each class + target: (B, #anchors, #classes) float tensor. + One-hot encoded classification targets + weights: (B, #anchors) float tensor. + Anchor-wise weights. + + Returns: + weighted_loss: (B, #anchors, #classes) float tensor after weighting. + """ + pred_sigmoid = torch.sigmoid(input) + alpha_weight = target * self.alpha + (1 - target) * (1 - self.alpha) + pt = target * (1.0 - pred_sigmoid) + (1.0 - target) * pred_sigmoid + focal_weight = alpha_weight * torch.pow(pt, self.gamma) + + bce_loss = self.sigmoid_cross_entropy_with_logits(input, target) + + loss = focal_weight * bce_loss + + if weights.shape.__len__() == 2 or \ + (weights.shape.__len__() == 1 and target.shape.__len__() == 2): + weights = weights.unsqueeze(-1) + + assert weights.shape.__len__() == loss.shape.__len__() + + return loss * weights + + @staticmethod + def sigmoid_cross_entropy_with_logits(input: torch.Tensor, target: torch.Tensor): + """ PyTorch Implementation for tf.nn.sigmoid_cross_entropy_with_logits: + max(x, 0) - x * z + log(1 + exp(-abs(x))) in + https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits + + Args: + input: (B, #anchors, #classes) float tensor. + Predicted logits for each class + target: (B, #anchors, #classes) float tensor. + One-hot encoded classification targets + + Returns: + loss: (B, #anchors, #classes) float tensor. + Sigmoid cross entropy loss without reduction + """ + loss = torch.clamp(input, min=0) - input * target + \ + torch.log1p(torch.exp(-torch.abs(input))) + return loss + + @staticmethod + def add_sin_difference(boxes1, boxes2, dim=6): + assert dim != -1 + rad_pred_encoding = torch.sin(boxes1[..., dim:dim + 1]) * \ + torch.cos(boxes2[..., dim:dim + 1]) + rad_tg_encoding = torch.cos(boxes1[..., dim:dim + 1]) * \ + torch.sin(boxes2[..., dim:dim + 1]) + + boxes1 = torch.cat([boxes1[..., :dim], rad_pred_encoding, + boxes1[..., dim + 1:]], dim=-1) + boxes2 = torch.cat([boxes2[..., :dim], rad_tg_encoding, + boxes2[..., dim + 1:]], dim=-1) + return boxes1, boxes2 + + + def logging(self, epoch, batch_id, batch_len, writer = None): + """ + Print out the loss function for current iteration. 
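The grid-loss target built above turns the normalized sampling coordinates from F.affine_grid (each in [-1, 1]) into flat cell indices y*W + x, and routes out-of-range samples to the extra index H*W so they fall into a dedicated "no match" class. A standalone sketch of that index construction with made-up shapes (align_corners is passed explicitly here only to avoid the default-value warning):

# Illustrative sketch: normalized affine_grid coords -> flat cell indices,
# with out-of-range locations mapped to the extra "no match" label H*W.
import torch
import torch.nn.functional as F

H, W = 4, 6
theta = torch.tensor([[[1.0, 0.0, 0.25],      # toy 2x3 affine matrix
                       [0.0, 1.0, 0.0]]])
grid = F.affine_grid(theta, (1, 1, H, W), align_corners=False)   # (1, H, W, 2), values in [-1, 1]

X, Y = grid[..., 0], grid[..., 1]
X_idx = ((X / 2 + 0.5) * W).long()            # column of the matching source cell
Y_idx = ((Y / 2 + 0.5) * H).long()            # row of the matching source cell
idx = Y_idx * W + X_idx                       # one label per source cell

in_range = (grid > -1).all(dim=-1) & (grid < 1).all(dim=-1)
idx[~in_range] = H * W                        # "no matching cell" label
print(idx)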
+ + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. + batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict['total_loss'] + reg_loss = self.loss_dict['reg_loss'] + conf_loss = self.loss_dict['conf_loss'] + grid_loss = self.loss_dict['grid_loss'] + + print("[epoch %d][%d/%d], || Loss: %.4f || Conf Loss: %.4f" + " || Loc Loss: %.4f || Grid Loss: %.4f" % ( + epoch, batch_id + 1, batch_len, + total_loss, conf_loss, reg_loss, grid_loss)) + + if not writer is None: + writer.add_scalar('Regression_loss', reg_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Confidence_loss', conf_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Grid_loss', grid_loss, + epoch*batch_len + batch_id) diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/point_pillar_uncertainty_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/point_pillar_uncertainty_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..d04c9da93edc949e70b6baeefa7c73d8ad33773c --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/point_pillar_uncertainty_loss.py @@ -0,0 +1,485 @@ +# -*- coding: utf-8 -*- +# Author: OpenPCDet, Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +from tabnanny import verbose +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +import d3d.mathh as mathh +from opencood.utils.common_utils import limit_period +from functools import partial + +class WeightedSmoothL1Loss(nn.Module): + """ + Code-wise Weighted Smooth L1 Loss modified based on fvcore.nn.smooth_l1_loss + https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/smooth_l1_loss.py + | 0.5 * x ** 2 / beta if abs(x) < beta + smoothl1(x) = | + | abs(x) - 0.5 * beta otherwise, + where x = input - target. + """ + def __init__(self, beta: float = 1.0 / 9.0, code_weights: list = None): + """ + Args: + beta: Scalar float. + L1 to L2 change point. + For beta values < 1e-5, L1 loss is computed. + code_weights: (#codes) float list if not None. + Code-wise weights. + """ + super(WeightedSmoothL1Loss, self).__init__() + self.beta = beta + if code_weights is not None: + self.code_weights = np.array(code_weights, dtype=np.float32) + self.code_weights = torch.from_numpy(self.code_weights).cuda() + + @staticmethod + def smooth_l1_loss(diff, beta): + if beta < 1e-5: + loss = torch.abs(diff) + else: + n = torch.abs(diff) + loss = torch.where(n < beta, 0.5 * n ** 2 / beta, n - 0.5 * beta) + + return loss + + def forward(self, input: torch.Tensor, + target: torch.Tensor, weights: torch.Tensor = None): + """ + Args: + input: (B, #anchors, #codes) float tensor. + Ecoded predicted locations of objects. + target: (B, #anchors, #codes) float tensor. + Regression targets. + weights: (B, #anchors) float tensor if not None. + + Returns: + loss: (B, #anchors) float tensor. + Weighted smooth l1 loss without reduction. 
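The hand-rolled smooth_l1_loss above is the usual Huber-style piecewise form (quadratic below beta, linear above). In recent PyTorch releases F.smooth_l1_loss accepts the same beta parameter, so an element-wise comparison can serve as a sanity check; a minimal sketch under that assumption:

# Illustrative sketch: the piecewise 0.5 * x**2 / beta vs |x| - 0.5 * beta form
# agrees with PyTorch's built-in smooth_l1_loss when given the same beta.
import torch
import torch.nn.functional as F

beta = 1.0 / 9.0
diff = torch.randn(1000)

n = diff.abs()
manual = torch.where(n < beta, 0.5 * n ** 2 / beta, n - 0.5 * beta)
builtin = F.smooth_l1_loss(diff, torch.zeros_like(diff), reduction="none", beta=beta)

assert torch.allclose(manual, builtin, atol=1e-6)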
+ """ + target = torch.where(torch.isnan(target), input, target) # ignore nan targets + + diff = input - target + loss = self.smooth_l1_loss(diff, self.beta) + + # anchor-wise weighting + if weights is not None: + assert weights.shape[0] == loss.shape[0] and weights.shape[1] == loss.shape[1] + loss = loss * weights.unsqueeze(-1) + + return loss + + + + +class KLLoss(nn.Module): + def __init__(self, args): + super(KLLoss, self).__init__() + + self.angle_weight = args['angle_weight'] + self.uncertainty_dim = args['uncertainty_dim'] + if args['xy_loss_type'] == "l2": + self.xy_loss = self.kl_loss_l2 + elif args['xy_loss_type'] == "l1": + self.xy_loss = self.kl_loss_l1 + else: + raise "not implemented" + + if args['angle_loss_type'] == "l2": + self.angle_loss = self.kl_loss_l2 + elif args['angle_loss_type'] == "von": + lambda_V = args['lambda_V'] + s0 = args['s0'] + limit_period = args['limit_period'] + self.angle_loss = partial(self.kl_loss_angular, lambda_V=lambda_V, s0=s0, limit_period=limit_period) + else: + raise "not implemented" + + + + + @staticmethod + def kl_loss_l2(diff, s): + """ + Args: + diff: [B, 2] + s: [B, 2] + Returns: + loss: [B, 2] + """ + loss = 0.5*(torch.exp(-s) * (diff**2) + s) + return loss + + @staticmethod + def kl_loss_l1(diff, s): + """ + Args: + diff: [B, 2] + s: [B, 2] + Returns: + loss: [B, 2] + """ + loss = 0.5*torch.exp(-s) * torch.abs(diff) + s + return loss + + @staticmethod + def kl_loss_angular(diff, s, lambda_V=1, s0=1, limit_period=False): + """ + Args: + diff: [B, 1] + s: [B, 1] + if limit_period, + diff + 180 ~ diff. + Returns: + loss: [B, 1] + """ + exp_minus_s = torch.exp(-s) + if limit_period: + cos_abs = torch.abs(torch.cos(diff)) + loss = loss = torch.log(mathh.i0e_cuda(exp_minus_s)*torch.exp(exp_minus_s)) - exp_minus_s * cos_abs.detach() + lambda_V * F.elu(s-s0) + else: + loss = torch.log(mathh.i0e_cuda(exp_minus_s)*torch.exp(exp_minus_s)) - exp_minus_s * torch.cos(diff) + lambda_V * F.elu(s-s0) + + return loss + + + def forward(self, input: torch.Tensor, + target: torch.Tensor, + sm: torch.Tensor, + weights: torch.Tensor = None): + target = torch.where(torch.isnan(target), input, target) # ignore nan targets + + + + if self.uncertainty_dim == 3: + xy_diff = input[...,:2] - target[...,:2] + loss1 = self.xy_loss(xy_diff, sm[...,:2]) + + theta_diff = input[...,7:8] - target[...,7:8] + + loss2 = self.angle_weight * self.angle_loss(theta_diff, sm[...,2:3]) + + loss = torch.cat((loss1, loss2), dim=-1) + + elif self.uncertainty_dim == 7: + ## is this right? 
+ other_diff = input[...,:6] - target[...,:6] + theta_diff = input[...,7:8] - target[...,7:8] + + diff = torch.cat((other_diff, theta_diff), dim=-1) + loss = self.xy_loss(diff, sm) + + elif self.uncertainty_dim == 2: + xy_diff = input[...,:2] - target[...,:2] + loss = self.xy_loss(xy_diff, sm[...,:2]) + else: + raise "not implemented" + + # anchor-wise weighting + if weights is not None: + assert weights.shape[0] == loss.shape[0] and weights.shape[1] == loss.shape[1] + loss = loss * weights.unsqueeze(-1) + + return loss + + + +class PointPillarUncertaintyLoss(nn.Module): + def __init__(self, args): + super(PointPillarUncertaintyLoss, self).__init__() + self.reg_loss_func = WeightedSmoothL1Loss() + self.alpha = 0.25 + self.gamma = 2.0 + + self.cls_weight = args['cls_weight'] + self.kl_weight = args['kl_weight'] + self.reg_coe = args['reg'] + self.uncertainty_dim = args['kl_args']['uncertainty_dim'] + + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_weight = args['dir_args']['dir_weight'] + self.dir_offset = args['dir_args']['args']['dir_offset'] + self.num_bins = args['dir_args']['args']['num_bins'] + anchor_yaw = np.deg2rad(np.array(args['dir_args']['anchor_yaw'])) # for direction classification + self.anchor_yaw_map = torch.from_numpy(anchor_yaw).view(1,-1,1) # [1,2,1] + self.anchor_num = self.anchor_yaw_map.shape[1] + + else: + self.use_dir =False + + + self.kl_loss_func = KLLoss(args['kl_args']) + + self.loss_dict = {} + + def forward(self, output_dict, target_dict): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + rm = output_dict['rm'] # [B, 14, 50, 176] + psm = output_dict['psm'] # [B, 2, 50, 176] + sm = output_dict['sm'] # log of sigma^2 / scale [B, 6, 50 176] + targets = target_dict['targets'] + + cls_preds = psm.permute(0, 2, 3, 1).contiguous() # N, C, H, W -> N, H, W, C + + box_cls_labels = target_dict['pos_equal_one'] # [B, 50, 176, 2] + box_cls_labels = box_cls_labels.view(psm.shape[0], -1).contiguous() # -> [B, 50*176*2], two types of anchor + + positives = box_cls_labels > 0 + negatives = box_cls_labels == 0 + negative_cls_weights = negatives * 1.0 + cls_weights = (negative_cls_weights + 1.0 * positives).float() # all 1 + reg_weights = positives.float() + + pos_normalizer = positives.sum(1, keepdim=True).float() # positive number per sample + reg_weights /= torch.clamp(pos_normalizer, min=1.0) + cls_weights /= torch.clamp(pos_normalizer, min=1.0) + cls_targets = box_cls_labels + cls_targets = cls_targets.unsqueeze(dim=-1) + + cls_targets = cls_targets.squeeze(dim=-1) + one_hot_targets = torch.zeros( + *list(cls_targets.shape), 2, + dtype=cls_preds.dtype, device=cls_targets.device + ) + one_hot_targets.scatter_(-1, cls_targets.unsqueeze(dim=-1).long(), 1.0) + cls_preds = cls_preds.view(psm.shape[0], -1, 1) + one_hot_targets = one_hot_targets[..., 1:] + + cls_loss_src = self.cls_loss_func(cls_preds, + one_hot_targets, + weights=cls_weights) # [N, M] + cls_loss = cls_loss_src.sum() / psm.shape[0] + conf_loss = cls_loss * self.cls_weight + + ########## regression ########## + rm = rm.permute(0, 2, 3, 1).contiguous() + rm = rm.view(rm.size(0), -1, 7) + targets = targets.view(targets.size(0), -1, 7) + + box_preds_sin, reg_targets_sin = self.add_sin_difference_dim(rm, + targets) + loc_loss_src =\ + self.reg_loss_func(box_preds_sin[...,:7], + reg_targets_sin[...,:7], + weights=reg_weights) + reg_loss = loc_loss_src.sum() / rm.shape[0] + reg_loss *= self.reg_coe + + + ######## direction ########## + if self.use_dir: + dir_targets = 
self.get_direction_target(targets) + N = output_dict["dm"].shape[0] + dir_logits = output_dict["dm"].permute(0, 2, 3, 1).contiguous().view(N, -1, 2) # [N, H*W*#anchor, 2] + + + dir_loss = softmax_cross_entropy_with_logits(dir_logits.view(-1, self.anchor_num), dir_targets.view(-1, self.anchor_num)) + + dir_loss = dir_loss.view(dir_logits.shape[:2]) * reg_weights # [N, H*W*anchor_num] + + dir_loss = dir_loss.sum() * self.dir_weight / N + + ######## kl ######### + sm = sm.permute(0, 2, 3, 1).contiguous() # [N, H, W, #anchor_num * 3] + sm = sm.view(sm.size(0), -1, self.uncertainty_dim) + + kl_loss_src = \ + self.kl_loss_func(box_preds_sin, + reg_targets_sin, + sm, + reg_weights) + + kl_loss = kl_loss_src.sum() / sm.shape[0] + kl_loss *= self.kl_weight + + # total_loss = reg_loss + conf_loss + kl_loss + total_loss = reg_loss + conf_loss + + self.loss_dict.update({'total_loss': total_loss, + 'reg_loss': reg_loss, + 'conf_loss': conf_loss, + 'kl_loss': kl_loss}) + + if self.use_dir: + # total_loss += dir_loss + self.loss_dict.update({'dir_loss': dir_loss}) + + + return total_loss + + def get_direction_target(self, reg_targets): + """ + Args: + reg_targets: [N, H * W * #anchor_num, 7] + The last term is (theta_gt - theta_a) + + Returns: + dir_targets: + theta_gt: [N, H * W * #anchor_num, NUM_BIN] + NUM_BIN = 2 + """ + # (1, 2, 1) + H_times_W_times_anchor_num = reg_targets.shape[1] + anchor_map = self.anchor_yaw_map.repeat(1, H_times_W_times_anchor_num//self.anchor_num, 1).to(reg_targets.device) # [1, H * W * #anchor_num, 1] + rot_gt = reg_targets[..., -1] + anchor_map[..., -1] # [N, H*W*anchornum] + offset_rot = limit_period(rot_gt - self.dir_offset, 0, 2 * np.pi) + dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / self.num_bins)).long() # [N, H*W*anchornum] + dir_cls_targets = torch.clamp(dir_cls_targets, min=0, max=self.num_bins - 1) + # one_hot: + # if rot_gt > 0, then the label is 1, then the regression target is [0, 1] + dir_cls_targets = one_hot_f(dir_cls_targets, self.num_bins) + return dir_cls_targets + + + + def cls_loss_func(self, input: torch.Tensor, + target: torch.Tensor, + weights: torch.Tensor): + """ + Args: + input: (B, #anchors, #classes) float tensor. + Predicted logits for each class + target: (B, #anchors, #classes) float tensor. + One-hot encoded classification targets + weights: (B, #anchors) float tensor. + Anchor-wise weights. + + Returns: + weighted_loss: (B, #anchors, #classes) float tensor after weighting. + """ + pred_sigmoid = torch.sigmoid(input) + alpha_weight = target * self.alpha + (1 - target) * (1 - self.alpha) + pt = target * (1.0 - pred_sigmoid) + (1.0 - target) * pred_sigmoid + focal_weight = alpha_weight * torch.pow(pt, self.gamma) + + bce_loss = self.sigmoid_cross_entropy_with_logits(input, target) + + loss = focal_weight * bce_loss + + if weights.shape.__len__() == 2 or \ + (weights.shape.__len__() == 1 and target.shape.__len__() == 2): + weights = weights.unsqueeze(-1) + + assert weights.shape.__len__() == loss.shape.__len__() + + return loss * weights + + @staticmethod + def sigmoid_cross_entropy_with_logits(input: torch.Tensor, target: torch.Tensor): + """ PyTorch Implementation for tf.nn.sigmoid_cross_entropy_with_logits: + max(x, 0) - x * z + log(1 + exp(-abs(x))) in + https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits + + Args: + input: (B, #anchors, #classes) float tensor. + Predicted logits for each class + target: (B, #anchors, #classes) float tensor. 
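get_direction_target above adds the anchor yaw back onto the regression residual, wraps the result into [0, 2*pi) with limit_period, and quantizes it into num_bins direction classes that are then supervised with softmax cross-entropy. The sketch below walks through that binning on a few yaw values; limit_period is re-implemented locally as a stand-in assumed to match the imported helper, dir_offset and num_bins are illustrative values, and F.one_hot stands in for the file's one_hot_f:

# Illustrative sketch: absolute yaw -> direction-bin label, as in get_direction_target.
import numpy as np
import torch
import torch.nn.functional as F

def limit_period(val, offset, period):
    # stand-in: with offset=0 this wraps val into [0, period)
    return val - torch.floor(val / period + offset) * period

num_bins = 2
dir_offset = 0.7853                                   # roughly pi / 4

rot_gt = torch.tensor([0.1, 1.7, 3.3, -2.9])          # absolute yaw in radians
offset_rot = limit_period(rot_gt - dir_offset, 0, 2 * np.pi)
dir_cls = torch.clamp((offset_rot / (2 * np.pi / num_bins)).floor().long(),
                      min=0, max=num_bins - 1)
dir_onehot = F.one_hot(dir_cls, num_bins).float()
print(dir_cls, dir_onehot)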
+ One-hot encoded classification targets + + Returns: + loss: (B, #anchors, #classes) float tensor. + Sigmoid cross entropy loss without reduction + """ + loss = torch.clamp(input, min=0) - input * target + \ + torch.log1p(torch.exp(-torch.abs(input))) + return loss + + @staticmethod + def add_sin_difference_dim(boxes1, boxes2, dim=6): + """ + This is different with other loss function. + Here we especially retain the angel + + Add sin difference ? + Replace sin difference ! + + Returns: + [B, H*W, 7] -> [B, H*W, 8] + """ + assert dim != -1 + + # sin(theta1 - theta2) = sin(theta1)*cos(theta2) - cos(theta1)*sin(theta2) + + rad_pred_encoding = torch.sin(boxes1[..., dim:dim + 1]) * \ + torch.cos(boxes2[..., dim:dim + 1]) + + rad_tg_encoding = torch.cos(boxes1[..., dim: dim + 1]) * \ + torch.sin(boxes2[..., dim: dim + 1]) + + # boxes1 = torch.cat([boxes1[..., :dim], rad_pred_encoding, + # boxes1[..., dim + 1:]], dim=-1) + # boxes2 = torch.cat([boxes2[..., :dim], rad_tg_encoding, + # boxes2[..., dim + 1:]], dim=-1) + + boxes1_encoded = torch.cat([boxes1[..., :dim], rad_pred_encoding, + boxes1[..., dim:]], dim=-1) + boxes2_encoded = torch.cat([boxes2[..., :dim], rad_tg_encoding, + boxes2[..., dim:]], dim=-1) + + return boxes1_encoded, boxes2_encoded + + + def logging(self, epoch, batch_id, batch_len, writer = None): + """ + Print out the loss function for current iteration. + + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. + batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict['total_loss'] + reg_loss = self.loss_dict['reg_loss'] + conf_loss = self.loss_dict['conf_loss'] + kl_loss = self.loss_dict['kl_loss'] + + + print_msg = ("[epoch %d][%d/%d], || Loss: %.4f || Conf Loss: %.4f" + " || Loc Loss: %.4f || KL Loss: %.4f" % ( + epoch, batch_id + 1, batch_len, + total_loss.item(), conf_loss.item(), reg_loss.item(), kl_loss.item())) + + if self.use_dir: + dir_loss = self.loss_dict['dir_loss'] + print_msg += " || Dir Loss: %.4f" % dir_loss.item() + + print(print_msg) + + if not writer is None: + writer.add_scalar('Regression_loss', reg_loss.item(), + epoch*batch_len + batch_id) + writer.add_scalar('Confidence_loss', conf_loss.item(), + epoch*batch_len + batch_id) + writer.add_scalar('kl_loss', kl_loss.item(), + epoch*batch_len + batch_id) + if self.use_dir: + writer.add_scalar('dir_loss', dir_loss.item(), + epoch*batch_len + batch_id) + +def one_hot_f(tensor, depth, dim=-1, on_value=1.0, dtype=torch.float32): + tensor_onehot = torch.zeros(*list(tensor.shape), depth, dtype=dtype, device=tensor.device) # [4, 70400, 2] + tensor_onehot.scatter_(dim, tensor.unsqueeze(dim).long(), on_value) # [4, 70400, 2] + return tensor_onehot + +def softmax_cross_entropy_with_logits(logits, labels): + param = list(range(len(logits.shape))) + transpose_param = [0] + [param[-1]] + param[1:-1] + logits = logits.permute(*transpose_param) + loss_ftor = torch.nn.CrossEntropyLoss(reduction="none") + loss = loss_ftor(logits, labels.max(dim=-1)[1]) + return loss diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/point_pillar_v2v_robust_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/point_pillar_v2v_robust_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..64c041713a7e69f7456a638d619f552f94e58567 --- /dev/null +++ 
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/point_pillar_v2v_robust_loss.py @@ -0,0 +1,387 @@ +# -*- coding: utf-8 -*- +# Author: OpenPCDet, Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +from icecream import ic +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +from opencood.models.sub_modules.v2v_robust_module import regroup +from opencood.utils.transformation_utils import tfm_to_pose, tfm_to_pose_torch +torch.set_printoptions(precision=3, sci_mode=False) + +class WeightedSmoothL1Loss(nn.Module): + """ + Code-wise Weighted Smooth L1 Loss modified based on fvcore.nn.smooth_l1_loss + https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/smooth_l1_loss.py + | 0.5 * x ** 2 / beta if abs(x) < beta + smoothl1(x) = | + | abs(x) - 0.5 * beta otherwise, + where x = input - target. + """ + def __init__(self, beta: float = 1.0 / 9.0, code_weights: list = None): + """ + Args: + beta: Scalar float. + L1 to L2 change point. + For beta values < 1e-5, L1 loss is computed. + code_weights: (#codes) float list if not None. + Code-wise weights. + """ + super(WeightedSmoothL1Loss, self).__init__() + self.beta = beta + if code_weights is not None: + self.code_weights = np.array(code_weights, dtype=np.float32) + self.code_weights = torch.from_numpy(self.code_weights).cuda() + + @staticmethod + def smooth_l1_loss(diff, beta): + if beta < 1e-5: + loss = torch.abs(diff) + else: + n = torch.abs(diff) + loss = torch.where(n < beta, 0.5 * n ** 2 / beta, n - 0.5 * beta) + + return loss + + def forward(self, input: torch.Tensor, + target: torch.Tensor, weights: torch.Tensor = None): + """ + Args: + input: (B, #anchors, #codes) float tensor. + Ecoded predicted locations of objects. + target: (B, #anchors, #codes) float tensor. + Regression targets. + weights: (B, #anchors) float tensor if not None. + + Returns: + loss: (B, #anchors) float tensor. + Weighted smooth l1 loss without reduction. 
+ """ + target = torch.where(torch.isnan(target), input, target) # ignore nan targets + + diff = input - target + loss = self.smooth_l1_loss(diff, self.beta) + + # anchor-wise weighting + if weights is not None: + assert weights.shape[0] == loss.shape[0] and weights.shape[1] == loss.shape[1] + loss = loss * weights.unsqueeze(-1) + + return loss + + +class PointPillarV2VRobustLoss(nn.Module): + def __init__(self, args): + super(PointPillarV2VRobustLoss, self).__init__() + self.reg_loss_func = WeightedSmoothL1Loss() + self.score_loss_func = nn.BCELoss(reduce=True, reduction="mean") + self.pose_loss_func = nn.SmoothL1Loss(reduce=True, reduction="mean", beta=1.0/9) + self.alpha = 0.25 + self.gamma = 2.0 + + self.cls_weight = args['cls_weight'] + self.reg_coe = args['reg'] + self.score_weight = args['score_weight'] + self.pose_weight = args['pose_weight'] + self.loss_dict = {} + + def forward(self, output_dict, target_dict): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + stage = output_dict['stage'] + + if stage == 0 or stage == 2: + rm = output_dict['rm'] # [B, 14, 50, 176] + psm = output_dict['psm'] # [B, 2, 50, 176] + targets = target_dict['targets'] + + cls_preds = psm.permute(0, 2, 3, 1).contiguous() # N, C, H, W -> N, H, W, C + + box_cls_labels = target_dict['pos_equal_one'] # [B, 50, 176, 2] + box_cls_labels = box_cls_labels.view(psm.shape[0], -1).contiguous() + + positives = box_cls_labels > 0 + negatives = box_cls_labels == 0 + negative_cls_weights = negatives * 1.0 + cls_weights = (negative_cls_weights + 1.0 * positives).float() + reg_weights = positives.float() + + pos_normalizer = positives.sum(1, keepdim=True).float() + reg_weights /= torch.clamp(pos_normalizer, min=1.0) + cls_weights /= torch.clamp(pos_normalizer, min=1.0) + cls_targets = box_cls_labels + cls_targets = cls_targets.unsqueeze(dim=-1) + + cls_targets = cls_targets.squeeze(dim=-1) + one_hot_targets = torch.zeros( + *list(cls_targets.shape), 2, + dtype=cls_preds.dtype, device=cls_targets.device + ) + one_hot_targets.scatter_(-1, cls_targets.unsqueeze(dim=-1).long(), 1.0) + cls_preds = cls_preds.view(psm.shape[0], -1, 1) + one_hot_targets = one_hot_targets[..., 1:] + + cls_loss_src = self.cls_loss_func(cls_preds, + one_hot_targets, + weights=cls_weights) # [N, M] + cls_loss = cls_loss_src.sum() / psm.shape[0] + conf_loss = cls_loss * self.cls_weight + + # regression + rm = rm.permute(0, 2, 3, 1).contiguous() + rm = rm.view(rm.size(0), -1, 7) + targets = targets.view(targets.size(0), -1, 7) + box_preds_sin, reg_targets_sin = self.add_sin_difference(rm, + targets) + loc_loss_src =\ + self.reg_loss_func(box_preds_sin, + reg_targets_sin, + weights=reg_weights) + reg_loss = loc_loss_src.sum() / rm.shape[0] + reg_loss *= self.reg_coe + + total_loss = reg_loss + conf_loss + + self.loss_dict.update({'total_loss': total_loss, + 'reg_loss': reg_loss, + 'conf_loss': conf_loss}) + else: + total_loss = 0 + + + + # robust v2vnet part + record_len = target_dict['record_len'] # we can also put this in output_dict + if stage == 0: + scores = output_dict['scores'] + choice = output_dict['choice'] + + score_loss = self.attention_loss(scores, choice, record_len) + total_loss += self.score_weight * score_loss + self.loss_dict.update({'total_loss': total_loss, + 'score_loss': score_loss}) + + elif stage == 1 or stage == 2: + pairwise_corr = output_dict['pairwise_corr'] + pairwise_t_matrix = output_dict['pairwise_t_matrix'] + pairwise_t_matrix_gt = target_dict['pairwise_t_matrix'] + + pose_loss = 
self.pose_loss(pairwise_corr, pairwise_t_matrix, pairwise_t_matrix_gt, record_len) + total_loss += self.pose_weight * pose_loss + self.loss_dict.update({'total_loss': total_loss, + 'pose_loss': pose_loss}) + + + return total_loss + + def attention_loss(self, scores, choices, record_len): + """ + Args: + scores: (B, L, L) + scores[b,i,i] is already 0. + choices: (sum(N_cav), 1) + 0 is strong noise, 1 is weak noise + record_len: + list, shape [B] + """ + # first build gt label from choice + B = scores.shape[0] + choice_split = regroup(choices, record_len) + label = torch.zeros_like(scores, device=scores.device) + mask = torch.zeros_like(scores, device=scores.device) + for b in range(B): + N = record_len[b] + choice = choice_split[b].float() # [N, 1] + choice = choice @ choice.T # [N, N] + + gamma = 0.85 + label[b,:N,:N] = choice * gamma + (1-choice) * (1-gamma) # [N, N] + + mask[b,:N,:N] = 1 + mask[b,range(N),range(N)] = 0 + + mask = mask.bool() + + input = torch.masked_select(scores, mask) + target = torch.masked_select(label, mask) + print("input:", input) + print("target:", target) + + return self.score_loss_func(input, target) + + def pose_loss(self, pairwise_corr, pairwise_t_matrix, pairwise_t_matrix_gt, record_len): + """ + Args: + pairwise_corr: [B, L, L, 3] + pairwise_t_matrix/pairwise_t_matrix_gt: [B,L,L,4,4] + record_len: list, shape [B] + """ + + pairwise_t_matrix_gt = pairwise_t_matrix_gt.float() + B, L = pairwise_t_matrix.shape[:2] + mask = torch.zeros((B, L, L), device = pairwise_t_matrix.device) + + for b in range(B): + N = record_len[b] + mask[b,:N,:N] = 1 + mask[b,range(N), range(N)] = 0 + + pair_corr_gt = torch.linalg.solve(pairwise_t_matrix.transpose(-2,-1), pairwise_t_matrix_gt.transpose(-2,-1)).transpose(-2,-1) + + yaw = pairwise_corr[..., 2] # [B,L,L] + yaw_gt = torch.rad2deg(torch.atan2(pair_corr_gt[...,1,0], pair_corr_gt[...,0,0])) # [B,L,L] + + x = pairwise_corr[..., 0] # [B,L,L] + x_gt = pair_corr_gt[..., 0,3] + + y = pairwise_corr[..., 1] # [B,L,L] + y_gt = pair_corr_gt[..., 1,3] + + mask = mask.bool() + mask = mask.view(B,L,L) # [B, L, L, ] + + input_x = torch.masked_select(x, mask) + target_x = torch.masked_select(x_gt, mask) + + input_y = torch.masked_select(y, mask) + target_y = torch.masked_select(y_gt, mask) + + input_yaw = torch.masked_select(yaw, mask) + target_yaw = torch.masked_select(yaw_gt, mask) + + loss_x = self.pose_loss_func(input_x, target_x) + loss_y = self.pose_loss_func(input_y, target_y) + loss_yaw = self.pose_loss_func(input_yaw, target_yaw) + + lambda_trans = 2/3 + lambda_rot = 1/3 + print("x:\n", input_x, "\n", target_x) + print("y:\n", input_y, "\n", target_y) + print("yaw:\n", input_yaw, "\n", target_yaw) + + return lambda_trans * (loss_x + loss_y) + lambda_rot * loss_yaw + + + + def cls_loss_func(self, input: torch.Tensor, + target: torch.Tensor, + weights: torch.Tensor): + """ + Args: + input: (B, #anchors, #classes) float tensor. + Predicted logits for each class + target: (B, #anchors, #classes) float tensor. + One-hot encoded classification targets + weights: (B, #anchors) float tensor. + Anchor-wise weights. + + Returns: + weighted_loss: (B, #anchors, #classes) float tensor after weighting. 
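In pose_loss above, torch.linalg.solve(T_est.transpose(-2, -1), T_gt.transpose(-2, -1)).transpose(-2, -1) is just T_gt @ inv(T_est): the residual transform that corrects the estimated pairwise pose to the ground truth, whose rotation angle and translation become the yaw / x / y regression targets. A small sketch with a made-up planar transform confirming the algebra:

# Illustrative sketch: solve(T_est^T, T_gt^T)^T == T_gt @ inv(T_est), the pose residual
# whose yaw / x / y components are supervised against the predicted correction.
import math
import torch

def se2(yaw_deg, x, y):
    yaw = math.radians(yaw_deg)
    return torch.tensor([[math.cos(yaw), -math.sin(yaw), 0.0, x],
                         [math.sin(yaw),  math.cos(yaw), 0.0, y],
                         [0.0,            0.0,           1.0, 0.0],
                         [0.0,            0.0,           0.0, 1.0]])

T_est = se2(10.0, 1.0, 2.0)
T_gt = se2(13.0, 1.5, 1.0)

corr = torch.linalg.solve(T_est.transpose(-2, -1), T_gt.transpose(-2, -1)).transpose(-2, -1)
assert torch.allclose(corr, T_gt @ torch.linalg.inv(T_est), atol=1e-4)

yaw_residual = torch.rad2deg(torch.atan2(corr[1, 0], corr[0, 0]))
x_residual, y_residual = corr[0, 3], corr[1, 3]
print(yaw_residual, x_residual, y_residual)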
+ """ + pred_sigmoid = torch.sigmoid(input) + alpha_weight = target * self.alpha + (1 - target) * (1 - self.alpha) + pt = target * (1.0 - pred_sigmoid) + (1.0 - target) * pred_sigmoid + focal_weight = alpha_weight * torch.pow(pt, self.gamma) + + bce_loss = self.sigmoid_cross_entropy_with_logits(input, target) + + loss = focal_weight * bce_loss + + if weights.shape.__len__() == 2 or \ + (weights.shape.__len__() == 1 and target.shape.__len__() == 2): + weights = weights.unsqueeze(-1) + + assert weights.shape.__len__() == loss.shape.__len__() + + return loss * weights + + @staticmethod + def sigmoid_cross_entropy_with_logits(input: torch.Tensor, target: torch.Tensor): + """ PyTorch Implementation for tf.nn.sigmoid_cross_entropy_with_logits: + max(x, 0) - x * z + log(1 + exp(-abs(x))) in + https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits + + Args: + input: (B, #anchors, #classes) float tensor. + Predicted logits for each class + target: (B, #anchors, #classes) float tensor. + One-hot encoded classification targets + + Returns: + loss: (B, #anchors, #classes) float tensor. + Sigmoid cross entropy loss without reduction + """ + loss = torch.clamp(input, min=0) - input * target + \ + torch.log1p(torch.exp(-torch.abs(input))) + return loss + + @staticmethod + def add_sin_difference(boxes1, boxes2, dim=6): + assert dim != -1 + rad_pred_encoding = torch.sin(boxes1[..., dim:dim + 1]) * \ + torch.cos(boxes2[..., dim:dim + 1]) + rad_tg_encoding = torch.cos(boxes1[..., dim:dim + 1]) * \ + torch.sin(boxes2[..., dim:dim + 1]) + + boxes1 = torch.cat([boxes1[..., :dim], rad_pred_encoding, + boxes1[..., dim + 1:]], dim=-1) + boxes2 = torch.cat([boxes2[..., :dim], rad_tg_encoding, + boxes2[..., dim + 1:]], dim=-1) + return boxes1, boxes2 + + + def logging(self, epoch, batch_id, batch_len, writer = None): + """ + Print out the loss function for current iteration. + + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. 
+ batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict['total_loss'].item() + if 'reg_loss' in self.loss_dict: + reg_loss = self.loss_dict['reg_loss'].item() + else: + reg_loss = 0 + if 'conf_loss' in self.loss_dict: + conf_loss = self.loss_dict['conf_loss'].item() + else: + conf_loss = 0 + if "score_loss" in self.loss_dict: + score_loss = self.loss_dict['score_loss'] + else: + score_loss = 0 + if "pose_loss" in self.loss_dict: + pose_loss = self.loss_dict['pose_loss'] + else: + pose_loss = 0 + + print("[epoch %d][%d/%d], || Loss: %.4f || Conf Loss: %.4f" + " || Loc Loss: %.4f || Score Loss: %.4f || Pose Loss: %.4f" % ( + epoch, batch_id + 1, batch_len, + total_loss, conf_loss, reg_loss, score_loss, pose_loss)) + + if not writer is None: + writer.add_scalar('Regression_loss', reg_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Confidence_loss', conf_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Score_loss', score_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Pose_loss', pose_loss, + epoch*batch_len + batch_id) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/center_point_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/center_point_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..d56357f43807611fd80ad27be91025dc8d6bcc34 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/center_point_loss.py @@ -0,0 +1,697 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +import math + + +""" +Gaussian Loss +""" +class GaussianFocalLoss(nn.Module): + """GaussianFocalLoss is a variant of focal loss. + + More details can be found in the `paper + `_ + Code is modified from `kp_utils.py + `_ # noqa: E501 + Please notice that the target in GaussianFocalLoss is a gaussian heatmap, + not 0/1 binary target. + + Args: + alpha (float): Power of prediction. + gamma (float): Power of target for negtive samples. + reduction (str): Options are "none", "mean" and "sum". + loss_weight (float): Loss weight of current loss. + """ + + def __init__(self, + alpha=2.0, + gamma=4.0, + reduction='mean', + loss_weight=1.0): + super(GaussianFocalLoss, self).__init__() + self.alpha = alpha + self.gamma = gamma + self.reduction = reduction + self.loss_weight = loss_weight + + def forward(self, + pred, + target, + weight=None, + avg_factor=None, + reduction_override=None): + """Forward function. + + Args: + pred (torch.Tensor): The prediction. + target (torch.Tensor): The learning target of the prediction + in gaussian distribution. + weight (torch.Tensor, optional): The weight of loss for each + prediction. Defaults to None. + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + reduction_override (str, optional): The reduction method used to + override the original reduction method of the loss. + Defaults to None. + """ + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + loss_reg = self.loss_weight * gaussian_focal_loss( + pred, + target, + weight, + alpha=self.alpha, + gamma=self.gamma, + reduction=reduction, + avg_factor=avg_factor) + return loss_reg + +def gaussian_focal_loss(pred, gaussian_target, alpha=2.0, gamma=4.0): + """`Focal Loss `_ for targets in gaussian + distribution. 
+ + Args: + pred (torch.Tensor): The prediction. + gaussian_target (torch.Tensor): The learning target of the prediction + in gaussian distribution. + alpha (float, optional): A balanced form for Focal Loss. + Defaults to 2.0. + gamma (float, optional): The gamma for calculating the modulating + factor. Defaults to 4.0. + """ + eps = 1e-12 + device = pred.device + pos_weights = gaussian_target.eq(1) + pos_weights = pos_weights.to(device) + neg_weights = (1 - gaussian_target).pow(gamma) + neg_weights = neg_weights.to(device) + pos_loss = -(pred + eps).log() * (1 - pred).pow(alpha) * pos_weights + neg_loss = -(1 - pred + eps).log() * pred.pow(alpha) * neg_weights + return pos_loss + neg_loss + + +def clip_sigmoid(x, eps=1e-4): + """Sigmoid function for input feature. + + Args: + x (torch.Tensor): Input feature map with the shape of [B, N, H, W]. + eps (float): Lower bound of the range to be clamped to. Defaults + to 1e-4. + + Returns: + torch.Tensor: Feature map after sigmoid. + """ + y = torch.clamp(torch.sigmoid(x), min=eps, max=1 - eps) + # y = torch.clamp(x.sigmoid_(), min=eps, max=1 - eps) + return y + +def _gather_feat(feat, ind, mask=None): + # feat : [bs, wxh, c] + dim = feat.size(2) + # ind : [bs, index, c] + ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) + feat = feat.gather(1, ind) # 按照dim=1获取ind + if mask is not None: + mask = mask.unsqueeze(2).expand_as(feat) + feat = feat[mask] + feat = feat.view(-1, dim) + return feat + + +def _transpose_and_gather_feat(feat, ind): + feat = feat.permute(0, 2, 3, 1).contiguous() ## # from [bs c h w] to [bs, h, w, c] + feat = feat.view(feat.size(0), -1, feat.size(3)) # to [bs, wxh, c] + feat = _gather_feat(feat, ind) + return feat + + + +class RegLoss(nn.Module): + '''Regression loss for an output tensor + Arguments: + output (batch x dim x h x w) + mask (batch x max_objects) + ind (batch x max_objects) + target (batch x max_objects x dim) + ''' + def __init__(self): + super(RegLoss, self).__init__() + + def forward(self, output, mask, ind, target): + pred = _transpose_and_gather_feat(output, ind) + mask = mask.float().unsqueeze(2) + + loss = F.l1_loss(pred*mask, target*mask, reduction='none') + loss = loss / (mask.sum() + 1e-4) + loss = loss.transpose(2 ,0).sum(dim=2).sum(dim=1) + return loss + + + +class FastFocalLoss(nn.Module): + ''' + Reimplemented focal loss, exactly the same as the CornerNet version. + Faster and costs much less memory. 
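gaussian_focal_loss above is the penalty-reduced focal loss used by CornerNet/CenterNet heads: only cells where the gaussian heatmap is exactly 1 count as positives, and every other cell is a negative whose contribution is scaled down by (1 - target)**gamma, so cells right next to a peak are barely penalized. A tiny numeric sketch with illustrative values:

# Illustrative sketch: penalty-reduced focal loss on a gaussian heatmap target.
import torch

eps, alpha, gamma = 1e-12, 2.0, 4.0
pred = torch.tensor([[0.9, 0.4],
                     [0.2, 0.1]])
target = torch.tensor([[1.0, 0.6],         # one peak and one "soft" neighbour
                       [0.0, 0.0]])

pos_weights = target.eq(1).float()
neg_weights = (1 - target).pow(gamma)      # 0.6 -> 0.0256, 0.0 -> 1.0
pos_loss = -(pred + eps).log() * (1 - pred).pow(alpha) * pos_weights
neg_loss = -(1 - pred + eps).log() * pred.pow(alpha) * neg_weights
print(pos_loss + neg_loss)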
+ ''' + def __init__(self): + super(FastFocalLoss, self).__init__() + + def forward(self, out, target, ind, mask, cat): + ''' + Arguments: + out, target: B x C x H x W + ind, mask: B x M + cat (category id for peaks): B x M + ''' + mask = mask.float() + gt = torch.pow(1 - target, 4) + neg_loss = torch.log(1 - out) * torch.pow(out, 2) * gt + neg_loss = neg_loss.sum() + + pos_pred_pix = _transpose_and_gather_feat(out, ind) # B x M x C + pos_pred = pos_pred_pix.gather(2, cat.unsqueeze(2)) # B x M + num_pos = mask.sum() + pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) * \ + mask.unsqueeze(2) + pos_loss = pos_loss.sum() + if num_pos == 0: + return - neg_loss + return - (pos_loss + neg_loss) / num_pos + +class CenterPointLoss(nn.Module): + def __init__(self, args): + super(CenterPointLoss, self).__init__() + + self.cls_weight = args['cls_weight'] + self.loc_weight = args['loc_weight'] + self.code_weights = args['code_weights'] + self.target_cfg = args['target_assigner_config'] + self.lidar_range = self.target_cfg['cav_lidar_range'] + self.voxel_size = self.target_cfg['voxel_size'] + + self.loss_cls = GaussianFocalLoss(reduction='mean') + self.crit = FastFocalLoss() + self.crit_reg = RegLoss() + + self.loss_dict = {} + + def forward(self, output_dict, target_dict, suffix=""): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + # Predictions + box_preds = output_dict['bbox_preds{}'.format(suffix)].permute(0, 2, 3, 1).contiguous() # [B, H, W, C] + cls_preds = clip_sigmoid(output_dict['cls_preds{}'.format(suffix)]) + + # GTs + bbox_center = target_dict['object_bbx_center{}'.format(suffix)].cpu().numpy() + bbox_mask = target_dict['object_bbx_mask{}'.format(suffix)].cpu().numpy() + batch_size = bbox_mask.shape[0] + + max_gt = int(max(bbox_mask.sum(axis=1))) + gt_boxes3d = np.zeros((batch_size, max_gt, bbox_center[0].shape[-1]), dtype=np.float32) # [B, max_anchor_num, 7] + for k in range(batch_size): + gt_boxes3d[k, :int(bbox_mask[k].sum()), :] = bbox_center[k, :int(bbox_mask[k].sum()), :] + gt_boxes3d = torch.from_numpy(gt_boxes3d).to(box_preds.device) + + targets_dict = self.assign_targets( + gt_boxes=gt_boxes3d # [B, max_anchor_num, 7 + C ] heatmap [2,1,h,w] anno_boxes [2,100,8] inds [2, 100] + ) + + cls_gt = targets_dict['heatmaps'] + box_gt = (targets_dict['anno_boxes'], targets_dict['inds'], targets_dict['masks']) + + cls_loss = self.get_cls_layer_loss(cls_preds, cls_gt) + box_loss = self.get_box_reg_layer_loss(box_preds, box_gt) + rpn_loss = cls_loss + box_loss + + self.loss_dict.update({ 'total_loss': rpn_loss.item(), + 'reg_loss': box_loss.item(), + 'cls_loss': cls_loss.item()}) + + return rpn_loss + + def logging(self, epoch, batch_id, batch_len, writer = None, suffix=""): + """ + Print out the loss function for current iteration. + + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. 
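Both RegLoss and FastFocalLoss above rely on _transpose_and_gather_feat to pull per-object predictions out of a dense head output: the B x C x H x W map is flattened to B x (H*W) x C and indexed with the flat center indices ind = y * W + x produced by the target assigner. A small standalone sketch of that gather with made-up shapes:

# Illustrative sketch: gather per-object feature vectors from a B x C x H x W map
# using flat center indices ind = y * W + x.
import torch

B, C, H, W = 2, 8, 4, 6
feat = torch.randn(B, C, H, W)
ind = torch.tensor([[5, 17],               # two object centers per sample
                    [0, 23]])

flat = feat.permute(0, 2, 3, 1).reshape(B, H * W, C)                      # B x (H*W) x C
gathered = flat.gather(1, ind.unsqueeze(-1).expand(B, ind.size(1), C))    # B x M x C

# sanity check against direct indexing
y, x = ind[0, 1] // W, ind[0, 1] % W
assert torch.allclose(gathered[0, 1], feat[0, :, y, x])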
+ batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict.get('total_loss', 0) + reg_loss = self.loss_dict.get('reg_loss', 0) + cls_loss = self.loss_dict.get('cls_loss', 0) + + print("[epoch %d][%d/%d]%s, || Loss: %.4f || Conf Loss: %.4f" + " || Loc Loss: %.4f" % ( + epoch, batch_id + 1, batch_len, suffix, + total_loss, cls_loss, reg_loss)) + + if not writer is None: + writer.add_scalar('Regression_loss', reg_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Confidence_loss', cls_loss, + epoch*batch_len + batch_id) + + + def get_cls_layer_loss(self, pred_heatmaps, gt_heatmaps): + num_pos = gt_heatmaps.eq(1).float().sum().item() + + cls_loss = self.loss_cls( + pred_heatmaps, + gt_heatmaps, + avg_factor=max(num_pos, 1)) + + cls_loss = cls_loss * self.cls_weight + return cls_loss + + + def _gather_feat(self, feat, ind, mask=None): + """Gather feature map. + + Given feature map and index, return indexed feature map. + + Args: + feat (torch.tensor): Feature map with the shape of [B, H*W, 10]. + ind (torch.Tensor): Index of the ground truth boxes with the + shape of [B, max_obj]. + mask (torch.Tensor): Mask of the feature map with the shape + of [B, max_obj]. Default: None. + + Returns: + torch.Tensor: Feature map after gathering with the shape + of [B, max_obj, 10]. + """ + device = feat.device + dim = feat.size(2) + ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) # 把 ind 和 dim 拼接在一起 + feat = feat.gather(1, ind.to(device)) + if mask is not None: + mask = mask.unsqueeze(2).expand_as(feat) + feat = feat[mask] + feat = feat.view(-1, dim) + return feat + + + def get_box_reg_layer_loss(self, bbox_preds, bbox_gt): + target_box, inds, masks = bbox_gt + pred = bbox_preds + ind = inds + num = masks.float().sum() + pred = pred.view(pred.size(0), -1, pred.size(3)) # [n, h*w, 8 ] + pred = self._gather_feat(pred, ind) + mask = masks.unsqueeze(2).expand_as(target_box).float() ## 把 mask 的维度进行扩展 + isnotnan = (~torch.isnan(target_box)).float() + mask *= isnotnan + + code_weights = self.code_weights + bbox_weights = mask * mask.new_tensor(code_weights) + + loc_loss = l1_loss( + pred, target_box, bbox_weights, avg_factor=(num + 1e-4)) + + loc_loss = loc_loss * self.loc_weight + return loc_loss + + + def assign_targets(self, gt_boxes): + """Generate targets. + + Args: + gt_boxes: ( M, 7+c) box + cls ## 这个地方函数和centerpoint-kitti 那个不太一样,这里是分开进行计算的 + + Returns: + Returns: + tuple[list[torch.Tensor]]: Tuple of target including \ + the following results in order. + + - list[torch.Tensor]: Heatmap scores. + - list[torch.Tensor]: Ground truth boxes. + - list[torch.Tensor]: Indexes indicating the \ + position of the valid boxes. + - list[torch.Tensor]: Masks indicating which \ + boxes are valid. + """ + if gt_boxes.shape[-1] == 8: + gt_bboxes_3d, gt_labels_3d = gt_boxes[..., :-1], gt_boxes[..., -1] # gt_box [2,14,8] batch_size * bbox_num * 8 + heatmaps, anno_boxes, inds, masks = self.get_targets_single(gt_bboxes_3d, gt_labels_3d) + elif gt_boxes.shape[-1] == 7: + gt_bboxes_3d = gt_boxes + heatmaps, anno_boxes, inds, masks = self.get_targets_single(gt_bboxes_3d) + + # transpose heatmaps, because the dimension of tensors in each task is + # different, we have to use numpy instead of torch to do the transpose. 
+ # heatmaps = np.array(heatmaps).transpose(1, 0).tolist() + # heatmaps = [torch.stack(hms_) for hms_ in heatmaps] + # # heatmaps = torch.from_numpy(np.array(heatmaps)) + # # transpose anno_boxes + # anno_boxes = np.array(anno_boxes).transpose(1, 0).tolist() + # anno_boxes = [torch.stack(anno_boxes_) for anno_boxes_ in anno_boxes] + # # transpose inds + # inds = np.array(inds).transpose(1, 0).tolist() + # inds = [torch.stack(inds_) for inds_ in inds] + # # transpose inds + # masks = np.array(masks).transpose(1, 0).tolist() + # masks = [torch.stack(masks_) for masks_ in masks] + + all_targets_dict = { + 'heatmaps': heatmaps, + 'anno_boxes': anno_boxes, + 'inds': inds, + 'masks': masks + } + + return all_targets_dict + + + def get_targets_single(self, gt_bbox_3d, gt_labels_3d=None): + + batch_size = gt_bbox_3d.shape[0] + device = gt_bbox_3d.device + max_objs = self.target_cfg['max_objs'] + pc_range = self.lidar_range + voxel_size = self.voxel_size + + grid_size = (np.array(self.lidar_range[3:6]) - + np.array(self.lidar_range[0:3])) / np.array(self.voxel_size) + grid_size = np.round(grid_size).astype(np.int64) + feature_map_size = grid_size[:2] // self.target_cfg['out_size_factor'] + + draw_gaussian = draw_heatmap_gaussian + heatmaps, anno_boxes, inds, masks = [], [], [], [] + + for batch in range(batch_size): + task_boxes = gt_bbox_3d[batch, :, :] + if not gt_labels_3d is None: + task_classes = gt_labels_3d[batch, :] + + heatmap = gt_bbox_3d.new_zeros( # 辅助gt_bboxes_3d的属性 + (1, feature_map_size[1],feature_map_size[0])) + + anno_box = gt_bbox_3d.new_zeros((max_objs, 8), + dtype = torch.float32) + + ind = gt_bbox_3d.new_zeros((max_objs), dtype=torch.int64) + mask = gt_bbox_3d.new_zeros((max_objs), dtype=torch.uint8) + + num_objs = min(task_boxes.shape[0], max_objs) + + for k in range(num_objs): + # 计算x的heatmap坐标 + coor_x = (task_boxes[k][0] - pc_range[0]) / voxel_size[0] / self.target_cfg['out_size_factor'] + coor_y = (task_boxes[k][1] - pc_range[1]) / voxel_size[1] / self.target_cfg['out_size_factor'] + coor_z = (task_boxes[k][2] - pc_range[2]) / voxel_size[2] / self.target_cfg['out_size_factor'] + h = task_boxes[k][3] / voxel_size[0] / self.target_cfg['out_size_factor'] + w = task_boxes[k][4] / voxel_size[1] / self.target_cfg['out_size_factor'] + l = task_boxes[k][5] / voxel_size[2] / self.target_cfg['out_size_factor'] + rot = task_boxes[k][6] + + if h > 0 and w > 0: + radius = gaussian_radius( + (h, w), + min_overlap=self.target_cfg['gaussian_overlap']) + radius = max(self.target_cfg['min_radius'], int(radius)) + + center = torch.tensor([coor_x, coor_y], + dtype=torch.float32, + device=device) + center_int = center.to(torch.int32) ## bbox 的中心在heatmap 中的位置 + + # throw out not in range objects to avoid out of array + # area when creating the heatmap + if not (0 <= center_int[0] < feature_map_size[0].item() + and 0 <= center_int[1] < feature_map_size[1].item()): + continue + + draw_gaussian(heatmap[0], center_int, radius) + + x, y = center_int[0], center_int[1] + assert (center_int[1] * feature_map_size[0] + center_int[0] < + feature_map_size[0] * feature_map_size[1]) + ind[k] = y * feature_map_size[0] + x + mask[k] = 1 + # box_dim = task_boxes[k][3:6] + # box_dim = box_dim.log() + box_dim = torch.cat([h.unsqueeze(0), w.unsqueeze(0), l.unsqueeze(0)], dim=0) + anno_box[k] = torch.cat([ + center - torch.tensor([x, y], device=device), + coor_z.unsqueeze(0), box_dim, + torch.sin(rot).unsqueeze(0), + torch.cos(rot).unsqueeze(0), + ]) # [x,y,z, w, h, l, sin(heading), cos(heading)] + + 
heatmaps.append(heatmap) + anno_boxes.append(anno_box) + inds.append(ind) + masks.append(mask) + # import cv2; cv2.imwrite('test_{}.png'.format(batch), heatmap.cpu().numpy()[0]*255) + heatmaps = torch.stack(heatmaps) + anno_boxes = torch.stack(anno_boxes) + inds = torch.stack(inds) + masks = torch.stack(masks) + return heatmaps, anno_boxes, inds, masks # [B, H, W] + + +def gaussian_2d(shape, sigma=1): + """Generate gaussian map. + + Args: + shape (list[int]): Shape of the map. + sigma (float): Sigma to generate gaussian map. + Defaults to 1. + + Returns: + np.ndarray: Generated gaussian map. + """ + m, n = [(ss - 1.) / 2. for ss in shape] + y, x = np.ogrid[-m:m + 1, -n:n + 1] + + h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) + h[h < np.finfo(h.dtype).eps * h.max()] = 0 + return h + + + +def draw_heatmap_gaussian(heatmap, center, radius, k=1): + """Get gaussian masked heatmap. + + Args: + heatmap (torch.Tensor): Heatmap to be masked. + center (torch.Tensor): Center coord of the heatmap. + radius (int): Radius of gausian. + K (int): Multiple of masked_gaussian. Defaults to 1. + + Returns: + torch.Tensor: Masked heatmap. + """ + diameter = 2 * radius + 1 + gaussian = gaussian_2d((diameter, diameter), sigma=diameter / 6) + + x, y = int(center[0]), int(center[1]) + + height, width = heatmap.shape[0:2] + + left, right = min(x, radius), min(width - x, radius + 1) + top, bottom = min(y, radius), min(height - y, radius + 1) + + masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] + masked_gaussian = torch.from_numpy( + gaussian[radius - top:radius + bottom, + radius - left:radius + right]).to(heatmap.device, + torch.float32) + if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: + torch.max(masked_heatmap, masked_gaussian * k, out=masked_heatmap) + return heatmap + + + +def gaussian_radius(det_size, min_overlap=0.5): + """Get radius of gaussian. + + Args: + det_size (tuple[torch.Tensor]): Size of the detection result. + min_overlap (float): Gaussian_overlap. Defaults to 0.5. + + Returns: + torch.Tensor: Computed radius. + """ + height, width = det_size + + a1 = 1 + b1 = (height + width) + c1 = width * height * (1 - min_overlap) / (1 + min_overlap) + sq1 = torch.sqrt(b1**2 - 4 * a1 * c1) + r1 = (b1 + sq1) / (2 * a1) + + a2 = 4 + b2 = 2 * (height + width) + c2 = (1 - min_overlap) * width * height + sq2 = torch.sqrt(b2**2 - 4 * a2 * c2) + r2 = (b2 + sq2) / (2 * a2) + + a3 = 4 * min_overlap + b3 = -2 * min_overlap * (height + width) + c3 = (min_overlap - 1) * width * height + sq3 = torch.sqrt(b3**2 - 4 * a3 * c3) + r3 = (b3 + sq3) / (2 * a3) + return min(r1, r2, r3) + + + +import functools + +import torch.nn.functional as F + + +def reduce_loss(loss, reduction): + """Reduce loss as specified. + + Args: + loss (Tensor): Elementwise loss tensor. + reduction (str): Options are "none", "mean" and "sum". + + Return: + Tensor: Reduced loss tensor. + """ + reduction_enum = F._Reduction.get_enum(reduction) + # none: 0, elementwise_mean:1, sum: 2 + if reduction_enum == 0: + return loss + elif reduction_enum == 1: + return loss.mean() + elif reduction_enum == 2: + return loss.sum() + + +def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): + """Apply element-wise weight and reduce loss. + + Args: + loss (Tensor): Element-wise loss. + weight (Tensor): Element-wise weights. + reduction (str): Same as built-in losses of PyTorch. + avg_factor (float): Avarage factor when computing the mean of losses. + + Returns: + Tensor: Processed loss values. 
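The helpers above implement the usual CenterNet-style target construction: gaussian_radius chooses a radius such that a box shifted by up to that radius still overlaps the ground-truth box by at least min_overlap, and draw_heatmap_gaussian splats a 2-D gaussian of that radius onto the heatmap with an element-wise torch.max so overlapping objects keep the larger score. The sketch below re-implements the splatting inline (rather than calling the file's helpers) so it runs standalone; the radius value is hard-coded for illustration:

# Illustrative sketch: splat one gaussian peak onto a toy heatmap, CenterNet-style.
import numpy as np
import torch

def toy_gaussian_2d(shape, sigma):
    m, n = [(s - 1.0) / 2.0 for s in shape]
    y, x = np.ogrid[-m:m + 1, -n:n + 1]
    return np.exp(-(x * x + y * y) / (2 * sigma * sigma))

H, W = 32, 32
heatmap = torch.zeros(H, W)
x, y = 20, 12                 # object center cell (column, row)
radius = 3                    # in practice: max(min_radius, int(gaussian_radius((h, w), min_overlap)))

diameter = 2 * radius + 1
g = torch.from_numpy(toy_gaussian_2d((diameter, diameter), sigma=diameter / 6)).float()

left, right = min(x, radius), min(W - x, radius + 1)
top, bottom = min(y, radius), min(H - y, radius + 1)
patch = heatmap[y - top:y + bottom, x - left:x + right]
torch.max(patch, g[radius - top:radius + bottom, radius - left:radius + right], out=patch)

assert heatmap[y, x] == 1.0   # the peak lands exactly on the center cell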
+ """ + # if weight is specified, apply element-wise weight + + if weight is not None: + device = loss.device + weight = weight.to(device) + loss = loss * weight + + # if avg_factor is not specified, just reduce the loss + if avg_factor is None: + loss = reduce_loss(loss, reduction) + else: + # if reduction is mean, then average the loss by avg_factor + if reduction == 'mean': + loss = loss.sum() / avg_factor + # if reduction is 'none', then do nothing, otherwise raise an error + elif reduction != 'none': + raise ValueError('avg_factor can not be used with reduction="sum"') + return loss + + +def weighted_loss(loss_func): + """Create a weighted version of a given loss function. + + To use this decorator, the loss function must have the signature like + `loss_func(pred, target, **kwargs)`. The function only needs to compute + element-wise loss without any reduction. This decorator will add weight + and reduction arguments to the function. The decorated function will have + the signature like `loss_func(pred, target, weight=None, reduction='mean', + avg_factor=None, **kwargs)`. + + :Example: + + >>> import torch + >>> @weighted_loss + >>> def l1_loss(pred, target): + >>> return (pred - target).abs() + + >>> pred = torch.Tensor([0, 2, 3]) + >>> target = torch.Tensor([1, 1, 1]) + >>> weight = torch.Tensor([1, 0, 1]) + + >>> l1_loss(pred, target) + tensor(1.3333) + >>> l1_loss(pred, target, weight) + tensor(1.) + >>> l1_loss(pred, target, reduction='none') + tensor([1., 1., 2.]) + >>> l1_loss(pred, target, weight, avg_factor=2) + tensor(1.5000) + """ + + @functools.wraps(loss_func) + def wrapper(pred, + target, + weight=None, + reduction='mean', + avg_factor=None, + **kwargs): + # get element-wise loss + loss = loss_func(pred, target, **kwargs) + loss = weight_reduce_loss(loss, weight, reduction, avg_factor) + return loss + + return wrapper + + +@weighted_loss +def gaussian_focal_loss(pred, gaussian_target, alpha=2.0, gamma=4.0): + """`Focal Loss `_ for targets in gaussian + distribution. + + Args: + pred (torch.Tensor): The prediction. + gaussian_target (torch.Tensor): The learning target of the prediction + in gaussian distribution. + alpha (float, optional): A balanced form for Focal Loss. + Defaults to 2.0. + gamma (float, optional): The gamma for calculating the modulating + factor. Defaults to 4.0. + """ + eps = 1e-12 + device = pred.device + pos_weights = gaussian_target.eq(1) + pos_weights = pos_weights.to(device) + neg_weights = (1 - gaussian_target).pow(gamma) + neg_weights = neg_weights.to(device) + pos_loss = -(pred + eps).log() * (1 - pred).pow(alpha) * pos_weights + neg_loss = -(1 - pred + eps).log() * pred.pow(alpha) * neg_weights + return pos_loss + neg_loss + +@weighted_loss +def l1_loss(pred, target): + """L1 loss. + + Args: + pred (torch.Tensor): The prediction. + target (torch.Tensor): The learning target of the prediction. 
+ + Returns: + torch.Tensor: Calculated loss + """ + device = pred.device + target = target.to(device) + assert pred.size() == target.size() and target.numel() > 0 + loss = torch.abs(pred - target) + return loss \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/center_point_loss_multiclass.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/center_point_loss_multiclass.py new file mode 100644 index 0000000000000000000000000000000000000000..c3fc1669a30939b8565903adfe980fc6fa94016b --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/center_point_loss_multiclass.py @@ -0,0 +1,755 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +import math + + +class GaussianFocalLoss(nn.Module): + """GaussianFocalLoss is a variant of focal loss. + + More details can be found in the `paper + `_ + Code is modified from `kp_utils.py + `_ # noqa: E501 + Please notice that the target in GaussianFocalLoss is a gaussian heatmap, + not 0/1 binary target. + + Args: + alpha (float): Power of prediction. + gamma (float): Power of target for negtive samples. + reduction (str): Options are "none", "mean" and "sum". + loss_weight (float): Loss weight of current loss. + """ + + def __init__(self, + alpha=2.0, + gamma=4.0, + reduction='mean', + loss_weight=1.0): + super(GaussianFocalLoss, self).__init__() + self.alpha = alpha + self.gamma = gamma + self.reduction = reduction + self.loss_weight = loss_weight + + def forward(self, + pred, + target, + weight=None, + avg_factor=None, + reduction_override=None): + """Forward function. + + Args: + pred (torch.Tensor): The prediction. + target (torch.Tensor): The learning target of the prediction + in gaussian distribution. + weight (torch.Tensor, optional): The weight of loss for each + prediction. Defaults to None. + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + reduction_override (str, optional): The reduction method used to + override the original reduction method of the loss. + Defaults to None. + """ + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + loss_reg = self.loss_weight * gaussian_focal_loss( + pred, # [11, 1, 100, 100] + target, # [11, 1, 100, 100] + weight, + alpha=self.alpha, + gamma=self.gamma, + reduction=reduction, + avg_factor=avg_factor) + return loss_reg + +def gaussian_focal_loss(pred, gaussian_target, alpha=2.0, gamma=4.0): + """`Focal Loss `_ for targets in gaussian + distribution. + + Args: + pred (torch.Tensor): The prediction. + gaussian_target (torch.Tensor): The learning target of the prediction + in gaussian distribution. + alpha (float, optional): A balanced form for Focal Loss. + Defaults to 2.0. + gamma (float, optional): The gamma for calculating the modulating + factor. Defaults to 4.0. + """ + eps = 1e-12 + device = pred.device + pos_weights = gaussian_target.eq(1) + pos_weights = pos_weights.to(device) + neg_weights = (1 - gaussian_target).pow(gamma) + neg_weights = neg_weights.to(device) + pos_loss = -(pred + eps).log() * (1 - pred).pow(alpha) * pos_weights + neg_loss = -(1 - pred + eps).log() * pred.pow(alpha) * neg_weights + return pos_loss + neg_loss + + +def clip_sigmoid(x, eps=1e-4): + """Sigmoid function for input feature. + + Args: + x (torch.Tensor): Input feature map with the shape of [B, N, H, W]. + eps (float): Lower bound of the range to be clamped to. 
Defaults + to 1e-4. + + Returns: + torch.Tensor: Feature map after sigmoid. + """ + y = torch.clamp(torch.sigmoid(x), min=eps, max=1 - eps) + # y = torch.clamp(x.sigmoid_(), min=eps, max=1 - eps) + return y + +def _gather_feat(feat, ind, mask=None): + # feat : [bs, wxh, c] + dim = feat.size(2) + # ind : [bs, index, c] + ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) + feat = feat.gather(1, ind) # 按照dim=1获取ind + if mask is not None: + mask = mask.unsqueeze(2).expand_as(feat) + feat = feat[mask] + feat = feat.view(-1, dim) + return feat + + +def _transpose_and_gather_feat(feat, ind): + feat = feat.permute(0, 2, 3, 1).contiguous() ## # from [bs c h w] to [bs, h, w, c] + feat = feat.view(feat.size(0), -1, feat.size(3)) # to [bs, wxh, c] + feat = _gather_feat(feat, ind) + return feat + + + +class RegLoss(nn.Module): + '''Regression loss for an output tensor + Arguments: + output (batch x dim x h x w) + mask (batch x max_objects) + ind (batch x max_objects) + target (batch x max_objects x dim) + ''' + def __init__(self): + super(RegLoss, self).__init__() + + def forward(self, output, mask, ind, target): + pred = _transpose_and_gather_feat(output, ind) + mask = mask.float().unsqueeze(2) + + loss = F.l1_loss(pred*mask, target*mask, reduction='none') + loss = loss / (mask.sum() + 1e-4) + loss = loss.transpose(2 ,0).sum(dim=2).sum(dim=1) + return loss + + + +class FastFocalLoss(nn.Module): + ''' + Reimplemented focal loss, exactly the same as the CornerNet version. + Faster and costs much less memory. + ''' + def __init__(self): + super(FastFocalLoss, self).__init__() + + def forward(self, out, target, ind, mask, cat): + ''' + Arguments: + out, target: B x C x H x W + ind, mask: B x M + cat (category id for peaks): B x M + ''' + mask = mask.float() + gt = torch.pow(1 - target, 4) + neg_loss = torch.log(1 - out) * torch.pow(out, 2) * gt + neg_loss = neg_loss.sum() + + pos_pred_pix = _transpose_and_gather_feat(out, ind) # B x M x C + pos_pred = pos_pred_pix.gather(2, cat.unsqueeze(2)) # B x M + num_pos = mask.sum() + pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) * \ + mask.unsqueeze(2) + pos_loss = pos_loss.sum() + if num_pos == 0: + return - neg_loss + return - (pos_loss + neg_loss) / num_pos + +class CenterPointLossmulticlass(nn.Module): + def __init__(self, args): + super(CenterPointLossmulticlass, self).__init__() + + self.cls_weight = args['cls_weight'] + self.loc_weight = args['loc_weight'] + self.code_weights = args['code_weights'] + self.target_cfg = args['target_assigner_config'] + self.lidar_range = self.target_cfg['cav_lidar_range'] + self.voxel_size = self.target_cfg['voxel_size'] + + self.loss_cls = GaussianFocalLoss(reduction='mean') + self.crit = FastFocalLoss() + self.crit_reg = RegLoss() + + self.loss_dict = {} + + self.detail_loss = {} + self.cls_output = {} + + def forward(self, output_dict, target_dict, suffix=""): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + # Predictions + box_preds = output_dict['bbox_preds{}'.format(suffix)].permute(0, 2, 3, 1).contiguous() # [B, H, W, C*K] C=8 + cls_preds = clip_sigmoid(output_dict['cls_preds{}'.format(suffix)]) # [B, 1, H, W] -> [B, 1*K, H, W] + + + # GTs + bbox_center_all = target_dict['object_bbx_center{}'.format(suffix)].cpu().numpy() # (4,100,7) -> (4,3,100,7) + bbox_mask_all = target_dict['object_bbx_mask{}'.format(suffix)].cpu().numpy() # (4,100) -> (4,3,100) + batch_size = bbox_mask_all.shape[0] + num_class = bbox_center_all.shape[1] + cls_gt_list = [] + box_gt_list = [] 
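To make the gather pattern in `_transpose_and_gather_feat` concrete, here is a standalone sketch with made-up shapes (not the shapes used by this model): the head output is flattened to [B, H*W, C] and per-object rows are pulled out at the flattened index y*W + x.

import torch

B, C, H, W = 2, 8, 4, 6
out = torch.randn(B, C, H, W)                       # dense head output
xs = torch.tensor([[1, 3], [0, 5]])                 # hypothetical object centre columns
ys = torch.tensor([[2, 0], [1, 3]])                 # hypothetical object centre rows
ind = ys * W + xs                                   # flattened spatial index, [B, M]

feat = out.permute(0, 2, 3, 1).contiguous().view(B, H * W, C)          # [B, H*W, C]
gathered = feat.gather(1, ind.unsqueeze(2).expand(B, ind.size(1), C))  # [B, M, C]

# gathered[b, m] is the C-dim prediction at pixel (ys[b, m], xs[b, m])
assert torch.allclose(gathered[0, 0], out[0, :, 2, 1])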
+ for i in range(num_class): + + bbox_center = bbox_center_all[:,i,:,:] + bbox_mask = bbox_mask_all[:,i,:] + + max_gt = int(max(bbox_mask.sum(axis=1))) + gt_boxes3d = np.zeros((batch_size, max_gt, bbox_center[0].shape[-1]), dtype=np.float32) # [B, max_anchor_num, 7] + for k in range(batch_size): + gt_boxes3d[k, :int(bbox_mask[k].sum()), :] = bbox_center[k, :int(bbox_mask[k].sum()), :] + gt_boxes3d = torch.from_numpy(gt_boxes3d).to(box_preds.device) + + targets_dict = self.assign_targets( + gt_boxes=gt_boxes3d # [B, max_anchor_num, 7 + C ] heatmap [2,1,h,w] anno_boxes [2,100,8] inds [2, 100] + ) + + cls_gt_list.append(targets_dict['heatmaps']) # [B, 1, H, W] + box_gt_list.append((targets_dict['anno_boxes'], targets_dict['inds'], targets_dict['masks'])) + + cls_gt = torch.stack(cls_gt_list, dim=1) + cls_preds = cls_preds.unsqueeze(2) + + cls_loss = self.get_cls_layer_loss(cls_preds, cls_gt) + + box_loss = 0 + box_preds = box_preds.view(box_preds.shape[0], box_preds.shape[1], box_preds.shape[2], int(box_preds.shape[3]/8), 8) + + dim_list = [] + for i in range(num_class): + box_gt = box_gt_list[i] + loss_all, loss_dim = self.get_box_reg_layer_loss(box_preds[:,:,:,i,:], box_gt) + box_loss += loss_all + # box_loss += self.get_box_reg_layer_loss(box_preds[:,:,:,i,:], box_gt) + self.detail_loss.update({'box_loss_{}'.format(i): loss_dim}) + + rpn_loss = cls_loss + box_loss + + self.loss_dict.update({ 'total_loss': rpn_loss.item(), + 'reg_loss': box_loss.item(), + 'cls_loss': cls_loss.item()}) + + self.cls_output.update({'output_cls_{}'.format(suffix): cls_preds, + 'target_cls_{}'.format(suffix): cls_gt}) + + + return rpn_loss + + def logging(self, epoch, batch_id, batch_len, writer = None, suffix=""): + """ + Print out the loss function for current iteration. + + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. 
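A minimal sketch (made-up counts) of the padding step above: each class's variable number of valid boxes, given by the mask sums, is copied into one zero-padded [B, max_gt, 7] tensor before target assignment.

import numpy as np
import torch

batch_size, box_slots, box_dim = 2, 5, 7
bbox_center = np.random.rand(batch_size, box_slots, box_dim).astype(np.float32)
bbox_mask = np.array([[1, 1, 1, 0, 0],        # sample 0: 3 valid boxes
                      [1, 1, 0, 0, 0]])       # sample 1: 2 valid boxes

max_gt = int(bbox_mask.sum(axis=1).max())
gt_boxes3d = np.zeros((batch_size, max_gt, box_dim), dtype=np.float32)
for k in range(batch_size):
    n = int(bbox_mask[k].sum())
    gt_boxes3d[k, :n] = bbox_center[k, :n]

gt_boxes3d = torch.from_numpy(gt_boxes3d)     # [2, 3, 7], zero-padded per sample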
+ batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict.get('total_loss', 0) + reg_loss = self.loss_dict.get('reg_loss', 0) + cls_loss = self.loss_dict.get('cls_loss', 0) + + print("[epoch %d][%d/%d]%s, || Loss: %.4f || Conf Loss: %.4f" + " || Loc Loss: %.4f" % ( + epoch, batch_id + 1, batch_len, suffix, + total_loss, cls_loss, reg_loss)) + + # for i in range(3): + # print('class {} reg loss: '.format(i), self.detail_loss['box_loss_{}'.format(i)]) + + if not writer is None: + writer.add_scalar('Regression_loss', reg_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Confidence_loss', cls_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Total_loss', total_loss, + epoch*batch_len + batch_id) + + for key in self.detail_loss.keys(): + loss_list = self.detail_loss.get(key, []) + if len(loss_list) > 0: + for k in range(len(loss_list)): + writer.add_scalar('{}_dim{}'.format(key,k), loss_list[k], + epoch*batch_len + batch_id) + + # for i in range(self.cls_output['output_cls_{}'.format(suffix)].shape[0]): + # writer.add_image('agent_{}_{}_output'.format(i, suffix), self.cls_output['output_cls_{}'.format(suffix)][i, 0:1, 0], 1, dataformats='CHW') + # writer.add_image('agent_{}_{}_target'.format(i, suffix), self.cls_output['target_cls_{}'.format(suffix)][i, 0:1, 0], 1, dataformats='CHW') + + + + def get_cls_layer_loss(self, pred_heatmaps, gt_heatmaps): + num_pos = gt_heatmaps.eq(1).float().sum().item() + + cls_loss = self.loss_cls( + pred_heatmaps, + gt_heatmaps, + avg_factor=max(num_pos, 1)) + + cls_loss = cls_loss * self.cls_weight + return cls_loss + + + def _gather_feat(self, feat, ind, mask=None): + """Gather feature map. + + Given feature map and index, return indexed feature map. + + Args: + feat (torch.tensor): Feature map with the shape of [B, H*W, 10]. + ind (torch.Tensor): Index of the ground truth boxes with the + shape of [B, max_obj]. + mask (torch.Tensor): Mask of the feature map with the shape + of [B, max_obj]. Default: None. + + Returns: + torch.Tensor: Feature map after gathering with the shape + of [B, max_obj, 10]. 
+ """ + device = feat.device + dim = feat.size(2) + ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) # 把 ind 和 dim 拼接在一起 + feat = feat.gather(1, ind.to(device)) + if mask is not None: + mask = mask.unsqueeze(2).expand_as(feat) + feat = feat[mask] + feat = feat.view(-1, dim) + return feat + + + def get_box_reg_layer_loss(self, bbox_preds, bbox_gt): + target_box, inds, masks = bbox_gt + pred = bbox_preds + ind = inds + num = masks.float().sum() + pred = pred.view(pred.size(0), -1, pred.size(3)) # [n, h*w, 8 ] + pred = self._gather_feat(pred, ind) + mask = masks.unsqueeze(2).expand_as(target_box).float() ## 把 mask 的维度进行扩展 + isnotnan = (~torch.isnan(target_box)).float() + mask *= isnotnan + + code_weights = self.code_weights + bbox_weights = mask * mask.new_tensor(code_weights) + ## pred, target_box [4,100,8] + loc_loss = l1_loss( + pred, target_box, bbox_weights, avg_factor=(num + 1e-4)) + + loc_loss = loc_loss * self.loc_weight + + loss_dim = [] + import copy + for i in range(8): + code_weights2 = code_weights.copy() + for j in range(8): + if j != i: + code_weights2[j] = 0 + bbox_weights = mask * mask.new_tensor(code_weights2) + + loc_loss_2 = l1_loss( + pred, target_box, bbox_weights, avg_factor=(num + 1e-4)) + loc_loss_single = loc_loss_2 * self.loc_weight + + loss_dim.append(loc_loss_single.item()) + + return loc_loss, loss_dim + + def assign_targets(self, gt_boxes): + """Generate targets. + + Args: + gt_boxes: ( M, 7+c) box + cls ## 这个地方函数和centerpoint-kitti 那个不太一样,这里是分开进行计算的 + + Returns: + Returns: + tuple[list[torch.Tensor]]: Tuple of target including \ + the following results in order. + + - list[torch.Tensor]: Heatmap scores. + - list[torch.Tensor]: Ground truth boxes. + - list[torch.Tensor]: Indexes indicating the \ + position of the valid boxes. + - list[torch.Tensor]: Masks indicating which \ + boxes are valid. + """ + if gt_boxes.shape[-1] == 8: + gt_bboxes_3d, gt_labels_3d = gt_boxes[..., :-1], gt_boxes[..., -1] # gt_box [2,14,8] batch_size * bbox_num * 8 + heatmaps, anno_boxes, inds, masks = self.get_targets_single(gt_bboxes_3d, gt_labels_3d) + elif gt_boxes.shape[-1] == 7: + gt_bboxes_3d = gt_boxes + heatmaps, anno_boxes, inds, masks = self.get_targets_single(gt_bboxes_3d) + + # transpose heatmaps, because the dimension of tensors in each task is + # different, we have to use numpy instead of torch to do the transpose. 
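The box regression term above boils down to a masked, channel-weighted L1; a standalone sketch with illustrative weights follows, and the per-dimension breakdown mirrors the zeroed-out copies of code_weights used for logging.

import torch

pred   = torch.randn(2, 4, 8)                 # gathered predictions, [B, max_objs, 8]
target = torch.randn(2, 4, 8)
obj_mask = torch.tensor([[1., 1., 0., 0.],
                         [1., 0., 0., 0.]])   # which object slots are real
mask = obj_mask.unsqueeze(2).expand_as(target)

code_weights = torch.tensor([1., 1., 1., 1., 1., 1., 5., 5.])  # hypothetical channel weights
bbox_weights = mask * code_weights            # broadcast over the last dimension

num = obj_mask.sum()
loc_loss = (torch.abs(pred - target) * bbox_weights).sum() / (num + 1e-4)

# zeroing every channel weight except i isolates channel i's share of the loss
per_dim = [(torch.abs(pred - target)[..., i] * mask[..., i] * code_weights[i]).sum() / (num + 1e-4)
           for i in range(8)]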
+ # heatmaps = np.array(heatmaps).transpose(1, 0).tolist() + # heatmaps = [torch.stack(hms_) for hms_ in heatmaps] + # # heatmaps = torch.from_numpy(np.array(heatmaps)) + # # transpose anno_boxes + # anno_boxes = np.array(anno_boxes).transpose(1, 0).tolist() + # anno_boxes = [torch.stack(anno_boxes_) for anno_boxes_ in anno_boxes] + # # transpose inds + # inds = np.array(inds).transpose(1, 0).tolist() + # inds = [torch.stack(inds_) for inds_ in inds] + # # transpose inds + # masks = np.array(masks).transpose(1, 0).tolist() + # masks = [torch.stack(masks_) for masks_ in masks] + + all_targets_dict = { + 'heatmaps': heatmaps, + 'anno_boxes': anno_boxes, + 'inds': inds, + 'masks': masks + } + + return all_targets_dict + + + def get_targets_single(self, gt_bbox_3d, gt_labels_3d=None): + + batch_size = gt_bbox_3d.shape[0] + device = gt_bbox_3d.device + max_objs = self.target_cfg['max_objs'] + pc_range = self.lidar_range + voxel_size = self.voxel_size + + grid_size = (np.array(self.lidar_range[3:6]) - + np.array(self.lidar_range[0:3])) / np.array(self.voxel_size) + grid_size = np.round(grid_size).astype(np.int64) + feature_map_size = grid_size[:2] // self.target_cfg['out_size_factor'] + + draw_gaussian = draw_heatmap_gaussian + heatmaps, anno_boxes, inds, masks = [], [], [], [] + + for batch in range(batch_size): + task_boxes = gt_bbox_3d[batch, :, :] + if not gt_labels_3d is None: + task_classes = gt_labels_3d[batch, :] + + heatmap = gt_bbox_3d.new_zeros( # 辅助gt_bboxes_3d的属性 + (1, feature_map_size[1],feature_map_size[0])) + + anno_box = gt_bbox_3d.new_zeros((max_objs, 8), + dtype = torch.float32) + + ind = gt_bbox_3d.new_zeros((max_objs), dtype=torch.int64) + mask = gt_bbox_3d.new_zeros((max_objs), dtype=torch.uint8) + + num_objs = min(task_boxes.shape[0], max_objs) + + for k in range(num_objs): + # 计算x的heatmap坐标 + coor_x = (task_boxes[k][0] - pc_range[0]) / voxel_size[0] / self.target_cfg['out_size_factor'] + coor_y = (task_boxes[k][1] - pc_range[1]) / voxel_size[1] / self.target_cfg['out_size_factor'] + coor_z = (task_boxes[k][2] - pc_range[2]) / voxel_size[2] / self.target_cfg['out_size_factor'] + h = task_boxes[k][3] / voxel_size[0] / self.target_cfg['out_size_factor'] + w = task_boxes[k][4] / voxel_size[1] / self.target_cfg['out_size_factor'] + l = task_boxes[k][5] / voxel_size[2] / self.target_cfg['out_size_factor'] + rot = task_boxes[k][6] + + if h > 0 and w > 0: + radius = gaussian_radius( + (h, w), + min_overlap=self.target_cfg['gaussian_overlap']) + radius = max(self.target_cfg['min_radius'], int(radius)) + + center = torch.tensor([coor_x, coor_y], + dtype=torch.float32, + device=device) + center_int = center.to(torch.int32) ## bbox 的中心在heatmap 中的位置 + + # throw out not in range objects to avoid out of array + # area when creating the heatmap + if not (0 <= center_int[0] < feature_map_size[0].item() + and 0 <= center_int[1] < feature_map_size[1].item()): + continue + + draw_gaussian(heatmap[0], center_int, radius) + + x, y = center_int[0], center_int[1] + assert (center_int[1] * feature_map_size[0] + center_int[0] < + feature_map_size[0] * feature_map_size[1]) + ind[k] = y * feature_map_size[0] + x + mask[k] = 1 + # box_dim = task_boxes[k][3:6] + # box_dim = box_dim.log() + box_dim = torch.cat([h.unsqueeze(0), w.unsqueeze(0), l.unsqueeze(0)], dim=0) + anno_box[k] = torch.cat([ + center - torch.tensor([x, y], device=device), + coor_z.unsqueeze(0), box_dim, + torch.sin(rot).unsqueeze(0), + torch.cos(rot).unsqueeze(0), + ]) # [x,y,z, w, h, l, sin(heading), cos(heading)] + + 
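A worked example of the centre-to-cell arithmetic above, using hypothetical range/voxel settings rather than the ones in this run:

# Hypothetical values, only to show the mapping from metric coordinates to heatmap cells.
pc_range = [-40.0, -40.0, -3.0]        # assumed lower bounds (x, y, z) in metres
voxel_size = [0.1, 0.1, 0.15]
out_size_factor = 2
W = int((40.0 - (-40.0)) / voxel_size[0]) // out_size_factor   # assumed heatmap width = 400

x_world, y_world = 4.0, -1.5
coor_x = (x_world - pc_range[0]) / voxel_size[0] / out_size_factor   # (4 + 40)/0.1/2 = 220.0
coor_y = (y_world - pc_range[1]) / voxel_size[1] / out_size_factor   # (-1.5 + 40)/0.1/2 = 192.5

x_cell, y_cell = int(coor_x), int(coor_y)      # (220, 192): integer peak location
ind = y_cell * W + x_cell                      # 77020: flattened index stored in `ind`
# anno_box regresses the sub-cell offset (coor_x - x_cell, coor_y - y_cell) = (0.0, 0.5),
# plus z, the cell-scaled box dims and (sin, cos) of the heading.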
heatmaps.append(heatmap) + anno_boxes.append(anno_box) + inds.append(ind) + masks.append(mask) + # import cv2; cv2.imwrite('test_{}.png'.format(batch), heatmap.cpu().numpy()[0]*255) + heatmaps = torch.stack(heatmaps) + anno_boxes = torch.stack(anno_boxes) + inds = torch.stack(inds) + masks = torch.stack(masks) + return heatmaps, anno_boxes, inds, masks # [B, H, W] + + +def gaussian_2d(shape, sigma=1): + """Generate gaussian map. + + Args: + shape (list[int]): Shape of the map. + sigma (float): Sigma to generate gaussian map. + Defaults to 1. + + Returns: + np.ndarray: Generated gaussian map. + """ + m, n = [(ss - 1.) / 2. for ss in shape] + y, x = np.ogrid[-m:m + 1, -n:n + 1] + + h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) + h[h < np.finfo(h.dtype).eps * h.max()] = 0 + return h + + + +def draw_heatmap_gaussian(heatmap, center, radius, k=1): + """Get gaussian masked heatmap. + + Args: + heatmap (torch.Tensor): Heatmap to be masked. + center (torch.Tensor): Center coord of the heatmap. + radius (int): Radius of gausian. + K (int): Multiple of masked_gaussian. Defaults to 1. + + Returns: + torch.Tensor: Masked heatmap. + """ + diameter = 2 * radius + 1 + gaussian = gaussian_2d((diameter, diameter), sigma=diameter / 6) + + x, y = int(center[0]), int(center[1]) + + height, width = heatmap.shape[0:2] + + left, right = min(x, radius), min(width - x, radius + 1) + top, bottom = min(y, radius), min(height - y, radius + 1) + + masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] + masked_gaussian = torch.from_numpy( + gaussian[radius - top:radius + bottom, + radius - left:radius + right]).to(heatmap.device, + torch.float32) + if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: + torch.max(masked_heatmap, masked_gaussian * k, out=masked_heatmap) + return heatmap + + + +def gaussian_radius(det_size, min_overlap=0.5): + """Get radius of gaussian. + + Args: + det_size (tuple[torch.Tensor]): Size of the detection result. + min_overlap (float): Gaussian_overlap. Defaults to 0.5. + + Returns: + torch.Tensor: Computed radius. + """ + height, width = det_size + + a1 = 1 + b1 = (height + width) + c1 = width * height * (1 - min_overlap) / (1 + min_overlap) + sq1 = torch.sqrt(b1**2 - 4 * a1 * c1) + r1 = (b1 + sq1) / (2 * a1) + + a2 = 4 + b2 = 2 * (height + width) + c2 = (1 - min_overlap) * width * height + sq2 = torch.sqrt(b2**2 - 4 * a2 * c2) + r2 = (b2 + sq2) / (2 * a2) + + a3 = 4 * min_overlap + b3 = -2 * min_overlap * (height + width) + c3 = (min_overlap - 1) * width * height + sq3 = torch.sqrt(b3**2 - 4 * a3 * c3) + r3 = (b3 + sq3) / (2 * a3) + return min(r1, r2, r3) + + + +import functools + +import torch.nn.functional as F + + +def reduce_loss(loss, reduction): + """Reduce loss as specified. + + Args: + loss (Tensor): Elementwise loss tensor. + reduction (str): Options are "none", "mean" and "sum". + + Return: + Tensor: Reduced loss tensor. + """ + reduction_enum = F._Reduction.get_enum(reduction) + # none: 0, elementwise_mean:1, sum: 2 + if reduction_enum == 0: + return loss + elif reduction_enum == 1: + return loss.mean() + elif reduction_enum == 2: + return loss.sum() + + +def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): + """Apply element-wise weight and reduce loss. + + Args: + loss (Tensor): Element-wise loss. + weight (Tensor): Element-wise weights. + reduction (str): Same as built-in losses of PyTorch. + avg_factor (float): Avarage factor when computing the mean of losses. + + Returns: + Tensor: Processed loss values. 
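For intuition, a short usage sketch of the two helpers above (assuming gaussian_radius and draw_heatmap_gaussian from this file are in scope; the sizes and thresholds are made up): a box extent in cells yields a radius, and a unit-peak gaussian is splatted at the box's centre cell.

import torch

heatmap = torch.zeros(1, 20, 30)                     # [1, H, W] canvas, heatmap-sized
box_h, box_w = torch.tensor(6.0), torch.tensor(3.0)  # box extent in heatmap cells
radius = max(2, int(gaussian_radius((box_h, box_w), min_overlap=0.3)))  # floor of 2 for illustration
center = torch.tensor([12, 7], dtype=torch.int32)    # (x, y) cell of the box centre

draw_heatmap_gaussian(heatmap[0], center, radius)
print(heatmap[0, 7, 12])                             # tensor(1.) at the peak, decaying around it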
+ """ + # if weight is specified, apply element-wise weight + + if weight is not None: + device = loss.device + weight = weight.to(device) + loss = loss * weight + + # if avg_factor is not specified, just reduce the loss + if avg_factor is None: + loss = reduce_loss(loss, reduction) + else: + # if reduction is mean, then average the loss by avg_factor + if reduction == 'mean': + loss = loss.sum() / avg_factor + # if reduction is 'none', then do nothing, otherwise raise an error + elif reduction != 'none': + raise ValueError('avg_factor can not be used with reduction="sum"') + return loss + + +def weighted_loss(loss_func): + """Create a weighted version of a given loss function. + + To use this decorator, the loss function must have the signature like + `loss_func(pred, target, **kwargs)`. The function only needs to compute + element-wise loss without any reduction. This decorator will add weight + and reduction arguments to the function. The decorated function will have + the signature like `loss_func(pred, target, weight=None, reduction='mean', + avg_factor=None, **kwargs)`. + + :Example: + + >>> import torch + >>> @weighted_loss + >>> def l1_loss(pred, target): + >>> return (pred - target).abs() + + >>> pred = torch.Tensor([0, 2, 3]) + >>> target = torch.Tensor([1, 1, 1]) + >>> weight = torch.Tensor([1, 0, 1]) + + >>> l1_loss(pred, target) + tensor(1.3333) + >>> l1_loss(pred, target, weight) + tensor(1.) + >>> l1_loss(pred, target, reduction='none') + tensor([1., 1., 2.]) + >>> l1_loss(pred, target, weight, avg_factor=2) + tensor(1.5000) + """ + + @functools.wraps(loss_func) + def wrapper(pred, + target, + weight=None, + reduction='mean', + avg_factor=None, + **kwargs): + # get element-wise loss + loss = loss_func(pred, target, **kwargs) + loss = weight_reduce_loss(loss, weight, reduction, avg_factor) + return loss + + return wrapper + + +@weighted_loss +def gaussian_focal_loss(pred, gaussian_target, alpha=2.0, gamma=4.0): + """`Focal Loss `_ for targets in gaussian + distribution. + + Args: + pred (torch.Tensor): The prediction. + gaussian_target (torch.Tensor): The learning target of the prediction + in gaussian distribution. + alpha (float, optional): A balanced form for Focal Loss. + Defaults to 2.0. + gamma (float, optional): The gamma for calculating the modulating + factor. Defaults to 4.0. + """ + eps = 1e-12 + device = pred.device + pos_weights = gaussian_target.eq(1) + pos_weights = pos_weights.to(device) + neg_weights = (1 - gaussian_target).pow(gamma) + neg_weights = neg_weights.to(device) + pos_loss = -(pred + eps).log() * (1 - pred).pow(alpha) * pos_weights + neg_loss = -(1 - pred + eps).log() * pred.pow(alpha) * neg_weights + return pos_loss + neg_loss + +@weighted_loss +def l1_loss(pred, target): + """L1 loss. + + Args: + pred (torch.Tensor): The prediction. + target (torch.Tensor): The learning target of the prediction. 
+ + Returns: + torch.Tensor: Calculated loss + """ + device = pred.device + target = target.to(device) + assert pred.size() == target.size() and target.numel() > 0 + loss = torch.abs(pred - target) + return loss \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/ciassd_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/ciassd_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..46e2bf79449c5eb8ac65ca609498cb283f8c1305 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/ciassd_loss.py @@ -0,0 +1,254 @@ +import torch +import torch.nn as nn +import numpy as np +from opencood.utils.common_utils import limit_period +from opencood.data_utils.post_processor.voxel_postprocessor import VoxelPostprocessor +from opencood.pcdet_utils.iou3d_nms.iou3d_nms_utils import aligned_boxes_iou3d_gpu +from icecream import ic + +class CiassdLoss(nn.Module): + def __init__(self, args, keyname='stage1_out'): + super(CiassdLoss, self).__init__() + self.pos_cls_weight = args['pos_cls_weight'] + self.encode_rad_error_by_sin = args['encode_rad_error_by_sin'] + self.cls = args['cls'] + self.reg = args['reg'] + self.dir = args['dir'] + self.iou = None if 'iou' not in args else args['iou'] + self.keyname = keyname + self.loss_dict = {} + ## + self.num_cls = 2 + self.box_codesize = 7 + + def forward(self, output_dict, label_dict): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + preds_dict = output_dict[self.keyname] + + if 'stage1' in label_dict.keys(): + target_dict = label_dict['stage1'] + else: # for PointPillars + target_dict = label_dict + + if 'record_len' in output_dict: + batch_size = int(output_dict['record_len'].sum()) + else: + batch_size = output_dict['batch_size'] + + cls_labls = target_dict['pos_equal_one'].view(batch_size, -1, self.num_cls - 1) + positives = cls_labls > 0 + negatives = target_dict['neg_equal_one'].view(batch_size, -1, self.num_cls - 1) > 0 + cared = torch.logical_or(positives, negatives) + cls_labls = cls_labls * cared.type_as(cls_labls) + # num_normalizer = cared.sum(1, keepdim=True) + pos_normalizer = positives.sum(1, keepdim=True).float() + + # cls loss + cls_preds = preds_dict["cls_preds"].permute(0, 2, 3, 1).contiguous() \ + .view(batch_size, -1, self.num_cls - 1) + cls_weights = positives * self.pos_cls_weight + negatives * 1.0 + cls_weights /= torch.clamp(pos_normalizer, min=1.0) + cls_loss = sigmoid_focal_loss(cls_preds, cls_labls, weights=cls_weights, **self.cls) + cls_loss_reduced = cls_loss.sum() * self.cls['weight'] / batch_size + + # reg loss + reg_weights = positives / torch.clamp(pos_normalizer, min=1.0) + reg_preds = preds_dict['reg_preds'].permute(0, 2, 3, 1).contiguous().view(batch_size, -1, self.box_codesize) + reg_targets = target_dict['targets'].view(batch_size, -1, self.box_codesize) + if self.encode_rad_error_by_sin: + reg_preds, reg_targets = add_sin_difference(reg_preds, reg_targets) + reg_loss = weighted_smooth_l1_loss(reg_preds, reg_targets, weights=reg_weights, sigma=self.reg['sigma']) + reg_loss_reduced = reg_loss.sum() * self.reg['weight'] / batch_size + + + # dir loss + dir_targets = self.get_direction_target(target_dict['targets'].view(batch_size, -1, self.box_codesize)) + dir_logits = preds_dict[f"dir_preds"].permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2) # [N, H*W*#anchor, 2] + + dir_loss = softmax_cross_entropy_with_logits(dir_logits.view(-1, self.anchor_num), dir_targets.view(-1, self.anchor_num)) + 
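The add_sin_difference encoding used for the heading channel above can be checked numerically (standalone, arbitrary angles): an L1 gap between sin(p)cos(t) and cos(p)sin(t) equals |sin(p - t)|, so headings that differ by a full turn incur almost no loss.

import torch

p = torch.tensor([0.3, 3.0, -2.9])      # predicted headings (rad)
t = torch.tensor([0.1, -3.1, 2.9])      # target headings (rad)

enc_pred = torch.sin(p) * torch.cos(t)  # what add_sin_difference puts in the pred box
enc_tgt  = torch.cos(p) * torch.sin(t)  # ... and in the target box
l1 = torch.abs(enc_pred - enc_tgt)

assert torch.allclose(l1, torch.abs(torch.sin(p - t)), atol=1e-6)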
dir_loss = dir_loss.flatten() * reg_weights.flatten() # [N, H*W*anchor_num] + dir_loss_reduced = dir_loss.sum() * self.dir['weight'] / batch_size + + loss = cls_loss_reduced + reg_loss_reduced + dir_loss_reduced + + # iou loss + if self.iou is not None: + iou_preds = preds_dict["iou_preds"].permute(0, 2, 3, 1).contiguous() + pos_pred_mask = reg_weights.squeeze(dim=-1) > 0 # (4, 70400) + iou_pos_preds = iou_preds.view(batch_size, -1)[pos_pred_mask] + boxes3d_pred = VoxelPostprocessor.delta_to_boxes3d(preds_dict['reg_preds'].permute(0, 2, 3, 1).contiguous().detach(), + output_dict['anchor_box'])[pos_pred_mask] + boxes3d_tgt = VoxelPostprocessor.delta_to_boxes3d(target_dict['targets'], + output_dict['anchor_box'])[pos_pred_mask] + + iou_weights = reg_weights[pos_pred_mask].view(-1) + iou_pos_targets = aligned_boxes_iou3d_gpu(boxes3d_pred.float()[:, [0, 1, 2, 5, 4, 3, 6]], + boxes3d_tgt.float()[:, [0, 1, 2, 5, 4, 3, 6]]).detach().squeeze() + iou_pos_targets = 2 * iou_pos_targets.view(-1) - 1 + iou_loss = weighted_smooth_l1_loss(iou_pos_preds, iou_pos_targets, weights=iou_weights, sigma=self.iou['sigma']) + iou_loss_reduced = iou_loss.sum() * self.iou['weight'] / batch_size + + loss += iou_loss_reduced + self.loss_dict.update({ + 'iou_loss': iou_loss_reduced + }) + + + self.loss_dict.update({ + 'total_loss': loss, + 'cls_loss': cls_loss_reduced, + 'reg_loss': reg_loss_reduced, + 'dir_loss': dir_loss_reduced, + }) + + return loss + + def logging(self, epoch, batch_id, batch_len, writer = None): + """ + Print out the loss function for current iteration. + + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. + batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict['total_loss'] + reg_loss = self.loss_dict['reg_loss'] + cls_loss = self.loss_dict['cls_loss'] + dir_loss = self.loss_dict['dir_loss'] + if 'iou_loss' in self.loss_dict: + iou_loss = self.loss_dict['iou_loss'] + if (batch_id + 1) % 10 == 0: + print("[epoch %d][%d/%d], || Loss: %.4f || Cls: %.4f" + " || Loc: %.4f || Dir: %.4f || Iou: %.4f" % ( + epoch, batch_id + 1, batch_len, + total_loss.item(), cls_loss.item(), reg_loss.item(), dir_loss.item(), iou_loss.item())) + if writer is not None: + writer.add_scalar('Regression_loss', reg_loss.item(), + epoch*batch_len + batch_id) + writer.add_scalar('Confidence_loss', cls_loss.item(), + epoch*batch_len + batch_id) + writer.add_scalar('Direction_loss', dir_loss.item(), + epoch*batch_len + batch_id) + if 'iou_loss' in self.loss_dict: + writer.add_scalar('Iou_loss', iou_loss.item(), + epoch*batch_len + batch_id) + + + def get_direction_target(self, reg_targets): + """ + Args: + reg_targets: [N, H * W * #anchor_num, 7] + The last term is (theta_gt - theta_a) + + Returns: + dir_targets: + theta_gt: [N, H * W * #anchor_num, NUM_BIN] + NUM_BIN = 2 + """ + num_bins = self.dir['args']['num_bins'] + dir_offset = self.dir['args']['dir_offset'] + anchor_yaw = np.deg2rad(np.array(self.dir['args']['anchor_yaw'])) # for direction classification + self.anchor_yaw_map = torch.from_numpy(anchor_yaw).view(1,-1,1) # [1,2,1] + self.anchor_num = self.anchor_yaw_map.shape[1] + + H_times_W_times_anchor_num = reg_targets.shape[1] + anchor_map = self.anchor_yaw_map.repeat(1, H_times_W_times_anchor_num//self.anchor_num, 1).to(reg_targets.device) # [1, H * W * #anchor_num, 1] + + rot_gt = reg_targets[..., -1] + anchor_map[..., -1] # [N, H*W*anchornum] + 
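Continuing the direction-target computation at this point, a standalone sketch with hypothetical settings (two bins, an offset of pi/4); torch.remainder plays the role of the limit_period wrap used above.

import math
import torch
import torch.nn.functional as F

num_bins = 2
dir_offset = math.pi / 4                                 # hypothetical offset
rot_gt = torch.tensor([0.1, 1.9, 3.3, -2.0])             # heading plus anchor yaw, in radians

offset_rot = torch.remainder(rot_gt - dir_offset, 2 * math.pi)   # wrap into [0, 2*pi)
dir_cls = torch.clamp(torch.floor(offset_rot / (2 * math.pi / num_bins)).long(),
                      0, num_bins - 1)
dir_onehot = F.one_hot(dir_cls, num_bins).float()
print(dir_cls)    # tensor([1, 0, 0, 1]): which half-turn each heading falls into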
offset_rot = limit_period(rot_gt - dir_offset, 0, 2 * np.pi) + dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / num_bins)).long() # [N, H*W*anchornum] + dir_cls_targets = torch.clamp(dir_cls_targets, min=0, max=num_bins - 1) + # one_hot: + # if rot_gt > 0, then the label is 1, then the regression target is [0, 1] + dir_cls_targets = one_hot_f(dir_cls_targets, num_bins) + return dir_cls_targets + + + +def add_sin_difference(boxes1, boxes2): + rad_pred_encoding = torch.sin(boxes1[..., -1:]) * torch.cos(boxes2[..., -1:]) # ry -> sin(pred_ry)*cos(gt_ry) + rad_gt_encoding = torch.cos(boxes1[..., -1:]) * torch.sin(boxes2[..., -1:]) # ry -> cos(pred_ry)*sin(gt_ry) + res_boxes1 = torch.cat([boxes1[..., :-1], rad_pred_encoding], dim=-1) + res_boxes2 = torch.cat([boxes2[..., :-1], rad_gt_encoding], dim=-1) + return res_boxes1, res_boxes2 + + +def get_direction_target(reg_targets, anchors, one_hot=True, dir_offset=0.0): + """ + Generate targets for bounding box direction classification. + + Parameters + ---------- + anchors: torch.Tensor + shape as (H*W*2, 7) or (H, W, 2, 7) + reg_targets: torch.Tensor + shape as (B, H*W*2, 7) + + Returns + ------- + dir_cls_targets : torch.Tensor + [batch_size, w*h*num_anchor_per_pos, 2] + """ + batch_size = reg_targets.shape[0] + anchors = anchors.view(1, -1, anchors.shape[-1]).repeat(batch_size, 1, 1) + rot_gt = reg_targets[..., -1] + anchors[..., -1] # [4, 70400] + dir_cls_targets = ((rot_gt - dir_offset) > 0).long() # [4, 70400] + if one_hot: + dir_cls_targets = one_hot_f(dir_cls_targets, 2, dtype=anchors.dtype) + return dir_cls_targets + + +def one_hot_f(tensor, depth, dim=-1, on_value=1.0, dtype=torch.float32): + tensor_onehot = torch.zeros(*list(tensor.shape), depth, dtype=dtype, device=tensor.device) # [4, 70400, 2] + tensor_onehot.scatter_(dim, tensor.unsqueeze(dim).long(), on_value) # [4, 70400, 2] + return tensor_onehot + + +def sigmoid_focal_loss(preds, targets, weights=None, **kwargs): + assert 'gamma' in kwargs and 'alpha' in kwargs + # sigmoid cross entropy with logits + # more details: https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits + per_entry_cross_ent = torch.clamp(preds, min=0) - preds * targets.type_as(preds) + per_entry_cross_ent += torch.log1p(torch.exp(-torch.abs(preds))) + # focal loss + prediction_probabilities = torch.sigmoid(preds) + p_t = (targets * prediction_probabilities) + ((1 - targets) * (1 - prediction_probabilities)) + modulating_factor = torch.pow(1.0 - p_t, kwargs['gamma']) + alpha_weight_factor = targets * kwargs['alpha'] + (1 - targets) * (1 - kwargs['alpha']) + + loss = modulating_factor * alpha_weight_factor * per_entry_cross_ent + if weights is not None: + loss *= weights + return loss + + +def softmax_cross_entropy_with_logits(logits, labels): + param = list(range(len(logits.shape))) + transpose_param = [0] + [param[-1]] + param[1:-1] + logits = logits.permute(*transpose_param) + loss_ftor = torch.nn.CrossEntropyLoss(reduction="none") + loss = loss_ftor(logits, labels.max(dim=-1)[1]) + return loss + + +def weighted_smooth_l1_loss(preds, targets, sigma=3.0, weights=None): + diff = preds - targets + abs_diff = torch.abs(diff) + abs_diff_lt_1 = torch.le(abs_diff, 1 / (sigma ** 2)).type_as(abs_diff) + loss = abs_diff_lt_1 * 0.5 * torch.pow(abs_diff * sigma, 2) + \ + (abs_diff - 0.5 / (sigma ** 2)) * (1.0 - abs_diff_lt_1) + if weights is not None: + loss *= weights + return loss \ No newline at end of file diff --git 
a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/fpvrcnn_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/fpvrcnn_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..d62c2d608094f040a9a84ca2b86cd51b47506db9 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/fpvrcnn_loss.py @@ -0,0 +1,192 @@ +import torch +from torch import nn +import numpy as np +from opencood.loss.ciassd_loss import CiassdLoss, weighted_smooth_l1_loss +from icecream import ic + +class FpvrcnnLoss(nn.Module): + def __init__(self, args): + super(FpvrcnnLoss, self).__init__() + self.ciassd_loss = CiassdLoss(args['stage1']) + self.cls = args['stage2']['cls'] + self.reg = args['stage2']['reg'] + self.iou = args['stage2']['iou'] + self.loss_dict = {} + + def forward(self, output_dict, label_dict): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + ciassd_loss = self.ciassd_loss(output_dict, label_dict) + + # only update ciassd if no bbox is detected in the first stage + if 'stage2_out' not in output_dict: + self.loss_dict = { + 'loss': ciassd_loss, + } + return ciassd_loss + + # rcnn out + rcnn_cls = output_dict['stage2_out']['rcnn_cls'].view(1, -1, 1) + rcnn_iou = output_dict['stage2_out']['rcnn_iou'].view(1, -1, 1) + rcnn_reg = output_dict['stage2_out']['rcnn_reg'].view(1, -1, 7) + + tgt_cls = output_dict['rcnn_label_dict']['cls_tgt'].view(1, -1, 1) + tgt_iou = output_dict['rcnn_label_dict']['iou_tgt'].view(1, -1, 1) + tgt_reg = output_dict['rcnn_label_dict']['reg_tgt'].view(1, -1, 7) + + pos_norm = tgt_cls.sum() + # cls loss + loss_cls = weighted_sigmoid_binary_cross_entropy(rcnn_cls, tgt_cls) + + + # iou loss + # TODO: also count the negative samples + tgt_iou = 2 * (tgt_iou - 0.5) # normalize to -1, 1 + loss_iou = weighted_smooth_l1_loss(rcnn_iou, tgt_iou, + weights=tgt_cls).mean() + + # regression loss + # [deprecated by Yifan Lu] Target resampling : Generate a weights mask to force the regressor concentrate on low iou predictions + # sample 50% with iou>0.7 and 50% < 0.7 + weights = torch.ones(tgt_iou.shape, device=tgt_iou.device) + weights[tgt_cls == 0] = 0 + # neg = torch.logical_and(tgt_iou < 0.7, tgt_cls != 0) + # pos = torch.logical_and(tgt_iou >= 0.7, tgt_cls != 0) + # num_neg = int(neg.sum(dim=1)) + # num_pos = int(pos.sum(dim=1)) + # num_pos_smps = max(num_neg, 2) + # pos_indices = torch.where(pos)[1] + # not_selsected = torch.randperm(num_pos)[:num_pos - num_pos_smps] + # # not_selsected_indices = pos_indices[not_selsected] + # weights[:, pos_indices[not_selsected]] = 0 + loss_reg = weighted_smooth_l1_loss(rcnn_reg, tgt_reg, + weights=weights / max(weights.sum(), + 1)).sum() + + loss_cls_reduced = loss_cls * self.cls['weight'] + loss_iou_reduced = loss_iou * self.iou['weight'] + loss_reg_reduced = loss_reg * self.reg['weight'] + + # if torch.isnan(loss_reg_reduced): + # print('debug') + + rcnn_loss = loss_cls_reduced + loss_iou_reduced + loss_reg_reduced + loss = rcnn_loss + ciassd_loss + + self.loss_dict.update({ + 'loss': loss, + 'rcnn_loss': rcnn_loss, + 'cls_loss': loss_cls_reduced, + 'iou_loss': loss_iou_reduced, + 'reg_loss': loss_reg_reduced, + }) + + return loss + + def logging(self, epoch, batch_id, batch_len, writer=None): + """ + Print out the loss function for current iteration. + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. 
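A small numeric sketch (illustrative values) of the second-stage target handling above: proposal IoUs are rescaled from [0, 1] to [-1, 1] for the IoU head, and the regression weights keep only foreground proposals, normalised by their count.

import torch

tgt_iou = torch.tensor([[[0.9], [0.55], [0.2]]])   # proposal IoU with its GT box
tgt_cls = torch.tensor([[[1.0], [1.0], [0.0]]])    # 1 = foreground proposal

iou_reg_target = 2 * (tgt_iou - 0.5)               # -> [[[0.8], [0.1], [-0.6]]]

weights = torch.ones_like(tgt_iou)
weights[tgt_cls == 0] = 0                          # ignore background proposals
weights = weights / max(weights.sum(), 1)          # each positive contributes equally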
+ batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + ciassd_loss_dict = self.ciassd_loss.loss_dict + ciassd_total_loss = ciassd_loss_dict['total_loss'] + reg_loss = ciassd_loss_dict['reg_loss'] + cls_loss = ciassd_loss_dict['cls_loss'] + dir_loss = ciassd_loss_dict['dir_loss'] + iou_loss = ciassd_loss_dict['iou_loss'] + + if (batch_id + 1) % 10 == 0: + str_to_print = "[epoch %d][%d/%d], || Loss: %.4f || Ciassd: %.4f " \ + "|| Cls1: %.4f || Loc1: %.4f || Dir1: %.4f || Iou1: %.4f" % ( + epoch, batch_id + 1, batch_len, self.loss_dict['loss'], + ciassd_total_loss.item(), cls_loss.item(), reg_loss.item(), + dir_loss.item(), iou_loss.item(), + ) + if 'rcnn_loss' in self.loss_dict: + str_to_print += " || Rcnn: %.4f || Cls2: %.4f || Loc2: %.4f || Iou2: %.4f" % ( + self.loss_dict['rcnn_loss'], + self.loss_dict['cls_loss'].item(), + self.loss_dict['reg_loss'].item(), + self.loss_dict['iou_loss'].item(), + ) + print(str_to_print) + + if writer: + writer.add_scalar('Ciassd_regression_loss', reg_loss.item(), + epoch * batch_len + batch_id) + writer.add_scalar('Ciassd_Confidence_loss', cls_loss.item(), + epoch * batch_len + batch_id) + writer.add_scalar('Ciassd_Direction_loss', dir_loss.item(), + epoch * batch_len + batch_id) + writer.add_scalar('Ciassd_Iou_loss', iou_loss.item(), + epoch * batch_len + batch_id) + writer.add_scalar('Ciassd_loss', ciassd_total_loss.item(), + epoch * batch_len + batch_id) + if 'rcnn_loss' in self.loss_dict: + writer.add_scalar('Rcnn_regression_loss', + self.loss_dict['reg_loss'].item(), + epoch * batch_len + batch_id) + writer.add_scalar('Rcnn_Confidence_loss', + self.loss_dict['cls_loss'].item(), + epoch * batch_len + batch_id) + writer.add_scalar('Rcnn_Iou_loss', + self.loss_dict['iou_loss'].item(), + epoch * batch_len + batch_id) + writer.add_scalar('Rcnn_loss', self.loss_dict['rcnn_loss'].item(), + epoch * batch_len + batch_id) + writer.add_scalar('Total_loss', self.loss_dict['loss'].item(), + epoch * batch_len + batch_id) + + +def weighted_sigmoid_binary_cross_entropy(preds, tgts, weights=None, + class_indices=None): + if weights is not None: + weights = weights.unsqueeze(-1) + if class_indices is not None: + weights *= ( + indices_to_dense_vector(class_indices, preds.shape[2]) + .view(1, 1, -1) + .type_as(preds) + ) + per_entry_cross_ent = nn.functional.binary_cross_entropy_with_logits(preds, + tgts, + weights) + return per_entry_cross_ent + + +def indices_to_dense_vector( + indices, size, indices_value=1.0, default_value=0, dtype=np.float32 +): + """Creates dense vector with indices set to specific value and rest to zeros. + This function exists because it is unclear if it is safe to use + tf.sparse_to_dense(indices, [size], 1, validate_indices=False) + with indices which are not ordered. + This function accepts a dynamic size (e.g. tf.shape(tensor)[0]) + Args: + indices: 1d Tensor with integer indices which are to be set to + indices_values. + size: scalar with size (integer) of output Tensor. + indices_value: values of elements specified by indices in the output vector + default_value: values of other elements in the output vector. + dtype: data type. + Returns: + dense 1D Tensor of shape [size] with indices set to indices_values and the + rest set to default_value. 
+ """ + dense = torch.zeros(size).fill_(default_value) + dense[indices] = indices_value + + return dense \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/pixor_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/pixor_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..2473960706d0b8c00b2ed3dd13665ffd82ed1859 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/pixor_loss.py @@ -0,0 +1,110 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang +# License: TDG-Attribution-NonCommercial-NoDistrib + + +from functools import reduce + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class PixorLoss(nn.Module): + def __init__(self, args): + super(PixorLoss, self).__init__() + self.alpha = args["alpha"] + self.beta = args["beta"] + self.loss_dict = {} + + def forward(self, output_dict, target_dict): + """ + Compute loss for pixor network + Parameters + ---------- + output_dict : dict + The dictionary that contains the output. + + target_dict : dict + The dictionary that contains the target. + + Returns + ------- + total_loss : torch.Tensor + Total loss. + + """ + targets = target_dict["label_map"] + cls_preds, loc_preds = output_dict["cls"], output_dict["reg"] + + cls_targets, loc_targets = targets.split([1, 6], dim=1) + pos_count = cls_targets.sum() + neg_count = (cls_targets == 0).sum() + w1, w2 = neg_count / (pos_count + neg_count), pos_count / ( + pos_count + neg_count) + weights = torch.ones_like(cls_preds.reshape(-1)) + weights[cls_targets.reshape(-1) == 1] = w1 + weights[cls_targets.reshape(-1) == 0] = w2 + # cls_targets = cls_targets.float() + # cls_loss = F.binary_cross_entropy_with_logits(input=cls_preds.reshape(-1), target=cls_targets.reshape(-1), weight=weights, + # reduction='mean') + cls_loss = F.binary_cross_entropy_with_logits( + input=cls_preds, target=cls_targets, + reduction='mean') + pos_pixels = cls_targets.sum() + + loc_loss = F.smooth_l1_loss(cls_targets * loc_preds, + cls_targets * loc_targets, + reduction='sum') + loc_loss = loc_loss / pos_pixels if pos_pixels > 0 else loc_loss + + total_loss = self.alpha * cls_loss + self.beta * loc_loss + + self.loss_dict.update({'total_loss': total_loss, + 'reg_loss': loc_loss, + 'cls_loss': cls_loss}) + + return total_loss + + def logging(self, epoch, batch_id, batch_len, writer = None): + """ + Print out the loss function for current iteration. + + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. 
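The inverse-frequency weighting computed in the PIXOR forward above, as a standalone sketch with a tiny illustrative label map; note the rarer class receives the larger weight.

import torch

cls_targets = torch.zeros(1, 1, 4, 5)
cls_targets[0, 0, 1, 2] = 1.0
cls_targets[0, 0, 3, 0] = 1.0                      # 2 positives out of 20 pixels

pos_count = cls_targets.sum()
neg_count = (cls_targets == 0).sum()
w_pos = neg_count / (pos_count + neg_count)        # 0.9: up-weight the rare positives
w_neg = pos_count / (pos_count + neg_count)        # 0.1: down-weight the negatives

weights = torch.ones_like(cls_targets.reshape(-1))
weights[cls_targets.reshape(-1) == 1] = w_pos
weights[cls_targets.reshape(-1) == 0] = w_neg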
+ batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict['total_loss'] + reg_loss = self.loss_dict['reg_loss'] + cls_loss = self.loss_dict['cls_loss'] + + print("[epoch %d][%d/%d], || Loss: %.4f || cls Loss: %.4f" + " || reg Loss: %.4f" % ( + epoch, batch_id + 1, batch_len, + total_loss.item(), cls_loss.item(), reg_loss.item())) + + if not writer is None: + writer.add_scalar('Regression_loss', reg_loss.item(), + epoch * batch_len + batch_id) + writer.add_scalar('Confidence_loss', cls_loss.item(), + epoch * batch_len + batch_id) + + +def test(): + torch.manual_seed(0) + loss = PixorLoss(None) + pred = torch.sigmoid(torch.randn(1, 7, 2, 3)) + label = torch.zeros(1, 7, 2, 3) + loss = loss(pred, label) + print(loss) + + +if __name__ == "__main__": + test() diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_depth_adv_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_depth_adv_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..668cc51203d322f02157a2a3fc61f47c16a41386 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_depth_adv_loss.py @@ -0,0 +1,208 @@ +""" +Deigned for camera modality with depth supervision. +""" +import torch +import torch.nn as nn +import torch.nn.functional as F +from opencood.loss.point_pillar_loss import PointPillarLoss + +class PointPillarDepthAdvLoss(PointPillarLoss): + def __init__(self, args): + super().__init__(args) + self.depth = args['depth'] + self.adv = args['adv'] + self.adv_criterion = nn.BCELoss() + + + self.depth_weight = self.depth['weight'] + self.smooth_target = True if 'smooth_target' in self.depth and self.depth['smooth_target'] else False + self.use_fg_mask = True if 'use_fg_mask' in self.depth and self.depth['use_fg_mask'] else False + self.fg_weight = 3.25 + self.bg_weight = 0.25 + if self.smooth_target: + self.depth_loss_func = FocalLoss(alpha=0.25, gamma=2.0, reduction="none", smooth_target=True) + else: + self.depth_loss_func = FocalLoss(alpha=0.25, gamma=2.0, reduction="none") + + def forward(self, output_dict, target_dict, suffix=""): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + + total_loss = super().forward(output_dict, target_dict, suffix) + all_depth_loss = 0 + all_adv_loss = 0 + + depth_items_list = [x for x in output_dict.keys() if x.startswith(f"depth_items{suffix}")] + ######## Depth Supervision ######## + for depth_item_name in depth_items_list: + depth_item = output_dict[depth_item_name] + # depth logdit: [N, D, H, W] + # depth gt indices: [N, H, W] + # fg_mask: [N, H, W] + depth_logit, depth_gt_indices = depth_item[0], depth_item[1] + depth_loss = self.depth_loss_func(depth_logit, depth_gt_indices) + if self.use_fg_mask: + fg_mask = depth_item[-1] + weight_mask = (fg_mask > 0) * self.fg_weight + (fg_mask == 0) * self.bg_weight + depth_loss *= weight_mask + + depth_loss = depth_loss.mean() * self.depth_weight + all_depth_loss += depth_loss + + total_loss += all_depth_loss + + adv_list = [x for x in output_dict.keys() if x.startswith(f"real_pred{suffix}")] + ######## adv in forground object ######## + for real_pred_keyname in adv_list: + fake_pred_keyname = real_pred_keyname.replace("real", "fake") + real = output_dict[real_pred_keyname].view(-1) + fake = output_dict[fake_pred_keyname].view(-1) + nsample = real.shape[0] + + real_label = torch.full((nsample,), 1, 
dtype=torch.float, device=real.device) + errD_real = self.adv_criterion(real, real_label) + + fake_label = torch.full((nsample,), 0, dtype=torch.float, device=real.device) + errD_fake = self.adv_criterion(fake, fake_label) + + all_adv_loss += errD_real * self.adv['real_weight'] + errD_fake * self.adv['fake_weight'] + + all_adv_loss *= self.adv['weight'] + total_loss += all_adv_loss + + self.loss_dict.update({'depth_loss': all_depth_loss}) + self.loss_dict.update({'adv_loss': all_adv_loss}) + self.loss_dict.update({'total_loss': total_loss}) + + return total_loss + + + def logging(self, epoch, batch_id, batch_len, writer = None, suffix=""): + """ + Print out the loss function for current iteration. + + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. + batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict.get('total_loss', 0) + reg_loss = self.loss_dict.get('reg_loss', 0) + cls_loss = self.loss_dict.get('cls_loss', 0) + dir_loss = self.loss_dict.get('dir_loss', 0) + iou_loss = self.loss_dict.get('iou_loss', 0) + depth_loss = self.loss_dict.get('depth_loss', 0) + adv_loss = self.loss_dict.get('adv_loss', 0) + + + print("[epoch %d][%d/%d]%s || Loss: %.4f || Conf Loss: %.4f" + " || Loc Loss: %.4f || Dir Loss: %.4f || IoU Loss: %.4f || Depth Loss: %.4f || Adv Loss: %.4f " % ( + epoch, batch_id + 1, batch_len, suffix, + total_loss, cls_loss, reg_loss, dir_loss, iou_loss, depth_loss, adv_loss)) + + if not writer is None: + writer.add_scalar('Regression_loss' + suffix, reg_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Confidence_loss' + suffix, cls_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Dir_loss' + suffix, dir_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Iou_loss' + suffix, iou_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Depth_loss' + suffix, depth_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Adv_loss' + suffix, adv_loss, + epoch*batch_len + batch_id) + + +class FocalLoss(nn.Module): + r"""Criterion that computes Focal loss. + + According to :cite:`lin2018focal`, the Focal loss is computed as follows: + + .. math:: + + \text{FL}(p_t) = -\alpha_t (1 - p_t)^{\gamma} \, \text{log}(p_t) + + Where: + - :math:`p_t` is the model's estimated probability for each class. + + Args: + alpha: Weighting factor :math:`\alpha \in [0, 1]`. + gamma: Focusing parameter :math:`\gamma >= 0`. + reduction: Specifies the reduction to apply to the + output: ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction + will be applied, ``'mean'``: the sum of the output will be divided by + the number of elements in the output, ``'sum'``: the output will be + summed. + eps: Deprecated: scalar to enforce numerical stability. This is no longer + used. + + Shape: + - Input: :math:`(N, C, *)` where C = number of classes. + - Target: :math:`(N, *)` where each value is + :math:`0 ≤ targets[i] ≤ C−1`. 
+ + Example: + >>> N = 5 # num_classes + >>> kwargs = {"alpha": 0.5, "gamma": 2.0, "reduction": 'mean'} + >>> criterion = FocalLoss(**kwargs) + >>> input = torch.randn(1, N, 3, 5, requires_grad=True) + >>> target = torch.empty(1, 3, 5, dtype=torch.long).random_(N) + >>> output = criterion(input, target) + >>> output.backward() + """ + + def __init__(self, alpha, gamma = 2.0, reduction= 'none', smooth_target = False , eps = None) -> None: + super().__init__() + self.alpha = alpha + self.gamma = gamma + self.reduction = reduction + self.smooth_target = smooth_target + self.eps = eps + if self.smooth_target: + self.smooth_kernel = nn.Conv1d(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=1, bias=False) + self.smooth_kernel.weight = torch.nn.Parameter(torch.tensor([[[0.2, 0.9, 0.2]]]), requires_grad=False) + self.smooth_kernel = self.smooth_kernel.to(torch.device("cuda")) + + def forward(self, input, target): + n = input.shape[0] + out_size = (n,) + input.shape[2:] + + # compute softmax over the classes axis + input_soft = input.softmax(1) + log_input_soft = input.log_softmax(1) + + # create the labels one hot tensor + D = input.shape[1] + if self.smooth_target: + target_one_hot = F.one_hot(target, num_classes=D).to(input).view(-1, D) # [N*H*W, D] + target_one_hot = self.smooth_kernel(target_one_hot.float().unsqueeze(1)).squeeze(1) # [N*H*W, D] + target_one_hot = target_one_hot.view(*target.shape, D).permute(0, 3, 1, 2) + else: + target_one_hot = F.one_hot(target, num_classes=D).to(input).permute(0, 3, 1, 2) + # compute the actual focal loss + weight = torch.pow(-input_soft + 1.0, self.gamma) + + focal = -self.alpha * weight * log_input_soft + loss_tmp = torch.einsum('bc...,bc...->b...', (target_one_hot, focal)) + + if self.reduction == 'none': + loss = loss_tmp + elif self.reduction == 'mean': + loss = torch.mean(loss_tmp) + elif self.reduction == 'sum': + loss = torch.sum(loss_tmp) + else: + raise NotImplementedError(f"Invalid reduction mode: {self.reduction}") + return loss \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_depth_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_depth_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..e2f7c9ab1f5e7f607dd91491eccc3c042547d103 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_depth_loss.py @@ -0,0 +1,209 @@ +""" +Deigned for camera modality with depth supervision. 
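A minimal standalone illustration of the smooth_target path above (hypothetical bin count): the fixed [0.2, 0.9, 0.2] kernel turns a one-hot depth bin into a soft target that leaks a little probability mass into the neighbouring bins.

import torch
import torch.nn as nn
import torch.nn.functional as F

D = 6                                              # hypothetical number of depth bins
target = torch.tensor([2, 4])                      # ground-truth bin indices
one_hot = F.one_hot(target, num_classes=D).float() # [2, D]

smooth = nn.Conv1d(1, 1, kernel_size=3, padding=1, bias=False)
smooth.weight = nn.Parameter(torch.tensor([[[0.2, 0.9, 0.2]]]), requires_grad=False)

soft = smooth(one_hot.unsqueeze(1)).squeeze(1)     # [2, D]
print(soft[0])   # tensor([0.0000, 0.2000, 0.9000, 0.2000, 0.0000, 0.0000])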
+""" +import torch +import torch.nn as nn +import torch.nn.functional as F +from opencood.loss.point_pillar_loss import PointPillarLoss + +class PointPillarDepthLoss(PointPillarLoss): + def __init__(self, args): + super().__init__(args) + self.depth = args['depth'] + + + self.depth_weight = self.depth['weight'] + self.smooth_target = True if 'smooth_target' in self.depth and self.depth['smooth_target'] else False + self.use_fg_mask = True if 'use_fg_mask' in self.depth and self.depth['use_fg_mask'] else False + self.fg_weight = 3.25 + self.bg_weight = 0.25 + if self.smooth_target: + self.depth_loss_func = FocalLoss(alpha=0.25, gamma=2.0, reduction="none", smooth_target=True) + else: + self.depth_loss_func = FocalLoss(alpha=0.25, gamma=2.0, reduction="none") + + # def forward(self, output_dict, target_dict, suffix=""): + # """ + # Parameters + # ---------- + # output_dict : dict + # target_dict : dict + # """ + + # total_loss = super().forward(output_dict, target_dict, suffix) + + # ######## Depth Supervision ######## + # if f"depth_items{suffix}" in output_dict and output_dict[f'depth_items{suffix}'] is not None: + # # depth logdit: [N, D, H, W] + # # depth gt indices: [N, H, W] + # # fg_mask: [N, H, W] + # depth_logit, depth_gt_indices = output_dict[f'depth_items{suffix}'][0], output_dict[f'depth_items{suffix}'][1] + # depth_loss = self.depth_loss_func(depth_logit, depth_gt_indices) + # if self.use_fg_mask: + # fg_mask = output_dict[f'depth_items{suffix}'][-1] + # weight_mask = (fg_mask > 0) * self.fg_weight + (fg_mask == 0) * self.bg_weight + # depth_loss *= weight_mask + + # depth_loss = depth_loss.mean() * self.depth_weight + + # total_loss += depth_loss + # self.loss_dict.update({'depth_loss': depth_loss}) + + # return total_loss + + def forward(self, output_dict, target_dict, suffix=""): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + + total_loss = super().forward(output_dict, target_dict, suffix) + all_depth_loss = 0 + depth_items_list = [x for x in output_dict.keys() if x.startswith(f"depth_items{suffix}")] + ######## Depth Supervision ######## + for depth_item_name in depth_items_list: + depth_item = output_dict[depth_item_name] + + # depth logdit: [N, D, H, W] + # depth gt indices: [N, H, W] + # fg_mask: [N, H, W] + depth_logit, depth_gt_indices = depth_item[0], depth_item[1] + depth_loss = self.depth_loss_func(depth_logit, depth_gt_indices) + if self.use_fg_mask: + fg_mask = depth_item[-1] + weight_mask = (fg_mask > 0) * self.fg_weight + (fg_mask == 0) * self.bg_weight + depth_loss *= weight_mask + + depth_loss = depth_loss.mean() * self.depth_weight + all_depth_loss += depth_loss + + total_loss += all_depth_loss + self.loss_dict.update({'depth_loss': all_depth_loss}) # no update the total loss in dict + + return total_loss + + + def logging(self, epoch, batch_id, batch_len, writer = None, suffix=""): + """ + Print out the loss function for current iteration. + + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. 
+ batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict.get('total_loss', 0) + reg_loss = self.loss_dict.get('reg_loss', 0) + cls_loss = self.loss_dict.get('cls_loss', 0) + dir_loss = self.loss_dict.get('dir_loss', 0) + iou_loss = self.loss_dict.get('iou_loss', 0) + depth_loss = self.loss_dict.get('depth_loss', 0) + + + print("[epoch %d][%d/%d]%s || Loss: %.4f || Conf Loss: %.4f" + " || Loc Loss: %.4f || Dir Loss: %.4f || IoU Loss: %.4f || Depth Loss: %.4f" % ( + epoch, batch_id + 1, batch_len, suffix, + total_loss, cls_loss, reg_loss, dir_loss, iou_loss, depth_loss)) + + if not writer is None: + writer.add_scalar('Regression_loss' + suffix, reg_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Confidence_loss' + suffix, cls_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Dir_loss' + suffix, dir_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Iou_loss' + suffix, iou_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Depth_loss' + suffix, depth_loss, + epoch*batch_len + batch_id) + + +class FocalLoss(nn.Module): + r"""Criterion that computes Focal loss. + + According to :cite:`lin2018focal`, the Focal loss is computed as follows: + + .. math:: + + \text{FL}(p_t) = -\alpha_t (1 - p_t)^{\gamma} \, \text{log}(p_t) + + Where: + - :math:`p_t` is the model's estimated probability for each class. + + Args: + alpha: Weighting factor :math:`\alpha \in [0, 1]`. + gamma: Focusing parameter :math:`\gamma >= 0`. + reduction: Specifies the reduction to apply to the + output: ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction + will be applied, ``'mean'``: the sum of the output will be divided by + the number of elements in the output, ``'sum'``: the output will be + summed. + eps: Deprecated: scalar to enforce numerical stability. This is no longer + used. + + Shape: + - Input: :math:`(N, C, *)` where C = number of classes. + - Target: :math:`(N, *)` where each value is + :math:`0 ≤ targets[i] ≤ C−1`. 
+
+    Example:
+        >>> N = 5  # num_classes
+        >>> kwargs = {"alpha": 0.5, "gamma": 2.0, "reduction": 'mean'}
+        >>> criterion = FocalLoss(**kwargs)
+        >>> input = torch.randn(1, N, 3, 5, requires_grad=True)
+        >>> target = torch.empty(1, 3, 5, dtype=torch.long).random_(N)
+        >>> output = criterion(input, target)
+        >>> output.backward()
+    """
+
+    def __init__(self, alpha, gamma=2.0, reduction='none', smooth_target=False, eps=None) -> None:
+        super().__init__()
+        self.alpha = alpha
+        self.gamma = gamma
+        self.reduction = reduction
+        self.smooth_target = smooth_target
+        self.eps = eps
+        if self.smooth_target:
+            self.smooth_kernel = nn.Conv1d(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=1, bias=False)
+            self.smooth_kernel.weight = torch.nn.Parameter(torch.tensor([[[0.2, 0.9, 0.2]]]), requires_grad=False)
+            self.smooth_kernel = self.smooth_kernel.to(torch.device("cuda"))
+
+    def forward(self, input, target):
+        n = input.shape[0]
+        out_size = (n,) + input.shape[2:]
+
+        # compute softmax over the classes axis
+        input_soft = input.softmax(1)
+        log_input_soft = input.log_softmax(1)
+
+        # create the labels one hot tensor
+        D = input.shape[1]
+        if self.smooth_target:
+            target_one_hot = F.one_hot(target, num_classes=D).to(input).view(-1, D)  # [N*H*W, D]
+            target_one_hot = self.smooth_kernel(target_one_hot.float().unsqueeze(1)).squeeze(1)  # [N*H*W, D]
+            target_one_hot = target_one_hot.view(*target.shape, D).permute(0, 3, 1, 2)
+        else:
+            target_one_hot = F.one_hot(target, num_classes=D).to(input).permute(0, 3, 1, 2)
+        # compute the actual focal loss
+        weight = torch.pow(-input_soft + 1.0, self.gamma)
+
+        focal = -self.alpha * weight * log_input_soft
+        loss_tmp = torch.einsum('bc...,bc...->b...', (target_one_hot, focal))
+
+        if self.reduction == 'none':
+            loss = loss_tmp
+        elif self.reduction == 'mean':
+            loss = torch.mean(loss_tmp)
+        elif self.reduction == 'sum':
+            loss = torch.sum(loss_tmp)
+        else:
+            raise NotImplementedError(f"Invalid reduction mode: {self.reduction}")
+        return loss
\ No newline at end of file
diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_disconet_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_disconet_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..52441644cf7b3e47ffad03806a6fae6fae39f546
--- /dev/null
+++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_disconet_loss.py
@@ -0,0 +1,108 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+from opencood.loss.point_pillar_loss import PointPillarLoss
+
+class PointPillarDiscoNetLoss(PointPillarLoss):
+    def __init__(self, args):
+        super(PointPillarDiscoNetLoss, self).__init__(args)
+        self.kd = args['kd']
+
+    def forward(self, output_dict, target_dict):
+        """
+        Parameters
+        ----------
+        output_dict : dict
+        target_dict : dict
+        """
+        total_loss = super().forward(output_dict, target_dict)
+
+        ########## KL loss ############
+        rm = output_dict['reg_preds']   # [B, 14, 50, 176]
+        psm = output_dict['cls_preds']  # [B, 2, 50, 176]
+        feature = output_dict['feature']
+
+        teacher_rm = output_dict['teacher_reg_preds']
+        teacher_psm = output_dict['teacher_cls_preds']
+
+        teacher_feature = output_dict['teacher_feature']
+        kl_loss_mean = nn.KLDivLoss(size_average=True, reduce=True)
+
+        N, C, H, W = teacher_feature.shape
+        teacher_feature = teacher_feature.permute(0,2,3,1).reshape(N*H*W, C)
+        student_feature = feature.permute(0,2,3,1).reshape(N*H*W, C)
+        kd_loss_feature = kl_loss_mean(
+            F.log_softmax(student_feature, dim=1), F.softmax(teacher_feature, dim=1)
+        )
+
+        kd_loss = kd_loss_feature
+
+        if self.kd.get('decoder_kd', False):
+            N, C, H, W = teacher_rm.shape
+            teacher_rm = teacher_rm.permute(0,2,3,1).reshape(N*H*W, C)
+            student_rm = rm.permute(0,2,3,1).reshape(N*H*W, C)
+            kd_loss_rm = kl_loss_mean(
+                F.log_softmax(student_rm, dim=1), F.softmax(teacher_rm, dim=1)
+            )
+
+            N, C, H, W = teacher_psm.shape
+            teacher_psm = teacher_psm.permute(0,2,3,1).reshape(N*H*W, C)
+            student_psm = psm.permute(0,2,3,1).reshape(N*H*W, C)
+            kd_loss_psm = kl_loss_mean(
+                F.log_softmax(student_psm, dim=1), F.softmax(teacher_psm, dim=1)
+            )
+
+            kd_loss += kd_loss_rm + kd_loss_psm
+
+        kd_loss *= self.kd['weight']
+        total_loss += kd_loss
+        self.loss_dict.update({'total_loss': total_loss.item(),
+                               'kd_loss': kd_loss.item()})
+
+        return total_loss
+
+    def logging(self, epoch, batch_id, batch_len, writer=None):
+        """
+        Print out the loss function for current iteration.
+
+        Parameters
+        ----------
+        epoch : int
+            Current epoch for training.
+        batch_id : int
+            The current batch.
+        batch_len : int
+            Total batch length in one iteration of training.
+        writer : SummaryWriter
+            Used to visualize on tensorboard
+        """
+        total_loss = self.loss_dict.get('total_loss', 0)
+        reg_loss = self.loss_dict.get('reg_loss', 0)
+        cls_loss = self.loss_dict.get('cls_loss', 0)
+        dir_loss = self.loss_dict.get('dir_loss', 0)
+        iou_loss = self.loss_dict.get('iou_loss', 0)
+        kd_loss = self.loss_dict.get('kd_loss', 0)
+
+        print("[epoch %d][%d/%d] || Loss: %.4f || Conf Loss: %.4f"
+              " || Loc Loss: %.4f || Dir Loss: %.4f || IoU Loss: %.4f || KD Loss: %.4f" % (
+                  epoch, batch_id + 1, batch_len,
+                  total_loss, cls_loss, reg_loss, dir_loss, iou_loss, kd_loss))
+
+        if writer is not None:
+            writer.add_scalar('Regression_loss', reg_loss,
+                              epoch*batch_len + batch_id)
+            writer.add_scalar('Confidence_loss', cls_loss,
+                              epoch*batch_len + batch_id)
+            writer.add_scalar('Dir_loss', dir_loss,
+                              epoch*batch_len + batch_id)
+            writer.add_scalar('Iou_loss', iou_loss,
+                              epoch*batch_len + batch_id)
+            writer.add_scalar('Kd_loss', kd_loss,
+                              epoch*batch_len + batch_id)
\ No newline at end of file
diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c4fc65c55920a16d33d2f9351492dcc6cfda903
--- /dev/null
+++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_loss.py
@@ -0,0 +1,245 @@
+# -*- coding: utf-8 -*-
+# Author: Yifan Lu
+# Add direction classification loss
+# The original point_pillar_loss.py cannot determine whether the box heading is opposite to the GT.
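+#
+# Editor's note (illustrative, not from the original author): the sin-difference trick encodes the
+# heading residual as sin(theta_pred - theta_gt), which is numerically the same for a perfect
+# prediction and for one flipped by pi, e.g. with a hypothetical ground-truth yaw of 0.3 rad:
+#   >>> import math
+#   >>> round(math.sin(0.3 - 0.3), 6), round(math.sin((0.3 + math.pi) - 0.3), 6)
+#   (0.0, 0.0)
+# The 2-bin direction classifier added in this file recovers that lost half-turn by classifying
+# which pi-wide bin the (offset) ground-truth yaw falls into (see get_direction_target).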
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+from opencood.utils.common_utils import limit_period
+from opencood.data_utils.post_processor.voxel_postprocessor import VoxelPostprocessor
+from icecream import ic
+
+class PointPillarLoss(nn.Module):
+    def __init__(self, args):
+        super(PointPillarLoss, self).__init__()
+        self.pos_cls_weight = args['pos_cls_weight']
+
+        self.cls = args['cls']
+        self.reg = args['reg']
+
+        if 'dir' in args:
+            self.dir = args['dir']
+        else:
+            self.dir = None
+
+        if 'iou' in args:
+            from opencood.pcdet_utils.iou3d_nms.iou3d_nms_utils import aligned_boxes_iou3d_gpu
+            self.iou_loss_func = aligned_boxes_iou3d_gpu
+            self.iou = args['iou']
+        else:
+            self.iou = None
+
+        self.loss_dict = {}
+
+    def forward(self, output_dict, target_dict, suffix=""):
+        """
+        Parameters
+        ----------
+        output_dict : dict
+        target_dict : dict
+        """
+        if 'record_len' in output_dict:
+            batch_size = int(output_dict['record_len'].sum())
+        elif 'batch_size' in output_dict:
+            batch_size = output_dict['batch_size']
+        else:
+            batch_size = target_dict['pos_equal_one'].shape[0]
+
+        cls_labls = target_dict['pos_equal_one'].view(batch_size, -1, 1)
+        positives = cls_labls > 0
+        negatives = target_dict['neg_equal_one'].view(batch_size, -1, 1) > 0
+        # cared = torch.logical_or(positives, negatives)
+        # cls_labls = cls_labls * cared.type_as(cls_labls)
+        # num_normalizer = cared.sum(1, keepdim=True)
+        pos_normalizer = positives.sum(1, keepdim=True).float()
+
+        # rename variable
+        if f'psm{suffix}' in output_dict:
+            output_dict[f'cls_preds{suffix}'] = output_dict[f'psm{suffix}']
+        if f'rm{suffix}' in output_dict:
+            output_dict[f'reg_preds{suffix}'] = output_dict[f'rm{suffix}']
+        if f'dm{suffix}' in output_dict:
+            output_dict[f'dir_preds{suffix}'] = output_dict[f'dm{suffix}']
+
+        total_loss = 0
+
+        # cls loss
+        cls_preds = output_dict[f'cls_preds{suffix}'].permute(0, 2, 3, 1).contiguous() \
+                    .view(batch_size, -1, 1)
+        cls_weights = positives * self.pos_cls_weight + negatives * 1.0
+        cls_weights /= torch.clamp(pos_normalizer, min=1.0)
+        cls_loss = sigmoid_focal_loss(cls_preds, cls_labls, weights=cls_weights, **self.cls)
+        cls_loss = cls_loss.sum() * self.cls['weight'] / batch_size
+
+        # reg loss
+        reg_weights = positives / torch.clamp(pos_normalizer, min=1.0)
+        reg_preds = output_dict[f'reg_preds{suffix}'].permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 7)
+        reg_targets = target_dict['targets'].view(batch_size, -1, 7)
+        reg_preds, reg_targets = self.add_sin_difference(reg_preds, reg_targets)
+        reg_loss = weighted_smooth_l1_loss(reg_preds, reg_targets, weights=reg_weights, sigma=self.reg['sigma'])
+        reg_loss = reg_loss.sum() * self.reg['weight'] / batch_size
+
+        ######## direction ##########
+        if self.dir:
+            dir_targets = self.get_direction_target(target_dict['targets'].view(batch_size, -1, 7))
+            dir_logits = output_dict[f"dir_preds{suffix}"].permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2)  # [N, H*W*#anchor, 2]
+
+            dir_loss = softmax_cross_entropy_with_logits(dir_logits.view(-1, self.anchor_num), dir_targets.view(-1, self.anchor_num))
+            dir_loss = dir_loss.flatten() * reg_weights.flatten()
+            dir_loss = dir_loss.sum() * self.dir['weight'] / batch_size
+            total_loss += dir_loss
+            self.loss_dict.update({'dir_loss': dir_loss.item()})
+
+        ######## IoU ###########
+        if self.iou:
+            iou_preds = output_dict[f"iou_preds{suffix}"].permute(0, 2, 3, 1).contiguous()
+            pos_pred_mask = reg_weights.squeeze(dim=-1) > 0  # (4, 70400)
+            iou_pos_preds = 
iou_preds.view(batch_size, -1)[pos_pred_mask] + boxes3d_pred = VoxelPostprocessor.delta_to_boxes3d(output_dict[f'reg_preds{suffix}'].permute(0, 2, 3, 1).contiguous().detach(), + output_dict['anchor_box'])[pos_pred_mask] + boxes3d_tgt = VoxelPostprocessor.delta_to_boxes3d(target_dict['targets'], + output_dict['anchor_box'])[pos_pred_mask] + iou_weights = reg_weights[pos_pred_mask].view(-1) + iou_pos_targets = self.iou_loss_func(boxes3d_pred.float()[:, [0, 1, 2, 5, 4, 3, 6]], # hwl -> dx dy dz + boxes3d_tgt.float()[:, [0, 1, 2, 5, 4, 3, 6]]).detach().squeeze() + iou_pos_targets = 2 * iou_pos_targets.view(-1) - 1 + iou_loss = weighted_smooth_l1_loss(iou_pos_preds, iou_pos_targets, weights=iou_weights, sigma=self.iou['sigma']) + + iou_loss = iou_loss.sum() * self.iou['weight'] / batch_size + total_loss += iou_loss + self.loss_dict.update({'iou_loss': iou_loss.item()}) + + total_loss += reg_loss + cls_loss + + self.loss_dict.update({'total_loss': total_loss.item(), + 'reg_loss': reg_loss.item(), + 'cls_loss': cls_loss.item()}) + + return total_loss + + + @staticmethod + def add_sin_difference(boxes1, boxes2, dim=6): + assert dim != -1 + rad_pred_encoding = torch.sin(boxes1[..., dim:dim + 1]) * \ + torch.cos(boxes2[..., dim:dim + 1]) + rad_tg_encoding = torch.cos(boxes1[..., dim:dim + 1]) * \ + torch.sin(boxes2[..., dim:dim + 1]) + + boxes1 = torch.cat([boxes1[..., :dim], rad_pred_encoding, + boxes1[..., dim + 1:]], dim=-1) + boxes2 = torch.cat([boxes2[..., :dim], rad_tg_encoding, + boxes2[..., dim + 1:]], dim=-1) + return boxes1, boxes2 + + def get_direction_target(self, reg_targets): + """ + Args: + reg_targets: [N, H * W * #anchor_num, 7] + The last term is (theta_gt - theta_a) + + Returns: + dir_targets: + theta_gt: [N, H * W * #anchor_num, NUM_BIN] + NUM_BIN = 2 + """ + num_bins = self.dir['args']['num_bins'] + dir_offset = self.dir['args']['dir_offset'] + anchor_yaw = np.deg2rad(np.array(self.dir['args']['anchor_yaw'])) # for direction classification + self.anchor_yaw_map = torch.from_numpy(anchor_yaw).view(1,-1,1) # [1,2,1] + self.anchor_num = self.anchor_yaw_map.shape[1] + + H_times_W_times_anchor_num = reg_targets.shape[1] + anchor_map = self.anchor_yaw_map.repeat(1, H_times_W_times_anchor_num//self.anchor_num, 1).to(reg_targets.device) # [1, H * W * #anchor_num, 1] + rot_gt = reg_targets[..., -1] + anchor_map[..., -1] # [N, H*W*anchornum] + offset_rot = limit_period(rot_gt - dir_offset, 0, 2 * np.pi) + dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / num_bins)).long() # [N, H*W*anchornum] + dir_cls_targets = torch.clamp(dir_cls_targets, min=0, max=num_bins - 1) + # one_hot: + # if rot_gt > 0, then the label is 1, then the regression target is [0, 1] + dir_cls_targets = one_hot_f(dir_cls_targets, num_bins) + return dir_cls_targets + + + + def logging(self, epoch, batch_id, batch_len, writer = None, suffix=""): + """ + Print out the loss function for current iteration. + + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. 
+ batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict.get('total_loss', 0) + reg_loss = self.loss_dict.get('reg_loss', 0) + cls_loss = self.loss_dict.get('cls_loss', 0) + dir_loss = self.loss_dict.get('dir_loss', 0) + iou_loss = self.loss_dict.get('iou_loss', 0) + + + print("[epoch %d][%d/%d]%s || Loss: %.4f || Conf Loss: %.4f" + " || Loc Loss: %.4f || Dir Loss: %.4f || IoU Loss: %.4f" % ( + epoch, batch_id + 1, batch_len, suffix, + total_loss, cls_loss, reg_loss, dir_loss, iou_loss)) + + if not writer is None: + writer.add_scalar('Regression_loss'+suffix, reg_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Confidence_loss'+suffix, cls_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Dir_loss'+suffix, dir_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Iou_loss'+suffix, iou_loss, + epoch*batch_len + batch_id) + +def one_hot_f(tensor, num_bins, dim=-1, on_value=1.0, dtype=torch.float32): + tensor_onehot = torch.zeros(*list(tensor.shape), num_bins, dtype=dtype, device=tensor.device) + tensor_onehot.scatter_(dim, tensor.unsqueeze(dim).long(), on_value) + return tensor_onehot + +def softmax_cross_entropy_with_logits(logits, labels): + param = list(range(len(logits.shape))) + transpose_param = [0] + [param[-1]] + param[1:-1] + logits = logits.permute(*transpose_param) + loss_ftor = torch.nn.CrossEntropyLoss(reduction="none") + loss = loss_ftor(logits, labels.max(dim=-1)[1]) + return loss + +def weighted_smooth_l1_loss(preds, targets, sigma=3.0, weights=None): + diff = preds - targets + abs_diff = torch.abs(diff) + abs_diff_lt_1 = torch.le(abs_diff, 1 / (sigma ** 2)).type_as(abs_diff) + loss = abs_diff_lt_1 * 0.5 * torch.pow(abs_diff * sigma, 2) + \ + (abs_diff - 0.5 / (sigma ** 2)) * (1.0 - abs_diff_lt_1) + if weights is not None: + loss *= weights + return loss + + +def sigmoid_focal_loss(preds, targets, weights=None, **kwargs): + assert 'gamma' in kwargs and 'alpha' in kwargs + # sigmoid cross entropy with logits + # more details: https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits + per_entry_cross_ent = torch.clamp(preds, min=0) - preds * targets.type_as(preds) + per_entry_cross_ent += torch.log1p(torch.exp(-torch.abs(preds))) + # focal loss + prediction_probabilities = torch.sigmoid(preds) + p_t = (targets * prediction_probabilities) + ((1 - targets) * (1 - prediction_probabilities)) + modulating_factor = torch.pow(1.0 - p_t, kwargs['gamma']) + alpha_weight_factor = targets * kwargs['alpha'] + (1 - targets) * (1 - kwargs['alpha']) + + loss = modulating_factor * alpha_weight_factor * per_entry_cross_ent + if weights is not None: + loss *= weights + return loss \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_uncertainty_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_uncertainty_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..0b9d2f9fbc04b457249b8a09811e7b73f4296d92 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_uncertainty_loss.py @@ -0,0 +1,289 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +from opencood.loss.point_pillar_loss import PointPillarLoss, \ + one_hot_f, softmax_cross_entropy_with_logits, weighted_smooth_l1_loss, sigmoid_focal_loss +import d3d.mathh as mathh +from opencood.utils.common_utils import 
limit_period +from opencood.data_utils.post_processor.voxel_postprocessor import VoxelPostprocessor +from functools import partial + +class PointPillarUncertaintyLoss(PointPillarLoss): + def __init__(self, args): + super(PointPillarUncertaintyLoss, self).__init__(args) + self.uncertainty = args['uncertainty'] + self.uncertainty_dim = args['uncertainty']['dim'] # 2 means x, y; 3 means x, y, yaw; 7 means x y z dh dw dl yaw + self.unc_loss_func = KLLoss(args['uncertainty']) + + + def forward(self, output_dict, target_dict, suffix=""): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + if 'record_len' in output_dict: + batch_size = int(output_dict['record_len'].sum()) + elif 'batch_size' in output_dict: + batch_size = output_dict['batch_size'] + else: + batch_size = target_dict['pos_equal_one'].shape[0] + + cls_labls = target_dict['pos_equal_one'].view(batch_size, -1, 1) + positives = cls_labls > 0 + negatives = target_dict['neg_equal_one'].view(batch_size, -1, 1) > 0 + + pos_normalizer = positives.sum(1, keepdim=True).float() + + # rename variable + if f'psm{suffix}' in output_dict: + output_dict[f'cls_preds{suffix}'] = output_dict[f'psm{suffix}'] + if f'rm{suffix}' in output_dict: + output_dict[f'reg_preds{suffix}'] = output_dict[f'rm{suffix}'] + if f'dm{suffix}' in output_dict: + output_dict[f'dir_preds{suffix}'] = output_dict[f'dm{suffix}'] + if f'sm{suffix}' in output_dict: + output_dict[f'unc_preds{suffix}'] = output_dict[f'sm{suffix}'] + + total_loss = 0 + + # cls loss + cls_preds = output_dict[f'cls_preds{suffix}'].permute(0, 2, 3, 1).contiguous() \ + .view(batch_size, -1, 1) + cls_weights = positives * self.pos_cls_weight + negatives * 1.0 + cls_weights /= torch.clamp(pos_normalizer, min=1.0) + cls_loss = sigmoid_focal_loss(cls_preds, cls_labls, weights=cls_weights, **self.cls) + cls_loss = cls_loss.sum() * self.cls['weight'] / batch_size + + # reg loss + reg_weights = positives / torch.clamp(pos_normalizer, min=1.0) + reg_preds = output_dict[f'reg_preds{suffix}'].permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 7) + reg_targets = target_dict['targets'].view(batch_size, -1, 7) + reg_preds_w_angle, reg_targets_w_angle = self.add_sin_difference_and_angle(reg_preds, reg_targets) # note the difference + reg_loss = weighted_smooth_l1_loss(reg_preds_w_angle[...,:7], reg_targets_w_angle[...,:7], weights=reg_weights, sigma=self.reg['sigma']) + reg_loss = reg_loss.sum() * self.reg['weight'] / batch_size + + # uncertainty loss + ######## kl ######### + unc_preds = output_dict[f'unc_preds{suffix}'].permute(0, 2, 3, 1).contiguous() # [N, H, W, #anchor_num * 3] + unc_preds = unc_preds.view(unc_preds.size(0), -1, self.uncertainty_dim) + + unc_loss = self.unc_loss_func(reg_preds_w_angle, + reg_targets_w_angle, + unc_preds, + reg_weights) + + unc_loss = unc_loss.sum() / unc_preds.shape[0] + unc_loss *= self.uncertainty['weight'] + + + ######## direction ########## + if self.dir: + dir_targets = self.get_direction_target(target_dict['targets'].view(batch_size, -1, 7)) + dir_logits = output_dict[f"dir_preds{suffix}"].permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2) # [N, H*W*#anchor, 2] + + dir_loss = softmax_cross_entropy_with_logits(dir_logits.view(-1, self.anchor_num), dir_targets.view(-1, self.anchor_num)) + dir_loss = dir_loss.flatten() * reg_weights.flatten() + dir_loss = dir_loss.sum() * self.dir['weight'] / batch_size + total_loss += dir_loss + self.loss_dict.update({'dir_loss': dir_loss.item()}) + + + ######## IoU ########### + if self.iou: + 
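+            # Editor's note: this IoU branch decodes the predicted and ground-truth deltas of the
+            # positive anchors into 3D boxes, computes their aligned 3D IoU, rescales it from [0, 1]
+            # to [-1, 1], and regresses iou_preds toward that value with a smooth-L1 loss.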
+            iou_preds = output_dict[f"iou_preds{suffix}"].permute(0, 2, 3, 1).contiguous()
+            pos_pred_mask = reg_weights.squeeze(dim=-1) > 0  # (4, 70400)
+            iou_pos_preds = iou_preds.view(batch_size, -1)[pos_pred_mask]
+            boxes3d_pred = VoxelPostprocessor.delta_to_boxes3d(output_dict[f'reg_preds{suffix}'].permute(0, 2, 3, 1).contiguous().detach(),
+                                                               output_dict['anchor_box'])[pos_pred_mask]
+            boxes3d_tgt = VoxelPostprocessor.delta_to_boxes3d(target_dict['targets'],
+                                                              output_dict['anchor_box'])[pos_pred_mask]
+            iou_weights = reg_weights[pos_pred_mask].view(-1)
+            iou_pos_targets = self.iou_loss_func(boxes3d_pred.float()[:, [0, 1, 2, 5, 4, 3, 6]],  # hwl -> dx dy dz
+                                                 boxes3d_tgt.float()[:, [0, 1, 2, 5, 4, 3, 6]]).detach().squeeze()
+            iou_pos_targets = 2 * iou_pos_targets.view(-1) - 1
+            iou_loss = weighted_smooth_l1_loss(iou_pos_preds, iou_pos_targets, weights=iou_weights, sigma=self.iou['sigma'])
+
+            iou_loss = iou_loss.sum() * self.iou['weight'] / batch_size
+            total_loss += iou_loss
+            self.loss_dict.update({'iou_loss': iou_loss.item()})
+
+        total_loss += reg_loss + cls_loss + unc_loss
+
+        self.loss_dict.update({'total_loss': total_loss.item(),
+                               'reg_loss': reg_loss.item(),
+                               'cls_loss': cls_loss.item(),
+                               'unc_loss': unc_loss.item()})
+
+        return total_loss
+
+    def logging(self, epoch, batch_id, batch_len, writer=None):
+        """
+        Print out the loss function for current iteration.
+
+        Parameters
+        ----------
+        epoch : int
+            Current epoch for training.
+        batch_id : int
+            The current batch.
+        batch_len : int
+            Total batch length in one iteration of training.
+        writer : SummaryWriter
+            Used to visualize on tensorboard
+        """
+        total_loss = self.loss_dict.get('total_loss', 0)
+        reg_loss = self.loss_dict.get('reg_loss', 0)
+        cls_loss = self.loss_dict.get('cls_loss', 0)
+        dir_loss = self.loss_dict.get('dir_loss', 0)
+        iou_loss = self.loss_dict.get('iou_loss', 0)
+        unc_loss = self.loss_dict.get('unc_loss', 0)
+
+        print("[epoch %d][%d/%d] || Loss: %.4f || Conf Loss: %.4f"
+              " || Loc Loss: %.4f || Dir Loss: %.4f || IoU Loss: %.4f || Unc Loss: %.4f" % (
+                  epoch, batch_id + 1, batch_len,
+                  total_loss, cls_loss, reg_loss, dir_loss, iou_loss, unc_loss))
+
+        if writer is not None:
+            writer.add_scalar('Regression_loss', reg_loss,
+                              epoch*batch_len + batch_id)
+            writer.add_scalar('Confidence_loss', cls_loss,
+                              epoch*batch_len + batch_id)
+            writer.add_scalar('Dir_loss', dir_loss,
+                              epoch*batch_len + batch_id)
+            writer.add_scalar('Iou_loss', iou_loss,
+                              epoch*batch_len + batch_id)
+            writer.add_scalar('Unc_loss', unc_loss,
+                              epoch*batch_len + batch_id)
+
+    @staticmethod
+    def add_sin_difference_and_angle(boxes1, boxes2, dim=6):
+        """
+        This is different from the base PointPillarLoss's add_sin_difference function.
+        We retain the angle and put it at the last dimension.
+
+        add_sin_difference returns [B, H*W, 7]
+        ->
+        add_sin_difference_and_angle returns [B, H*W, 8]
+
+        """
+        assert dim != -1
+
+        # sin(theta1 - theta2) = sin(theta1)*cos(theta2) - cos(theta1)*sin(theta2)
+        rad_pred_encoding = torch.sin(boxes1[..., dim:dim + 1]) * \
+                            torch.cos(boxes2[..., dim:dim + 1])
+
+        rad_tg_encoding = torch.cos(boxes1[..., dim: dim + 1]) * \
+                          torch.sin(boxes2[..., dim: dim + 1])
+
+        boxes1_w_angle = torch.cat([boxes1[..., :dim], rad_pred_encoding,
+                                    boxes1[..., dim:]], dim=-1)  # originally, boxes1[..., dim + 1:]], dim=-1)
+        boxes2_w_angle = torch.cat([boxes2[..., :dim], rad_tg_encoding,
+                                    boxes2[..., dim:]], dim=-1)  # originally, boxes2[..., dim + 1:]], dim=-1)
+
+        return boxes1_w_angle, boxes2_w_angle
+
+
+class KLLoss(nn.Module):
+    def __init__(self, args):
+        super(KLLoss, self).__init__()
+
+        self.angle_weight = args['angle_weight']
+        self.uncertainty_dim = args['dim']
+        if args['xy_loss_type'] == "l2":
+            self.xy_loss = self.kl_loss_l2
+        elif args['xy_loss_type'] == "l1":
+            self.xy_loss = self.kl_loss_l1
+        else:
+            raise NotImplementedError(f"xy_loss_type {args['xy_loss_type']} is not implemented")
+
+        if args['angle_loss_type'] == "l2":
+            self.angle_loss = self.kl_loss_l2
+        elif args['angle_loss_type'] == "von-mise":
+            lambda_V = args['lambda_V']
+            s0 = args['s0']
+            limit_period = args['limit_period']
+            self.angle_loss = partial(self.kl_loss_angular, lambda_V=lambda_V, s0=s0, limit_period=limit_period)
+        else:
+            raise NotImplementedError(f"angle_loss_type {args['angle_loss_type']} is not implemented")
+
+    @staticmethod
+    def kl_loss_l2(diff, s):
+        """
+        Args:
+            diff: [B, 2]
+            s:    [B, 2]
+        Returns:
+            loss: [B, 2]
+        """
+        loss = 0.5*(torch.exp(-s) * (diff**2) + s)
+        return loss
+
+    @staticmethod
+    def kl_loss_l1(diff, s):
+        """
+        Args:
+            diff: [B, 2]
+            s:    [B, 2]
+        Returns:
+            loss: [B, 2]
+        """
+        loss = 0.5*torch.exp(-s) * torch.abs(diff) + s
+        return loss
+
+    @staticmethod
+    def kl_loss_angular(diff, s, lambda_V=1, s0=1, limit_period=False):
+        """
+        Args:
+            diff: [B, 1]
+            s:    [B, 1]
+            if limit_period, a heading off by 180 degrees (diff + pi) is treated the same as diff.
+        Returns:
+            loss: [B, 1]
+        """
+        exp_minus_s = torch.exp(-s)
+        if limit_period:
+            cos_abs = torch.abs(torch.cos(diff))
+            loss = torch.log(mathh.i0e_cuda(exp_minus_s)*torch.exp(exp_minus_s)) - exp_minus_s * cos_abs.detach() + lambda_V * F.elu(s-s0)
+        else:
+            loss = torch.log(mathh.i0e_cuda(exp_minus_s)*torch.exp(exp_minus_s)) - exp_minus_s * torch.cos(diff) + lambda_V * F.elu(s-s0)
+
+        return loss
+
+    def forward(self, input: torch.Tensor,
+                target: torch.Tensor,
+                sm: torch.Tensor,
+                weights: torch.Tensor = None):
+        target = torch.where(torch.isnan(target), input, target)  # ignore nan targets
+
+        if self.uncertainty_dim == 3:  # x,y,yaw
+            xy_diff = input[...,:2] - target[...,:2]
+            loss1 = self.xy_loss(xy_diff, sm[...,:2])
+            theta_diff = input[...,7:8] - target[...,7:8]
+            loss2 = self.angle_weight * self.angle_loss(theta_diff, sm[...,2:3])
+            loss = torch.cat((loss1, loss2), dim=-1)
+
+        elif self.uncertainty_dim == 7:  # all regression target
+            other_diff = input[...,:6] - target[...,:6]
+            theta_diff = input[...,7:8] - target[...,7:8]
+            diff = torch.cat((other_diff, theta_diff), dim=-1)
+            loss = self.xy_loss(diff, sm)
+
+        elif self.uncertainty_dim == 2:  # x,y
+            xy_diff = input[...,:2] - target[...,:2]
+            loss = self.xy_loss(xy_diff, sm[...,:2])
+        else:
+            raise NotImplementedError(f"uncertainty dim {self.uncertainty_dim} is not implemented")
+
+        # anchor-wise weighting
+        if weights is not None:
+            assert weights.shape[0] == loss.shape[0] and weights.shape[1] == loss.shape[1]
+
+            loss = loss * weights
+
+        return loss
\ No newline at end of file
diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/uncertainty_loss_old.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/uncertainty_loss_old.py
new file mode 100644
index 0000000000000000000000000000000000000000..e4ca42c8a7070f94ea9094728a68bf28c52fcfc0
--- /dev/null
+++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/uncertainty_loss_old.py
@@ -0,0 +1,482 @@
+# -*- coding: utf-8 -*-
+# Author: Yifan Lu
+# License: TDG-Attribution-NonCommercial-NoDistrib
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+import d3d.mathh as mathh
+from opencood.utils.common_utils import limit_period
+from functools import partial
+
+class WeightedSmoothL1Loss(nn.Module):
+    """
+    Code-wise Weighted Smooth L1 Loss modified based on fvcore.nn.smooth_l1_loss
+    https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/smooth_l1_loss.py
+                  | 0.5 * x ** 2 / beta   if abs(x) < beta
+    smoothl1(x) = |
+                  | abs(x) - 0.5 * beta   otherwise,
+    where x = input - target.
+    """
+    def __init__(self, beta: float = 1.0 / 9.0, code_weights: list = None):
+        """
+        Args:
+            beta: Scalar float.
+                L1 to L2 change point.
+                For beta values < 1e-5, L1 loss is computed.
+            code_weights: (#codes) float list if not None.
+                Code-wise weights.
+        """
+        super(WeightedSmoothL1Loss, self).__init__()
+        self.beta = beta
+        if code_weights is not None:
+            self.code_weights = np.array(code_weights, dtype=np.float32)
+            self.code_weights = torch.from_numpy(self.code_weights).cuda()
+
+    @staticmethod
+    def smooth_l1_loss(diff, beta):
+        if beta < 1e-5:
+            loss = torch.abs(diff)
+        else:
+            n = torch.abs(diff)
+            loss = torch.where(n < beta, 0.5 * n ** 2 / beta, n - 0.5 * beta)
+
+        return loss
+
+    def forward(self, input: torch.Tensor,
+                target: torch.Tensor, weights: torch.Tensor = None):
+        """
+        Args:
+            input: (B, #anchors, #codes) float tensor.
+                Encoded predicted locations of objects.
+            target: (B, #anchors, #codes) float tensor.
+                Regression targets.
+ weights: (B, #anchors) float tensor if not None. + + Returns: + loss: (B, #anchors) float tensor. + Weighted smooth l1 loss without reduction. + """ + target = torch.where(torch.isnan(target), input, target) # ignore nan targets + + diff = input - target + loss = self.smooth_l1_loss(diff, self.beta) + + # anchor-wise weighting + if weights is not None: + assert weights.shape[0] == loss.shape[0] and weights.shape[1] == loss.shape[1] + loss = loss * weights.unsqueeze(-1) + + return loss + + + + +class KLLoss(nn.Module): + def __init__(self, args): + super(KLLoss, self).__init__() + + self.angle_weight = args['angle_weight'] + self.uncertainty_dim = args['uncertainty_dim'] + if args['xy_loss_type'] == "l2": + self.xy_loss = self.kl_loss_l2 + elif args['xy_loss_type'] == "l1": + self.xy_loss = self.kl_loss_l1 + else: + raise "not implemented" + + if args['angle_loss_type'] == "l2": + self.angle_loss = self.kl_loss_l2 + elif args['angle_loss_type'] == "von": + lambda_V = args['lambda_V'] + s0 = args['s0'] + limit_period = args['limit_period'] + self.angle_loss = partial(self.kl_loss_angular, lambda_V=lambda_V, s0=s0, limit_period=limit_period) + else: + raise "not implemented" + + + + + @staticmethod + def kl_loss_l2(diff, s): + """ + Args: + diff: [B, 2] + s: [B, 2] + Returns: + loss: [B, 2] + """ + loss = 0.5*(torch.exp(-s) * (diff**2) + s) + return loss + + @staticmethod + def kl_loss_l1(diff, s): + """ + Args: + diff: [B, 2] + s: [B, 2] + Returns: + loss: [B, 2] + """ + loss = 0.5*torch.exp(-s) * torch.abs(diff) + s + return loss + + @staticmethod + def kl_loss_angular(diff, s, lambda_V=1, s0=1, limit_period=False): + """ + Args: + diff: [B, 1] + s: [B, 1] + if limit_period, + diff + 180 ~ diff. + Returns: + loss: [B, 1] + """ + exp_minus_s = torch.exp(-s) + if limit_period: + cos_abs = torch.abs(torch.cos(diff)) + loss = loss = torch.log(mathh.i0e_cuda(exp_minus_s)*torch.exp(exp_minus_s)) - exp_minus_s * cos_abs.detach() + lambda_V * F.elu(s-s0) + else: + loss = torch.log(mathh.i0e_cuda(exp_minus_s)*torch.exp(exp_minus_s)) - exp_minus_s * torch.cos(diff) + lambda_V * F.elu(s-s0) + + return loss + + + def forward(self, input: torch.Tensor, + target: torch.Tensor, + sm: torch.Tensor, + weights: torch.Tensor = None): + target = torch.where(torch.isnan(target), input, target) # ignore nan targets + + + + if self.uncertainty_dim == 3: + xy_diff = input[...,:2] - target[...,:2] + loss1 = self.xy_loss(xy_diff, sm[...,:2]) + + theta_diff = input[...,7:8] - target[...,7:8] + + loss2 = self.angle_weight * self.angle_loss(theta_diff, sm[...,2:3]) + + loss = torch.cat((loss1, loss2), dim=-1) + + elif self.uncertainty_dim == 7: + ## is this right? 
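+            # Editor's note: this appears consistent with add_sin_difference_dim below -- index 6 holds
+            # the sin-encoded residual and index 7 the retained raw angle, so the seven regressed codes
+            # are the first six box codes plus the raw angle difference.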
+ other_diff = input[...,:6] - target[...,:6] + theta_diff = input[...,7:8] - target[...,7:8] + + diff = torch.cat((other_diff, theta_diff), dim=-1) + loss = self.xy_loss(diff, sm) + + elif self.uncertainty_dim == 2: + xy_diff = input[...,:2] - target[...,:2] + loss = self.xy_loss(xy_diff, sm[...,:2]) + else: + raise "not implemented" + + # anchor-wise weighting + if weights is not None: + assert weights.shape[0] == loss.shape[0] and weights.shape[1] == loss.shape[1] + loss = loss * weights.unsqueeze(-1) + + return loss + + + +class PointPillarUncertaintyLoss(nn.Module): + def __init__(self, args): + super(PointPillarUncertaintyLoss, self).__init__() + self.reg_loss_func = WeightedSmoothL1Loss() + self.alpha = 0.25 + self.gamma = 2.0 + + self.cls_weight = args['cls_weight'] + self.kl_weight = args['kl_weight'] + self.reg_coe = args['reg'] + self.uncertainty_dim = args['kl_args']['uncertainty_dim'] + + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_weight = args['dir_args']['dir_weight'] + self.dir_offset = args['dir_args']['args']['dir_offset'] + self.num_bins = args['dir_args']['args']['num_bins'] + anchor_yaw = np.deg2rad(np.array(args['dir_args']['anchor_yaw'])) # for direction classification + self.anchor_yaw_map = torch.from_numpy(anchor_yaw).view(1,-1,1) # [1,2,1] + self.anchor_num = self.anchor_yaw_map.shape[1] + + else: + self.use_dir =False + + + self.kl_loss_func = KLLoss(args['kl_args']) + + self.loss_dict = {} + + def forward(self, output_dict, target_dict): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + rm = output_dict['rm'] # [B, 14, 50, 176] + psm = output_dict['psm'] # [B, 2, 50, 176] + sm = output_dict['sm'] # log of sigma^2 / scale [B, 6, 50 176] + targets = target_dict['targets'] + + cls_preds = psm.permute(0, 2, 3, 1).contiguous() # N, C, H, W -> N, H, W, C + + box_cls_labels = target_dict['pos_equal_one'] # [B, 50, 176, 2] + box_cls_labels = box_cls_labels.view(psm.shape[0], -1).contiguous() # -> [B, 50*176*2], two types of anchor + + positives = box_cls_labels > 0 + negatives = box_cls_labels == 0 + negative_cls_weights = negatives * 1.0 + cls_weights = (negative_cls_weights + 1.0 * positives).float() # all 1 + reg_weights = positives.float() + + pos_normalizer = positives.sum(1, keepdim=True).float() # positive number per sample + reg_weights /= torch.clamp(pos_normalizer, min=1.0) + cls_weights /= torch.clamp(pos_normalizer, min=1.0) + cls_targets = box_cls_labels + cls_targets = cls_targets.unsqueeze(dim=-1) + + cls_targets = cls_targets.squeeze(dim=-1) + one_hot_targets = torch.zeros( + *list(cls_targets.shape), 2, + dtype=cls_preds.dtype, device=cls_targets.device + ) + one_hot_targets.scatter_(-1, cls_targets.unsqueeze(dim=-1).long(), 1.0) + cls_preds = cls_preds.view(psm.shape[0], -1, 1) + one_hot_targets = one_hot_targets[..., 1:] + + cls_loss_src = self.cls_loss_func(cls_preds, + one_hot_targets, + weights=cls_weights) # [N, M] + cls_loss = cls_loss_src.sum() / psm.shape[0] + conf_loss = cls_loss * self.cls_weight + + ########## regression ########## + rm = rm.permute(0, 2, 3, 1).contiguous() + rm = rm.view(rm.size(0), -1, 7) + targets = targets.view(targets.size(0), -1, 7) + + box_preds_sin, reg_targets_sin = self.add_sin_difference_dim(rm, + targets) + loc_loss_src =\ + self.reg_loss_func(box_preds_sin[...,:7], + reg_targets_sin[...,:7], + weights=reg_weights) + reg_loss = loc_loss_src.sum() / rm.shape[0] + reg_loss *= self.reg_coe + + + ######## direction ########## + if self.use_dir: + dir_targets = 
self.get_direction_target(targets) + N = output_dict["dm"].shape[0] + dir_logits = output_dict["dm"].permute(0, 2, 3, 1).contiguous().view(N, -1, 2) # [N, H*W*#anchor, 2] + + + dir_loss = softmax_cross_entropy_with_logits(dir_logits.view(-1, self.anchor_num), dir_targets.view(-1, self.anchor_num)) + + dir_loss = dir_loss.view(dir_logits.shape[:2]) * reg_weights # [N, H*W*anchor_num] + + dir_loss = dir_loss.sum() * self.dir_weight / N + + ######## kl ######### + sm = sm.permute(0, 2, 3, 1).contiguous() # [N, H, W, #anchor_num * 3] + sm = sm.view(sm.size(0), -1, self.uncertainty_dim) + + kl_loss_src = \ + self.kl_loss_func(box_preds_sin, + reg_targets_sin, + sm, + reg_weights) + + kl_loss = kl_loss_src.sum() / sm.shape[0] + kl_loss *= self.kl_weight + + # total_loss = reg_loss + conf_loss + kl_loss + total_loss = reg_loss + conf_loss + + self.loss_dict.update({'total_loss': total_loss, + 'reg_loss': reg_loss, + 'conf_loss': conf_loss, + 'kl_loss': kl_loss}) + + if self.use_dir: + # total_loss += dir_loss + self.loss_dict.update({'dir_loss': dir_loss}) + + + return total_loss + + def get_direction_target(self, reg_targets): + """ + Args: + reg_targets: [N, H * W * #anchor_num, 7] + The last term is (theta_gt - theta_a) + + Returns: + dir_targets: + theta_gt: [N, H * W * #anchor_num, NUM_BIN] + NUM_BIN = 2 + """ + # (1, 2, 1) + H_times_W_times_anchor_num = reg_targets.shape[1] + anchor_map = self.anchor_yaw_map.repeat(1, H_times_W_times_anchor_num//self.anchor_num, 1).to(reg_targets.device) # [1, H * W * #anchor_num, 1] + rot_gt = reg_targets[..., -1] + anchor_map[..., -1] # [N, H*W*anchornum] + offset_rot = limit_period(rot_gt - self.dir_offset, 0, 2 * np.pi) + dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / self.num_bins)).long() # [N, H*W*anchornum] + dir_cls_targets = torch.clamp(dir_cls_targets, min=0, max=self.num_bins - 1) + # one_hot: + # if rot_gt > 0, then the label is 1, then the regression target is [0, 1] + dir_cls_targets = one_hot_f(dir_cls_targets, self.num_bins) + return dir_cls_targets + + + + def cls_loss_func(self, input: torch.Tensor, + target: torch.Tensor, + weights: torch.Tensor): + """ + Args: + input: (B, #anchors, #classes) float tensor. + Predicted logits for each class + target: (B, #anchors, #classes) float tensor. + One-hot encoded classification targets + weights: (B, #anchors) float tensor. + Anchor-wise weights. + + Returns: + weighted_loss: (B, #anchors, #classes) float tensor after weighting. + """ + pred_sigmoid = torch.sigmoid(input) + alpha_weight = target * self.alpha + (1 - target) * (1 - self.alpha) + pt = target * (1.0 - pred_sigmoid) + (1.0 - target) * pred_sigmoid + focal_weight = alpha_weight * torch.pow(pt, self.gamma) + + bce_loss = self.sigmoid_cross_entropy_with_logits(input, target) + + loss = focal_weight * bce_loss + + if weights.shape.__len__() == 2 or \ + (weights.shape.__len__() == 1 and target.shape.__len__() == 2): + weights = weights.unsqueeze(-1) + + assert weights.shape.__len__() == loss.shape.__len__() + + return loss * weights + + @staticmethod + def sigmoid_cross_entropy_with_logits(input: torch.Tensor, target: torch.Tensor): + """ PyTorch Implementation for tf.nn.sigmoid_cross_entropy_with_logits: + max(x, 0) - x * z + log(1 + exp(-abs(x))) in + https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits + + Args: + input: (B, #anchors, #classes) float tensor. + Predicted logits for each class + target: (B, #anchors, #classes) float tensor. 
+ One-hot encoded classification targets + + Returns: + loss: (B, #anchors, #classes) float tensor. + Sigmoid cross entropy loss without reduction + """ + loss = torch.clamp(input, min=0) - input * target + \ + torch.log1p(torch.exp(-torch.abs(input))) + return loss + + @staticmethod + def add_sin_difference_dim(boxes1, boxes2, dim=6): + """ + This is different with other loss function. + Here we especially retain the angel + + Add sin difference ? + Replace sin difference ! + + Returns: + [B, H*W, 7] -> [B, H*W, 8] + """ + assert dim != -1 + + # sin(theta1 - theta2) = sin(theta1)*cos(theta2) - cos(theta1)*sin(theta2) + + rad_pred_encoding = torch.sin(boxes1[..., dim:dim + 1]) * \ + torch.cos(boxes2[..., dim:dim + 1]) + + rad_tg_encoding = torch.cos(boxes1[..., dim: dim + 1]) * \ + torch.sin(boxes2[..., dim: dim + 1]) + + # boxes1 = torch.cat([boxes1[..., :dim], rad_pred_encoding, + # boxes1[..., dim + 1:]], dim=-1) + # boxes2 = torch.cat([boxes2[..., :dim], rad_tg_encoding, + # boxes2[..., dim + 1:]], dim=-1) + + boxes1_encoded = torch.cat([boxes1[..., :dim], rad_pred_encoding, + boxes1[..., dim:]], dim=-1) + boxes2_encoded = torch.cat([boxes2[..., :dim], rad_tg_encoding, + boxes2[..., dim:]], dim=-1) + + return boxes1_encoded, boxes2_encoded + + + def logging(self, epoch, batch_id, batch_len, writer = None): + """ + Print out the loss function for current iteration. + + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. + batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict['total_loss'] + reg_loss = self.loss_dict['reg_loss'] + conf_loss = self.loss_dict['conf_loss'] + kl_loss = self.loss_dict['kl_loss'] + + + print_msg = ("[epoch %d][%d/%d], || Loss: %.4f || Conf Loss: %.4f" + " || Loc Loss: %.4f || KL Loss: %.4f" % ( + epoch, batch_id + 1, batch_len, + total_loss.item(), conf_loss.item(), reg_loss.item(), kl_loss.item())) + + if self.use_dir: + dir_loss = self.loss_dict['dir_loss'] + print_msg += " || Dir Loss: %.4f" % dir_loss.item() + + print(print_msg) + + if not writer is None: + writer.add_scalar('Regression_loss', reg_loss.item(), + epoch*batch_len + batch_id) + writer.add_scalar('Confidence_loss', conf_loss.item(), + epoch*batch_len + batch_id) + writer.add_scalar('kl_loss', kl_loss.item(), + epoch*batch_len + batch_id) + if self.use_dir: + writer.add_scalar('dir_loss', dir_loss.item(), + epoch*batch_len + batch_id) + +def one_hot_f(tensor, depth, dim=-1, on_value=1.0, dtype=torch.float32): + tensor_onehot = torch.zeros(*list(tensor.shape), depth, dtype=dtype, device=tensor.device) # [4, 70400, 2] + tensor_onehot.scatter_(dim, tensor.unsqueeze(dim).long(), on_value) # [4, 70400, 2] + return tensor_onehot + +def softmax_cross_entropy_with_logits(logits, labels): + param = list(range(len(logits.shape))) + transpose_param = [0] + [param[-1]] + param[1:-1] + logits = logits.permute(*transpose_param) + loss_ftor = torch.nn.CrossEntropyLoss(reduction="none") + loss = loss_ftor(logits, labels.max(dim=-1)[1]) + return loss diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/voxel_net_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/voxel_net_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..a257fa9edbc4f81322eee5e3f73d8f8b0e8d7e50 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/voxel_net_loss.py @@ -0,0 +1,92 @@ +# -*- coding: 
utf-8 -*- +# Author: Runsheng Xu , Hao Xiang +# License: TDG-Attribution-NonCommercial-NoDistrib + + + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class VoxelNetLoss(nn.Module): + def __init__(self, args): + super(VoxelNetLoss, self).__init__() + self.smoothl1loss = nn.SmoothL1Loss(size_average=False) + self.alpha = args['alpha'] + self.beta = args['beta'] + self.reg_coe = args['reg'] + self.loss_dict = {} + + def forward(self, output_dict, target_dict): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + rm = output_dict['rm'] + psm = output_dict['psm'] + + pos_equal_one = target_dict['pos_equal_one'] + neg_equal_one = target_dict['neg_equal_one'] + targets = target_dict['targets'] + + p_pos = F.sigmoid(psm.permute(0, 2, 3, 1)) + rm = rm.permute(0, 2, 3, 1).contiguous() + rm = rm.view(rm.size(0), rm.size(1), rm.size(2), -1, 7) + targets = targets.view(targets.size(0), targets.size(1), + targets.size(2), -1, 7) + pos_equal_one_for_reg = pos_equal_one.unsqueeze( + pos_equal_one.dim()).expand(-1, -1, -1, -1, 7) + + rm_pos = rm * pos_equal_one_for_reg + targets_pos = targets * pos_equal_one_for_reg + + cls_pos_loss = -pos_equal_one * torch.log(p_pos + 1e-6) + cls_pos_loss = cls_pos_loss.sum() / (pos_equal_one.sum() + 1e-6) + + cls_neg_loss = -neg_equal_one * torch.log(1 - p_pos + 1e-6) + cls_neg_loss = cls_neg_loss.sum() / (neg_equal_one.sum() + 1e-6) + + reg_loss = self.smoothl1loss(rm_pos, targets_pos) + reg_loss = reg_loss / (pos_equal_one.sum() + 1e-6) + conf_loss = self.alpha * cls_pos_loss + self.beta * cls_neg_loss + + total_loss = self.reg_coe * reg_loss + conf_loss + + self.loss_dict.update({'total_loss': total_loss, + 'reg_loss': reg_loss, + 'conf_loss': conf_loss}) + + return total_loss + + def logging(self, epoch, batch_id, batch_len, writer = None): + """ + Print out the loss function for current iteration. + + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. 
+ batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict['total_loss'] + reg_loss = self.loss_dict['reg_loss'] + conf_loss = self.loss_dict['conf_loss'] + + print("[epoch %d][%d/%d], || Loss: %.4f || Conf Loss: %.4f" + " || Loc Loss: %.4f" % ( + epoch, batch_id + 1, batch_len, + total_loss.item(), conf_loss.item(), reg_loss.item())) + + if not writer is None: + writer.add_scalar('Regression_loss', reg_loss.item(), + epoch*batch_len + batch_id) + writer.add_scalar('Confidence_loss', conf_loss.item(), + epoch*batch_len + batch_id) diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__init__.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__pycache__/__init__.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d1abc5fb11a40ad42f073d24eeef31aaef94fb5a Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__pycache__/__init__.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__pycache__/center_point_codriving.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__pycache__/center_point_codriving.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7be5a81881416b375dbc97a555af57c205a7ef50 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__pycache__/center_point_codriving.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__pycache__/point_pillar_multiclass.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__pycache__/point_pillar_multiclass.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e0397ae7179f77b8786a1d413973b97ca9acc9e6 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__pycache__/point_pillar_multiclass.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__pycache__/point_pillar_single_multiclass.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__pycache__/point_pillar_single_multiclass.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..edfe74f8dceee0ea598ad9fe9125c48ac4d3691a Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__pycache__/point_pillar_single_multiclass.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point.py new file mode 100644 index 0000000000000000000000000000000000000000..34c657370fe11b637894a2efc8ab43c9d5d17b2c --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point.py @@ -0,0 +1,148 @@ +# -*- coding: utf-8 -*- + + +import torch +import torch.nn as nn +import numpy as np + + +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import 
BaseBEVBackbone
+from opencood.models.sub_modules.downsample_conv import DownsampleConv
+
+
+class CenterPoint(nn.Module):
+    def __init__(self, args):
+        super(CenterPoint, self).__init__()
+
+        # Pillar VFE
+        self.pillar_vfe = PillarVFE(args['pillar_vfe'],
+                                    num_point_features=4,
+                                    voxel_size=args['voxel_size'],
+                                    point_cloud_range=args['lidar_range'])
+        self.scatter = PointPillarScatter(args['point_pillar_scatter'])
+        self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64)
+        # self.out_size_factor = args['']
+        self.voxel_size = args['voxel_size']
+        self.out_size_factor = args['out_size_factor']
+        self.cav_lidar_range = args['lidar_range']
+
+        self.shrink_flag = False
+        if 'shrink_header' in args:
+            self.shrink_flag = True
+            self.shrink_conv = DownsampleConv(args['shrink_header'])
+        input_channels = 128*2 if self.shrink_flag else 128*3
+        self.cls_head = nn.Conv2d(input_channels, args['anchor_number'],
+                                  kernel_size=1)
+        self.reg_head = nn.Conv2d(input_channels, 8 * args['anchor_number'],
+                                  kernel_size=1)
+        # self.conv_cls = nn.Conv2d(input_channels, args['anchor_number'],
+        #                           kernel_size=1)
+        # self.conv_box = nn.Conv2d(input_channels, 8 * args['anchor_number'],
+        #                           kernel_size=1)
+        self.init_weight()
+
+    def init_weight(self):
+        pi = 0.01
+        nn.init.constant_(self.cls_head.bias, -np.log((1 - pi) / pi))
+        nn.init.normal_(self.reg_head.weight, mean=0, std=0.001)
+
+    def forward(self, data_dict):
+        voxel_features = data_dict['processed_lidar']['voxel_features']
+        voxel_coords = data_dict['processed_lidar']['voxel_coords']
+        voxel_num_points = data_dict['processed_lidar']['voxel_num_points']
+
+        batch_dict = {'voxel_features': voxel_features,
+                      'voxel_coords': voxel_coords,
+                      'voxel_num_points': voxel_num_points}
+
+        batch_dict = self.pillar_vfe(batch_dict)
+        batch_dict = self.scatter(batch_dict)
+        batch_dict = self.backbone(batch_dict)
+
+        spatial_features_2d = batch_dict['spatial_features_2d']
+
+        if self.shrink_flag:
+            spatial_features_2d = self.shrink_conv(spatial_features_2d)
+
+        cls = self.cls_head(spatial_features_2d)
+        bbox = self.reg_head(spatial_features_2d)
+        # cls = self.conv_cls(spatial_features_2d)
+        # bbox = self.conv_box(spatial_features_2d)
+
+        # reshape the second dimension of bbox to the 7 box parameters
+        _, bbox_temp = self.generate_predicted_boxes(cls, bbox)
+
+        # print(bbox.equal(bbox_temp))
+        output_dict = {'cls_preds': cls,
+                       'reg_preds': bbox_temp,
+                       'bbox_preds': bbox}  # 'bbox' is used when computing the loss, 'rm' when generating the output
+
+        return output_dict
+
+    def generate_predicted_boxes(self, cls_preds, box_preds, dir_cls_preds=None):
+        """
+        Args:
+            batch_size:
+            cls_preds: (N, H, W, C1)
+            box_preds: (N, H, W, C2)
+            dir_cls_preds: (N, H, W, C3)
+
+        Returns:
+            batch_cls_preds: (B, num_boxes, num_classes)
+            batch_box_preds: (B, num_boxes, 7+C)
+
+        """
+        box_preds = box_preds.permute(0, 2, 3, 1).contiguous()
+
+        batch, H, W, code_size = box_preds.size()  ## code_size is the size of the predicted box encoding
+
+        # batch_reg = box_preds[:, 0:2, :, :]  # x,y,z
+        # batch_hei = box_preds[:, 2:3, :, :]
+        # batch_dim = torch.exp(box_preds[:, 3:6, :, :])
+        # # batch_dim = box_preds[:, 3:6, :, :]  # w h l
+        # batch_rots = box_preds[:, 6:7, :, :]
+        # batch_rotc = box_preds[:, 7:8, :, :]
+        # rot = torch.atan2(batch_rots, batch_rotc)
+
+        box_preds = box_preds.reshape(batch, H*W, code_size)
+
+        batch_reg = box_preds[..., 0:2]
+        # batch_hei = box_preds[..., 2:3]
+        # batch_dim = torch.exp(box_preds[..., 3:6])
+
+        h = box_preds[..., 3:4] * self.out_size_factor * self.voxel_size[0]
+        w = box_preds[..., 4:5] * self.out_size_factor * self.voxel_size[1]
+        l = box_preds[..., 5:6] * self.out_size_factor * self.voxel_size[2]
+        batch_dim = torch.cat([h,w,l], dim=-1)
+        batch_hei = box_preds[..., 2:3] * self.out_size_factor * self.voxel_size[2] + self.cav_lidar_range[2]
+
+        batch_rots = box_preds[..., 6:7]
+        batch_rotc = box_preds[..., 7:8]
+
+        rot = torch.atan2(batch_rots, batch_rotc)
+
+        ys, xs = torch.meshgrid([torch.arange(0, H), torch.arange(0, W)])
+        ys = ys.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device)
+        xs = xs.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device)
+
+        xs = xs.view(batch, -1, 1) + batch_reg[:, :, 0:1]
+        ys = ys.view(batch, -1, 1) + batch_reg[:, :, 1:2]
+
+        xs = xs * self.out_size_factor * self.voxel_size[0] + self.cav_lidar_range[0]  ## recover real-world coordinates from the feature-map grid
+        ys = ys * self.out_size_factor * self.voxel_size[1] + self.cav_lidar_range[1]
+
+        batch_box_preds = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=2)
+        # batch_box_preds = batch_box_preds.reshape(batch, H, W, batch_box_preds.shape[-1])
+        # batch_box_preds = batch_box_preds.permute(0, 3, 1, 2).contiguous()
+
+        # batch_box_preds_temp = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=1)
+        # box_preds = box_preds.permute(0, 3, 1, 2).contiguous()
+
+        # batch_cls_preds = cls_preds.view(batch, H*W, -1)
+        return cls_preds, batch_box_preds
diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_baseline.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_baseline.py
new file mode 100644
index 0000000000000000000000000000000000000000..f4478bc7fb7939b5a11461b89eeb38949fcbc435
--- /dev/null
+++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_baseline.py
@@ -0,0 +1,217 @@
+# Author: Yifan Lu
+# a class that integrates multiple simple fusion methods (Single Scale)
+# Supports F-Cooper, Self-Att, DiscoNet(wo KD), V2VNet, V2XViT, When2comm
+import torch
+import torch.nn as nn
+from icecream import ic
+import numpy as np
+from opencood.models.sub_modules.pillar_vfe import PillarVFE
+from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter
+from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone
+from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone
+from opencood.models.sub_modules.downsample_conv import DownsampleConv
+from opencood.models.sub_modules.naive_compress import NaiveCompressor
+from opencood.models.fuse_modules.f_cooper_fuse import SpatialFusion
+from opencood.models.fuse_modules.fusion_in_one import MaxFusion, AttFusion, DiscoFusion, V2VNetFusion, V2XViTFusion, When2commFusion
+from opencood.utils.transformation_utils import normalize_pairwise_tfm
+
+class CenterPointBaseline(nn.Module):
+    """
+    Single-scale fusion baseline (F-Cooper, Self-Att, DiscoNet, V2VNet, V2XViT, When2comm) with a point pillar backbone.
+ """ + def __init__(self, args): + super(CenterPointBaseline, self).__init__() + + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + is_resnet = args['base_bev_backbone'].get("resnet", False) + if is_resnet: + self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) # or you can use ResNetBEVBackbone, which is stronger + else: + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) # or you can use ResNetBEVBackbone, which is stronger + self.voxel_size = args['voxel_size'] + self.out_size_factor = args['out_size_factor'] + self.cav_lidar_range = args['lidar_range'] + + if args['fusion_method'] == "max": + self.fusion_net = MaxFusion() + if args['fusion_method'] == "att": + self.fusion_net = AttFusion(args['att']['feat_dim']) + if args['fusion_method'] == "disconet": + self.fusion_net = DiscoFusion(args['disconet']['feat_dim']) + if args['fusion_method'] == "v2vnet": + self.fusion_net = V2VNetFusion(args['v2vnet']) + if args['fusion_method'] == 'v2xvit': + self.fusion_net = V2XViTFusion(args['v2xvit']) + if args['fusion_method'] == 'when2comm': + self.fusion_net = When2commFusion(args['when2comm']) + + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.compression = False + if "compression" in args: + self.compression = True + self.naive_compressor = NaiveCompressor(self.out_channel, args['compression']) + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 8 * args['anchor_number'], + kernel_size=1) + self.use_dir = False + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_head = nn.Conv2d(self.out_channel, args['dir_args']['num_bins'] * args['anchor_number'], + kernel_size=1) # BIN_NUM = 2 + + if 'backbone_fix' in args.keys() and args['backbone_fix']: + self.backbone_fix() + + self.init_weight() + + def init_weight(self): + pi = 0.01 + nn.init.constant_(self.cls_head.bias, -np.log((1 - pi) / pi) ) + nn.init.normal_(self.reg_head.weight, mean=0, std=0.001) + + def backbone_fix(self): + """ + Fix the parameters of backbone during finetune on timedelay。 + """ + for p in self.pillar_vfe.parameters(): + p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = 
self.scatter(batch_dict) + # calculate pairwise affine transformation matrix + _, _, H0, W0 = batch_dict['spatial_features'].shape # original feature map shape H0, W0 + t_matrix = normalize_pairwise_tfm(data_dict['pairwise_t_matrix'], H0, W0, self.voxel_size[0]) + batch_dict = self.backbone(batch_dict) + + spatial_features_2d = batch_dict['spatial_features_2d'] + + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + + if self.compression: + spatial_features_2d = self.naive_compressor(spatial_features_2d) + + fused_feature = self.fusion_net(spatial_features_2d, record_len, t_matrix) + + cls = self.cls_head(fused_feature) + bbox = self.reg_head(fused_feature) + + # 把bbox 的第二维度变成7 + _, bbox_temp = self.generate_predicted_boxes(cls, bbox) + + # print(bbox.equal(bbox_temp)) + output_dict = {'cls_preds': cls, + 'reg_preds': bbox_temp, + 'bbox_preds': bbox} # 计算loss的时候使用 'bbox', 在生成output的时候 'rm' + + if self.use_dir: + output_dict.update({'dir_preds': self.dir_head(fused_feature)}) + + return output_dict + + def generate_predicted_boxes(self, cls_preds, box_preds, dir_cls_preds=None): + """ + Args: + batch_size: + cls_preds: (N, H, W, C1) + box_preds: (N, H, W, C2) + dir_cls_preds: (N, H, W, C3) + + Returns: + batch_cls_preds: (B, num_boxes, num_classes) + batch_box_preds: (B, num_boxes, 7+C) + + """ + box_preds = box_preds.permute(0, 2, 3, 1).contiguous() + + batch, H, W, code_size = box_preds.size() ## code_size 表示的是预测的尺寸 + + # batch_reg = box_preds[:, 0:2, :, :] # x,y,z + # batch_hei = box_preds[:, 2:3, :, :] + # batch_dim = torch.exp(box_preds[:, 3:6, :, :]) + # # batch_dim = box_preds[:, 3:6, :, :] # w h l + # batch_rots = box_preds[:, 6:7, :, :] + # batch_rotc = box_preds[:, 7:8, :, :] + # rot = torch.atan2(batch_rots, batch_rotc) + + + box_preds = box_preds.reshape(batch, H*W, code_size) + + batch_reg = box_preds[..., 0:2] + # batch_hei = box_preds[..., 2:3] + # batch_dim = torch.exp(box_preds[..., 3:6]) + + h = box_preds[..., 3:4] * self.out_size_factor * self.voxel_size[0] + w = box_preds[..., 4:5] * self.out_size_factor * self.voxel_size[1] + l = box_preds[..., 5:6] * self.out_size_factor * self.voxel_size[2] + batch_dim = torch.cat([h,w,l], dim=-1) + batch_hei = box_preds[..., 2:3] * self.out_size_factor * self.voxel_size[2] + self.cav_lidar_range[2] + + batch_rots = box_preds[..., 6:7] + batch_rotc = box_preds[..., 7:8] + + rot = torch.atan2(batch_rots, batch_rotc) + + ys, xs = torch.meshgrid([torch.arange(0, H), torch.arange(0, W)]) + ys = ys.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + xs = xs.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + + xs = xs.view(batch, -1, 1) + batch_reg[:, :, 0:1] + ys = ys.view(batch, -1, 1) + batch_reg[:, :, 1:2] + + xs = xs * self.out_size_factor * self.voxel_size[0] + self.cav_lidar_range[0] ## 基于feature_map 的size求解真实的坐标 + ys = ys * self.out_size_factor * self.voxel_size[1] + self.cav_lidar_range[1] + + + batch_box_preds = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=2) + # batch_box_preds = batch_box_preds.reshape(batch, H, W, batch_box_preds.shape[-1]) + # batch_box_preds = batch_box_preds.permute(0, 3, 1, 2).contiguous() + + # batch_box_preds_temp = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=1) + # box_preds = box_preds.permute(0, 3, 1, 2).contiguous() + + # batch_cls_preds = cls_preds.view(batch, H*W, -1) + return cls_preds, batch_box_preds \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_baseline_multiclass.py 
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_baseline_multiclass.py new file mode 100644 index 0000000000000000000000000000000000000000..e6657de7f59e464d213b62820c2a17dfd76f0bdc --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_baseline_multiclass.py @@ -0,0 +1,262 @@ +import torch.nn as nn +import numpy as np +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +# from opencood.models.sub_modules.compress_core import CompressCore +from opencood.models.sub_modules.naive_compress import NaiveCompressor +# from opencood.models.sub_modules.dcn_net import DCNNet +# from opencood.models.fuse_modules.where2comm import Where2comm +from opencood.models.fuse_modules.where2comm_attn import Where2comm +import torch +import torch.nn.functional as F + +class centerpointbaselinemulticlass(nn.Module): + def __init__(self, args): + super(centerpointbaselinemulticlass, self).__init__() + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + + + if 'resnet' in args['base_bev_backbone']: + self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) + else: + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) + + self.voxel_size = args['voxel_size'] + self.out_size_factor = args['out_size_factor'] + self.cav_lidar_range = args['lidar_range'] + + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + + # used to downsample the feature map for efficient computation + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.compression = False + if 'compression' in args and args['compression'] > 0: + self.compression = True + self.naive_compressor = NaiveCompressor(self.out_channel, args['compression']) + + self.dcn = False + if 'dcn' in args: + self.dcn = True + self.dcn_net = DCNNet(args['dcn']) + + # self.fusion_net = TransformerFusion(args['fusion_args']) + self.fusion_net = Where2comm(args['fusion_args']) + self.multi_scale = args['fusion_args']['multi_scale'] + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 8 * args['anchor_number'], + kernel_size=1) + if 'backbone_fix' in args.keys() and args['backbone_fix']: + self.backbone_fix() + + self.init_weight() + + def init_weight(self): + pi = 0.01 + nn.init.constant_(self.cls_head.bias, -np.log((1 - pi) / pi) ) + nn.init.normal_(self.reg_head.weight, mean=0, std=0.001) + + def backbone_fix(self): + """ + Fix the parameters of backbone during finetune on timedelay。 + """ + for p in self.pillar_vfe.parameters(): + p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + 
p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, data_dict): + if type(data_dict) == dict: + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + # pairwise_t_matrix = data_dict['pairwise_t_matrix'] + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + batch_dict = self.backbone(batch_dict) + # N, C, H', W'. [N, 384, 100, 352] + spatial_features_2d = batch_dict['spatial_features_2d'] + # print(spatial_features_2d) + elif type(data_dict) == list: + spatial_features_2d = [] + for data in data_dict: + voxel_features = data['processed_lidar']['voxel_features'] + voxel_coords = data['processed_lidar']['voxel_coords'] + voxel_num_points = data['processed_lidar']['voxel_num_points'] + record_len = data['record_len'] + # pairwise_t_matrix = data_dict['pairwise_t_matrix'] + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + + batch_dict = self.backbone(batch_dict) + # N, C, H', W'. 
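+ # Rough shape walkthrough of the pillar pipeline above (illustrative; exact
+ # sizes depend on this run's voxelization settings):
+ #   voxel_features (M, 32, 4) --PillarVFE--> pillar features (M, 64)
+ #   --PointPillarScatter--> dense BEV canvas 'spatial_features' (N, 64, H, W)
+ #   --backbone--> downsampled BEV features 'spatial_features_2d' (N, C', H', W')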
+ spatial_feature_2d = batch_dict['spatial_features_2d'] + spatial_features_2d.append(spatial_feature_2d) + spatial_features_2d = torch.cat(spatial_features_2d) + else: + print("wrong type of data_dict") + + + # downsample feature to reduce memory + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + # compressor + if self.compression: + spatial_features_2d = self.naive_compressor(spatial_features_2d) + # dcn + if self.dcn: + spatial_features_2d = self.dcn_net(spatial_features_2d) + # spatial_features_2d is [sum(cav_num), 256, 50, 176] + # output only contains ego + # [B, 256, 50, 176] + psm_single = self.cls_head(spatial_features_2d) + # print(spatial_features_2d) + rm_single = self.reg_head(spatial_features_2d) + + fused_feature = spatial_features_2d + cls = self.cls_head(fused_feature) # fused_feature [B, 128, 96, 288] -> [B, 3, 96, 288] + bbox = self.reg_head(fused_feature) # fused_feature [B, 128, 96, 288] -> [B, 24, 96, 288] + + if not self.training: + _, C, H, W = cls.shape + cls = psm_single[0].unsqueeze(0).contiguous().view(1, -1, H, W) + bbox = rm_single[0].unsqueeze(0).contiguous().view(1, -1, H, W) + + + box_preds_for_infer = bbox.permute(0, 2, 3, 1).contiguous() + bbox_temp_list = [] + num_class = int(box_preds_for_infer.shape[3]/8) + box_preds_for_infer = box_preds_for_infer.view(box_preds_for_infer.shape[0], box_preds_for_infer.shape[1], box_preds_for_infer.shape[2], num_class, 8) + for i in range(num_class): + box_preds_for_infer_singleclass = box_preds_for_infer[:,:,:,i,:] + box_preds_for_infer_singleclass = box_preds_for_infer_singleclass.permute(0, 3, 1, 2) + _, bbox_temp = self.generate_predicted_boxes(cls[:, i, :, :], box_preds_for_infer_singleclass) + bbox_temp_list.append(bbox_temp) + bbox_temp_list = torch.stack(bbox_temp_list, dim=1) + + + _, bbox_temp = self.generate_predicted_boxes(cls, bbox) + + feature_list = [] + feature_regroup = self.regroup(spatial_features_2d, record_len) + for ego_id in range(len(feature_regroup)): + feature_list.append(feature_regroup[ego_id][0:1]) + feature_egos = torch.cat(feature_list, dim=0) + result_dict = {'fused_feature':feature_egos} + + output_dict = {'cls_preds': cls, + 'reg_preds': bbox_temp, + 'reg_preds_multiclass': bbox_temp_list, + 'bbox_preds': bbox + } + output_dict.update(result_dict) + + + + _, bbox_temp_single = self.generate_predicted_boxes(psm_single, rm_single) + + output_dict.update({'cls_preds_single': psm_single, + 'reg_preds_single': bbox_temp_single, + 'bbox_preds_single': rm_single, + # 'comm_rate': communication_rates + }) + + + return output_dict + + def generate_predicted_boxes(self, cls_preds, box_preds, dir_cls_preds=None): + """ + Args: + batch_size: + cls_preds: (N, H, W, C1) + box_preds: (N, H, W, C2) + dir_cls_preds: (N, H, W, C3) + + Returns: + batch_cls_preds: (B, num_boxes, num_classes) + batch_box_preds: (B, num_boxes, 7+C) + + """ + box_preds = box_preds.permute(0, 2, 3, 1).contiguous() + + batch, H, W, code_size = box_preds.size() ## code_size 表示的是预测的尺寸 + + box_preds = box_preds.reshape(batch, H*W, code_size) + + batch_reg = box_preds[..., 0:2] + # batch_hei = box_preds[..., 2:3] + # batch_dim = torch.exp(box_preds[..., 3:6]) + + h = box_preds[..., 3:4] * self.out_size_factor * self.voxel_size[0] + w = box_preds[..., 4:5] * self.out_size_factor * self.voxel_size[1] + l = box_preds[..., 5:6] * self.out_size_factor * self.voxel_size[2] + batch_dim = torch.cat([h,w,l], dim=-1) + batch_hei = box_preds[..., 2:3] * self.out_size_factor * self.voxel_size[2] + 
self.cav_lidar_range[2] + + batch_rots = box_preds[..., 6:7] + batch_rotc = box_preds[..., 7:8] + + rot = torch.atan2(batch_rots, batch_rotc) + + ys, xs = torch.meshgrid([torch.arange(0, H), torch.arange(0, W)]) + ys = ys.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + xs = xs.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + + xs = xs.view(batch, -1, 1) + batch_reg[:, :, 0:1] + ys = ys.view(batch, -1, 1) + batch_reg[:, :, 1:2] + + xs = xs * self.out_size_factor * self.voxel_size[0] + self.cav_lidar_range[0] ## recover real-world coordinates from the feature-map grid + ys = ys * self.out_size_factor * self.voxel_size[1] + self.cav_lidar_range[1] + + + batch_box_preds = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=2) + + return cls_preds, batch_box_preds diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_baseline_multiscale.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_baseline_multiscale.py new file mode 100644 index 0000000000000000000000000000000000000000..18b16a6723353ee9bad972a78e3dcdc6a101460e --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_baseline_multiscale.py @@ -0,0 +1,209 @@ +# Author: Yifan Lu +# a class that integrates multiple simple fusion methods (multi-scale) +# Supports F-Cooper, Self-Att, DiscoNet(wo KD), V2VNet, V2XViT, When2comm + +import torch.nn as nn +from icecream import ic +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +from opencood.models.fuse_modules.f_cooper_fuse import SpatialFusion +from opencood.models.fuse_modules.fusion_in_one import MaxFusion, AttFusion, DiscoFusion, V2VNetFusion, V2XViTFusion, When2commFusion +from opencood.utils.transformation_utils import normalize_pairwise_tfm +import numpy as np +import torch + +class CenterPointBaselineMultiscale(nn.Module): + """ + F-Cooper implementation with point pillar backbone.
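+ 
+ Despite the F-Cooper wording above, this variant fuses features per backbone
+ level (see the get_multiscale_feature / decode_multiscale_feature calls in
+ forward); only the 'max' and 'att' fusion methods are wired up in __init__.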
+ """ + def __init__(self, args): + super(CenterPointBaselineMultiscale, self).__init__() + + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) + self.voxel_size = args['voxel_size'] + self.out_size_factor = args['out_size_factor'] + self.cav_lidar_range = args['lidar_range'] + + self.fusion_net = nn.ModuleList() + for i in range(len(args['base_bev_backbone']['layer_nums'])): + if args['fusion_method'] == "max": + self.fusion_net.append(MaxFusion()) + if args['fusion_method'] == "att": + self.fusion_net.append(AttFusion(args['att']['feat_dim'][i])) + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.compression = False + if "compression" in args: + self.compression = True + self.naive_compressor = NaiveCompressor(64, args['compression']) + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 8 * args['anchor_number'], + kernel_size=1) + self.use_dir = False + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_head = nn.Conv2d(self.out_channel, args['dir_args']['num_bins'] * args['anchor_number'], + kernel_size=1) # BIN_NUM = 2 + + if 'backbone_fix' in args.keys() and args['backbone_fix']: + self.backbone_fix() + + self.init_weight() + + def init_weight(self): + pi = 0.01 + nn.init.constant_(self.cls_head.bias, -np.log((1 - pi) / pi) ) + nn.init.normal_(self.reg_head.weight, mean=0, std=0.001) + + def backbone_fix(self): + """ + Fix the parameters of backbone during finetune on timedelay。 + """ + for p in self.pillar_vfe.parameters(): + p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + # calculate pairwise affine transformation matrix + _, _, H0, W0 = batch_dict['spatial_features'].shape # original feature map shape H0, W0 + t_matrix = normalize_pairwise_tfm(data_dict['pairwise_t_matrix'], H0, W0, self.voxel_size[0]) + + spatial_features = batch_dict['spatial_features'] + + if self.compression: + spatial_features = self.naive_compressor(spatial_features) + + # multiscale fusion + feature_list = self.backbone.get_multiscale_feature(spatial_features) + fused_feature_list = [] + for i, fuse_module 
in enumerate(self.fusion_net): + fused_feature_list.append(fuse_module(feature_list[i], record_len, t_matrix)) + fused_feature = self.backbone.decode_multiscale_feature(fused_feature_list) + + if self.shrink_flag: + fused_feature = self.shrink_conv(fused_feature) + + cls = self.cls_head(fused_feature) + bbox = self.reg_head(fused_feature) + # 把bbox 的第二维度变成7 + _, bbox_temp = self.generate_predicted_boxes(cls, bbox) + + # print(bbox.equal(bbox_temp)) + output_dict = {'cls_preds': cls, + 'reg_preds': bbox_temp, + 'bbox_preds': bbox} # 计算loss的时候使用 'bbox', 在生成output的时候 'rm' + + if self.use_dir: + output_dict.update({'dir_preds': self.dir_head(fused_feature)}) + + return output_dict + + def generate_predicted_boxes(self, cls_preds, box_preds, dir_cls_preds=None): + """ + Args: + batch_size: + cls_preds: (N, H, W, C1) + box_preds: (N, H, W, C2) + dir_cls_preds: (N, H, W, C3) + + Returns: + batch_cls_preds: (B, num_boxes, num_classes) + batch_box_preds: (B, num_boxes, 7+C) + + """ + box_preds = box_preds.permute(0, 2, 3, 1).contiguous() + + batch, H, W, code_size = box_preds.size() ## code_size 表示的是预测的尺寸 + + # batch_reg = box_preds[:, 0:2, :, :] # x,y,z + # batch_hei = box_preds[:, 2:3, :, :] + # batch_dim = torch.exp(box_preds[:, 3:6, :, :]) + # # batch_dim = box_preds[:, 3:6, :, :] # w h l + # batch_rots = box_preds[:, 6:7, :, :] + # batch_rotc = box_preds[:, 7:8, :, :] + # rot = torch.atan2(batch_rots, batch_rotc) + + + box_preds = box_preds.reshape(batch, H*W, code_size) + + batch_reg = box_preds[..., 0:2] + # batch_hei = box_preds[..., 2:3] + # batch_dim = torch.exp(box_preds[..., 3:6]) + + h = box_preds[..., 3:4] * self.out_size_factor * self.voxel_size[0] + w = box_preds[..., 4:5] * self.out_size_factor * self.voxel_size[1] + l = box_preds[..., 5:6] * self.out_size_factor * self.voxel_size[2] + batch_dim = torch.cat([h,w,l], dim=-1) + batch_hei = box_preds[..., 2:3] * self.out_size_factor * self.voxel_size[2] + self.cav_lidar_range[2] + + batch_rots = box_preds[..., 6:7] + batch_rotc = box_preds[..., 7:8] + + rot = torch.atan2(batch_rots, batch_rotc) + + ys, xs = torch.meshgrid([torch.arange(0, H), torch.arange(0, W)]) + ys = ys.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + xs = xs.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + + xs = xs.view(batch, -1, 1) + batch_reg[:, :, 0:1] + ys = ys.view(batch, -1, 1) + batch_reg[:, :, 1:2] + + xs = xs * self.out_size_factor * self.voxel_size[0] + self.cav_lidar_range[0] ## 基于feature_map 的size求解真实的坐标 + ys = ys * self.out_size_factor * self.voxel_size[1] + self.cav_lidar_range[1] + + + batch_box_preds = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=2) + # batch_box_preds = batch_box_preds.reshape(batch, H, W, batch_box_preds.shape[-1]) + # batch_box_preds = batch_box_preds.permute(0, 3, 1, 2).contiguous() + + # batch_box_preds_temp = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=1) + # box_preds = box_preds.permute(0, 3, 1, 2).contiguous() + + # batch_cls_preds = cls_preds.view(batch, H*W, -1) + return cls_preds, batch_box_preds \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_codriving.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_codriving.py new file mode 100644 index 0000000000000000000000000000000000000000..e6b3fb2e35e440db238cc28e436804c7a753a0bf --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_codriving.py @@ -0,0 +1,301 @@ +import torch.nn as nn +import numpy as np +from 
opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +from opencood.models.fuse_modules.codriving_attn import Where2comm +import torch + +class centerpointcodriving(nn.Module): + def __init__(self, args): + super(centerpointcodriving, self).__init__() + + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + if 'resnet' in args['base_bev_backbone']: + self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) + else: + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) + + self.voxel_size = args['voxel_size'] + self.out_size_factor = args['out_size_factor'] + self.cav_lidar_range = args['lidar_range'] + + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + + # used to downsample the feature map for efficient computation + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.compression = False + if 'compression' in args and args['compression'] > 0: + self.compression = True + self.naive_compressor = NaiveCompressor(self.out_channel, args['compression']) + + self.dcn = False + if 'dcn' in args: + self.dcn = True + self.dcn_net = DCNNet(args['dcn']) + + # self.fusion_net = TransformerFusion(args['fusion_args']) + self.fusion_net = Where2comm(args['fusion_args']) + self.multi_scale = args['fusion_args']['multi_scale'] + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 8 * args['anchor_number'], + kernel_size=1) + if 'backbone_fix' in args.keys() and args['backbone_fix']: + self.backbone_fix() + + if 'early_fusion' in args: + self.early_flag = args['early_fusion'] + else: + self.early_flag = False + + self.init_weight() + + def init_weight(self): + pi = 0.01 + nn.init.constant_(self.cls_head.bias, -np.log((1 - pi) / pi) ) + nn.init.normal_(self.reg_head.weight, mean=0, std=0.001) + + def backbone_fix(self): + """ + Fix the parameters of backbone during finetune on timedelay。 + """ + for p in self.pillar_vfe.parameters(): + p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, data_dict, waypoints=None): + voxel_features = data_dict['processed_lidar']['voxel_features'] # e.g. 
(34814,32,4) + voxel_coords = data_dict['processed_lidar']['voxel_coords'] # e.g (34814,4) + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] # e.g (34814) + record_len = data_dict['record_len'] + + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + batch_dict = self.backbone(batch_dict) + # N, C, H', W'. [N, 384, 100, 352] + spatial_features_2d = batch_dict['spatial_features_2d'] + + + + # feat_3d = batch_dict['spatial_features'][0].detach().cpu().numpy() # shape: (C, H, W) + # feat_2d = batch_dict['spatial_features_2d'][0].detach().cpu().numpy() # shape: (C, H, W) + + # # 2) Convert each to a grayscale image by averaging across channels + # feat_3d_gray = np.mean(feat_3d, axis=0) # shape: (H, W) + # feat_2d_gray = np.mean(feat_2d, axis=0) # shape: (H, W) + + # # 3) Normalize each to [0, 255] + # def normalize_to_uint8(img): + # img_min, img_max = img.min(), img.max() + # if img_max - img_min < 1e-6: + # # Edge case if everything is the same value + # return np.zeros_like(img, dtype=np.uint8) + # normalized = (img - img_min) / (img_max - img_min) + # return (normalized * 255).astype(np.uint8) + + # feat_3d_gray = normalize_to_uint8(feat_3d_gray) + # feat_2d_gray = normalize_to_uint8(feat_2d_gray) + # import cv2 + # # 4) Save as images + # cv2.imwrite("debug/spatial_features.jpg", feat_3d_gray) + # cv2.imwrite("debug/spatial_features_2d.jpg", feat_2d_gray) + # import pdb; pdb.set_trace() + + + + + # downsample feature to reduce memory + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) # [B, 384, 96, 288] -> [B, 128, 96, 288] + # compressor + if self.compression: + spatial_features_2d = self.naive_compressor(spatial_features_2d) + # dcn + if self.dcn: + spatial_features_2d = self.dcn_net(spatial_features_2d) + # spatial_features_2d is [sum(cav_num), 256, 50, 176] + # output only contains ego + # [B, 256, 50, 176] + psm_single = self.cls_head(spatial_features_2d) # spatial_features_2d: [B, 128, 96, 288] + rm_single = self.reg_head(spatial_features_2d) + + # print('spatial_features_2d: ', spatial_features_2d.shape) + if self.multi_scale: + fused_feature, communication_rates, result_dict = self.fusion_net(batch_dict['spatial_features'], # [BN, 64, 192, 576] + psm_single, + record_len, + pairwise_t_matrix, + self.backbone, + waypoints) + # downsample feature to reduce memory + if self.shrink_flag: + fused_feature = self.shrink_conv(fused_feature) + elif self.early_flag: + fused_feature_tuple = self.regroup(spatial_features_2d, record_len) + feature_bank = [] + for feature_ in fused_feature_tuple: + feature_bank.append(feature_[0]) + fused_feature = torch.stack(feature_bank, dim=0) + result_dict = {} + communication_rates = 0 + else: + fused_feature, communication_rates, result_dict = self.fusion_net(spatial_features_2d, + psm_single, + record_len, + pairwise_t_matrix) + + + cls = self.cls_head(fused_feature) + bbox = self.reg_head(fused_feature) + + box_preds_for_infer = bbox.permute(0, 2, 3, 1).contiguous() + bbox_temp_list = [] + num_class = int(box_preds_for_infer.shape[3]/8) + box_preds_for_infer = box_preds_for_infer.view(box_preds_for_infer.shape[0], box_preds_for_infer.shape[1], box_preds_for_infer.shape[2], num_class, 8) + for i in range(num_class): + 
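# The loop below decodes each class separately: class i owns one group of 8
+ # regression channels (x/y offset, z, three sizes, sin/cos of yaw), which is
+ # decoded with its heatmap cls[:, i, :, :] into (B, H*W, 7) boxes.
+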
box_preds_for_infer_singleclass = box_preds_for_infer[:,:,:,i,:] + box_preds_for_infer_singleclass = box_preds_for_infer_singleclass.permute(0, 3, 1, 2) + _, bbox_temp = self.generate_predicted_boxes(cls[:, i, :, :], box_preds_for_infer_singleclass) + bbox_temp_list.append(bbox_temp) + bbox_temp_list = torch.stack(bbox_temp_list, dim=1) + + + _, bbox_temp = self.generate_predicted_boxes(cls, bbox) + + output_dict = {'cls_preds': cls, + 'reg_preds': bbox_temp, + 'reg_preds_multiclass': bbox_temp_list, + 'bbox_preds': bbox + } + + result_dict.update({'fused_feature':fused_feature}) + + output_dict.update(result_dict) + _, bbox_temp_single = self.generate_predicted_boxes(psm_single, rm_single) + output_dict.update({'cls_preds_single': psm_single, + 'reg_preds_single': bbox_temp_single, + 'bbox_preds_single': rm_single, + 'comm_rate': communication_rates, + }) + + ####### output box for single head ########## + psm_single_regroup = self.regroup(psm_single, record_len) + rm_single_regroup = self.regroup(rm_single, record_len) + psm_single_ego_list = [] + rm_single_ego_list = [] + for b in range(len(record_len)): + psm_single_ego_list.append(psm_single_regroup[b][0:1]) + rm_single_ego_list.append(rm_single_regroup[b][0:1]) + psm_single_ego = torch.cat((psm_single_ego_list), 0) + rm_single_ego = torch.cat((rm_single_ego_list), 0) + # generate box + box_preds_for_infer = rm_single_ego.permute(0, 2, 3, 1).contiguous() + bbox_temp_list_single = [] + num_class = int(box_preds_for_infer.shape[3]/8) + box_preds_for_infer = box_preds_for_infer.view(box_preds_for_infer.shape[0], box_preds_for_infer.shape[1], box_preds_for_infer.shape[2], num_class, 8) + for i in range(num_class): + box_preds_for_infer_singleclass = box_preds_for_infer[:,:,:,i,:] + box_preds_for_infer_singleclass = box_preds_for_infer_singleclass.permute(0, 3, 1, 2) + _, bbox_temp = self.generate_predicted_boxes(psm_single_ego[:, i, :, :], box_preds_for_infer_singleclass) + bbox_temp_list_single.append(bbox_temp) + bbox_temp_list_single = torch.stack(bbox_temp_list_single, dim=1) + output_dict.update({'cls_preds_single_ego': psm_single_ego, + 'reg_preds_multiclass_single_ego': bbox_temp_list_single, + 'bbox_preds_single_ego': rm_single_ego + }) + + return output_dict + + def generate_predicted_boxes(self, cls_preds, box_preds, dir_cls_preds=None): + """ + Args: + batch_size: + cls_preds: (N, H, W, C1) + box_preds: (N, H, W, C2) + dir_cls_preds: (N, H, W, C3) + + Returns: + batch_cls_preds: (B, num_boxes, num_classes) + batch_box_preds: (B, num_boxes, 7+C) + + """ + box_preds = box_preds.permute(0, 2, 3, 1).contiguous() + + batch, H, W, code_size = box_preds.size() ## code_size 表示的是预测的尺寸 + + box_preds = box_preds.reshape(batch, H*W, code_size) + + batch_reg = box_preds[..., 0:2] + # batch_hei = box_preds[..., 2:3] + # batch_dim = torch.exp(box_preds[..., 3:6]) + + h = box_preds[..., 3:4] * self.out_size_factor * self.voxel_size[0] + w = box_preds[..., 4:5] * self.out_size_factor * self.voxel_size[1] + l = box_preds[..., 5:6] * self.out_size_factor * self.voxel_size[2] + batch_dim = torch.cat([h,w,l], dim=-1) + batch_hei = box_preds[..., 2:3] * self.out_size_factor * self.voxel_size[2] + self.cav_lidar_range[2] + + batch_rots = box_preds[..., 6:7] + batch_rotc = box_preds[..., 7:8] + + rot = torch.atan2(batch_rots, batch_rotc) + + ys, xs = torch.meshgrid([torch.arange(0, H), torch.arange(0, W)]) + ys = ys.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + xs = xs.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + + xs = 
xs.view(batch, -1, 1) + batch_reg[:, :, 0:1] + ys = ys.view(batch, -1, 1) + batch_reg[:, :, 1:2] + + xs = xs * self.out_size_factor * self.voxel_size[0] + self.cav_lidar_range[0] ## 基于feature_map 的size求解真实的坐标 + ys = ys * self.out_size_factor * self.voxel_size[1] + self.cav_lidar_range[1] + + + batch_box_preds = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=2) + + return cls_preds, batch_box_preds \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_intermediate.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_intermediate.py new file mode 100644 index 0000000000000000000000000000000000000000..5b31cb8d3abbab5e6c152d24d69cb4a945f86719 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_intermediate.py @@ -0,0 +1,143 @@ +# -*- coding: utf-8 -*- + + +import torch +import torch.nn as nn +import numpy as np + + +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.att_bev_backbone import AttBEVBackbone + + +class CenterPointIntermediate(nn.Module): + def __init__(self, args): + super(CenterPointIntermediate, self).__init__() + + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + self.backbone = AttBEVBackbone(args['base_bev_backbone'], 64) + # self.out_size_factor = args[''] + self.voxel_size = args['voxel_size'] + self.out_size_factor = args['out_size_factor'] + self.cav_lidar_range = args['lidar_range'] + self.conv_cls = nn.Conv2d( + 128*3, 1, + kernel_size=1 + ) + self.conv_box = nn.Conv2d( + 128*3, 8, ## xyz,hwl,sin(r),cos(r) + kernel_size=1 + ) # in_channels out_channels + self.init_weight() + + def init_weight(self): + pi = 0.01 + nn.init.constant_(self.conv_cls.bias, -np.log((1 - pi) / pi) ) + nn.init.normal_(self.conv_box.weight, mean=0, std=0.001) + + + + def forward(self, data_dict): + + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + lidar_pose = data_dict['lidar_pose'] + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len, + 'pairwise_t_matrix': pairwise_t_matrix} + + batch_dict = self.pillar_vfe(batch_dict) + batch_dict = self.scatter(batch_dict) + batch_dict = self.backbone(batch_dict) + + spatial_features_2d = batch_dict['spatial_features_2d'] + + # if self.dcn and not self.before_backbone: + # spatial_features_2d = self.dcn_net(spatial_features_2d) + + cls = self.conv_cls(spatial_features_2d) ## [2, 1, h, w] + bbox = self.conv_box(spatial_features_2d) ## [2, 8, h, w] + + + # 把bbox 的第二维度变成7 + _, bbox_temp = self.generate_predicted_boxes(cls, bbox) + + # print(bbox.equal(bbox_temp)) + output_dict = {'cls_preds': cls, + 'reg_preds': bbox_temp, + 'cls': cls, + 'bbox_preds':bbox} # 计算loss的时候使用 'bbox', 在生成output的时候 'rm' + + return output_dict + + + def generate_predicted_boxes(self, cls_preds, box_preds, dir_cls_preds=None): + """ + Args: + batch_size: + cls_preds: (N, H, W, C1) + box_preds: (N, H, W, C2) + dir_cls_preds: 
(N, H, W, C3) + + Returns: + batch_cls_preds: (B, num_boxes, num_classes) + batch_box_preds: (B, num_boxes, 7+C) + + """ + box_preds = box_preds.permute(0, 2, 3, 1).contiguous() + + batch, H, W, code_size = box_preds.size() ## code_size 表示的是预测的尺寸 + + # batch_reg = box_preds[:, 0:2, :, :] # x,y,z + # batch_hei = box_preds[:, 2:3, :, :] + # batch_dim = torch.exp(box_preds[:, 3:6, :, :]) + # # batch_dim = box_preds[:, 3:6, :, :] # w h l + # batch_rots = box_preds[:, 6:7, :, :] + # batch_rotc = box_preds[:, 7:8, :, :] + # rot = torch.atan2(batch_rots, batch_rotc) + + + box_preds = box_preds.reshape(batch, H*W, code_size) + + batch_reg = box_preds[..., 0:2] + batch_hei = box_preds[..., 2:3] + + batch_dim = torch.exp(box_preds[..., 3:6]) + + batch_rots = box_preds[..., 6:7] + batch_rotc = box_preds[..., 7:8] + + rot = torch.atan2(batch_rots, batch_rotc) + + ys, xs = torch.meshgrid([torch.arange(0, H), torch.arange(0, W)]) + ys = ys.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + xs = xs.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + + xs = xs.view(batch, -1, 1) + batch_reg[:, :, 0:1] + ys = ys.view(batch, -1, 1) + batch_reg[:, :, 1:2] + + xs = xs * self.out_size_factor * self.voxel_size[0] + self.cav_lidar_range[0] ## 基于feature_map 的size求解真实的坐标 + ys = ys * self.out_size_factor * self.voxel_size[1] + self.cav_lidar_range[1] + + + batch_box_preds = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=2) + # batch_box_preds = batch_box_preds.reshape(batch, H, W, batch_box_preds.shape[-1]) + # batch_box_preds = batch_box_preds.permute(0, 3, 1, 2).contiguous() + + # batch_box_preds_temp = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=1) + # box_preds = box_preds.permute(0, 3, 1, 2).contiguous() + + # batch_cls_preds = cls_preds.view(batch, H*W, -1) + return cls_preds, batch_box_preds diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_where2comm.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_where2comm.py new file mode 100644 index 0000000000000000000000000000000000000000..57f9d040ed2cf42d830b229983e9184def345290 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_where2comm.py @@ -0,0 +1,226 @@ +import torch.nn as nn +import numpy as np +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +from opencood.models.sub_modules.dcn_net import DCNNet +# from opencood.models.fuse_modules.where2comm import Where2comm +from opencood.models.fuse_modules.where2comm_attn import Where2comm +import torch + +class CenterPointWhere2comm(nn.Module): + def __init__(self, args): + super(CenterPointWhere2comm, self).__init__() + + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + if 'resnet' in args['base_bev_backbone']: + self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) + else: + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) + + self.voxel_size = args['voxel_size'] + self.out_size_factor = 
args['out_size_factor'] + self.cav_lidar_range = args['lidar_range'] + + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + + # used to downsample the feature map for efficient computation + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.compression = False + if 'compression' in args and args['compression'] > 0: + self.compression = True + self.naive_compressor = NaiveCompressor(self.out_channel, args['compression']) + + self.dcn = False + if 'dcn' in args: + self.dcn = True + self.dcn_net = DCNNet(args['dcn']) + + # self.fusion_net = TransformerFusion(args['fusion_args']) + self.fusion_net = Where2comm(args['fusion_args']) + self.multi_scale = args['fusion_args']['multi_scale'] + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 8 * args['anchor_number'], + kernel_size=1) + if 'backbone_fix' in args.keys() and args['backbone_fix']: + self.backbone_fix() + + self.init_weight() + + def init_weight(self): + pi = 0.01 + nn.init.constant_(self.cls_head.bias, -np.log((1 - pi) / pi) ) + nn.init.normal_(self.reg_head.weight, mean=0, std=0.001) + + def backbone_fix(self): + """ + Fix the parameters of backbone during finetune on timedelay。 + """ + for p in self.pillar_vfe.parameters(): + p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + batch_dict = self.backbone(batch_dict) + # N, C, H', W'. 
[N, 384, 100, 352] + spatial_features_2d = batch_dict['spatial_features_2d'] + + # downsample feature to reduce memory + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + # compressor + if self.compression: + spatial_features_2d = self.naive_compressor(spatial_features_2d) + # dcn + if self.dcn: + spatial_features_2d = self.dcn_net(spatial_features_2d) + # spatial_features_2d is [sum(cav_num), 256, 50, 176] + # output only contains ego + # [B, 256, 50, 176] + psm_single = self.cls_head(spatial_features_2d) + rm_single = self.reg_head(spatial_features_2d) + + # print('spatial_features_2d: ', spatial_features_2d.shape) + if self.multi_scale: + fused_feature, communication_rates, result_dict = self.fusion_net(batch_dict['spatial_features'], + psm_single, + record_len, + pairwise_t_matrix, + self.backbone) + # downsample feature to reduce memory + if self.shrink_flag: + fused_feature = self.shrink_conv(fused_feature) + else: + fused_feature, communication_rates, result_dict = self.fusion_net(spatial_features_2d, + psm_single, + record_len, + pairwise_t_matrix) + + + # print('fused_feature: ', fused_feature.shape) + cls = self.cls_head(fused_feature) + bbox = self.reg_head(fused_feature) + + _, bbox_temp = self.generate_predicted_boxes(cls, bbox) + + output_dict = {'cls_preds': cls, + 'reg_preds': bbox_temp, + 'bbox_preds': bbox + } + output_dict.update(result_dict) + + _, bbox_temp_single = self.generate_predicted_boxes(psm_single, rm_single) + + output_dict.update({'cls_preds_single': psm_single, + 'reg_preds_single': bbox_temp_single, + 'bbox_preds_single': rm_single, + 'comm_rate': communication_rates + }) + return output_dict + + def generate_predicted_boxes(self, cls_preds, box_preds, dir_cls_preds=None): + """ + Args: + batch_size: + cls_preds: (N, H, W, C1) + box_preds: (N, H, W, C2) + dir_cls_preds: (N, H, W, C3) + + Returns: + batch_cls_preds: (B, num_boxes, num_classes) + batch_box_preds: (B, num_boxes, 7+C) + + """ + box_preds = box_preds.permute(0, 2, 3, 1).contiguous() + + batch, H, W, code_size = box_preds.size() ## code_size 表示的是预测的尺寸 + + box_preds = box_preds.reshape(batch, H*W, code_size) + + batch_reg = box_preds[..., 0:2] + # batch_hei = box_preds[..., 2:3] + # batch_dim = torch.exp(box_preds[..., 3:6]) + + h = box_preds[..., 3:4] * self.out_size_factor * self.voxel_size[0] + w = box_preds[..., 4:5] * self.out_size_factor * self.voxel_size[1] + l = box_preds[..., 5:6] * self.out_size_factor * self.voxel_size[2] + batch_dim = torch.cat([h,w,l], dim=-1) + batch_hei = box_preds[..., 2:3] * self.out_size_factor * self.voxel_size[2] + self.cav_lidar_range[2] + + batch_rots = box_preds[..., 6:7] + batch_rotc = box_preds[..., 7:8] + + rot = torch.atan2(batch_rots, batch_rotc) + + ys, xs = torch.meshgrid([torch.arange(0, H), torch.arange(0, W)]) + ys = ys.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + xs = xs.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + + xs = xs.view(batch, -1, 1) + batch_reg[:, :, 0:1] + ys = ys.view(batch, -1, 1) + batch_reg[:, :, 1:2] + + xs = xs * self.out_size_factor * self.voxel_size[0] + self.cav_lidar_range[0] ## 基于feature_map 的size求解真实的坐标 + ys = ys * self.out_size_factor * self.voxel_size[1] + self.cav_lidar_range[1] + + + batch_box_preds = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=2) + # batch_box_preds = batch_box_preds.reshape(batch, H, W, batch_box_preds.shape[-1]) + # batch_box_preds = batch_box_preds.permute(0, 3, 1, 2).contiguous() + + # batch_box_preds_temp = torch.cat([xs, ys, 
batch_hei, batch_dim, rot], dim=1) + # box_preds = box_preds.permute(0, 3, 1, 2).contiguous() + + # batch_cls_preds = cls_preds.view(batch, H*W, -1) + return cls_preds, batch_box_preds \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_where2comm_multiclass.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_where2comm_multiclass.py new file mode 100644 index 0000000000000000000000000000000000000000..10a4961e69f13a0444814c9f5a9d6c39dd740a9a --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_where2comm_multiclass.py @@ -0,0 +1,246 @@ +import torch.nn as nn +import numpy as np +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +# from opencood.models.sub_modules.dcn_net import DCNNet +# from opencood.models.fuse_modules.where2comm import Where2comm +from opencood.models.fuse_modules.where2comm_attn import Where2comm +import torch + +class centerpointwhere2commmulticlass(nn.Module): + def __init__(self, args): + super(centerpointwhere2commmulticlass, self).__init__() + + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + if 'resnet' in args['base_bev_backbone']: + self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) + else: + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) + + self.voxel_size = args['voxel_size'] + self.out_size_factor = args['out_size_factor'] + self.cav_lidar_range = args['lidar_range'] + + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + + # used to downsample the feature map for efficient computation + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.compression = False + if 'compression' in args and args['compression'] > 0: + self.compression = True + self.naive_compressor = NaiveCompressor(self.out_channel, args['compression']) + + self.dcn = False + if 'dcn' in args: + self.dcn = True + self.dcn_net = DCNNet(args['dcn']) + + # self.fusion_net = TransformerFusion(args['fusion_args']) + self.fusion_net = Where2comm(args['fusion_args']) + self.multi_scale = args['fusion_args']['multi_scale'] + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 8 * args['anchor_number'], + kernel_size=1) + if 'backbone_fix' in args.keys() and args['backbone_fix']: + self.backbone_fix() + + if 'early_fusion' in args: + self.early_flag = args['early_fusion'] + else: + self.early_flag = False + + self.init_weight() + + def init_weight(self): + pi = 0.01 + nn.init.constant_(self.cls_head.bias, -np.log((1 - pi) / pi) ) + nn.init.normal_(self.reg_head.weight, mean=0, std=0.001) + + def backbone_fix(self): + """ + Fix the parameters of backbone during finetune on timedelay。 + """ + for p in self.pillar_vfe.parameters(): + 
p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] # (34814,32,4) + voxel_coords = data_dict['processed_lidar']['voxel_coords'] # (34814,4) + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] # (34814) + record_len = data_dict['record_len'] + + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + batch_dict = self.backbone(batch_dict) + # N, C, H', W'. [N, 384, 100, 352] + spatial_features_2d = batch_dict['spatial_features_2d'] + + # downsample feature to reduce memory + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + # compressor + if self.compression: + spatial_features_2d = self.naive_compressor(spatial_features_2d) + # dcn + if self.dcn: + spatial_features_2d = self.dcn_net(spatial_features_2d) + # spatial_features_2d is [sum(cav_num), 256, 50, 176] + # output only contains ego + # [B, 256, 50, 176] + psm_single = self.cls_head(spatial_features_2d) + rm_single = self.reg_head(spatial_features_2d) + + # print('spatial_features_2d: ', spatial_features_2d.shape) + if self.multi_scale: + fused_feature, communication_rates, result_dict = self.fusion_net(batch_dict['spatial_features'], + psm_single, + record_len, + pairwise_t_matrix, + self.backbone) + # downsample feature to reduce memory + if self.shrink_flag: + fused_feature = self.shrink_conv(fused_feature) + elif self.early_flag: + fused_feature_tuple = self.regroup(spatial_features_2d, record_len) + feature_bank = [] + for feature_ in fused_feature_tuple: + feature_bank.append(feature_[0]) + fused_feature = torch.stack(feature_bank, dim=0) + result_dict = {} + communication_rates = 0 + else: + fused_feature, communication_rates, result_dict = self.fusion_net(spatial_features_2d, + psm_single, + record_len, + pairwise_t_matrix) + + + # print('fused_feature: ', fused_feature.shape) + cls = self.cls_head(fused_feature) # fused_feature [12, 128, 96, 288] -> [12, 3, 96, 288] + bbox = self.reg_head(fused_feature) # fused_feature [12, 128, 96, 288] -> [12, 24, 96, 288] + + box_preds_for_infer = bbox.permute(0, 2, 3, 1).contiguous() + bbox_temp_list = [] + num_class = int(box_preds_for_infer.shape[3]/8) + box_preds_for_infer = box_preds_for_infer.view(box_preds_for_infer.shape[0], box_preds_for_infer.shape[1], box_preds_for_infer.shape[2], num_class, 8) + for i in range(num_class): # num_class + box_preds_for_infer_singleclass = box_preds_for_infer[:,:,:,i,:] + box_preds_for_infer_singleclass = box_preds_for_infer_singleclass.permute(0, 3, 1, 2) + _, bbox_temp = self.generate_predicted_boxes(cls[:, i, :, :], box_preds_for_infer_singleclass) + 
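# bbox_temp is (B, H*W, 7): (x, y, z, h, w, l, yaw). x/y are recovered as
+ # (grid_index + predicted_offset) * out_size_factor * voxel_size + lidar_range_min,
+ # and yaw = atan2(sin_pred, cos_pred); e.g. assuming out_size_factor=2 and
+ # voxel_size[0]=0.125 (illustrative values), grid x=100 with offset 0.3 maps to
+ # 100.3 * 0.25 + x_min = 25.075 + x_min.
+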
bbox_temp_list.append(bbox_temp) + bbox_temp_list = torch.stack(bbox_temp_list, dim=1) + + + _, bbox_temp = self.generate_predicted_boxes(cls, bbox) + + output_dict = {'cls_preds': cls, # (4,1,100,100) -> [1, 3, 92, 92] + 'reg_preds': bbox_temp, #(4,10000,7) + 'reg_preds_multiclass': bbox_temp_list, # [1, 3, 27648, 7] + 'bbox_preds': bbox #(4,8,100,100) -> [1, 24, 92, 92] + } + output_dict.update(result_dict) + + _, bbox_temp_single = self.generate_predicted_boxes(psm_single, rm_single) + + output_dict.update({'cls_preds_single': psm_single, # [12, 1, 100, 100] + 'reg_preds_single': bbox_temp_single, # [12, 10000, 7] + 'bbox_preds_single': rm_single, # [12, 8, 100, 100] + 'comm_rate': communication_rates + }) + return output_dict + + def generate_predicted_boxes(self, cls_preds, box_preds, dir_cls_preds=None): + """ + Args: + batch_size: + cls_preds: (N, H, W, C1) + box_preds: (N, H, W, C2) + dir_cls_preds: (N, H, W, C3) + + Returns: + batch_cls_preds: (B, num_boxes, num_classes) + batch_box_preds: (B, num_boxes, 7+C) + + """ + box_preds = box_preds.permute(0, 2, 3, 1).contiguous() + + batch, H, W, code_size = box_preds.size() ## code_size 表示的是预测的尺寸 + + box_preds = box_preds.reshape(batch, H*W, code_size) + + batch_reg = box_preds[..., 0:2] + # batch_hei = box_preds[..., 2:3] + # batch_dim = torch.exp(box_preds[..., 3:6]) + + h = box_preds[..., 3:4] * self.out_size_factor * self.voxel_size[0] + w = box_preds[..., 4:5] * self.out_size_factor * self.voxel_size[1] + l = box_preds[..., 5:6] * self.out_size_factor * self.voxel_size[2] + batch_dim = torch.cat([h,w,l], dim=-1) + batch_hei = box_preds[..., 2:3] * self.out_size_factor * self.voxel_size[2] + self.cav_lidar_range[2] + + batch_rots = box_preds[..., 6:7] + batch_rotc = box_preds[..., 7:8] + + rot = torch.atan2(batch_rots, batch_rotc) + + ys, xs = torch.meshgrid([torch.arange(0, H), torch.arange(0, W)]) + ys = ys.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + xs = xs.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + + xs = xs.view(batch, -1, 1) + batch_reg[:, :, 0:1] + ys = ys.view(batch, -1, 1) + batch_reg[:, :, 1:2] + + xs = xs * self.out_size_factor * self.voxel_size[0] + self.cav_lidar_range[0] ## 基于feature_map 的size求解真实的坐标 + ys = ys * self.out_size_factor * self.voxel_size[1] + self.cav_lidar_range[1] + + + batch_box_preds = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=2) + + return cls_preds, batch_box_preds \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/ciassd.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/ciassd.py new file mode 100644 index 0000000000000000000000000000000000000000..8b69e455b399f793969364527f463c9c6b1e7e18 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/ciassd.py @@ -0,0 +1,51 @@ +import torch +from torch import nn +import numpy as np + +from opencood.models.sub_modules.mean_vfe import MeanVFE +from opencood.models.sub_modules.sparse_backbone_3d import VoxelBackBone8x +from opencood.models.sub_modules.height_compression import HeightCompression +from opencood.models.sub_modules.cia_ssd_utils import SSFA, Head + + +class CIASSD(nn.Module): + def __init__(self, args): + super(CIASSD, self).__init__() + lidar_range = np.array(args['lidar_range']) + grid_size = np.round((lidar_range[3:6] - lidar_range[:3]) / + np.array(args['voxel_size'])).astype(np.int64) + self.vfe = MeanVFE(args['mean_vfe'], args['mean_vfe']['num_point_features']) + self.spconv_block = VoxelBackBone8x(args['spconv'], + 
input_channels=args['spconv']['num_features_in'], + grid_size=grid_size) + self.map_to_bev = HeightCompression(args['map2bev']) + self.ssfa = SSFA(args['ssfa']) + self.head = Head(**args['head']) + + def forward(self, batch_dict): + voxel_features = batch_dict['processed_lidar']['voxel_features'] + voxel_coords = batch_dict['processed_lidar']['voxel_coords'] + voxel_num_points = batch_dict['processed_lidar']['voxel_num_points'] + + # save memory + batch_dict.pop('processed_lidar') + batch_dict.update({'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points}) + + batch_dict['batch_size'] = batch_dict['object_bbx_center'].shape[0] + + batch_dict = self.vfe(batch_dict) + batch_dict = self.spconv_block(batch_dict) + batch_dict = self.map_to_bev(batch_dict) + out = self.ssfa(batch_dict['spatial_features']) + out = self.head(out) + batch_dict['preds_dict_stage1'] = out + + return batch_dict + + + +if __name__=="__main__": + model = SSFA(None) + print(model) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/__pycache__/codriving.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/__pycache__/codriving.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2efbf59deef2733bb821ecab028ba455cc668834 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/__pycache__/codriving.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/__pycache__/where2comm.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/__pycache__/where2comm.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9bd17f146ddb2f0435d47717e318b3c00e18c88f Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/__pycache__/where2comm.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/codriving.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/codriving.py new file mode 100644 index 0000000000000000000000000000000000000000..aa8cbafc0c6e9577fca1c1a32ec052e5bfb2ddbc --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/codriving.py @@ -0,0 +1,143 @@ +# -*- coding: utf-8 -*- +# Author: Yue Hu , Genjia Liu +# License: TDG-Attribution-NonCommercial-NoDistrib + +import torch +import torch.nn as nn +import numpy as np +import copy +import random + +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple + +class Communication(nn.Module): + def __init__(self, args): + super(Communication, self).__init__() + + self.smooth = False + self.thre = args['thre'] + if 'gaussian_smooth' in args: + # Gaussian Smooth + self.smooth = True + kernel_size = args['gaussian_smooth']['k_size'] + c_sigma = args['gaussian_smooth']['c_sigma'] + self.gaussian_filter = nn.Conv2d(1, 1, kernel_size=kernel_size, stride=1, padding=(kernel_size-1)//2) + self.init_gaussian_filter(kernel_size, c_sigma) + self.gaussian_filter.requires_grad = False + self.det_range = args['cav_lidar_range'] + self.use_driving_request = args['driving_request'] + + self.args = args + + def init_gaussian_filter(self, k_size=5, sigma=1): + def _gen_gaussian_kernel(k_size=5, sigma=1): + center = k_size // 2 + x, y = np.mgrid[0 - center : k_size - center, 0 - 
center : k_size - center] + g = 1 / (2 * np.pi * sigma) * np.exp(-(np.square(x) + np.square(y)) / (2 * np.square(sigma))) + return g + gaussian_kernel = _gen_gaussian_kernel(k_size, sigma) + self.gaussian_filter.weight.data = torch.Tensor(gaussian_kernel).to(self.gaussian_filter.weight.device).unsqueeze(0).unsqueeze(0) + self.gaussian_filter.bias.data.zero_() + + def forward(self, batch_confidence_maps, record_len, pairwise_t_matrix, waypoints=None): + # batch_confidence_maps:[(L1, H, W), (L2, H, W), ...] + # pairwise_t_matrix: (B,L,L,2,3) + # thre: threshold of objectiveness + # a_ji = (1 - q_i)*q_ji + B, L, _, _, _ = pairwise_t_matrix.shape + _, _, H, W = batch_confidence_maps[0].shape + + ### get matrix for inverse transform + pairwise_t_matrix_inverse = pairwise_t_matrix.clone() + + pairwise_t_matrix_inverse[...,0,1] = pairwise_t_matrix_inverse[...,0,1] / (H / W) + pairwise_t_matrix_inverse[...,1,0] = pairwise_t_matrix_inverse[...,1,0] / (W / H) + + pairwise_t_matrix_inverse[...,0,2] *= -1 + pairwise_t_matrix_inverse[...,1,2] *= -1 + + pairwise_t_matrix_inverse_2 = pairwise_t_matrix_inverse.clone() + + pairwise_t_matrix_inverse[...,0,1] = pairwise_t_matrix_inverse_2[...,1,0] + pairwise_t_matrix_inverse[...,1,0] = pairwise_t_matrix_inverse_2[...,0,1] + + pairwise_t_matrix_inverse[...,0,1] = pairwise_t_matrix_inverse[...,0,1] * (H / W) + pairwise_t_matrix_inverse[...,1,0] = pairwise_t_matrix_inverse[...,1,0] * (W / H) + + communication_masks = [] + communication_rates = [] + batch_communication_maps = [] + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + # t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + ori_communication_maps = batch_confidence_maps[b].sigmoid().max(dim=1)[0].unsqueeze(1) # dim1=2 represents the confidence of two anchors + + if False: # self.smooth: + processed_communication_maps = self.gaussian_filter(ori_communication_maps) + # normalize to 0-1 + if processed_communication_maps.max() >0: + processed_communication_maps = processed_communication_maps/processed_communication_maps.max()*ori_communication_maps.max() + else: + processed_communication_maps = ori_communication_maps + + ########## driving request ############ + if waypoints is not None: # only used with waypoints prediction model + # assert B==1 # waypoints.size(0)==len(record_len) + + from opencood.utils.waypoint2map import waypoints2map_radius # radius=40 sigma_reverse=5 + bev_grad_cam = waypoints2map_radius( waypoints.cpu().numpy(), radius=self.args.get('radius',160), sigma_reverse=self.args.get('sigma_reverse',2), \ + grid_coord=[batch_confidence_maps[b].size(2),batch_confidence_maps[b].size(3), \ + self.det_range[4]/(self.det_range[4]-self.det_range[1]),\ + self.det_range[3]/(self.det_range[3]-self.det_range[0])] \ + , det_range=self.det_range) # (1,10,2) -> (1,192,576) + + bev_grad_cam_tensor = torch.tensor(bev_grad_cam).to(batch_confidence_maps[0].device) + # warp request map + N = record_len[b].item() + grad_cam_repeat = bev_grad_cam_tensor[0][None, None].repeat(N,1,1,1) # bev_grad_cam_tensor[b][None, None].repeat(N,1,1,1) + t_matrix = pairwise_t_matrix_inverse[b][:N, :N, :, :] + warpped_grad_cam = warp_affine_simple(grad_cam_repeat, + t_matrix[0, :, :, :], + (H, W)).clamp(0,1) + + processed_communication_maps = processed_communication_maps * torch.clamp((warpped_grad_cam.to(processed_communication_maps.dtype)*5/(warpped_grad_cam.max()+1e-7)), min=1e-4, max=1 - 1e-4) + + ############################################ + + communication_maps 
= processed_communication_maps + + ones_mask = torch.ones_like(communication_maps).to(communication_maps.device) + zeros_mask = torch.zeros_like(communication_maps).to(communication_maps.device) + + if self.args.get('random_thre',False): + thre_list = [0.001,0.003,0.01,0.02,0.1] + thre = random.choice(thre_list) + thre = np.random.uniform(0.5*thre, 1.5*thre) + else: + thre = self.thre + + + communication_mask = torch.where(communication_maps>= thre, ones_mask, zeros_mask) + + communication_rate = communication_mask[1:N].sum()/(H*W) + + # communication_mask = warp_affine_simple(communication_mask, + # t_matrix[0, :, :, :], + # (H, W)) + + communication_mask_nodiag = communication_mask.clone() + ones_mask = torch.ones_like(communication_mask).to(communication_mask.device) + communication_mask_nodiag[0] = ones_mask[0] + + communication_masks.append(communication_mask_nodiag) + communication_rates.append(communication_rate) + batch_communication_maps.append(ori_communication_maps*communication_mask_nodiag) + communication_rates = sum(communication_rates)/B + # communication_masks = torch.stack(communication_masks, dim=0) ## torch.concat + communication_masks = torch.concat(communication_masks, dim=0) + + return batch_communication_maps, communication_masks, communication_rates \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/where2comm.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/where2comm.py new file mode 100644 index 0000000000000000000000000000000000000000..26db8147c65ff5a060f2f53b9f11360325d5308d --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/where2comm.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- +# Author: Yue Hu +# License: TDG-Attribution-NonCommercial-NoDistrib + +import torch +import torch.nn as nn +import numpy as np + +class Communication(nn.Module): + def __init__(self, args): + super(Communication, self).__init__() + + self.smooth = False + self.thre = args['thre'] + if 'gaussian_smooth' in args: + # Gaussian Smooth + self.smooth = True + kernel_size = args['gaussian_smooth']['k_size'] + c_sigma = args['gaussian_smooth']['c_sigma'] + self.gaussian_filter = nn.Conv2d(1, 1, kernel_size=kernel_size, stride=1, padding=(kernel_size-1)//2) + self.init_gaussian_filter(kernel_size, c_sigma) + self.gaussian_filter.requires_grad = False + + def init_gaussian_filter(self, k_size=5, sigma=1): + def _gen_gaussian_kernel(k_size=5, sigma=1): + center = k_size // 2 + x, y = np.mgrid[0 - center : k_size - center, 0 - center : k_size - center] + g = 1 / (2 * np.pi * sigma) * np.exp(-(np.square(x) + np.square(y)) / (2 * np.square(sigma))) + return g + gaussian_kernel = _gen_gaussian_kernel(k_size, sigma) + self.gaussian_filter.weight.data = torch.Tensor(gaussian_kernel).to(self.gaussian_filter.weight.device).unsqueeze(0).unsqueeze(0) + self.gaussian_filter.bias.data.zero_() + + def forward(self, batch_confidence_maps, record_len, pairwise_t_matrix): + # batch_confidence_maps:[(L1, H, W), (L2, H, W), ...] 
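# A minimal standalone sketch (illustrative only, assumed shapes, not part of
# this module) of the frozen Gaussian-smoothing convolution that
# init_gaussian_filter above builds: a k x k Gaussian kernel is generated with
# numpy and copied into a single-channel Conv2d whose weights stay fixed.
import numpy as np
import torch
import torch.nn as nn

def make_gaussian_conv(k_size=5, sigma=1.0):
    center = k_size // 2
    x, y = np.mgrid[-center:k_size - center, -center:k_size - center]
    g = np.exp(-(x ** 2 + y ** 2) / (2 * sigma ** 2)) / (2 * np.pi * sigma ** 2)
    conv = nn.Conv2d(1, 1, kernel_size=k_size, stride=1, padding=(k_size - 1) // 2)
    conv.weight.data = torch.tensor(g, dtype=torch.float32).view(1, 1, k_size, k_size)
    conv.bias.data.zero_()
    conv.requires_grad_(False)  # fixed smoothing filter, not learned
    return conv

# Example: smooth a batch of (N, 1, H, W) confidence maps.
smooth = make_gaussian_conv()
smoothed = smooth(torch.rand(2, 1, 192, 576))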
+ # pairwise_t_matrix: (B,L,L,2,3) + # thre: threshold of objectiveness + # a_ji = (1 - q_i)*q_ji + B, L, _, _, _ = pairwise_t_matrix.shape + _, _, H, W = batch_confidence_maps[0].shape + + communication_masks = [] + communication_rates = [] + batch_communication_maps = [] + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + # t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + ori_communication_maps = batch_confidence_maps[b].sigmoid().max(dim=1)[0].unsqueeze(1) # dim1=2 represents the confidence of two anchors + + if self.smooth: + communication_maps = self.gaussian_filter(ori_communication_maps) + else: + communication_maps = ori_communication_maps + + ones_mask = torch.ones_like(communication_maps).to(communication_maps.device) + zeros_mask = torch.zeros_like(communication_maps).to(communication_maps.device) + communication_mask = torch.where(communication_maps>self.thre, ones_mask, zeros_mask) + + communication_rate = communication_mask[0].sum()/(H*W) + + # communication_mask = warp_affine_simple(communication_mask, + # t_matrix[0, :, :, :], + # (H, W)) + + communication_mask_nodiag = communication_mask.clone() + ones_mask = torch.ones_like(communication_mask).to(communication_mask.device) + communication_mask_nodiag[0] = ones_mask[0] # [::2] + + communication_masks.append(communication_mask_nodiag) + communication_rates.append(communication_rate) + batch_communication_maps.append(ori_communication_maps*communication_mask_nodiag) + communication_rates = sum(communication_rates)/B + communication_masks = torch.stack(communication_masks, dim=0) ## torch.concat + return batch_communication_maps, communication_masks, communication_rates \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/where2comm_v0.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/where2comm_v0.py new file mode 100644 index 0000000000000000000000000000000000000000..3bf6c9f274845b3c185b17c111bec2edcb898d56 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/where2comm_v0.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- +# Author: Yue Hu , Genjia Liu +# License: TDG-Attribution-NonCommercial-NoDistrib + +import torch +import torch.nn as nn +import numpy as np + +class Communication(nn.Module): + def __init__(self, args): + super(Communication, self).__init__() + + self.smooth = False + self.thre = args['thre'] + if 'gaussian_smooth' in args: + # Gaussian Smooth + self.smooth = True + kernel_size = args['gaussian_smooth']['k_size'] + c_sigma = args['gaussian_smooth']['c_sigma'] + self.gaussian_filter = nn.Conv2d(1, 1, kernel_size=kernel_size, stride=1, padding=(kernel_size-1)//2) + self.init_gaussian_filter(kernel_size, c_sigma) + self.gaussian_filter.requires_grad = False + + def init_gaussian_filter(self, k_size=5, sigma=1): + def _gen_gaussian_kernel(k_size=5, sigma=1): + center = k_size // 2 + x, y = np.mgrid[0 - center : k_size - center, 0 - center : k_size - center] + g = 1 / (2 * np.pi * sigma) * np.exp(-(np.square(x) + np.square(y)) / (2 * np.square(sigma))) + return g + gaussian_kernel = _gen_gaussian_kernel(k_size, sigma) + self.gaussian_filter.weight.data = torch.Tensor(gaussian_kernel).to(self.gaussian_filter.weight.device).unsqueeze(0).unsqueeze(0) + self.gaussian_filter.bias.data.zero_() + + def forward(self, batch_confidence_maps, record_len, pairwise_t_matrix): + # batch_confidence_maps:[(L1, H, W), (L2, H, W), ...] 
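# Standalone sketch (assumed shapes, illustrative rather than the module's own
# code path) of the masking step performed below: each agent's confidence map
# is thresholded into a binary "send this pixel" mask, the ego row keeps its
# full feature map, and the fraction of transmitted pixels from the other
# agents is reported as the communication rate.
import torch

def confidence_to_mask(conf_map: torch.Tensor, thre: float = 0.01):
    # conf_map: (N, anchors, H, W) raw scores for the N agents of one sample
    conf = conf_map.sigmoid().max(dim=1, keepdim=True)[0]  # (N, 1, H, W)
    mask = (conf > thre).float()                           # binary communication mask
    mask[0] = 1.0                                          # ego always keeps its own full map
    _, _, H, W = mask.shape
    rate = mask[1:].sum() / (H * W)                        # bandwidth proxy
    return mask, rate

mask, rate = confidence_to_mask(torch.randn(3, 2, 192, 576))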
+ # pairwise_t_matrix: (B,L,L,2,3) + # thre: threshold of objectiveness + # a_ji = (1 - q_i)*q_ji + B, L, _, _, _ = pairwise_t_matrix.shape + _, _, H, W = batch_confidence_maps[0].shape + + communication_masks = [] + communication_rates = [] + batch_communication_maps = [] + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + # t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + ori_communication_maps = batch_confidence_maps[b].sigmoid().max(dim=1)[0].unsqueeze(1) # dim1=2 represents the confidence of two anchors + + if self.smooth: + communication_maps = self.gaussian_filter(ori_communication_maps) + else: + communication_maps = ori_communication_maps + + ones_mask = torch.ones_like(communication_maps).to(communication_maps.device) + zeros_mask = torch.zeros_like(communication_maps).to(communication_maps.device) + communication_mask = torch.where(communication_maps>self.thre, ones_mask, zeros_mask) + + communication_rate = communication_mask[1:N].sum()/(H*W) + + # communication_mask = warp_affine_simple(communication_mask, + # t_matrix[0, :, :, :], + # (H, W)) + + communication_mask_nodiag = communication_mask.clone() + ones_mask = torch.ones_like(communication_mask).to(communication_mask.device) + communication_mask_nodiag[::N] = ones_mask[::N] # [::2] + + communication_masks.append(communication_mask_nodiag) + communication_rates.append(communication_rate) + batch_communication_maps.append(ori_communication_maps*communication_mask_nodiag) + communication_rates = sum(communication_rates)/B + communication_masks = torch.concat(communication_masks, dim=0) ## torch.concat + return batch_communication_maps, communication_masks, communication_rates \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/da_modules/gsl.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/da_modules/gsl.py new file mode 100644 index 0000000000000000000000000000000000000000..0f1ddf068d7493b7627344836beb920d27a40387 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/da_modules/gsl.py @@ -0,0 +1,35 @@ +""" +https://github.com/DerrickXuNu/MPDA/blob/9879d4b615/opencood/models/da_modules/gradient_layer.py +""" + +import torch + + +class _GradientScalarLayer(torch.autograd.Function): + @staticmethod + def forward(ctx, input, weight): + ctx.weight = weight + return input.view_as(input) + + @staticmethod + def backward(ctx, grad_output): + grad_input = grad_output.clone() + return ctx.weight * grad_input, None + + +gradient_scalar = _GradientScalarLayer.apply + + +class GradientScalarLayer(torch.nn.Module): + def __init__(self, weight): + super(GradientScalarLayer, self).__init__() + self.weight = weight + + def forward(self, input): + return gradient_scalar(input, self.weight) + + def __repr__(self): + tmpstr = self.__class__.__name__ + "(" + tmpstr += "weight=" + str(self.weight) + tmpstr += ")" + return tmpstr \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fpvrcnn.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fpvrcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..05e114b817904dbd31ea87ec8b3d93441f549b44 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fpvrcnn.py @@ -0,0 +1,90 @@ +import random, os + +import torch +from torch import nn +import numpy as np + +from opencood.models.sub_modules.mean_vfe import MeanVFE +from 
opencood.models.sub_modules.sparse_backbone_3d import VoxelBackBone8x +from opencood.models.sub_modules.height_compression import HeightCompression +from opencood.models.sub_modules.cia_ssd_utils import SSFA, Head +from opencood.models.sub_modules.vsa import VoxelSetAbstraction +from opencood.models.sub_modules.roi_head import RoIHead +from opencood.models.sub_modules.matcher import Matcher +from opencood.data_utils.post_processor.fpvrcnn_postprocessor import \ + FpvrcnnPostprocessor +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple + +class FPVRCNN(nn.Module): + def __init__(self, args): + super(FPVRCNN, self).__init__() + lidar_range = np.array(args['lidar_range']) + grid_size = np.round((lidar_range[3:6] - lidar_range[:3]) / + np.array(args['voxel_size'])).astype(np.int64) + self.vfe = MeanVFE(args['mean_vfe'], + args['mean_vfe']['num_point_features']) + self.spconv_block = VoxelBackBone8x(args['spconv'], + input_channels=args['spconv'][ + 'num_features_in'], + grid_size=grid_size) + self.map_to_bev = HeightCompression(args['map2bev']) + self.ssfa = SSFA(args['ssfa']) + self.head = Head(**args['head']) + self.post_processor = FpvrcnnPostprocessor(args['post_processer'], + train=True) + self.vsa = VoxelSetAbstraction(args['vsa'], args['voxel_size'], + args['lidar_range'], + num_bev_features=128, + num_rawpoint_features=3) + self.matcher = Matcher(args['matcher'], args['lidar_range']) + self.roi_head = RoIHead(args['roi_head']) + self.train_stage2 = args['activate_stage2'] + self.discrete_ratio = args['voxel_size'][0] + + def forward(self, batch_dict): + voxel_features = batch_dict['processed_lidar']['voxel_features'] + voxel_coords = batch_dict['processed_lidar']['voxel_coords'] + voxel_num_points = batch_dict['processed_lidar']['voxel_num_points'] + + # save memory + batch_dict.pop('processed_lidar') + batch_dict.update({'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'batch_size': int(batch_dict['record_len'].sum()), + 'proj_first': batch_dict['proj_first'], + 'lidar_pose': batch_dict['lidar_pose']}) + + batch_dict = self.vfe(batch_dict) + batch_dict = self.spconv_block(batch_dict) + batch_dict = self.map_to_bev(batch_dict) + + out = self.ssfa(batch_dict['spatial_features']) + batch_dict['stage1_out'] = self.head(out) + + data_dict, output_dict = {}, {} + data_dict['ego'], output_dict['ego'] = batch_dict, batch_dict + + pred_box3d_list, scores_list = \ + self.post_processor.post_process(data_dict, output_dict, + stage1=True) + + # if proj_first is False + # the boxes are predicted in each coordinate + batch_dict['det_boxes'] = pred_box3d_list + batch_dict['det_scores'] = scores_list + + if pred_box3d_list is not None and self.train_stage2: + batch_dict = self.vsa(batch_dict) + batch_dict = self.matcher(batch_dict) + batch_dict = self.roi_head(batch_dict) + + return batch_dict + + + + + +if __name__ == "__main__": + model = SSFA(None) + print(model) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__init__.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/__init__.cpython-37.pyc 
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3727a0cd3647dcfd06a2cda11b904fdae01ea3ca Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/__init__.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/att_fuse.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/att_fuse.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2cefe8da1e1971d2c15651acec68a9a8669c1bc9 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/att_fuse.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/codriving_attn.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/codriving_attn.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f2c4efac4a61c8400257332f0ea79111825d46c6 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/codriving_attn.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/f_cooper_fuse.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/f_cooper_fuse.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d78c180a80fb1e2c8e58a9ceb9d102de45061fdf Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/f_cooper_fuse.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/fuse_utils.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/fuse_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d985eb540048cb448161ab7a55ce030015987cd1 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/fuse_utils.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/fusion_in_one.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/fusion_in_one.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7ab039a409e093d4a7f60551422d2d522fe54351 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/fusion_in_one.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/where2comm_attn.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/where2comm_attn.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7ca5bd7b1d959f56bbc3b78e0b731219fa975334 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/where2comm_attn.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/att_fuse.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/att_fuse.py 
new file mode 100644 index 0000000000000000000000000000000000000000..a5e173110da6c413f4ccb542b7437dd6b4dfde0b --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/att_fuse.py @@ -0,0 +1,223 @@ +""" +Implementation of Attn Fusion +""" + +import torch +import torch.nn as nn + +from opencood.models.sub_modules.torch_transformation_utils import \ + get_discretized_transformation_matrix, get_transformation_matrix, \ + warp_affine_simple, get_rotated_roi +from matplotlib import pyplot as plt +from icecream import ic +import torch.nn.functional as F +import numpy as np + +class ScaledDotProductAttention(nn.Module): + """ + Scaled Dot-Product Attention proposed in "Attention Is All You Need" + Compute the dot products of the query with all keys, divide each by sqrt(dim), + and apply a softmax function to obtain the weights on the values + Args: dim, mask + dim (int): dimention of attention + mask (torch.Tensor): tensor containing indices to be masked + Inputs: query, key, value, mask + - **query** (batch, q_len, d_model): tensor containing projection + vector for decoder. + - **key** (batch, k_len, d_model): tensor containing projection + vector for encoder. + - **value** (batch, v_len, d_model): tensor containing features of the + encoded input sequence. + - **mask** (-): tensor containing indices to be masked + Returns: context, attn + - **context**: tensor containing the context vector from + attention mechanism. + - **attn**: tensor containing the attention (alignment) from the + encoder outputs. + """ + + def __init__(self, dim): + super(ScaledDotProductAttention, self).__init__() + self.sqrt_dim = np.sqrt(dim) + + def forward(self, query, key, value): + score = torch.bmm(query, key.transpose(1, 2)) / self.sqrt_dim + attn = F.softmax(score, -1) + context = torch.bmm(attn, value) + return context + +class AttFusion(nn.Module): + def __init__(self, args): + super(AttFusion, self).__init__() + + self.discrete_ratio = args['voxel_size'][0] # voxel_size[0]=0.4 + self.downsample_rate = args['downsample_rate'] + self.att = ScaledDotProductAttention(args['in_channels']) + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, xx, record_len, pairwise_t_matrix): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + Returns + ------- + Fused feature. 
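        Note: the raw (B, L, L, 4, 4) pose matrices are sliced below into a
        (B, L, L, 2, 3) affine form and their translations are normalized by
        the (downsampled) feature-map size before the neighbor features are
        warped into the ego frame.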
+ """ + _, C, H, W = xx.shape + B, L = pairwise_t_matrix.shape[:2] + + split_x = self.regroup(xx, record_len) + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + batch_node_features = split_x + # iteratively update the features for num_iteration times + + out = [] + # iterate each batch + for b in range(B): + + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + updated_node_features = [] + + # update each node i + i = 0 # ego + x = warp_affine_simple(batch_node_features[b], t_matrix[i, :, :, :], (H, W)) + + cav_num = x.shape[0] + x = x.view(cav_num, C, -1).permute(2, 0, 1) # (H*W, cav_num, C), perform self attention on each pixel. + h = self.att(x, x, x) + h = h.permute(1, 2, 0).view(cav_num, C, H, W)[0, ...] # C, W, H before + out.append(h) + + out = torch.stack(out) + + return out + + + # def forward_debug(self, x, origin_x, record_len, pairwise_t_matrix): + # """ + # Fusion forwarding + # Used for debug and visualization + + + # Parameters + # ---------- + # x : torch.Tensor + # input data, (sum(n_cav), C, H, W) + + # origin_x: torch.Tensor + # pillars (sum(n_cav), C, H * downsample_rate, W * downsample_rate) + + # record_len : list + # shape: (B) + + # pairwise_t_matrix : torch.Tensor + # The transformation matrix from each cav to ego, + # shape: (B, L, L, 4, 4) + + # Returns + # ------- + # Fused feature. + # """ + # from matplotlib import pyplot as plt + + # _, C, H, W = x.shape + # B, L = pairwise_t_matrix.shape[:2] + + # split_x = self.regroup(x, record_len) + # split_origin_x = self.regroup(origin_x, record_len) + + # # (B,L,L,2,3) + # pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + # pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + # pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + # pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + # pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + + # # (B*L,L,1,H,W) + # roi_mask = torch.zeros((B, L, L, 1, H, W)).to(x) + # for b in range(B): + # N = record_len[b] + # for i in range(N): + # one_tensor = torch.ones((L,1,H,W)).to(x) + # roi_mask[b,i] = warp_affine_simple(one_tensor, pairwise_t_matrix[b][i, :, :, :],(H, W)) + + # batch_node_features = split_x + # # iteratively update the features for num_iteration times + + # # visualize warped feature map + # for b in range(B): + # # number of valid agent + # N = record_len[b] + # # (N,N,4,4) + # # t_matrix[i, j]-> from i to j + # t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + # # update each node i + # i = 0 # ego + # mask = roi_mask[b, i, :N, ...] + # # (N,C,H,W) neighbor_feature is agent i's neighborhood warping to agent i's perspective + # # Notice we put i one the first dim of t_matrix. Different from original. 
+ # # t_matrix[i,j] = Tji + # neighbor_feature = warp_affine_simple(batch_node_features[b], + # t_matrix[i, :, :, :], + # (H, W)) + # for idx in range(N): + # plt.imshow(torch.max(neighbor_feature[idx],0)[0].detach().cpu().numpy()) + # plt.savefig(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/vis_result/debug_warp_feature/feature_{b}_{idx}") + # plt.clf() + # plt.imshow(mask[idx][0].detach().cpu().numpy()) + # plt.savefig(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/vis_result/debug_warp_feature/mask_feature_{b}_{idx}") + # plt.clf() + + + + # # visualize origin pillar feature + # origin_node_features = split_origin_x + + # for b in range(B): + # N = record_len[b] + # # (N,N,4,4) + # # t_matrix[i, j]-> from i to j + # t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + # i = 0 # ego + # # (N,C,H,W) neighbor_feature is agent i's neighborhood warping to agent i's perspective + # # Notice we put i one the first dim of t_matrix. Different from original. + # # t_matrix[i,j] = Tji + # neighbor_feature = warp_affine_simple(origin_node_features[b], + # t_matrix[i, :, :, :], + # (H*self.downsample_rate, W*self.downsample_rate)) + + # for idx in range(N): + # plt.imshow(torch.max(neighbor_feature[idx],0)[0].detach().cpu().numpy()) + # plt.savefig(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/vis_result/debug_warp_feature/origin_{b}_{idx}") + # plt.clf() \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/codriving_attn.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/codriving_attn.py new file mode 100644 index 0000000000000000000000000000000000000000..15868495726f36cf56b0017e1b895269130a8c82 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/codriving_attn.py @@ -0,0 +1,349 @@ +from turtle import update +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple +from opencood.models.comm_modules.codriving import Communication + + +class ScaledDotProductAttention(nn.Module): + """ + Scaled Dot-Product Attention proposed in "Attention Is All You Need" + Compute the dot products of the query with all keys, divide each by sqrt(dim), + and apply a softmax function to obtain the weights on the values + Args: dim, mask + dim (int): dimention of attention + mask (torch.Tensor): tensor containing indices to be masked + Inputs: query, key, value, mask + - **query** (batch, q_len, d_model): tensor containing projection + vector for decoder. + - **key** (batch, k_len, d_model): tensor containing projection + vector for encoder. + - **value** (batch, v_len, d_model): tensor containing features of the + encoded input sequence. + - **mask** (-): tensor containing indices to be masked + Returns: context, attn + - **context**: tensor containing the context vector from + attention mechanism. + - **attn**: tensor containing the attention (alignment) from the + encoder outputs. 
+ """ + + def __init__(self, dim): + super(ScaledDotProductAttention, self).__init__() + self.sqrt_dim = np.sqrt(dim) + + def forward(self, query, key, value): + score = torch.bmm(query, key.transpose(1, 2)) / self.sqrt_dim + attn = F.softmax(score, -1) + context = torch.bmm(attn, value) + return context + +class AttenFusion(nn.Module): + def __init__(self, feature_dim): + super(AttenFusion, self).__init__() + self.att = ScaledDotProductAttention(feature_dim) + + def forward(self, x): + cav_num, C, H, W = x.shape + x = x.view(cav_num, C, -1).permute(2, 0, 1) # (H*W, cav_num, C), perform self attention on each pixel. + x = self.att(x, x, x) + x = x.permute(1, 2, 0).view(cav_num, C, H, W)[0] # C, W, H before + return x + +class MaxFusion(nn.Module): + def __init__(self): + super(MaxFusion, self).__init__() + + def forward(self, x): + return torch.max(x, dim=0)[0] + + +class EncodeLayer(nn.Module): + def __init__(self, channels, n_head=8, dropout=0): + super(EncodeLayer, self).__init__() + self.attn = nn.MultiheadAttention(channels, n_head, dropout) + self.linear1 = nn.Linear(channels, channels) + self.linear2 = nn.Linear(channels, channels) + + self.norm1 = nn.LayerNorm(channels) + self.norm2 = nn.LayerNorm(channels) + + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + + self.relu = nn.ReLU() + + def forward(self, q, k, v, confidence_map=None): + """ + order (seq, batch, feature) + Args: + q: (1, H*W, C) + k: (N, H*W, C) + v: (N, H*W, C) + Returns: + outputs: () + """ + residual = q + if confidence_map is not None: + context, weight = self.attn(q,k,v, quality_map=confidence_map) # (1, H*W, C) + else: + context, weight = self.attn(q,k,v) # (1, H*W, C) + context = self.dropout1(context) + output1 = self.norm1(residual + context) + + # feed forward net + residual = output1 # (1, H*W, C) + context = self.linear2(self.relu(self.linear1(output1))) + context = self.dropout2(context) + output2 = self.norm2(residual + context) + + return output2 + +class TransformerFusion(nn.Module): + def __init__(self, channels=256, n_head=8, with_spe=True, with_scm=True, dropout=0): + super(TransformerFusion, self).__init__() + + self.encode_layer = EncodeLayer(channels, n_head, dropout) + self.with_spe = with_spe + self.with_scm = with_scm + + def forward(self, batch_neighbor_feature, batch_neighbor_feature_pe, batch_confidence_map, record_len): + x_fuse = [] + B = len(record_len) + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + neighbor_feature = batch_neighbor_feature[b] + _, C, H, W = neighbor_feature.shape + neighbor_feature_flat = neighbor_feature.view(N,C,H*W) # (N, C, H*W) + + if self.with_spe: + neighbor_feature_pe = batch_neighbor_feature_pe[b] + neighbor_feature_flat_pe = neighbor_feature_pe.view(N,C,H*W) # (N, C, H*W) + query = neighbor_feature_flat_pe[0:1,...].permute(0,2,1) # (1, H*W, C) + key = neighbor_feature_flat_pe.permute(0,2,1) # (N, H*W, C) + else: + query = neighbor_feature_flat[0:1,...].permute(0,2,1) # (1, H*W, C) + key = neighbor_feature_flat.permute(0,2,1) # (N, H*W, C) + + value = neighbor_feature_flat.permute(0,2,1) + + if self.with_scm: + confidence_map = batch_confidence_map[b] + fused_feature = self.encode_layer(query, key, value, confidence_map) # (1, H*W, C) + else: + fused_feature = self.encode_layer(query, key, value) # (1, H*W, C) + + fused_feature = fused_feature.permute(0,2,1).reshape(1, C, H, W) + + x_fuse.append(fused_feature) + x_fuse = torch.concat(x_fuse, dim=0) + return x_fuse 
+ +def add_pe_map(x): + # scale = 2 * math.pi + temperature = 10000 + num_pos_feats = x.shape[-3] // 2 # positional encoding dimension. C = 2d + + mask = torch.zeros([x.shape[-2], x.shape[-1]], dtype=torch.bool, device=x.device) #[H, W] + not_mask = ~mask + y_embed = not_mask.cumsum(0, dtype=torch.float32) # [H, W] + x_embed = not_mask.cumsum(1, dtype=torch.float32) # [H, W] + + dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=x.device) # [0,1,2,...,d] + dim_t = temperature ** (2 * (dim_t // 2) / num_pos_feats) # 10000^(2k/d), k is [0,0,1,1,...,d/2,d/2] + + pos_x = x_embed[:, :, None] / dim_t + pos_y = y_embed[:, :, None] / dim_t + pos_x = torch.stack((pos_x[:, :, 0::2].sin(), pos_x[:, :, 1::2].cos()), dim=3).flatten(2) + pos_y = torch.stack((pos_y[:, :, 0::2].sin(), pos_y[:, :, 1::2].cos()), dim=3).flatten(2) + pos = torch.cat((pos_y, pos_x), dim=2).permute(2, 0, 1) # [C, H, W] + + if len(x.shape) == 4: + x_pe = x + pos[None,:,:,:] + elif len(x.shape) == 5: + x_pe = x + pos[None,None,:,:,:] + return x_pe + + +class Where2comm(nn.Module): + def __init__(self, args): + super(Where2comm, self).__init__() + + self.communication = False + self.round = 1 + if 'communication' in args: + self.communication = True + self.naive_communication = Communication(args['communication']) + if 'round' in args['communication']: + self.round = args['communication']['round'] + self.discrete_ratio = args['voxel_size'][0] # voxel_size[0]=0.4 + self.downsample_rate = args['downsample_rate'] # 2/4, downsample rate from original feature map [200, 704] + + self.agg_mode = args['agg_operator']['mode'] + self.multi_scale = args['multi_scale'] + if self.multi_scale: + layer_nums = args['layer_nums'] + num_filters = args['num_filters'] + self.num_levels = len(layer_nums) + self.fuse_modules = nn.ModuleList() + for idx in range(self.num_levels): + if self.agg_mode == 'ATTEN': + fuse_network = AttenFusion(num_filters[idx]) + elif self.agg_mode == 'MAX': + fuse_network = MaxFusion() + elif self.agg_mode == 'Transformer': + fuse_network = TransformerFusion( + channels=num_filters[idx], + n_head=args['agg_operator']['n_head'], + with_spe=args['agg_operator']['with_spe'], + with_scm=args['agg_operator']['with_scm']) + self.fuse_modules.append(fuse_network) + else: + if self.agg_mode == 'ATTEN': + self.fuse_modules = AttenFusion(args['agg_operator']['feature_dim']) + elif self.agg_mode == 'MAX': + self.fuse_modules = MaxFusion() + elif self.agg_mode == 'Transformer': + self.fuse_network = TransformerFusion( + channels=args['agg_operator']['feature_dim'], + n_head=args['agg_operator']['n_head'], + with_spe=args['agg_operator']['with_spe'], + with_scm=args['agg_operator']['with_scm']) + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, x, rm, record_len, pairwise_t_matrix, backbone=None, waypoints=None): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + Returns + ------- + Fused feature. 
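        Note: in addition to the fused feature, the body below returns the
        batch-averaged communication rate and a dict holding the per-scale
        neighbor features before fusion ('features_before_fusion').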
+ """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + feature_list = [] + for b in range(B): + feature_list.append({}) + + if self.multi_scale: + ups = [] + # backbone.__dict__() + with_resnet = True if hasattr(backbone, 'resnet') else False + if with_resnet: + feats = backbone.resnet(x) # e.g. x: [2, 64, 192, 576] -> ([2, 64, 96, 288], [2, 128, 48, 144], [2, 256, 24, 72]) + + for i in range(self.num_levels): + x = feats[i] if with_resnet else backbone.blocks[i](x) + + ############ 1. Communication (Mask the features) ######### + if i==0: + if self.communication: + batch_confidence_maps = self.regroup(rm, record_len) + _, communication_masks, communication_rates = self.naive_communication(batch_confidence_maps, record_len, pairwise_t_matrix, waypoints=waypoints) + # communication_masks = communication_masks.squeeze(0) + x = x * communication_masks + else: + communication_rates = torch.tensor(0).to(x.device) + else: + if self.communication: + communication_masks = F.max_pool2d(communication_masks, kernel_size=2) + x = x * communication_masks + + ############ 2. Split the confidence map ####################### + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] + batch_node_features = self.regroup(x, record_len) + + ############ 3. Fusion #################################### + x_fuse = [] + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + node_features = batch_node_features[b] + C, H, W = node_features.shape[1:] + neighbor_feature = warp_affine_simple(node_features, + t_matrix[0, :, :, :], + (H, W)) + x_fuse.append(self.fuse_modules[i](neighbor_feature)) # [N,C,H,W] + + feature_list[b][i] = neighbor_feature + + x_fuse = torch.stack(x_fuse) + + ############ 4. Deconv #################################### + if len(backbone.deblocks) > 0: + ups.append(backbone.deblocks[i](x_fuse)) + else: + ups.append(x_fuse) + + if len(ups) > 1: + x_fuse = torch.cat(ups, dim=1) # ups[0],ups[1],ups[2] [1, 128, 96, 288] + elif len(ups) == 1: + x_fuse = ups[0] + + if len(backbone.deblocks) > self.num_levels: + x_fuse = backbone.deblocks[-1](x_fuse) + else: + ############ 1. Split the features ####################### + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] + batch_node_features = self.regroup(x, record_len) + batch_confidence_maps = self.regroup(rm, record_len) + + ############ 2. Communication (Mask the features) ######### + if self.communication: + _, communication_masks, communication_rates = self.naive_communication(batch_confidence_maps, record_len, pairwise_t_matrix) + else: + communication_rates = torch.tensor(0).to(x.device) + + ############ 3. 
Fusion #################################### + x_fuse = [] + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + node_features = batch_node_features[b] + if self.communication: + node_features = node_features * communication_masks[b] + neighbor_feature = warp_affine_simple(node_features, + t_matrix[0, :, :, :], + (H, W)) + x_fuse.append(self.fuse_modules(neighbor_feature)) + x_fuse = torch.stack(x_fuse) + + return x_fuse, communication_rates, {'features_before_fusion':feature_list} # ms_atten x:[1, 384, 96, 288] diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/deform_fuse.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/deform_fuse.py new file mode 100644 index 0000000000000000000000000000000000000000..31491ec6363f85e6147703884ea4c0d726f1b369 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/deform_fuse.py @@ -0,0 +1,251 @@ +""" +Implementation of deformable fusion + +The design is: for ego agent f_0 and collaborative agent f_1. + +f_0[x0,y0] may not correspond to f_1[x0,y0] + +So it will learn an offset (delta_x and delta_y) for this pixel position. +Then f_0[x0,y0] will fuse with f_1[x0+delta_x, y0+delta_y] +""" + +from this import d +import torch +import torch.nn as nn +import torch.nn.functional as F +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple +from icecream import ic + +def regroup(x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + +class RigidOffset(nn.Module): + """ Learn a rigid transformation grid for the whole feature map + """ + + def __init__(self, in_ch, hidden_ch=32): + super(RigidOffset, self).__init__() + self.model = nn.Sequential( + nn.Conv2d(in_ch, hidden_ch, kernel_size=(3, 3), padding=1), + nn.LeakyReLU(negative_slope=0.01), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.Conv2d(hidden_ch, hidden_ch, kernel_size=(3, 3), padding=1), + nn.LeakyReLU(negative_slope=0.01), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.Conv2d(hidden_ch, hidden_ch, kernel_size=(3, 3), padding=1), + nn.LeakyReLU(negative_slope=0.01), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.AdaptiveAvgPool2d(output_size=1), + nn.Flatten(), + nn.Linear(in_features=hidden_ch, out_features=hidden_ch, bias=True), + nn.LeakyReLU(negative_slope=0.01), + nn.Linear(in_features=hidden_ch, out_features=3, bias=True), + ) + + def forward(self, x, return_M=False): + """ + Args: + x.shape:(sum(record_len_minus1), 2C, H, W) + Returns: + out.shape: (sum(record_len_minus1), H, W, 2) + """ + N, _, H, W = x.shape + xytheta = self.model(x) # [sum(record_len_minus1), 3], 3 corresponds to x, y, theta + + + cos = torch.cos(xytheta[:, 2]) + sin = torch.sin(xytheta[:, 2]) + + M = torch.zeros((N, 2, 3), device=x.device) + M[:, 0, 0] = cos + M[:, 0, 1] = sin + M[:, 1, 0] = -sin + M[:, 1, 1] = cos + M[:, 0, 2] = xytheta[:, 0] + M[:, 1, 2] = xytheta[:, 1] + + grid = F.affine_grid(M, size=x.shape) + + if return_M: + return grid, M + + return grid + + +class ArbitraryOffset(nn.Module): + """ Learn a offset/residual grid for each pixel + """ + + def __init__(self, in_ch, out_ch=2, hidden_ch=32): + """ + Args: + in_ch: is 2 times feature channel, since they concat together + """ + super(ArbitraryOffset, self).__init__() + self.model = nn.Sequential( + nn.Conv2d(in_ch, hidden_ch, 3, 1, 1), + 
nn.InstanceNorm2d(hidden_ch), + nn.LeakyReLU(negative_slope=0.01), + nn.Conv2d(hidden_ch, hidden_ch // 2, 3, 1, 1), + nn.InstanceNorm2d(hidden_ch // 2), + nn.LeakyReLU(negative_slope=0.01), + nn.Conv2d(hidden_ch // 2, hidden_ch // 4, 1, 1, 0), + nn.InstanceNorm2d(hidden_ch // 4), + nn.LeakyReLU(negative_slope=0.01), + nn.Conv2d(hidden_ch // 4, 2, 1, 1, 0) + ) + + def forward(self, x): + """ + Args: + x.shape:(sum(record_len_minus1), 2C, H, W) + Returns: + out.shape: (sum(record_len_minus1), H, W, 2) + """ + N, _, H, W = x.shape + + x = self.model(x) + + grid_residual = x.reshape(N, H, W, 2) + + M_origin = torch.Tensor([[[1, 0, 0], [0, 1, 0]]]) + grid_origin = F.affine_grid(M_origin, size=(1, 1, H, W)).to(x.device) + + grid = grid_residual + grid_origin + return grid + + +class DeformFusion(nn.Module): + """ deformable fusion for multiscale feature map + For each pixel in ego agent's feature map, + it will learn a offset to fuse the feature. + """ + + def __init__(self, in_ch, deform_method, cycle_consist_loss=False): + """ + Args: + in_ch: channels num of one agent's feature map. + """ + super(DeformFusion, self).__init__() + self.cycle_consistency_loss = cycle_consist_loss + + if deform_method == "rigid": + self.grid_net = RigidOffset(in_ch * 2) + elif deform_method == "arbitrary": + self.grid_net = ArbitraryOffset(in_ch * 2) + + + def forward(self, features, record_len, pairwise_t_matrix, lidar_pose=None): + """ + Args: + features: List[torch.Tensor] + multiscale features. features[i] is (sum(cav), C, H, W), different i, different C, H, W + record_len: torch.tensor + record cav number + pairwise_t_matrix: torch.Tensor, + already normalized. shape [B, N_max, N_max, 2, 3] + lidar_pose: torch.Tensor + shape [(sum(cav), 6)], this is only used to calculate intersection. If proj_first=False, then equal to pairwise_t_matrix + """ + + ##### first align them to ego coordinate, espeically when proj_first = False. + device = features[0].device + record_len_minus1 = record_len - 1 + + if(torch.sum(record_len_minus1)==0): + return features + + ms_split_x = [regroup(features[i], record_len) for i in range(len(features))] + ms_split_x_warp = [] + + for split_x in ms_split_x: # different scale + split_x_warp = [] + H, W = split_x[0].shape[2:] + for b, xx in enumerate(split_x): # different samples + N = xx.shape[0] + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + i = 0 # ego + split_x_warp.append(warp_affine_simple(xx, t_matrix[i, :, :, :], (H, W))) # [N_,C,H,W], N_ varies + ms_split_x_warp.append(split_x_warp) + + + ##### we caculate the grid by scale=1 feature, and share it with all scales. + split_x = ms_split_x_warp[0] # first scale + H, W = split_x[0].shape[:2] + + cat_features = [] + for b, xx in enumerate(split_x): + N = xx.shape[0] + cat_feature = torch.cat([xx[0:1].expand(N - 1, -1, -1, -1), xx[1:]], dim=1) # (N-1, 2C, H, W) + cat_features.append(cat_feature) + + cat_feature = torch.cat(cat_features, dim=0) # (sum(record_len_minus1), 2C, H, W) + + grid_offset = self.grid_net(cat_feature) # (sum(record_len_minus1), H, W, 2) + + grid = grid_offset # (sum(record_len_minus1),H,W,2) + ms_grid = [grid[:,::2**i,::2**i,:] for i in range(len(features))] + + ms_split_grid = [regroup(grid, record_len_minus1) for grid in ms_grid] # [[N1-1,H,W,2], [N2-1,H,W,2],...], shared for all scales. 
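# Illustrative standalone sketch (hypothetical helper, not the module's own
# code) of how a predicted rigid correction (dx, dy, theta) becomes a sampling
# grid via affine_grid and is applied to a neighbor feature map with
# grid_sample, mirroring what the grid produced by RigidOffset is used for.
import math
import torch
import torch.nn.functional as F

def apply_rigid_offset(feat: torch.Tensor, dx: float, dy: float, theta: float):
    # feat: (1, C, H, W); dx, dy are offsets in normalized [-1, 1] coordinates
    cos, sin = math.cos(theta), math.sin(theta)
    M = torch.tensor([[cos, sin, dx],
                      [-sin, cos, dy]], dtype=torch.float32).unsqueeze(0)  # (1, 2, 3)
    grid = F.affine_grid(M, size=feat.shape, align_corners=False)
    return F.grid_sample(feat, grid, align_corners=False)

warped = apply_rigid_offset(torch.randn(1, 64, 96, 288), dx=0.05, dy=-0.02, theta=0.1)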
+ + ##### fusion + ms_fused_features = [] + for scale, split_x in enumerate(ms_split_x_warp): + fused_features = [] + for b, xx in enumerate(split_x): + if xx.shape[0] == 1: + fused_features.append(xx[0]) + else: + neighbor_feature_deform = torch.cat([F.grid_sample(xx[1:], ms_split_grid[scale][b]), xx[0:1]], dim=0) # (N-1, C, H, W) + fuesd_feature = torch.max(neighbor_feature_deform, dim=0)[0] + fused_features.append(fuesd_feature) + ms_fused_features.append(torch.stack(fused_features)) + + + if self.cycle_consistency_loss: + split_x = ms_split_x[0] # before warping to the ego agent, scale = 1 + H, W = split_x[0].shape[2:] + + cat_features = [] + for b, xx in enumerate(split_x): + N = xx.shape[0] + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + """ + [agent0, agent1] -> M_0 + [agent1, agent2] -> M_1 + ... + [agentN-1,agent0] -> M_N-1 + M_0@M_1@...@M_N-1 = I + + The latter should align to the former agent. + """ + latter_agent = torch.cat([xx[1:],xx[:1]], dim=0) # [agent1,agent2,..., agent0] + t_matrix_adj = torch.stack([t_matrix[i,(i+1)%N] for i in range(N)]) + latter_agent_warp = warp_affine_simple(latter_agent, t_matrix_adj, dsize=(H,W)) + cat_feature = torch.cat([xx, latter_agent_warp], dim=1) + cat_features.append(cat_feature) + + cat_feature = torch.cat(cat_features, dim=0) # (sum(record_len), 2C, H, W) + _, M = self.grid_net(cat_feature, return_M=True) # (sum(record_len)*H*W, 2) + + M_homo = F.pad(M, (0, 0, 0, 1), "constant", 0) # pad 2nd to last by (0, 1) + M_homo[:, 2, 2] = 1 + + split_M = regroup(M_homo, record_len) + + return ms_fused_features + + +if __name__ == "__main__": + features = [torch.randn(4,64,200,704), torch.randn(4,128,100,352), torch.randn(4,256,50,176)] + record_len = torch.tensor([1,3]) + pairwise_t_matirx = torch.eye(4).view(1,1,1,4,4).expand(2,5,5,4,4) + + model = DeformFusion(in_ch=64, deform_method='rigid', cycle_consist_loss=True) + + out = model(features, record_len, pairwise_t_matirx) + for xx in out: + print(xx.shape) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/deform_transformer_fuse.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/deform_transformer_fuse.py new file mode 100644 index 0000000000000000000000000000000000000000..0e13e58bf68f04dfee64e4ed01948644251e7aa4 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/deform_transformer_fuse.py @@ -0,0 +1,205 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang +# License: TDG-Attribution-NonCommercial-NoDistrib + + +""" +Implementation of transformer encoder fusion. +It is only a method to fuse features +Not rely on specific backbone. 
+""" + +import torch +import torch.nn as nn + +from opencood.models.sub_modules.torch_transformation_utils import \ + get_discretized_transformation_matrix, get_transformation_matrix, \ + warp_affine_simple, get_rotated_roi +import torch.nn.functional as F +from icecream import ic +from mmcv.ops import DeformConv2dPack as DCN2d + +class MultiheadAttBlock(nn.Module): + def __init__(self, channels, n_head=8, dropout=0): + super(MultiheadAttBlock, self).__init__() + self.attn = nn.MultiheadAttention(channels, n_head, dropout) + + def forward(self, q, k, v): + """ + order (seq, batch, feature) + Args: + q: (1, H*W, C) + k: (N, H*W, C) + v: (N, H*W, C) + Returns: + outputs: () + """ + context, weight = self.attn(q,k,v) # (1, H*W, C) + + return context + +class TransformerBlock(nn.Module): + def __init__(self, channels, n_head=8, dropout=0): + super(TransformerBlock, self).__init__() + self.attn = nn.MultiheadAttention(channels, n_head, dropout) + self.linear1 = nn.Linear(channels, channels) + self.linear2 = nn.Linear(channels, channels) + + self.norm1 = nn.LayerNorm(channels) + self.norm2 = nn.LayerNorm(channels) + + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + + self.relu = nn.ReLU() + + def forward(self, q, k, v): + """ + order (seq, batch, feature) + Args: + q: (1, H*W, C) + k: (N, H*W, C) + v: (N, H*W, C) + Returns: + outputs: () + """ + residual = q + context, weight = self.attn(q,k,v) # (1, H*W, C) + context = self.dropout1(context) + output1 = self.norm1(residual + context) + + # feed forward net + residual = output1 # (1, H*W, C) + context = self.linear2(self.relu(self.linear1(output1))) + context = self.dropout2(context) + output2 = self.norm2(residual + context) + + return output2 + + + +class DeformTransformerFusion(nn.Module): + def __init__(self, args): + super(DeformTransformerFusion, self).__init__() + + self.channels = args['in_channels'] + self.n_head = args['n_head'] + self.dropout = args['dropout_rate'] + self.discrete_ratio = args['voxel_size'][0] # voxel_size[0]=0.4 0.16m one pixel? I think it's 0.4m per pixel, according to [200, 704] + self.downsample_rate = args['downsample_rate'] # 4, downsample rate from original feature map [200, 704] + + self.deform_conv1 = DCN2d(self.channels, self.channels, kernel_size=1, stride=1, padding=0) + self.deform_conv2 = DCN2d(self.channels, self.channels, kernel_size=3, stride=1, padding=1) + if args['only_attention']: + self.transformer_block = MultiheadAttBlock(self.channels, self.n_head, self.dropout) + else: + self.transformer_block = TransformerBlock(self.channels, self.n_head, self.dropout) + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, x, record_len, pairwise_t_matrix): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + Returns + ------- + Fused feature. + """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] 
+ split_x = self.regroup(x, record_len) + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + + # (B*L,L,1,H,W) + roi_mask = torch.zeros((B, L, L, 1, H, W)).to(x) + for b in range(B): + N = record_len[b] + for i in range(N): + one_tensor = torch.ones((L,1,H,W)).to(x) + roi_mask[b,i] = warp_affine_simple(one_tensor, pairwise_t_matrix[b][i, :, :, :],(H, W)) + + batch_node_features = split_x + # iteratively update the features for num_iteration times + + out = [] + # iterate each batch + for b in range(B): + + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + updated_node_features = [] + + # update each node i + i = 0 # ego + # (N,1,H,W) + mask = roi_mask[b, i, :N, ...] + + # (N,C,H,W) neighbor_feature is agent i's neighborhood warping to agent i's perspective + # Notice we put i one the first dim of t_matrix. Different from original. + # t_matrix[i,j] = Tji + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[i, :, :, :], + (H, W)) + + neighbor_feature_deform1_flat = self.deform_conv1(neighbor_feature).view(N,C,H*W) + neighbor_feature_deform2_flat = self.deform_conv2(neighbor_feature).view(N,C,H*W) + + neighbor_feature_flat = neighbor_feature.view(N,C,H*W) # (N, C, H*W) + + + + query = neighbor_feature_flat[0:1,...].permute(0,2,1) # (1, H*W, C) + + key0 = neighbor_feature_flat.permute(0,2,1) # (N, H*W, C) + key1 = neighbor_feature_deform1_flat.permute(0,2,1) # (N, H*W, C) + key2 = neighbor_feature_deform2_flat.permute(0,2,1) # (N, H*W, C) + + key = torch.cat((key0,key1,key2), dim=0) # (3N, H*W, C) + + value = key + + fusion_result = self.transformer_block(query, key, value) # (1, H*W, C) + fusion_result = fusion_result.permute(0,2,1).reshape(1, C, H, W)[0] + + out.append(fusion_result) + + out = torch.stack(out) + + return out + + + + + + + + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/disco_fuse.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/disco_fuse.py new file mode 100644 index 0000000000000000000000000000000000000000..6222d72128d0b08557484155fa86f68d6cf7ae01 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/disco_fuse.py @@ -0,0 +1,96 @@ +# fusion method by disconet +# no kd loss +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple + +def regroup(x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + +class DiscoFusion(nn.Module): + def __init__(self, args): + super(DiscoFusion, self).__init__() + self.discrete_ratio = args['voxel_size'][0] # voxel_size[0]=0.4 + self.downsample_rate = args['downsample_rate'] # 2/4, downsample rate from original feature map [200, 704] + self.pixel_weight_layer = PixelWeightLayer(args['in_channels']) + + def forward(self, x, record_len, pairwise_t_matrix): + ########## FUSION START ########## + # we concat 
ego's feature with other agent + # first transform feature to ego's coordinate + split_x = regroup(x, record_len) + + B = pairwise_t_matrix.shape[0] + _, C, H, W = x.shape + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + out = [] + + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + # update each node i + i = 0 # ego + # (N, C, H, W) neighbor_feature is agent i's neighborhood warping to agent i's perspective + # Notice we put i one the first dim of t_matrix. Different from original. + # t_matrix[i,j] = Tji + neighbor_feature = warp_affine_simple(split_x[b], + t_matrix[i, :, :, :], + (H, W)) + + # (N, C, H, W) + ego_feature = split_x[b][0].view(1, C, H, W).expand(N, -1, -1, -1) + # (N, 2C, H, W) + neighbor_feature_cat = torch.cat((neighbor_feature, ego_feature), dim=1) + # (N, 1, H, W) + agent_weight = self.pixel_weight_layer(neighbor_feature_cat) + # (N, 1, H, W) + agent_weight = F.softmax(agent_weight, dim=0) + + agent_weight = agent_weight.expand(-1, C, -1, -1) + # (N, C, H, W) + feature_fused = torch.sum(agent_weight * neighbor_feature, dim=0) + out.append(feature_fused) + + return torch.stack(out) + + + +class PixelWeightLayer(nn.Module): + def __init__(self, channel): + super(PixelWeightLayer, self).__init__() + + self.conv1_1 = nn.Conv2d(channel * 2, 128, kernel_size=1, stride=1, padding=0) + self.bn1_1 = nn.BatchNorm2d(128) + + self.conv1_2 = nn.Conv2d(128, 32, kernel_size=1, stride=1, padding=0) + self.bn1_2 = nn.BatchNorm2d(32) + + self.conv1_3 = nn.Conv2d(32, 8, kernel_size=1, stride=1, padding=0) + self.bn1_3 = nn.BatchNorm2d(8) + + self.conv1_4 = nn.Conv2d(8, 1, kernel_size=1, stride=1, padding=0) + # self.bn1_4 = nn.BatchNorm2d(1) + + def forward(self, x): + x = x.view(-1, x.size(-3), x.size(-2), x.size(-1)) + x_1 = F.relu(self.bn1_1(self.conv1_1(x))) + x_1 = F.relu(self.bn1_2(self.conv1_2(x_1))) + x_1 = F.relu(self.bn1_3(self.conv1_3(x_1))) + x_1 = F.relu(self.conv1_4(x_1)) + + return x_1 \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/f_cooper_fuse.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/f_cooper_fuse.py new file mode 100644 index 0000000000000000000000000000000000000000..0527e7694fb74911287af0daf11937354e991604 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/f_cooper_fuse.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +""" +Implementation of F-cooper maxout fusing. 
+""" +import torch +import torch.nn as nn + + +class SpatialFusion(nn.Module): + def __init__(self): + super(SpatialFusion, self).__init__() + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, x, record_len): + # x: B, C, H, W, split x:[(B1, C, W, H), (B2, C, W, H)] + split_x = self.regroup(x, record_len) + out = [] + + for xx in split_x: + xx = torch.max(xx, dim=0, keepdim=True)[0] + out.append(xx) + return torch.cat(out, dim=0) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/fuse_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/fuse_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..a42d172cc5b33044d81a899b89a4b75d700a704a --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/fuse_utils.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch +import numpy as np + +from einops import rearrange +from opencood.utils.common_utils import torch_tensor_to_numpy + + +def regroup(dense_feature, record_len, max_len): + """ + Regroup the data based on the record_len. + Parameters + ---------- + dense_feature : torch.Tensor + N, C, H, W + record_len : list + [sample1_len, sample2_len, ...] + max_len : int + Maximum cav number + Returns + ------- + regroup_feature : torch.Tensor + B, L, C, H, W + """ + cum_sum_len = list(np.cumsum(torch_tensor_to_numpy(record_len))) + split_features = torch.tensor_split(dense_feature, + cum_sum_len[:-1]) + regroup_features = [] + mask = [] + + for split_feature in split_features: + # M, C, H, W + feature_shape = split_feature.shape + + # the maximum M is 5 as most 5 cavs + padding_len = max_len - feature_shape[0] + mask.append([1] * feature_shape[0] + [0] * padding_len) + + padding_tensor = torch.zeros(padding_len, feature_shape[1], + feature_shape[2], feature_shape[3]) + padding_tensor = padding_tensor.to(split_feature.device) + + split_feature = torch.cat([split_feature, padding_tensor], + dim=0) + + # 1, 5C, H, W + split_feature = split_feature.view(-1, + feature_shape[2], + feature_shape[3]).unsqueeze(0) + regroup_features.append(split_feature) + + # B, 5C, H, W + regroup_features = torch.cat(regroup_features, dim=0) + # B, L, C, H, W + regroup_features = rearrange(regroup_features, + 'b (l c) h w -> b l c h w', + l=max_len) + mask = torch.from_numpy(np.array(mask)).to(regroup_features.device) + + return regroup_features, mask \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/fusion_in_one.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/fusion_in_one.py new file mode 100644 index 0000000000000000000000000000000000000000..32afb3fcc00d6700c432e50a32d0ac0ad689cd23 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/fusion_in_one.py @@ -0,0 +1,505 @@ +""" +A model zoo for intermediate fusion. +Please make sure your pairwise_t_matrix is normalized before using it. +Enjoy it. 
+""" + +import torch +from torch import nn +from icecream import ic +from opencood.models.fuse_modules.att_fuse import ScaledDotProductAttention +from opencood.models.sub_modules.torch_transformation_utils import \ + warp_affine_simple +from opencood.models.fuse_modules.fuse_utils import regroup as Regroup +from opencood.models.fuse_modules.att_fuse import ScaledDotProductAttention +from opencood.models.comm_modules.where2comm import Communication +from opencood.models.fuse_modules.where2comm_attn import TransformerFusion +import torch.nn.functional as F + +def regroup(x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + +def warp_feature(x, record_len, pairwise_t_matrix): + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + split_x = regroup(x, record_len) + batch_node_features = split_x + out = [] + # iterate each batch + for b in range(B): + N = record_len[b] + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + # update each node i + i = 0 # ego + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[i, :, :, :], + (H, W)) + out.append(neighbor_feature) + + out = torch.cat(out, dim=0) + + return out + +class MaxFusion(nn.Module): + def __init__(self): + super(MaxFusion, self).__init__() + + def forward(self, x, record_len, pairwise_t_matrix): + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + split_x = regroup(x, record_len) + batch_node_features = split_x + out = [] + # iterate each batch + for b in range(B): + N = record_len[b] + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + # update each node i + i = 0 # ego + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[i, :, :, :], + (H, W)) + out.append(torch.max(neighbor_feature, dim=0)[0]) + out = torch.stack(out) + + return out + +class AttFusion(nn.Module): + def __init__(self, feature_dims): + super(AttFusion, self).__init__() + self.att = ScaledDotProductAttention(feature_dims) + + def forward(self, xx, record_len, pairwise_t_matrix): + _, C, H, W = xx.shape + B, L = pairwise_t_matrix.shape[:2] + split_x = regroup(xx, record_len) + batch_node_features = split_x + out = [] + # iterate each batch + for b in range(B): + N = record_len[b] + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + # update each node i + i = 0 # ego + x = warp_affine_simple(batch_node_features[b], t_matrix[i, :, :, :], (H, W)) + cav_num = x.shape[0] + x = x.view(cav_num, C, -1).permute(2, 0, 1) # (H*W, cav_num, C), perform self attention on each pixel. + h = self.att(x, x, x) + h = h.permute(1, 2, 0).view(cav_num, C, H, W)[0, ...] 
# C, W, H before + out.append(h) + + out = torch.stack(out) + return out + +class DiscoFusion(nn.Module): + def __init__(self, feature_dims): + super(DiscoFusion, self).__init__() + from opencood.models.fuse_modules.disco_fuse import PixelWeightLayer + self.pixel_weight_layer = PixelWeightLayer(feature_dims) + + def forward(self, xx, record_len, pairwise_t_matrix): + _, C, H, W = xx.shape + B, L = pairwise_t_matrix.shape[:2] + split_x = regroup(xx, record_len) + out = [] + + for b in range(B): + N = record_len[b] + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + i = 0 # ego + neighbor_feature = warp_affine_simple(split_x[b], + t_matrix[i, :, :, :], + (H, W)) + # (N, C, H, W) + ego_feature = split_x[b][0].view(1, C, H, W).expand(N, -1, -1, -1) + # (N, 2C, H, W) + neighbor_feature_cat = torch.cat((neighbor_feature, ego_feature), dim=1) + # (N, 1, H, W) + agent_weight = self.pixel_weight_layer(neighbor_feature_cat) + # (N, 1, H, W) + agent_weight = F.softmax(agent_weight, dim=0) + + agent_weight = agent_weight.expand(-1, C, -1, -1) + # (N, C, H, W) + feature_fused = torch.sum(agent_weight * neighbor_feature, dim=0) + out.append(feature_fused) + + return torch.stack(out) + +class V2VNetFusion(nn.Module): + def __init__(self, args): + super(V2VNetFusion, self).__init__() + from opencood.models.sub_modules.convgru import ConvGRU + in_channels = args['in_channels'] + H, W = args['conv_gru']['H'], args['conv_gru']['W'] # remember to modify for v2xsim dataset + kernel_size = args['conv_gru']['kernel_size'] + num_gru_layers = args['conv_gru']['num_layers'] + self.num_iteration = args['num_iteration'] + self.gru_flag = args['gru_flag'] + self.agg_operator = args['agg_operator'] + + self.msg_cnn = nn.Conv2d(in_channels * 2, in_channels, kernel_size=3, + stride=1, padding=1) + self.conv_gru = ConvGRU(input_size=(H, W), + input_dim=in_channels * 2, + hidden_dim=[in_channels] * num_gru_layers, + kernel_size=kernel_size, + num_layers=num_gru_layers, + batch_first=True, + bias=True, + return_all_layers=False) + self.mlp = nn.Linear(in_channels, in_channels) + + def forward(self, x, record_len, pairwise_t_matrix): + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + split_x = regroup(x, record_len) + # (B*L,L,1,H,W) + roi_mask = torch.zeros((B, L, L, 1, H, W)).to(x) + for b in range(B): + N = record_len[b] + for i in range(N): + one_tensor = torch.ones((L,1,H,W)).to(x) + roi_mask[b,i] = warp_affine_simple(one_tensor, pairwise_t_matrix[b][i, :, :, :],(H, W)) + + batch_node_features = split_x + # iteratively update the features for num_iteration times + for l in range(self.num_iteration): + + batch_updated_node_features = [] + # iterate each batch + for b in range(B): + + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + updated_node_features = [] + + # update each node i + for i in range(N): + # (N,1,H,W) + mask = roi_mask[b, i, :N, ...] + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[i, :, :, :], + (H, W)) + + # (N,C,H,W) + ego_agent_feature = batch_node_features[b][i].unsqueeze( + 0).repeat(N, 1, 1, 1) + #(N,2C,H,W) + neighbor_feature = torch.cat( + [neighbor_feature, ego_agent_feature], dim=1) + # (N,C,H,W) + # message contains all feature map from j to ego i. 
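+                    # msg_cnn compresses the concatenated (neighbor, ego) maps from 2C
+                    # back to C channels; the ROI mask zeroes out locations that fall
+                    # outside the neighbor's field of view after warping.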
+ message = self.msg_cnn(neighbor_feature) * mask + + # (C,H,W) + if self.agg_operator=="avg": + agg_feature = torch.mean(message, dim=0) + elif self.agg_operator=="max": + agg_feature = torch.max(message, dim=0)[0] + else: + raise ValueError("agg_operator has wrong value") + # (2C, H, W) + cat_feature = torch.cat( + [batch_node_features[b][i, ...], agg_feature], dim=0) + # (C,H,W) + if self.gru_flag: + gru_out = \ + self.conv_gru(cat_feature.unsqueeze(0).unsqueeze(0))[ + 0][ + 0].squeeze(0).squeeze(0) + else: + gru_out = batch_node_features[b][i, ...] + agg_feature + updated_node_features.append(gru_out.unsqueeze(0)) + # (N,C,H,W) + batch_updated_node_features.append( + torch.cat(updated_node_features, dim=0)) + batch_node_features = batch_updated_node_features + # (B,C,H,W) + out = torch.cat( + [itm[0, ...].unsqueeze(0) for itm in batch_node_features], dim=0) + # (B,C,H,W) -> (B, H, W, C) -> (B,C,H,W) + out = self.mlp(out.permute(0, 2, 3, 1)).permute(0, 3, 1, 2) + + return out + +class V2XViTFusion(nn.Module): + def __init__(self, args): + super(V2XViTFusion, self).__init__() + from opencood.models.sub_modules.v2xvit_basic import V2XTransformer + self.fusion_net = V2XTransformer(args['transformer']) + + def forward(self, x, record_len, pairwise_t_matrix): + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + regroup_feature, mask = Regroup(x, record_len, L) + prior_encoding = \ + torch.zeros(len(record_len), L, 3, 1, 1).to(record_len.device) + + # prior encoding should include [velocity, time_delay, infra], but it is not supported by all basedataset. + # it is possible to modify the xxx_basedataset.py and intermediatefusiondataset.py to retrieve these information + prior_encoding = prior_encoding.repeat(1, 1, 1, + regroup_feature.shape[3], + regroup_feature.shape[4]) + + regroup_feature = torch.cat([regroup_feature, prior_encoding], dim=2) + regroup_feature_new = [] + + for b in range(B): + ego = 0 + regroup_feature_new.append(warp_affine_simple(regroup_feature[b], pairwise_t_matrix[b, ego], (H, W))) + regroup_feature = torch.stack(regroup_feature_new) + + # b l c h w -> b l h w c + regroup_feature = regroup_feature.permute(0, 1, 3, 4, 2) + # transformer fusion. In perfect setting, there is no delay. + # it is possible to modify the xxx_basedataset.py and intermediatefusiondataset.py to retrieve these information + spatial_correction_matrix = torch.eye(4).expand(len(record_len), L, 4, 4).to(record_len.device) + fused_feature = self.fusion_net(regroup_feature, mask, spatial_correction_matrix) + # b h w c -> b c h w + fused_feature = fused_feature.permute(0, 3, 1, 2) + + return fused_feature + +class When2commFusion(nn.Module): + def __init__(self, args): + super(When2commFusion, self).__init__() + import numpy as np + from opencood.models.fuse_modules.when2com_fuse import policy_net4, km_generator_v2, MIMOGeneralDotProductAttention, AdditiveAttentin + + self.in_channels = args['in_channels'] + self.feat_H = args['H'] + self.feat_W = args['W'] + self.query_size = args['query_size'] + self.key_size = args['key_size'] + + + self.query_key_net = policy_net4(self.in_channels) + self.key_net = km_generator_v2(out_size=self.key_size) + self.query_net = km_generator_v2(out_size=self.query_size) + # self.attention_net = MIMOGeneralDotProductAttention(self.query_size, self.key_size) + self.attention_net = AdditiveAttentin(self.key_size, self.query_size) + + def forward(self, x, record_len, pairwise_t_matrix): + """ + Fusion forwarding. 
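+
+        When2com builds a query from the ego feature and keys from every warped
+        neighbor feature, then attends over the neighbors to produce the fused
+        map for the ego agent.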
+ + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + weight: torch.Tensor + Weight of aggregating coming message + shape: (B, L, L) + + Returns + ------- + Fused feature. + """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] + split_x = regroup(x, record_len) + batch_node_features = split_x + updated_node_features = [] + for b in range(B): + + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + # update each node i + # (N,1,H,W) + # (N,C,H,W) neighbor_feature is agent i's neighborhood warping to agent i's perspective + # Notice we put i one the first dim of t_matrix. Different from original. + # t_matrix[i,j] = Tji + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[0, :, :, :], + (H, W)) + query_key_maps = self.query_key_net(neighbor_feature) + + keys = self.key_net(query_key_maps).unsqueeze(0) # [N, C_k] + query = self.query_net(query_key_maps[0].unsqueeze(0)).unsqueeze(0) # [1, C_q] + + neighbor_feature = neighbor_feature.unsqueeze(0) # [1, N, C, H, W] + + feat_fuse, prob_action = self.attention_net(query, keys, neighbor_feature, sparse=False) + + updated_node_features.append(feat_fuse) + + out = torch.cat(updated_node_features, dim=0) + + return out + + + +class Where2commFusion(nn.Module): + def __init__(self, args): + super(Where2commFusion, self).__init__() + + self.communication = False + self.round = 1 + if 'communication' in args: + self.communication = True + self.naive_communication = Communication(args['communication']) + if 'round' in args['communication']: + self.round = args['communication']['round'] + + self.agg_mode = args['agg_operator']['mode'] + self.multi_scale = args['multi_scale'] + if self.multi_scale: + layer_nums = args['layer_nums'] + num_filters = args['num_filters'] + self.num_levels = len(layer_nums) + self.fuse_modules = nn.ModuleList() + for idx in range(self.num_levels): + if self.agg_mode == 'ATTEN': + fuse_network = AttFusion(num_filters[idx]) + elif self.agg_mode == 'MAX': + fuse_network = MaxFusion() + elif self.agg_mode == 'Transformer': + fuse_network = TransformerFusion( + channels=num_filters[idx], + n_head=args['agg_operator']['n_head'], + with_spe=args['agg_operator']['with_spe'], + with_scm=args['agg_operator']['with_scm']) + self.fuse_modules.append(fuse_network) + else: + if self.agg_mode == 'ATTEN': + self.fuse_modules = AttFusion(args['agg_operator']['feature_dim']) + elif self.agg_mode == 'MAX': + self.fuse_modules = MaxFusion() + elif self.agg_mode == 'Transformer': + self.fuse_network = TransformerFusion( + channels=args['agg_operator']['feature_dim'], + n_head=args['agg_operator']['n_head'], + with_spe=args['agg_operator']['with_spe'], + with_scm=args['agg_operator']['with_scm']) + + def forward(self, x, rm, record_len, pairwise_t_matrix, backbone=None, heads=None): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + Returns + ------- + Fused feature. 
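+
+        Notes
+        -----
+        `rm` holds the per-agent confidence maps that the communication module
+        turns into spatial masks deciding which locations are transmitted;
+        `backbone` is only needed for the multi-scale path. Besides the fused
+        feature, the method also returns the communication rate and an empty dict.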
+ """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + if self.multi_scale: + ups = [] + # backbone.__dict__() + with_resnet = True if hasattr(backbone, 'resnet') else False + if with_resnet: + feats = backbone.resnet(x) + + for i in range(self.num_levels): + x = feats[i] if with_resnet else backbone.blocks[i](x) + + ############ 1. Communication (Mask the features) ######### + if i==0: + if self.communication: + batch_confidence_maps = regroup(rm, record_len) + _, communication_masks, communication_rates = self.naive_communication(batch_confidence_maps, record_len, pairwise_t_matrix) + x = x * communication_masks + else: + communication_rates = torch.tensor(0).to(x.device) + + ############ 2. Split the confidence map ####################### + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] + batch_node_features = regroup(x, record_len) + + ############ 3. Fusion #################################### + x_fuse = [] + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + node_features = batch_node_features[b] + C, H, W = node_features.shape[1:] + neighbor_feature = warp_affine_simple(node_features, + t_matrix[0, :, :, :], + (H, W)) + x_fuse.append(self.fuse_modules[i](neighbor_feature)) + x_fuse = torch.stack(x_fuse) + + ############ 4. Deconv #################################### + if len(backbone.deblocks) > 0: + ups.append(backbone.deblocks[i](x_fuse)) + else: + ups.append(x_fuse) + + if len(ups) > 1: + x_fuse = torch.cat(ups, dim=1) + elif len(ups) == 1: + x_fuse = ups[0] + + if len(backbone.deblocks) > self.num_levels: + x_fuse = backbone.deblocks[-1](x_fuse) + else: + ############ 1. Split the features ####################### + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] + batch_node_features = self.regroup(x, record_len) + batch_confidence_maps = self.regroup(rm, record_len) + + ############ 2. Communication (Mask the features) ######### + if self.communication: + _, communication_masks, communication_rates = self.naive_communication(batch_confidence_maps, record_len, pairwise_t_matrix) + else: + communication_rates = torch.tensor(0).to(x.device) + + ############ 3. 
Fusion #################################### + x_fuse = [] + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + node_features = batch_node_features[b] + if self.communication: + node_features = node_features * communication_masks[b] + neighbor_feature = warp_affine_simple(node_features, + t_matrix[0, :, :, :], + (H, W)) + x_fuse.append(self.fuse_modules(neighbor_feature)) + x_fuse = torch.stack(x_fuse) + + return x_fuse, communication_rates, {} \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/max_fuse.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/max_fuse.py new file mode 100644 index 0000000000000000000000000000000000000000..538cf4f2039a6bcc6a9df728c355326a19af6f3d --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/max_fuse.py @@ -0,0 +1,200 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang +# License: TDG-Attribution-NonCommercial-NoDistrib + +import torch +import torch.nn as nn + +from opencood.models.sub_modules.torch_transformation_utils import \ + get_discretized_transformation_matrix, get_transformation_matrix, \ + warp_affine_simple, get_rotated_roi +from opencood.models.sub_modules.convgru import ConvGRU +from icecream import ic +from matplotlib import pyplot as plt + +class MaxFusion(nn.Module): + def __init__(self, args): + super(MaxFusion, self).__init__() + self.discrete_ratio = args['voxel_size'][0] # voxel_size[0]=0.4 + self.downsample_rate = args['downsample_rate'] # 2/4, downsample rate from original feature map [200, 704] + + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, x, record_len, pairwise_t_matrix): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + Returns + ------- + Fused feature. + """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] 
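+        # Regroup the stacked features by sample, then convert each 4x4 transform
+        # into the normalized 2x3 affine expected by warp_affine_simple: rotation
+        # terms are rescaled by the feature map aspect ratio and translations are
+        # mapped onto the [-1, 1] sampling grid.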
+ split_x = self.regroup(x, record_len) + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + + # (B*L,L,1,H,W) + roi_mask = torch.zeros((B, L, L, 1, H, W)).to(x) + for b in range(B): + N = record_len[b] + for i in range(N): + one_tensor = torch.ones((L,1,H,W)).to(x) + roi_mask[b,i] = warp_affine_simple(one_tensor, pairwise_t_matrix[b][i, :, :, :],(H, W)) + + batch_node_features = split_x + # iteratively update the features for num_iteration times + + out = [] + # iterate each batch + for b in range(B): + + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + updated_node_features = [] + + # update each node i + i = 0 # ego + # (N,1,H,W) + mask = roi_mask[b, i, :N, ...] + # (N,C,H,W) neighbor_feature is agent i's neighborhood warping to agent i's perspective + # Notice we put i one the first dim of t_matrix. Different from original. + # t_matrix[i,j] = Tji + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[i, :, :, :], + (H, W)) + out.append(torch.max(neighbor_feature, dim=0)[0]) + out = torch.stack(out) + + return out + + + def forward_debug(self, x, origin_x, record_len, pairwise_t_matrix): + """ + Fusion forwarding + Used for debug and visualization + + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + origin_x: torch.Tensor + pillars (sum(n_cav), C, H * downsample_rate, W * downsample_rate) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + Returns + ------- + Fused feature. + """ + from matplotlib import pyplot as plt + + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + split_x = self.regroup(x, record_len) + split_origin_x = self.regroup(origin_x, record_len) + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + + # (B*L,L,1,H,W) + roi_mask = torch.zeros((B, L, L, 1, H, W)).to(x) + for b in range(B): + N = record_len[b] + for i in range(N): + one_tensor = torch.ones((L,1,H,W)).to(x) + roi_mask[b,i] = warp_affine_simple(one_tensor, pairwise_t_matrix[b][i, :, :, :],(H, W)) + + batch_node_features = split_x + # iteratively update the features for num_iteration times + + # visualize warped feature map + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + # update each node i + i = 0 # ego + mask = roi_mask[b, i, :N, ...] + # (N,C,H,W) neighbor_feature is agent i's neighborhood warping to agent i's perspective + # Notice we put i one the first dim of t_matrix. Different from original. 
+ # t_matrix[i,j] = Tji + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[i, :, :, :], + (H, W)) + for idx in range(N): + plt.imshow(torch.max(neighbor_feature[idx],0)[0].detach().cpu().numpy()) + plt.savefig(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/vis_result/debug_warp_feature/feature_{b}_{idx}") + plt.clf() + plt.imshow(mask[idx][0].detach().cpu().numpy()) + plt.savefig(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/vis_result/debug_warp_feature/mask_feature_{b}_{idx}") + plt.clf() + + + + # visualize origin pillar feature + origin_node_features = split_origin_x + + for b in range(B): + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + i = 0 # ego + # (N,C,H,W) neighbor_feature is agent i's neighborhood warping to agent i's perspective + # Notice we put i one the first dim of t_matrix. Different from original. + # t_matrix[i,j] = Tji + neighbor_feature = warp_affine_simple(origin_node_features[b], + t_matrix[i, :, :, :], + (H*self.downsample_rate, W*self.downsample_rate)) + + for idx in range(N): + plt.imshow(torch.max(neighbor_feature[idx],0)[0].detach().cpu().numpy()) + plt.savefig(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/vis_result/debug_warp_feature/origin_{b}_{idx}") + plt.clf() \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/mean_fuse.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/mean_fuse.py new file mode 100644 index 0000000000000000000000000000000000000000..61d33d787e0ceaa256d6045d07d7a3b13d68e988 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/mean_fuse.py @@ -0,0 +1,129 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang +# License: TDG-Attribution-NonCommercial-NoDistrib + + +""" +Implementation of V2VNet Fusion +""" + +import torch +import torch.nn as nn + +from opencood.models.sub_modules.torch_transformation_utils import \ + get_discretized_transformation_matrix, get_transformation_matrix, \ + warp_affine_simple, get_rotated_roi +from opencood.models.sub_modules.convgru import ConvGRU +from icecream import ic +from matplotlib import pyplot as plt +from icecream import ic + +class MeanFusion(nn.Module): + def __init__(self, args): + super(MeanFusion, self).__init__() + + self.discrete_ratio = args['voxel_size'][0] # voxel_size[0]=0.4 0.16m one pixel? I think it's 0.4m per pixel, according to [200, 704] + self.downsample_rate = args['downsample_rate'] # 4, downsample rate from original feature map [200, 704] + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, x, record_len, pairwise_t_matrix): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + Returns + ------- + Fused feature. + """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] 
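+        # Unlike max fusion, averaging also counts the zero padding outside each
+        # neighbor's warped field of view, so responses near the range boundary
+        # are diluted as the number of agents grows.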
+ split_x = self.regroup(x, record_len) + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + + # (B*L,L,1,H,W) + roi_mask = torch.zeros((B, L, L, 1, H, W)).to(x) + for b in range(B): + N = record_len[b] + for i in range(N): + one_tensor = torch.ones((L,1,H,W)).to(x) + roi_mask[b,i] = warp_affine_simple(one_tensor, pairwise_t_matrix[b][i, :, :, :],(H, W)) + + batch_node_features = split_x + # iteratively update the features for num_iteration times + + out = [] + # iterate each batch + for b in range(B): + + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + # update each node i + i = 0 # ego + # (N,1,H,W) + mask = roi_mask[b, i, :N, ...] + # (N,C,H,W) neighbor_feature is agent i's neighborhood warping to agent i's perspective + # Notice we put i one the first dim of t_matrix. Different from original. + # t_matrix[i,j] = Tji + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[i, :, :, :], + (H, W)) + + out.append(torch.mean(neighbor_feature, dim=0)) + out = torch.stack(out) + + return out + + + + + + + + + + + + + + + + +# from matplotlib import pyplot as plt +# for idx in range(3): +# plt.imshow(torch.max(neighbor_feature[idx],0)[0].detach().cpu().numpy()) +# plt.savefig(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/vis_result/in_mean/agent{idx}") +# plt.clf() +# plt.imshow(mask[idx][0].detach().cpu().numpy()) +# plt.savefig(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/vis_result/in_mean/mask{idx}") +# plt.clf() + +# raise \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/modality_aware_fusion.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/modality_aware_fusion.py new file mode 100644 index 0000000000000000000000000000000000000000..fa2b1efbeb98b185f5e65d5824262c84a7f5ba28 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/modality_aware_fusion.py @@ -0,0 +1,74 @@ +import torch +import torch.nn as nn +from opencood.models.fuse_modules.fusion_in_one import regroup, warp_feature +from opencood.models.fuse_modules.att_fuse import ScaledDotProductAttention +from opencood.models.sub_modules.torch_transformation_utils import \ + warp_affine_simple + +# TODO +# https://github.com/microsoft/Swin-Transformer/tree/f92123a0035930d89cf53fcb8257199481c4428d/kernels/window_process + + +class MAttFusion(nn.Module): + def __init__(self, feature_dims): + super().__init__() + print(feature_dims) + print(type(feature_dims)) + self.att = ScaledDotProductAttention(feature_dims) + + def forward(self, x, record_len, pairwise_t_matrix, lidar_agent_indicator): + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + split_x = regroup(x, record_len) + split_lidar_indicator = regroup(lidar_agent_indicator, record_len) + + batch_node_features = split_x + batch_node_lidar_agent = split_lidar_indicator + + out = [] + # iterate each batch + for b in range(B): + N = record_len[b] + lidar_agent = batch_node_lidar_agent[b] + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] 
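+            # lidar_agent marks which of the N agents carry LiDAR; when both
+            # modalities are present, the branch below cross-attends 3x3-shifted
+            # camera features with the max-pooled LiDAR feature, otherwise it
+            # falls back to plain per-pixel self-attention.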
+ + # update each node i + i = 0 # ego + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[i, :, :, :], + (H, W)) + if sum(lidar_agent) !=0 and sum(lidar_agent) != N: + # multi modality aware + lidar_feature = torch.max(neighbor_feature[lidar_agent], dim=0)[0] # [C, H, W] + camera_feature = torch.max(neighbor_feature[1-lidar_agent], dim=0)[0] # [C, H, W] + N_lidar = sum(lidar_agent) + N_camera = N - N_lidar + + # spatial attention 3x3 + camera_feature_3x3 = [] + x_offsets = [-1, 0, 1] + y_offsets = [-1, 0, 1] + for x_offset in x_offsets: + for y_offset in y_offsets: + camera_feature_3x3.append(torch.roll(camera_feature, (x_offset, y_offset), (0,1))) + camera_feature_3x3 = torch.stack(camera_feature_3x3, dim=0) # 9, C, H, W + + key = lidar_feature.view(1, C, -1).permute(2, 0, 1) # [H*W, 1, C] + query = camera_feature_3x3.view(9, C, -1).permute(2, 0, 1) # [H*W, N_camera, C] + value = query + h = self.att(key, query, value) + h = h.permute(1, 2, 0).view(1, C, H, W)[0, ...] # [C, H, W] + out.append(torch.maximum(h, lidar_feature)) + + else: + # single modality + cav_num = neighbor_feature.shape[0] + x = neighbor_feature.view(cav_num, C, -1).permute(2, 0, 1) # (H*W, cav_num, C), perform self attention on each pixel. + h = self.att(x, x, x) + h = h.permute(1, 2, 0).view(cav_num, C, H, W)[0, ...] # C, W, H before + out.append(h) + + + out = torch.stack(out) + + return out diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/ms_max_fuse.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/ms_max_fuse.py new file mode 100644 index 0000000000000000000000000000000000000000..99550922a2534ac916342e462d24b284e43df1ef --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/ms_max_fuse.py @@ -0,0 +1,178 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang +# License: TDG-Attribution-NonCommercial-NoDistrib + +import torch +import torch.nn as nn +from opencood.models.sub_modules.resblock import ResNetModified, BasicBlock, Bottleneck +from opencood.models.sub_modules.torch_transformation_utils import \ + get_discretized_transformation_matrix, get_transformation_matrix, \ + warp_affine_simple, get_rotated_roi +from opencood.models.sub_modules.convgru import ConvGRU +from icecream import ic +from matplotlib import pyplot as plt + +class MSMaxFusion(nn.Module): + def __init__(self, args): + super(MSMaxFusion, self).__init__() + self.discrete_ratio = args['voxel_size'][0] # voxel_size[0]=0.4 + self.downsample_rate = args['downsample_rate'] # 2/4, downsample rate from original feature map [200, 704] + self.outC = args['outC'] + layer_nums = args['layer_nums'] + num_filters = args['num_filters'] + layer_strides = args['layer_strides'] + upsample_strides = args['upsample_strides'] + num_upsample_filters = args['num_upsample_filter'] + self.level_num = len(layer_nums) + + self.resnet = ResNetModified(BasicBlock, + layer_nums, + layer_strides, + num_filters) + num_levels = len(layer_nums) + + self.fuse_modules = nn.ModuleList() + self.deblocks = nn.ModuleList() + + for idx in range(num_levels): + + fuse_network = MaxFusion() + self.fuse_modules.append(fuse_network) + + if len(upsample_strides) > 0: + stride = upsample_strides[idx] + self.deblocks.append(nn.Sequential( + nn.ConvTranspose2d( + num_filters[idx], num_upsample_filters[idx], + upsample_strides[idx], + stride=upsample_strides[idx], bias=False + ), + nn.BatchNorm2d(num_upsample_filters[idx], + eps=1e-3, momentum=0.01), + nn.ReLU() + )) + + + 
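+        # Features upsampled from every pyramid level are concatenated along the
+        # channel dimension before the final 3x3 conv head.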
c_in = sum(num_upsample_filters) + if len(upsample_strides) > num_levels: + self.deblocks.append(nn.Sequential( + nn.ConvTranspose2d(c_in, c_in, upsample_strides[-1], + stride=upsample_strides[-1], bias=False), + nn.BatchNorm2d(c_in, eps=1e-3, momentum=0.01), + nn.ReLU(), + )) + + self.num_bev_features = c_in + + self.conv_last = nn.Conv2d(sum(num_upsample_filters), outC, kernel_size=3, stride=1, padding=1) + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, x, record_len, pairwise_t_matrix): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + Returns + ------- + Fused feature. + """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + + features = self.resnet(x) + + ups = [] + + for i in range(self.level_num): + x_fuse = self.fuse_modules[i](features[i], record_len, pairwise_t_matrix) + if len(self.deblocks) > 0: + ups.append(self.deblocks[i](x_fuse)) + else: + ups.append(x_fuse) + + if len(ups) > 1: + x = torch.cat(ups, dim=1) + elif len(ups) == 1: + x = ups[0] + + x = self.conv_last(x) + + return x + + + +class MaxFusion(nn.Module): + def __init__(self): + super(MaxFusion, self).__init__() + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, x, record_len, pairwise_t_matrix): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The affine transformation matrix from each cav to ego, already normalized + shape: (B, L, L, 2, 3) + + Returns + ------- + Fused feature. 
+ """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + split_x = self.regroup(x, record_len) + + batch_node_features = split_x + + out = [] + # iterate each batch + for b in range(B): + + N = record_len[b] + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + # update each node i + i = 0 # ego + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[i, :, :, :], + (H, W)) + out.append(torch.max(neighbor_feature, dim=0)[0]) + out = torch.stack(out) + + return out \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/self_attn.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/self_attn.py new file mode 100644 index 0000000000000000000000000000000000000000..a6d468a17689e2117f70d7b254902e1019413635 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/self_attn.py @@ -0,0 +1,103 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang , Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple + +DEBUG=False + +class ScaledDotProductAttention(nn.Module): + """ + Scaled Dot-Product Attention proposed in "Attention Is All You Need" + Compute the dot products of the query with all keys, divide each by sqrt(dim), + and apply a softmax function to obtain the weights on the values + Args: dim, mask + dim (int): dimention of attention + mask (torch.Tensor): tensor containing indices to be masked + Inputs: query, key, value, mask + - **query** (batch, q_len, d_model): tensor containing projection + vector for decoder. + - **key** (batch, k_len, d_model): tensor containing projection + vector for encoder. + - **value** (batch, v_len, d_model): tensor containing features of the + encoded input sequence. + - **mask** (-): tensor containing indices to be masked + Returns: context, attn + - **context**: tensor containing the context vector from + attention mechanism. + - **attn**: tensor containing the attention (alignment) from the + encoder outputs. + """ + + def __init__(self, dim): + super(ScaledDotProductAttention, self).__init__() + self.sqrt_dim = np.sqrt(dim) + + def forward(self, query, key, value): + score = torch.bmm(query, key.transpose(1, 2)) / self.sqrt_dim + attn = F.softmax(score, -1) + context = torch.bmm(attn, value) + return context + + +class AttFusion(nn.Module): + def __init__(self, feature_dim): + super(AttFusion, self).__init__() + self.att = ScaledDotProductAttention(feature_dim) + + def forward(self, x, record_len, pairwise_t_matrix): + """ + pairwise_t_matrix : [N,N,2,3] + """ + split_x = self.regroup(x, record_len) + batch_size = len(record_len) + C, H, W = split_x[0].shape[1:] # C, W, H before + out = [] + for b, xx in enumerate(split_x): + N = xx.shape[0] + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + i = 0 # ego + xx = warp_affine_simple(xx, t_matrix[i, :, :, :], (H, W)) + + cav_num = xx.shape[0] + xx = xx.view(cav_num, C, -1).permute(2, 0, 1) # (H*W, cav_num, C), perform self attention on each pixel. 
+ h = self.att(xx, xx, xx) + h = h.permute(1, 2, 0).view(cav_num, C, H, W)[0, ...].unsqueeze(0) # C, W, H before + out.append(h) + return torch.cat(out, dim=0) + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + + + def forward_debug(self, x, origin_x, record_len, pairwise_t_matrix): + split_x = self.regroup(x, record_len) + split_origin_x = self.regroup(origin_x, record_len) + batch_size = len(record_len) + C, H, W = split_x[0].shape[1:] # C, W, H before + H_origin, W_origin = split_origin_x[0].shape[2:] + out = [] + from matplotlib import pyplot as plt + for b, xx in enumerate(split_x): + N = xx.shape[0] + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + i = 0 + xx = warp_affine_simple(xx, t_matrix[i, :, :, :], (H, W)) + origin_xx = warp_affine_simple(split_origin_x[b], t_matrix[i, :, :, :], (H_origin, W_origin)) + + for idx in range(N): + plt.imshow(torch.max(xx[idx],0)[0].detach().cpu().numpy()) + plt.savefig(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/vis_result/debug_warp_feature/feature_{b}_{idx}") + plt.clf() + plt.imshow(torch.max(origin_xx[idx],0)[0].detach().cpu().numpy()) + plt.savefig(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/vis_result/debug_warp_feature/origin_feature_{b}_{idx}") + plt.clf() + raise \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/transformer.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..8578dc41ee877f144ebe568ee9c829f3d52ca53d --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/transformer.py @@ -0,0 +1,146 @@ +""" +Implementation of Simple transformer fusion. +""" + +import torch +import torch.nn as nn +import torch.nn.functional as F +import kornia + + +class TransformerMessage(nn.Module): + def __init__(self, + in_channels=64, + trans_layer=[3]): + super(TransformerMessage, self).__init__() + self.in_channels = in_channels + + self.trans_layer = trans_layer + + dropout = 0 + nhead = 8 + for c_layer in self.trans_layer: + d_model = in_channels + cross_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) + # Implementation of Feedforward model + linear1 = nn.Linear(d_model, d_model) + linear2 = nn.Linear(d_model, d_model) + + norm1 = nn.LayerNorm(d_model) + norm2 = nn.LayerNorm(d_model) + dropout0 = nn.Dropout(dropout) + dropout1 = nn.Dropout(dropout) + dropout2 = nn.Dropout(dropout) + self.__setattr__('cross_attn'+str(c_layer), cross_attn) + self.__setattr__('linear1_'+str(c_layer), linear1) + self.__setattr__('linear2_'+str(c_layer), linear2) + self.__setattr__('norm1_'+str(c_layer), norm1) + self.__setattr__('norm2_'+str(c_layer), norm2) + self.__setattr__('dropout0_'+str(c_layer), dropout0) + self.__setattr__('dropout1_'+str(c_layer), dropout1) + self.__setattr__('dropout2_'+str(c_layer), dropout2) + + def add_pe_map(self, x, normalized=True): + """ Add positional encoding to feature map. 
+ Args: + x: torch.Tensor + [N, C, H, W] + + """ + # scale = 2 * math.pi + temperature = 10000 + num_pos_feats = x.shape[-3] // 2 # d + + mask = torch.zeros([x.shape[-2], x.shape[-1]], dtype=torch.bool, device=x.device) + not_mask = ~mask + y_embed = not_mask.cumsum(0, dtype=torch.float32) + x_embed = not_mask.cumsum(1, dtype=torch.float32) + + dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=x.device) + dim_t = temperature ** (2 * (dim_t // 2) / num_pos_feats) + + pos_x = x_embed[:, :, None] / dim_t + pos_y = y_embed[:, :, None] / dim_t + pos_x = torch.stack((pos_x[:, :, 0::2].sin(), pos_x[:, :, 1::2].cos()), dim=3).flatten(2) + pos_y = torch.stack((pos_y[:, :, 0::2].sin(), pos_y[:, :, 1::2].cos()), dim=3).flatten(2) + pos = torch.cat((pos_y, pos_x), dim=2).permute(2, 0, 1) + + if len(x.shape) == 5: + x = x + pos[None,None,:,:,:] + elif len(x.shape) == 6: + x = x + pos[None,None,None,:,:,:] + return x + + def forward(self, x, shift_mats, shift_mats_rev, agent_mask): + batch, max_agent_num, c, h, w = x.shape + + # ================================ + # First, transform to each coord + feat_shifted = [] + for agent_i in range(max_agent_num): + # shift_mat_i = shift_mats[:, agent_i, :, :] + shift_mat_rev_i = shift_mats_rev[:, agent_i, :, :] + feat_i = x[:, agent_i, :, :, :] + feat_shifted_i = [] + for agent_j in range(max_agent_num): + shift_mat_j = shift_mats[:, agent_j, :, :] + shift_mat = shift_mat_j.view(batch, 3, 3) @ shift_mat_rev_i.view(batch, 3, 3) + feat = kornia.warp_perspective(feat_i, shift_mat, dsize=(100 * 2, 100 * 2), align_corners=False) + feat_shifted_i.append(feat) + feat_shifted_i = torch.cat([f.unsqueeze(1) for f in feat_shifted_i], dim=1) + feat_shifted.append(feat_shifted_i) + feat_shifted = torch.cat([f.unsqueeze(1) for f in feat_shifted], dim=1) + + + # ================================ + # x_fuse, _, _ = self.TRANSFORMER_MESSAGE([[],[],[],local_com_mat], [transformed_feature], num_agent_tensor) + + for i, c_layer in enumerate(self.trans_layer): + batch_updated_features = torch.zeros(batch, max_agent_num, c, h, w).to(shift_mats.device) + for batch_i in range(batch): + N = int(torch.sum(agent_mask[batch_i])) + feat_map = x[batch_i:batch_i+1, :N, :, :, :] + val_feat = feat_shifted[batch_i:batch_i+1, :N, :N, :, :, :] + + feat_map = self.add_pe_map(feat_map) + # [b,N,C,H,W] -> [b,N,H,W,C] + # [b,N,N,C,H,W] -> [N,b,N,H,W,C] + src = feat_map.permute(0,1,3,4,2).contiguous().view(N*h*w,c).contiguous().unsqueeze(0) + tgt = val_feat.permute(1,0,2,4,5,3).contiguous().view(N, N*h*w,c).contiguous() + # print(src.shape) # torch.Size([1, 120000, 64]) + # print(tgt.shape) # torch.Size([N, 120000, 64]) + + src2, weight_mat = eval('self.cross_attn'+str(c_layer))(src, tgt, value=tgt, attn_mask=None, key_padding_mask=None) + src = src + eval('self.dropout1_'+str(c_layer))(src2) + src = eval('self.norm1_'+str(c_layer))(src) + src2 = eval('self.linear2_'+str(c_layer))(eval('self.dropout0_'+str(c_layer))(F.relu(eval('self.linear1_'+str(c_layer))(src)))) + src = src + eval('self.dropout2_'+str(c_layer))(src2) + src = eval('self.norm2_'+str(c_layer))(src) + + feat_fuse = src.view(1, N, h, w, c).contiguous().permute(0, 1, 4, 2, 3).contiguous() + # print(feat_fuse.shape) # torch.Size([1, N, 64, 200, 200]) + batch_updated_features[batch_i, :N, :, :, :] = feat_fuse.squeeze(0) + + return batch_updated_features, None + + +if __name__=="__main__": + from icecream import ic + x = torch.rand((64,6,8)) # [C,H,W] + temperature = 10000 + num_pos_feats = x.shape[-3] // 2 # [d] + + mask = 
torch.zeros([x.shape[-2], x.shape[-1]], dtype=torch.bool, device=x.device) #[H, W] + not_mask = ~mask + y_embed = not_mask.cumsum(0, dtype=torch.float32) # [H, W] + x_embed = not_mask.cumsum(1, dtype=torch.float32) # [H, W] + + dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=x.device) # [0,1,2,...,d] + dim_t = temperature ** (2 * (dim_t // 2) / num_pos_feats) # 10000^(2k/d), k is [0,0,1,1,...,d/2,d/2] + + pos_x = x_embed[:, :, None] / dim_t + pos_y = y_embed[:, :, None] / dim_t + + pos_x = torch.stack((pos_x[:, :, 0::2].sin(), pos_x[:, :, 1::2].cos()), dim=3).flatten(2) + pos_y = torch.stack((pos_y[:, :, 0::2].sin(), pos_y[:, :, 1::2].cos()), dim=3).flatten(2) + pos = torch.cat((pos_y, pos_x), dim=2).permute(2, 0, 1) diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/transformer_fuse.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/transformer_fuse.py new file mode 100644 index 0000000000000000000000000000000000000000..cf915d078837ba4614472be541b07809426543d4 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/transformer_fuse.py @@ -0,0 +1,219 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang +# License: TDG-Attribution-NonCommercial-NoDistrib + + +""" +Implementation of transformer encoder fusion. +""" + +import torch +import torch.nn as nn + +from opencood.models.sub_modules.torch_transformation_utils import \ + get_discretized_transformation_matrix, get_transformation_matrix, \ + warp_affine_simple, get_rotated_roi +import torch.nn.functional as F +from icecream import ic +from matplotlib import pyplot as plt + +# class MultiheadAttBlock(nn.Module): +# def __init__(self, channels, n_head=8, dropout=0): +# super(MultiheadAttBlock, self).__init__() +# self.attn = nn.MultiheadAttention(channels, n_head, dropout) + +# def forward(self, q, k, v): +# """ +# order (seq, batch, feature) +# Args: +# q: (1, H*W, C) +# k: (N, H*W, C) +# v: (N, H*W, C) +# Returns: +# outputs: () +# """ +# context, weight = self.attn(q,k,v) # (1, H*W, C) + +# return context + + +class EncodeLayer(nn.Module): + def __init__(self, channels, n_head=8, dropout=0): + super(EncodeLayer, self).__init__() + self.attn = nn.MultiheadAttention(channels, n_head, dropout) + self.linear1 = nn.Linear(channels, channels) + self.linear2 = nn.Linear(channels, channels) + + self.norm1 = nn.LayerNorm(channels) + self.norm2 = nn.LayerNorm(channels) + + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + + self.relu = nn.ReLU() + + def forward(self, q, k, v): + """ + order (seq, batch, feature) + Args: + q: (1, H*W, C) + k: (N, H*W, C) + v: (N, H*W, C) + Returns: + outputs: () + """ + residual = q + context, weight = self.attn(q,k,v) # (1, H*W, C) + context = self.dropout1(context) + output1 = self.norm1(residual + context) + + # feed forward net + residual = output1 # (1, H*W, C) + context = self.linear2(self.relu(self.linear1(output1))) + context = self.dropout2(context) + output2 = self.norm2(residual + context) + + return output2 + + + + + +class TransformerFusion(nn.Module): + def __init__(self, args): + super(TransformerFusion, self).__init__() + + self.channels = args['in_channels'] + self.n_head = args['n_head'] + self.dropout = args['dropout_rate'] + + self.discrete_ratio = args['voxel_size'][0] + self.downsample_rate = args['downsample_rate'] + + self.encode_layer = EncodeLayer(self.channels, self.n_head, self.dropout) + + def add_pe_map(self, x, normalized=True): + # scale = 2 * 
math.pi + temperature = 10000 + num_pos_feats = x.shape[-3] // 2 # positional encoding dimension. C = 2d + + mask = torch.zeros([x.shape[-2], x.shape[-1]], dtype=torch.bool, device=x.device) #[H, W] + not_mask = ~mask + y_embed = not_mask.cumsum(0, dtype=torch.float32) # [H, W] + x_embed = not_mask.cumsum(1, dtype=torch.float32) # [H, W] + + dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=x.device) # [0,1,2,...,d] + dim_t = temperature ** (2 * (dim_t // 2) / num_pos_feats) # 10000^(2k/d), k is [0,0,1,1,...,d/2,d/2] + + pos_x = x_embed[:, :, None] / dim_t + pos_y = y_embed[:, :, None] / dim_t + pos_x = torch.stack((pos_x[:, :, 0::2].sin(), pos_x[:, :, 1::2].cos()), dim=3).flatten(2) + pos_y = torch.stack((pos_y[:, :, 0::2].sin(), pos_y[:, :, 1::2].cos()), dim=3).flatten(2) + pos = torch.cat((pos_y, pos_x), dim=2).permute(2, 0, 1) # [C, H, W] + + if len(x.shape) == 4: + x = x + pos[None,:,:,:] + elif len(x.shape) == 5: + x = x + pos[None,None,:,:,:] + return x + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, x, record_len, pairwise_t_matrix): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + Returns + ------- + Fused feature. + """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] + split_x = self.regroup(x, record_len) + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + + # (B*L,L,1,H,W) + roi_mask = torch.zeros((B, L, L, 1, H, W)).to(x) + for b in range(B): + N = record_len[b] + for i in range(N): + one_tensor = torch.ones((L,1,H,W)).to(x) + roi_mask[b,i] = warp_affine_simple(one_tensor, pairwise_t_matrix[b][i, :, :, :],(H, W)) + + batch_node_features = split_x + # iteratively update the features for num_iteration times + + out = [] + # iterate each batch + for b in range(B): + + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + updated_node_features = [] + + # update each node i + i = 0 # ego + # (N,1,H,W) + mask = roi_mask[b, i, :N, ...] + + # (N,C,H,W) neighbor_feature is agent i's neighborhood warping to agent i's perspective + # Notice we put i one the first dim of t_matrix. Different from original. 
+ # t_matrix[i,j] = Tji + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[i, :, :, :], + (H, W)) + + neighbor_feature_flat = neighbor_feature.view(N,C,H*W) # (N, C, H*W) + neighbor_feature_flat_pe = self.add_pe_map(neighbor_feature).view(N,C,H*W) # (N, C, H*W) + + query = neighbor_feature_flat_pe[0:1,...].permute(0,2,1) # (1, H*W, C) + key = neighbor_feature_flat_pe.permute(0,2,1) # (N, H*W, C) + value = neighbor_feature_flat.permute(0,2,1) + + + + fusion_result = self.encode_layer(query, key, value) # (1, H*W, C) + fusion_result = fusion_result.permute(0,2,1).reshape(1, C, H, W)[0] + + out.append(fusion_result) + + out = torch.stack(out) + + return out + + + + + + + + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/v2v_fuse.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/v2v_fuse.py new file mode 100644 index 0000000000000000000000000000000000000000..689631882edf6489126ba363f4435729af616d88 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/v2v_fuse.py @@ -0,0 +1,179 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang +# License: TDG-Attribution-NonCommercial-NoDistrib + + +""" +Implementation of V2VNet Fusion +""" + +from email import message_from_binary_file +import torch +import torch.nn as nn + +from opencood.models.sub_modules.torch_transformation_utils import \ + get_discretized_transformation_matrix, get_transformation_matrix, \ + warp_affine_simple, get_rotated_roi +from opencood.models.sub_modules.convgru import ConvGRU +from icecream import ic +from matplotlib import pyplot as plt +from icecream import ic + +class V2VNetFusion(nn.Module): + def __init__(self, args): + super(V2VNetFusion, self).__init__() + + in_channels = args['in_channels'] + H, W = args['conv_gru']['H'], args['conv_gru']['W'] # remember to modify for v2xsim dataset + kernel_size = args['conv_gru']['kernel_size'] + num_gru_layers = args['conv_gru']['num_layers'] + + self.discrete_ratio = args['voxel_size'][0] + self.downsample_rate = args['downsample_rate'] + self.num_iteration = args['num_iteration'] + self.gru_flag = args['gru_flag'] + self.agg_operator = args['agg_operator'] + + self.msg_cnn = nn.Conv2d(in_channels * 2, in_channels, kernel_size=3, + stride=1, padding=1) + self.conv_gru = ConvGRU(input_size=(H, W), + input_dim=in_channels * 2, + hidden_dim=[in_channels] * num_gru_layers, + kernel_size=kernel_size, + num_layers=num_gru_layers, + batch_first=True, + bias=True, + return_all_layers=False) + self.mlp = nn.Linear(in_channels, in_channels) + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, x, record_len, pairwise_t_matrix, weight=None): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + weight: torch.Tensor + Weight of aggregating coming message + shape: (B, L, L) + + Returns + ------- + Fused feature. + """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] 
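+        # V2VNet runs `num_iteration` rounds of message passing: each round warps the
+        # neighbors into node i's frame, builds messages with msg_cnn, aggregates them
+        # (avg / max / weighted sum) and updates the node state with a ConvGRU.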
+ split_x = self.regroup(x, record_len) + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + + # (B*L,L,1,H,W) + roi_mask = torch.zeros((B, L, L, 1, H, W)).to(x) + for b in range(B): + N = record_len[b] + for i in range(N): + one_tensor = torch.ones((L,1,H,W)).to(x) + roi_mask[b,i] = warp_affine_simple(one_tensor, pairwise_t_matrix[b][i, :, :, :],(H, W)) + + batch_node_features = split_x + # iteratively update the features for num_iteration times + for l in range(self.num_iteration): + + batch_updated_node_features = [] + # iterate each batch + for b in range(B): + + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + updated_node_features = [] + + # update each node i + for i in range(N): + # (N,1,H,W) + mask = roi_mask[b, i, :N, ...] + # (N,C,H,W) neighbor_feature is agent i's neighborhood warping to agent i's perspective + # Notice we put i one the first dim of t_matrix. Different from original. + # t_matrix[i,j] = Tji + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[i, :, :, :], + (H, W)) + + # (N,C,H,W) + ego_agent_feature = batch_node_features[b][i].unsqueeze( + 0).repeat(N, 1, 1, 1) + #(N,2C,H,W) + neighbor_feature = torch.cat( + [neighbor_feature, ego_agent_feature], dim=1) + # (N,C,H,W) + # message contains all feature map from j to ego i. + message = self.msg_cnn(neighbor_feature) * mask + + # (C,H,W) + if self.agg_operator=="avg": + agg_feature = torch.mean(message, dim=0) + elif self.agg_operator=="max": + agg_feature = torch.max(message, dim=0)[0] + elif self.agg_operator=='weight': + agg_feature = torch.sum(message * weight[b][i,:N].view(-1,1,1,1), dim=0) + else: + raise ValueError("agg_operator has wrong value") + # (2C, H, W) + cat_feature = torch.cat( + [batch_node_features[b][i, ...], agg_feature], dim=0) + # (C,H,W) + if self.gru_flag: + gru_out = \ + self.conv_gru(cat_feature.unsqueeze(0).unsqueeze(0))[ + 0][ + 0].squeeze(0).squeeze(0) + else: + gru_out = batch_node_features[b][i, ...] 
+ agg_feature + updated_node_features.append(gru_out.unsqueeze(0)) + # (N,C,H,W) + batch_updated_node_features.append( + torch.cat(updated_node_features, dim=0)) + batch_node_features = batch_updated_node_features + # (B,C,H,W) + out = torch.cat( + [itm[0, ...].unsqueeze(0) for itm in batch_node_features], dim=0) + # (B,C,H,W) -> (B, H, W, C) -> (B,C,H,W) + out = self.mlp(out.permute(0, 2, 3, 1)).permute(0, 3, 1, 2) + + return out + + + +# from matplotlib import pyplot as plt +# neighbor_feature = neighbor_feature.detach().cpu().numpy() +# for j in range(N): +# plt.imshow(neighbor_feature[j].max(axis=0)) +# plt.savefig(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/v2x_fuse_{j}") +# plt.clf() +# raise \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/v2xvit_fuse[not_use].py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/v2xvit_fuse[not_use].py new file mode 100644 index 0000000000000000000000000000000000000000..ba3cb0b3f3fdab10af925c6d321db6b8c3356ae7 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/v2xvit_fuse[not_use].py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang +# License: TDG-Attribution-NonCommercial-NoDistrib + +import torch +import torch.nn as nn +from opencood.models.fuse_modules.fuse_utils import regroup +from opencood.models.sub_modules.v2xvit_basic import V2XTransformer +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple + + +class V2XViTFusion(nn.Module): + def __init__(self, args): + super(V2XViTFusion, self).__init__() + self.discrete_ratio = args['voxel_size'][0] # voxel_size[0]=0.4 + self.downsample_rate = args['downsample_rate'] # 2/4, downsample rate from original feature map [200, 704] + self.fusion_net = V2XTransformer(args['transformer']) + + def forward(self, x, record_len, pairwise_t_matrix): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + Returns + ------- + Fused feature. 
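+
+        Notes
+        -----
+        The prior encoding (velocity, time delay, infra flag) is filled with zeros
+        and the spatial correction matrix is the identity, i.e. a perfect-pose,
+        zero-delay setting.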
+ """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + regroup_feature, mask = regroup(x, record_len, L) + prior_encoding = \ + torch.zeros(len(record_len), L, 3, 1, 1).to(record_len.device) + + # prior encoding added + prior_encoding = prior_encoding.repeat(1, 1, 1, + regroup_feature.shape[3], + regroup_feature.shape[4]) + + regroup_feature = torch.cat([regroup_feature, prior_encoding], dim=2) + regroup_feature_new = [] + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + for b in range(B): + # (B,L,L,2,3) + ego = 0 + regroup_feature_new.append(warp_affine_simple(regroup_feature[b], pairwise_t_matrix[b, ego], (H, W))) + regroup_feature = torch.stack(regroup_feature_new) + + # b l c h w -> b l h w c + regroup_feature = regroup_feature.permute(0, 1, 3, 4, 2) + # transformer fusion + spatial_correction_matrix = torch.eye(4).expand(len(record_len), L, 4, 4).to(record_len.device) + fused_feature = self.fusion_net(regroup_feature, mask, spatial_correction_matrix) + # b h w c -> b c h w + fused_feature = fused_feature.permute(0, 3, 1, 2) + + return fused_feature diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/when2com_fuse.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/when2com_fuse.py new file mode 100644 index 0000000000000000000000000000000000000000..3a3ffe71fcc902d9547b09eaa6833e35919980c0 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/when2com_fuse.py @@ -0,0 +1,363 @@ +# -*- coding: utf-8 -*- +# Author: Yue Hu <18671129361@sjtu.edu.cn> +# License: TDG-Attribution-NonCommercial-NoDistrib + +""" +Implementation of When2com Fusion +""" + +import torch +import torch.nn as nn +import numpy as np + +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple + + +class When2comFusion(nn.Module): + def __init__(self, args): + super(When2comFusion, self).__init__() + + self.discrete_ratio = args['voxel_size'][0] + self.downsample_rate = args['downsample_rate'] + + self.in_channels = args['in_channels'] + self.feat_H = args['H'] + self.feat_W = args['W'] + self.query_size = args['query_size'] + self.key_size = args['key_size'] + self.mode = args['mode'] + self.agent_num = 2 + + self.query_key_net = policy_net4(self.in_channels) + self.key_net = km_generator(out_size=self.key_size, input_feat_h=self.feat_H//4, input_feat_w=self.feat_W//4) + self.query_net = km_generator(out_size=self.query_size, input_feat_h=self.feat_H//4, input_feat_w=self.feat_W//4) + self.attention_net = MIMOGeneralDotProductAttention(self.query_size, self.key_size) + + def activated_select(self, val_mat, prob_action, thres=0.2): + coef_act = torch.mul(prob_action, (prob_action > thres).float()) + attn_shape = coef_act.shape + bats, key_num, query_num = attn_shape[0], attn_shape[1], attn_shape[2] + coef_act_exp = coef_act.view(bats, key_num, query_num, 1, 1, 1) + + output = coef_act_exp * val_mat # (batch,4,channel,size,size) + feat_act = output.sum(1) # (batch,1,channel,size,size) + + # compute connect + count_coef = coef_act.clone() + ind = 
np.diag_indices(self.agent_num) + count_coef[:, ind[0], ind[1]] = 0 + num_connect = torch.nonzero(count_coef).shape[0] / ( + self.agent_num * count_coef.shape[0] + ) + return feat_act, coef_act, num_connect + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, x, record_len, pairwise_t_matrix, weight=None): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + weight: torch.Tensor + Weight of aggregating coming message + shape: (B, L, L) + + Returns + ------- + Fused feature. + """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] + split_x = self.regroup(x, record_len) + batch_node_features = split_x + updated_node_features = [] + for b in range(B): + + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + # update each node i + # (N,1,H,W) + # (N,C,H,W) neighbor_feature is agent i's neighborhood warping to agent i's perspective + # Notice we put i one the first dim of t_matrix. Different from original. + # t_matrix[i,j] = Tji + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[0, :, :, :], + (H, W)) + query_key_maps = self.query_key_net(neighbor_feature) + keys = self.key_net(query_key_maps) + query = self.query_net(query_key_maps[0].unsqueeze(0)) + + query = query.unsqueeze(0) + keys = keys.unsqueeze(0) + neighbor_feature = neighbor_feature.unsqueeze(1).unsqueeze(0) + + feat_fuse, prob_action = self.attention_net(query, keys, neighbor_feature, sparse=True) + + if self.mode == "activated": + feat_fuse, connect_mat, num_connect = self.activated_select(neighbor_feature, prob_action) + + updated_node_features.append(feat_fuse.squeeze(0)) + + out = torch.cat(updated_node_features, dim=0) + + return out + +class conv2DBatchNormRelu(nn.Module): + def __init__( + self, + in_channels, + n_filters, + k_size, + stride, + padding, + bias=True, + dilation=1, + is_batchnorm=True, + ): + super(conv2DBatchNormRelu, self).__init__() + + conv_mod = nn.Conv2d( + int(in_channels), + int(n_filters), + kernel_size=k_size, + padding=padding, + stride=stride, + bias=bias, + dilation=dilation, + ) + + if is_batchnorm: + self.cbr_unit = nn.Sequential( + conv_mod, nn.BatchNorm2d(int(n_filters)), nn.ReLU(inplace=True) + ) + else: + self.cbr_unit = nn.Sequential(conv_mod, nn.ReLU(inplace=True)) + + def forward(self, inputs): + outputs = self.cbr_unit(inputs) + return outputs + + +class Sparsemax(nn.Module): + """Sparsemax function.""" + + def __init__(self, dim=None): + """Initialize sparsemax activation + + Args: + dim (int, optional): The dimension over which to apply the sparsemax function. 
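+                Defaults to -1 (the last dimension) when not given.
+
+        Note: sparsemax (Martins & Astudillo, 2016) projects the logits onto the
+        probability simplex, so many output weights become exactly zero.
+        Illustrative, hand-computed example: sparsemax([1.0, 2.0, 3.0]) = [0.0, 0.0, 1.0],
+        whereas softmax would keep every entry non-zero.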
+ """ + super(Sparsemax, self).__init__() + + self.dim = -1 if dim is None else dim + + def forward(self, input): + """Forward function. + Args: + input (torch.Tensor): Input tensor. First dimension should be the batch size + Returns: + torch.Tensor: [batch_size x number_of_logits] Output tensor + """ + # Sparsemax currently only handles 2-dim tensors, + # so we reshape and reshape back after sparsemax + original_size = input.size() + input = input.view(-1, input.size(self.dim)) + + dim = 1 + number_of_logits = input.size(dim) + + # Translate input by max for numerical stability + input = input - torch.max(input, dim=dim, keepdim=True)[0].expand_as(input) + + # Sort input in descending order. + # (NOTE: Can be replaced with linear time selection method described here: + # http://stanford.edu/~jduchi/projects/DuchiShSiCh08.html) + zs = torch.sort(input=input, dim=dim, descending=True)[0] + range = torch.range(start=1, end=number_of_logits, device=input.device).view(1, -1) + range = range.expand_as(zs) + + # Determine sparsity of projection + bound = 1 + range * zs + cumulative_sum_zs = torch.cumsum(zs, dim) + is_gt = torch.gt(bound, cumulative_sum_zs).type(input.type()) + k = torch.max(is_gt * range, dim, keepdim=True)[0] + + # Compute threshold function + zs_sparse = is_gt * zs + + # Compute taus + taus = (torch.sum(zs_sparse, dim, keepdim=True) - 1) / k + taus = taus.expand_as(input) + + # Sparsemax + self.output = torch.max(torch.zeros_like(input), input - taus) + + output = self.output.view(original_size) + + return output + + def backward(self, grad_output): + """Backward function.""" + dim = 1 + + nonzeros = torch.ne(self.output, 0) + sum = torch.sum(grad_output * nonzeros, dim=dim) / torch.sum(nonzeros, dim=dim) + self.grad_input = nonzeros * (grad_output - sum.expand_as(grad_output)) + + return self.grad_input + +class km_generator(nn.Module): + def __init__(self, out_size=128, input_feat_h=25, input_feat_w=63): + super(km_generator, self).__init__() + # self.n_feat = int(256 * (input_feat_h//4 + 1) * (input_feat_w//4 + 1)) + self.n_feat = int(256 * input_feat_h * input_feat_w) + self.fc = nn.Sequential( + nn.Linear(self.n_feat, 256), # + nn.ReLU(inplace=True), + nn.Linear(256, 128), # + nn.ReLU(inplace=True), + nn.Linear(128, out_size)) # + + def forward(self, feat_map): + outputs = self.fc(feat_map.view(-1, self.n_feat)) + return outputs + +class km_generator_v2(nn.Module): + def __init__(self, out_size=128): + super(km_generator_v2, self).__init__() + # N, C = 256, H, W + self.conv1 = conv2DBatchNormRelu(256, 128, k_size=3, stride=2, padding=1) + self.avgp = nn.AdaptiveAvgPool2d((5, 7)) + self.n_feat = int(128*5*7) + self.fc = nn.Sequential( + nn.Linear(self.n_feat, 256), # + nn.ReLU(inplace=True), + nn.Linear(256, 128), # + nn.ReLU(inplace=True), + nn.Linear(128, out_size)) # + + def forward(self, feat_map): + feat_map = self.avgp(self.conv1(feat_map)) + outputs = self.fc(feat_map.view(-1, self.n_feat)) + return outputs + +class policy_net4(nn.Module): + def __init__(self, in_channel): + super(policy_net4, self).__init__() + # Encoder + # down 1 + self.conv1 = conv2DBatchNormRelu(in_channel, 512, k_size=3, stride=1, padding=1) + self.conv2 = conv2DBatchNormRelu(512, 256, k_size=3, stride=1, padding=1) + self.conv3 = conv2DBatchNormRelu(256, 256, k_size=3, stride=2, padding=1) + + # down 2 + self.conv4 = conv2DBatchNormRelu(256, 256, k_size=3, stride=1, padding=1) + self.conv5 = conv2DBatchNormRelu(256, 256, k_size=3, stride=2, padding=1) + + def forward(self, x): + outputs = 
self.conv1(x) + outputs = self.conv2(outputs) + outputs = self.conv3(outputs) + outputs = self.conv4(outputs) + outputs = self.conv5(outputs) + return outputs + +class MIMOGeneralDotProductAttention(nn.Module): + ''' Scaled Dot-Product Attention ''' + + def __init__(self, query_size, key_size, warp_flag=True, attn_dropout=0.1): + super().__init__() + self.sparsemax = Sparsemax(dim=1) + self.softmax = nn.Softmax(dim=1) + self.linear = nn.Linear(query_size, key_size) + self.warp_flag = warp_flag + print('Msg size: ',query_size,' Key size: ', key_size) + + def forward(self, qu, k, v, sparse=True): + # qu (b, q_agents, query_size) + # k (b, k_agents, key_size) + # v (b, k_agents, q_agents, c, h, w) + query = self.linear(qu) # (b, q_agents, key_size) + + # normalization + # query_norm = query.norm(p=2,dim=2).unsqueeze(2).expand_as(query) + # query = query.div(query_norm + 1e-9) + + # k_norm = k.norm(p=2,dim=2).unsqueeze(2).expand_as(k) + # k = k.div(k_norm + 1e-9) + # generate the + attn_orig = torch.bmm(k, query.transpose(2, 1)) # (b, k_agents, q_agents) column: differnt keys and the same query + + # scaling [not sure] + # scaling = torch.sqrt(torch.tensor(k.shape[2],dtype=torch.float32)).cuda() + # attn_orig = attn_orig/ scaling # (b,5,5) column: differnt keys and the same query + + attn_orig_softmax = self.softmax(attn_orig) # (b, k_agents, q_agents) + # attn_orig_softmax = self.sparsemax(attn_orig) + + attn_shape = attn_orig_softmax.shape + bats, key_num, query_num = attn_shape[0], attn_shape[1], attn_shape[2] + attn_orig_softmax_exp = attn_orig_softmax.view(bats, key_num, query_num, 1, 1, 1) + + if self.warp_flag: + v_exp = v + else: + v_exp = torch.unsqueeze(v, 2) + v_exp = v_exp.expand(-1, -1, query_num, -1, -1, -1) + + output = attn_orig_softmax_exp * v_exp # (b, k_agents, q_agents, c, h, w) + output_sum = output.sum(1) # (b, q_agents, c, h, w) + + return output_sum, attn_orig_softmax + + +class AdditiveAttentin(nn.Module): + def __init__(self, c_k, c_q): + super().__init__() + # self.dropout = nn.Dropout(attn_dropout) + self.softmax = nn.Softmax(dim=1) + self.sparsemax = Sparsemax(dim=1) + self.linear_feat = nn.Linear(c_k, 128) + self.linear_context = nn.Linear(c_q, 128) + self.linear_out = nn.Linear(128, 1) + + def forward(self, q, k, v, sparse=True): + temp1 = self.linear_feat(k) # [b, N, 128] + temp2 = self.linear_context(q) # [b, 1, 128] + attn_orig = torch.bmm(temp1, temp2.transpose(2, 1)) + if sparse: + attn_orig = self.sparsemax(attn_orig) # [b, N, 1] + else: + attn_orig = self.softmax(attn_orig) # [b, N, 1] + attn = attn_orig.unsqueeze(-1).unsqueeze(-1) # [b, N, 1, 1, 1] + output = attn * v # [b, N, C, H, W] + output = output.sum(1) # (b, C, H, W) + return output, attn \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/where2comm_attn.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/where2comm_attn.py new file mode 100644 index 0000000000000000000000000000000000000000..7fcb589c0b872333463f30746e4b5ec5a6f1e060 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/where2comm_attn.py @@ -0,0 +1,341 @@ +from turtle import update +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple +from opencood.models.comm_modules.where2comm import Communication + + +class ScaledDotProductAttention(nn.Module): + """ + Scaled Dot-Product 
Attention proposed in "Attention Is All You Need" + Compute the dot products of the query with all keys, divide each by sqrt(dim), + and apply a softmax function to obtain the weights on the values + Args: dim, mask + dim (int): dimention of attention + mask (torch.Tensor): tensor containing indices to be masked + Inputs: query, key, value, mask + - **query** (batch, q_len, d_model): tensor containing projection + vector for decoder. + - **key** (batch, k_len, d_model): tensor containing projection + vector for encoder. + - **value** (batch, v_len, d_model): tensor containing features of the + encoded input sequence. + - **mask** (-): tensor containing indices to be masked + Returns: context, attn + - **context**: tensor containing the context vector from + attention mechanism. + - **attn**: tensor containing the attention (alignment) from the + encoder outputs. + """ + + def __init__(self, dim): + super(ScaledDotProductAttention, self).__init__() + self.sqrt_dim = np.sqrt(dim) + + def forward(self, query, key, value): + score = torch.bmm(query, key.transpose(1, 2)) / self.sqrt_dim + attn = F.softmax(score, -1) + context = torch.bmm(attn, value) + return context + +class AttenFusion(nn.Module): + def __init__(self, feature_dim): + super(AttenFusion, self).__init__() + self.att = ScaledDotProductAttention(feature_dim) + + def forward(self, x): + cav_num, C, H, W = x.shape + x = x.view(cav_num, C, -1).permute(2, 0, 1) # (H*W, cav_num, C), perform self attention on each pixel. + x = self.att(x, x, x) + x = x.permute(1, 2, 0).view(cav_num, C, H, W)[0] # C, W, H before + return x + +class MaxFusion(nn.Module): + def __init__(self): + super(MaxFusion, self).__init__() + + def forward(self, x): + return torch.max(x, dim=0)[0] + + +class EncodeLayer(nn.Module): + def __init__(self, channels, n_head=8, dropout=0): + super(EncodeLayer, self).__init__() + self.attn = nn.MultiheadAttention(channels, n_head, dropout) + self.linear1 = nn.Linear(channels, channels) + self.linear2 = nn.Linear(channels, channels) + + self.norm1 = nn.LayerNorm(channels) + self.norm2 = nn.LayerNorm(channels) + + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + + self.relu = nn.ReLU() + + def forward(self, q, k, v, confidence_map=None): + """ + order (seq, batch, feature) + Args: + q: (1, H*W, C) + k: (N, H*W, C) + v: (N, H*W, C) + Returns: + outputs: () + """ + residual = q + if confidence_map is not None: + context, weight = self.attn(q,k,v, quality_map=confidence_map) # (1, H*W, C) + else: + context, weight = self.attn(q,k,v) # (1, H*W, C) + context = self.dropout1(context) + output1 = self.norm1(residual + context) + + # feed forward net + residual = output1 # (1, H*W, C) + context = self.linear2(self.relu(self.linear1(output1))) + context = self.dropout2(context) + output2 = self.norm2(residual + context) + + return output2 + +class TransformerFusion(nn.Module): + def __init__(self, channels=256, n_head=8, with_spe=True, with_scm=True, dropout=0): + super(TransformerFusion, self).__init__() + + self.encode_layer = EncodeLayer(channels, n_head, dropout) + self.with_spe = with_spe + self.with_scm = with_scm + + def forward(self, batch_neighbor_feature, batch_neighbor_feature_pe, batch_confidence_map, record_len): + x_fuse = [] + B = len(record_len) + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + neighbor_feature = batch_neighbor_feature[b] + _, C, H, W = neighbor_feature.shape + neighbor_feature_flat = 
neighbor_feature.view(N,C,H*W) # (N, C, H*W) + + if self.with_spe: + neighbor_feature_pe = batch_neighbor_feature_pe[b] + neighbor_feature_flat_pe = neighbor_feature_pe.view(N,C,H*W) # (N, C, H*W) + query = neighbor_feature_flat_pe[0:1,...].permute(0,2,1) # (1, H*W, C) + key = neighbor_feature_flat_pe.permute(0,2,1) # (N, H*W, C) + else: + query = neighbor_feature_flat[0:1,...].permute(0,2,1) # (1, H*W, C) + key = neighbor_feature_flat.permute(0,2,1) # (N, H*W, C) + + value = neighbor_feature_flat.permute(0,2,1) + + if self.with_scm: + confidence_map = batch_confidence_map[b] + fused_feature = self.encode_layer(query, key, value, confidence_map) # (1, H*W, C) + else: + fused_feature = self.encode_layer(query, key, value) # (1, H*W, C) + + fused_feature = fused_feature.permute(0,2,1).reshape(1, C, H, W) + + x_fuse.append(fused_feature) + x_fuse = torch.concat(x_fuse, dim=0) + return x_fuse + +def add_pe_map(x): + # scale = 2 * math.pi + temperature = 10000 + num_pos_feats = x.shape[-3] // 2 # positional encoding dimension. C = 2d + + mask = torch.zeros([x.shape[-2], x.shape[-1]], dtype=torch.bool, device=x.device) #[H, W] + not_mask = ~mask + y_embed = not_mask.cumsum(0, dtype=torch.float32) # [H, W] + x_embed = not_mask.cumsum(1, dtype=torch.float32) # [H, W] + + dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=x.device) # [0,1,2,...,d] + dim_t = temperature ** (2 * (dim_t // 2) / num_pos_feats) # 10000^(2k/d), k is [0,0,1,1,...,d/2,d/2] + + pos_x = x_embed[:, :, None] / dim_t + pos_y = y_embed[:, :, None] / dim_t + pos_x = torch.stack((pos_x[:, :, 0::2].sin(), pos_x[:, :, 1::2].cos()), dim=3).flatten(2) + pos_y = torch.stack((pos_y[:, :, 0::2].sin(), pos_y[:, :, 1::2].cos()), dim=3).flatten(2) + pos = torch.cat((pos_y, pos_x), dim=2).permute(2, 0, 1) # [C, H, W] + + if len(x.shape) == 4: + x_pe = x + pos[None,:,:,:] + elif len(x.shape) == 5: + x_pe = x + pos[None,None,:,:,:] + return x_pe + + +class Where2comm(nn.Module): + def __init__(self, args): + super(Where2comm, self).__init__() + + self.communication = False + self.round = 1 + if 'communication' in args: + self.communication = True + self.naive_communication = Communication(args['communication']) + if 'round' in args['communication']: + self.round = args['communication']['round'] + self.discrete_ratio = args['voxel_size'][0] # voxel_size[0]=0.4 + self.downsample_rate = args['downsample_rate'] # 2/4, downsample rate from original feature map [200, 704] + + self.agg_mode = args['agg_operator']['mode'] + self.multi_scale = args['multi_scale'] + if self.multi_scale: + layer_nums = args['layer_nums'] + num_filters = args['num_filters'] + self.num_levels = len(layer_nums) + self.fuse_modules = nn.ModuleList() + for idx in range(self.num_levels): + if self.agg_mode == 'ATTEN': + fuse_network = AttenFusion(num_filters[idx]) + elif self.agg_mode == 'MAX': + fuse_network = MaxFusion() + elif self.agg_mode == 'Transformer': + fuse_network = TransformerFusion( + channels=num_filters[idx], + n_head=args['agg_operator']['n_head'], + with_spe=args['agg_operator']['with_spe'], + with_scm=args['agg_operator']['with_scm']) + self.fuse_modules.append(fuse_network) + else: + if self.agg_mode == 'ATTEN': + self.fuse_modules = AttenFusion(args['agg_operator']['feature_dim']) + elif self.agg_mode == 'MAX': + self.fuse_modules = MaxFusion() + elif self.agg_mode == 'Transformer': + self.fuse_network = TransformerFusion( + channels=args['agg_operator']['feature_dim'], + n_head=args['agg_operator']['n_head'], + 
with_spe=args['agg_operator']['with_spe'], + with_scm=args['agg_operator']['with_scm']) + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, x, rm, record_len, pairwise_t_matrix, backbone=None): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + Returns + ------- + Fused feature. + """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + if self.multi_scale: + ups = [] + # backbone.__dict__() + with_resnet = True if hasattr(backbone, 'resnet') else False + if with_resnet: + feats = backbone.resnet(x) + + for i in range(self.num_levels): + x = feats[i] if with_resnet else backbone.blocks[i](x) + + ############ 1. Communication (Mask the features) ######### + if i==0: + if self.communication: + batch_confidence_maps = self.regroup(rm, record_len) + _, communication_masks, communication_rates = self.naive_communication(batch_confidence_maps, record_len, pairwise_t_matrix) + x = x * communication_masks + else: + communication_rates = torch.tensor(0).to(x.device) + else: + if self.communication: + communication_masks = F.max_pool2d(communication_masks, kernel_size=2) + x = x * communication_masks + + ############ 2. Split the confidence map ####################### + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] + batch_node_features = self.regroup(x, record_len) + + ############ 3. Fusion #################################### + x_fuse = [] + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + node_features = batch_node_features[b] + C, H, W = node_features.shape[1:] + neighbor_feature = warp_affine_simple(node_features, + t_matrix[0, :, :, :], + (H, W)) + x_fuse.append(self.fuse_modules[i](neighbor_feature)) + x_fuse = torch.stack(x_fuse) + + ############ 4. Deconv #################################### + if len(backbone.deblocks) > 0: + ups.append(backbone.deblocks[i](x_fuse)) + else: + ups.append(x_fuse) + + if len(ups) > 1: + x_fuse = torch.cat(ups, dim=1) + elif len(ups) == 1: + x_fuse = ups[0] + + if len(backbone.deblocks) > self.num_levels: + x_fuse = backbone.deblocks[-1](x_fuse) + else: + ############ 1. Split the features ####################### + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] + batch_node_features = self.regroup(x, record_len) + batch_confidence_maps = self.regroup(rm, record_len) + + ############ 2. 
Communication (Mask the features) ######### + if self.communication: + _, communication_masks, communication_rates = self.naive_communication(batch_confidence_maps, record_len, pairwise_t_matrix) + else: + communication_rates = torch.tensor(0).to(x.device) + + ############ 3. Fusion #################################### + x_fuse = [] + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + node_features = batch_node_features[b] + if self.communication: + node_features = node_features * communication_masks[b] + neighbor_feature = warp_affine_simple(node_features, + t_matrix[0, :, :, :], + (H, W)) + x_fuse.append(self.fuse_modules(neighbor_feature)) + x_fuse = torch.stack(x_fuse) + + return x_fuse, communication_rates, {} diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fvoxelrcnn.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fvoxelrcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..5e9742cd514f9a7f39d3cf8174a9c5cb4645ec53 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fvoxelrcnn.py @@ -0,0 +1,82 @@ +import random, os + +import torch +from torch import nn +import numpy as np +from icecream import ic +from opencood.models.sub_modules.mean_vfe import MeanVFE +from opencood.models.sub_modules.sparse_backbone_3d import VoxelBackBone8x +from opencood.models.sub_modules.height_compression import HeightCompression +from opencood.models.sub_modules.cia_ssd_utils import SSFA, Head +from opencood.models.sub_modules.matcher_v2 import MatcherV2 +from opencood.models.sub_modules.voxel_rcnn_head import VoxelRCNNHead +from opencood.data_utils.post_processor.fpvrcnn_postprocessor import \ + FpvrcnnPostprocessor +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple + +class FVoxelRCNN(nn.Module): + def __init__(self, args): + super(FVoxelRCNN, self).__init__() + lidar_range = np.array(args['lidar_range']) + grid_size = np.round((lidar_range[3:6] - lidar_range[:3]) / + np.array(args['voxel_size'])).astype(np.int64) + self.vfe = MeanVFE(args['mean_vfe'], + args['mean_vfe']['num_point_features']) + self.spconv_block = VoxelBackBone8x(args['spconv'], + input_channels=args['spconv'][ + 'num_features_in'], + grid_size=grid_size) + self.map_to_bev = HeightCompression(args['map2bev']) + # set experiment to validate the ssfa module + self.ssfa = SSFA(args['ssfa']) + self.head = Head(**args['head']) + self.post_processor = FpvrcnnPostprocessor(args['post_processer'], + train=self.training) + self.matcher = MatcherV2(args['matcher'], args['lidar_range']) + self.roi_head = VoxelRCNNHead(args['roi_head'], self.spconv_block.backbone_channels) + self.train_stage2 = args['activate_stage2'] + + def forward(self, batch_dict): + # lidar + voxel_features = batch_dict['processed_lidar']['voxel_features'] + voxel_coords = batch_dict['processed_lidar']['voxel_coords'] + voxel_num_points = batch_dict['processed_lidar']['voxel_num_points'] + # cemera + + # save memory + batch_dict.pop('processed_lidar') + batch_dict.update({'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'batch_size': int(batch_dict['record_len'].sum()), + 'proj_first': batch_dict['proj_first'], + 'lidar_pose': batch_dict['lidar_pose']}) + + batch_dict = self.vfe(batch_dict) + batch_dict = self.spconv_block(batch_dict) + batch_dict = self.map_to_bev(batch_dict) + + out = 
self.ssfa(batch_dict['spatial_features']) + batch_dict['stage1_out'] = self.head(out) + ### stage 1 ### finished + + data_dict, output_dict = {}, {} + data_dict['ego'], output_dict['ego'] = batch_dict, batch_dict + + pred_box3d_list, scores_list = \ + self.post_processor.post_process(data_dict, output_dict, + stage1=True) + + batch_dict['det_boxes'] = pred_box3d_list + batch_dict['det_scores'] = scores_list + + if pred_box3d_list is not None and self.train_stage2: + batch_dict = self.matcher(batch_dict) + batch_dict = self.roi_head(batch_dict) + return batch_dict + + + +if __name__ == "__main__": + model = SSFA(None) + print(model) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/heter_encoders.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/heter_encoders.py new file mode 100644 index 0000000000000000000000000000000000000000..1d7db9bdae228302d4a2c974fb44647aa96a6edf --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/heter_encoders.py @@ -0,0 +1,301 @@ +# -*- coding: utf-8 -*- +# Author: Yifan Lu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch +import torch.nn as nn +import numpy as np +from opencood.models.sub_modules.lss_submodule import Up, CamEncode, BevEncode, CamEncode_Resnet101 +from opencood.utils.camera_utils import gen_dx_bx, cumsum_trick, QuickCumsum, depth_discretization +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.mean_vfe import MeanVFE +from opencood.models.sub_modules.sparse_backbone_3d import VoxelBackBone8x +from opencood.models.sub_modules.height_compression import HeightCompression + + + +class PointPillar(nn.Module): + def __init__(self, args): + super(PointPillar, self).__init__() + grid_size = (np.array(args['lidar_range'][3:6]) - np.array(args['lidar_range'][0:3])) / \ + np.array(args['voxel_size']) + grid_size = np.round(grid_size).astype(np.int64) + args['point_pillar_scatter']['grid_size'] = grid_size + + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + + + def forward(self, data_dict, modality_name): + voxel_features = data_dict[f'inputs_{modality_name}']['voxel_features'] + voxel_coords = data_dict[f'inputs_{modality_name}']['voxel_coords'] + voxel_num_points = data_dict[f'inputs_{modality_name}']['voxel_num_points'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points} + + batch_dict = self.pillar_vfe(batch_dict) + batch_dict = self.scatter(batch_dict) + lidar_feature_2d = batch_dict['spatial_features'] # H0, W0 + return lidar_feature_2d + +class SECOND(nn.Module): + def __init__(self, args): + super(SECOND, self).__init__() + lidar_range = np.array(args['lidar_range']) + grid_size = np.round((lidar_range[3:6] - lidar_range[:3]) / + np.array(args['voxel_size'])).astype(np.int64) + self.vfe = MeanVFE(args['mean_vfe'], + args['mean_vfe']['num_point_features']) + self.spconv_block = VoxelBackBone8x(args['spconv'], + 
input_channels=args['spconv'][ + 'num_features_in'], + grid_size=grid_size) + self.map_to_bev = HeightCompression(args['map2bev']) + + def forward(self, data_dict, modality_name): + voxel_features = data_dict[f'inputs_{modality_name}']['voxel_features'] + voxel_coords = data_dict[f'inputs_{modality_name}']['voxel_coords'] + voxel_num_points = data_dict[f'inputs_{modality_name}']['voxel_num_points'] + batch_size = voxel_coords[:,0].max() + 1 + + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'batch_size': batch_size} + + batch_dict = self.vfe(batch_dict) + batch_dict = self.spconv_block(batch_dict) + batch_dict = self.map_to_bev(batch_dict) + return batch_dict['spatial_features'] + +class LiftSplatShoot(nn.Module): + def __init__(self, args): + super(LiftSplatShoot, self).__init__() + self.grid_conf = args['grid_conf'] # 网格配置参数 + self.data_aug_conf = args['data_aug_conf'] # 数据增强配置参数 + dx, bx, nx = gen_dx_bx(self.grid_conf['xbound'], + self.grid_conf['ybound'], + self.grid_conf['zbound'], + ) # 划分网格 + + self.dx = dx.clone().detach().requires_grad_(False).to(torch.device("cuda")) # [0.4,0.4,20] + self.bx = bx.clone().detach().requires_grad_(False).to(torch.device("cuda")) # [-49.8,-49.8,0] + self.nx = nx.clone().detach().requires_grad_(False).to(torch.device("cuda")) # [250,250,1] + self.depth_supervision = args['depth_supervision'] + self.downsample = args['img_downsample'] # 下采样倍数 + self.camC = args['img_features'] # 图像特征维度 + self.frustum = self.create_frustum().clone().detach().requires_grad_(False).to(torch.device("cuda")) # frustum: DxfHxfWx3(41x8x16x3) + self.use_quickcumsum = True + self.D, _, _, _ = self.frustum.shape # D: 41 + self.camera_encoder_type = args['camera_encoder'] + if self.camera_encoder_type == 'EfficientNet': + self.camencode = CamEncode(self.D, self.camC, self.downsample, \ + self.grid_conf['ddiscr'], self.grid_conf['mode'], args['use_depth_gt'], args['depth_supervision']) + elif self.camera_encoder_type == 'Resnet101': + self.camencode = CamEncode_Resnet101(self.D, self.camC, self.downsample, \ + self.grid_conf['ddiscr'], self.grid_conf['mode'], args['use_depth_gt'], args['depth_supervision']) + + def create_frustum(self): + # make grid in image plane + ogfH, ogfW = self.data_aug_conf['final_dim'] # 原始图片大小 ogfH:128 ogfW:288 + fH, fW = ogfH // self.downsample, ogfW // self.downsample # 下采样16倍后图像大小 fH: 12 fW: 22 + # ds = torch.arange(*self.grid_conf['dbound'], dtype=torch.float).view(-1, 1, 1).expand(-1, fH, fW) # 在深度方向上划分网格 ds: DxfHxfW(41x12x22) + ds = torch.tensor(depth_discretization(*self.grid_conf['ddiscr'], self.grid_conf['mode']), dtype=torch.float).view(-1,1,1).expand(-1, fH, fW) + + D, _, _ = ds.shape # D: 41 表示深度方向上网格的数量 + xs = torch.linspace(0, ogfW - 1, fW, dtype=torch.float).view(1, 1, fW).expand(D, fH, fW) # 在0到288上划分18个格子 xs: DxfHxfW(41x12x22) + ys = torch.linspace(0, ogfH - 1, fH, dtype=torch.float).view(1, fH, 1).expand(D, fH, fW) # 在0到127上划分8个格子 ys: DxfHxfW(41x12x22) + + # D x H x W x 3 + frustum = torch.stack((xs, ys, ds), -1) # 堆积起来形成网格坐标, frustum[i,j,k,0]就是(i,j)位置,深度为k的像素的宽度方向上的栅格坐标 frustum: DxfHxfWx3 + return frustum + + def get_geometry(self, rots, trans, intrins, post_rots, post_trans): + """Determine the (x,y,z) locations (in the ego frame) + of the points in the point cloud. 
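+        Each frustum point (u, v, d) is first corrected for the image-space
+        augmentation (post_rots, post_trans) and then unprojected via
+        X_ego = rots @ intrins^{-1} @ (d * [u, v, 1]^T) + trans.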
+ Returns B x N x D x H/downsample x W/downsample x 3 + """ + B, N, _ = trans.shape # B:4(batchsize) N: 4(相机数目) + + # undo post-transformation + # B x N x D x H x W x 3 + # 抵消数据增强及预处理对像素的变化 + points = self.frustum - post_trans.view(B, N, 1, 1, 1, 3) + points = torch.inverse(post_rots).view(B, N, 1, 1, 1, 3, 3).matmul(points.unsqueeze(-1)) + + # cam_to_ego + points = torch.cat((points[:, :, :, :, :, :2] * points[:, :, :, :, :, 2:3], # points[:, :, :, :, :, 2:3] ranges from [4, 45) meters + points[:, :, :, :, :, 2:3] + ), 5) # 将像素坐标(u,v,d)变成齐次坐标(du,dv,d) + # d[u,v,1]^T=intrins*rots^(-1)*([x,y,z]^T-trans) + combine = rots.matmul(torch.inverse(intrins)) + points = combine.view(B, N, 1, 1, 1, 3, 3).matmul(points).squeeze(-1) + points += trans.view(B, N, 1, 1, 1, 3) # 将像素坐标d[u,v,1]^T转换到车体坐标系下的[x,y,z] + + return points # B x N x D x H x W x 3 (4 x 4 x 41 x 16 x 22 x 3) + + def get_cam_feats(self, x): + """Return B x N x D x H/downsample x W/downsample x C + """ + B, N, C, imH, imW = x.shape # B: 4 N: 4 C: 3 imH: 256 imW: 352 + + x = x.view(B*N, C, imH, imW) # B和N两个维度合起来 x: 16 x 4 x 256 x 352 + depth_items, x = self.camencode(x) # 进行图像编码 x: B*N x C x D x fH x fW(24 x 64 x 41 x 16 x 22) + x = x.view(B, N, self.camC, self.D, imH//self.downsample, imW//self.downsample) #将前两维拆开 x: B x N x C x D x fH x fW(4 x 6 x 64 x 41 x 16 x 22) + x = x.permute(0, 1, 3, 4, 5, 2) # x: B x N x D x fH x fW x C(4 x 6 x 41 x 16 x 22 x 64) + + return x, depth_items + + def voxel_pooling(self, geom_feats, x): + # geom_feats: B x N x D x H x W x 3 (4 x 6 x 41 x 16 x 22 x 3), D is discretization in "UD" or "LID" + # x: B x N x D x fH x fW x C(4 x 6 x 41 x 16 x 22 x 64), D is num_bins + + B, N, D, H, W, C = x.shape # B: 4 N: 6 D: 41 H: 16 W: 22 C: 64 + Nprime = B*N*D*H*W # Nprime + + # flatten x + x = x.reshape(Nprime, C) # 将图像展平,一共有 B*N*D*H*W 个点 + + # flatten indices + + geom_feats = ((geom_feats - (self.bx - self.dx/2.)) / self.dx).long() # 将[-48,48] [-10 10]的范围平移到 [0, 240), [0, 1) 计算栅格坐标并取整 + geom_feats = geom_feats.view(Nprime, 3) # 将像素映射关系同样展平 geom_feats: B*N*D*H*W x 3 + batch_ix = torch.cat([torch.full([Nprime//B, 1], ix, + device=x.device, dtype=torch.long) for ix in range(B)]) # 每个点对应于哪个batch + geom_feats = torch.cat((geom_feats, batch_ix), 1) # geom_feats: B*N*D*H*W x 4, geom_feats[:,3]表示batch_id + + # filter out points that are outside box + # 过滤掉在边界线之外的点 x:0~240 y: 0~240 z: 0 + kept = (geom_feats[:, 0] >= 0) & (geom_feats[:, 0] < self.nx[0])\ + & (geom_feats[:, 1] >= 0) & (geom_feats[:, 1] < self.nx[1])\ + & (geom_feats[:, 2] >= 0) & (geom_feats[:, 2] < self.nx[2]) + x = x[kept] + geom_feats = geom_feats[kept] + + # get tensors from the same voxel next to each other + ranks = geom_feats[:, 0] * (self.nx[1] * self.nx[2] * B)\ + + geom_feats[:, 1] * (self.nx[2] * B)\ + + geom_feats[:, 2] * B\ + + geom_feats[:, 3] # 给每一个点一个rank值,rank相等的点在同一个batch,并且在在同一个格子里面 + sorts = ranks.argsort() + x, geom_feats, ranks = x[sorts], geom_feats[sorts], ranks[sorts] # 按照rank排序,这样rank相近的点就在一起了 + # x: 168648 x 64 geom_feats: 168648 x 4 ranks: 168648 + + # cumsum trick + if not self.use_quickcumsum: + x, geom_feats = cumsum_trick(x, geom_feats, ranks) + else: + x, geom_feats = QuickCumsum.apply(x, geom_feats, ranks) # 一个batch的一个格子里只留一个点 x: 29072 x 64 geom_feats: 29072 x 4 + + # griddify (B x C x Z x X x Y) + # final = torch.zeros((B, C, self.nx[2], self.nx[0], self.nx[1]), device=x.device) # final: 4 x 64 x Z x X x Y + # final[geom_feats[:, 3], :, geom_feats[:, 2], geom_feats[:, 0], geom_feats[:, 1]] = x # 将x按照栅格坐标放到final中 + + # modify 
griddify (B x C x Z x Y x X) by Yifan Lu 2022.10.7 + # ------> x + # | + # | + # y + final = torch.zeros((B, C, self.nx[2], self.nx[1], self.nx[0]), device=x.device) # final: 4 x 64 x Z x Y x X + final[geom_feats[:, 3], :, geom_feats[:, 2], geom_feats[:, 1], geom_feats[:, 0]] = x # 将x按照栅格坐标放到final中 + + # collapse Z + final = torch.cat(final.unbind(dim=2), 1) # 消除掉z维 + + return final # final: 4 x 64 x 240 x 240 # B, C, H, W + + def get_voxels(self, x, rots, trans, intrins, post_rots, post_trans): + geom = self.get_geometry(rots, trans, intrins, post_rots, post_trans) # 像素坐标到自车中坐标的映射关系 geom: B x N x D x H x W x 3 (4 x N x 42 x 16 x 22 x 3) + x_img, depth_items = self.get_cam_feats(x) # 提取图像特征并预测深度编码 x: B x N x D x fH x fW x C(4 x N x 42 x 16 x 22 x 64) + x = self.voxel_pooling(geom, x_img) # x: 4 x 64 x 240 x 240 + + return x, depth_items + + def forward(self, data_dict, modality_name): + # x: [4,4,3,256, 352] + # rots: [4,4,3,3] + # trans: [4,4,3] + # intrins: [4,4,3,3] + # post_rots: [4,4,3,3] + # post_trans: [4,4,3] + image_inputs_dict = data_dict[f'inputs_{modality_name}'] + x, rots, trans, intrins, post_rots, post_trans = \ + image_inputs_dict['imgs'], image_inputs_dict['rots'], image_inputs_dict['trans'], image_inputs_dict['intrins'], image_inputs_dict['post_rots'], image_inputs_dict['post_trans'] + x, depth_items = self.get_voxels(x, rots, trans, intrins, post_rots, post_trans) # 将图像转换到BEV下,x: B x C x 240 x 240 (4 x 64 x 240 x 240) + + if self.depth_supervision: + self.depth_items = depth_items + + return x + + +class LiftSplatShootVoxel(LiftSplatShoot): + def voxel_pooling(self, geom_feats, x): + # geom_feats: B x N x D x H x W x 3 (4 x 6 x 41 x 16 x 22 x 3), D is discretization in "UD" or "LID" + # x: B x N x D x fH x fW x C(4 x 6 x 41 x 16 x 22 x 64), D is num_bins + + B, N, D, H, W, C = x.shape # B: 4 N: 6 D: 41 H: 16 W: 22 C: 64 + Nprime = B*N*D*H*W # Nprime + + # flatten x + x = x.reshape(Nprime, C) # 将图像展平,一共有 B*N*D*H*W 个点 + + # flatten indices + + geom_feats = ((geom_feats - (self.bx - self.dx/2.)) / self.dx).long() # 将[-48,48] [-10 10]的范围平移到 [0, 240), [0, 1) 计算栅格坐标并取整 + geom_feats = geom_feats.view(Nprime, 3) # 将像素映射关系同样展平 geom_feats: B*N*D*H*W x 3 + batch_ix = torch.cat([torch.full([Nprime//B, 1], ix, + device=x.device, dtype=torch.long) for ix in range(B)]) # 每个点对应于哪个batch + geom_feats = torch.cat((geom_feats, batch_ix), 1) # geom_feats: B*N*D*H*W x 4, geom_feats[:,3]表示batch_id + + # filter out points that are outside box + # 过滤掉在边界线之外的点 x:0~240 y: 0~240 z: 0 + kept = (geom_feats[:, 0] >= 0) & (geom_feats[:, 0] < self.nx[0])\ + & (geom_feats[:, 1] >= 0) & (geom_feats[:, 1] < self.nx[1])\ + & (geom_feats[:, 2] >= 0) & (geom_feats[:, 2] < self.nx[2]) + x = x[kept] + geom_feats = geom_feats[kept] + + # get tensors from the same voxel next to each other + ranks = geom_feats[:, 0] * (self.nx[1] * self.nx[2] * B)\ + + geom_feats[:, 1] * (self.nx[2] * B)\ + + geom_feats[:, 2] * B\ + + geom_feats[:, 3] # 给每一个点一个rank值,rank相等的点在同一个batch,并且在在同一个格子里面 + sorts = ranks.argsort() + x, geom_feats, ranks = x[sorts], geom_feats[sorts], ranks[sorts] # 按照rank排序,这样rank相近的点就在一起了 + # x: 168648 x 64 geom_feats: 168648 x 4 ranks: 168648 + + # cumsum trick + if not self.use_quickcumsum: + x, geom_feats = cumsum_trick(x, geom_feats, ranks) + else: + x, geom_feats = QuickCumsum.apply(x, geom_feats, ranks) # 一个batch的一个格子里只留一个点 x: 29072 x 64 geom_feats: 29072 x 4 + + # griddify (B x C x Z x X x Y) + # final = torch.zeros((B, C, self.nx[2], self.nx[0], self.nx[1]), device=x.device) # final: 4 x 64 x Z x X 
x Y + # final[geom_feats[:, 3], :, geom_feats[:, 2], geom_feats[:, 0], geom_feats[:, 1]] = x # 将x按照栅格坐标放到final中 + + # modify griddify (B x C x Z x Y x X) by Yifan Lu 2022.10.7 + # ------> x + # | + # | + # y + final = torch.zeros((B, C, self.nx[2], self.nx[1], self.nx[0]), device=x.device) # final: 4 x 64 x Z x Y x X + final[geom_feats[:, 3], :, geom_feats[:, 2], geom_feats[:, 1], geom_feats[:, 0]] = x # 将x按照栅格坐标放到final中 + + # collapse Z + #final = torch.max(final.unbind(dim=2), 1)[0] # 消除掉z维 + final = torch.max(final, 2)[0] # 消除掉z维 + return final # final: 4 x 64 x 240 x 240 # B, C, H, W \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/heter_model_late.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/heter_model_late.py new file mode 100644 index 0000000000000000000000000000000000000000..5bd48c15ffcd64f3f5657d051947a539414eccaa --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/heter_model_late.py @@ -0,0 +1,110 @@ +# Author: Yifan Lu +# In this heterogeneous version, feature align start before backbone. + +import torch +import torch.nn as nn +import numpy as np +from icecream import ic +import torchvision +from collections import OrderedDict, Counter +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +import importlib + +class HeterModelLate(nn.Module): + def __init__(self, args): + super(HeterModelLate, self).__init__() + modality_name_list = list(args.keys()) + modality_name_list = [x for x in modality_name_list if x.startswith("m") and x[1:].isdigit()] + self.modality_name_list = modality_name_list + self.cav_range = args['lidar_range'] + self.sensor_type_dict = OrderedDict() + + # setup each modality model + for modality_name in self.modality_name_list: + model_setting = args[modality_name] + sensor_name = model_setting['sensor_type'] + self.sensor_type_dict[modality_name] = sensor_name + + # import model + encoder_filename = "opencood.models.heter_encoders" + encoder_lib = importlib.import_module(encoder_filename) + encoder_class = None + target_model_name = model_setting['core_method'].replace('_', '') + + for name, cls in encoder_lib.__dict__.items(): + if name.lower() == target_model_name.lower(): + encoder_class = cls + + # build encoder + setattr(self, f"encoder_{modality_name}", encoder_class(model_setting['encoder_args'])) + # depth supervision for camera + if model_setting['encoder_args'].get("depth_supervision", False) : + setattr(self, f"depth_supervision_{modality_name}", True) + else: + setattr(self, f"depth_supervision_{modality_name}", False) + + # setup backbone (very light-weight) + setattr(self, f"backbone_{modality_name}", ResNetBEVBackbone(model_setting['backbone_args'])) + if sensor_name == "camera": + camera_mask_args = model_setting['camera_mask_args'] + setattr(self, f"crop_ratio_W_{modality_name}", (self.cav_range[3]) / (camera_mask_args['grid_conf']['xbound'][1])) + setattr(self, f"crop_ratio_H_{modality_name}", (self.cav_range[4]) / (camera_mask_args['grid_conf']['ybound'][1])) + + # setup layers (actual backbone) + setattr(self, f"layers_{modality_name}", ResNetBEVBackbone(model_setting['layers_args'])) + setattr(self, f"layers_num_{modality_name}", len(model_setting['layers_args']['num_upsample_filter'])) + + # setup shrink head + setattr(self, f"shrink_conv_{modality_name}", DownsampleConv(model_setting['shrink_header'])) + + # setup detection head + 
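+            # Per-anchor output channels (matching the conv layers defined below):
+            #   cls_head: anchor_number             -> objectness score per anchor
+            #   reg_head: anchor_number * 7         -> box residuals (typically x, y, z, h, w, l, yaw)
+            #   dir_head: anchor_number * num_bins  -> direction-classification bins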
in_head = model_setting['head_args']['in_head'] + setattr(self, f'cls_head_{modality_name}', nn.Conv2d(in_head, args['anchor_number'], kernel_size=1)) + setattr(self, f'reg_head_{modality_name}', nn.Conv2d(in_head, args['anchor_number'] * 7, kernel_size=1)) + setattr(self, f'dir_head_{modality_name}', nn.Conv2d(in_head, args['anchor_number'] * args['dir_args']['num_bins'], kernel_size=1)) + + + def forward(self, data_dict): + output_dict = {} + modality_name = [x for x in list(data_dict.keys()) if x.startswith("inputs_")] + assert len(modality_name) == 1 + modality_name = modality_name[0].lstrip('inputs_') + + feature = eval(f"self.encoder_{modality_name}")(data_dict, modality_name) + feature = eval(f"self.backbone_{modality_name}")({"spatial_features": feature})['spatial_features_2d'] + + if self.sensor_type_dict[modality_name] == "camera": + # should be padding. Instead of masking + _, _, H, W = feature.shape + feature = torchvision.transforms.CenterCrop( + (int(H*eval(f"self.crop_ratio_H_{modality_name}")), int(W*eval(f"self.crop_ratio_W_{modality_name}"))) + )(feature) + + if eval(f"self.depth_supervision_{modality_name}"): + output_dict.update({ + f"depth_items_{modality_name}": eval(f"self.encoder_{modality_name}").depth_items + }) + + # multiscale fusion. + # Here we do not use layer0 of the "self.layers_{modality_name}" + # We assume feature from the "self.backbone_{modality_name}" is the first-scale feature + feature_list = [feature] + + for i in range(1, eval(f"self.layers_num_{modality_name}")): + feature = eval(f"self.layers_{modality_name}").get_layer_i_feature(feature, layer_i=i) + feature_list.append(feature) + + feature = eval(f"self.layers_{modality_name}").decode_multiscale_feature(feature_list) + + feature = eval(f"self.shrink_conv_{modality_name}")(feature) + + cls_preds = eval(f"self.cls_head_{modality_name}")(feature) + reg_preds = eval(f"self.reg_head_{modality_name}")(feature) + dir_preds = eval(f"self.dir_head_{modality_name}")(feature) + + output_dict.update({'cls_preds': cls_preds, + 'reg_preds': reg_preds, + 'dir_preds': dir_preds}) + + return output_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/heter_model_sharedhead.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/heter_model_sharedhead.py new file mode 100644 index 0000000000000000000000000000000000000000..62e79e941f5585cb00643d2ac87970f7d7b83607 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/heter_model_sharedhead.py @@ -0,0 +1,294 @@ +# Author: Yifan Lu +# In this heterogeneous version, feature align start before backbone. 
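+# Rough data flow of HeterModelSharedhead (see forward() below):
+#   per modality: encoder_mX -> backbone_mX (ResNetBEVBackbone) -> aligner_mX,
+#   camera branches center-cropped to the lidar BEV range;
+#   all agents' features are then stacked, fused per scale (Max/Att), decoded,
+#   optionally shrunk, and fed to the shared cls/reg/dir heads.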
+ +import torch +import torch.nn as nn +import numpy as np +from icecream import ic +from collections import OrderedDict, Counter +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.feature_alignnet import AlignNet +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +from opencood.models.fuse_modules.f_cooper_fuse import SpatialFusion +from opencood.models.fuse_modules.fusion_in_one import MaxFusion, AttFusion, DiscoFusion, V2VNetFusion, V2XViTFusion, When2commFusion, warp_feature +from opencood.utils.transformation_utils import normalize_pairwise_tfm +from opencood.utils.model_utils import check_trainable_module, fix_bn, unfix_bn +import importlib +import torchvision + +class HeterModelSharedhead(nn.Module): + def __init__(self, args): + super(HeterModelSharedhead, self).__init__() + self.args = args + modality_name_list = list(args.keys()) + modality_name_list = [x for x in modality_name_list if x.startswith("m") and x[1:].isdigit()] + self.modality_name_list = modality_name_list + + self.ego_modality = args['ego_modality'] + self.stage2_added_modality = args.get('stage2_added_modality', None) + + self.cav_range = args['lidar_range'] + self.sensor_type_dict = OrderedDict() + + # setup each modality model + for modality_name in self.modality_name_list: + model_setting = args[modality_name] + sensor_name = model_setting['sensor_type'] + self.sensor_type_dict[modality_name] = sensor_name + + # import model + encoder_filename = "opencood.models.heter_encoders" + encoder_lib = importlib.import_module(encoder_filename) + encoder_class = None + target_model_name = model_setting['core_method'].replace('_', '') + + for name, cls in encoder_lib.__dict__.items(): + if name.lower() == target_model_name.lower(): + encoder_class = cls + + """ + Encoder building + """ + setattr(self, f"encoder_{modality_name}", encoder_class(model_setting['encoder_args'])) + if model_setting['encoder_args'].get("depth_supervision", False): + setattr(self, f"depth_supervision_{modality_name}", True) + else: + setattr(self, f"depth_supervision_{modality_name}", False) + + """ + Backbone building + """ + setattr(self, f"backbone_{modality_name}", ResNetBEVBackbone(model_setting['backbone_args'])) + + """ + Aligner building + """ + setattr(self, f"aligner_{modality_name}", AlignNet(model_setting['aligner_args'])) + if sensor_name == "camera": + camera_mask_args = model_setting['camera_mask_args'] + setattr(self, f"crop_ratio_W_{modality_name}", (self.cav_range[3]) / (camera_mask_args['grid_conf']['xbound'][1])) + setattr(self, f"crop_ratio_H_{modality_name}", (self.cav_range[4]) / (camera_mask_args['grid_conf']['ybound'][1])) + setattr(self, f"xdist_{modality_name}", (camera_mask_args['grid_conf']['xbound'][1] - camera_mask_args['grid_conf']['xbound'][0])) + setattr(self, f"ydist_{modality_name}", (camera_mask_args['grid_conf']['ybound'][1] - camera_mask_args['grid_conf']['ybound'][0])) + + """For feature transformation""" + self.H = (self.cav_range[4] - self.cav_range[1]) + self.W = (self.cav_range[3] - self.cav_range[0]) + self.fake_voxel_size = 1 + + """ + single supervision + """ + self.supervise_single = False + if args.get("supervise_single", False): + self.supervise_single = True + in_head_single = 
args['in_head_single'] + setattr(self, f'cls_head_single', nn.Conv2d(in_head_single, args['anchor_number'], kernel_size=1)) + setattr(self, f'reg_head_single', nn.Conv2d(in_head_single, args['anchor_number'] * 7, kernel_size=1)) + setattr(self, f'dir_head_single', nn.Conv2d(in_head_single, args['anchor_number'] * args['dir_args']['num_bins'], kernel_size=1)) + + + """ + Fusion, by default multiscale fusion: + """ + self.backbone = ResNetBEVBackbone(args['fusion_backbone']) + self.fusion_net = nn.ModuleList() + + for i in range(len(args['fusion_backbone']['layer_nums'])): + if args['fusion_method'] == "max": + self.fusion_net.append(MaxFusion()) + if args['fusion_method'] == "att": + self.fusion_net.append(AttFusion(args['att']['feat_dim'][i])) + + + """ + Shrink header + """ + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + + """ + Shared Heads + """ + self.cls_head = nn.Conv2d(args['in_head'], args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(args['in_head'], 7 * args['anchor_number'], + kernel_size=1) + self.dir_head = nn.Conv2d(args['in_head'], args['dir_args']['num_bins'] * args['anchor_number'], + kernel_size=1) # BIN_NUM = 2 + + self.model_train_init() + + # check again which module is not fixed. + check_trainable_module(self) + + def model_train_init(self): + if self.stage2_added_modality is None: + return + """ + In stage 2, only ONE modality's aligner is trainable. + We first fix all modules, and set the aligner trainable. + """ + # fix all modules + self.eval() + for p in self.parameters(): + p.requires_grad_(False) + + # unfix aligner module + for p in eval(f"self.aligner_{self.stage2_added_modality}").parameters(): + p.requires_grad_(True) + eval(f"self.aligner_{self.stage2_added_modality}").apply(unfix_bn) + + + def forward(self, data_dict): + output_dict = {} + agent_modality_list = data_dict['agent_modality_list'] + t_matrix = normalize_pairwise_tfm(data_dict['pairwise_t_matrix'], self.H, self.W, self.fake_voxel_size) + record_len = data_dict['record_len'] + # print(agent_modality_list) + + modality_count_dict = Counter(agent_modality_list) + modality_feature_dict = {} + + for modality_name in self.modality_name_list: + if modality_name not in modality_count_dict: + continue + feature = eval(f"self.encoder_{modality_name}")(data_dict, modality_name) + feature = eval(f"self.backbone_{modality_name}")({"spatial_features": feature})['spatial_features_2d'] + feature = eval(f"self.aligner_{modality_name}")(feature) + modality_feature_dict[modality_name] = feature + + + """ + Spatial Align + """ + if len(self.ego_modality) == 2 and eval(f"self.aligner_{self.ego_modality}.spatial_align_flag"): + """ + e.g. + self.ego_modality = 'm4'. The length of string is 2. + record_len = [2, 3, 3] + agent_modality_list = [m4, m1, m4, m4, m1, m4, m1, m1]. + ego_idx_in_allcav = [0, 2, 5] + student_idx_in_allcav = [0, 2, 3, 5] + ego_idx_in_student = [0, 1, 3] + + in eval, ego can be non-student. only student ego will perform spatial align. 
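+            Continuing the example above (here every ego happens to be a student):
+            student_ego_idx_in_allcav  = [0, 2, 5]
+            student_ego_idx_in_student = [0, 1, 3]   (named ego_idx_in_student above)
+            student_ego_idx_in_ego     = [0, 1, 2]   -> spatial_align_sample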
+ """ + record_len_list = record_len.detach().cpu().numpy().tolist() + ego_idx_in_allcav = [0] + np.cumsum(record_len_list)[:-1].tolist() + + student_idx_in_allcav = [i for i, x in enumerate(agent_modality_list) if x == self.ego_modality] + student_ego_idx_in_allcav = [i for i in ego_idx_in_allcav if i in student_idx_in_allcav] + + student_ego_idx_in_student = [student_idx_in_allcav.index(x) for x in student_ego_idx_in_allcav] + student_ego_idx_in_ego = [ego_idx_in_allcav.index(x) for x in student_ego_idx_in_allcav] + spatial_align_sample = student_ego_idx_in_ego # within a batch, which samples will perform spatial align? only ego is student. + + if(len(spatial_align_sample)): + student_feature = modality_feature_dict[self.ego_modality][student_ego_idx_in_student] # ego in all student modality + + counting_dict = {modality_name: 0 for modality_name in self.modality_name_list} + teacher_feature_2d_list = [] # the same shape as 'feature', but replace ego modality feature with all zero. + + ego_aligner = eval(f"self.aligner_{self.ego_modality}") + + for modality_name in agent_modality_list: + feat_idx = counting_dict[modality_name] + agent_feature = modality_feature_dict[modality_name][feat_idx] + if modality_name in ego_aligner.teacher: + teacher_feature_2d_list.append(agent_feature) + else: + teacher_feature_2d_list.append(torch.zeros_like(agent_feature, device=agent_feature.device)) + counting_dict[modality_name] += 1 + + # unify the feature shape + _, _, H, W = modality_feature_dict[self.ego_modality].shape + target_H = int(H*eval(f"self.crop_ratio_H_{self.ego_modality}")) + target_W = int(W*eval(f"self.crop_ratio_W_{self.ego_modality}")) + crop_func = torchvision.transforms.CenterCrop((target_H, target_W)) + teacher_feature_2d_list = [crop_func(feat) for feat in teacher_feature_2d_list] + + teacher_feature_full = torch.stack(teacher_feature_2d_list) + teacher_feature = MaxFusion()(teacher_feature_full, record_len, t_matrix) + teacher_feature = torchvision.transforms.CenterCrop((H, W))(teacher_feature) + teacher_feature = teacher_feature[spatial_align_sample] + + modality_feature_dict[self.ego_modality][student_ego_idx_in_student] = \ + ego_aligner.spatail_align(student_feature, teacher_feature, + (eval(f"self.xdist_{self.ego_modality}"), eval(f"self.ydist_{self.ego_modality}"))) + + """ + Crop/Padd camera feature map. + """ + for modality_name in self.modality_name_list: + if modality_name in modality_count_dict: + if self.sensor_type_dict[modality_name] == "camera": + # should be padding. 
Instead of masking + feature = modality_feature_dict[modality_name] + _, _, H, W = feature.shape + target_H = int(H*eval(f"self.crop_ratio_H_{modality_name}")) + target_W = int(W*eval(f"self.crop_ratio_W_{modality_name}")) + + crop_func = torchvision.transforms.CenterCrop((target_H, target_W)) + modality_feature_dict[modality_name] = crop_func(feature) + if eval(f"self.depth_supervision_{modality_name}"): + output_dict.update({ + f"depth_items_{modality_name}": eval(f"self.encoder_{modality_name}").depth_items + }) + + """ + Assemble heter features + """ + counting_dict = {modality_name:0 for modality_name in self.modality_name_list} + heter_feature_2d_list = [] + for modality_name in agent_modality_list: + feat_idx = counting_dict[modality_name] + heter_feature_2d_list.append(modality_feature_dict[modality_name][feat_idx]) + counting_dict[modality_name] += 1 + + heter_feature_2d = torch.stack(heter_feature_2d_list) + + """ + Single supervision + """ + if self.supervise_single: + cls_preds_before_fusion = self.cls_head_single(heter_feature_2d) + reg_preds_before_fusion = self.reg_head_single(heter_feature_2d) + dir_preds_before_fusion = self.dir_head_single(heter_feature_2d) + output_dict.update({'cls_preds_single': cls_preds_before_fusion, + 'reg_preds_single': reg_preds_before_fusion, + 'dir_preds_single': dir_preds_before_fusion}) + + """ + Feature Fusion (multiscale). + + we omit self.backbone's first layer. + """ + + feature_list = [heter_feature_2d] + for i in range(1, len(self.fusion_net)): + heter_feature_2d = self.backbone.get_layer_i_feature(heter_feature_2d, layer_i=i) + feature_list.append(heter_feature_2d) + + fused_feature_list = [] + for i, fuse_module in enumerate(self.fusion_net): + fused_feature_list.append(fuse_module(feature_list[i], record_len, t_matrix)) + fused_feature = self.backbone.decode_multiscale_feature(fused_feature_list) + + if self.shrink_flag: + fused_feature = self.shrink_conv(fused_feature) + + cls_preds = self.cls_head(fused_feature) + reg_preds = self.reg_head(fused_feature) + dir_preds = self.dir_head(fused_feature) + + output_dict.update({'cls_preds': cls_preds, + 'reg_preds': reg_preds, + 'dir_preds': dir_preds}) + + return output_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/lift_splat_shoot.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/lift_splat_shoot.py new file mode 100644 index 0000000000000000000000000000000000000000..d3b4c4ead5541cd70ff4b054e5d27e13e468c405 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/lift_splat_shoot.py @@ -0,0 +1,220 @@ +""" +Copyright (C) 2020 NVIDIA Corporation. All rights reserved. +Licensed under the NVIDIA Source Code License. See LICENSE at https://github.com/nv-tlabs/lift-splat-shoot. 
+Authors: Jonah Philion and Sanja Fidler +""" + +import torch +from torch import nn +from efficientnet_pytorch import EfficientNet +from torchvision.models.resnet import resnet18 +from icecream import ic + +from opencood.utils.camera_utils import gen_dx_bx, cumsum_trick, QuickCumsum, depth_discretization +from opencood.models.sub_modules.lss_submodule import Up, CamEncode, BevEncode, CamEncode_Resnet101 +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from matplotlib import pyplot as plt + + +class LiftSplatShoot(nn.Module): + def __init__(self, args): + super(LiftSplatShoot, self).__init__() + self.grid_conf = args['grid_conf'] # 网格配置参数 + self.data_aug_conf = args['data_aug_conf'] # 数据增强配置参数 + self.bevout_feature = args['bevout_feature'] + dx, bx, nx = gen_dx_bx(self.grid_conf['xbound'], + self.grid_conf['ybound'], + self.grid_conf['zbound'], + ) # 划分网格 + + self.dx = dx.clone().detach().requires_grad_(False).to(torch.device("cuda")) # [0.4,0.4,20] + self.bx = bx.clone().detach().requires_grad_(False).to(torch.device("cuda")) # [-49.8,-49.8,0] + self.nx = nx.clone().detach().requires_grad_(False).to(torch.device("cuda")) # [250,250,1] + + self.downsample = args['img_downsample'] # 下采样倍数 + self.camC = args['img_features'] # 图像特征维度 + self.frustum = self.create_frustum().clone().detach().requires_grad_(False).to(torch.device("cuda")) # frustum: DxfHxfWx3(41x8x16x3) + + self.D, _, _, _ = self.frustum.shape # D: 41 + self.camera_encoder_type = args['camera_encoder'] + if self.camera_encoder_type == 'EfficientNet': + self.camencode = CamEncode(self.D, self.camC, self.downsample, \ + self.grid_conf['ddiscr'], self.grid_conf['mode'], args['use_depth_gt'], args['depth_supervision']) + elif self.camera_encoder_type == 'Resnet101': + self.camencode = CamEncode_Resnet101(self.D, self.camC, self.downsample, \ + self.grid_conf['ddiscr'], self.grid_conf['mode'], args['use_depth_gt'], args['depth_supervision']) + + self.bevencode = BevEncode(inC=self.camC, outC=self.bevout_feature) + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + + self.cls_head = nn.Conv2d(self.bevout_feature, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.bevout_feature, 7 * args['anchor_number'], + kernel_size=1) + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_head = nn.Conv2d(self.bevout_feature, args['dir_args']['num_bins'] * args['anchor_number'], + kernel_size=1) # BIN_NUM = 2 + else: + self.use_dir = False + + # toggle using QuickCumsum vs. 
autograd
+        self.use_quickcumsum = True
+
+        # for p in self.parameters():
+        #     p.requires_grad = False
+        # for p in self.camencode.depth_head.parameters():
+        #     p.requires_grad = True
+        #     print("freeze ",p)
+
+    def create_frustum(self):
+        # make grid in image plane
+        ogfH, ogfW = self.data_aug_conf['final_dim']  # original image size ogfH: 128 ogfW: 288
+        fH, fW = ogfH // self.downsample, ogfW // self.downsample  # image size after 16x downsampling fH: 12 fW: 22
+        # ds = torch.arange(*self.grid_conf['dbound'], dtype=torch.float).view(-1, 1, 1).expand(-1, fH, fW)  # grid along the depth direction ds: DxfHxfW (41x12x22)
+        ds = torch.tensor(depth_discretization(*self.grid_conf['ddiscr'], self.grid_conf['mode']), dtype=torch.float).view(-1,1,1).expand(-1, fH, fW)
+
+        D, _, _ = ds.shape  # D: 41, number of grid cells along the depth direction
+        xs = torch.linspace(0, ogfW - 1, fW, dtype=torch.float).view(1, 1, fW).expand(D, fH, fW)  # split 0~288 into fW cells, xs: DxfHxfW (41x12x22)
+        ys = torch.linspace(0, ogfH - 1, fH, dtype=torch.float).view(1, fH, 1).expand(D, fH, fW)  # split 0~127 into fH cells, ys: DxfHxfW (41x12x22)
+
+        # D x H x W x 3
+        frustum = torch.stack((xs, ys, ds), -1)  # stack into grid coordinates; frustum[i,j,k,0] is the width-direction grid coordinate of pixel (i,j) at depth index k, frustum: DxfHxfWx3
+        return frustum
+
+    def get_geometry(self, rots, trans, intrins, post_rots, post_trans):
+        """Determine the (x,y,z) locations (in the ego frame)
+        of the points in the point cloud.
+        Returns B x N x D x H/downsample x W/downsample x 3
+        """
+        B, N, _ = trans.shape  # B: 4 (batch size) N: 4 (number of cameras)
+
+        # undo post-transformation
+        # B x N x D x H x W x 3
+        # undo the pixel changes introduced by data augmentation and preprocessing
+        points = self.frustum - post_trans.view(B, N, 1, 1, 1, 3)
+        points = torch.inverse(post_rots).view(B, N, 1, 1, 1, 3, 3).matmul(points.unsqueeze(-1))
+
+        # cam_to_ego
+        points = torch.cat((points[:, :, :, :, :, :2] * points[:, :, :, :, :, 2:3],  # points[:, :, :, :, :, 2:3] ranges from [4, 45) meters
+                            points[:, :, :, :, :, 2:3]
+                            ), 5)  # turn the pixel coordinates (u,v,d) into homogeneous-style coordinates (du,dv,d)
+        # d[u,v,1]^T = intrins * rots^(-1) * ([x,y,z]^T - trans)
+        combine = rots.matmul(torch.inverse(intrins))
+        points = combine.view(B, N, 1, 1, 1, 3, 3).matmul(points).squeeze(-1)
+        points += trans.view(B, N, 1, 1, 1, 3)  # map the pixel coordinates d[u,v,1]^T to [x,y,z] in the ego (vehicle) frame
+
+        return points  # B x N x D x H x W x 3 (4 x 4 x 41 x 16 x 22 x 3)
+
+    def get_cam_feats(self, x):
+        """Return B x N x D x H/downsample x W/downsample x C
+        """
+        B, N, C, imH, imW = x.shape  # B: 4 N: 4 C: 3 imH: 256 imW: 352
+
+        x = x.view(B*N, C, imH, imW)  # merge the B and N dimensions, x: 16 x 4 x 256 x 352
+        depth_items, x = self.camencode(x)  # run the image encoder, x: B*N x C x D x fH x fW (24 x 64 x 41 x 16 x 22)
+        x = x.view(B, N, self.camC, self.D, imH//self.downsample, imW//self.downsample)  # split the first dimension back into B and N, x: B x N x C x D x fH x fW (4 x 6 x 64 x 41 x 16 x 22)
+        x = x.permute(0, 1, 3, 4, 5, 2)  # x: B x N x D x fH x fW x C (4 x 6 x 41 x 16 x 22 x 64)
+
+        return x, depth_items
+
+    def voxel_pooling(self, geom_feats, x):
+        # geom_feats: B x N x D x H x W x 3 (4 x 6 x 41 x 16 x 22 x 3), D is discretization in "UD" or "LID"
+        # x: B x N x D x fH x fW x C (4 x 6 x 41 x 16 x 22 x 64), D is num_bins
+
+        B, N, D, H, W, C = x.shape  # B: 4 N: 6 D: 41 H: 16 W: 22 C: 64
+        Nprime = B*N*D*H*W  # Nprime
+
+        # flatten x
+        x = x.reshape(Nprime, C)  # flatten the features; there are B*N*D*H*W points in total
+
+        # flatten indices
+
+        geom_feats = ((geom_feats - (self.bx - self.dx/2.)) / self.dx).long()  # shift the ranges [-48,48] / [-10,10] to [0, 240) / [0, 1); compute voxel indices and floor
+        geom_feats = geom_feats.view(Nprime, 3)  # flatten the pixel-to-voxel mapping as well, geom_feats: B*N*D*H*W x 3
+        batch_ix = torch.cat([torch.full([Nprime//B, 1], ix,
+                                         device=x.device, dtype=torch.long) for ix in range(B)])  # which batch each point belongs to
+        geom_feats = torch.cat((geom_feats, batch_ix), 1)  # geom_feats: B*N*D*H*W x 4, geom_feats[:,3] is the batch id
+
+        # filter out points that are outside box
+        # drop points that fall outside the grid, x: 0~240 y: 0~240 z: 0
+        kept = (geom_feats[:, 0] >= 0) & (geom_feats[:, 0] < self.nx[0])\
+            & (geom_feats[:, 1] >= 0) & (geom_feats[:, 1] < self.nx[1])\
+            & (geom_feats[:, 2] >= 0) & (geom_feats[:, 2] < self.nx[2])
+        x = x[kept]
+        geom_feats = geom_feats[kept]
+
+        # get tensors from the same voxel next to each other
+        ranks = geom_feats[:, 0] * (self.nx[1] * self.nx[2] * B)\
+            + geom_feats[:, 1] * (self.nx[2] * B)\
+            + geom_feats[:, 2] * B\
+            + geom_feats[:, 3]  # assign each point a rank; points with equal rank belong to the same batch and the same voxel
+        sorts = ranks.argsort()
+        x, geom_feats, ranks = x[sorts], geom_feats[sorts], ranks[sorts]  # sort by rank so that points in the same voxel become adjacent
+        # x: 168648 x 64 geom_feats: 168648 x 4 ranks: 168648
+
+        # cumsum trick
+        if not self.use_quickcumsum:
+            x, geom_feats = cumsum_trick(x, geom_feats, ranks)
+        else:
+            x, geom_feats = QuickCumsum.apply(x, geom_feats, ranks)  # keep a single reduced point per voxel per batch, x: 29072 x 64 geom_feats: 29072 x 4
+
+        # griddify (B x C x Z x X x Y)
+        # final = torch.zeros((B, C, self.nx[2], self.nx[0], self.nx[1]), device=x.device)  # final: 4 x 64 x Z x X x Y
+        # final[geom_feats[:, 3], :, geom_feats[:, 2], geom_feats[:, 0], geom_feats[:, 1]] = x  # scatter x into final at its voxel coordinates
+
+        # modify griddify (B x C x Z x Y x X) by Yifan Lu 2022.10.7
+        # ------> x
+        # |
+        # |
+        # y
+        final = torch.zeros((B, C, self.nx[2], self.nx[1], self.nx[0]), device=x.device)  # final: 4 x 64 x Z x Y x X
+        final[geom_feats[:, 3], :, geom_feats[:, 2], geom_feats[:, 1], geom_feats[:, 0]] = x  # scatter x into final at its voxel coordinates
+
+        # collapse Z
+        final = torch.cat(final.unbind(dim=2), 1)  # collapse the z dimension
+
+        return final  # final: 4 x 64 x 240 x 240 # B, C, H, W
+
+    def get_voxels(self, x, rots, trans, intrins, post_rots, post_trans):
+        geom = self.get_geometry(rots, trans, intrins, post_rots, post_trans)  # mapping from pixel coordinates to ego-frame coordinates, geom: B x N x D x H x W x 3 (4 x N x 42 x 16 x 22 x 3)
+        x_img, depth_items = self.get_cam_feats(x)  # extract image features and predict the depth distribution, x: B x N x D x fH x fW x C (4 x N x 42 x 16 x 22 x 64)
+        x = self.voxel_pooling(geom, x_img)  # x: 4 x 64 x 240 x 240
+
+        return x, depth_items
+
+    def forward(self, data_dict):
+        # x: [4,4,3,256, 352]
+        # rots: [4,4,3,3]
+        # trans: [4,4,3]
+        # intrins: [4,4,3,3]
+        # post_rots: [4,4,3,3]
+        # post_trans: [4,4,3]
+        image_inputs_dict = data_dict['image_inputs']
+        x, rots, trans, intrins, post_rots, post_trans = \
+            image_inputs_dict['imgs'], image_inputs_dict['rots'], image_inputs_dict['trans'], image_inputs_dict['intrins'], image_inputs_dict['post_rots'], image_inputs_dict['post_trans']
+        x, depth_items = self.get_voxels(x, rots, trans, intrins, post_rots, post_trans)  # project the images into BEV, x: B x C x 240 x 240 (4 x 64 x 240 x 240)
+
+        x = self.bevencode(x)  # extract BEV features with a ResNet-18 encoder, x: 4 x C x 240 x 240
+
+        if self.shrink_flag:
+            x = self.shrink_conv(x)
+        # 4 x C x 120 x 120
+        psm = self.cls_head(x)
+        rm = self.reg_head(x)
+        output_dict = {'cls_preds': psm,
+                       'reg_preds': rm,
+                       'depth_items': depth_items}
+
+        if self.use_dir:
+            dm = self.dir_head(x)
+            output_dict.update({"dir_preds": dm})
+
+        return output_dict
+
+
+def compile_model(grid_conf, data_aug_conf, outC):
+    return LiftSplatShoot(grid_conf, data_aug_conf, outC)
\ No newline at end of file
diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/lift_splat_shoot_intermediate.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/lift_splat_shoot_intermediate.py
new file mode 100644
index
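(Editor's note: the `voxel_pooling` above relies on a sort-by-rank plus prefix-sum reduction that is easy to misread. The snippet below is a minimal, self-contained toy version of that trick with invented tensors and a sum reduction only; it is a sketch of the idea, not code from this repository, and `QuickCumsum` is assumed to compute the same per-voxel sums with a custom backward pass.)

```python
import torch

# Toy version of the sort-by-rank + cumsum reduction used in voxel_pooling above.
feats = torch.tensor([[1.], [2.], [3.], [4.]])   # per-point features, C = 1 (invented)
ranks = torch.tensor([3, 1, 3, 1])               # voxel id of each point (invented)

order = ranks.argsort()
feats, ranks = feats[order], ranks[order]        # points of the same voxel become adjacent

feats = feats.cumsum(0)                          # inclusive prefix sum over all points
kept = torch.ones(feats.shape[0], dtype=torch.bool)
kept[:-1] = ranks[1:] != ranks[:-1]              # True at the last point of each voxel
feats = feats[kept]                              # prefix sums at voxel boundaries
feats = torch.cat((feats[:1], feats[1:] - feats[:-1]))  # differences -> per-voxel sums
print(feats)                                     # tensor([[6.], [4.]]): voxel 1 = 2+4, voxel 3 = 1+3
```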
0000000000000000000000000000000000000000..44c0ad669a4f4b7191be81d85ad77191ce8b0b98 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/lift_splat_shoot_intermediate.py @@ -0,0 +1,77 @@ +""" +Copyright (C) 2020 NVIDIA Corporation. All rights reserved. +Licensed under the NVIDIA Source Code License. See LICENSE at https://github.com/nv-tlabs/lift-splat-shoot. +Authors: Jonah Philion and Sanja Fidler + +Intermediate fusion for camera based collaboration +""" + +from numpy import record +import torch +from torch import nn +from efficientnet_pytorch import EfficientNet +from torchvision.models.resnet import resnet18 +from icecream import ic +from opencood.models.lift_splat_shoot import LiftSplatShoot +from opencood.utils.camera_utils import gen_dx_bx, cumsum_trick, QuickCumsum +from opencood.models.sub_modules.lss_submodule import BevEncodeMSFusion, BevEncodeSSFusion, Up, CamEncode, BevEncode +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from matplotlib import pyplot as plt + + +class LiftSplatShootIntermediate(LiftSplatShoot): + def __init__(self, args): + super(LiftSplatShootIntermediate, self).__init__(args) + + fusion_args = args['fusion_args'] + self.ms = args['fusion_args']['core_method'].endswith("ms") + if self.ms: + self.bevencode = BevEncodeMSFusion(fusion_args) + else: + self.bevencode = BevEncodeSSFusion(fusion_args) + self.supervise_single = args['supervise_single'] + + for p in self.camencode.parameters(): + p.requires_grad_(False) + + if self.supervise_single: + self.cls_head_before_fusion = nn.Conv2d(self.bevout_feature, args['anchor_number'], kernel_size=1) + self.reg_head_before_fusion = nn.Conv2d(self.bevout_feature, 7 * args['anchor_number'], kernel_size=1) + if self.use_dir: + self.dir_head_before_fusion = nn.Conv2d(self.bevout_feature, args['dir_args']['num_bins'] * args['anchor_number'], kernel_size=1) # BIN_NUM = 2 + + + def forward(self, data_dict): + return self._forward(data_dict) + + def _forward(self, data_dict): + image_inputs_dict = data_dict['image_inputs'] + record_len = data_dict['record_len'] + x, rots, trans, intrins, post_rots, post_trans = \ + image_inputs_dict['imgs'], image_inputs_dict['rots'], image_inputs_dict['trans'], image_inputs_dict['intrins'], image_inputs_dict['post_rots'], image_inputs_dict['post_trans'] + x, depth_items = self.get_voxels(x, rots, trans, intrins, post_rots, post_trans) # 将图像转换到BEV下,x: sum(record_len) x C x 240 x 240 (4 x 64 x 240 x 240) + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + x_single, x_fuse = self.bevencode(x, record_len, pairwise_t_matrix) + psm = self.cls_head(x_fuse) + rm = self.reg_head(x_fuse) + output_dict = {'cls_preds': psm, + 'reg_preds': rm, + 'depth_items': depth_items} + if self.use_dir: + dm = self.dir_head(x_fuse) + output_dict.update({"dir_preds": dm}) + + if self.supervise_single: + psm_single = self.cls_head_before_fusion(x_single) + rm_single = self.reg_head_before_fusion(x_single) + output_dict.update({'cls_preds_single': psm_single, + 'reg_preds_single': rm_single}) + if self.use_dir: + dm_single = self.dir_head_before_fusion(x_single) + output_dict.update({"dir_preds_single": dm_single}) + + return output_dict + + +def compile_model(grid_conf, data_aug_conf, outC): + return LiftSplatShootIntermediate(grid_conf, data_aug_conf, outC) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/lift_splat_shoot_voxel.py 
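(Editor's note: in `LiftSplatShootIntermediate` above, the single-scale vs. multi-scale BEV fusion encoder is selected by `core_method.endswith("ms")`. A tiny illustration of that naming convention follows; the method-name strings are hypothetical examples, not values taken from this run's config.)

```python
# Hypothetical core_method strings; only the endswith("ms") convention comes from the code above.
for core_method in ("lss_intermediate", "lss_intermediate_ms"):
    encoder = "BevEncodeMSFusion" if core_method.endswith("ms") else "BevEncodeSSFusion"
    print(f"{core_method} -> {encoder}")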
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/lift_splat_shoot_voxel.py new file mode 100644 index 0000000000000000000000000000000000000000..91d750f2991ce3742b1f7432d0f1ad663eb32d30 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/lift_splat_shoot_voxel.py @@ -0,0 +1,220 @@ +""" +Copyright (C) 2020 NVIDIA Corporation. All rights reserved. +Licensed under the NVIDIA Source Code License. See LICENSE at https://github.com/nv-tlabs/lift-splat-shoot. +Authors: Jonah Philion and Sanja Fidler +""" + +import torch +from torch import nn +from efficientnet_pytorch import EfficientNet +from torchvision.models.resnet import resnet18 +from icecream import ic + +from opencood.utils.camera_utils import gen_dx_bx, cumsum_trick, QuickCumsum, depth_discretization +from opencood.models.sub_modules.lss_submodule import Up, CamEncode, BevEncode, CamEncode_Resnet101 +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from matplotlib import pyplot as plt + + +class LiftSplatShootVoxel(nn.Module): + def __init__(self, args): + super(LiftSplatShootVoxel, self).__init__() + self.grid_conf = args['grid_conf'] # 网格配置参数 + self.data_aug_conf = args['data_aug_conf'] # 数据增强配置参数 + self.bevout_feature = args['bevout_feature'] + dx, bx, nx = gen_dx_bx(self.grid_conf['xbound'], + self.grid_conf['ybound'], + self.grid_conf['zbound'], + ) # 划分网格 + + self.dx = dx.clone().detach().requires_grad_(False).to(torch.device("cuda")) # [0.4,0.4,20] + self.bx = bx.clone().detach().requires_grad_(False).to(torch.device("cuda")) # [-49.8,-49.8,0] + self.nx = nx.clone().detach().requires_grad_(False).to(torch.device("cuda")) # [250,250,1] + + self.downsample = args['img_downsample'] # 下采样倍数 + self.camC = args['img_features'] # 图像特征维度 + self.frustum = self.create_frustum().clone().detach().requires_grad_(False).to(torch.device("cuda")) # frustum: DxfHxfWx3(41x8x16x3) + + self.D, _, _, _ = self.frustum.shape # D: 41 + self.camera_encoder_type = args['camera_encoder'] + if self.camera_encoder_type == 'EfficientNet': + self.camencode = CamEncode(self.D, self.camC, self.downsample, \ + self.grid_conf['ddiscr'], self.grid_conf['mode'], args['use_depth_gt'], args['depth_supervision']) + elif self.camera_encoder_type == 'Resnet101': + self.camencode = CamEncode_Resnet101(self.D, self.camC, self.downsample, \ + self.grid_conf['ddiscr'], self.grid_conf['mode'], args['use_depth_gt'], args['depth_supervision']) + + self.bevencode = BevEncode(inC=self.camC, outC=self.bevout_feature) + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + + self.cls_head = nn.Conv2d(self.bevout_feature, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.bevout_feature, 7 * args['anchor_number'], + kernel_size=1) + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_head = nn.Conv2d(self.bevout_feature, args['dir_args']['num_bins'] * args['anchor_number'], + kernel_size=1) # BIN_NUM = 2 + else: + self.use_dir = False + + # toggle using QuickCumsum vs. 
autograd
+        self.use_quickcumsum = True
+
+        # for p in self.parameters():
+        #     p.requires_grad = False
+        # for p in self.camencode.depth_head.parameters():
+        #     p.requires_grad = True
+        #     print("freeze ",p)
+
+    def create_frustum(self):
+        # make grid in image plane
+        ogfH, ogfW = self.data_aug_conf['final_dim']  # original image size ogfH: 128 ogfW: 288
+        fH, fW = ogfH // self.downsample, ogfW // self.downsample  # image size after 16x downsampling fH: 12 fW: 22
+        # ds = torch.arange(*self.grid_conf['dbound'], dtype=torch.float).view(-1, 1, 1).expand(-1, fH, fW)  # grid along the depth direction ds: DxfHxfW (41x12x22)
+        ds = torch.tensor(depth_discretization(*self.grid_conf['ddiscr'], self.grid_conf['mode']), dtype=torch.float).view(-1,1,1).expand(-1, fH, fW)
+
+        D, _, _ = ds.shape  # D: 41, number of grid cells along the depth direction
+        xs = torch.linspace(0, ogfW - 1, fW, dtype=torch.float).view(1, 1, fW).expand(D, fH, fW)  # split 0~288 into fW cells, xs: DxfHxfW (41x12x22)
+        ys = torch.linspace(0, ogfH - 1, fH, dtype=torch.float).view(1, fH, 1).expand(D, fH, fW)  # split 0~127 into fH cells, ys: DxfHxfW (41x12x22)
+
+        # D x H x W x 3
+        frustum = torch.stack((xs, ys, ds), -1)  # stack into grid coordinates; frustum[i,j,k,0] is the width-direction grid coordinate of pixel (i,j) at depth index k, frustum: DxfHxfWx3
+        return frustum
+
+    def get_geometry(self, rots, trans, intrins, post_rots, post_trans):
+        """Determine the (x,y,z) locations (in the ego frame)
+        of the points in the point cloud.
+        Returns B x N x D x H/downsample x W/downsample x 3
+        """
+        B, N, _ = trans.shape  # B: 4 (batch size) N: 4 (number of cameras)
+
+        # undo post-transformation
+        # B x N x D x H x W x 3
+        # undo the pixel changes introduced by data augmentation and preprocessing
+        points = self.frustum - post_trans.view(B, N, 1, 1, 1, 3)
+        points = torch.inverse(post_rots).view(B, N, 1, 1, 1, 3, 3).matmul(points.unsqueeze(-1))
+
+        # cam_to_ego
+        points = torch.cat((points[:, :, :, :, :, :2] * points[:, :, :, :, :, 2:3],  # points[:, :, :, :, :, 2:3] ranges from [4, 45) meters
+                            points[:, :, :, :, :, 2:3]
+                            ), 5)  # turn the pixel coordinates (u,v,d) into homogeneous-style coordinates (du,dv,d)
+        # d[u,v,1]^T = intrins * rots^(-1) * ([x,y,z]^T - trans)
+        combine = rots.matmul(torch.inverse(intrins))
+        points = combine.view(B, N, 1, 1, 1, 3, 3).matmul(points).squeeze(-1)
+        points += trans.view(B, N, 1, 1, 1, 3)  # map the pixel coordinates d[u,v,1]^T to [x,y,z] in the ego (vehicle) frame
+
+        return points  # B x N x D x H x W x 3 (4 x 4 x 41 x 16 x 22 x 3)
+
+    def get_cam_feats(self, x):
+        """Return B x N x D x H/downsample x W/downsample x C
+        """
+        B, N, C, imH, imW = x.shape  # B: 4 N: 4 C: 3 imH: 256 imW: 352
+
+        x = x.view(B*N, C, imH, imW)  # merge the B and N dimensions, x: 16 x 4 x 256 x 352
+        depth_items, x = self.camencode(x)  # run the image encoder, x: B*N x C x D x fH x fW (24 x 64 x 41 x 16 x 22)
+        x = x.view(B, N, self.camC, self.D, imH//self.downsample, imW//self.downsample)  # split the first dimension back into B and N, x: B x N x C x D x fH x fW (4 x 6 x 64 x 41 x 16 x 22)
+        x = x.permute(0, 1, 3, 4, 5, 2)  # x: B x N x D x fH x fW x C (4 x 6 x 41 x 16 x 22 x 64)
+
+        return x, depth_items
+
+    def voxel_pooling(self, geom_feats, x):
+        # geom_feats: B x N x D x H x W x 3 (4 x 6 x 41 x 16 x 22 x 3), D is discretization in "UD" or "LID"
+        # x: B x N x D x fH x fW x C (4 x 6 x 41 x 16 x 22 x 64), D is num_bins
+
+        B, N, D, H, W, C = x.shape  # B: 4 N: 6 D: 41 H: 16 W: 22 C: 64
+        Nprime = B*N*D*H*W  # Nprime
+
+        # flatten x
+        x = x.reshape(Nprime, C)  # flatten the features; there are B*N*D*H*W points in total
+
+        # flatten indices
+
+        geom_feats = ((geom_feats - (self.bx - self.dx/2.)) / self.dx).long()  # shift the ranges [-48,48] / [-10,10] to [0, 240) / [0, 1); compute voxel indices and floor
+        geom_feats = geom_feats.view(Nprime, 3)  # flatten the pixel-to-voxel mapping as well, geom_feats: B*N*D*H*W x 3
+        batch_ix = torch.cat([torch.full([Nprime//B, 1], ix,
+                                         device=x.device, dtype=torch.long) for ix in range(B)])  # which batch each point belongs to
+        geom_feats = torch.cat((geom_feats, batch_ix), 1)  # geom_feats: B*N*D*H*W x 4, geom_feats[:,3] is the batch id
+
+        # filter out points that are outside box
+        # drop points that fall outside the grid, x: 0~240 y: 0~240 z: 0
+        kept = (geom_feats[:, 0] >= 0) & (geom_feats[:, 0] < self.nx[0])\
+            & (geom_feats[:, 1] >= 0) & (geom_feats[:, 1] < self.nx[1])\
+            & (geom_feats[:, 2] >= 0) & (geom_feats[:, 2] < self.nx[2])
+        x = x[kept]
+        geom_feats = geom_feats[kept]
+
+        # get tensors from the same voxel next to each other
+        ranks = geom_feats[:, 0] * (self.nx[1] * self.nx[2] * B)\
+            + geom_feats[:, 1] * (self.nx[2] * B)\
+            + geom_feats[:, 2] * B\
+            + geom_feats[:, 3]  # assign each point a rank; points with equal rank belong to the same batch and the same voxel
+        sorts = ranks.argsort()
+        x, geom_feats, ranks = x[sorts], geom_feats[sorts], ranks[sorts]  # sort by rank so that points in the same voxel become adjacent
+        # x: 168648 x 64 geom_feats: 168648 x 4 ranks: 168648
+
+        # cumsum trick
+        if not self.use_quickcumsum:
+            x, geom_feats = cumsum_trick(x, geom_feats, ranks)
+        else:
+            x, geom_feats = QuickCumsum.apply(x, geom_feats, ranks)  # keep a single reduced point per voxel per batch, x: 29072 x 64 geom_feats: 29072 x 4
+
+        # griddify (B x C x Z x X x Y)
+        # final = torch.zeros((B, C, self.nx[2], self.nx[0], self.nx[1]), device=x.device)  # final: 4 x 64 x Z x X x Y
+        # final[geom_feats[:, 3], :, geom_feats[:, 2], geom_feats[:, 0], geom_feats[:, 1]] = x  # scatter x into final at its voxel coordinates
+
+        # modify griddify (B x C x Z x Y x X) by Yifan Lu 2022.10.7
+        # ------> x
+        # |
+        # |
+        # y
+        final = torch.zeros((B, C, self.nx[2], self.nx[1], self.nx[0]), device=x.device)  # final: 4 x 64 x Z x Y x X
+        final[geom_feats[:, 3], :, geom_feats[:, 2], geom_feats[:, 1], geom_feats[:, 0]] = x  # scatter x into final at its voxel coordinates
+
+        # collapse Z
+        # final = torch.max(final.unbind(dim=2), 1)[0]  # collapse the z dimension
+        final = torch.max(final, 2)[0]  # collapse the z dimension
+        return final  # final: 4 x 64 x 240 x 240 # B, C, H, W
+
+    def get_voxels(self, x, rots, trans, intrins, post_rots, post_trans):
+        geom = self.get_geometry(rots, trans, intrins, post_rots, post_trans)  # mapping from pixel coordinates to ego-frame coordinates, geom: B x N x D x H x W x 3 (4 x N x 42 x 16 x 22 x 3)
+        x_img, depth_items = self.get_cam_feats(x)  # extract image features and predict the depth distribution, x: B x N x D x fH x fW x C (4 x N x 42 x 16 x 22 x 64)
+        x = self.voxel_pooling(geom, x_img)  # x: 4 x 64 x 240 x 240
+
+        return x, depth_items
+
+    def forward(self, data_dict):
+        # x: [4,4,3,256, 352]
+        # rots: [4,4,3,3]
+        # trans: [4,4,3]
+        # intrins: [4,4,3,3]
+        # post_rots: [4,4,3,3]
+        # post_trans: [4,4,3]
+        image_inputs_dict = data_dict['image_inputs']
+        x, rots, trans, intrins, post_rots, post_trans = \
+            image_inputs_dict['imgs'], image_inputs_dict['rots'], image_inputs_dict['trans'], image_inputs_dict['intrins'], image_inputs_dict['post_rots'], image_inputs_dict['post_trans']
+        x, depth_items = self.get_voxels(x, rots, trans, intrins, post_rots, post_trans)  # project the images into BEV, x: B x C x 240 x 240 (4 x 64 x 240 x 240)
+
+        x = self.bevencode(x)  # extract BEV features with a ResNet-18 encoder, x: 4 x C x 240 x 240
+
+        if self.shrink_flag:
+            x = self.shrink_conv(x)
+        # 4 x C x 120 x 120
+        psm = self.cls_head(x)
+        rm = self.reg_head(x)
+        output_dict = {'psm': psm,
+                       'rm': rm,
+                       'depth_items': depth_items}
+
+        if self.use_dir:
+            dm = self.dir_head(x)
+            output_dict.update({"dm": dm})
+
+        return output_dict
+
+
+def compile_model(grid_conf, data_aug_conf, outC):
+    return LiftSplatShootVoxel(grid_conf, data_aug_conf, outC)
\ No newline at end of file
diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/pixor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/pixor.py
new file mode 100644
index
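(Editor's note: apart from the `psm`/`rm`/`dm` output key names, the substantive difference between `LiftSplatShootVoxel` above and `LiftSplatShoot` earlier appears to be how the Z axis of the pooled voxel grid is collapsed. The sketch below contrasts the two strategies on an invented tensor; sizes are illustrative only.)

```python
import torch

# final: (B, C, Z, Y, X) with invented sizes; both variants end with a (B, ?, Y, X) BEV map.
final = torch.randn(2, 64, 4, 8, 8)

bev_cat = torch.cat(final.unbind(dim=2), 1)   # LiftSplatShoot: concatenate Z slices -> (2, 64*4, 8, 8)
bev_max = torch.max(final, 2)[0]              # LiftSplatShootVoxel: max over Z      -> (2, 64, 8, 8)
print(bev_cat.shape, bev_max.shape)
```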
0000000000000000000000000000000000000000..e29d7ff60b3aba46d1bc4f073587253105cfe189 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/pixor.py @@ -0,0 +1,310 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +def conv3x3(in_planes, out_planes, stride=1, bias=False): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=bias) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, in_planes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(in_planes, planes, stride, bias=True) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes, bias=True) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + # out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + # out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, in_planes, planes, stride=1, downsample=None, + use_bn=True): + super(Bottleneck, self).__init__() + bias = not use_bn + self.use_bn = use_bn + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=bias) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=bias) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, self.expansion * planes, kernel_size=1, + bias=bias) + self.bn3 = nn.BatchNorm2d(self.expansion * planes) + self.downsample = downsample + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + """ + Forward pass of residual block. + Parameters + ---------- + x : torch.Tensor + Shape (N, C, W, L). + + Returns + ------- + out : torch.Tensor + Shape (N, self.expansion*planes, W/stride, L/stride). 
+ """ + residual = x + # (N, planes, W, L) + out = self.conv1(x) + if self.use_bn: + out = self.bn1(out) + out = self.relu(out) + # (N, planes, W/stride, L/stride) + out = self.conv2(out) + if self.use_bn: + out = self.bn2(out) + out = self.relu(out) + # (N, self.expansion*planes, W/stride, L/stride) + out = self.conv3(out) + if self.use_bn: + out = self.bn3(out) + + if self.downsample is not None: + # (N, self.expansion*planes, W/2, L/2) + residual = self.downsample(x) + out = self.relu(residual + out) + return out + + +class BackBone(nn.Module): + + def __init__(self, block, num_block, geom, use_bn=True): + super(BackBone, self).__init__() + + self.use_bn = use_bn + + # Block 1 + self.conv1 = conv3x3(geom["input_shape"][-1], 32) + self.conv2 = conv3x3(32, 32) + self.bn1 = nn.BatchNorm2d(32) + self.bn2 = nn.BatchNorm2d(32) + self.relu = nn.ReLU(inplace=True) + + # Block 2-5 + self.in_planes = 32 + self.block2 = self._make_layer(block, 24, num_blocks=num_block[0]) + self.block3 = self._make_layer(block, 48, num_blocks=num_block[1]) + self.block4 = self._make_layer(block, 64, num_blocks=num_block[2]) + self.block5 = self._make_layer(block, 96, num_blocks=num_block[3]) + + # Lateral layers + self.latlayer1 = nn.Conv2d(384, 196, kernel_size=1, stride=1, + padding=0) + self.latlayer2 = nn.Conv2d(256, 128, kernel_size=1, stride=1, + padding=0) + self.latlayer3 = nn.Conv2d(192, 96, kernel_size=1, stride=1, padding=0) + + # Top-down layers + self.deconv1 = nn.ConvTranspose2d(196, 128, kernel_size=3, stride=2, + padding=1, output_padding=1) + p = 0 if geom['label_shape'][1] == 175 else 1 + self.deconv2 = nn.ConvTranspose2d(128, 96, kernel_size=3, stride=2, + padding=1, output_padding=(1, p)) + + def encode(self, x): + x = self.conv1(x) + if self.use_bn: + x = self.bn1(x) + x = self.relu(x) + + x = self.conv2(x) + if self.use_bn: + x = self.bn2(x) + c1 = self.relu(x) + + # bottom up layers + c2 = self.block2(c1) + c3 = self.block3(c2) + c4 = self.block4(c3) + c5 = self.block5(c4) + + return c3, c4, c5 + + def decode(self, c3, c4, c5): + l5 = self.latlayer1(c5) + l4 = self.latlayer2(c4) + p5 = l4 + self.deconv1(l5) + l3 = self.latlayer3(c3) + p4 = l3 + self.deconv2(p5) + + return p4 + + def forward(self, x): + c3, c4, c5 = self.encode(x) + p4 = self.decode(c3, c4, c5) + + return p4 + + def _make_layer(self, block, planes, num_blocks): + + if self.use_bn: + # downsample the H*W by 1/2 + downsample = nn.Sequential( + nn.Conv2d(self.in_planes, planes * block.expansion, + kernel_size=1, stride=2, bias=False), + nn.BatchNorm2d(planes * block.expansion) + ) + else: + downsample = nn.Conv2d(self.in_planes, planes * block.expansion, + kernel_size=1, stride=2, bias=True) + + layers = [ + block(self.in_planes, planes, stride=2, downsample=downsample)] + + self.in_planes = planes * block.expansion + for i in range(1, num_blocks): + layers.append(block(self.in_planes, planes, stride=1)) + self.in_planes = planes * block.expansion + return nn.Sequential(*layers) + + def _upsample_add(self, x, y): + """Upsample and add two feature maps. + Args: + x: (Variable) top feature map to be upsampled. + y: (Variable) lateral feature map. + Returns: + (Variable) added feature map. + Note in PyTorch, when input size is odd, the upsampled feature map + with `F.upsample(..., scale_factor=2, mode='nearest')` + maybe not equal to the lateral feature map size. + e.g. 
+ original input size: [N,_,15,15] -> + conv2d feature map size: [N,_,8,8] -> + upsampled feature map size: [N,_,16,16] + So we choose bilinear upsample which supports arbitrary output sizes. + """ + _, _, H, W = y.size() + return F.upsample(x, size=(H, W), mode='bilinear') + y + + +class Header(nn.Module): + + def __init__(self, use_bn=True): + super(Header, self).__init__() + + self.use_bn = use_bn + bias = not use_bn + self.conv1 = conv3x3(96, 96, bias=bias) + self.bn1 = nn.BatchNorm2d(96) + self.conv2 = conv3x3(96, 96, bias=bias) + self.bn2 = nn.BatchNorm2d(96) + self.conv3 = conv3x3(96, 96, bias=bias) + self.bn3 = nn.BatchNorm2d(96) + self.conv4 = conv3x3(96, 96, bias=bias) + self.bn4 = nn.BatchNorm2d(96) + + self.clshead = conv3x3(96, 1, bias=True) + self.reghead = conv3x3(96, 6, bias=True) + + def forward(self, x): + x = self.conv1(x) + if self.use_bn: + x = self.bn1(x) + x = self.conv2(x) + if self.use_bn: + x = self.bn2(x) + x = self.conv3(x) + if self.use_bn: + x = self.bn3(x) + x = self.conv4(x) + if self.use_bn: + x = self.bn4(x) + + cls = self.clshead(x) + reg = self.reghead(x) + + return cls, reg + + +class PIXOR(nn.Module): + """ + The Pixor backbone. The input of PIXOR nn module is a tensor of + [batch_size, height, weight, channel], The output of PIXOR nn module + is also a tensor of [batch_size, height/4, weight/4, channel]. Note that + we convert the dimensions to [C, H, W] for PyTorch's nn.Conv2d functions + + Parameters + ---------- + args : dict + The arguments of the model. + + Attributes + ---------- + backbone : opencood.object + The backbone used to extract features. + header : opencood.object + Header used to predict the classification and coordinates. + """ + + def __init__(self, args): + super(PIXOR, self).__init__() + geom = args["geometry_param"] + use_bn = args["use_bn"] + self.backbone = BackBone(Bottleneck, [3, 6, 6, 3], geom, use_bn) + self.header = Header(use_bn) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. 
/ n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + prior = 0.01 + self.header.clshead.weight.data.fill_(-math.log((1.0 - prior) / prior)) + self.header.clshead.bias.data.fill_(0) + self.header.reghead.weight.data.fill_(0) + self.header.reghead.bias.data.fill_(0) + + def forward(self, data_dict): + bev_input = data_dict['processed_lidar']["bev_input"] + + features = self.backbone(bev_input) + # cls -- (N, 1, W/4, L/4) + # reg -- (N, 6, W/4, L/4) + cls, reg = self.header(features) + + output_dict = { + "cls": cls, + "reg": reg + } + + return output_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/pixor_intermediate.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/pixor_intermediate.py new file mode 100644 index 0000000000000000000000000000000000000000..4207049dc25c14387483fabef739520eed5cb0dd --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/pixor_intermediate.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import math + +import torch.nn as nn + +from opencood.models.fuse_modules.self_attn import AttFusion +from opencood.models.pixor import Bottleneck, BackBone, Header + + +class BackBoneIntermediate(BackBone): + + def __init__(self, block, num_block, geom, use_bn=True): + super(BackBoneIntermediate, self).__init__(block, + num_block, + geom, use_bn) + + self.fusion_net3 = AttFusion(192) + self.fusion_net4 = AttFusion(256) + self.fusion_net5 = AttFusion(384) + + def forward(self, x, record_len): + # Here c3, c4, c5 includes all cav + c3, c4, c5 = self.encode(x) + + # Here c3, c4, c5 only include ego + c5 = self.fusion_net5(c5, record_len) + c4 = self.fusion_net4(c4, record_len) + c3 = self.fusion_net3(c3, record_len) + + p4 = self.decode(c3, c4, c5) + return p4 + + +class PIXORIntermediate(nn.Module): + """ + The Pixor backbone. The input of PIXOR nn module is a tensor of + [batch_size, height, weight, channel], The output of PIXOR nn module + is also a tensor of [batch_size, height/4, weight/4, channel]. Note that + we convert the dimensions to [C, H, W] for PyTorch's nn.Conv2d functions + + Parameters + ---------- + args : dict + The arguments of the model. + + Attributes + ---------- + backbone : opencood.object + The backbone used to extract features. + header : opencood.object + Header used to predict the classification and coordinates. + """ + + def __init__(self, args): + super(PIXORIntermediate, self).__init__() + geom = args["geometry_param"] + use_bn = args["use_bn"] + self.backbone = BackBoneIntermediate(Bottleneck, [3, 6, 6, 3], + geom, + use_bn) + self.header = Header(use_bn) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. 
/ n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + prior = 0.01 + self.header.clshead.weight.data.fill_(-math.log((1.0 - prior) / prior)) + self.header.clshead.bias.data.fill_(0) + self.header.reghead.weight.data.fill_(0) + self.header.reghead.bias.data.fill_(0) + + def forward(self, data_dict): + bev_input = data_dict['processed_lidar']["bev_input"] + record_len = data_dict['record_len'] + + features = self.backbone(bev_input, record_len) + # cls -- (N, 1, W/4, L/4) + # reg -- (N, 6, W/4, L/4) + cls, reg = self.header(features) + + output_dict = { + "cls": cls, + "reg": reg + } + + return output_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar.py new file mode 100644 index 0000000000000000000000000000000000000000..cf1711746400adfb964afa40e8a79b1c4cedc150 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , OpenPCDet +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch +import torch.nn as nn + + +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv + + +class PointPillar(nn.Module): + def __init__(self, args): + super(PointPillar, self).__init__() + + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + is_resnet = args['base_bev_backbone'].get("resnet", False) + if is_resnet: + self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) # or you can use ResNetBEVBackbone, which is stronger + else: + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) # or you can use ResNetBEVBackbone, which is stronger + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], # 384 + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 7 * args['anchor_number'], # 384 + kernel_size=1) + + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_head = nn.Conv2d(self.out_channel, args['dir_args']['num_bins'] * args['anchor_number'], + kernel_size=1) # BIN_NUM = 2, # 384 + else: + self.use_dir = False + + def forward(self, data_dict): + + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points} + + batch_dict = self.pillar_vfe(batch_dict) + batch_dict = self.scatter(batch_dict) + batch_dict = self.backbone(batch_dict) + + spatial_features_2d = batch_dict['spatial_features_2d'] + + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + + 
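# (Editor's note, not part of the diff) At this point spatial_features_2d is the BEV map that feeds
# the 1x1 detection heads below: cls_head -> (N, anchor_number, H, W),
# reg_head -> (N, 7 * anchor_number, H, W), and, when enabled, dir_head -> (N, num_bins * anchor_number, H, W).
# The 7 regression channels per anchor are assumed to be the usual anchor-based box residuals
# (center, size, yaw); this is an editorial annotation, not a statement from the original file.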
psm = self.cls_head(spatial_features_2d) + rm = self.reg_head(spatial_features_2d) + + output_dict = {'cls_preds': psm, + 'reg_preds': rm} + + if self.use_dir: + dm = self.dir_head(spatial_features_2d) + output_dict.update({'dir_preds': dm}) + + return output_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_baseline.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_baseline.py new file mode 100644 index 0000000000000000000000000000000000000000..6fbe99a8bbd43bedd6e2f7322857a0288a5d046a --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_baseline.py @@ -0,0 +1,138 @@ +# Author: Yifan Lu +# a class that integrate multiple simple fusion methods (Single Scale) +# Support F-Cooper, Self-Att, DiscoNet(wo KD), V2VNet, V2XViT, When2comm + +import torch.nn as nn +from icecream import ic +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +from opencood.models.fuse_modules.f_cooper_fuse import SpatialFusion +from opencood.models.fuse_modules.fusion_in_one import MaxFusion, AttFusion, DiscoFusion, V2VNetFusion, V2XViTFusion, When2commFusion +from opencood.utils.transformation_utils import normalize_pairwise_tfm + +class PointPillarBaseline(nn.Module): + """ + F-Cooper implementation with point pillar backbone. + """ + def __init__(self, args): + super(PointPillarBaseline, self).__init__() + + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + is_resnet = args['base_bev_backbone'].get("resnet", False) + if is_resnet: + self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) # or you can use ResNetBEVBackbone, which is stronger + else: + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) # or you can use ResNetBEVBackbone, which is stronger + self.voxel_size = args['voxel_size'] + + if args['fusion_method'] == "max": + self.fusion_net = MaxFusion() + if args['fusion_method'] == "att": + self.fusion_net = AttFusion(args['att']['feat_dim']) + if args['fusion_method'] == "disconet": + self.fusion_net = DiscoFusion(args['disconet']['feat_dim']) + if args['fusion_method'] == "v2vnet": + self.fusion_net = V2VNetFusion(args['v2vnet']) + if args['fusion_method'] == 'v2xvit': + self.fusion_net = V2XViTFusion(args['v2xvit']) + if args['fusion_method'] == 'when2comm': + self.fusion_net = When2commFusion(args['when2comm']) + + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.compression = False + if "compression" in args: + self.compression = True + self.naive_compressor = NaiveCompressor(self.out_channel, args['compression']) + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 7 * args['anchor_number'], 
+ kernel_size=1) + self.use_dir = False + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_head = nn.Conv2d(self.out_channel, args['dir_args']['num_bins'] * args['anchor_number'], + kernel_size=1) # BIN_NUM = 2 + + if 'backbone_fix' in args.keys() and args['backbone_fix']: + self.backbone_fix() + + def backbone_fix(self): + """ + Fix the parameters of backbone during finetune on timedelay。 + """ + for p in self.pillar_vfe.parameters(): + p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + # calculate pairwise affine transformation matrix + _, _, H0, W0 = batch_dict['spatial_features'].shape # original feature map shape H0, W0 + t_matrix = normalize_pairwise_tfm(data_dict['pairwise_t_matrix'], H0, W0, self.voxel_size[0]) + batch_dict = self.backbone(batch_dict) + + spatial_features_2d = batch_dict['spatial_features_2d'] + + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + + if self.compression: + spatial_features_2d = self.naive_compressor(spatial_features_2d) + + fused_feature = self.fusion_net(spatial_features_2d, record_len, t_matrix) + + psm = self.cls_head(fused_feature) + rm = self.reg_head(fused_feature) + + output_dict = {'cls_preds': psm, + 'reg_preds': rm} + + if self.use_dir: + output_dict.update({'dir_preds': self.dir_head(fused_feature)}) + + return output_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_baseline_multiscale.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_baseline_multiscale.py new file mode 100644 index 0000000000000000000000000000000000000000..232ad55d68f47065c85de5f34031d49eeed25c30 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_baseline_multiscale.py @@ -0,0 +1,135 @@ +# Author: Yifan Lu +# a class that integrate multiple simple fusion methods (Single Scale) +# Support F-Cooper, Self-Att, DiscoNet(wo KD), V2VNet, V2XViT, When2comm + +import torch.nn as nn +from icecream import ic +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +from opencood.models.fuse_modules.f_cooper_fuse import SpatialFusion +from opencood.models.fuse_modules.fusion_in_one 
import MaxFusion, AttFusion, DiscoFusion, V2VNetFusion, V2XViTFusion, When2commFusion +from opencood.utils.transformation_utils import normalize_pairwise_tfm + +class PointPillarBaselineMultiscale(nn.Module): + """ + F-Cooper implementation with point pillar backbone. + """ + def __init__(self, args): + super(PointPillarBaselineMultiscale, self).__init__() + + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + is_resnet = args['base_bev_backbone'].get("resnet", True) # default true + if is_resnet: + self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) # or you can use ResNetBEVBackbone, which is stronger + else: + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) # or you can use ResNetBEVBackbone, which is stronger + self.voxel_size = args['voxel_size'] + + self.fusion_net = nn.ModuleList() + for i in range(len(args['base_bev_backbone']['layer_nums'])): + if args['fusion_method'] == "max": + self.fusion_net.append(MaxFusion()) + if args['fusion_method'] == "att": + self.fusion_net.append(AttFusion(args['att']['feat_dim'][i])) + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.compression = False + if "compression" in args: + self.compression = True + self.naive_compressor = NaiveCompressor(64, args['compression']) + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 7 * args['anchor_number'], + kernel_size=1) + self.use_dir = False + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_head = nn.Conv2d(self.out_channel, args['dir_args']['num_bins'] * args['anchor_number'], + kernel_size=1) # BIN_NUM = 2 + + if 'backbone_fix' in args.keys() and args['backbone_fix']: + self.backbone_fix() + + def backbone_fix(self): + """ + Fix the parameters of backbone during finetune on timedelay。 + """ + for p in self.pillar_vfe.parameters(): + p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + # calculate pairwise affine transformation matrix + _, _, H0, W0 = batch_dict['spatial_features'].shape # original feature map shape H0, W0 + t_matrix = normalize_pairwise_tfm(data_dict['pairwise_t_matrix'], H0, W0, self.voxel_size[0]) + + spatial_features 
= batch_dict['spatial_features'] + + if self.compression: + spatial_features = self.naive_compressor(spatial_features) + + # multiscale fusion + feature_list = self.backbone.get_multiscale_feature(spatial_features) + fused_feature_list = [] + for i, fuse_module in enumerate(self.fusion_net): + fused_feature_list.append(fuse_module(feature_list[i], record_len, t_matrix)) + fused_feature = self.backbone.decode_multiscale_feature(fused_feature_list) + + if self.shrink_flag: + fused_feature = self.shrink_conv(fused_feature) + + psm = self.cls_head(fused_feature) + rm = self.reg_head(fused_feature) + + output_dict = {'cls_preds': psm, + 'reg_preds': rm} + + if self.use_dir: + output_dict.update({'dir_preds': self.dir_head(fused_feature)}) + + return output_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_deform_transformer.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_deform_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..b3a64a770c6c6267ce2bcb9c494bc31fbb9903d2 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_deform_transformer.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang , Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch.nn as nn +import torch + +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.deformable_transformer_backbone import DeformableTransformerBackbone +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +from opencood.models.point_pillar import PointPillar +from opencood.utils.transformation_utils import get_pairwise_transformation_torch +from opencood.utils.model_utils import weight_init + + +class PointPillarDeformTransformer(nn.Module): + def __init__(self, args): + super(PointPillarDeformTransformer, self).__init__() + + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + self.backbone = DeformableTransformerBackbone(args['deformable_transfomer_backbone']) + # used to downsample the feature map for efficient computation + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + + self.cls_head = nn.Conv2d(128 * 2, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(128 * 2, 7 * args['anchor_number'], + kernel_size=1) + if args['backbone_fix']: + self.backbone_fix() + + self.apply(weight_init) + + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + lidar_pose = data_dict['lidar_pose'] # [sum(cav), 6] + + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len, + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_pose': lidar_pose} + + + + # n, 4 -> n, c + 
batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + + batch_dict = self.backbone(batch_dict) + # N, C, H', W'. [N, 384, 100, 352] + spatial_features_2d = batch_dict['spatial_features_2d'] + + # downsample feature to reduce memory + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + + psm = self.cls_head(spatial_features_2d) + rm = self.reg_head(spatial_features_2d) + + + output_dict = {'psm': psm, + 'rm': rm} + + return output_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_disconet.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_disconet.py new file mode 100644 index 0000000000000000000000000000000000000000..080ba5f569f12ad237cefbbe3cc18f1fc91f707e --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_disconet.py @@ -0,0 +1,97 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch +import torch.nn as nn + + +import torch.nn.functional as F +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.data_utils.post_processor import UncertaintyVoxelPostprocessor +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.utils.transformation_utils import normalize_pairwise_tfm, regroup +from opencood.models.fuse_modules.fusion_in_one import DiscoFusion + +class PointPillarDiscoNet(nn.Module): + def __init__(self, args): + super(PointPillarDiscoNet, self).__init__() + self.discrete_ratio = args['voxel_size'][0] + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + self.voxel_size = args['voxel_size'] + + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.fusion_net = DiscoFusion(self.out_channel) + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 7 * args['anchor_number'], + kernel_size=1) + self.use_dir = False + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_head = nn.Conv2d(self.out_channel, args['dir_args']['num_bins'] * args['anchor_number'], + kernel_size=1) # BIN_NUM = 2 + + def forward(self, data_dict): + + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + + teacher_voxel_features = data_dict['teacher_processed_lidar']['voxel_features'] + teacher_voxel_coords = data_dict['teacher_processed_lidar']['voxel_coords'] + teacher_voxel_num_points = data_dict['teacher_processed_lidar']['voxel_num_points'] + + record_len = data_dict['record_len'] + lidar_pose = data_dict['lidar_pose'] + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + + 
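# (Editor's note, not part of the diff) The teacher_voxel_* tensors unpacked above are not used again
# in this forward pass; data_dict['teacher_processed_lidar'] is instead consumed by
# PointPillarDiscoNetTeacher later in this patch, presumably so a DiscoNet-style distillation loss
# can compare this model's 'feature' output against the teacher's 'teacher_feature'.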
batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len, + 'pairwise_t_matrix': pairwise_t_matrix} + + + batch_dict = self.pillar_vfe(batch_dict) + batch_dict = self.scatter(batch_dict) + + _, _, H0, W0 = batch_dict['spatial_features'].shape + t_matrix = normalize_pairwise_tfm(data_dict['pairwise_t_matrix'], H0, W0, self.voxel_size[0]) + + batch_dict = self.backbone(batch_dict) + + + spatial_features_2d = batch_dict['spatial_features_2d'] + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + + spatial_features_2d = self.fusion_net(spatial_features_2d, record_len, t_matrix) + + psm = self.cls_head(spatial_features_2d) + rm = self.reg_head(spatial_features_2d) + + output_dict = {'feature': spatial_features_2d, + 'cls_preds': psm, + 'reg_preds': rm} + if self.use_dir: + output_dict.update({'dir_preds': self.dir_head(spatial_features_2d)}) + + return output_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_disconet_teacher.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_disconet_teacher.py new file mode 100644 index 0000000000000000000000000000000000000000..323b562a0f97e101cd0affebe2ced9aadbf91055 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_disconet_teacher.py @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , OpenPCDet +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch +import torch.nn as nn + + +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv + +class PointPillarDiscoNetTeacher(nn.Module): + def __init__(self, args): + super(PointPillarDiscoNetTeacher, self).__init__() + + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) + + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 7 * args['anchor_number'], + kernel_size=1) + + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_head = nn.Conv2d(self.out_channel, args['dir_args']['num_bins'] * args['anchor_number'], + kernel_size=1) # BIN_NUM = 2 + + def forward(self, data_dict): + + voxel_features = data_dict['teacher_processed_lidar']['voxel_features'] + voxel_coords = data_dict['teacher_processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['teacher_processed_lidar']['voxel_num_points'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points} + + batch_dict = self.pillar_vfe(batch_dict) + batch_dict = self.scatter(batch_dict) + batch_dict = self.backbone(batch_dict) + + spatial_features_2d = batch_dict['spatial_features_2d'] + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + + psm = 
self.cls_head(spatial_features_2d) + rm = self.reg_head(spatial_features_2d) + + output_dict = {'teacher_feature': spatial_features_2d, + 'teacher_cls_preds': psm, + 'teacher_reg_preds': rm} + if self.use_dir: + output_dict.update({'dir_preds': self.dir_head(spatial_features_2d)}) + + + return output_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_intermediate.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_intermediate.py new file mode 100644 index 0000000000000000000000000000000000000000..8c09ad6a343145f457ab1714fc50ed2611c3cd8b --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_intermediate.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch +import torch.nn as nn + + +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.att_bev_backbone import AttBEVBackbone +from opencood.models.sub_modules.dcn_net import DCNNet +from opencood.utils.transformation_utils import get_pairwise_transformation_torch +from opencood.data_utils.post_processor import UncertaintyVoxelPostprocessor + + +class PointPillarIntermediate(nn.Module): + def __init__(self, args): + super(PointPillarIntermediate, self).__init__() + + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + self.backbone = AttBEVBackbone(args['base_bev_backbone'], 64) + + self.dcn = False + if 'dcn' in args: + self.dcn = True + self.before_backbone = args['dcn']['before_backbone'] + self.dcn_net = DCNNet(args['dcn']) + + + self.cls_head = nn.Conv2d(128 * 3, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(128 * 3, 7 * args['anchor_num'], + kernel_size=1) + + def forward(self, data_dict): + + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + lidar_pose = data_dict['lidar_pose'] + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len, + 'pairwise_t_matrix': pairwise_t_matrix} + + + + batch_dict = self.pillar_vfe(batch_dict) + batch_dict = self.scatter(batch_dict) + + if self.dcn and self.before_backbone: + batch_dict['spatial_features'] = self.dcn_net(batch_dict['spatial_features']) + + batch_dict = self.backbone(batch_dict) + spatial_features_2d = batch_dict['spatial_features_2d'] + + if self.dcn and not self.before_backbone: + spatial_features_2d = self.dcn_net(spatial_features_2d) + + psm = self.cls_head(spatial_features_2d) + rm = self.reg_head(spatial_features_2d) + + output_dict = {'psm': psm, + 'rm': rm} + + return output_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_mash.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_mash.py new file mode 100644 index 0000000000000000000000000000000000000000..e095720da60bdb992abdb76d678dd5768d8dd94a --- /dev/null +++ 
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_mash.py @@ -0,0 +1,249 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang , Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +from numpy import record +import torch +import torch.nn as nn +import torch.nn.functional as F + +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +from opencood.models.sub_modules.mash_utils import QueryEncoder, KeyEncoder, SmoothingNetwork + +from icecream import ic + + +class PointPillarMash(nn.Module): + def __init__(self, args): + super(PointPillarMash, self).__init__() + + self.max_cav = args['max_cav'] + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) + + # used to downsample the feature map for efficient computation + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.compression = False + + if args['compression'] > 0: + self.compression = True + self.naive_compressor = NaiveCompressor(256, args['compression']) + + self.cls_head = nn.Conv2d(128 * 2, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(128 * 2, 7 * args['anchor_number'], + kernel_size=1) + + mash_args = args['mash'] + self.query_encoder = QueryEncoder(mash_args["feature_dim"], mash_args['query_dim']) + self.key_encoder = KeyEncoder(mash_args["feature_dim"], mash_args['key_dim']) + self.queryKeySim = nn.Conv2d(mash_args['query_dim'], mash_args['key_dim'], 1, 1) + self.smoothing_net = SmoothingNetwork(in_ch=mash_args['H'] * mash_args['W'] + 1) + self.H = mash_args['H'] + self.W = mash_args['W'] + self.downsample_rate = mash_args['downsample_rate'] + self.discrete_ratio = args['voxel_size'][0] + + if args['backbone_fix']: + self.backbone_fix() + + def backbone_fix(self): + """ + Fix the parameters of backbone during finetune on timedelay。 + """ + for p in self.pillar_vfe.parameters(): + p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + 
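+        # What follows is the MASH-style feature matching: per-pixel query/key
+        # embeddings are compared into a correspondence volume
+        # (computeCorrespondenceVolume), smoothed by a small network, converted
+        # into a sampling grid (idx2grid), and used to warp each neighbour's
+        # feature map before max-pooling it with the ego feature.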
batch_dict = self.backbone(batch_dict) + # N, C, H', W'. [N, 256, 50, 176] + spatial_features_2d = batch_dict['spatial_features_2d'] + + # downsample feature to reduce memory + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + # compressor + if self.compression: + spatial_features_2d = self.naive_compressor(spatial_features_2d) + + # spatial_features_2d is [sum(cav_num), 256, 50, 176] + # output only contains ego + # [B, 256, 50, 176] + + B = len(record_len) + + querys = self.queryKeySim(self.query_encoder(spatial_features_2d)) + keys = self.key_encoder(spatial_features_2d) + + split_query = self.regroup(querys, record_len) + split_key = self.regroup(keys, record_len) + split_feature = self.regroup(spatial_features_2d, record_len) + + fuse_features = [] + estimate_volumes = [] + for b in range(B): + # N, C, H, W + feature = split_feature[b] + key = split_key[b] + query = split_query[b] + + ego = 0 + fuse_feature = [feature[ego]] + N = record_len[b] + + for i in range(1, N): + corr_volume = self.computeCorrespondenceVolume(query[ego], key[i]) + corr_volume_decoded = self.smoothCorrespondenceVolume(corr_volume) # (Hs*Ws+1, Ht, Wt) + grid, mask = self.idx2grid(corr_volume_decoded) # (1, H, W, 2) + weight = torch.max(corr_volume_decoded, dim=0, keepdim=True)[0] + estimate_volumes.append(corr_volume_decoded) + + warp_feature = F.grid_sample(feature[i].unsqueeze(0), grid).squeeze() + warp_feature *= weight + warp_feature *= mask + fuse_feature.append(warp_feature) + + # max / sum + fuse_features.append(torch.max(torch.stack(fuse_feature), dim = 0)[0]) + + # B,C,H,W + out_feature = torch.stack(fuse_features) + if estimate_volumes: + corr_vol = torch.stack(estimate_volumes) + else: + corr_vol = None + + psm = self.cls_head(out_feature) + rm = self.reg_head(out_feature) + + output_dict = {'psm': psm, + 'rm': rm, + 'corr_vol': corr_vol} + + return output_dict + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def computeCorrespondenceVolume(self, featA, featB): + """compute the similarity volume + + Args: + featA: [C, H, W], the query vectors of target agent + featB: [C, H, W], the key vectors of support agent + + Returns: + distAB: [Hs*Ws+1, Ht, Wt] + """ + C, H, W = featA.shape + + distAB = torch.zeros((self.H * self.W +1, self.H, self.W), device=featA.device) + fA = featA.permute(1,2,0).reshape(-1,C) # (H*W, C) + fB = featB.permute(1,2,0).reshape(-1,C) # (H*W, C) + + fA2 = torch.pow(torch.norm(fA,dim=-1),2).view(-1,1).repeat(1,fA.shape[0]) # (H*W, H*W) + fB2 = torch.pow(torch.norm(fB,dim=-1),2).view(-1,1).repeat(1,fB.shape[0]) # (H*W, H*W) + + + normA = torch.pow( fA2 + fB2.t() - 2.*torch.matmul(fA,fB.t()), 0.5 ) # (H*W, H*W) + + + distAB[:-1,...] = normA.permute(1,0).reshape(-1, H, W) + distAB[-1,:,:] = torch.norm(featA,p=2,dim=0) + distAB = -distAB # two pixel is similar, then distAB[pixel1,pixel2] is low. 
We want it high + + return distAB + + def smoothCorrespondenceVolume(self, distAB): + """ smooth the correspondence Volume + + Args: + distAB: (Hs*Ws+1, Ht, Wt) + Returns: + smoothed distAB + """ + distAB = distAB.unsqueeze(0) + output = self.smoothing_net(distAB) + output.squeeze_(0) + + return output + + def idx2grid(self, matches): + """ + Args: + matches: (Hs*Ws + 1, Ht, Wt) + """ + # should rewrite because H!=W + # matches = matches.unsqueeze(0) # [1, Hs*Ws + 1, Ht, Wt] + + + H, W = matches.shape[-2:] + X = torch.arange(W).view(1,-1).repeat(H,1).type(torch.long).view(-1).to(matches.device) # (Ht * Wt) + Y = torch.arange(H).view(-1,1).repeat(1,W).type(torch.long).view(-1).to(matches.device) # (Ht * Wt) + X = torch.cat([X,torch.tensor([0],device=matches.device)],0) + Y = torch.cat([Y,torch.tensor([0],device=matches.device)],0) + + idx = torch.argmax(matches.detach(),0).view(-1) # (Ht*Wt), the value is the index in supporting map + + # idx has no gradient + # mask select those have no correspondence. + # that means, ego's feature is used. + mask = (idx == (matches.shape[0] - 1)).view(H, W).to(matches.device) + + x = torch.index_select(X,0,idx).view(1,H,W) # x_src in affine_grid + y = torch.index_select(Y,0,idx).view(1,H,W) # y_src in affine_grid + x = 2*((1.*x/W)-0.5) # (1, H, W) + y = 2*((1.*y/H)-0.5) # (1, H, W) + + grid = torch.cat([x.unsqueeze(-1),y.unsqueeze(-1)],-1) # (1, 32, 32, 2) + + return grid, mask diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_multiclass.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_multiclass.py new file mode 100644 index 0000000000000000000000000000000000000000..40fe35065b09ead67f47f48fc4c64fabbdf83eea --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_multiclass.py @@ -0,0 +1,217 @@ +# Author: Yifan Lu , Genjia Liu +# a class that integrate multiple simple fusion methods (Single Scale) +# Support F-Cooper, Self-Att, DiscoNet(wo KD), V2VNet, V2XViT, When2comm +# To deployed with centerpoint_loss_multiclass + +import torch +import torch.nn as nn +from icecream import ic +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +from opencood.models.fuse_modules.f_cooper_fuse import SpatialFusion +from opencood.models.fuse_modules.fusion_in_one import MaxFusion, AttFusion, DiscoFusion, V2VNetFusion, V2XViTFusion, When2commFusion +from opencood.utils.transformation_utils import normalize_pairwise_tfm +import torch.nn.functional as F + +class PointPillarMulticlass(nn.Module): + """ + F-Cooper implementation with point pillar backbone. 
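+    Despite the line above, this wrapper is not tied to F-Cooper: the
+    single-scale fusion module is chosen via args['fusion_method']
+    ('max', 'att', 'disconet', 'v2vnet', 'v2xvit' or 'when2comm'), and the
+    heads emit CenterPoint-style multi-class outputs with 8 regression
+    channels per class (x/y offsets, z, h/w/l, sin/cos of yaw), decoded by
+    generate_predicted_boxes().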
+ """ + def __init__(self, args): + super(PointPillarMulticlass, self).__init__() + + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + is_resnet = args['base_bev_backbone'].get("resnet", False) + if is_resnet: + self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) # or you can use ResNetBEVBackbone, which is stronger + else: + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) # or you can use ResNetBEVBackbone, which is stronger + self.voxel_size = args['voxel_size'] + self.out_size_factor = args['out_size_factor'] + self.cav_lidar_range = args['lidar_range'] + + if args['fusion_method'] == "max": + self.fusion_net = MaxFusion() + if args['fusion_method'] == "att": + self.fusion_net = AttFusion(args['att']['feat_dim']) + if args['fusion_method'] == "disconet": + self.fusion_net = DiscoFusion(args['disconet']['feat_dim']) + if args['fusion_method'] == "v2vnet": + self.fusion_net = V2VNetFusion(args['v2vnet']) + if args['fusion_method'] == 'v2xvit': + self.fusion_net = V2XViTFusion(args['v2xvit']) + if args['fusion_method'] == 'when2comm': + self.fusion_net = When2commFusion(args['when2comm']) + + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.compression = False + if "compression" in args: + self.compression = True + self.naive_compressor = NaiveCompressor(self.out_channel, args['compression']) + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 8 * args['anchor_number'], + kernel_size=1) + self.use_dir = False + # if 'dir_args' in args.keys(): + # self.use_dir = True + # self.dir_head = nn.Conv2d(self.out_channel, args['dir_args']['num_bins'] * args['anchor_number'], + # kernel_size=1) # BIN_NUM = 2 + + if 'backbone_fix' in args.keys() and args['backbone_fix']: + self.backbone_fix() + + def backbone_fix(self): + """ + Fix the parameters of backbone during finetune on timedelay。 + """ + for p in self.pillar_vfe.parameters(): + p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + # calculate pairwise affine transformation matrix + _, _, H0, W0 = batch_dict['spatial_features'].shape # original feature map shape H0, W0 + t_matrix = 
normalize_pairwise_tfm(data_dict['pairwise_t_matrix'], H0, W0, self.voxel_size[0]) + batch_dict = self.backbone(batch_dict) + + spatial_features_2d = batch_dict['spatial_features_2d'] + + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + + if self.compression: + spatial_features_2d = self.naive_compressor(spatial_features_2d) + + fused_feature = self.fusion_net(spatial_features_2d, record_len, t_matrix) + + cls = self.cls_head(fused_feature) # [B, 256, 48, 144] -> [B, 3, 48, 144] + bbox = self.reg_head(fused_feature) # [B, 256, 48, 176] -> [B, 24, 48, 144] + + if fused_feature.size(2) == 48: + scaled_feature = F.interpolate(fused_feature, scale_factor=2, mode='nearest') # 'nearest', 'bilinear' + else: + scaled_feature = fused_feature + result_dict = {'fused_feature':scaled_feature} + + box_preds_for_infer = bbox.permute(0, 2, 3, 1).contiguous() + bbox_temp_list = [] + num_class = int(box_preds_for_infer.shape[3]/8) + box_preds_for_infer = box_preds_for_infer.view(box_preds_for_infer.shape[0], box_preds_for_infer.shape[1], box_preds_for_infer.shape[2], num_class, 8) + for i in range(num_class): # num_class + box_preds_for_infer_singleclass = box_preds_for_infer[:,:,:,i,:] + box_preds_for_infer_singleclass = box_preds_for_infer_singleclass.permute(0, 3, 1, 2) + _, bbox_temp = self.generate_predicted_boxes(cls[:, i, :, :], box_preds_for_infer_singleclass) + bbox_temp_list.append(bbox_temp) + bbox_temp_list = torch.stack(bbox_temp_list, dim=1) + + output_dict = {'cls_preds': cls, + 'bbox_preds': bbox, + 'reg_preds_multiclass': bbox_temp_list} # [1, 3, 6912, 7] + + output_dict.update(result_dict) + if self.use_dir: + output_dict.update({'dir_preds': self.dir_head(fused_feature)}) + + return output_dict + + def generate_predicted_boxes(self, cls_preds, box_preds, dir_cls_preds=None): + """ + Args: + batch_size: + cls_preds: (N, H, W, C1) + box_preds: (N, H, W, C2) + dir_cls_preds: (N, H, W, C3) + + Returns: + batch_cls_preds: (B, num_boxes, num_classes) + batch_box_preds: (B, num_boxes, 7+C) + + """ + box_preds = box_preds.permute(0, 2, 3, 1).contiguous() + + batch, H, W, code_size = box_preds.size() ## code_size 表示的是预测的尺寸 + + box_preds = box_preds.reshape(batch, H*W, code_size) + + batch_reg = box_preds[..., 0:2] + # batch_hei = box_preds[..., 2:3] + # batch_dim = torch.exp(box_preds[..., 3:6]) + + h = box_preds[..., 3:4] * self.out_size_factor * self.voxel_size[0] + w = box_preds[..., 4:5] * self.out_size_factor * self.voxel_size[1] + l = box_preds[..., 5:6] * self.out_size_factor * self.voxel_size[2] + batch_dim = torch.cat([h,w,l], dim=-1) + batch_hei = box_preds[..., 2:3] * self.out_size_factor * self.voxel_size[2] + self.cav_lidar_range[2] + + batch_rots = box_preds[..., 6:7] + batch_rotc = box_preds[..., 7:8] + + rot = torch.atan2(batch_rots, batch_rotc) + + ys, xs = torch.meshgrid([torch.arange(0, H), torch.arange(0, W)]) + ys = ys.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + xs = xs.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + + xs = xs.view(batch, -1, 1) + batch_reg[:, :, 0:1] + ys = ys.view(batch, -1, 1) + batch_reg[:, :, 1:2] + + xs = xs * self.out_size_factor * self.voxel_size[0] + self.cav_lidar_range[0] ## 基于feature_map 的size求解真实的坐标 + ys = ys * self.out_size_factor * self.voxel_size[1] + self.cav_lidar_range[1] + + + batch_box_preds = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=2) + # batch_box_preds = batch_box_preds.reshape(batch, H, W, batch_box_preds.shape[-1]) + # batch_box_preds = batch_box_preds.permute(0, 3, 
1, 2).contiguous() + + # batch_box_preds_temp = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=1) + # box_preds = box_preds.permute(0, 3, 1, 2).contiguous() + + # batch_cls_preds = cls_preds.view(batch, H*W, -1) + return cls_preds, batch_box_preds diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_single_multiclass.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_single_multiclass.py new file mode 100644 index 0000000000000000000000000000000000000000..ed57982a44f0e342069e2e1b054ba12967b7990b --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_single_multiclass.py @@ -0,0 +1,242 @@ +# Author: Yifan Lu , Genjia Liu +# a class that integrate multiple simple fusion methods (Single Scale) +# Support F-Cooper, Self-Att, DiscoNet(wo KD), V2VNet, V2XViT, When2comm +# To deployed with centerpoint_loss_multiclass + +import torch +import torch.nn as nn +from icecream import ic +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +from opencood.models.fuse_modules.f_cooper_fuse import SpatialFusion +from opencood.models.fuse_modules.fusion_in_one import MaxFusion, AttFusion, DiscoFusion, V2VNetFusion, V2XViTFusion, When2commFusion +from opencood.utils.transformation_utils import normalize_pairwise_tfm +from opencood.utils.transformation_utils import get_relative_transformation +class PointPillarSingleMulticlass(nn.Module): + """ + F-Cooper implementation with point pillar backbone. 
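+    Single-agent (no-fusion) counterpart of PointPillarMulticlass: a fusion
+    module is still constructed in __init__, but forward() feeds the ego
+    spatial_features_2d straight into the heads (fused_feature is simply the
+    per-agent feature map) and additionally returns the *_single predictions
+    (cls_preds_single / reg_preds_single / bbox_preds_single) for
+    single-agent supervision.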
+ """ + def __init__(self, args): + super(PointPillarSingleMulticlass, self).__init__() + + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + is_resnet = args['base_bev_backbone'].get("resnet", False) + if is_resnet: + self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) # or you can use ResNetBEVBackbone, which is stronger + else: + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) # or you can use ResNetBEVBackbone, which is stronger + self.voxel_size = args['voxel_size'] + self.out_size_factor = args['out_size_factor'] + self.cav_lidar_range = args['lidar_range'] + + if args['fusion_method'] == "max": + self.fusion_net = MaxFusion() + if args['fusion_method'] == "att": + self.fusion_net = AttFusion(args['att']['feat_dim']) + if args['fusion_method'] == "disconet": + self.fusion_net = DiscoFusion(args['disconet']['feat_dim']) + if args['fusion_method'] == "v2vnet": + self.fusion_net = V2VNetFusion(args['v2vnet']) + if args['fusion_method'] == 'v2xvit': + self.fusion_net = V2XViTFusion(args['v2xvit']) + if args['fusion_method'] == 'when2comm': + self.fusion_net = When2commFusion(args['when2comm']) + + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.compression = False + if "compression" in args: + self.compression = True + self.naive_compressor = NaiveCompressor(self.out_channel, args['compression']) + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 8 * args['anchor_number'], + kernel_size=1) + self.use_dir = False + # if 'dir_args' in args.keys(): + # self.use_dir = True + # self.dir_head = nn.Conv2d(self.out_channel, args['dir_args']['num_bins'] * args['anchor_number'], + # kernel_size=1) # BIN_NUM = 2 + + if 'backbone_fix' in args.keys() and args['backbone_fix']: + self.backbone_fix() + + def backbone_fix(self): + """ + Fix the parameters of backbone during finetune on timedelay。 + """ + for p in self.pillar_vfe.parameters(): + p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] if 'record_len' in data_dict.keys() else 3 + + # if key only contains "ego", like intermediate fusion + + # relative_t_matrix = get_relative_transformation(lidar_pose) # [N, 4, 4], cav_to_ego, T_ego_cav + # elif key contains "ego", "641", "649" ..., like late fusion + + ''' + if 'record_len' in data_dict: + record_len = data_dict['record_len'] + relative_t_matrix = data_dict['transformation_matrix'] + else: + relative_t_matrix = [] 
+ for cav_id, cav_data in data_dict.items(): + + relative_t_matrix.append(cav_data['transformation_matrix']) + record_len = len(relative_t_matrix) + relative_t_matrix = torch.stack(relative_t_matrix, dim=0) + ''' + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + # calculate pairwise affine transformation matrix + _, _, H0, W0 = batch_dict['spatial_features'].shape # original feature map shape H0, W0 + # t_matrix = normalize_pairwise_tfm(data_dict['pairwise_t_matrix'], H0, W0, self.voxel_size[0]) + batch_dict = self.backbone(batch_dict) + + spatial_features_2d = batch_dict['spatial_features_2d'] + + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + + if self.compression: + spatial_features_2d = self.naive_compressor(spatial_features_2d) + + psm_single = self.cls_head(spatial_features_2d) + rm_single = self.reg_head(spatial_features_2d) + + fused_feature = spatial_features_2d + + cls = self.cls_head(fused_feature) # [B, 256, 48, 144] -> [B, 3, 48, 144] + bbox = self.reg_head(fused_feature) # [B, 256, 48, 176] -> [B, 24, 48, 144] + + box_preds_for_infer = bbox.permute(0, 2, 3, 1).contiguous() + bbox_temp_list = [] + num_class = int(box_preds_for_infer.shape[3]/8) + box_preds_for_infer = box_preds_for_infer.view(box_preds_for_infer.shape[0], box_preds_for_infer.shape[1], box_preds_for_infer.shape[2], num_class, 8) + for i in range(num_class): # num_class + box_preds_for_infer_singleclass = box_preds_for_infer[:,:,:,i,:] + box_preds_for_infer_singleclass = box_preds_for_infer_singleclass.permute(0, 3, 1, 2) + _, bbox_temp = self.generate_predicted_boxes(cls[:, i, :, :], box_preds_for_infer_singleclass) + bbox_temp_list.append(bbox_temp) + bbox_temp_list = torch.stack(bbox_temp_list, dim=1) + + _, bbox_temp = self.generate_predicted_boxes(cls, bbox) + + output_dict = {'cls_preds': cls, + 'bbox_preds': bbox, + 'reg_preds_multiclass': bbox_temp_list, + 'reg_preds': bbox_temp + } # [1, 3, 6912, 7] + + _, bbox_temp_single = self.generate_predicted_boxes(psm_single, rm_single) + + output_dict.update({'cls_preds_single': psm_single, # [BN, 1, 100, 100] + 'reg_preds_single': bbox_temp_single, # [BN, 10000, 7] + 'bbox_preds_single': rm_single, # [BN, 8, 100, 100] + # 'comm_rate': communication_rates + }) + + if self.use_dir: + output_dict.update({'dir_preds': self.dir_head(fused_feature)}) + + return output_dict + + def generate_predicted_boxes(self, cls_preds, box_preds, dir_cls_preds=None): + """ + Args: + batch_size: + cls_preds: (N, H, W, C1) + box_preds: (N, H, W, C2) + dir_cls_preds: (N, H, W, C3) + + Returns: + batch_cls_preds: (B, num_boxes, num_classes) + batch_box_preds: (B, num_boxes, 7+C) + + """ + box_preds = box_preds.permute(0, 2, 3, 1).contiguous() + + batch, H, W, code_size = box_preds.size() ## code_size 表示的是预测的尺寸 + + box_preds = box_preds.reshape(batch, H*W, code_size) + + batch_reg = box_preds[..., 0:2] + # batch_hei = box_preds[..., 2:3] + # batch_dim = torch.exp(box_preds[..., 3:6]) + + h = box_preds[..., 3:4] * self.out_size_factor * self.voxel_size[0] + w = box_preds[..., 4:5] * self.out_size_factor * self.voxel_size[1] + l = box_preds[..., 5:6] * self.out_size_factor * self.voxel_size[2] + batch_dim = torch.cat([h,w,l], dim=-1) + batch_hei = box_preds[..., 2:3] * self.out_size_factor * self.voxel_size[2] + 
self.cav_lidar_range[2] + + batch_rots = box_preds[..., 6:7] + batch_rotc = box_preds[..., 7:8] + + rot = torch.atan2(batch_rots, batch_rotc) + + ys, xs = torch.meshgrid([torch.arange(0, H), torch.arange(0, W)]) + ys = ys.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + xs = xs.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + + xs = xs.view(batch, -1, 1) + batch_reg[:, :, 0:1] + ys = ys.view(batch, -1, 1) + batch_reg[:, :, 1:2] + + xs = xs * self.out_size_factor * self.voxel_size[0] + self.cav_lidar_range[0] ## 基于feature_map 的size求解真实的坐标 + ys = ys * self.out_size_factor * self.voxel_size[1] + self.cav_lidar_range[1] + + + batch_box_preds = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=2) + # batch_box_preds = batch_box_preds.reshape(batch, H, W, batch_box_preds.shape[-1]) + # batch_box_preds = batch_box_preds.permute(0, 3, 1, 2).contiguous() + + # batch_box_preds_temp = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=1) + # box_preds = box_preds.permute(0, 3, 1, 2).contiguous() + + # batch_cls_preds = cls_preds.view(batch, H*W, -1) + return cls_preds, batch_box_preds \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_uncertainty.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_uncertainty.py new file mode 100644 index 0000000000000000000000000000000000000000..9bd495187bdf2baa8ba4464d04860daf29b63c95 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_uncertainty.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , OpenPCDet +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch +import torch.nn as nn + + +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.utils.model_utils import weight_init + +class PointPillarUncertainty(nn.Module): + def __init__(self, args): + super(PointPillarUncertainty, self).__init__() + + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) + self.uncertainty_dim = args['uncertainty_dim'] # dim=3 means x, y, yaw, dim=2 means x, y + + self.cls_head = nn.Conv2d(128 * 3, args['anchor_num'], + kernel_size=1) + self.reg_head = nn.Conv2d(128 * 3, 7 * args['anchor_num'], + kernel_size=1) + + self.unc_head = nn.Conv2d(128 * 3, self.uncertainty_dim * args['anchor_num'], + kernel_size=1) + + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_head = nn.Conv2d(128 * 3, args['dir_args']['num_bins'] * args['anchor_num'], + kernel_size=1) # BIN_NUM = 2 + else: + self.use_dir = False + + + self.apply(weight_init) + + + def forward(self, data_dict): + + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + + + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points} + + batch_dict = self.pillar_vfe(batch_dict) + batch_dict = self.scatter(batch_dict) + batch_dict = self.backbone(batch_dict) + + spatial_features_2d = batch_dict['spatial_features_2d'] + + cls_preds = 
self.cls_head(spatial_features_2d) + reg_preds = self.reg_head(spatial_features_2d) + unc_preds = self.unc_head(spatial_features_2d) # s is log(b) or log(sigma^2) + + output_dict = {'cls_preds': cls_preds, + 'reg_preds': reg_preds, + 'unc_preds': unc_preds} + + if self.use_dir: + dir_preds = self.dir_head(spatial_features_2d) + output_dict.update({'dir_preds': dir_preds}) + + return output_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_v2vnet_robust.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_v2vnet_robust.py new file mode 100644 index 0000000000000000000000000000000000000000..802a4c8192afeb92fd515473bab69c47dd1f06ea --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_v2vnet_robust.py @@ -0,0 +1,335 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang , Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib +from numpy import record +import torch +import torch.nn as nn + +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +from opencood.models.sub_modules.v2v_robust_module import AttentionWrapper, PoseRegressionWraper, WeightedEM, get_intersection, regroup +from opencood.utils.pose_utils import generate_noise_torch +from opencood.utils.transformation_utils import get_pairwise_transformation_torch +from opencood.models.fuse_modules.v2v_fuse import V2VNetFusion + +from icecream import ic +from opencood.utils.model_utils import weight_init + +class PointPillarV2VNetRobust(nn.Module): + def __init__(self, args): + super(PointPillarV2VNetRobust, self).__init__() + + self.max_cav = args['max_cav'] + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) + + # used to downsample the feature map for efficient computation + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.compression = False + + if args['compression'] > 0: + self.compression = True + self.naive_compressor = NaiveCompressor(256, args['compression']) + + self.fusion_net = V2VNetFusion(args['v2vfusion']) + + self.cls_head = nn.Conv2d(128 * 2, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(128 * 2, 7 * args['anchor_number'], + kernel_size=1) + + + self.downsample_rate = args['robust']['downsample_rate'] + self.discrete_ratio = args['robust']['discrete_ratio'] + self.H = args['robust']['H'] + self.W = args['robust']['W'] + + self.affine_parameter = {"H":self.H, "W": self.W, "downsample_rate": self.downsample_rate, "discrete_ratio": self.discrete_ratio} + learnable_alpha = True if 'learnable_alpha' not in args['robust'] else args['robust']['learnable_alpha'] + + self.pose_reg_net = PoseRegressionWraper(args['robust']['feature_dim']*2, + args['robust']['hidden_dim'], + self.affine_parameter + ) + + self.attention_net = AttentionWrapper(args['robust']['feature_dim']*2, + args['robust']['hidden_dim'], + self.affine_parameter, + 
learnable_alpha, + ) + + self.stage = args['stage'] # [0, 1, 2] + + self.apply(weight_init) + + if self.stage == 1: + self.backbone_fix() + if self.stage == 2: + self.backbone_unfix() + + def backbone_fix(self): + """ + Fix the parameters of backbone for stage 1 + """ + for p in self.pillar_vfe.parameters(): + p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = False + + for p in self.fusion_net.parameters(): + p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + for p in self.attention_net.parameters(): + p.requires_grad = False + + def backbone_unfix(self): + """ + unfix for stage 2 + """ + + for p in self.pillar_vfe.parameters(): + p.requires_grad = True + + for p in self.scatter.parameters(): + p.requires_grad = True + + for p in self.backbone.parameters(): + p.requires_grad = True + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = True + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = True + + for p in self.fusion_net.parameters(): + p.requires_grad = True + + for p in self.cls_head.parameters(): + p.requires_grad = True + for p in self.reg_head.parameters(): + p.requires_grad = True + + for p in self.attention_net.parameters(): + p.requires_grad = True + + def pose_correction(self, features, record_len, pairwise_t_matrix): + """ use pose regression module to correct relative pose + Args: + + Returns: + pairwise_t_matrix_new: + [B, L, L, 4, 4], the relative pose after correction. + """ + return self.pose_reg_net(features, record_len, pairwise_t_matrix) + + def global_correction(self, lidar_pose, pairwise_t_matrix, record_len): + """ + Args: + lidar_pose: [N, 3] + input noisy lidar pose + pairwise_t_matrix: [B, L, L, 4, 4] + relative pose after pose regression module + record_len: list, + shape [B] + + Returns: + lidar_pose_new: [N, 3] + refined lidar pose + """ + + B = len(record_len) + lidar_pose_new = [] + + # [[N1,3], [N2, 3], ...] 
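+        # regroup() splits the stacked per-CAV tensor back into one chunk per
+        # sample, e.g. record_len = [2, 3] turns a (5, 3) pose tensor into
+        # chunks of shape (2, 3) and (3, 3).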
+ lidar_pose_split = regroup(lidar_pose, record_len) + + for b in range(B): + if record_len[b] == 1: + lidar_pose_new.append(lidar_pose_split[b]) + continue + lidar_pose = lidar_pose_split[b] + intersection_matrix = get_intersection(pairwise_t_matrix[b], self.affine_parameter) + lidar_pose_corrected = WeightedEM(lidar_pose, pairwise_t_matrix[b],intersection_matrix) + + lidar_pose_new.append(lidar_pose_corrected) + + lidar_pose_new = torch.cat(lidar_pose_new, dim=0) + + return lidar_pose_new + + + def noise_generator(self, lidar_pose, all_strong=False): + noise_s = generate_noise_torch(lidar_pose, pos_std=0.4, rot_std=4) # (N, 6) + noise_w = generate_noise_torch(lidar_pose, pos_std=0.01, rot_std=0.1) # (N, 6) + N = lidar_pose.shape[0] + + if all_strong: + choice = torch.zeros((N, 1), device=lidar_pose.device) # (N, 1) 0 choose strong, 1 choose weak + noise = noise_s + else: + choice = torch.randint(0, 2, (N, 1), device=lidar_pose.device) # (N, 1) 0 choose strong, 1 choose weak + noise = choice * noise_w + (1-choice) * noise_s + + return noise, choice + + + def train_forward(self, spatial_features_2d, record_len, lidar_pose, pairwise_t_matrix, stage): + """ + stage = 0, only training attentive_aggregation and v2vnet, strong noise and weak noise are used. + stage = 1, only training pose correction module. all strong noise + stage = 2, all component are used. all strong noise + + Args: + spatial_features_2d: (N, C, H, W) + record_len: list + lidar_pose: (N, 6), it will turn to [N, 3] quickly + """ + if stage == 0: + noise, choice = self.noise_generator(lidar_pose, all_strong=False) + + if stage == 1 or stage == 2: + noise, choice = self.noise_generator(lidar_pose, all_strong=True) + + lidar_pose += noise + lidar_pose = lidar_pose[:,[0,1,4]] # [N, 3] + pairwise_t_matrix = get_pairwise_transformation_torch(lidar_pose, self.max_cav, record_len, dof=3) + + # when training pairwise_t_matrix, pairwise_t_matrix carries given noise. 
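+        # stage 0: train the attention weighting and V2VNet fusion under mixed
+        #          strong/weak noise;
+        # stage 1: train only the pairwise pose-regression (correction) module
+        #          under strong noise;
+        # stage 2: run pose correction, global (weighted-EM) consensus and the
+        #          attention-weighted fusion end to end under strong noise.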
+ if self.stage == 0: + scores, weight = self.attention_net(spatial_features_2d, record_len, pairwise_t_matrix) + fused_feature = self.fusion_net(spatial_features_2d, record_len, pairwise_t_matrix, weight) + psm = self.cls_head(fused_feature) + rm = self.reg_head(fused_feature) + print("scores:", scores) + print("weight:", weight) + print("alpha:", self.attention_net.alpha) + + output_dict = {'stage': stage, + 'scores': scores, + 'choice': choice, + 'psm': psm, + 'rm': rm} + + if self.stage == 1: + pairwise_corr, _ = self.pose_correction(spatial_features_2d, record_len, pairwise_t_matrix) + output_dict = {'stage': stage, + 'pairwise_corr' : pairwise_corr, + 'pairwise_t_matrix': pairwise_t_matrix} + + if self.stage == 2: + pairwise_corr, pairwise_t_matrix_new = self.pose_correction(spatial_features_2d, record_len, pairwise_t_matrix) + lidar_pose_corrected = self.global_correction(lidar_pose, pairwise_t_matrix_new, record_len) # [N, 3] + + pairwise_t_matrix_corrected = get_pairwise_transformation_torch(lidar_pose_corrected, self.max_cav, record_len, dof=3) + scores, weight = self.attention_net(spatial_features_2d, record_len, pairwise_t_matrix_corrected) + fused_feature = self.fusion_net(spatial_features_2d, record_len, pairwise_t_matrix_corrected, weight) + psm = self.cls_head(fused_feature) + rm = self.reg_head(fused_feature) + + output_dict = {'stage': stage, + 'scores': scores, + 'psm': psm, + 'rm': rm, + 'pairwise_corr' : pairwise_corr, + 'pairwise_t_matrix': pairwise_t_matrix} + + return output_dict + + + + def eval_forward(self, spatial_features_2d, record_len, lidar_pose, pairwise_t_matrix, stage): + """ + same as stage=2 in training, but no noise added. + """ + lidar_pose = lidar_pose[:,[0,1,4]] # [N, 3] + pairwise_t_matrix = get_pairwise_transformation_torch(lidar_pose, self.max_cav, record_len, dof=3) + + pairwise_corr, pairwise_t_matrix = self.pose_correction(spatial_features_2d, record_len, pairwise_t_matrix) + + lidar_pose_corrected = self.global_correction(lidar_pose, pairwise_t_matrix, record_len) # [N, 3] + pairwise_t_matrix_corrected = get_pairwise_transformation_torch(lidar_pose_corrected, self.max_cav, record_len, dof=3) + + + scores, weight = self.attention_net(spatial_features_2d, record_len, pairwise_t_matrix_corrected) + fused_feature = self.fusion_net(spatial_features_2d, record_len, pairwise_t_matrix_corrected, weight) + psm = self.cls_head(fused_feature) + rm = self.reg_head(fused_feature) + + output_dict = {'stage': stage, + 'scores': scores, + 'psm': psm, + 'rm': rm, + 'pairwise_t_matrix': pairwise_t_matrix} + + return output_dict + + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + lidar_pose = data_dict['lidar_pose'] # [sum(cav), 6] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + batch_dict = self.backbone(batch_dict) + # N, C, H', W'. 
[N, 256, 50, 176] + spatial_features_2d = batch_dict['spatial_features_2d'] + + # downsample feature to reduce memory + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + # compressor + if self.compression: + spatial_features_2d = self.naive_compressor(spatial_features_2d) + + # spatial_features_2d is [sum(cav_num), 256, 50, 176] + # output only contains ego + # [B, 256, 50, 176] + + # lidar_pose -> pairwise_t_matrix , same content + + # if self.training: + # return self.train_forward(spatial_features_2d, record_len, lidar_pose, pairwise_t_matrix, self.stage) + # else: + # return self.eval_forward(spatial_features_2d, record_len, lidar_pose, pairwise_t_matrix, self.stage) + + return self.train_forward(spatial_features_2d, record_len, lidar_pose, pairwise_t_matrix, self.stage) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_where2comm.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_where2comm.py new file mode 100644 index 0000000000000000000000000000000000000000..cbc0db947cbea549aa83b677020cbf15fadee222 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_where2comm.py @@ -0,0 +1,156 @@ +import torch.nn as nn + +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +from opencood.models.sub_modules.dcn_net import DCNNet +# from opencood.models.fuse_modules.where2comm import Where2comm +from opencood.models.fuse_modules.where2comm_attn import Where2comm +import torch + +class PointPillarWhere2comm(nn.Module): + def __init__(self, args): + super(PointPillarWhere2comm, self).__init__() + + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + if 'resnet' in args['base_bev_backbone']: + self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) + else: + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + + + # used to downsample the feature map for efficient computation + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + self.compression = False + + if 'compression' in args and args['compression'] > 0: + self.compression = True + self.naive_compressor = NaiveCompressor(256, args['compression']) + + # self.fusion_net = TransformerFusion(args['fusion_args']) + self.fusion_net = Where2comm(args['fusion_args']) + self.multi_scale = args['fusion_args']['multi_scale'] + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 7 * args['anchor_number'], + kernel_size=1) + self.use_dir = False + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_head = nn.Conv2d(self.out_channel, args['dir_args']['num_bins'] * args['anchor_number'], + kernel_size=1) # BIN_NUM 
= 2 + if "backbone_fix" in args and args['backbone_fix']: + self.backbone_fix() + + def backbone_fix(self): + """ + Fix the parameters of backbone during finetune on timedelay。 + """ + for p in self.pillar_vfe.parameters(): + p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + batch_dict = self.backbone(batch_dict) + # N, C, H', W'. [N, 384, 100, 352] + spatial_features_2d = batch_dict['spatial_features_2d'] + + # downsample feature to reduce memory + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + # compressor + if self.compression: + spatial_features_2d = self.naive_compressor(spatial_features_2d) + # spatial_features_2d is [sum(cav_num), 256, 50, 176] + # output only contains ego + # [B, 256, 50, 176] + psm_single = self.cls_head(spatial_features_2d) + rm_single = self.reg_head(spatial_features_2d) + if self.use_dir: + dir_single = self.dir_head(spatial_features_2d) + + if self.multi_scale: + fused_feature, communication_rates, result_dict = self.fusion_net(batch_dict['spatial_features'], + psm_single, + record_len, + pairwise_t_matrix, + self.backbone) + # downsample feature to reduce memory + if self.shrink_flag: + fused_feature = self.shrink_conv(fused_feature) + else: + fused_feature, communication_rates, result_dict = self.fusion_net(spatial_features_2d, + psm_single, + record_len, + pairwise_t_matrix) + + + # print('fused_feature: ', fused_feature.shape) + psm = self.cls_head(fused_feature) + rm = self.reg_head(fused_feature) + + + output_dict = {'cls_preds': psm, + 'reg_preds': rm} + if self.use_dir: + output_dict.update({'dir_preds': self.dir_head(fused_feature), + 'dir_preds_single': dir_single}) + + output_dict.update(result_dict) + + output_dict.update({'cls_preds_single': psm_single, + 'reg_preds_single': rm_single, + 'comm_rate': communication_rates + }) + return output_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/second.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/second.py new file mode 100644 index 0000000000000000000000000000000000000000..34efee7769009ecc873be262b2796f4cd32e9ad2 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/second.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , OpenPCDet +# License: 
TDG-Attribution-NonCommercial-NoDistrib + + +import torch.nn as nn + +from opencood.models.sub_modules.mean_vfe import MeanVFE +from opencood.models.sub_modules.sparse_backbone_3d import VoxelBackBone8x +from opencood.models.sub_modules.height_compression import HeightCompression +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone + + +class Second(nn.Module): + def __init__(self, args): + super(Second, self).__init__() + + # mean_vfe + self.mean_vfe = MeanVFE(args['mean_vfe'], 4) + # sparse 3d backbone + self.backbone_3d = VoxelBackBone8x(args['backbone_3d'], + 4, args['grid_size']) + # height compression + self.height_compression = HeightCompression(args['height_compression']) + # base ben backbone + self.backbone_2d = BaseBEVBackbone(args['base_bev_backbone'], 256) + + # head + self.cls_head = nn.Conv2d(256 * 2, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(256 * 2, 7 * args['anchor_num'], + kernel_size=1) + + def forward(self, data_dict): + + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + batch_size = voxel_coords[:,0].max() + 1 # batch size is padded in the first idx + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'batch_size': batch_size} + + batch_dict = self.mean_vfe(batch_dict) + batch_dict = self.backbone_3d(batch_dict) + batch_dict = self.height_compression(batch_dict) + batch_dict = self.backbone_2d(batch_dict) + + spatial_features_2d = batch_dict['spatial_features_2d'] + + psm = self.cls_head(spatial_features_2d) + rm = self.reg_head(spatial_features_2d) + + output_dict = {'psm': psm, + 'rm': rm} + + return output_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/second_intermediate.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/second_intermediate.py new file mode 100644 index 0000000000000000000000000000000000000000..c86f9734f8b1c74dcf4ee64483130814c1ff17ec --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/second_intermediate.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch +import torch.nn as nn + +from opencood.models.sub_modules.mean_vfe import MeanVFE +from opencood.models.sub_modules.sparse_backbone_3d import VoxelBackBone8x +from opencood.models.sub_modules.height_compression import HeightCompression +from opencood.models.sub_modules.att_bev_backbone import AttBEVBackbone + + +class SecondIntermediate(nn.Module): + def __init__(self, args): + super(SecondIntermediate, self).__init__() + + self.batch_size = args['batch_size'] + # mean_vfe + self.mean_vfe = MeanVFE(args['mean_vfe'], 4) + # sparse 3d backbone + self.backbone_3d = VoxelBackBone8x(args['backbone_3d'], + 4, args['grid_size']) + # height compression + self.height_compression = HeightCompression(args['height_compression']) + # base ben backbone + self.backbone_2d = AttBEVBackbone(args['base_bev_backbone'], 256) + + # head + self.cls_head = nn.Conv2d(256 * 2, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(256 * 2, 7 * args['anchor_num'], + kernel_size=1) + + def forward(self, data_dict): + + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + 
voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'batch_size': torch.sum(record_len).cpu().numpy(), + 'record_len': record_len} + + batch_dict = self.mean_vfe(batch_dict) + batch_dict = self.backbone_3d(batch_dict) + batch_dict = self.height_compression(batch_dict) + batch_dict = self.backbone_2d(batch_dict) + + spatial_features_2d = batch_dict['spatial_features_2d'] + + psm = self.cls_head(spatial_features_2d) + rm = self.reg_head(spatial_features_2d) + + output_dict = {'psm': psm, + 'rm': rm} + + return output_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/second_ssfa.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/second_ssfa.py new file mode 100644 index 0000000000000000000000000000000000000000..1a05d9f0656e6cedb786b8b8828e0073d3d65cc2 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/second_ssfa.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , OpenPCDet +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch.nn as nn + +from opencood.models.sub_modules.mean_vfe import MeanVFE +from opencood.models.sub_modules.sparse_backbone_3d import VoxelBackBone8x +from opencood.models.sub_modules.height_compression import HeightCompression +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.cia_ssd_utils import SSFA, Head +from opencood.models.sub_modules.downsample_conv import DownsampleConv +import numpy as np + +class SecondSSFA(nn.Module): + def __init__(self, args): + super(SecondSSFA, self).__init__() + lidar_range = np.array(args['lidar_range']) + grid_size = np.round((lidar_range[3:6] - lidar_range[:3]) / + np.array(args['voxel_size'])).astype(np.int64) + self.vfe = MeanVFE(args['mean_vfe'], + args['mean_vfe']['num_point_features']) + self.spconv_block = VoxelBackBone8x(args['spconv'], + input_channels=args['spconv'][ + 'num_features_in'], + grid_size=grid_size) + self.map_to_bev = HeightCompression(args['map2bev']) + self.ssfa = SSFA(args['ssfa']) + + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.head = Head(**args['head']) + + def forward(self, data_dict): + + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + batch_size = voxel_coords[:,0].max() + 1 # batch size is padded in the first idx + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'batch_size': batch_size} + + batch_dict = self.vfe(batch_dict) + batch_dict = self.spconv_block(batch_dict) + batch_dict = self.map_to_bev(batch_dict) + out = self.ssfa(batch_dict['spatial_features']) + if self.shrink_flag: + out = self.shrink_conv(out) + + return self.head(out) diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/second_ssfa_uncertainty.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/second_ssfa_uncertainty.py new file mode 100644 index 0000000000000000000000000000000000000000..580e2193111534d7bbbd16e599576e96dd6f03d3 --- /dev/null +++ 
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/second_ssfa_uncertainty.py @@ -0,0 +1,87 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , OpenPCDet +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch.nn as nn + +from opencood.models.sub_modules.mean_vfe import MeanVFE +from opencood.models.sub_modules.sparse_backbone_3d import VoxelBackBone8x +from opencood.models.sub_modules.height_compression import HeightCompression +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.cia_ssd_utils import SSFA +from opencood.models.sub_modules.downsample_conv import DownsampleConv +import numpy as np +from opencood.utils.model_utils import weight_init + +class SecondSSFAUncertainty(nn.Module): + def __init__(self, args): + super(SecondSSFAUncertainty, self).__init__() + lidar_range = np.array(args['lidar_range']) + grid_size = np.round((lidar_range[3:6] - lidar_range[:3]) / + np.array(args['voxel_size'])).astype(np.int64) + self.vfe = MeanVFE(args['mean_vfe'], + args['mean_vfe']['num_point_features']) + self.spconv_block = VoxelBackBone8x(args['spconv'], + input_channels=args['spconv'][ + 'num_features_in'], + grid_size=grid_size) + self.map_to_bev = HeightCompression(args['map2bev']) + self.ssfa = SSFA(args['ssfa']) + self.out_channel = args['ssfa']['feature_num'] + + uncertainty_dim = args['uncertainty_dim'] + + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_num'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 7 * args['anchor_num'], + kernel_size=1) + self.unc_head = nn.Conv2d(self.out_channel, uncertainty_dim * args['anchor_num'], + kernel_size=1) + + self.use_dir = False + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_head = nn.Conv2d(self.out_channel, args['dir_args']['num_bins'] * args['anchor_num'], + kernel_size=1) # BIN_NUM = 2 + + self.apply(weight_init) + + def forward(self, data_dict): + + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + batch_size = voxel_coords[:, 0].max() + 1 # batch size is padded in the first idx + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'batch_size': batch_size} + + batch_dict = self.vfe(batch_dict) + batch_dict = self.spconv_block(batch_dict) + batch_dict = self.map_to_bev(batch_dict) + out = self.ssfa(batch_dict['spatial_features']) + if self.shrink_flag: + out = self.shrink_conv(out) + + cls_preds = self.cls_head(out) + reg_preds = self.reg_head(out) + unc_preds = self.unc_head(out) # s is log(b) or log(sigma^2) + + output_dict = {'cls_preds': cls_preds, + 'reg_preds': reg_preds, + 'unc_preds': unc_preds} + + if self.use_dir: + dir_preds = self.dir_head(out) + output_dict.update({'dir_preds': dir_preds}) + + return output_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/base_bev_backbone.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/base_bev_backbone.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b09d5d4aff56fd21e4b37fee1b6e7856a31e554a Binary files /dev/null and 
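Both SSFA variants above derive the voxel grid resolution directly from the point-cloud range and voxel size instead of reading it from the config. A standalone sketch of that arithmetic with hypothetical values (not the values used in this experiment):

```python
import numpy as np

# hypothetical range/voxel settings, only to illustrate the formula used above
lidar_range = np.array([-40.0, -40.0, -3.0, 40.0, 40.0, 1.0])  # x/y/z min then max
voxel_size = np.array([0.1, 0.1, 4.0])

grid_size = np.round((lidar_range[3:6] - lidar_range[:3]) / voxel_size).astype(np.int64)
# -> array([800, 800,   1]): number of voxels along x, y, z
```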
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/base_bev_backbone.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/base_bev_backbone_resnet.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/base_bev_backbone_resnet.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..27d551ca0fba7bcb30c6408507f19fa69f497428 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/base_bev_backbone_resnet.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/downsample_conv.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/downsample_conv.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3c080b27623f55b3d58d85cbd2b21e7eb39fe588 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/downsample_conv.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/naive_compress.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/naive_compress.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2f2d9706674f705bc03c734a4b529f0e488b0baf Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/naive_compress.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/pillar_vfe.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/pillar_vfe.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5c64aeb0e45a13e06401683b27a9a5480279f5d6 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/pillar_vfe.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/point_pillar_scatter.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/point_pillar_scatter.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a72f47059b971145a2c912babb702dc667c11e14 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/point_pillar_scatter.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/resblock.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/resblock.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..65d40d6621cf6163d68f647bcb37df12b06b5e54 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/resblock.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/torch_transformation_utils.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/torch_transformation_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9af88dd06f6005f1a3700bbf1d0c7674590563ad Binary files 
/dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/torch_transformation_utils.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/att_bev_backbone.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/att_bev_backbone.py new file mode 100644 index 0000000000000000000000000000000000000000..258475c502c7cf7602a3ebda7dbecf3e253d25c6 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/att_bev_backbone.py @@ -0,0 +1,166 @@ +import numpy as np +import torch +import torch.nn as nn + +from opencood.models.fuse_modules.self_attn import AttFusion +from opencood.models.sub_modules.auto_encoder import AutoEncoder + +DEBUG = False + +class AttBEVBackbone(nn.Module): + def __init__(self, model_cfg, input_channels): + super().__init__() + self.model_cfg = model_cfg + self.compress = False + + self.discrete_ratio = model_cfg['voxel_size'][0] + self.downsample_rate = 1 + + + + if 'compression' in model_cfg and model_cfg['compression'] > 0: + self.compress = True + self.compress_layer = model_cfg['compression'] + + if 'layer_nums' in self.model_cfg: + + assert len(self.model_cfg['layer_nums']) == \ + len(self.model_cfg['layer_strides']) == \ + len(self.model_cfg['num_filters']) + + layer_nums = self.model_cfg['layer_nums'] + layer_strides = self.model_cfg['layer_strides'] + num_filters = self.model_cfg['num_filters'] + else: + layer_nums = layer_strides = num_filters = [] + + if 'upsample_strides' in self.model_cfg: + assert len(self.model_cfg['upsample_strides']) \ + == len(self.model_cfg['num_upsample_filter']) + + num_upsample_filters = self.model_cfg['num_upsample_filter'] + upsample_strides = self.model_cfg['upsample_strides'] + + else: + upsample_strides = num_upsample_filters = [] + + num_levels = len(layer_nums) + c_in_list = [input_channels, *num_filters[:-1]] + + self.blocks = nn.ModuleList() + self.fuse_modules = nn.ModuleList() + self.deblocks = nn.ModuleList() + + if self.compress: + self.compression_modules = nn.ModuleList() + + for idx in range(num_levels): + cur_layers = [ + nn.ZeroPad2d(1), + nn.Conv2d( + c_in_list[idx], num_filters[idx], kernel_size=3, + stride=layer_strides[idx], padding=0, bias=False + ), + nn.BatchNorm2d(num_filters[idx], eps=1e-3, momentum=0.01), + nn.ReLU() + ] + + fuse_network = AttFusion(num_filters[idx]) + self.fuse_modules.append(fuse_network) + if self.compress and self.compress_layer - idx > 0: + self.compression_modules.append(AutoEncoder(num_filters[idx], + self.compress_layer-idx)) + + for k in range(layer_nums[idx]): + cur_layers.extend([ + nn.Conv2d(num_filters[idx], num_filters[idx], + kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(num_filters[idx], eps=1e-3, momentum=0.01), + nn.ReLU() + ]) + + self.blocks.append(nn.Sequential(*cur_layers)) + if len(upsample_strides) > 0: + stride = upsample_strides[idx] + if stride >= 1: + self.deblocks.append(nn.Sequential( + nn.ConvTranspose2d( + num_filters[idx], num_upsample_filters[idx], + upsample_strides[idx], + stride=upsample_strides[idx], bias=False + ), + nn.BatchNorm2d(num_upsample_filters[idx], + eps=1e-3, momentum=0.01), + nn.ReLU() + )) + else: + stride = np.round(1 / stride).astype(np.int) + self.deblocks.append(nn.Sequential( + nn.Conv2d( + num_filters[idx], num_upsample_filters[idx], + stride, + stride=stride, bias=False + ), + nn.BatchNorm2d(num_upsample_filters[idx], eps=1e-3, + momentum=0.01), + nn.ReLU() + )) + + c_in = 
sum(num_upsample_filters) + if len(upsample_strides) > num_levels: + self.deblocks.append(nn.Sequential( + nn.ConvTranspose2d(c_in, c_in, upsample_strides[-1], + stride=upsample_strides[-1], bias=False), + nn.BatchNorm2d(c_in, eps=1e-3, momentum=0.01), + nn.ReLU(), + )) + + self.num_bev_features = c_in + + def forward(self, data_dict): + spatial_features = data_dict['spatial_features'] + if DEBUG: + origin_features = torch.clone(spatial_features) + record_len = data_dict['record_len'] + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + + ups = [] + ret_dict = {} + x = spatial_features + + H, W = x.shape[2:] # 200, 704 + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + + for i in range(len(self.blocks)): + x = self.blocks[i](x) + if self.compress and i < len(self.compression_modules): + x = self.compression_modules[i](x) + if DEBUG: + self.fuse_modules[i].forward_debug(x, origin_features, record_len, pairwise_t_matrix) + else: + x_fuse = self.fuse_modules[i](x, record_len, pairwise_t_matrix) + + stride = int(spatial_features.shape[2] / x.shape[2]) + ret_dict['spatial_features_%dx' % stride] = x + + if len(self.deblocks) > 0: + ups.append(self.deblocks[i](x_fuse)) + else: + ups.append(x_fuse) + + if len(ups) > 1: + x = torch.cat(ups, dim=1) + elif len(ups) == 1: + x = ups[0] + + if len(self.deblocks) > len(self.blocks): + x = self.deblocks[-1](x) + + data_dict['spatial_features_2d'] = x + return data_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/auto_encoder.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/auto_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..a3c548e4bd60d3552abda7e7cc69ae56d2d00d46 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/auto_encoder.py @@ -0,0 +1,67 @@ +import torch +import torch.nn as nn + + +class AutoEncoder(nn.Module): + def __init__(self, feature_num, layer_num): + super().__init__() + self.feature_num = feature_num + self.feature_stride = 2 + + self.encoder = nn.ModuleList() + self.decoder = nn.ModuleList() + + for i in range(layer_num): + cur_layers = [ + nn.ZeroPad2d(1), + nn.Conv2d( + feature_num, feature_num, kernel_size=3, + stride=2, padding=0, bias=False + ), + nn.BatchNorm2d(feature_num, eps=1e-3, momentum=0.01), + nn.ReLU()] + + cur_layers.extend([ + nn.Conv2d(feature_num, feature_num // self.feature_stride, + kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(feature_num // self.feature_stride, + eps=1e-3, momentum=0.01), + nn.ReLU() + ]) + + self.encoder.append(nn.Sequential(*cur_layers)) + feature_num = feature_num // self.feature_stride + + feature_num = self.feature_num + for i in range(layer_num): + cur_layers = [nn.Sequential( + nn.ConvTranspose2d( + feature_num // 2, feature_num, + kernel_size=2, + stride=2, bias=False + ), + nn.BatchNorm2d(feature_num, + eps=1e-3, momentum=0.01), + nn.ReLU() + )] + + cur_layers.extend([nn.Sequential( + nn.Conv2d( + feature_num, feature_num, kernel_size=3, + stride=1, bias=False, padding=1 + ), + nn.BatchNorm2d(feature_num, eps=1e-3, + momentum=0.01), + 
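The AttBEVBackbone forward above rescales `pairwise_t_matrix` into a normalized 2x3 affine (rotation terms rescaled by the H/W aspect ratio, translations divided by half the BEV extent) before handing it to the fusion modules. Those modules are not shown in this diff; the usual consumer of such a normalized matrix is `F.affine_grid` plus `F.grid_sample`, so the following is only a hedged sketch of that warping step, with an identity transform standing in for a real neighbor-to-ego matrix:

```python
import torch
import torch.nn.functional as F

neighbor_feat = torch.randn(1, 64, 100, 352)   # [1, C, H, W] BEV feature of one neighbor
t_norm = torch.eye(2, 3).unsqueeze(0)          # [1, 2, 3] normalized affine (identity here)

grid = F.affine_grid(t_norm, size=list(neighbor_feat.shape), align_corners=False)
warped = F.grid_sample(neighbor_feat, grid, align_corners=False)  # neighbor feature in ego view
```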
nn.ReLU() + )]) + self.decoder.append(nn.Sequential(*cur_layers)) + feature_num //= 2 + + def forward(self, x): + for i in range(len(self.encoder)): + x = self.encoder[i](x) + + for i in range(len(self.decoder)-1, -1, -1): + x = self.decoder[i](x) + + return x \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/base_bev_backbone.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/base_bev_backbone.py new file mode 100644 index 0000000000000000000000000000000000000000..38f6960abd3c407b268df86e7d0ee7012357cc23 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/base_bev_backbone.py @@ -0,0 +1,157 @@ +import numpy as np +import torch +import torch.nn as nn + + +class BaseBEVBackbone(nn.Module): + def __init__(self, model_cfg, input_channels): + super().__init__() + self.model_cfg = model_cfg + + if 'layer_nums' in self.model_cfg: + + assert len(self.model_cfg['layer_nums']) == \ + len(self.model_cfg['layer_strides']) == \ + len(self.model_cfg['num_filters']) + + layer_nums = self.model_cfg['layer_nums'] + layer_strides = self.model_cfg['layer_strides'] + num_filters = self.model_cfg['num_filters'] + else: + layer_nums = layer_strides = num_filters = [] + + if 'upsample_strides' in self.model_cfg: + assert len(self.model_cfg['upsample_strides']) \ + == len(self.model_cfg['num_upsample_filter']) + + num_upsample_filters = self.model_cfg['num_upsample_filter'] + upsample_strides = self.model_cfg['upsample_strides'] + + else: + upsample_strides = num_upsample_filters = [] + + num_levels = len(layer_nums) + self.num_levels = num_levels + c_in_list = [input_channels, *num_filters[:-1]] + + self.blocks = nn.ModuleList() + self.deblocks = nn.ModuleList() + + for idx in range(num_levels): + cur_layers = [ + nn.ZeroPad2d(1), + nn.Conv2d( + c_in_list[idx], num_filters[idx], kernel_size=3, + stride=layer_strides[idx], padding=0, bias=False + ), + nn.BatchNorm2d(num_filters[idx], eps=1e-3, momentum=0.01), + nn.ReLU() + ] + for k in range(layer_nums[idx]): + cur_layers.extend([ + nn.Conv2d(num_filters[idx], num_filters[idx], + kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(num_filters[idx], eps=1e-3, momentum=0.01), + nn.ReLU() + ]) + + self.blocks.append(nn.Sequential(*cur_layers)) + if len(upsample_strides) > 0: + stride = upsample_strides[idx] + if stride >= 1: + self.deblocks.append(nn.Sequential( + nn.ConvTranspose2d( + num_filters[idx], num_upsample_filters[idx], + upsample_strides[idx], + stride=upsample_strides[idx], bias=False + ), + nn.BatchNorm2d(num_upsample_filters[idx], + eps=1e-3, momentum=0.01), + nn.ReLU() + )) + else: + stride = np.round(1 / stride).astype(np.int) + self.deblocks.append(nn.Sequential( + nn.Conv2d( + num_filters[idx], num_upsample_filters[idx], + stride, + stride=stride, bias=False + ), + nn.BatchNorm2d(num_upsample_filters[idx], eps=1e-3, + momentum=0.01), + nn.ReLU() + )) + + c_in = sum(num_upsample_filters) + if len(upsample_strides) > num_levels: + self.deblocks.append(nn.Sequential( + nn.ConvTranspose2d(c_in, c_in, upsample_strides[-1], + stride=upsample_strides[-1], bias=False), + nn.BatchNorm2d(c_in, eps=1e-3, momentum=0.01), + nn.ReLU(), + )) + + self.num_bev_features = c_in + + def forward(self, data_dict): + spatial_features = data_dict['spatial_features'] + + ups = [] + ret_dict = {} + x = spatial_features + + for i in range(len(self.blocks)): + x = self.blocks[i](x) + + stride = int(spatial_features.shape[2] / x.shape[2]) + 
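The AutoEncoder used for feature compression halves both the channel count and the spatial size at every encoder stage and mirrors that in the decoder, so a feature map whose height and width are divisible by 2**layer_num round-trips back to its original shape. A quick shape check with toy sizes:

```python
import torch
from opencood.models.sub_modules.auto_encoder import AutoEncoder

ae = AutoEncoder(feature_num=64, layer_num=2)
x = torch.randn(1, 64, 100, 352)   # H and W divisible by 2**2
out = ae(x)                        # 64ch @ 100x352 -> 16ch @ 25x88 -> back to 64ch @ 100x352
assert out.shape == x.shape
```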
ret_dict['spatial_features_%dx' % stride] = x + + if len(self.deblocks) > 0: + ups.append(self.deblocks[i](x)) + else: + ups.append(x) + + if len(ups) > 1: + x = torch.cat(ups, dim=1) + elif len(ups) == 1: + x = ups[0] + + if len(self.deblocks) > len(self.blocks): + x = self.deblocks[-1](x) + + data_dict['spatial_features_2d'] = x # [N,C,100,352] + + return data_dict + + + def get_multiscale_feature(self, spatial_features): + """ + before multiscale intermediate fusion + """ + feature_list = [] + x = spatial_features + for i in range(len(self.blocks)): + x = self.blocks[i](x) + feature_list.append(x) + + return feature_list + + def decode_multiscale_feature(self, x): + """ + after multiscale interemediate fusion + """ + ups = [] + for i in range(self.num_levels): + if len(self.deblocks) > 0: + ups.append(self.deblocks[i](x[i])) + else: + ups.append(x[i]) + if len(ups) > 1: + x = torch.cat(ups, dim=1) + elif len(ups) == 1: + x = ups[0] + + if len(self.deblocks) > self.num_levels: + x = self.deblocks[-1](x) + return x + \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/base_bev_backbone_resnet.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/base_bev_backbone_resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..69ada56db671c985eb53533fd2cd29c72ef7f5b3 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/base_bev_backbone_resnet.py @@ -0,0 +1,145 @@ +""" +Resblock is much strong than normal conv + +Provide api for multiscale intermeidate fuion +""" + +import numpy as np +import torch +import torch.nn as nn + +from opencood.models.sub_modules.resblock import ResNetModified, BasicBlock + +DEBUG = False + +class ResNetBEVBackbone(nn.Module): + def __init__(self, model_cfg, input_channels=64): + super().__init__() + self.model_cfg = model_cfg + + if 'layer_nums' in self.model_cfg: + + assert len(self.model_cfg['layer_nums']) == \ + len(self.model_cfg['layer_strides']) == \ + len(self.model_cfg['num_filters']) + + layer_nums = self.model_cfg['layer_nums'] + layer_strides = self.model_cfg['layer_strides'] + num_filters = self.model_cfg['num_filters'] + else: + layer_nums = layer_strides = num_filters = [] + + if 'upsample_strides' in self.model_cfg: + assert len(self.model_cfg['upsample_strides']) \ + == len(self.model_cfg['num_upsample_filter']) + + num_upsample_filters = self.model_cfg['num_upsample_filter'] + upsample_strides = self.model_cfg['upsample_strides'] + + else: + upsample_strides = num_upsample_filters = [] + + self.resnet = ResNetModified(BasicBlock, + layer_nums, + layer_strides, + num_filters, + inplanes = model_cfg.get('inplanes', 64)) + + num_levels = len(layer_nums) + self.num_levels = len(layer_nums) + self.deblocks = nn.ModuleList() + + for idx in range(num_levels): + if len(upsample_strides) > 0: + stride = upsample_strides[idx] + if stride >= 1: + self.deblocks.append(nn.Sequential( + nn.ConvTranspose2d( + num_filters[idx], num_upsample_filters[idx], + upsample_strides[idx], + stride=upsample_strides[idx], bias=False + ), + nn.BatchNorm2d(num_upsample_filters[idx], + eps=1e-3, momentum=0.01), + nn.ReLU() + )) + else: + stride = np.round(1 / stride).astype(np.int) + self.deblocks.append(nn.Sequential( + nn.Conv2d( + num_filters[idx], num_upsample_filters[idx], + stride, + stride=stride, bias=False + ), + nn.BatchNorm2d(num_upsample_filters[idx], eps=1e-3, + momentum=0.01), + nn.ReLU() + )) + + c_in = 
sum(num_upsample_filters) + if len(upsample_strides) > num_levels: + self.deblocks.append(nn.Sequential( + nn.ConvTranspose2d(c_in, c_in, upsample_strides[-1], + stride=upsample_strides[-1], bias=False), + nn.BatchNorm2d(c_in, eps=1e-3, momentum=0.01), + nn.ReLU(), + )) + + self.num_bev_features = c_in + + def forward(self, data_dict): + spatial_features = data_dict['spatial_features'] + + x = self.resnet(spatial_features) # tuple of features + ups = [] + + for i in range(self.num_levels): + if len(self.deblocks) > 0: + ups.append(self.deblocks[i](x[i])) + else: + ups.append(x[i]) + + if len(ups) > 1: + x = torch.cat(ups, dim=1) + elif len(ups) == 1: + x = ups[0] + + if len(self.deblocks) > self.num_levels: + x = self.deblocks[-1](x) + + data_dict['spatial_features_2d'] = x + return data_dict + + # these two functions are seperated for multiscale intermediate fusion + def get_multiscale_feature(self, spatial_features): + """ + before multiscale intermediate fusion + """ + x = self.resnet(spatial_features) # tuple of features + return x + + def decode_multiscale_feature(self, x): + """ + after multiscale interemediate fusion + """ + ups = [] + for i in range(self.num_levels): + if len(self.deblocks) > 0: + ups.append(self.deblocks[i](x[i])) + else: + ups.append(x[i]) + if len(ups) > 1: + x = torch.cat(ups, dim=1) + elif len(ups) == 1: + x = ups[0] + + if len(self.deblocks) > self.num_levels: + x = self.deblocks[-1](x) + return x + + def get_layer_i_feature(self, spatial_features, layer_i): + """ + before multiscale intermediate fusion + """ + return eval(f"self.resnet.layer{layer_i}")(spatial_features) # tuple of features + \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/base_transformer.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/base_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..0e05212a86deb085c000f4f674f403c8ed78e1c2 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/base_transformer.py @@ -0,0 +1,124 @@ +import torch +from torch import nn + +from einops import rearrange + + +class PreNorm(nn.Module): + def __init__(self, dim, fn): + super().__init__() + self.norm = nn.LayerNorm(dim) + self.fn = fn + + def forward(self, x, **kwargs): + return self.fn(self.norm(x), **kwargs) + + +class FeedForward(nn.Module): + def __init__(self, dim, hidden_dim, dropout=0.): + super().__init__() + self.net = nn.Sequential( + nn.Linear(dim, hidden_dim), + nn.GELU(), + nn.Dropout(dropout), + nn.Linear(hidden_dim, dim), + nn.Dropout(dropout) + ) + + def forward(self, x): + return self.net(x) + + +class CavAttention(nn.Module): + """ + Vanilla CAV attention. 
+ """ + def __init__(self, dim, heads, dim_head=64, dropout=0.1): + super().__init__() + inner_dim = heads * dim_head + + self.heads = heads + self.scale = dim_head ** -0.5 + + self.attend = nn.Softmax(dim=-1) + self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False) + + self.to_out = nn.Sequential( + nn.Linear(inner_dim, dim), + nn.Dropout(dropout) + ) + + def forward(self, x, mask, prior_encoding): + # x: (B, L, H, W, C) -> (B, H, W, L, C) + # mask: (B, L) + x = x.permute(0, 2, 3, 1, 4) + # mask: (B, 1, H, W, L, 1) + mask = mask.unsqueeze(1) + + # qkv: [(B, H, W, L, C_inner) *3] + qkv = self.to_qkv(x).chunk(3, dim=-1) + # q: (B, M, H, W, L, C) + q, k, v = map(lambda t: rearrange(t, 'b h w l (m c) -> b m h w l c', + m=self.heads), qkv) + + # attention, (B, M, H, W, L, L) + att_map = torch.einsum('b m h w i c, b m h w j c -> b m h w i j', + q, k) * self.scale + # add mask + att_map = att_map.masked_fill(mask == 0, -float('inf')) + # softmax + att_map = self.attend(att_map) + + # out:(B, M, H, W, L, C_head) + out = torch.einsum('b m h w i j, b m h w j c -> b m h w i c', att_map, + v) + out = rearrange(out, 'b m h w l c -> b h w l (m c)', + m=self.heads) + out = self.to_out(out) + # (B L H W C) + out = out.permute(0, 3, 1, 2, 4) + return out + + +class BaseEncoder(nn.Module): + def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout=0.): + super().__init__() + self.layers = nn.ModuleList([]) + for _ in range(depth): + self.layers.append(nn.ModuleList([ + PreNorm(dim, CavAttention(dim, + heads=heads, + dim_head=dim_head, + dropout=dropout)), + PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout)) + ])) + + def forward(self, x, mask): + for attn, ff in self.layers: + x = attn(x, mask=mask) + x + x = ff(x) + x + return x + + +class BaseTransformer(nn.Module): + def __init__(self, args): + super().__init__() + + dim = args['dim'] + depth = args['depth'] + heads = args['heads'] + dim_head = args['dim_head'] + mlp_dim = args['mlp_dim'] + dropout = args['dropout'] + max_cav = args['max_cav'] + + self.encoder = BaseEncoder(dim, depth, heads, dim_head, mlp_dim, + dropout) + + def forward(self, x, mask): + # B, L, H, W, C + output = self.encoder(x, mask) + # B, H, W, C + output = output[:, 0] + + return \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/bev_roi_head.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/bev_roi_head.py new file mode 100644 index 0000000000000000000000000000000000000000..713833d277b4ca4d2b731b24ca9296e695909c78 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/bev_roi_head.py @@ -0,0 +1,230 @@ +import torch +import torch.nn as nn +from mmcv.ops import RoIAlignRotated +from opencood.pcdet_utils.iou3d_nms.iou3d_nms_utils import boxes_iou3d_gpu +from opencood.utils import box_utils +from opencood.utils import common_utils +import numpy as np +from icecream import ic + +class BEVRoIHead(nn.Module): + def __init__(self, model_cfg, pc_range): + super().__init__() + self.model_cfg = model_cfg + self.pc_range = pc_range + self.roi_align_size = 3 + self.code_size = 7 + self.enlarge_ratio = model_cfg.get("enlarge_ratio", 1) + self.roialign_rotated = RoIAlignRotated(output_size=self.roi_align_size, spatial_scale=1, clockwise=True) + + c_out = self.model_cfg['in_channels'] # 128 + pre_channel = self.roi_align_size * self.roi_align_size * c_out # 3*3*128 + fc_layers = [self.model_cfg['n_fc_neurons']] * 2 + self.shared_fc_layers, pre_channel = 
self._make_fc_layers(pre_channel, + fc_layers) + + self.cls_layers, pre_channel = self._make_fc_layers(pre_channel, + fc_layers, + output_channels= + self.model_cfg[ + 'num_cls']) + self.iou_layers, _ = self._make_fc_layers(pre_channel, fc_layers, + output_channels= + self.model_cfg['num_cls']) + self.reg_layers, _ = self._make_fc_layers(pre_channel, fc_layers, + output_channels= + self.model_cfg[ + 'num_cls'] * 7) + + self._init_weights(weight_init='xavier') + + def _init_weights(self, weight_init='xavier'): + if weight_init == 'kaiming': + init_func = nn.init.kaiming_normal_ + elif weight_init == 'xavier': + init_func = nn.init.xavier_normal_ + elif weight_init == 'normal': + init_func = nn.init.normal_ + else: + raise NotImplementedError + + for m in self.modules(): + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d): + if weight_init == 'normal': + init_func(m.weight, mean=0, std=0.001) + else: + init_func(m.weight) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + nn.init.normal_(self.reg_layers[-1].weight, mean=0, std=0.001) + + def _make_fc_layers(self, input_channels, fc_list, output_channels=None): + fc_layers = [] + pre_channel = input_channels + for k in range(len(fc_list)): + fc_layers.extend([ + nn.Conv1d(pre_channel, fc_list[k], kernel_size=1, bias=False), + # nn.BatchNorm1d(fc_list[k]), + nn.ReLU() + ]) + pre_channel = fc_list[k] + if self.model_cfg['dp_ratio'] > 0: + fc_layers.append(nn.Dropout(self.model_cfg['dp_ratio'])) + if output_channels is not None: + fc_layers.append( + nn.Conv1d(pre_channel, output_channels, kernel_size=1, + bias=True)) + fc_layers = nn.Sequential(*fc_layers) + return fc_layers, pre_channel + + def forward(self, batch_dict): + batch_dict = self.assign_targets(batch_dict) + + # put roi back to dense feature map for rotated roi align. + batch_size = batch_dict['batch_size_2stage'] + # [[RoI_H0*RoI_W0, C], [RoI_H1*RoI_W1, C], ...] + feature_of_proposals_ego_list = batch_dict['feature_of_proposals_ego_list'] + C = feature_of_proposals_ego_list[0].shape[1] + device = feature_of_proposals_ego_list[0].device + + H, W = batch_dict['feature_shape'] + grid_size_H = (self.pc_range[4] - self.pc_range[1]) / H + grid_size_W = (self.pc_range[3] - self.pc_range[0]) / W + + # dense feature map + feature_map = torch.zeros((batch_size, C, H, W), device=device) + roi_cnt = 0 + for batch_idx, roi_fused in enumerate(batch_dict['roi_fused']): # per scene + for roi in roi_fused: + feature_map[batch_idx, :, roi[2]:roi[3], roi[0]:roi[1]] = \ + feature_of_proposals_ego_list[roi_cnt].permute(1,0).view(C, roi[3]-roi[2], roi[1]-roi[0]) + roi_cnt += 1 + + # proposal to rotated roi input, + # (batch_index, center_x, center_y, w, h, angle). The angle is in radian. 
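The comment closing this block pins down the rotated-RoI layout that mmcv's `RoIAlignRotated` consumes: one row per proposal, `(batch_index, center_x, center_y, w, h, angle)`, with centres and sizes in feature-map cells and the angle in radians. A toy row built the same way as above (made-up box, range, and feature-map size; `enlarge_ratio` ignored):

```python
import torch

# one fused box in metres, hwl order as above: x, y, z, h, w, l, yaw
box = torch.tensor([12.0, 3.0, -1.0, 1.56, 1.6, 3.9, 0.3])
pc_range = [-48.0, -48.0, -3.0, 48.0, 48.0, 1.0]   # hypothetical x/y/z range
H, W = 192, 384                                     # hypothetical BEV feature size
grid_h = (pc_range[4] - pc_range[1]) / H            # metres per cell along y
grid_w = (pc_range[3] - pc_range[0]) / W            # metres per cell along x

roi_row = torch.tensor([
    0.0,                               # batch index of the scene
    (box[0] - pc_range[0]) / grid_w,   # center_x in cells
    (box[1] - pc_range[1]) / grid_h,   # center_y in cells
    box[5] / grid_w,                   # box length l -> roi width
    box[4] / grid_h,                   # box width  w -> roi height
    box[6],                            # yaw, already in radians
])
```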
+ roi_input = torch.zeros((len(feature_of_proposals_ego_list), 6), device=device) + + box_cnt = 0 + for batch_idx, box_fused in enumerate(batch_dict['boxes_fused']): # per scene + # box_fused is [n_boxes, 7], x, y, z, h, w, l, yaw -> (center_x, center_y, w, h) + roi_input[box_cnt:box_cnt+box_fused.shape[0], 0] = batch_idx + roi_input[box_cnt:box_cnt+box_fused.shape[0], 1] = (box_fused[:, 0] - self.pc_range[0]) / grid_size_W + roi_input[box_cnt:box_cnt+box_fused.shape[0], 2] = (box_fused[:, 1] - self.pc_range[1]) / grid_size_H + roi_input[box_cnt:box_cnt+box_fused.shape[0], 3] = box_fused[:, 5] / grid_size_W * self.enlarge_ratio # box's l -> W + roi_input[box_cnt:box_cnt+box_fused.shape[0], 4] = box_fused[:, 4] / grid_size_H * self.enlarge_ratio # box's w -> H + roi_input[box_cnt:box_cnt+box_fused.shape[0], 5] = box_fused[:, 6] + box_cnt += box_fused.shape[0] + + # roi align + N_proposals = roi_input.shape[0] + # [sum(proposal), C, self.roi_align_size, self.roi_align_size] + pooled_feature = self.roialign_rotated(feature_map, roi_input) + # [sum(proposal), self.roi_align_size * self.roi_align_size * C, 1] + pooled_feature = pooled_feature.flatten(start_dim=2).permute(0,2,1).flatten(start_dim=1).unsqueeze(-1) + shared_features = self.shared_fc_layers(pooled_feature) + + rcnn_cls = self.cls_layers(shared_features).transpose(1, 2).contiguous().squeeze(dim=1) + rcnn_iou = self.iou_layers(shared_features).transpose(1, 2).contiguous().squeeze(dim=1) + rcnn_reg = self.reg_layers(shared_features).transpose(1, 2).contiguous().squeeze(dim=1) + + batch_dict['stage2_out'] = { + 'rcnn_cls': rcnn_cls, + 'rcnn_iou': rcnn_iou, + 'rcnn_reg': rcnn_reg, + } + + return batch_dict + + + def assign_targets(self, batch_dict): + batch_dict['rcnn_label_dict'] = { + 'rois': [], + 'gt_of_rois': [], + 'gt_of_rois_src': [], + 'cls_tgt': [], + 'reg_tgt': [], + 'iou_tgt': [], + 'rois_anchor': [], + 'record_len': [], + 'rois_scores_stage1': [] + } + pred_boxes = batch_dict['boxes_fused'] + pred_scores = batch_dict['scores_fused'] + gt_boxes = [b[m][:, [0, 1, 2, 5, 4, 3, 6]].float() for b, m in + zip(batch_dict['object_bbx_center'], + batch_dict['object_bbx_mask'].bool())] # hwl -> lwh order + for rois, scores, gts in zip(pred_boxes, pred_scores, gt_boxes): # each frame + rois = rois[:, [0, 1, 2, 5, 4, 3, 6]] # hwl -> lwh + if gts.shape[0] == 0: + gts = rois.clone() + + ious = boxes_iou3d_gpu(rois, gts) + max_ious, gt_inds = ious.max(dim=1) + gt_of_rois = gts[gt_inds] + rcnn_labels = (max_ious > 0.3).float() + mask = torch.logical_not(rcnn_labels.bool()) + + # set negative samples back to rois, no correction in stage2 for them + gt_of_rois[mask] = rois[mask] + gt_of_rois_src = gt_of_rois.clone().detach() + + # canoical transformation + roi_center = rois[:, 0:3] + # TODO: roi_ry > 0 in pcdet + roi_ry = rois[:, 6] % (2 * np.pi) + gt_of_rois[:, 0:3] = gt_of_rois[:, 0:3] - roi_center + gt_of_rois[:, 6] = gt_of_rois[:, 6] - roi_ry + + # transfer LiDAR coords to local coords + gt_of_rois = common_utils.rotate_points_along_z( + points=gt_of_rois.view(-1, 1, gt_of_rois.shape[-1]), + angle=-roi_ry.view(-1) + ).view(-1, gt_of_rois.shape[-1]) + + # flip orientation if rois have opposite orientation + heading_label = (gt_of_rois[:, 6] + ( + torch.div(torch.abs(gt_of_rois[:, 6].min()), + (2 * np.pi), rounding_mode='trunc') + + 1) * 2 * np.pi) % (2 * np.pi) # 0 ~ 2pi + opposite_flag = (heading_label > np.pi * 0.5) & ( + heading_label < np.pi * 1.5) + + # (0 ~ pi/2, 3pi/2 ~ 2pi) + heading_label[opposite_flag] = (heading_label[ + 
opposite_flag] + np.pi) % ( + 2 * np.pi) + flag = heading_label > np.pi + heading_label[flag] = heading_label[ + flag] - np.pi * 2 # (-pi/2, pi/2) + heading_label = torch.clamp(heading_label, min=-np.pi / 2, + max=np.pi / 2) + gt_of_rois[:, 6] = heading_label + + # generate regression target + rois_anchor = rois.clone().detach().view(-1, self.code_size) + rois_anchor[:, 0:3] = 0 + rois_anchor[:, 6] = 0 + + reg_targets = box_utils.box_encode( + gt_of_rois.view(-1, self.code_size), rois_anchor + ) + + batch_dict['rcnn_label_dict']['rois'].append(rois) + batch_dict['rcnn_label_dict']['rois_scores_stage1'].append(scores) + batch_dict['rcnn_label_dict']['gt_of_rois'].append(gt_of_rois) + batch_dict['rcnn_label_dict']['gt_of_rois_src'].append( + gt_of_rois_src) + batch_dict['rcnn_label_dict']['cls_tgt'].append(rcnn_labels) + batch_dict['rcnn_label_dict']['reg_tgt'].append(reg_targets) + batch_dict['rcnn_label_dict']['iou_tgt'].append(max_ious) + batch_dict['rcnn_label_dict']['rois_anchor'].append(rois_anchor) + batch_dict['rcnn_label_dict']['record_len'].append(rois.shape[0]) + + + # cat list to tensor + for k, v in batch_dict['rcnn_label_dict'].items(): + if k == 'record_len': + continue + batch_dict['rcnn_label_dict'][k] = torch.cat(v, dim=0) + + return batch_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/bevformer.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/bevformer.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/box_align.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/box_align.py new file mode 100644 index 0000000000000000000000000000000000000000..d955d886c8281ba1882a36b72bde312b15f7757b --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/box_align.py @@ -0,0 +1,808 @@ +""" +This module is designed for box alignment +It should be used for 1-round communication, maybe adapt to 2-round communication latter +i.e, collaborative agent send the full feature map and noisy pose once together + +We will use g2o for pose graph optimization. +""" + + +from cv2 import threshold +from opencood.models.sub_modules.pose_graph_optim import PoseGraphOptimization2D +from opencood.utils.transformation_utils import pose_to_tfm +from opencood.utils import box_utils +from collections import OrderedDict +import numpy as np +import torch +import torch.nn.functional as F +import g2o +from icecream import ic +import os + +DEBUG = False + +def all_pair_l2(A, B): + """ All pair L2 distance for A and B + Args: + A : torch.Tensor + shape [N_A, D] + B : torch.Tensor + shape [N_B, D] + Returns: + C : torch.Tensor + shape [N_A, N_B] + """ + TwoAB = 2*A@B.T + C = torch.sqrt(torch.sum(A * A, 1, keepdim=True).expand_as(TwoAB) \ + + torch.sum(B * B, 1, keepdim=True).T.expand_as(TwoAB) \ + - TwoAB) + return C + +def box_alignment_relative_sample( + pred_corners_list, + noisy_lidar_pose, + clean_lidar_pose=None, + uncertainty_list=None, + order='hwl', + landmark_SE2=True, + adaptive_landmark=False): + """ Perform box alignment for one sample. + Correcting the relative pose. 
+ + Args: + pred_corners_list: in each ego coordinate + [[N_1, 8, 3], ..., [N_cav1, 8, 3]] + + clean_lidar_poses: + [N_cav1, 6], in degree + + noisy_lidar_poses: + [N_cav1, 6], in degree + + uncertainty_list: + [[N_1, 3], [N_2, 3], ..., [N_cav1, 3]] + + landmark_SE2: + if True, the landmark is SE(2), otherwise R^2 + + adaptive_landmark: (when landmark_SE2 = True) + if True, landmark will turn to R^2 if yaw angles differ a lot + + Returns: + refined_lidar_poses: np.ndarray + [N_cav1, 3], + """ + + ## first transform point from ego coordinate to world coordinate, using lidar_pose. + N = noisy_lidar_pose.shape[0] + device = pred_corners_list[0].device + lidar_pose_noisy_tfm = pose_to_tfm(noisy_lidar_pose, dof=6) + + pred_corners_world_list = \ + [box_utils.project_box3d(pred_corners_list[i], lidar_pose_noisy_tfm[i]) for i in range(N)] # [[N1, 8, 3], [N2, 8, 3],...] + pred_box3d_list = \ + [box_utils.corner_to_center_torch(corner, order).to(device) for corner in pred_corners_list] # [[N1, 7], [N2, 7], ...], angle in radius + pred_box3d_world_list = \ + [box_utils.corner_to_center_torch(corner, order).to(device) for corner in pred_corners_world_list] # [[N1, 7], [N2, 7], ...], angle in radius + + pred_center_list = \ + [torch.mean(corner_tensor, dim=[1]) for corner_tensor in pred_corners_list] # [[N1,3], [N2,3], ...] + + pred_center_world_list = \ + [pred_box3d_world[:,:3] for pred_box3d_world in pred_box3d_world_list] + + pred_yaw_world_list = \ + [pred_box3d[:, 6] for pred_box3d in pred_box3d_world_list] + + pred_len = \ + [pred_center.shape[0] for pred_center in pred_center_list] + + + + + box_idx_to_agent = [] + for i in range(N): + box_idx_to_agent += [i] * pred_len[i] + + + pred_center_cat = torch.cat(pred_center_list, dim=0) # [sum(pred_box), 3] + pred_center_world_cat = torch.cat(pred_center_world_list, dim=0) # [sum(pred_box), 3] + pred_box3d_cat = torch.cat(pred_box3d_list, dim=0) # [sum(pred_box), 7] + pred_yaw_world_cat = torch.cat(pred_yaw_world_list) # [sum(pred_box)] + + + w_a = 1.6 # width of anchor + l_a = 3.9 # length of anchor + d_a_square = w_a ** 2 + l_a ** 2 # anchor's diag + + + if uncertainty_list is not None: + pred_log_sigma2_cat = torch.cat(uncertainty_list) + pred_certainty_cat = torch.exp(-pred_log_sigma2_cat) + pred_certainty_cat[:,:2] /= d_a_square # sigma_delta_x -> sigma_x. + + + pred_center_world_cat_cpu = pred_center_world_cat.cpu() # if use gpu, it will get nan. + pred_center_allpair_dist = all_pair_l2(pred_center_world_cat_cpu, pred_center_world_cat_cpu) # [sum(pred_box), sum(pred_box)] + + + # let pair from one vehicle be max distance + MAX_DIST = 10000 + cum = 0 + for i in range(N): + pred_center_allpair_dist[cum: cum + pred_len[i], cum: cum +pred_len[i]] = MAX_DIST + cum += pred_len[i] + + + cluster_id = N # let the vertex id of object start from N + cluster_dict = OrderedDict() + remain_box = set(range(cum)) + thres = 0.75 # l2 distance within the threshold, can be considered as one object. 
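`all_pair_l2` above is simply the pairwise Euclidean distance between predicted box centres in the world frame; any pair of boxes from different agents whose distance falls below `thres` is grouped into one object cluster. A small sanity check of that distance against `torch.cdist` (toy centres):

```python
import torch

A = torch.tensor([[0.0, 0.0, 0.0],
                  [10.0, 0.0, 0.0]])
B = torch.tensor([[0.3, 0.4, 0.0],    # 0.5 m from A[0]
                  [50.0, 0.0, 0.0]])

TwoAB = 2 * A @ B.T
dist = torch.sqrt((A * A).sum(1, keepdim=True) + (B * B).sum(1, keepdim=True).T - TwoAB)
assert torch.allclose(dist, torch.cdist(A, B), atol=1e-5)
# with thres = 0.75, only A[0] and B[0] would be clustered as the same physical object
```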
+ for box_idx in range(cum): + + if box_idx not in remain_box: # already assigned + continue + within_thres_idx_tensor = (pred_center_allpair_dist[box_idx] < thres).nonzero().flatten() + within_thres_idx_list = within_thres_idx_tensor.cpu().numpy().tolist() + + if len(within_thres_idx_list) == 0: # if it's a single box + continue + + # start from within_thres_idx_list, find new box added to the cluster + explored = [box_idx] + unexplored = [idx for idx in within_thres_idx_list if idx in remain_box] + + while unexplored: + idx = unexplored[0] + within_thres_idx_tensor = (pred_center_allpair_dist[idx] < thres).nonzero().flatten() + within_thres_idx_list = within_thres_idx_tensor.cpu().numpy().tolist() + for newidx in within_thres_idx_list: + if (newidx not in explored) and (newidx not in unexplored) and (newidx in remain_box): + unexplored.append(newidx) + unexplored.remove(idx) + explored.append(idx) + + if len(explored) == 1: # it's a single box, neighbors have been assigned + remain_box.remove(box_idx) + continue + + cluster_box_idxs = explored + + cluster_dict[cluster_id] = OrderedDict() + cluster_dict[cluster_id]['box_idx'] = [idx for idx in cluster_box_idxs] + cluster_dict[cluster_id]['box_dist'] = [pred_center_cat[idx].norm() for idx in cluster_box_idxs] # distance to observer + cluster_dict[cluster_id]['box_center_world'] = [pred_center_world_cat[idx] for idx in cluster_box_idxs] # coordinate in world, [3,] + cluster_dict[cluster_id]['box_yaw'] = [pred_yaw_world_cat[idx] for idx in cluster_box_idxs] + + yaw_var = torch.var(torch.as_tensor(cluster_dict[cluster_id]['box_yaw']), unbiased=False) + + if landmark_SE2: + if adaptive_landmark and yaw_var > 0.2: + landmark = pred_center_world_cat[box_idx].clone()[:2] + else: + landmark = pred_center_world_cat[box_idx].clone() + landmark[2] = pred_yaw_world_cat[box_idx] + else: + landmark = pred_center_world_cat[box_idx].clone()[:2] + + + cluster_dict[cluster_id]['landmark'] = landmark.cpu().numpy() # [x, y, yaw] or [x, y] + cluster_dict[cluster_id]['landmark_SE2'] = True if landmark.shape[0] == 3 else False + + DEBUG = False + if DEBUG: + from icecream import ic + ic(cluster_dict[cluster_id]['box_idx']) + ic(cluster_dict[cluster_id]['box_center_world']) + ic(cluster_dict[cluster_id]['box_yaw']) + ic(cluster_dict[cluster_id]['landmark']) + + + cluster_id += 1 + for idx in cluster_box_idxs: + remain_box.remove(idx) + + vertex_num = cluster_id + agent_num = N + landmark_num = cluster_id - N + # ic(agent_num) + # ic(landmark_num) + + """ + Now we have clusters for objects. we can create pose graph. + First we consider center as landmark. + Maybe set corner as landmarks in the future. + """ + pgo = PoseGraphOptimization2D(verbose=False) + + # Add agent to vertexs + for agent_id in range(N): + v_id = agent_id + # notice lidar_pose use degree format, translate it to radians. 
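As the comment above notes, the stored lidar poses are (x, y, z, roll, yaw, pitch) in degrees, while g2o's SE2 vertices take (x, y, theta) with theta in radians; the optimized result is converted back to degrees at the end. A minimal sketch of that round trip (hypothetical pose values, mirroring the g2o calls used above):

```python
import numpy as np
import g2o

lidar_pose = np.array([10.0, -2.0, 1.9, 0.0, 35.0, 0.0])  # x, y, z, roll, yaw, pitch (degrees)

pose_np = lidar_pose[[0, 1, 4]].copy()    # keep x, y, yaw
pose_np[2] = np.deg2rad(pose_np[2])       # g2o.SE2 expects radians
v_pose = g2o.SE2(pose_np)

refined = np.asarray(v_pose.vector()).copy()   # [x, y, theta]; optimization would refine this
refined[2] = np.rad2deg(refined[2])            # back to degrees, matching the source pose format
```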
+ pose_np = noisy_lidar_pose[agent_id, [0,1,4]].cpu().numpy() + pose_np[2] = np.deg2rad(pose_np[2]) # radians + v_pose = g2o.SE2(pose_np) + + if agent_id == 0: + pgo.add_vertex(id=v_id, pose=v_pose, fixed=True) + else: + pgo.add_vertex(id=v_id, pose=v_pose, fixed=False) + + # Add landmark to vertexs + for landmark_id in range(N, cluster_id): + v_id = landmark_id + landmark = cluster_dict[landmark_id]['landmark'] # (3,) or (2,) + landmark_SE2 = cluster_dict[landmark_id]['landmark_SE2'] + + if landmark_SE2: + v_pose = g2o.SE2(landmark) + else: + v_pose = landmark + + pgo.add_vertex(id=v_id, pose=v_pose, fixed=False, SE2=landmark_SE2) + + # Add agent-landmark edge to edge + for landmark_id in range(N, cluster_id): + landmark_SE2 = cluster_dict[landmark_id]['landmark_SE2'] + + for box_idx in cluster_dict[landmark_id]['box_idx']: + agent_id = box_idx_to_agent[box_idx] + if landmark_SE2: + e_pose = g2o.SE2(pred_box3d_cat[box_idx][[0,1,6]].cpu().numpy().astype(np.float64)) + info = np.identity(3, dtype=np.float64) + if uncertainty_list is not None: + info[[0,1,2],[0,1,2]] = pred_certainty_cat[box_idx].cpu().numpy() + else: + e_pose = pred_box3d_cat[box_idx][[0,1]].cpu().numpy().astype(np.float64) + info = np.identity(2, dtype=np.float64) + if uncertainty_list is not None: + info[[0,1],[0,1]] = pred_certainty_cat[box_idx][:2].cpu().numpy() + + + pgo.add_edge(vertices=[agent_id, landmark_id], measurement=e_pose, information=info, SE2=landmark_SE2) + + pgo.optimize() + + pose_new_list = [] + for agent_id in range(N): + # print(pgo.get_pose(agent_id).vector()) + pose_new_list.append(pgo.get_pose(agent_id).vector()) + + refined_pose = np.array(pose_new_list) + refined_pose[:,2] = np.rad2deg(refined_pose[:,2]) # rad -> degree, same as source + + return refined_pose + +def box_alignment_sample(pred_corners_list, lidar_poses_for_tfm, noisy_lidar_poses, uncertainty_list=None, order='hwl'): + """ Perform box alignment for one sample. + Args: + pred_corners_list: in each ego coordinate + [[N_1, 8, 3], ..., [N_cav1, 8, 3]] + + lidar_poses: + [N_cav1, 6] , in degree + + scores_list: + [[N_1, 3], [N_2, 3], ..., [N_cav1, 3]] + + Returns: + refined_lidar_poses: np.ndarray + [N_cav1, 3], + """ + + ## first transform point from ego coordinate to world coordinate, using lidar_pose. + lidar_poses = lidar_poses_for_tfm + N = lidar_poses.shape[0] + device = pred_corners_list[0].device + lidar_pose_tfm = pose_to_tfm(lidar_poses, dof=6) # Tw_c + + + + pred_corners_world_list = \ + [box_utils.project_box3d(pred_corners_list[i], lidar_pose_tfm[i]) for i in range(N)] # [[N1, 8, 3], [N2, 8, 3],...] + pred_box3d_list = \ + [box_utils.corner_to_center_torch(corner, order).to(device) for corner in pred_corners_list] # [[N1, 7], [N2, 7], ...], angle in radius + pred_box3d_world_list = \ + [box_utils.corner_to_center_torch(corner, order).to(device) for corner in pred_corners_world_list] # [[N1, 7], [N2, 7], ...], angle in radius + + pred_center_list = \ + [torch.mean(corner_tensor, dim=[1]) for corner_tensor in pred_corners_list] # [[N1,3], [N2,3], ...] 
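When per-box uncertainties are supplied, the edge information matrix built above is the inverse of the predicted variance, exp(-log sigma^2), with the x/y entries additionally divided by the squared anchor diagonal to undo the normalized regression target. The same conversion in isolation (toy log-variances):

```python
import numpy as np

w_a, l_a = 1.6, 3.9
d_a_square = w_a ** 2 + l_a ** 2             # squared anchor diagonal, as above

log_sigma2 = np.array([-1.0, -1.2, -2.0])    # predicted log-variance for x, y, yaw
certainty = np.exp(-log_sigma2)              # inverse variance
certainty[:2] /= d_a_square                  # sigma_delta_x -> sigma_x rescaling

info = np.identity(3, dtype=np.float64)
info[[0, 1, 2], [0, 1, 2]] = certainty       # diagonal information matrix for one SE2 edge
```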
+ + pred_center_world_list = \ + [pred_box3d_world[:,:3] for pred_box3d_world in pred_box3d_world_list] + + pred_yaw_world_list = \ + [pred_box3d[:, 6] for pred_box3d in pred_box3d_world_list] + + pred_len = \ + [pred_center.shape[0] for pred_center in pred_center_list] + + + box_idx_to_agent = [] + for i in range(N): + box_idx_to_agent += [i] * pred_len[i] + + if DEBUG: + vis_corners_list(pred_corners_world_list,filename="/GPFS/rhome/yifanlu/OpenCOOD/box_align_items/gt_box_noisy_pose.png") + + + pred_center_cat = torch.cat(pred_center_list, dim=0) # [sum(pred_box), 3] + pred_center_world_cat = torch.cat(pred_center_world_list, dim=0) # [sum(pred_box), 3] + pred_box3d_cat = torch.cat(pred_box3d_list, dim=0) # [sum(pred_box), 7] + pred_yaw_world_cat = torch.cat(pred_yaw_world_list) # [sum(pred_box)] + + pred_center_world_cat_cpu = pred_center_world_cat.cpu() # if use gpu, it will get nan. + pred_center_allpair_dist = all_pair_l2(pred_center_world_cat_cpu, pred_center_world_cat_cpu) # [sum(pred_box), sum(pred_box)] + + + # let pair from one vehicle be max distance + MAX_DIST = 10000 + cum = 0 + for i in range(N): + pred_center_allpair_dist[cum: cum + pred_len[i], cum: cum +pred_len[i]] = MAX_DIST + cum += pred_len[i] + + + cluster_id = N # let the vertex id of object start from N + cluster_dict = OrderedDict() + remain_box = set(range(cum)) + thres = 1 # l2 distance within the threshold, can be considered as one object. + for box_idx in range(cum): + if box_idx not in remain_box: # already assigned + continue + within_thres_idx_tensor = (pred_center_allpair_dist[box_idx] < thres).nonzero().flatten() + within_thres_idx_list = within_thres_idx_tensor.cpu().numpy().tolist() + + if len(within_thres_idx_list) == 0: # if it's a single box + continue + + # start from within_thres_idx_list, find new box added to the cluster + explored = [box_idx] + unexplored = [idx for idx in within_thres_idx_list if idx in remain_box] + + while unexplored: + idx = unexplored[0] + within_thres_idx_tensor = (pred_center_allpair_dist[idx] < thres).nonzero().flatten() + within_thres_idx_list = within_thres_idx_tensor.cpu().numpy().tolist() + for newidx in within_thres_idx_list: + if (newidx not in explored) and (newidx not in unexplored) and (newidx in remain_box): + unexplored.append(newidx) + unexplored.remove(idx) + explored.append(idx) + + if len(explored) == 1: # it's a single box, neighbors have been assigned + remain_box.remove(box_idx) + continue + + cluster_box_idxs = explored + + cluster_dict[cluster_id] = OrderedDict() + cluster_dict[cluster_id]['box_idx'] = [idx for idx in cluster_box_idxs] + cluster_dict[cluster_id]['box_dist'] = [pred_center_cat[idx].norm() for idx in cluster_box_idxs] # distance to observer + cluster_dict[cluster_id]['box_center_world'] = [pred_center_world_cat[idx] for idx in cluster_box_idxs] # coordinate in world, [3,] + cluster_dict[cluster_id]['box_yaw'] = [pred_yaw_world_cat[idx] for idx in cluster_box_idxs] + + + box_dist = torch.as_tensor(cluster_dict[cluster_id]['box_dist']).to(device) + box_weight = F.normalize(1/box_dist, p=1, dim=0) # [n] + centers = torch.stack(cluster_dict[cluster_id]['box_center_world'], dim=0) # [n, 3] + yaws = torch.stack(cluster_dict[cluster_id]['box_yaw']) # [n] + + weighted_center = torch.sum(box_weight.unsqueeze(-1) * centers, dim=0) # [3,] + weighted_yaw = torch.sum(box_weight * yaws) # [1,] + + weighted_center[2] = weighted_yaw # just replace z to yaw + + cluster_dict[cluster_id]['se2'] = weighted_center # [x, y, yaw] + + # DEBUG = True + if 
DEBUG: + from icecream import ic + ic(cluster_dict[cluster_id]['box_idx']) + ic(centers) + ic(yaws) + ic(box_weight) + ic(cluster_dict[cluster_id]['se2']) + + cluster_dict[cluster_id].pop('box_dist') + cluster_dict[cluster_id].pop('box_center_world') + cluster_dict[cluster_id].pop('box_yaw') + + cluster_id += 1 + for idx in cluster_box_idxs: + remain_box.remove(idx) + + vertex_num = cluster_id + agent_num = N + landmark_num = cluster_id - N + # ic(agent_num) + # ic(landmark_num) + + """ + Now we have clusters for objects. we can create pose graph. + First we consider center as landmark. + Maybe set corner as landmarks in the future. + """ + pgo = PoseGraphOptimization2D(verbose=False) + if DEBUG: + pgo = PoseGraphOptimization2D(verbose=True) + # Add agent to vertexs + for agent_id in range(N): + v_id = agent_id + # notice lidar_pose use degree format, translate it to radius. + # pose_np = lidar_poses[agent_id, [0,1,4]].cpu().numpy() + pose_np = noisy_lidar_poses[agent_id, [0,1,4]].cpu().numpy() + pose_np[2] = np.deg2rad(pose_np[2]) # radius + v_pose = g2o.SE2(pose_np) + # if agent_id == 0 and DEBUG: + # pgo.add_vertex(id=v_id, pose=v_pose, fixed=True) + # else: + pgo.add_vertex(id=v_id, pose=v_pose, fixed=False) + + # Add landmark to vertexs + for landmark_id in range(N, cluster_id): + v_id = landmark_id + v_pose = g2o.SE2(cluster_dict[landmark_id]['se2'].cpu().numpy()) + pgo.add_vertex(id=v_id, pose=v_pose, fixed=False) + + # Add agent-landmark edge to edge + for landmark_id in range(N, cluster_id): + for box_idx in cluster_dict[landmark_id]['box_idx']: + agent_id = box_idx_to_agent[box_idx] + e_pose = g2o.SE2(pred_box3d_cat[box_idx][[0,1,6]].cpu().numpy()) + pgo.add_edge(vertices=[agent_id, landmark_id], measurement=e_pose, information=np.identity(3)) + + pgo.optimize() + + pose_new_list = [] + for agent_id in range(N): + # print(pgo.get_pose(agent_id).vector()) + pose_new_list.append(pgo.get_pose(agent_id).vector()) + + refined_pose = np.array(pose_new_list) + refined_pose[:,2] = np.rad2deg(refined_pose[:,2]) # rad -> degree, same as source + + return refined_pose + +def box_alignment(pred_corner3d_list, uncertainty_list, lidar_poses, record_len, proj_first=False): + """ + Args: + pred_corner3d_list: list of tensors, with shape [[N1_object, 8, 3], [N2_object, 8, 3], ...,[N_sumcav_object, 8, 3]] + box in each agent's coordinate. (proj_first=False) + + pred_box3d_list: not necessary + list of tensors, with shape [[N1_object, 7], [N2_object, 7], ...,[N_sumcav_object, 7]] + + scores_list: list of tensor, [[N1_object,], [N2_object,], ...,[N_sumcav_object,]] + box confidence score. 
+ + lidar_poses: torch.Tensor [sum(cav), 6] + + record_len: torch.Tensor + Returns: + refined_lidar_pose: torch.Tensor [sum(cav), 6] + """ + refined_lidar_pose = [] + start_idx = 0 + for b in record_len: + refined_lidar_pose.append( + torch.from_numpy( + box_alignment_relative_sample( + pred_corner3d_list[start_idx: start_idx + b], + lidar_poses[start_idx: start_idx + b], + clean_lidar_pose=None, + uncertainty_list= None if uncertainty_list is None else uncertainty_list[start_idx: start_idx + b] + ) + ) + ) + start_idx += b + + return torch.cat(refined_lidar_pose, dim=0) + +def vis_corners_list(corner3d_list, filename="/GPFS/rhome/yifanlu/OpenCOOD/opencood/corners.png"): + """ + Args: + corner3d: list of torch.Tensor, shape [N, 8, 3] + + """ + COLOR = ['red','springgreen','dodgerblue', 'darkviolet'] + box_idx = 0 + + for idx in range(len(corner3d_list)): + corner3d = corner3d_list[idx] + if torch.is_tensor(corner3d): + corner3d = corner3d.cpu().numpy() + + corner2d = corner3d[:,:4,:2] + import matplotlib.pyplot as plt + for i in range(corner2d.shape[0]): + plt.scatter(corner2d[i,[0,1],0], corner2d[i,[0,1], 1], s=2, c=COLOR[idx]) + plt.plot(corner2d[i,[0,1,2,3,0],0], corner2d[i,[0,1,2,3,0], 1], linewidth=1, c=COLOR[idx]) + plt.text(corner2d[i,0,0], corner2d[i,0,1], s=str(box_idx), fontsize="xx-small") + box_idx += 1 + plt.gca().invert_yaxis() + plt.axis('equal') + plt.savefig(filename, dpi=400) + plt.clf() + +def vis_corners(corner3d, filename="/GPFS/rhome/yifanlu/OpenCOOD/opencood/corners.png"): + """ + Args: + corner3d: torch.Tensor, shape [N, 8, 3] + + box3d: torch.Tensor shape [N, 7] + """ + if torch.is_tensor(corner3d): + corner3d = corner3d.cpu().numpy() + + + corner2d = corner3d[:,:4,:2] + import matplotlib.pyplot as plt + for i in range(corner2d.shape[0]): + plt.scatter(corner2d[i,[0,1],0], corner2d[i,[0,1], 1], s=2) + plt.plot(corner2d[i,[0,1,2,3,0],0], corner2d[i,[0,1,2,3,0], 1]) + # plt.text(corner2d[i,0,0], corner2d[i,0,1], s=f"{box3d[i,0]:.2f},{box3d[i,1]:.2f},{box3d[i,6]:.2f}", fontsize='xx-small') + plt.axis('equal') + plt.savefig(filename, dpi=300) + plt.clf() + +def vis_pose(lidar_poses): + """ + Args: + lidar_poses: torch.Tensor shape [N_, 6], x,y,z, roll, yaw, pitch + """ + h = 1.56 + l = 3.9 + w = 1.6 + if torch.is_tensor(lidar_poses): + lidar_poses = lidar_poses.cpu().numpy() + + box3d = np.zeros((lidar_poses.shape[0], 7)) + box3d[:,0] = lidar_poses[:,0] + box3d[:,1] = lidar_poses[:,1] + box3d[:,3] = h # hwl order + box3d[:,4] = w + box3d[:,5] = l + box3d[:,6] = np.deg2rad(lidar_poses[:,4]) # degree -> radius + + corner3d = box_utils.boxes_to_corners_3d(box3d, order='hwl') + vis_corners(corner3d, box3d, "/GPFS/rhome/yifanlu/OpenCOOD/opencood/pose_corners.png") + +def test_pred_gt_box(): + gt_corners_list = torch.load("/GPFS/rhome/yifanlu/workspace/OpenCOOD/box_align_items/previous_items/gt_box_list.pt") + data = torch.load("/GPFS/rhome/yifanlu/OpenCOOD/box_align_items/stage1_output_02/0.pt") + pred_corner3d_list, pred_box3d_list, scores_list, record_len, lidar_pose, lidar_pose_clean = data + + lidar_pose_tfm = pose_to_tfm(lidar_pose, dof=6) + lidar_pose_clean_tfm = pose_to_tfm(lidar_pose_clean, dof=6) # Tw_c + N = lidar_pose.shape[0] + + pred_corners_world_list = \ + [box_utils.project_box3d(pred_corner3d_list[i], lidar_pose_tfm[i]) for i in range(N)] # [[N1, 8, 3], [N2, 8, 3],...] 
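The `box_alignment` wrapper above walks `record_len` to slice the flat, batch-concatenated box and pose lists into one group per sample before refining each group independently. A toy illustration of that slicing (made-up counts; the per-sample call is left as a comment):

```python
import torch

record_len = torch.tensor([3, 2])                                      # 2 samples, 3 and 2 agents
pred_corner3d_list = [torch.randn(n, 8, 3) for n in (4, 2, 5, 1, 3)]   # one entry per agent
lidar_poses = torch.randn(5, 6)                                        # [sum(cav), 6]

start_idx = 0
for b in record_len:
    sample_corners = pred_corner3d_list[start_idx: start_idx + int(b)]  # this sample's agents
    sample_poses = lidar_poses[start_idx: start_idx + int(b)]
    # refined = box_alignment_relative_sample(sample_corners, sample_poses, ...)
    start_idx += int(b)
```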
+ + gt_corners_world_list = \ + [box_utils.project_box3d(gt_corners_list[i], lidar_pose_clean_tfm[i]) for i in range(N)] + + vis_corners_list([torch.cat(pred_corners_world_list, dim=0), torch.cat(gt_corners_world_list, dim=0)], filename="/GPFS/rhome/yifanlu/OpenCOOD/box_align_items/gt_box_pred_box.png") + + + + +def test_gt_boxes_world(): + data = torch.load("/GPFS/rhome/yifanlu/OpenCOOD/box_align_items/stage1_output_02/0.pt") + pred_corner3d_list, pred_box3d_list, scores_list, record_len, lidar_pose, lidar_pose_clean = data + + gt_poses_tensor = lidar_pose_clean + noisy_poses_tensor = lidar_pose + + gt_corners_list = torch.load("/GPFS/rhome/yifanlu/workspace/OpenCOOD/box_align_items/previous_items/gt_box_list.pt") + + # refined_poses = box_alignment_sample(gt_corners_list, noisy_poses_tensor, noisy_poses_tensor) + refined_poses = box_alignment_relative_sample(pred_corner3d_list, noisy_poses_tensor, gt_poses_tensor) + print("before:\n", noisy_poses_tensor.cpu().numpy()[:,[0,1,4]]) + + print("after:\n", refined_poses) + + print("gt:\n", gt_poses_tensor.cpu().numpy()[:,[0,1,4]]) + + # gt_corners_world_list = \ + # [box_utils.project_box3d(gt_corners_list[i], lidar_pose_tfm[i]) for i in range(3)] # [[N1, 8, 3], [N2, 8, 3],...] + + # vis_corners_list(gt_corners_world_list, filename="/GPFS/rhome/yifanlu/workspace/OpenCOOD/box_align_items/gt_corners.png") + + +def test_box_align_tmp(): + """ + This func input different noise_std pose (load from stored files). + And run pose graph optimization, compare the localization error w/wo uncertainty/landmark SE2, etc. + """ + noise_stds = ['02','04','06'] + items = ["16"] + torch.set_printoptions(precision=3, sci_mode=False) + np.set_printoptions(precision=3, suppress=True) + for item in items: + for noise_std in noise_stds: + file_dir = f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/box_align_items/stage1_output_{noise_std}_w_uncertainty/{item}.pt" + data = torch.load(file_dir) + pred_corner3d_list, pred_box3d_list, uncertainty_list, record_len, lidar_pose, lidar_pose_clean = data + lidar_pose[0] = lidar_pose_clean[0] + refined_pose_SE2 = box_alignment_relative_sample(pred_corner3d_list, lidar_pose_clean, lidar_pose, uncertainty_list=uncertainty_list, landmark_SE2=True) + refined_pose = box_alignment_relative_sample(pred_corner3d_list, lidar_pose_clean, lidar_pose, uncertainty_list=uncertainty_list, landmark_SE2=False) + # refined_pose = box_alignment_sample(pred_corner3d_list, lidar_pose, lidar_pose) + lidar_pose_clean = lidar_pose_clean[:,[0,1,4]].cpu().numpy() + print(f"noise std: {noise_std}: SE2") + print(np.abs(refined_pose_SE2 - lidar_pose_clean)) + # print(f"PointXY") + # print(np.abs(refined_pose - lidar_pose_clean)) + print(f"original error:") + lidar_pose = lidar_pose[:,[0,1,4]].cpu().numpy() + print(np.abs(lidar_pose - lidar_pose_clean)) + # print(refined_pose_w_u) + # print(lidar_pose_clean) + + +def test_box_align(noise_std="04", relative=True, use_uncertainty=False): + from glob import glob + data_dir = f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/box_align_items/stage1_output_{noise_std}_w_uncertainty/*" + trans_error_before_list = [] + rotate_error_before_list = [] + trans_error_after_list = [] + rotate_error_after_list = [] + + full_files = glob(data_dir) + for filename in full_files: + data = torch.load(filename) + + if relative is False: + pred_corner3d_list, pred_box3d_list, scores_list, record_len, lidar_pose, lidar_pose_clean = data + refined_pose = box_alignment_sample(pred_corner3d_list, None, lidar_pose) + elif relative is True: + 
pred_corner3d_list, pred_box3d_list, uncertainty_list, record_len, lidar_pose, lidar_pose_clean = data + lidar_pose[0] = lidar_pose_clean[0] + # if not use_uncertainty: + # uncertainty_list = None + refined_pose = box_alignment_relative_sample(pred_corner3d_list, lidar_pose_clean, lidar_pose, uncertainty_list=uncertainty_list) + uncertainty_list = None + refined_pose_wo_uncertainty = box_alignment_relative_sample(pred_corner3d_list, lidar_pose_clean, lidar_pose, uncertainty_list=uncertainty_list) + + lidar_pose = lidar_pose.cpu().numpy()[:,[0,1,4]] + lidar_pose_clean = lidar_pose_clean.cpu().numpy()[:,[0,1,4]] + np.set_printoptions(suppress=True, precision=4) + print(lidar_pose[1:]) + print(refined_pose_wo_uncertainty[1:]) + print(refined_pose[1:]) + print(lidar_pose_clean[1:]) + print() + + error_before = np.abs(lidar_pose - lidar_pose_clean) + error_after = np.abs(refined_pose - lidar_pose_clean) + + trans_error_before_list.append(np.mean(error_before[:,[0,1]])) + rotate_error_before_list.append(np.mean(error_before[:,2])) + + trans_error_after_list.append(np.mean(error_after[:,[0,1]])) + rotate_error_after_list.append(np.mean(error_after[:,2])) + + raise + + + out_quantile_dict = {0.8:None, 0.5:None, 0.3:None} + for q in out_quantile_dict.keys(): + out_quantile_dict[q] = (np.quantile(trans_error_before_list, q), + np.quantile(trans_error_after_list, q), + np.quantile(rotate_error_before_list, q), + np.quantile(rotate_error_after_list, q)) + + return out_quantile_dict + # return np.mean(trans_error_before_list), np.mean(rotate_error_before_list), np.mean(trans_error_after_list), np.mean(rotate_error_after_list) + +def main1(): + """ + This function test the box alignment performance on the subset of training set. + """ + for noise in ['02', '04', '06']: + out = test_box_align(noise, relative=True, use_uncertainty=True) + for k,v in out.items(): + with open(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/box_align_items/rel_quantile{k*100}_{noise}_w_u.txt", "w") as f: + f.write(f"trans error before: \t {v[0]}\n") + f.write(f"trans error after: \t {v[1]}\n\n") + + f.write(f"rotate error before: \t {v[2]}\n") + f.write(f"rotate error after: \t {v[3]}\n") + + + out = test_box_align(noise, relative=True, use_uncertainty=False) + for k,v in out.items(): + with open(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/box_align_items/rel_quantile{k*100}_{noise}_wo_u.txt", "w") as f: + f.write(f"trans error before: \t {v[0]}\n") + f.write(f"trans error after: \t {v[1]}\n\n") + + f.write(f"rotate error before: \t {v[2]}\n") + f.write(f"rotate error after: \t {v[3]}\n") + + +def vis_pose_graph( + poses, + pred_corner3d, + save_dir_path="/GPFS/rhome/yifanlu/OpenCOOD/box_align_items/pose_graph_vis", + ): + """ + Args: + poses: list of np.ndarray + each item is a pose . [pose_before, ..., pose_refined] + + pred_corner3d: list + predicted box for each agent. 
+ + """ + COLOR = ['red','springgreen','dodgerblue', 'darkviolet', 'orange'] + from opencood.utils.transformation_utils import get_relative_transformation + + if not os.path.exists(save_dir_path): + os.mkdir(save_dir_path) + + for iter, pose in enumerate(poses): + box_idx = 0 + # we first transform other agents' box to ego agent's coordinate + relative_t_matrix = get_relative_transformation(pose) + N = pose.shape[0] + + pred_corners3d_in_ego = [box_utils.project_box3d(pred_corner3d[i].cpu().numpy(), relative_t_matrix[i]) for i in range(N)] + + for agent_id in range(len(pred_corners3d_in_ego)): + corner3d = pred_corners3d_in_ego[agent_id] + agent_pos = relative_t_matrix[agent_id][:2,3] # agent's position in ego's coordinate + if torch.is_tensor(corner3d): + corner3d = corner3d.cpu().numpy() + + corner2d = corner3d[:,:4,:2] + center2d = np.mean(corner2d, axis=1) + import matplotlib.pyplot as plt + for i in range(corner2d.shape[0]): + plt.scatter(corner2d[i,[0,1],0], corner2d[i,[0,1], 1], s=2, c=COLOR[agent_id]) + plt.plot(corner2d[i,[0,1,2,3,0],0], corner2d[i,[0,1,2,3,0], 1], linewidth=1, c=COLOR[agent_id]) + plt.text(corner2d[i,0,0], corner2d[i,0,1], s=str(box_idx), fontsize="xx-small") + # add a line connecting box center and agent. + box_center = center2d[i] # [2,] + connection_x = [agent_pos[0], box_center[0]] + connection_y = [agent_pos[1], box_center[1]] + # print(connection_x) + # print(connection_y) + # print() + plt.plot(connection_x, connection_y,'--', linewidth=0.5, c=COLOR[agent_id], alpha=0.3) + box_idx += 1 + + filename = os.path.join(save_dir_path, f"{iter}.png") + plt.gca().invert_yaxis() + plt.axis('equal') + plt.savefig(filename, dpi=400) + plt.clf() + +def vis_pose_graphs(): + noise_stds = ['02','04','06'] + items = ["53", "63", "73", "83"] + torch.set_printoptions(precision=3, sci_mode=False) + np.set_printoptions(precision=3, suppress=True) + for item in items: + for noise_std in noise_stds: + file_dir = f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/box_align_items/stage1_output_{noise_std}_w_uncertainty/{item}.pt" + data = torch.load(file_dir) + pred_corner3d_list, pred_box3d_list, uncertainty_list, record_len, lidar_pose, lidar_pose_clean = data + lidar_pose[0] = lidar_pose_clean[0] + refined_pose_SE2 = box_alignment_relative_sample(pred_corner3d_list, lidar_pose_clean, lidar_pose, uncertainty_list=uncertainty_list, landmark_SE2=True) + ## visualize pred_corner3d with refined_pose. We can set different iteration to animate + save_dir_path = f"/GPFS/rhome/yifanlu/OpenCOOD/box_align_items/pose_graph_vis/{item}_{noise_std}" + poses = [lidar_pose.cpu().numpy(), refined_pose_SE2] + vis_pose_graph(poses, pred_corner3d_list, save_dir_path) + + + + +def main2(): + pass + +if __name__ == "__main__": + # vis_pose_graphs() + test_box_align_tmp() + # main1() + # test_gt_boxes_world() \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/box_align_v2.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/box_align_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..c54e709fe978232ecd8aa6c085d7792d02138b42 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/box_align_v2.py @@ -0,0 +1,437 @@ +""" +This module is designed for box alignment + +We will use g2o for pose graph optimization. 
+""" + + +from opencood.models.sub_modules.pose_graph_optim import PoseGraphOptimization2D +from opencood.utils.transformation_utils import pose_to_tfm +from opencood.utils.common_utils import check_torch_to_numpy +from opencood.utils import box_utils +from collections import OrderedDict +import numpy as np +import torch +import torch.nn.functional as F +import g2o +from icecream import ic +import copy +import os +import matplotlib.pyplot as plt + +DEBUG = False + +def vis_pose_graph(poses, pred_corner3d, save_dir_path, vis_agent=False): + """ + Args: + poses: list of np.ndarray + each item is a pose . [pose_before, ..., pose_refined] + + pred_corner3d: list + predicted box for each agent. + + vis_agent: bool + whether draw the agent's box + + """ + COLOR = ['red','springgreen','dodgerblue', 'darkviolet', 'orange'] + from opencood.utils.transformation_utils import get_relative_transformation + + if not os.path.exists(save_dir_path): + os.makedirs(save_dir_path) + + for iter, pose in enumerate(poses): + box_idx = 0 + # we first transform other agents' box to ego agent's coordinate + relative_t_matrix = get_relative_transformation(pose) + N = pose.shape[0] + nonempty_indices = [idx for (idx, corners) in enumerate(pred_corner3d) if len(corners)!=0] + pred_corners3d_in_ego = [box_utils.project_box3d(pred_corner3d[i], relative_t_matrix[i]) for i in nonempty_indices] + + for agent_id in range(len(pred_corners3d_in_ego)): + if agent_id not in nonempty_indices: + continue + corner3d = pred_corners3d_in_ego[agent_id] + agent_pos = relative_t_matrix[agent_id][:2,3] # agent's position in ego's coordinate + + if vis_agent: + plt.scatter(agent_pos[0], agent_pos[1], s=4, c=COLOR[agent_id]) + + corner2d = corner3d[:,:4,:2] + center2d = np.mean(corner2d, axis=1) + for i in range(corner2d.shape[0]): + plt.scatter(corner2d[i,[0,1],0], corner2d[i,[0,1], 1], s=2, c=COLOR[agent_id]) + plt.plot(corner2d[i,[0,1,2,3,0],0], corner2d[i,[0,1,2,3,0], 1], linewidth=1, c=COLOR[agent_id]) + plt.text(corner2d[i,0,0], corner2d[i,0,1], s=str(box_idx), fontsize="xx-small") + # add a line connecting box center and agent. + box_center = center2d[i] # [2,] + connection_x = [agent_pos[0], box_center[0]] + connection_y = [agent_pos[1], box_center[1]] + + plt.plot(connection_x, connection_y,'--', linewidth=0.5, c=COLOR[agent_id], alpha=0.3) + box_idx += 1 + + filename = os.path.join(save_dir_path, f"{iter}.png") + plt.gca().invert_yaxis() + plt.axis('equal') + plt.savefig(filename, dpi=400) + plt.clf() + + +def all_pair_l2(A, B): + """ All pair L2 distance for A and B + Args: + A : np.ndarray + shape [N_A, D] + B : np.ndarray + shape [N_B, D] + Returns: + C : np.ndarray + shape [N_A, N_B] + """ + TwoAB = 2*A@B.T # [N_A, N_B] + C = np.sqrt( + np.sum(A * A, 1, keepdims=True).repeat(TwoAB.shape[1], axis=1) \ + + np.sum(B * B, 1, keepdims=True).T.repeat(TwoAB.shape[0], axis=0) \ + - TwoAB + ) + return C + + + + +def box_alignment_relative_sample_np( + pred_corners_list, + noisy_lidar_pose, + uncertainty_list=None, + landmark_SE2=True, + adaptive_landmark=False, + normalize_uncertainty=False, + abandon_hard_cases = False, + drop_hard_boxes = False, + drop_unsure_edge = False, + use_uncertainty = True, + thres = 1.5, + yaw_var_thres = 0.2, + max_iterations = 1000): + """ Perform box alignment for one sample. + Correcting the relative pose. 
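A standalone numeric sanity check for all_pair_l2 above, comparing the ||a||^2 + ||b||^2 - 2ab expansion against an explicit loop; the maximum with 0 guards against tiny negative values from rounding before the square root:

import numpy as np

A = np.random.rand(4, 3)
B = np.random.rand(5, 3)

C_fast = np.sqrt(np.maximum(
    np.sum(A * A, 1, keepdims=True) + np.sum(B * B, 1, keepdims=True).T - 2 * A @ B.T,
    0.0))
C_slow = np.array([[np.linalg.norm(a - b) for b in B] for a in A])
assert np.allclose(C_fast, C_slow, atol=1e-6)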
+ + Args: + pred_corners_list: in each ego coordinate + [[N_1, 8, 3], ..., [N_cav1, 8, 3]] + + clean_lidar_poses: + [N_cav1, 6], in degree + + noisy_lidar_poses: + [N_cav1, 6], in degree + + uncertainty_list: + [[N_1, 3], [N_2, 3], ..., [N_cav1, 3]] + + landmark_SE2: + if True, the landmark is SE(2), otherwise R^2 + + adaptive_landmark: (when landmark_SE2 = True) + if True, landmark will turn to R^2 if yaw angles differ a lot + + normalize_uncertainty: bool + if True, normalize the uncertainty + + abandon_hard_cases: bool + if True, algorithm will just return original poses for hard cases + + drop_unsure_edge: bool + + Returns: + refined_lidar_poses: np.ndarray + [N_cav1, 3], + """ + if not use_uncertainty: + uncertainty_list = None + ## first transform point from ego coordinate to world coordinate, using lidar_pose. + order = 'lwh' # hwl + N = noisy_lidar_pose.shape[0] + lidar_pose_noisy_tfm = pose_to_tfm(noisy_lidar_pose) + + nonempty_indices = [idx for (idx, corners) in enumerate(pred_corners_list) if len(corners)!=0] # if one agent detects no boxes, its corners is just []. + + pred_corners_world_list = \ + [box_utils.project_box3d(pred_corners_list[i], lidar_pose_noisy_tfm[i]) for i in nonempty_indices] # [[N1, 8, 3], [N2, 8, 3],...] + pred_box3d_list = \ + [box_utils.corner_to_center(corner, order) for corner in pred_corners_list if len(corner)!=0] # [[N1, 7], [N2, 7], ...], angle in radian + pred_box3d_world_list = \ + [box_utils.corner_to_center(corner, order) for corner in pred_corners_world_list] # [[N1, 7], [N2, 7], ...], angle in radian + pred_center_list = \ + [np.mean(corners, axis=1) for corners in pred_corners_list if len(corners)!=0] # [[N1,3], [N2,3], ...] + + pred_center_world_list = \ + [pred_box3d_world[:,:3] for pred_box3d_world in pred_box3d_world_list] + pred_yaw_world_list = \ + [pred_box3d[:, 6] for pred_box3d in pred_box3d_world_list] + pred_len = \ + [len(corners) for corners in pred_corners_list] + + + box_idx_to_agent = [] + for i in range(N): + box_idx_to_agent += [i] * pred_len[i] + + pred_center_cat = np.concatenate(pred_center_list, axis=0) # [sum(pred_box), 3] + pred_center_world_cat = np.concatenate(pred_center_world_list, axis=0) # [sum(pred_box), 3] + pred_box3d_cat = np.concatenate(pred_box3d_list, axis=0) # [sum(pred_box), 7] + pred_yaw_world_cat = np.concatenate(pred_yaw_world_list, axis=0) # [sum(pred_box)] + + # hard-coded currently + w_a = 1.6 # width of anchor + l_a = 3.9 # length of anchor + d_a_square = w_a ** 2 + l_a ** 2 # anchor's diag + + + if uncertainty_list is not None: + pred_log_sigma2_cat = np.concatenate([i for i in uncertainty_list if len(i)!=0], axis=0) + # Since the regression target is x_t = (x_g - x_a)/d_a, + # var(x) = d_a^2 * var(x_t) + # so we 1/var(x) = 1/var(x_t) / d_a^2 + # sigma_{delta_x}^2 -> sigma_x^2. 
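# In other words: the head predicts log sigma_t^2 for the normalized offset target
# x_t = (x_g - x_a) / d_a, so var(x) = d_a^2 * exp(log sigma_t^2) and the precision
# used as an edge weight is 1 / var(x) = exp(-log sigma_t^2) / d_a^2 for x and y,
# while the yaw channel keeps exp(-log sigma_t^2) unscaled. For example, with
# log sigma_t^2 = 0 and d_a^2 = 1.6^2 + 3.9^2 = 17.77, the x/y weight is about 0.056.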
+ pred_certainty_cat = np.exp(-pred_log_sigma2_cat) + pred_certainty_cat[:,:2] /= d_a_square + + + if normalize_uncertainty: + pred_certainty_cat = np.sqrt(pred_certainty_cat) + + + pred_center_allpair_dist = all_pair_l2(pred_center_world_cat, pred_center_world_cat) # [sum(pred_box), sum(pred_box)] + + # let pair from one vehicle be max distance + MAX_DIST = 10000 + cum = 0 + for i in range(N): + pred_center_allpair_dist[cum: cum + pred_len[i], cum: cum +pred_len[i]] = MAX_DIST # do not include itself + cum += pred_len[i] + + + cluster_id = N # let the vertex id of object start from N + cluster_dict = OrderedDict() + remain_box = set(range(cum)) + + for box_idx in range(cum): + + if box_idx not in remain_box: # already assigned + continue + + within_thres_idx_tensor = (pred_center_allpair_dist[box_idx] < thres).nonzero()[0] + within_thres_idx_list = within_thres_idx_tensor.tolist() + + if len(within_thres_idx_list) == 0: # if it's a single box + continue + + # start from within_thres_idx_list, find new box added to the cluster + explored = [box_idx] + unexplored = [idx for idx in within_thres_idx_list if idx in remain_box] + + while unexplored: + idx = unexplored[0] + within_thres_idx_tensor = (pred_center_allpair_dist[box_idx] < thres).nonzero()[0] + within_thres_idx_list = within_thres_idx_tensor.tolist() + for newidx in within_thres_idx_list: + if (newidx not in explored) and (newidx not in unexplored) and (newidx in remain_box): + unexplored.append(newidx) + unexplored.remove(idx) + explored.append(idx) + + if len(explored) == 1: # it's a single box, neighbors have been assigned + remain_box.remove(box_idx) + continue + + cluster_box_idxs = explored + + cluster_dict[cluster_id] = OrderedDict() + cluster_dict[cluster_id]['box_idx'] = [idx for idx in cluster_box_idxs] + cluster_dict[cluster_id]['box_center_world'] = [pred_center_world_cat[idx] for idx in cluster_box_idxs] # coordinate in world, [3,] + cluster_dict[cluster_id]['box_yaw'] = [pred_yaw_world_cat[idx] for idx in cluster_box_idxs] + + yaw_var = np.var(cluster_dict[cluster_id]['box_yaw']) + cluster_dict[cluster_id]['box_yaw_varies'] = yaw_var > yaw_var_thres + cluster_dict[cluster_id]['active'] = True + + + ########### adaptive_landmark ################## + if landmark_SE2: + if adaptive_landmark and yaw_var > yaw_var_thres: + landmark = pred_center_world_cat[box_idx][:2] + for _box_idx in cluster_box_idxs: + pred_certainty_cat[_box_idx] *= 2 + else: + landmark = copy.deepcopy(pred_center_world_cat[box_idx]) + landmark[2] = pred_yaw_world_cat[box_idx] + else: + landmark = pred_center_world_cat[box_idx][:2] + ################################################## + + + cluster_dict[cluster_id]['landmark'] = landmark # [x, y, yaw] or [x, y] + cluster_dict[cluster_id]['landmark_SE2'] = True if landmark.shape[0] == 3 else False + + DEBUG = False + if DEBUG: + from icecream import ic + ic(cluster_dict[cluster_id]['box_idx']) + ic(cluster_dict[cluster_id]['box_center_world']) + ic(cluster_dict[cluster_id]['box_yaw']) + ic(cluster_dict[cluster_id]['landmark']) + + + cluster_id += 1 + for idx in cluster_box_idxs: + remain_box.remove(idx) + + + vertex_num = cluster_id + agent_num = N + landmark_num = cluster_id - N + + + ########### abandon_hard_cases ########## + """ + We should think what is hard cases for agent-object pose graph optimization + 1. Overlapping boxes are rare (landmark_num <= 3) + 2. 
Yaw angles differ a lot + """ + + if abandon_hard_cases: + # case1: object num is smaller than 3 + if landmark_num <= 3: + return noisy_lidar_pose[:,[0,1,4]] + + # case2: more than half of the landmarks yaw varies + yaw_varies_cnt = sum([cluster_dict[i]["box_yaw_varies"] for i in range(agent_num, vertex_num)]) + if yaw_varies_cnt >= 0.5 * landmark_num: + return noisy_lidar_pose[:,[0,1,4]] + + ########### drop hard boxes ############ + + if drop_hard_boxes: + for landmark_id in range(agent_num, vertex_num): + if cluster_dict[landmark_id]['box_yaw_varies']: + cluster_dict[landmark_id]['active'] = False + + + + + """ + Now we have clusters for objects. we can create pose graph. + First we consider center as landmark. + Maybe set corner as landmarks in the future. + """ + pgo = PoseGraphOptimization2D() + + # Add agent to vertexs + for agent_id in range(agent_num): + v_id = agent_id + # notice lidar_pose use degree format, translate it to radians. + pose_np = noisy_lidar_pose[agent_id, [0,1,4]] + pose_np[2] = np.deg2rad(pose_np[2]) # radians + v_pose = g2o.SE2(pose_np) + + if agent_id == 0: + pgo.add_vertex(id=v_id, pose=v_pose, fixed=True) + else: + pgo.add_vertex(id=v_id, pose=v_pose, fixed=False) + + # Add object to vertexs + for landmark_id in range(agent_num, vertex_num): + v_id = landmark_id + landmark = cluster_dict[landmark_id]['landmark'] # (3,) or (2,) + landmark_SE2 = cluster_dict[landmark_id]['landmark_SE2'] + + if landmark_SE2: + v_pose = g2o.SE2(landmark) + else: + v_pose = landmark + + pgo.add_vertex(id=v_id, pose=v_pose, fixed=False, SE2=landmark_SE2) + + # Add agent-object edge to edge set + for landmark_id in range(agent_num, vertex_num): + landmark_SE2 = cluster_dict[landmark_id]['landmark_SE2'] + + if not cluster_dict[landmark_id]['active']: + continue + + for box_idx in cluster_dict[landmark_id]['box_idx']: + agent_id = box_idx_to_agent[box_idx] + if landmark_SE2: + e_pose = g2o.SE2(pred_box3d_cat[box_idx][[0,1,6]].astype(np.float64)) + info = np.identity(3, dtype=np.float64) + if uncertainty_list is not None: + info[[0,1,2],[0,1,2]] = pred_certainty_cat[box_idx] + + ############ drop_unsure_edge ########### + if drop_unsure_edge and sum(pred_certainty_cat[box_idx]) < 100: + continue + + else: + e_pose = pred_box3d_cat[box_idx][[0,1]].astype(np.float64) + info = np.identity(2, dtype=np.float64) + if uncertainty_list is not None: + info[[0,1],[0,1]] = pred_certainty_cat[box_idx][:2] + + ############ drop_unsure_edge ############ + if drop_unsure_edge and sum(pred_certainty_cat[box_idx]) < 100: + continue + + pgo.add_edge(vertices=[agent_id, landmark_id], measurement=e_pose, information=info, SE2=landmark_SE2) + + pgo.optimize(max_iterations) + + pose_new_list = [] + for agent_id in range(agent_num): + # print(pgo.get_pose(agent_id).vector()) + pose_new_list.append(pgo.get_pose(agent_id).vector()) + + refined_pose = np.array(pose_new_list) + refined_pose[:,2] = np.rad2deg(refined_pose[:,2]) # rad -> degree, same as source + + return refined_pose + +def box_alignment_relative_np(pred_corner3d_list, + uncertainty_list, + lidar_poses, + record_len, + **kwargs): + """ + Args: + pred_corner3d_list: list of tensors, with shape [[N1_object, 8, 3], [N2_object, 8, 3], ...,[N_sumcav_object, 8, 3]] + box in each agent's coordinate. (proj_first=False) + + pred_box3d_list: not necessary + list of tensors, with shape [[N1_object, 7], [N2_object, 7], ...,[N_sumcav_object, 7]] + + scores_list: list of tensor, [[N1_object,], [N2_object,], ...,[N_sumcav_object,]] + box confidence score. 
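The edge information matrices assembled above are diagonal: an SE(2) landmark edge weights x, y and yaw, while a point landmark edge weights only x and y. A minimal sketch with a hypothetical certainty row:

import numpy as np

certainty = np.array([0.05, 0.07, 1.3])        # hypothetical [x, y, yaw] precisions for one box

info_se2 = np.identity(3)
info_se2[[0, 1, 2], [0, 1, 2]] = certainty     # SE(2) landmark: 3x3 information matrix

info_point = np.identity(2)
info_point[[0, 1], [0, 1]] = certainty[:2]     # R^2 landmark: 2x2, x/y only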
+ + lidar_poses: torch.Tensor [sum(cav), 6] + + record_len: torch.Tensor + Returns: + refined_lidar_pose: torch.Tensor [sum(cav), 6] + """ + refined_lidar_pose = [] + start_idx = 0 + for b in record_len: + refined_lidar_pose.append( + box_alignment_relative_sample_np( + pred_corner3d_list[start_idx: start_idx + b], + lidar_poses[start_idx: start_idx + b], + uncertainty_list= None if uncertainty_list is None else uncertainty_list[start_idx: start_idx + b], + **kwargs + ) + ) + start_idx += b + + return np.cat(refined_lidar_pose, axis=0) + + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/cbam.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/cbam.py new file mode 100644 index 0000000000000000000000000000000000000000..9fc5d628f81643add567f51109b9e432a262d6b0 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/cbam.py @@ -0,0 +1,279 @@ +import torch +import torch.nn as nn +import math +import torch.utils.model_zoo as model_zoo + + +__all__ = ['ResNet', 'resnet18_cbam', 'resnet34_cbam', 'resnet50_cbam', 'resnet101_cbam', + 'resnet152_cbam'] + + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', +} + + +def conv3x3(in_planes, out_planes, stride=1): + "3x3 convolution with padding" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + +def conv1x1(in_planes, out_planes, stride=1): + "1x1 convolution with padding" + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, + padding=0, bias=False) + + +class ChannelAttention(nn.Module): + def __init__(self, in_planes, ratio=16): + super(ChannelAttention, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.max_pool = nn.AdaptiveMaxPool2d(1) + + self.fc = nn.Sequential(nn.Conv2d(in_planes, in_planes // 16, 1, bias=False), + nn.ReLU(), + nn.Conv2d(in_planes // 16, in_planes, 1, bias=False)) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + avg_out = self.fc(self.avg_pool(x)) + max_out = self.fc(self.max_pool(x)) + out = avg_out + max_out + return self.sigmoid(out) + +class SpatialAttention(nn.Module): + def __init__(self, kernel_size=7): + super(SpatialAttention, self).__init__() + + self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=kernel_size//2, bias=False) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + avg_out = torch.mean(x, dim=1, keepdim=True) + max_out, _ = torch.max(x, dim=1, keepdim=True) + x = torch.cat([avg_out, max_out], dim=1) + x = self.conv1(x) + return self.sigmoid(x) + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv1x1(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv1x1(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + + self.ca = ChannelAttention(planes) + self.sa = SpatialAttention() + + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + out = 
self.ca(out) * out + out = self.sa(out) * out + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4) + self.relu = nn.ReLU(inplace=True) + + self.ca = ChannelAttention(planes * 4) + self.sa = SpatialAttention() + + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + out = self.ca(out) * out + out = self.sa(out) * out + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=1000): + self.inplanes = 64 + super(ResNet, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(x.size(0), -1) + x = self.fc(x) + + return x + + +def resnet18_cbam(pretrained=False, **kwargs): + """Constructs a ResNet-18 model. 
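With the ChannelAttention and SpatialAttention modules defined above in scope, a shape sketch of the per-block re-weighting (illustrative sizes only; note the ratio argument of ChannelAttention is unused, the reduction is hard-coded to 16):

import torch

x = torch.randn(2, 64, 32, 32)       # N, C, H, W
ca = ChannelAttention(64)            # outputs per-channel weights of shape [2, 64, 1, 1]
sa = SpatialAttention()              # outputs per-pixel weights of shape [2, 1, 32, 32]

x = ca(x) * x                        # channel re-weighting, broadcast over H and W
x = sa(x) * x                        # spatial re-weighting, broadcast over channels
print(x.shape)                       # torch.Size([2, 64, 32, 32])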
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) + if pretrained: + pretrained_state_dict = model_zoo.load_url(model_urls['resnet18']) + now_state_dict = model.state_dict() + now_state_dict.update(pretrained_state_dict) + model.load_state_dict(now_state_dict) + return model + + +def resnet34_cbam(pretrained=False, **kwargs): + """Constructs a ResNet-34 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) + if pretrained: + pretrained_state_dict = model_zoo.load_url(model_urls['resnet34']) + now_state_dict = model.state_dict() + now_state_dict.update(pretrained_state_dict) + model.load_state_dict(now_state_dict) + return model + + +def resnet50_cbam(pretrained=False, **kwargs): + """Constructs a ResNet-50 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) + if pretrained: + pretrained_state_dict = model_zoo.load_url(model_urls['resnet50']) + now_state_dict = model.state_dict() + now_state_dict.update(pretrained_state_dict) + model.load_state_dict(now_state_dict) + return model + + +def resnet101_cbam(pretrained=False, **kwargs): + """Constructs a ResNet-101 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) + if pretrained: + pretrained_state_dict = model_zoo.load_url(model_urls['resnet101']) + now_state_dict = model.state_dict() + now_state_dict.update(pretrained_state_dict) + model.load_state_dict(now_state_dict) + return model + + +def resnet152_cbam(pretrained=False, **kwargs): + """Constructs a ResNet-152 model. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) + if pretrained: + pretrained_state_dict = model_zoo.load_url(model_urls['resnet152']) + now_state_dict = model.state_dict() + now_state_dict.update(pretrained_state_dict) + model.load_state_dict(now_state_dict) + return model \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/cia_ssd_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/cia_ssd_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..ce6e1a77e1d971b6a1578fa833f7874194ed2a0b --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/cia_ssd_utils.py @@ -0,0 +1,101 @@ +import torch +from torch import nn + + + +class SSFA(nn.Module): + def __init__(self, args): + super(SSFA, self).__init__() + self._num_input_features = args['feature_num'] # 128 + + seq = [nn.ZeroPad2d(1)] + seq += get_conv_layers('Conv2d', 128, 128, n_layers=3, kernel_size=[3, 3, 3], + stride=[1, 1, 1], padding=[0, 1, 1], sequential=False) + self.bottom_up_block_0 = nn.Sequential(*seq) + self.bottom_up_block_1 = get_conv_layers('Conv2d', 128, 256, n_layers=3, kernel_size=[3, 3, 3], + stride=[2, 1, 1], padding=[1, 1, 1]) + + self.trans_0 = get_conv_layers('Conv2d', 128, 128, n_layers=1, kernel_size=[1], stride=[1], padding=[0]) + self.trans_1 = get_conv_layers('Conv2d', 256, 256, n_layers=1, kernel_size=[1], stride=[1], padding=[0]) + + self.deconv_block_0 = get_conv_layers('ConvTranspose2d', 256, 128, n_layers=1, kernel_size=[3], stride=[2], + padding=[1], output_padding=[1]) + self.deconv_block_1 = get_conv_layers('ConvTranspose2d', 256, 128, n_layers=1, kernel_size=[3], stride=[2], + padding=[1], output_padding=[1]) + + self.conv_0 = get_conv_layers('Conv2d', 128, 128, n_layers=1, kernel_size=[3], stride=[1], padding=[1]) + self.conv_1 = get_conv_layers('Conv2d', 128, 128, n_layers=1, kernel_size=[3], stride=[1], padding=[1]) + + self.w_0 = get_conv_layers('Conv2d', 128, 1, n_layers=1, kernel_size=[1], stride=[1], padding=[0], relu_last=False) + self.w_1 = get_conv_layers('Conv2d', 128, 1, n_layers=1, kernel_size=[1], stride=[1], padding=[0], relu_last=False) + + # default init_weights for conv(msra) and norm in ConvModule + def init_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.xavier_normal_(m.weight, gain=1) + if hasattr(m, "bias") and m.bias is not None: + nn.init.constant_(m.bias, 0) + + def forward(self, x): + x_0 = self.bottom_up_block_0(x) + x_1 = self.bottom_up_block_1(x_0) + x_trans_0 = self.trans_0(x_0) + x_trans_1 = self.trans_1(x_1) + x_middle_0 = self.deconv_block_0(x_trans_1) + x_trans_0 + x_middle_1 = self.deconv_block_1(x_trans_1) + x_output_0 = self.conv_0(x_middle_0) + x_output_1 = self.conv_1(x_middle_1) + + x_weight_0 = self.w_0(x_output_0) + x_weight_1 = self.w_1(x_output_1) + x_weight = torch.softmax(torch.cat([x_weight_0, x_weight_1], dim=1), dim=1) + x_output = x_output_0 * x_weight[:, 0:1, :, :] + x_output_1 * x_weight[:, 1:, :, :] + + return x_output.contiguous() + + +def get_conv_layers(conv_name, in_channels, out_channels, n_layers, kernel_size, stride, + padding, relu_last=True, sequential=True, **kwargs): + """ + Build convolutional layers. 
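SSFA above keeps a full-resolution spatial branch and a stride-2 semantic branch, upsamples the latter, and blends the two 128-channel outputs with a per-pixel softmax. A usage sketch with the SSFA class above in scope (even H and W assumed so the deconv output matches the skip branch):

import torch

ssfa = SSFA({'feature_num': 128})
x = torch.randn(2, 128, 64, 64)       # N, 128, H, W with H, W even
out = ssfa(x)
print(out.shape)                      # torch.Size([2, 128, 64, 64]) -- resolution preserved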
kernel_size, stride and padding should be a list with the lengths that match n_layers + """ + seq = [] + for i in range(n_layers): + seq.extend([getattr(nn, conv_name)(in_channels, out_channels, kernel_size[i], stride=stride[i], + padding=padding[i], bias=False, **{k: v[i] for k, v in kwargs.items()}), + nn.BatchNorm2d(out_channels, eps=1e-3, momentum=0.01)]) + if i < n_layers - 1 or relu_last: + seq.append(nn.ReLU()) + in_channels = out_channels + if sequential: + return nn.Sequential(*seq) + else: + return seq + + +class Head(nn.Module): + def __init__(self, num_input, num_pred, num_cls, num_iou=2, use_dir=False, num_dir=1): + super(Head, self).__init__() + self.use_dir = use_dir + + self.conv_box = nn.Conv2d(num_input, num_pred, 1) # 128 -> 14 + self.conv_cls = nn.Conv2d(num_input, num_cls, 1) # 128 -> 2 + self.conv_iou = nn.Conv2d(num_input, num_iou, 1, bias=False) + + if self.use_dir: + self.conv_dir = nn.Conv2d(num_input, num_dir, 1) # 128 -> 4 + + def forward(self, x): + box_preds = self.conv_box(x) + cls_preds = self.conv_cls(x) + ret_dict = {"reg_preds": box_preds, "cls_preds": cls_preds} + if self.use_dir: + dir_preds = self.conv_dir(x) # dir_preds.shape=[8, w, h, 4] + ret_dict["dir_preds"] = dir_preds + else: + ret_dict["dir_preds"] = torch.zeros((len(box_preds), 1, 2)) + + ret_dict["iou_preds"] = self.conv_iou(x) + + return ret_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/convgru.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/convgru.py new file mode 100644 index 0000000000000000000000000000000000000000..a489157ae3aef50ad9be825a8c54d79c0dd20ca5 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/convgru.py @@ -0,0 +1,197 @@ +import os +import torch +from torch import nn +from torch.autograd import Variable + + +class ConvGRUCell(nn.Module): + def __init__(self, input_size, input_dim, hidden_dim, kernel_size, bias): + """ + Initialize the ConvLSTM cell + :param input_size: (int, int) + Height and width of input tensor as (height, width). + :param input_dim: int + Number of channels of input tensor. + :param hidden_dim: int + Number of channels of hidden state. + :param kernel_size: (int, int) + Size of the convolutional kernel. + :param bias: bool + Whether or not to add the bias. + :param dtype: torch.cuda.FloatTensor or torch.FloatTensor + Whether or not to use cuda. 
+ """ + super(ConvGRUCell, self).__init__() + self.height, self.width = input_size + self.padding = kernel_size[0] // 2, kernel_size[1] // 2 + self.hidden_dim = hidden_dim + self.bias = bias + + self.conv_gates = nn.Conv2d(in_channels=input_dim + hidden_dim, + out_channels=2 * self.hidden_dim, + # for update_gate,reset_gate respectively + kernel_size=kernel_size, + padding=self.padding, + bias=self.bias) + + self.conv_can = nn.Conv2d(in_channels=input_dim + hidden_dim, + out_channels=self.hidden_dim, + # for candidate neural memory + kernel_size=kernel_size, + padding=self.padding, + bias=self.bias) + + def init_hidden(self, batch_size): + return (Variable( + torch.zeros(batch_size, self.hidden_dim, self.height, self.width))) + + def forward(self, input_tensor, h_cur): + """ + :param self: + :param input_tensor: (b, c, h, w) + input is actually the target_model + :param h_cur: (b, c_hidden, h, w) + current hidden and cell states respectively + :return: h_next, + next hidden state + """ + combined = torch.cat([input_tensor, h_cur], dim=1) + combined_conv = self.conv_gates(combined) + + gamma, beta = torch.split(combined_conv, self.hidden_dim, dim=1) + reset_gate = torch.sigmoid(gamma) + update_gate = torch.sigmoid(beta) + + combined = torch.cat([input_tensor, reset_gate * h_cur], dim=1) + cc_cnm = self.conv_can(combined) + cnm = torch.tanh(cc_cnm) + + h_next = (1 - update_gate) * h_cur + update_gate * cnm + return h_next + + +class ConvGRU(nn.Module): + def __init__(self, input_size, input_dim, hidden_dim, kernel_size, + num_layers, + batch_first=False, bias=True, return_all_layers=False): + """ + :param input_size: (int, int) + Height and width of input tensor as (height, width). + :param input_dim: int e.g. 256 + Number of channels of input tensor. + :param hidden_dim: int e.g. 1024 + Number of channels of hidden state. + :param kernel_size: (int, int) + Size of the convolutional kernel. + :param num_layers: int + Number of ConvLSTM layers + :param dtype: torch.cuda.FloatTensor or torch.FloatTensor + Whether or not to use cuda. + :param alexnet_path: str + pretrained alexnet parameters + :param batch_first: bool + if the first position of array is batch or not + :param bias: bool + Whether or not to add the bias. 
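ConvGRUCell.forward above is the standard convolutional GRU update: with convolutions W_r, W_z, W_h and element-wise product o, r = sigmoid(W_r * [x, h]), z = sigmoid(W_z * [x, h]), h_cand = tanh(W_h * [x, r o h]), and h_next = (1 - z) o h + z o h_cand (gamma/beta in the code are the reset and update pre-activations). A usage sketch with the cell class above in scope:

import torch

cell = ConvGRUCell(input_size=(32, 32), input_dim=64, hidden_dim=64,
                   kernel_size=(3, 3), bias=True)
x = torch.randn(2, 64, 32, 32)               # (b, c, h, w)
h = cell.init_hidden(batch_size=2)           # zero state, (2, 64, 32, 32)
h_next = cell(x, h)
print(h_next.shape)                          # torch.Size([2, 64, 32, 32])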
+ :param return_all_layers: bool + if return hidden and cell states for all layers + """ + super(ConvGRU, self).__init__() + + # Make sure that both `kernel_size` and + # `hidden_dim` are lists having len == num_layers + kernel_size = self._extend_for_multilayer(kernel_size, num_layers) + hidden_dim = self._extend_for_multilayer(hidden_dim, num_layers) + if not len(kernel_size) == len(hidden_dim) == num_layers: + raise ValueError('Inconsistent list length.') + + self.height, self.width = input_size + self.input_dim = input_dim + self.hidden_dim = hidden_dim + self.kernel_size = kernel_size + self.num_layers = num_layers + self.batch_first = batch_first + self.bias = bias + self.return_all_layers = return_all_layers + + cell_list = [] + for i in range(0, self.num_layers): + cur_input_dim = input_dim if i == 0 else hidden_dim[i - 1] + cell_list.append(ConvGRUCell(input_size=(self.height, self.width), + input_dim=cur_input_dim, + hidden_dim=self.hidden_dim[i], + kernel_size=self.kernel_size[i], + bias=self.bias)) + + # convert python list to pytorch module + self.cell_list = nn.ModuleList(cell_list) + + def forward(self, input_tensor, hidden_state=None): + """ + :param input_tensor: (b, t, c, h, w) or (t,b,c,h,w) + depends on if batch first or not extracted features from alexnet + :param hidden_state: + :return: layer_output_list, last_state_list + """ + if not self.batch_first: + # (t, b, c, h, w) -> (b, t, c, h, w) + input_tensor = input_tensor.permute(1, 0, 2, 3, 4) + + # Implement stateful ConvLSTM + if hidden_state is not None: + raise NotImplementedError() + else: + hidden_state = self._init_hidden(batch_size=input_tensor.size(0), + device=input_tensor.device, + dtype=input_tensor.dtype) + + layer_output_list = [] + last_state_list = [] + + seq_len = input_tensor.size(1) + cur_layer_input = input_tensor + + for layer_idx in range(self.num_layers): + h = hidden_state[layer_idx] + output_inner = [] + for t in range(seq_len): + # input current hidden and cell state + # then compute the next hidden + # and cell state through ConvLSTMCell forward function + h = self.cell_list[layer_idx]( + input_tensor=cur_layer_input[:, t, :, :, :], # (b,t,c,h,w) + h_cur=h) + output_inner.append(h) + + layer_output = torch.stack(output_inner, dim=1) + cur_layer_input = layer_output + + layer_output_list.append(layer_output) + last_state_list.append([h]) + + if not self.return_all_layers: + layer_output_list = layer_output_list[-1:] + last_state_list = last_state_list[-1:] + + return layer_output_list, last_state_list + + def _init_hidden(self, batch_size, device=None, dtype=None): + init_states = [] + for i in range(self.num_layers): + init_states.append( + self.cell_list[i].init_hidden(batch_size).to(device).to(dtype)) + return init_states + + @staticmethod + def _check_kernel_size_consistency(kernel_size): + if not (isinstance(kernel_size, tuple) or + (isinstance(kernel_size, list) and all( + [isinstance(elem, tuple) for elem in kernel_size]))): + raise ValueError('`kernel_size` must be tuple or list of tuples') + + @staticmethod + def _extend_for_multilayer(param, num_layers): + if not isinstance(param, list): + param = [param] * num_layers + return param + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/dcn_net.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/dcn_net.py new file mode 100644 index 0000000000000000000000000000000000000000..07d095be11fc3182c4c43481934c90661627e323 --- /dev/null +++ 
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/dcn_net.py @@ -0,0 +1,21 @@ +from mmcv.ops import DeformConv2dPack as DCN +import torch +import torch.nn as nn + +class DCNNet(nn.Module): + def __init__(self, args): + super(DCNNet,self).__init__() + + module_list =[] + in_channels = args['in_channels'] + out_channels = args['out_channels'] + stride = args['stride'] + kernel_size = args['kernel_size'] + padding = args['padding'] + + for i in range(args['n_blocks']): + module_list.append(DCN(in_channels[i],out_channels[i],kernel_size[i],stride=stride[i],padding=padding[i])) + self.model = nn.Sequential(*module_list) + + def forward(self, x): + return self.model(x) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/deformable_transformer_backbone.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/deformable_transformer_backbone.py new file mode 100644 index 0000000000000000000000000000000000000000..861fe508297e29ad5bd6d7d494af9ced91a665bc --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/deformable_transformer_backbone.py @@ -0,0 +1,217 @@ +import numpy as np +import torch +import math +import torch.nn as nn +from opencood.models.sub_modules.resblock import ResNetModified, BasicBlock, Bottleneck +from opencood.models.sub_modules.detr_module import PositionEmbeddingSine, \ + DeformableTransformerEncoderLayer, DeformableTransformerEncoder +from opencood.models.fuse_modules.self_attn import AttFusion +from opencood.models.fuse_modules.deform_fuse import DeformFusion +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple + + +DEBUG = True + + +""" + Different from MaxFusion in max_fuse.py + This is a simplified version. + pairwise_t_matrix is already scaled. 
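DCNNet above reads parallel per-block lists from its args dict; a hypothetical configuration (values are illustrative only, and mmcv's DeformConv2dPack needs its CUDA ops available):

args = {
    'n_blocks': 2,
    'in_channels':  [64, 64],
    'out_channels': [64, 64],
    'kernel_size':  [3, 3],
    'stride':       [1, 1],
    'padding':      [1, 1],
}
# dcn = DCNNet(args)   # stacks two deformable conv blocks in an nn.Sequential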
+""" +def regroup(x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + +class MaxFusion(nn.Module): + def __init__(self): + super().__init__() + def forward(self, x, record_len, pairwise_t_matrix): + """ + pairwise_t_matrix is already normalized [B, L, L, 2, 3] + """ + split_x = regroup(x, record_len) + batch_size = len(record_len) + C, H, W = split_x[0].shape[1:] # C, W, H before + out = [] + for b, xx in enumerate(split_x): + N = xx.shape[0] + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + i = 0 + xx = warp_affine_simple(xx, t_matrix[i, :, :, :], (H, W)) + + h = torch.max(xx, dim=0)[0] # C, W, H before + out.append(h) + return torch.stack(out, dim=0) + + + + +class DeformableTransformerBackbone(nn.Module): + def __init__(self, model_cfg): + super().__init__() + self.model_cfg = model_cfg + self.compress = False + + + self.proj_first = True + if ('proj_first' in model_cfg) and (model_cfg['proj_first'] is False): + self.proj_first = False + self.discrete_ratio = model_cfg['voxel_size'][0] + self.downsample_rate = 1 + + self.level_num = len(model_cfg['layer_nums']) # exactly 3 now + + layer_nums = model_cfg['layer_nums'] + num_filters = model_cfg['num_filters'] + layer_strides = model_cfg['layer_strides'] + hidden_dim = model_cfg['hidden_dim'] + upsample_strides = model_cfg['upsample_strides'] + num_upsample_filters = model_cfg['num_upsample_filter'] + + self.resnet = ResNetModified(BasicBlock, + layer_nums, + layer_strides, + num_filters) + + self.position_embedding = PositionEmbeddingSine(hidden_dim//2) + + self.hidden_dim = hidden_dim + + if model_cfg['fusion'] == 'max': + self.fuse_net = [MaxFusion() for _ in range(self.level_num)] + elif model_cfg['fusion'] == 'self_att': + self.fuse_net = [AttFusion(n_filter) for n_filter in num_filters] + elif model_cfg['fusion'] == 'deform': + self.fuse_net = DeformFusion(num_filters[0], model_cfg['deform_method']) + elif model_cfg['fusion'] == 'deform_w_cycle': + assert self.proj_first is False + assert model_cfg['deform_method'] == 'rigid' + self.fuse_net = DeformFusion(num_filters[0], model_cfg['deform_method'], cycle_consist_loss=True) + else: + raise + + input_proj_list = [] + for i in range(self.level_num): + proj_in_channels = num_filters[i] + input_proj_list.append(nn.Sequential( + nn.Conv2d(proj_in_channels, self.hidden_dim, kernel_size=1), + nn.GroupNorm(32, self.hidden_dim), + )) + + self.input_proj = nn.ModuleList(input_proj_list) + self.level_embed = nn.Parameter(torch.Tensor(self.level_num, self.hidden_dim)) + self.upsample_strides = model_cfg['upsample_strides'] + + encoder_layer = DeformableTransformerEncoderLayer(self.hidden_dim, model_cfg['dim_feedforward'], + model_cfg['dropout'], model_cfg['activation'], + self.level_num, model_cfg['n_head'], model_cfg['enc_n_points']) + self.encoder = DeformableTransformerEncoder(encoder_layer, model_cfg['num_encoder_layers']) + + self.deblocks = nn.ModuleList() + for idx in range(self.level_num): + self.deblocks.append(nn.Sequential( + nn.ConvTranspose2d( + self.hidden_dim, num_upsample_filters[idx], + upsample_strides[idx], + stride=upsample_strides[idx], bias=False + ), + nn.BatchNorm2d(num_upsample_filters[idx], + eps=1e-3, momentum=0.01), + nn.ReLU() + )) + + def forward(self, data_dict): + spatial_features = data_dict['spatial_features'] + if DEBUG: + origin_features = torch.clone(spatial_features) + + record_len = data_dict['record_len'] + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + 
lidar_pose = data_dict['lidar_pose'] # (sum(cav),6 ) + + ups = [] + ret_dict = {} + x = spatial_features + + B = len(record_len) + H, W = x.shape[2:] ## this is original feature map [200, 704], not downsampled + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + + if not self.proj_first: + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + + features = self.resnet(x) # feature[i] is (sum(cav), C, H, W), different i, different C, H, W + + if self.model_cfg['fusion'].startswith('deform'): + x_fuseds = self.fuse_net(features, record_len, pairwise_t_matrix, lidar_pose) + else: + x_fuseds = [self.fuse_net[i](features[i], record_len, pairwise_t_matrix) for i in range(len(features))] + + pos_embeds = [self.position_embedding(x_fused) for x_fused in x_fuseds] + srcs = [self.input_proj[i](x_fuseds[i]) for i in range(len(x_fuseds))] + + + # srcs = [] + # pos_embeds = [] + # for i, feat in enumerate(features): + # x_fused = self.fuse_net[i](feat, record_len, pairwise_t_matrix) + # x_pos = self.position_embedding(x_fused) + # x_fused = self.input_proj[i](x_fused) + # srcs.append(x_fused) # (B, hidden_dim, H1, W1) + # pos_embeds.append(x_pos) + + + src_flatten = [] + mask_flatten = [] + lvl_pos_embed_flatten = [] + spatial_shapes = [] + for lvl, (src, pos_embed) in enumerate(zip(srcs, pos_embeds)): + bs, c, h, w = src.shape + spatial_shape = (h, w) + spatial_shapes.append(spatial_shape) + src = src.flatten(2).transpose(1, 2) + pos_embed = pos_embed.flatten(2).transpose(1, 2) + + lvl_pos_embed = pos_embed + self.level_embed[lvl].view(1, 1, -1) + lvl_pos_embed_flatten.append(lvl_pos_embed) + src_flatten.append(src) + src_flatten = torch.cat(src_flatten, 1) + mask_flatten = torch.zeros(src_flatten.shape[:2], device=src_flatten.device, dtype=torch.bool) + lvl_pos_embed_flatten = torch.cat(lvl_pos_embed_flatten, 1) + spatial_shapes = torch.as_tensor(spatial_shapes, dtype=torch.long, device=src_flatten.device) + level_start_index = torch.cat((spatial_shapes.new_zeros((1, )), spatial_shapes.prod(1).cumsum(0)[:-1])) + valid_ratios = torch.stack([self.get_valid_ratio(m) for m in srcs], 1) + + + memory = self.encoder(src_flatten, spatial_shapes, level_start_index, valid_ratios, lvl_pos_embed_flatten, mask_flatten) + flatten_length = [h*w for (h,w) in spatial_shapes] + output_split = torch.split(memory, flatten_length, dim=1) + output_features = [output.reshape(bs,spatial_shapes[i][0], spatial_shapes[i][1],self.hidden_dim).permute(0,3,1,2) for i, output in enumerate(output_split)] + + ups = [] + for i, feat in enumerate(output_features): + feat = self.deblocks[i](feat) + ups.append(feat) + + ups = torch.cat(ups, dim=1) + + x = ups + + data_dict['spatial_features_2d'] = x + return data_dict + + + def get_valid_ratio(self, x): + N, _, H, W = x.shape + mask = torch.zeros((N,H,W),dtype=torch.bool,device=x.device) + valid_H = torch.sum(~mask[:, :, 0], 1) + valid_W = torch.sum(~mask[:, 0, :], 1) + valid_ratio_h = valid_H.float() / H + valid_ratio_w = valid_W.float() / W + valid_ratio = torch.stack([valid_ratio_w, valid_ratio_h], -1) + return valid_ratio \ No newline at end of file diff --git 
a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/dense_head.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/dense_head.py new file mode 100644 index 0000000000000000000000000000000000000000..c773f8037162a393e7e1191013154c88c431b7d2 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/dense_head.py @@ -0,0 +1,26 @@ +# author: Yifan Lu +# dense head for stage1, predict cls, reg, dir +import torch.nn as nn +import torch + +class Head(nn.Module): + def __init__(self, args): + super(Head, self).__init__() + + self.conv_box = nn.Conv2d(args['num_input'], args['num_pred'], 1) # 128 -> 14 + self.conv_cls = nn.Conv2d(args['num_input'], args['num_cls'], 1) # 128 -> 2 + self.conv_dir = nn.Conv2d(args['num_input'], args['num_dir'], 1) # 128 -> 4 + self.conv_iou = nn.Conv2d(args['num_input'], args['num_dir'], 1, bias=False) + + def forward(self, x): + box_preds = self.conv_box(x) + cls_preds = self.conv_cls(x) + dir_preds = self.conv_dir(x) # dir_preds.shape=[8, w, h, 4] + iou_preds = self.conv_iou(x) + + ret_dict = {"reg_preds": box_preds, \ + "cls_preds": cls_preds, \ + "dir_preds": dir_preds, \ + "iou_preds": iou_preds} + + return ret_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/detr_module.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/detr_module.py new file mode 100644 index 0000000000000000000000000000000000000000..642439391b759bc5168979f50a25d172fd0a1954 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/detr_module.py @@ -0,0 +1,137 @@ +import torch +import torch.nn as nn +import math +import copy +from opencood.models.sub_modules.ms_deform_attn import MSDeformAttn +import torch.nn.functional as F + +class PositionEmbeddingSine(nn.Module): + """ + This is a more standard version of the position embedding, very similar to the one + used by the Attention is all you need paper, generalized to work on images. 
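The stage-1 Head above is configured through a small dict; a hypothetical example consistent with the inline channel hints (128 -> 14/2/4), where num_pred/num_cls/num_dir are assumed to be anchor_num times the per-anchor box, class and direction dimensions, and the IoU branch reuses num_dir output channels:

args = {
    'num_input': 128,   # channels of the shared BEV feature
    'num_pred': 14,     # e.g. 2 anchors x 7 box parameters
    'num_cls': 2,       # e.g. 2 anchors x 1 objectness score
    'num_dir': 4,       # e.g. 2 anchors x 2 direction bins
}
# head = Head(args)
# out = head(features)   # dict with 'reg_preds', 'cls_preds', 'dir_preds', 'iou_preds'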
+ """ + def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None): + super().__init__() + self.num_pos_feats = num_pos_feats + self.temperature = temperature + self.normalize = normalize + if scale is not None and normalize is False: + raise ValueError("normalize should be True if scale is passed") + if scale is None: + scale = 2 * math.pi + self.scale = scale + + def forward(self, x): + """ + Args: + x: torch.Tensor + [N, C, H, W] + """ + mask = torch.zeros((x.shape[0], x.shape[-2],x.shape[-1]), dtype=torch.bool, device=x.device) + assert mask is not None + not_mask = ~mask + y_embed = not_mask.cumsum(1, dtype=torch.float32) + x_embed = not_mask.cumsum(2, dtype=torch.float32) + if self.normalize: + eps = 1e-6 + y_embed = (y_embed - 0.5) / (y_embed[:, -1:, :] + eps) * self.scale + x_embed = (x_embed - 0.5) / (x_embed[:, :, -1:] + eps) * self.scale + + dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) + dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) + + pos_x = x_embed[:, :, :, None] / dim_t + pos_y = y_embed[:, :, :, None] / dim_t + pos_x = torch.stack((pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4).flatten(3) + pos_y = torch.stack((pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4).flatten(3) + pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2) + return pos + + +class DeformableTransformerEncoderLayer(nn.Module): + def __init__(self, + d_model=256, d_ffn=1024, + dropout=0.1, activation="relu", + n_levels=4, n_heads=8, n_points=4): + super().__init__() + + # self attention + self.self_attn = MSDeformAttn(d_model, n_levels, n_heads, n_points) + self.dropout1 = nn.Dropout(dropout) + self.norm1 = nn.LayerNorm(d_model) + + # ffn + self.linear1 = nn.Linear(d_model, d_ffn) + self.activation = _get_activation_fn(activation) + self.dropout2 = nn.Dropout(dropout) + self.linear2 = nn.Linear(d_ffn, d_model) + self.dropout3 = nn.Dropout(dropout) + self.norm2 = nn.LayerNorm(d_model) + + @staticmethod + def with_pos_embed(tensor, pos): + return tensor if pos is None else tensor + pos + + def forward_ffn(self, src): + src2 = self.linear2(self.dropout2(self.activation(self.linear1(src)))) + src = src + self.dropout3(src2) + src = self.norm2(src) + return src + + def forward(self, src, pos, reference_points, spatial_shapes, level_start_index, padding_mask=None): + # self attention + src2 = self.self_attn(self.with_pos_embed(src, pos), reference_points, src, spatial_shapes, level_start_index, padding_mask) + src = src + self.dropout1(src2) + src = self.norm1(src) + + # ffn + src = self.forward_ffn(src) + + return src + + +class DeformableTransformerEncoder(nn.Module): + def __init__(self, encoder_layer, num_layers): + super().__init__() + self.layers = _get_clones(encoder_layer, num_layers) + self.num_layers = num_layers + + @staticmethod + def get_reference_points(spatial_shapes, valid_ratios, device): + reference_points_list = [] + for lvl, (H_, W_) in enumerate(spatial_shapes): + + ref_y, ref_x = torch.meshgrid(torch.linspace(0.5, H_ - 0.5, H_, dtype=torch.float32, device=device), + torch.linspace(0.5, W_ - 0.5, W_, dtype=torch.float32, device=device)) + ref_y = ref_y.reshape(-1)[None] / (valid_ratios[:, None, lvl, 1] * H_) + ref_x = ref_x.reshape(-1)[None] / (valid_ratios[:, None, lvl, 0] * W_) + ref = torch.stack((ref_x, ref_y), -1) + + reference_points_list.append(ref) + reference_points = torch.cat(reference_points_list, 1) + reference_points = reference_points[:, :, None] * 
valid_ratios[:, None] + return reference_points + + def forward(self, src, spatial_shapes, level_start_index, valid_ratios, pos=None, padding_mask=None): + output = src + reference_points = self.get_reference_points(spatial_shapes, valid_ratios, device=src.device) + for _, layer in enumerate(self.layers): + output = layer(output, pos, reference_points, spatial_shapes, level_start_index, padding_mask) + + return output + + + + +def _get_clones(module, N): + return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) + +def _get_activation_fn(activation): + """Return an activation function given a string""" + if activation == "relu": + return F.relu + if activation == "gelu": + return F.gelu + if activation == "glu": + return F.glu + raise RuntimeError(F"activation should be relu/gelu, not {activation}.") \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/discriminator.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/discriminator.py new file mode 100644 index 0000000000000000000000000000000000000000..87bb7d70fb538c26850ddfca7979827c4ff7dd4d --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/discriminator.py @@ -0,0 +1,44 @@ +import torch +import torch.nn as nn +from opencood.models.da_modules.gsl import GradientScalarLayer + +def weights_init(m): + classname = m.__class__.__name__ + if classname.find('Conv') != -1: + nn.init.normal_(m.weight.data, 0.0, 0.02) + elif classname.find('BatchNorm') != -1: + nn.init.normal_(m.weight.data, 1.0, 0.02) + nn.init.constant_(m.bias.data, 0) + +class Discriminator(nn.Module): + def __init__(self, args): + super().__init__() + self.indim = args['indim'] + self.roi_size = args['roi_align_size'] + self.netD = nn.Sequential( + nn.Conv2d(self.indim, self.indim//2, kernel_size=1, stride=1, padding=0), + nn.BatchNorm2d(self.indim//2), + nn.LeakyReLU(0.2, inplace=True), + nn.Conv2d(self.indim//2, self.indim//4, kernel_size=1, stride=1, padding=0), + nn.BatchNorm2d(self.indim//4), + nn.LeakyReLU(0.2, inplace=True), + nn.AvgPool2d(kernel_size=self.roi_size, stride=1, padding=0), # [N, self.indim//4, 1, 1], + nn.Flatten(start_dim=1), + nn.Linear(self.indim//4, self.indim//8), + nn.LeakyReLU(0.2, inplace=True), + nn.Linear(self.indim//8, 1), + nn.Sigmoid() + ) + self.grl = GradientScalarLayer(- args.get('scale', 1)) + + self.netD.apply(weights_init) + + def forward(self, x): + """ + Input: + x: [N, indim, RoIsize, RoIsize] + Output: + cls: [N, 1] + """ + x = self.grl(x) + return self.netD(x) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/downsample_conv.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/downsample_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..ca9550a7f2f75010b80a7a8dd6639dcb469b3b1a --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/downsample_conv.py @@ -0,0 +1,50 @@ +""" +Class used to downsample features by 3*3 conv +""" +import torch.nn as nn + + +class DoubleConv(nn.Module): + """ + Double convoltuion + Args: + in_channels: input channel num + out_channels: output channel num + """ + + def __init__(self, in_channels, out_channels, kernel_size, + stride, padding): + super().__init__() + self.double_conv = nn.Sequential( + nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, + stride=stride, padding=padding), + nn.ReLU(inplace=True), + 
nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1), + nn.ReLU(inplace=True) + ) + + def forward(self, x): + return self.double_conv(x) + + +class DownsampleConv(nn.Module): + def __init__(self, config): + super(DownsampleConv, self).__init__() + self.layers = nn.ModuleList([]) + input_dim = config['input_dim'] + + for (ksize, dim, stride, padding) in zip(config['kernal_size'], + config['dim'], + config['stride'], + config['padding']): + self.layers.append(DoubleConv(input_dim, + dim, + kernel_size=ksize, + stride=stride, + padding=padding)) + input_dim = dim + + def forward(self, x): + for i in range(len(self.layers)): + x = self.layers[i](x) + return x \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/feature_alignnet.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/feature_alignnet.py new file mode 100644 index 0000000000000000000000000000000000000000..679f5ca1bb4da74a0ebbd6791f42ba813101bf4c --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/feature_alignnet.py @@ -0,0 +1,141 @@ + +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.ops import DeformConv2dPack as dconv2d +from timm.models.layers import DropPath +from opencood.models.sub_modules.cbam import BasicBlock +from opencood.models.sub_modules.feature_alignnet_modules import SCAligner, Res1x1Aligner, \ + Res3x3Aligner, Res3x3Aligner, CBAM, ConvNeXt, FANet, SDTAAgliner +import numpy as np + + + +class AlignNet(nn.Module): + def __init__(self, args): + super().__init__() + model_name = args['core_method'] + + if model_name == "scaligner": + self.channel_align = SCAligner(args['args']) + elif model_name == "resnet1x1": + self.channel_align = Res1x1Aligner(args['args']) + elif model_name == "resnet3x3": + self.channel_align = Res3x3Aligner(args['args']) + elif model_name == "sdta": + self.channel_align = SDTAAgliner(args['args']) + elif model_name == "cbam": + self.channel_align = CBAM(args['args']) + elif model_name == "convnext": + self.channel_align = ConvNeXt(args['args']) + elif model_name == "fanet": + self.channel_align = FANet(args['args']) + elif model_name == 'identity': + self.channel_align = nn.Identity() + + self.spatial_align_flag = args.get("spatial_align", False) + if self.spatial_align_flag: + warpnet_indim = args['args']['warpnet_indim'] + dim = args['args']['dim'] + self.teacher = args['args']['teacher'] + setattr(self, "warpnet", + nn.Sequential( + nn.Conv2d(warpnet_indim, warpnet_indim, kernel_size=3, stride=1, padding=1), + nn.BatchNorm2d(warpnet_indim), + nn.ReLU(), + nn.Conv2d(warpnet_indim, dim, kernel_size=3, stride=1, padding=1), + nn.BatchNorm2d(dim), + nn.ReLU(), + nn.Conv2d(dim, 2, kernel_size=3, stride=1, padding=1), + ) + ) + self.theta_identity = torch.tensor([[[1.,0.,0.],[0.,1.,0.]]]) + + self.count = 0 # debug + + def forward(self, x): + return self.channel_align(x) + + + def spatail_align(self, student_feature, teacher_feature, physical_dist): + physical_offset = self.warpnet(torch.cat([student_feature, teacher_feature], dim=1)).permute(0,2,3,1) # N, H, W, 2, unit is meter. 
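+        # The offsets above are predicted in meters: the steps below zero them where the teacher feature is empty, rescale them to normalized grid units, add them to an identity sampling grid, and warp the student feature with grid_sample.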
+ mask = torch.any(teacher_feature != 0, dim=1) + physical_offset *= mask.unsqueeze(-1) + relative_offset = physical_offset * torch.tensor([2./physical_dist[0], 2./physical_dist[1]], device=physical_offset.device) # N, H, W, 2 + warp_field = relative_offset + \ + torch.nn.functional.affine_grid(self.theta_identity.expand(student_feature.shape[0], 2, 3), student_feature.shape).to(relative_offset.device) + spataial_aligned_feature = torch.nn.functional.grid_sample(student_feature, warp_field) + + # self.visualize_offset(physical_offset, warp_field, student_feature, spataial_aligned_feature, teacher_feature) + return spataial_aligned_feature + + def visualize_offset(self, physical_offset, warp_field, feature_before, feature_after, teacher_feature): + """ + physical_offset: shape [N, H, W, 2] + warp_field: shape [N, H, W, 2] + feaure_before: [N, C, H, W] + feature_after: [N, C, H, W] + """ + import seaborn as sns + import matplotlib.pyplot as plt + import os + N = physical_offset.shape[0] + print(physical_offset.shape) + + save_path = "opencood/logs/vislog" + file_idx = self.count + self.count += 1 + + physical_offsets_save_path = os.path.join(save_path, "physical_offsets") + vmax = physical_offset.max() + print(f"physical offset max: {vmax}") + if not os.path.exists(physical_offsets_save_path): + os.mkdir(physical_offsets_save_path) + physical_offset = physical_offset.detach().cpu().numpy() + warp_field = warp_field.detach().cpu().numpy() + for i in range(N): + sns.heatmap(physical_offset[i,:,:,0], cmap="vlag", vmin=-vmax*0.8, vmax=vmax*0.8, square=True) + plt.axis('off') + plt.savefig(os.path.join(physical_offsets_save_path, "{}_{}_physical_x.png".format(file_idx, i)), dpi=500) + plt.close() + + sns.heatmap(physical_offset[i,:,:,1], cmap="vlag", vmin=-vmax*0.8, vmax=vmax*0.8, square=True) + plt.axis('off') + plt.savefig(os.path.join(physical_offsets_save_path, "{}_{}_physical_y.png".format(file_idx, i)), dpi=500) + plt.close() + + sns.heatmap(warp_field[i,:,:,0], cmap="vlag", square=True) + plt.axis('off') + plt.savefig(os.path.join(physical_offsets_save_path, "{}_{}_warpfield_x.png".format(file_idx, i)), dpi=500) + plt.close() + + sns.heatmap(warp_field[i,:,:,1], cmap="vlag", square=True) + plt.axis('off') + plt.savefig(os.path.join(physical_offsets_save_path, "{}_{}_warpfield_y.png".format(file_idx, i)), dpi=500) + plt.close() + + spatial_feature_save_path = os.path.join(save_path, "spatial_feature") + if not os.path.exists(spatial_feature_save_path): + os.mkdir(spatial_feature_save_path) + feature_before = feature_before.detach().cpu().numpy() + feature_after = feature_after.detach().cpu().numpy() + teacher_feature = teacher_feature.detach().cpu().numpy() + for i in range(N): + channel = np.random.randint(64) + plt.imshow(feature_before[i, channel]) + plt.axis("off") + plt.colorbar() + plt.savefig(os.path.join(spatial_feature_save_path, "{}_{}_before.png".format(file_idx, i)), dpi=500) + plt.close() + + plt.imshow(feature_after[i, channel]) + plt.axis("off") + plt.colorbar() + plt.savefig(os.path.join(spatial_feature_save_path, "{}_{}_spaligned.png".format(file_idx, i)), dpi=500) + plt.close() + + plt.imshow(teacher_feature[i, channel]) + plt.axis("off") + plt.colorbar() + plt.savefig(os.path.join(spatial_feature_save_path, "{}_{}_teacher.png".format(file_idx, i)), dpi=500) + plt.close() \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/feature_alignnet_modules.py 
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/feature_alignnet_modules.py new file mode 100644 index 0000000000000000000000000000000000000000..9354d3704bc0721151a8043e4754c82121b147c4 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/feature_alignnet_modules.py @@ -0,0 +1,499 @@ + +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.ops import DeformConv2dPack as dconv2d +from timm.models.layers import DropPath +from opencood.models.sub_modules.cbam import BasicBlock +import math + +class LayerNorm(nn.Module): + def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"): + super().__init__() + self.weight = nn.Parameter(torch.ones(normalized_shape)) + self.bias = nn.Parameter(torch.zeros(normalized_shape)) + self.eps = eps + self.data_format = data_format + if self.data_format not in ["channels_last", "channels_first"]: + raise NotImplementedError + self.normalized_shape = (normalized_shape,) + + def forward(self, x): + if self.data_format == "channels_last": + return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps) + elif self.data_format == "channels_first": + u = x.mean(1, keepdim=True) + s = (x - u).pow(2).mean(1, keepdim=True) + x = (x - u) / torch.sqrt(s + self.eps) + x = self.weight[:, None, None] * x + self.bias[:, None, None] + return x + +class XCA(nn.Module): + def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0., proj_drop=0.): + super().__init__() + self.num_heads = num_heads + self.temperature = nn.Parameter(torch.ones(num_heads, 1, 1)) + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + def forward(self, x): + B, N, C = x.shape + qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads) + qkv = qkv.permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) + + q = q.transpose(-2, -1) + k = k.transpose(-2, -1) + v = v.transpose(-2, -1) + + q = torch.nn.functional.normalize(q, dim=-1) + k = torch.nn.functional.normalize(k, dim=-1) + + attn = (q @ k.transpose(-2, -1)) * self.temperature + # ------------------- + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).permute(0, 3, 1, 2).reshape(B, N, C) + # ------------------ + x = self.proj(x) + x = self.proj_drop(x) + + return x + + @torch.jit.ignore + def no_weight_decay(self): + return {'temperature'} + + +class ConvEncoder(nn.Module): + def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6, expan_ratio=4, kernel_size=1, deformable=False): + super().__init__() + if not deformable: + self.dwconv = nn.Conv2d(dim, dim, kernel_size=kernel_size, padding=kernel_size // 2, groups=dim) + else: + self.dwconv = dconv2d(dim, dim, kernel_size=kernel_size, padding=kernel_size // 2, groups=dim) + self.norm = LayerNorm(dim, eps=1e-6) + self.pwconv1 = nn.Linear(dim, expan_ratio * dim) + self.act = nn.GELU() + self.pwconv2 = nn.Linear(expan_ratio * dim, dim) + self.gamma = nn.Parameter(layer_scale_init_value * torch.ones(dim), + requires_grad=True) if layer_scale_init_value > 0 else None + self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() + + def forward(self, x): + input = x + x = self.dwconv(x) + x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C) + x = self.norm(x) + x = self.pwconv1(x) + x = self.act(x) + x = self.pwconv2(x) + if self.gamma is not None: + x = self.gamma * x + x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W) + + x = input + self.drop_path(x) + return x + +class SDTAEncoder(nn.Module): + def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6, expan_ratio=4, + use_pos_emb=False, num_heads=4, qkv_bias=True, attn_drop=0., drop=0., num_conv=2, deformable=False): + super().__init__() + width = dim + convs = [] + if not deformable: + for i in range(num_conv): + convs.append(nn.Conv2d(dim, dim, kernel_size=1, padding=0, groups=width)) + # convs.append(nn.BatchNorm2d(dim)) + convs.append(nn.ReLU()) + else: + for i in range(num_conv): + convs.append(dconv2d(dim, dim, kernel_size=1, padding=0, groups=width)) + # convs.append(nn.BatchNorm2d(dim)) + convs.append(nn.ReLU()) + self.convs = nn.Sequential(*convs) + + + self.norm_xca = LayerNorm(dim, eps=1e-6) + self.gamma_xca = nn.Parameter(layer_scale_init_value * torch.ones(dim), + requires_grad=True) if layer_scale_init_value > 0 else None + self.xca = XCA(dim, num_heads=num_heads, qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop) + + self.norm = LayerNorm(dim, eps=1e-6) + self.pwconv1 = nn.Linear(dim, expan_ratio * dim) # pointwise/1x1 convs, implemented with linear layers + self.act = nn.GELU() # TODO: MobileViT is using 'swish' + self.pwconv2 = nn.Linear(expan_ratio * dim, dim) + self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)), + requires_grad=True) if layer_scale_init_value > 0 else None + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + + def forward(self, x): + input = x + + x = self.convs(x) + + # XCA + B, C, H, W = x.shape + x = x.reshape(B, C, H * W).permute(0, 2, 1) + x = x + self.drop_path(self.gamma_xca * self.xca(self.norm_xca(x))) + x = x.reshape(B, H, W, C) + + # Inverted Bottleneck + x = self.norm(x) + x = self.pwconv1(x) + x = self.act(x) + x = self.pwconv2(x) + if self.gamma is not None: + x = self.gamma * x + x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W) + + x = input + self.drop_path(x) + + return x + + + +class SDTA(nn.Module): + def __init__(self, args, deform): + super().__init__() + in_ch = args['in_ch'] + self.model = nn.ModuleList() + + for i in range(args['layer_num']): + self.model.append(ConvEncoder(dim=in_ch, deformable=deform)) + self.model.append(SDTAEncoder(dim=in_ch, deformable=deform)) + + def forward(self, x): + for m in self.model: + x = m(x) + return x + + +class Resnet3x3(nn.Module): + def __init__(self, args, deform=False): + super().__init__() + in_ch = args['in_ch'] + layernum = args['layer_num'] + model_list = nn.ModuleList() + for _ in range(layernum): + model_list.append(ResidualBlock(in_ch, in_ch, kernel_size=3, deform=deform)) + + self.model = nn.Sequential(*model_list) + + def forward(self, x): + return self.model(x) + + +class Resnet1x1(nn.Module): + def __init__(self, args, deform=False): + super().__init__() + in_ch = args['in_ch'] + layernum = args['layer_num'] + model_list = nn.ModuleList() + for _ in range(layernum): + model_list.append(ResidualBlock(in_ch, in_ch, kernel_size=1, deform=deform)) + + self.model = nn.Sequential(*model_list) + + def forward(self, x): + return self.model(x) + + +""" +Feature-Align Network with Knowledge Distillation for Efficient Denoising +""" +class ARNetBlock(nn.Module): + def 
__init__(self, indim, outdim): + super().__init__() + self.model = nn.Sequential( + nn.Conv2d(indim, indim, kernel_size=1), + nn.ReLU(), + nn.Conv2d(indim, indim, kernel_size=3, padding=1, groups=8), + nn.ReLU(), + nn.Conv2d(indim, outdim, kernel_size=1) + ) + + def forward(self, x): + return self.model(x) + +class FALayer(nn.Module): + def __init__(self, indim, outdim, imgdim): + super().__init__() + self.conv1 = nn.Conv2d(imgdim, imgdim, 1) + self.relu = nn.ReLU() + self.conv2 = nn.Conv2d(imgdim, outdim, 1) + self.conv3 = nn.Conv2d(imgdim, outdim, 1) + self.arblock = ARNetBlock(indim, outdim) + + def forward(self, feature, img): + feature = self.arblock(feature) + inter = self.relu(self.conv1(img)) + gamma = self.conv2(inter) + beta = self.conv3(inter) + + return feature * gamma + beta + +class FANet(nn.Module): + def __init__(self, args): + super().__init__() + dim = args['dim'] + self.falayer1 = FALayer(dim, dim, dim) + self.falayer2 = FALayer(dim, dim*2, dim) + self.falayer3 = FALayer(dim*2, dim*4, dim) + self.falayer4 = FALayer(dim*4, dim*2, dim) + self.falayer5 = FALayer(dim*2, dim, dim) + self.maxpool = nn.MaxPool2d(2) + self.upsample2d = nn.Upsample(scale_factor=2, mode='bilinear') + + self.skip_conv1 = nn.Conv2d(dim*2, dim*2, 1) + self.skip_conv2 = nn.Conv2d(dim, dim, 1) + + + def forward(self, x): + x_detach = x.detach() + # fake image input + img0 = x_detach + img1 = self.maxpool(img0) + img2 = self.maxpool(img1) + + feature0 = self.falayer1(x, img0) # H,W, dim + feature1 = self.falayer2(self.maxpool(feature0), img1) # H/2, W/2, dim*2 + feature2 = self.falayer3(self.maxpool(feature1), img2) # H/4, W/4, dim*4 + + feature3 = self.falayer4(self.upsample2d(feature2), img1) + self.skip_conv1(feature1) + feature4 = self.falayer5(self.upsample2d(feature3), img0) + self.skip_conv2(feature0) + + return feature4 + + + +""" +CBAM: Convolutional Block Attention Module +""" +class CBAM(nn.Module): + def __init__(self, args): + super().__init__() + dim = args['dim'] + num_of_blocks = args['num_of_blocks'] + model_list = nn.ModuleList() + for _ in range(num_of_blocks): + model_list.append(BasicBlock(dim, dim)) + + self.model = nn.Sequential(*model_list) + + def forward(self, x): + return self.model(x) + + +""" +ConvNeXt +""" +class ConvNeXtBlock(nn.Module): + r""" + https://github.com/facebookresearch/ConvNeXt/blob/main/models/convnext.py + + ConvNeXt Block. There are two equivalent implementations: + (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W) + (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back + We use (2) as we find it slightly faster in PyTorch + + Args: + dim (int): Number of input channels. + drop_path (float): Stochastic depth rate. Default: 0.0 + layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. 
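+        kernel_size (int): Kernel size of the depthwise convolution. Default: 7 +        deform (bool): If True, a 3x3 deformable convolution (followed by GELU) is applied before the depthwise convolution. Default: False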
+ """ + def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6, kernel_size=7, deform=False): + super().__init__() + self.deform = deform + if self.deform: + self.dfconv = dconv2d(dim, dim, kernel_size=3, padding=1) + self.dwconv = nn.Conv2d(dim, dim, kernel_size=kernel_size, padding=kernel_size//2, groups=dim) # depthwise conv + self.norm = LayerNorm(dim, eps=1e-6) + self.pwconv1 = nn.Linear(dim, 4 * dim) # pointwise/1x1 convs, implemented with linear layers + self.act = nn.GELU() + self.pwconv2 = nn.Linear(4 * dim, dim) + self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)), + requires_grad=True) if layer_scale_init_value > 0 else None + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + + def forward(self, x): + input = x + if self.deform: + x = self.dfconv(x) + x = self.act(x) + x = self.dwconv(x) + x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C) + x = self.norm(x) + x = self.pwconv1(x) + x = self.act(x) + x = self.pwconv2(x) + if self.gamma is not None: + x = self.gamma * x + x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W) + + x = input + self.drop_path(x) + return x + +class ConvNeXt(nn.Module): + def __init__(self, args): + super().__init__() + dim = args['dim'] + kernel_size = args.get("kernel_size", 7) + num_of_blocks = args['num_of_blocks'] + deform = args.get('deform', False) + model_list = nn.ModuleList() + for _ in range(num_of_blocks): + model_list.append(ConvNeXtBlock(dim, kernel_size=kernel_size, deform=deform)) + + self.model = nn.Sequential(*model_list) + + def forward(self, x): + return self.model(x) + + + + +""" +Resnet1x1 Aligner +""" +class ResidualBlock(nn.Module): + def __init__(self, in_channels, out_channels, use_1x1conv=False, kernel_size=3, deform=False): + super(ResidualBlock, self).__init__() + if kernel_size == 3: + padding = 1 + stride = 1 + elif kernel_size == 1: + padding = 0 + stride = 1 + else: + raise("Not Supported") + + if not deform: + self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=padding, stride=stride) + self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=kernel_size, padding=padding) + else: + self.conv1 = dconv2d(in_channels, out_channels, kernel_size=kernel_size, padding=padding, stride=stride) + self.conv2 = dconv2d(out_channels, out_channels, kernel_size=kernel_size, padding=padding) + + # 1x1conv来升维 + if use_1x1conv: + self.conv3 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride) + else: + self.conv3 = None + self.bn1 = nn.BatchNorm2d(out_channels) + self.bn2 = nn.BatchNorm2d(out_channels) + + def forward(self, X): + Y = F.relu(self.bn1(self.conv1(X))) + Y = self.bn2(self.conv2(Y)) + if self.conv3: + X = self.conv3(X) + return F.relu(Y + X) + + +class Res1x1Aligner(nn.Module): + def __init__(self, args): + super().__init__() + dim = args['dim'] + num_of_blocks = args['num_of_blocks'] + deform = args.get('deform', False) + model_list = nn.ModuleList() + for _ in range(num_of_blocks): + model_list.append(ResidualBlock(dim, dim, kernel_size=1, deform=deform)) + + self.model = nn.Sequential(*model_list) + + def forward(self, x): + return self.model(x) + +class Res3x3Aligner(nn.Module): + def __init__(self, args): + super().__init__() + dim = args['dim'] + num_of_blocks = args['num_of_blocks'] + deform = args.get('deform', False) + model_list = nn.ModuleList() + for _ in range(num_of_blocks): + model_list.append(ResidualBlock(dim, dim, kernel_size=3, deform=deform)) + + self.model = nn.Sequential(*model_list) + + 
def forward(self, x): + return self.model(x) + + +class SDTAAgliner(nn.Module): + def __init__(self, args): + super().__init__() + in_ch = args['dim'] + self.model = nn.ModuleList() + + for i in range(args['num_of_blocks']): + self.model.append(ConvEncoder(dim=in_ch, deformable=False)) + self.model.append(SDTAEncoder(dim=in_ch, deformable=False)) + + def forward(self, x): + for m in self.model: + x = m(x) + return x + +""" +Laynorm + MLP +""" +class ResMLP(nn.Module): + def __init__(self, num_of_layers=2, dim=64): + super().__init__() + model_list = [nn.LayerNorm(dim)] + for i in range(num_of_layers): + model_list.append(nn.Linear(dim, dim)) + model_list.append(nn.GELU()) + self.model = nn.Sequential(*model_list) + + def forward(self, x): + return x + self.model(x) + +class SCAligner(nn.Module): + """ + Structure: + + Input: + FeatureMap (NCHW) + Model: + Permute -> (NHWC) + ------------------------ x M + LayerNorm -> (NHWC) + MLP(GELU) x n + skip_conn-> (NHWC) + ------------------------ + Permute -> (NCHW) + + if Camera, additionally + + Input: + FeatureMap (NCHW) + Coming FeatureMap Mean (NCHW) + Model: + cat -> (N 2C HW) + conv2d -> (N2HW) + warp FeatureMap (NCHW) + + """ + def __init__(self, args): + super().__init__() + num_of_blocks = args['num_of_blocks'] + num_of_layers = args['num_of_layers'] + dim = args['dim'] + model_list = [] + for _ in range(num_of_blocks): + model_list.append(ResMLP(num_of_layers, dim)) + self.backbone = nn.Sequential(*model_list) + + + def forward(self, x): + x = x.permute(0,2,3,1) + x = self.backbone(x) + x = x.permute(0,3,1,2) + return x diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/functions/__init__.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/functions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8a2197bda3199aa32cafc5b9d396479609853dd2 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/functions/__init__.py @@ -0,0 +1,10 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from .ms_deform_attn_func import MSDeformAttnFunction + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/functions/ms_deform_attn_func.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/functions/ms_deform_attn_func.py new file mode 100644 index 0000000000000000000000000000000000000000..8c5df8cf5d23aca963eec6c1133c180b37289607 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/functions/ms_deform_attn_func.py @@ -0,0 +1,61 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import torch +import torch.nn.functional as F +from torch.autograd import Function +from torch.autograd.function import once_differentiable + +import MultiScaleDeformableAttention as MSDA + + +class MSDeformAttnFunction(Function): + @staticmethod + def forward(ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step): + ctx.im2col_step = im2col_step + output = MSDA.ms_deform_attn_forward( + value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, ctx.im2col_step) + ctx.save_for_backward(value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors + grad_value, grad_sampling_loc, grad_attn_weight = \ + MSDA.ms_deform_attn_backward( + value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, grad_output, ctx.im2col_step) + + return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None + + +def ms_deform_attn_core_pytorch(value, value_spatial_shapes, sampling_locations, attention_weights): + # for debug and test only, + # need to use cuda version instead + N_, S_, M_, D_ = value.shape + _, Lq_, M_, L_, P_, _ = sampling_locations.shape + value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1) + sampling_grids = 2 * sampling_locations - 1 + sampling_value_list = [] + for lid_, (H_, W_) in enumerate(value_spatial_shapes): + # N_, H_*W_, M_, D_ -> N_, H_*W_, M_*D_ -> N_, M_*D_, H_*W_ -> N_*M_, D_, H_, W_ + value_l_ = value_list[lid_].flatten(2).transpose(1, 2).reshape(N_*M_, D_, H_, W_) + # N_, Lq_, M_, P_, 2 -> N_, M_, Lq_, P_, 2 -> N_*M_, Lq_, P_, 2 + sampling_grid_l_ = sampling_grids[:, :, :, lid_].transpose(1, 2).flatten(0, 1) + # N_*M_, D_, Lq_, P_ + sampling_value_l_ = F.grid_sample(value_l_, sampling_grid_l_, + mode='bilinear', padding_mode='zeros', align_corners=False) + sampling_value_list.append(sampling_value_l_) + # (N_, Lq_, M_, L_, P_) -> (N_, M_, Lq_, L_, P_) -> (N_, M_, 1, Lq_, L_*P_) + attention_weights = attention_weights.transpose(1, 2).reshape(N_*M_, 1, Lq_, L_*P_) + output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights).sum(-1).view(N_, M_*D_, Lq_) + return output.transpose(1, 2).contiguous() diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/height_compression.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/height_compression.py new file mode 100644 index 0000000000000000000000000000000000000000..1d7f38ed23f62fab0709b4e1ac5b141aaefd1f18 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/height_compression.py @@ -0,0 +1,27 @@ +import torch.nn as nn + + +class HeightCompression(nn.Module): + def __init__(self, model_cfg, **kwargs): + super().__init__() + self.model_cfg = 
model_cfg + self.num_bev_features = self.model_cfg['feature_num'] + + def forward(self, batch_dict): + """ + Args: + batch_dict: + encoded_spconv_tensor: sparse tensor + Returns: + batch_dict: + spatial_features: + + """ + encoded_spconv_tensor = batch_dict['encoded_spconv_tensor'] + spatial_features = encoded_spconv_tensor.dense() + N, C, D, H, W = spatial_features.shape + spatial_features = spatial_features.view(N, C * D, H, W) + batch_dict['spatial_features'] = spatial_features + batch_dict['spatial_features_stride'] = \ + batch_dict['encoded_spconv_tensor_stride'] + return batch_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/hmsa.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/hmsa.py new file mode 100644 index 0000000000000000000000000000000000000000..814b298cc3de2636c2e31cf4949e64086a69abde --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/hmsa.py @@ -0,0 +1,151 @@ +import torch +from torch import nn + +from einops import rearrange + + +class HGTCavAttention(nn.Module): + def __init__(self, dim, heads, num_types=2, + num_relations=4, dim_head=64, dropout=0.1): + super().__init__() + inner_dim = heads * dim_head + + self.heads = heads + self.scale = dim_head ** -0.5 + self.num_types = num_types + + self.attend = nn.Softmax(dim=-1) + self.drop_out = nn.Dropout(dropout) + self.k_linears = nn.ModuleList() + self.q_linears = nn.ModuleList() + self.v_linears = nn.ModuleList() + self.a_linears = nn.ModuleList() + self.norms = nn.ModuleList() + for t in range(num_types): + self.k_linears.append(nn.Linear(dim, inner_dim)) + self.q_linears.append(nn.Linear(dim, inner_dim)) + self.v_linears.append(nn.Linear(dim, inner_dim)) + self.a_linears.append(nn.Linear(inner_dim, dim)) + + self.relation_att = nn.Parameter( + torch.Tensor(num_relations, heads, dim_head, dim_head)) + self.relation_msg = nn.Parameter( + torch.Tensor(num_relations, heads, dim_head, dim_head)) + + torch.nn.init.xavier_uniform(self.relation_att) + torch.nn.init.xavier_uniform(self.relation_msg) + + def to_qkv(self, x, types): + # x: (B,H,W,L,C) + # types: (B,L) + q_batch = [] + k_batch = [] + v_batch = [] + + for b in range(x.shape[0]): + q_list = [] + k_list = [] + v_list = [] + + for i in range(x.shape[-2]): + # (H,W,1,C) + q_list.append( + self.q_linears[types[b, i]](x[b, :, :, i, :].unsqueeze(2))) + k_list.append( + self.k_linears[types[b, i]](x[b, :, :, i, :].unsqueeze(2))) + v_list.append( + self.v_linears[types[b, i]](x[b, :, :, i, :].unsqueeze(2))) + # (1,H,W,L,C) + q_batch.append(torch.cat(q_list, dim=2).unsqueeze(0)) + k_batch.append(torch.cat(k_list, dim=2).unsqueeze(0)) + v_batch.append(torch.cat(v_list, dim=2).unsqueeze(0)) + # (B,H,W,L,C) + q = torch.cat(q_batch, dim=0) + k = torch.cat(k_batch, dim=0) + v = torch.cat(v_batch, dim=0) + return q, k, v + + def get_relation_type_index(self, type1, type2): + return type1 * self.num_types + type2 + + def get_hetero_edge_weights(self, x, types): + w_att_batch = [] + w_msg_batch = [] + + for b in range(x.shape[0]): + w_att_list = [] + w_msg_list = [] + + for i in range(x.shape[-2]): + w_att_i_list = [] + w_msg_i_list = [] + + for j in range(x.shape[-2]): + e_type = self.get_relation_type_index(types[b, i], + types[b, j]) + w_att_i_list.append(self.relation_att[e_type].unsqueeze(0)) + w_msg_i_list.append(self.relation_msg[e_type].unsqueeze(0)) + w_att_list.append(torch.cat(w_att_i_list, dim=0).unsqueeze(0)) + w_msg_list.append(torch.cat(w_msg_i_list, 
dim=0).unsqueeze(0)) + + w_att_batch.append(torch.cat(w_att_list, dim=0).unsqueeze(0)) + w_msg_batch.append(torch.cat(w_msg_list, dim=0).unsqueeze(0)) + + # (B,M,L,L,C_head,C_head) + w_att = torch.cat(w_att_batch, dim=0).permute(0, 3, 1, 2, 4, 5) + w_msg = torch.cat(w_msg_batch, dim=0).permute(0, 3, 1, 2, 4, 5) + return w_att, w_msg + + def to_out(self, x, types): + out_batch = [] + for b in range(x.shape[0]): + out_list = [] + for i in range(x.shape[-2]): + out_list.append( + self.a_linears[types[b, i]](x[b, :, :, i, :].unsqueeze(2))) + out_batch.append(torch.cat(out_list, dim=2).unsqueeze(0)) + out = torch.cat(out_batch, dim=0) + return out + + def forward(self, x, mask, prior_encoding): + # x: (B, L, H, W, C) -> (B, H, W, L, C) + # mask: (B, H, W, L, 1) + # prior_encoding: (B,L,H,W,3) + x = x.permute(0, 2, 3, 1, 4) + # mask: (B, 1, H, W, L, 1) + mask = mask.unsqueeze(1) + # (B,L) + velocities, dts, types = [itm.squeeze(-1) for itm in + prior_encoding[:, :, 0, 0, :].split( + [1, 1, 1], dim=-1)] + types = types.to(torch.int) + dts = dts.to(torch.int) + qkv = self.to_qkv(x, types) + # (B,M,L,L,C_head,C_head) + w_att, w_msg = self.get_hetero_edge_weights(x, types) + + # q: (B, M, H, W, L, C) + q, k, v = map(lambda t: rearrange(t, 'b h w l (m c) -> b m h w l c', + m=self.heads), (qkv)) + # attention, (B, M, H, W, L, L) + att_map = torch.einsum( + 'b m h w i p, b m i j p q, bm h w j q -> b m h w i j', + [q, w_att, k]) * self.scale + # add mask + att_map = att_map.masked_fill(mask == 0, -float('inf')) + # softmax + att_map = self.attend(att_map) + + # out:(B, M, H, W, L, C_head) + v_msg = torch.einsum('b m i j p c, b m h w j p -> b m h w i j c', + w_msg, v) + out = torch.einsum('b m h w i j, b m h w i j c -> b m h w i c', + att_map, v_msg) + + out = rearrange(out, 'b m h w l c -> b h w l (m c)', + m=self.heads) + out = self.to_out(out, types) + out = self.drop_out(out) + # (B L H W C) + out = out.permute(0, 3, 1, 2, 4) + return out \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/lss_submodule.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/lss_submodule.py new file mode 100644 index 0000000000000000000000000000000000000000..331eeb41c6a6e3aa5f877eace32e6da2f282fe74 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/lss_submodule.py @@ -0,0 +1,417 @@ +import torch +from torch import nn +from efficientnet_pytorch import EfficientNet +from torchvision.models.resnet import resnet18 +from torchvision.models.resnet import resnet101 +import torch.nn.functional as F +from opencood.utils.camera_utils import bin_depths +from opencood.models.sub_modules.torch_transformation_utils import \ + warp_affine_simple +from opencood.utils.transformation_utils import normalize_pairwise_tfm +from opencood.models.fuse_modules.fusion_in_one import \ + MaxFusion, AttFusion, V2VNetFusion, V2XViTFusion, When2commFusion, Where2commFusion, DiscoFusion + +class Up(nn.Module): + def __init__(self, in_channels, out_channels, scale_factor=2): + super().__init__() + + self.up = nn.Upsample(scale_factor=scale_factor, mode='bilinear', + align_corners=True) # upsample BxCxHxW -> BxCx2Hx2W + + self.conv = nn.Sequential( # two 3x3 convolutions + nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(out_channels), + nn.ReLU(inplace=True), # inplace=True performs the op in place to save memory + nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(out_channels), +
nn.ReLU(inplace=True) + ) + + def forward(self, x1, x2): + x1 = self.up(x1) # upsample x1 + x1 = torch.cat([x2, x1], dim=1) # concat x1 and x2 + return self.conv(x1) + + +class CamEncode(nn.Module): # extract image features for image encoding + def __init__(self, D, C, downsample, ddiscr, mode, use_gt_depth=False, depth_supervision=True): + super(CamEncode, self).__init__() + self.D = D # 42 + self.C = C # 64 + self.downsample = downsample + self.d_min = ddiscr[0] + self.d_max = ddiscr[1] + self.num_bins = ddiscr[2] + self.mode = mode + self.use_gt_depth = use_gt_depth + self.depth_supervision = depth_supervision # only relevant when gt depth is not used + + + self.trunk = EfficientNet.from_pretrained("efficientnet-b0") # use efficientnet to extract features + + self.up1 = Up(320+112, 512) # upsampling module, input/output channels are 320+112 and 512 + if downsample == 8: + self.up2 = Up(512+40, 512) + if not use_gt_depth: + self.depth_head = nn.Conv2d(512, self.D, kernel_size=1, padding=0) # 1x1 conv to change the channel dimension + + self.image_head = nn.Conv2d(512, self.C, kernel_size=1, padding=0) + + + def get_depth_dist(self, x, eps=1e-5): # softmax over the depth dimension, giving each pixel's probability over depth bins + return F.softmax(x, dim=1) + + def get_gt_depth_dist(self, x): # one-hot over the depth dimension, giving each pixel's probability over depth bins + """ + Args: + x: [B*N, H, W] + Returns: + x: [B*N, D, fH, fW] + """ + target = self.training + torch.clamp_max_(x, self.d_max) # save memory + # [B*N, H, W], indices (float), value: [0, num_bins) + depth_indices, mask = bin_depths(x, self.mode, self.d_min, self.d_max, self.num_bins, target=target) + depth_indices = depth_indices[:, self.downsample//2::self.downsample, self.downsample//2::self.downsample] + onehot_dist = F.one_hot(depth_indices.long()).permute(0,3,1,2) # [B*N, num_bins, fH, fW] + + if not target: + mask = mask[:, self.downsample//2::self.downsample, self.downsample//2::self.downsample].unsqueeze(1) + onehot_dist *= mask + + return onehot_dist, depth_indices + + def get_eff_features(self, x): # extract features with efficientnet + # adapted from https://github.com/lukemelas/EfficientNet-PyTorch/blob/master/efficientnet_pytorch/model.py#L231 + + endpoints = dict() + + # Stem + x = self.trunk._swish(self.trunk._bn0(self.trunk._conv_stem(x))) # x: 24 x 32 x 64 x 176 + prev_x = x + + # Blocks + for idx, block in enumerate(self.trunk._blocks): + drop_connect_rate = self.trunk._global_params.drop_connect_rate + if drop_connect_rate: + drop_connect_rate *= float(idx) / len(self.trunk._blocks) # scale drop connect_rate + x = block(x, drop_connect_rate=drop_connect_rate) + if prev_x.size(2) > x.size(2): + endpoints['reduction_{}'.format(len(endpoints)+1)] = prev_x + prev_x = x + + # Head + endpoints['reduction_{}'.format(len(endpoints)+1)] = x # x: 24 x 320 x 4 x 11 + x = self.up1(endpoints['reduction_5'], endpoints['reduction_4']) # upsample endpoints['reduction_5'] and concat it with endpoints['reduction_4'] + if self.downsample == 8: + x = self.up2(x, endpoints['reduction_3']) + return x # x: 24 x 512 x 8 x 22 + + def forward(self, x): + """ + Returns: + log_depth : [B*N, D, fH, fW], or None if not used later + depth_gt_indices : [B*N, fH, fW], or None if not used later + new_x : [B*N, C, D, fH, fW] + """ + x_img = x[:,:3:,:,:] + features = self.get_eff_features(x_img) # depth: B*N x D x fH x fW(24 x 41 x 8 x 22) x: B*N x C x D x fH x fW(24 x 64 x 41 x 8 x 22) + x_img = self.image_head(features) + + if self.depth_supervision or self.use_gt_depth: # depth data must exist + x_depth = x[:,3,:,:] + depth_gt, depth_gt_indices = self.get_gt_depth_dist(x_depth) + + if self.use_gt_depth: + new_x = depth_gt.unsqueeze(1) * x_img.unsqueeze(2) # new_x: 24 x 64 x 41 x 8 x
18 + return None, new_x + else: + depth_logit = self.depth_head(features) + depth = self.get_depth_dist(depth_logit) + new_x = depth.unsqueeze(1) * x_img.unsqueeze(2) # new_x: 24 x 64 x 41 x 8 x 18 + if self.depth_supervision: + return (depth_logit, depth_gt_indices), new_x + else: + return None, new_x + +class CamEncode_Resnet101(nn.Module): # extract image features for image encoding + def __init__(self, D, C, downsample, ddiscr, mode, use_gt_depth=False, depth_supervision=True): + super(CamEncode_Resnet101, self).__init__() + self.D = D # 42 + self.C = C # 64 + self.downsample = downsample + self.d_min = ddiscr[0] + self.d_max = ddiscr[1] + self.num_bins = ddiscr[2] + self.mode = mode + self.use_gt_depth = use_gt_depth + self.depth_supervision = depth_supervision # only relevant when gt depth is not used + + trunk = resnet101(pretrained=False, zero_init_residual=True) # use resnet101 to extract features + self.conv1 = trunk.conv1 + self.bn1 = trunk.bn1 + self.relu = trunk.relu + self.maxpool = trunk.maxpool + self.layer1 = trunk.layer1 + self.layer2 = trunk.layer2 + self.layer3 = nn.Identity() + + self.up1 = Up(320+112, 512) # upsampling module, input/output channels are 320+112 and 512 + if downsample == 8: + self.up2 = Up(512+40, 512) + if not use_gt_depth: + self.depth_head = nn.Conv2d(512, self.D, kernel_size=1, padding=0) # 1x1 conv to change the channel dimension + + self.image_head = nn.Conv2d(512, self.C, kernel_size=1, padding=0) + + + def get_depth_dist(self, x, eps=1e-5): # softmax over the depth dimension, giving each pixel's probability over depth bins + return F.softmax(x, dim=1) + + def get_gt_depth_dist(self, x): # one-hot over the depth dimension, giving each pixel's probability over depth bins + """ + Args: + x: [B*N, H, W] + Returns: + x: [B*N, D, fH, fW] + """ + target = self.training + torch.clamp_max_(x, self.d_max) # save memory + # [B*N, H, W], indices (float), value: [0, num_bins) + depth_indices, mask = bin_depths(x, self.mode, self.d_min, self.d_max, self.num_bins, target=target) + depth_indices = depth_indices[:, self.downsample//2::self.downsample, self.downsample//2::self.downsample] + onehot_dist = F.one_hot(depth_indices.long()).permute(0,3,1,2) # [B*N, num_bins, fH, fW] + + if not target: + mask = mask[:, self.downsample//2::self.downsample, self.downsample//2::self.downsample].unsqueeze(1) + onehot_dist *= mask + + return onehot_dist, depth_indices + + def resnet101_forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x_8 = x # channel = 512 + x = self.layer3(x) # channel = 1024 + return x_8, x + + def get_eff_features(self, x): # extract features (resnet101 backbone here, despite the method name) + # adapted from https://github.com/lukemelas/EfficientNet-PyTorch/blob/master/efficientnet_pytorch/model.py#L231 + #x: 16 x 3 x 480 x 640 + x_8, x_16 = self.resnet101_forward(x) # 16x512x60x80 , 16x1024x30x40 + if self.downsample == 8: + return x_8 + else: + res = self.final_conv(x_16) + return res + + + def forward(self, x): + """ + Returns: + log_depth : [B*N, D, fH, fW], or None if not used later + depth_gt_indices : [B*N, fH, fW], or None if not used later + new_x : [B*N, C, D, fH, fW] + """ + #x: 16 x 3 x 480 x 640 + #print(x.shape) + x_img = x[:,:3:,:,:] + features = self.get_eff_features(x_img) # depth: B*N x D x fH x fW(24 x 41 x 8 x 22) x: B*N x C x D x fH x fW(24 x 64 x 41 x 8 x 22) + x_img = self.image_head(features) + + if self.depth_supervision or self.use_gt_depth: # depth data must exist + x_depth = x[:,3,:,:] + depth_gt, depth_gt_indices = self.get_gt_depth_dist(x_depth) + + if self.use_gt_depth: + new_x = depth_gt.unsqueeze(1) * x_img.unsqueeze(2) # new_x: 24 x 64 x 41 x 8 x 18 + return None, new_x + else: + depth_logit =
self.depth_head(features) + depth = self.get_depth_dist(depth_logit) + new_x = depth.unsqueeze(1) * x_img.unsqueeze(2) # new_x: 24 x 64 x 41 x 8 x 18 + if self.depth_supervision: + return (depth_logit, depth_gt_indices), new_x + else: + return None, new_x + + +class BevEncode(nn.Module): + def __init__(self, inC, outC): # inC: 64 outC: not 1 for object detection + super(BevEncode, self).__init__() + + # use the first 3 stages of resnet as the backbone + trunk = resnet18(pretrained=False, zero_init_residual=True) + self.conv1 = nn.Conv2d(inC, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = trunk.bn1 + self.relu = trunk.relu + + self.layer1 = trunk.layer1 + self.layer2 = trunk.layer2 + self.layer3 = trunk.layer3 + + self.up1 = Up(64+256, 256, scale_factor=4) + self.up2 = nn.Sequential( # 2x upsample -> 3x3 conv -> 1x1 conv + nn.Upsample(scale_factor=2, mode='bilinear', + align_corners=True), + nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(128), + nn.ReLU(inplace=True), + nn.Conv2d(128, outC, kernel_size=1, padding=0), + ) + + def forward(self, x): # x: 4 x 64 x 240 x 240 + x = self.conv1(x) # x: 4 x 64 x 120 x 120 + x = self.bn1(x) + x = self.relu(x) + + x1 = self.layer1(x) # x1: 4 x 64 x 120 x 120 + x = self.layer2(x1) # x: 4 x 128 x 60 x 60 + x = self.layer3(x) # x: 4 x 256 x 30 x 30 + + x = self.up1(x, x1) # 4x upsample x and concat it with x1; x: 4 x 256 x 120 x 120 + x = self.up2(x) # 2x upsample -> 3x3 conv -> 1x1 conv; x: 4 x 1 x 240 x 240 + + return x + +class BevEncodeSSFusion(nn.Module): + """ + Single-scale fusion version of the ResNet BEV encoder + """ + def __init__(self, fusion_args): # inC: 64 outC: not 1 for object detection + super(BevEncodeSSFusion, self).__init__() + args = fusion_args['args'] + inC = args['in_channels'] + self.discrete_ratio = args['voxel_size'][0] + self.downsample_rate = 1 + # use the first 3 stages of resnet as the backbone + trunk = resnet18(pretrained=False, zero_init_residual=True) + self.conv1 = nn.Conv2d(inC, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = trunk.bn1 + self.relu = trunk.relu # make it 64 channels + + self.layer1 = trunk.layer1 + self.layer2 = trunk.layer2 + self.layer3 = trunk.layer3 + + self.up_layer1 = Up(64+256, 256, scale_factor=2) + self.up_layer2 = Up(128+256, 256, scale_factor=2) + self.down_layer = nn.Sequential( + nn.Conv2d(256, 256, kernel_size=3, + stride=1, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(256, 128, kernel_size=3, + stride=1,padding=1), + nn.ReLU(inplace=True) + ) + if fusion_args['core_method'] == "max": + self.fuse_module = MaxFusion() + elif fusion_args['core_method'] == "att": + self.fuse_module = AttFusion(256) + elif fusion_args['core_method'] == "disconet": + self.fuse_module = DiscoFusion(256) + elif fusion_args['core_method'] == "v2vnet": + self.fuse_module = V2VNetFusion(args['v2vnet']) + elif fusion_args['core_method'] == "v2xvit": + self.fuse_module = V2XViTFusion(args['v2xvit']) + elif fusion_args['core_method'] == "when2comm": + self.fuse_module = When2commFusion(args['when2comm']) + elif fusion_args['core_method'] == "where2comm": + self.fuse_module = Where2commFusion(args['where2comm']) + else: + raise NotImplementedError(f"unsupported fusion method: {fusion_args['core_method']}") + + def forward(self, x, record_len, pairwise_t_matrix): # x: 4 x 64 x 240 x 240 + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + # (B,L,L,2,3) + pairwise_t_matrix = normalize_pairwise_tfm(pairwise_t_matrix, H, W, self.discrete_ratio, self.downsample_rate) + + x = self.conv1(x) # x: 4 x 64 x 120 x 120 + x = self.bn1(x) + x = self.relu(x) + + x1 = self.layer1(x) # x1: 4 x 64 x 120 x 120 + x2 =
self.layer2(x1) # x2: 4 x 128 x 60 x 60 + x3 = self.layer3(x2) # x3: 4 x 256 x 30 x 30 + x_single = self.down_layer(self.up_layer1(self.up_layer2(x3, x2), x1)) # 4 x 128 x 120 x 120 + + x = self.up_layer1(self.up_layer2(x3, x2), x1) # 4 x 256 x 120 x 120 + x_fuse = self.fuse_module(x, record_len, pairwise_t_matrix) + x_fuse = self.down_layer(x_fuse) + + + return x_single, x_fuse + + + +class BevEncodeMSFusion(nn.Module): + """ + Multiscale version of ResNet Encoder + """ + def __init__(self, fusion_args): # inC: 64 outC: not 1 for object detection + super(BevEncodeMSFusion, self).__init__() + args = fusion_args['args'] + inC = args['in_channels'] + self.discrete_ratio = args['voxel_size'][0] + self.downsample_rate = 1 + # use the first 3 stages of resnet as the backbone + trunk = resnet18(pretrained=False, zero_init_residual=True) + self.conv1 = nn.Conv2d(inC, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = trunk.bn1 + self.relu = trunk.relu # make it 64 channels + + self.layer1 = trunk.layer1 + self.layer2 = trunk.layer2 + self.layer3 = trunk.layer3 + + self.up_layer1 = Up(64+256, 256, scale_factor=2) + self.up_layer2 = Up(128+256, 256, scale_factor=2) + self.down_layer = nn.Sequential( + nn.Conv2d(256, 256, kernel_size=3, + stride=1, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(256, 128, kernel_size=3, + stride=1,padding=1), + nn.ReLU(inplace=True) + ) + if fusion_args['core_method'] == "max_ms": + self.fuse_module = nn.ModuleList([MaxFusion(), MaxFusion(), MaxFusion()]) + elif fusion_args['core_method'] == "att_ms": + self.fuse_module = nn.ModuleList([AttFusion(64), AttFusion(128), AttFusion(256)]) + else: + raise NotImplementedError(f"unsupported fusion method: {fusion_args['core_method']}") + + def forward(self, x, record_len, pairwise_t_matrix): # x: 4 x 64 x 240 x 240 + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + # (B,L,L,2,3) + pairwise_t_matrix = normalize_pairwise_tfm(pairwise_t_matrix, H, W, self.discrete_ratio, self.downsample_rate) + + x = self.conv1(x) # x: 4 x 64 x 120 x 120 + x = self.bn1(x) + x = self.relu(x) + + x1 = self.layer1(x) # x1: 4 x 64 x 120 x 120 + x2 = self.layer2(x1) # x2: 4 x 128 x 60 x 60 + x3 = self.layer3(x2) # x3: 4 x 256 x 30 x 30 + x_single = self.down_layer(self.up_layer1(self.up_layer2(x3, x2), x1)) # 4 x 64 x 120 x 120 + + x1_fuse = self.fuse_module[0](x1, record_len, pairwise_t_matrix) + x2_fuse = self.fuse_module[1](x2, record_len, pairwise_t_matrix) + x3_fuse = self.fuse_module[2](x3, record_len, pairwise_t_matrix) + + x_fuse = self.down_layer(self.up_layer1(self.up_layer2(x3_fuse, x2_fuse), x1_fuse)) # 4 x 64 x 120 x 120 + + return x_single, x_fuse + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/mash_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/mash_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..995038082841796698d7030632d4e69bd6695f09 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/mash_utils.py @@ -0,0 +1,131 @@ +import torch +import torch.nn as nn + + +class conv2DBatchNormRelu(nn.Module): + def __init__( + self, + in_channels, + n_filters, + k_size, + stride, + padding, + bias=True, + dilation=1, + is_batchnorm=True, + shouldUseReLU=True, + ): + super(conv2DBatchNormRelu, self).__init__() + + conv_mod = nn.Conv2d( + int(in_channels), + int(n_filters), + kernel_size=k_size, + padding=padding, + stride=stride, + bias=bias, + dilation=dilation, + ) + + if shouldUseReLU: + if is_batchnorm: + self.cbr_unit = nn.Sequential(conv_mod, nn.BatchNorm2d(int(n_filters)),
nn.ReLU(inplace=False)) + else: + self.cbr_unit = nn.Sequential(conv_mod, nn.ReLU(inplace=False)) + else: + if is_batchnorm: + self.cbr_unit = nn.Sequential(conv_mod, nn.BatchNorm2d(int(n_filters))) + else: + self.cbr_unit = nn.Sequential(conv_mod) + + + def forward(self, inputs): + outputs = self.cbr_unit(inputs) + return outputs + +class segnetDown3(nn.Module): + def __init__(self, in_size, out_size, indices=False): + super(segnetDown3, self).__init__() + self.conv1 = conv2DBatchNormRelu(in_size, out_size, 3, 1, 1) + self.conv2 = conv2DBatchNormRelu(out_size, out_size, 3, 1, 1) + self.conv3 = conv2DBatchNormRelu(out_size, out_size, 3, 1, 1) + + self.indices = indices + if indices: + self.maxpool_with_argmax = nn.MaxPool2d(2, 2, return_indices=True) + else: + self.maxpool_without_argmax = nn.MaxPool2d(2, 2, return_indices=False) + + def forward(self, inputs): + outputs = self.conv1(inputs) + outputs = self.conv2(outputs) + outputs = self.conv3(outputs) + unpooled_shape = outputs.size() + + if self.indices: + outputs, indices = self.maxpool_with_argmax(outputs) + return outputs, indices, unpooled_shape + else: + outputs = self.maxpool_without_argmax(outputs) + return outputs + +class segnetUp3(nn.Module): + def __init__(self, in_size, out_size, shouldUseReLU=True): + super(segnetUp3, self).__init__() + self.unpool = nn.MaxUnpool2d(2, 2) + self.up = torch.nn.Upsample(scale_factor=2,mode='bilinear',align_corners=True) + self.conv1 = conv2DBatchNormRelu(in_size, in_size, 3, 1, 1) + self.conv2 = conv2DBatchNormRelu(in_size, in_size, 3, 1, 1) + self.conv3 = conv2DBatchNormRelu(in_size, out_size, 3, 1, 1, shouldUseReLU=shouldUseReLU) + + def forward(self, inputs, indices=None, output_shape=None): + if indices is not None: + outputs = self.unpool(input=inputs, indices=indices, output_size=output_shape) + else: + outputs = self.up(inputs) + + outputs = self.conv1(outputs) + outputs = self.conv2(outputs) + outputs = self.conv3(outputs) + return outputs + +class QueryEncoder(nn.Module): + def __init__(self, in_ch, out_ch): + super(QueryEncoder, self).__init__() + + self.model = nn.Sequential( + conv2DBatchNormRelu(in_ch, 512, 1, 1, 0), + conv2DBatchNormRelu(512, 512, 1, 1, 0), + conv2DBatchNormRelu(512, out_ch, 1, 1, 0), + ) + def forward(self, x): + return self.model(x) + +class KeyEncoder(nn.Module): + def __init__(self, in_ch, out_ch): + super(KeyEncoder,self).__init__() + self.model = nn.Sequential( + conv2DBatchNormRelu(in_ch, 512, 1, 1, 0), + # conv2DBatchNormRelu(512, 512, 1, 1, 0), + conv2DBatchNormRelu(512, out_ch, 1, 1, 0), + ) + def forward(self, x): + return self.model(x) + +class SmoothingNetwork(nn.Module): + def __init__(self, in_ch=32*32+1): + super(SmoothingNetwork, self).__init__() + out_ch = in_ch + self.d32to16 = segnetDown3(in_ch,256,indices=True) + self.d16to08 = segnetDown3(256,128,indices=True) + self.d08to16 = segnetUp3(128,256) + self.d16to32 = segnetUp3(256,out_ch) + + def forward(self, distAB): + Da16d,Da_i16,Da_s16 = self.d32to16(torch.nn.Softmax(1)(distAB)) + Da08d,Da_i08,Da_s08 = self.d16to08(Da16d) + Da08 = Da08d + Da16 = self.d08to16(Da08,Da_i08,Da_s08) + Da32 = self.d16to32(Da16,Da_i16,Da_s16) + return Da32 + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/matcher.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/matcher.py new file mode 100644 index 0000000000000000000000000000000000000000..132d529c38d39c05839976dfd68129825a30655f --- /dev/null +++ 
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/matcher.py @@ -0,0 +1,181 @@ +import torch +from torch import nn + +from opencood.pcdet_utils.iou3d_nms.iou3d_nms_utils import boxes_iou3d_gpu +from opencood.utils.box_utils import corner_to_center_torch, boxes_to_corners_3d, project_box3d, project_points_by_matrix_torch, get_mask_for_boxes_within_range_torch +from opencood.utils.transformation_utils import x1_to_x2 +from icecream import ic +import copy +pi = 3.141592653 + + +def limit_period(val, offset=0.5, period=2 * pi): + return val - torch.floor(val / period + offset) * period + + +class Matcher(nn.Module): + """Correct localization error and use Algorithm 1: + BBox matching with scores to fuse the proposal BBoxes""" + + def __init__(self, cfg, pc_range): + super(Matcher, self).__init__() + self.pc_range = pc_range + + @torch.no_grad() + def forward(self, data_dict): + clusters, scores = self.clustering(data_dict) + data_dict['boxes_fused'], data_dict[ + 'scores_fused'] = self.cluster_fusion(clusters, scores) + self.merge_keypoints(data_dict) + return data_dict + + + def clustering(self, data_dict): + """ + Assign predicted boxes to clusters according to their ious with each other + """ + clusters_batch = [] + scores_batch = [] + record_len = [int(l) for l in data_dict['record_len']] + lidar_poses = data_dict['lidar_pose'].cpu().numpy() + for i, l in enumerate(record_len): + cur_boxes_list = data_dict['det_boxes'][sum(record_len[:i]):sum(record_len[:i])+l] + + # Added by Yifan Lu + if data_dict['proj_first'] is False: + cur_boxes_list_ego = [] + # project bounding box to ego coordinate. [x,y,z,l,w,h,yaw] + cur_boxes_list_ego.append(cur_boxes_list[0]) + for agent_id in range(1, l): + tfm = x1_to_x2(lidar_poses[sum(record_len[:i])+agent_id], + lidar_poses[sum(record_len[:i])]) + tfm = torch.from_numpy(tfm).to(cur_boxes_list[0].device).float() + cur_boxes = cur_boxes_list[agent_id] + cur_corners = boxes_to_corners_3d(cur_boxes, order='hwl') + cur_corners_ego = project_box3d(cur_corners, tfm) + cur_boxes_ego = corner_to_center_torch(cur_corners_ego, order='hwl') + cur_boxes_list_ego.append(cur_boxes_ego) + cur_boxes_list = cur_boxes_list_ego + + + cur_scores_list = data_dict['det_scores'][sum(record_len[:i]):sum(record_len[:i])+l] + cur_boxes_list = [b for b in cur_boxes_list if len(b) > 0] + cur_scores_list = [s for s in cur_scores_list if len(s) > 0] + if len(cur_scores_list) == 0: + clusters_batch.append([torch.Tensor([0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.57]). 
+ to(torch.device('cuda')).view(1, 7)]) + scores_batch.append([torch.Tensor([0.01]).to(torch.device('cuda')).view(-1)]) + continue + + pred_boxes_cat = torch.cat(cur_boxes_list, dim=0) + pred_boxes_cat[:, -1] = limit_period(pred_boxes_cat[:, -1]) + pred_scores_cat = torch.cat(cur_scores_list, dim=0) + + ious = boxes_iou3d_gpu(pred_boxes_cat, pred_boxes_cat) + cluster_indices = torch.zeros(len(ious)).int() # gt assignments of preds + cur_cluster_id = 1 + while torch.any(cluster_indices == 0): + cur_idx = torch.where(cluster_indices == 0)[0][0] # find the idx of the first pred which is not assigned yet + cluster_indices[torch.where(ious[cur_idx] > 0.1)[0]] = cur_cluster_id + cur_cluster_id += 1 + clusters = [] + scores = [] + for j in range(1, cur_cluster_id): + clusters.append(pred_boxes_cat[cluster_indices==j]) + scores.append(pred_scores_cat[cluster_indices==j]) + clusters_batch.append(clusters) + scores_batch.append(scores) + + return clusters_batch, scores_batch + + def cluster_fusion(self, clusters, scores): + """ + Merge boxes in each cluster with scores as weights for merging + """ + boxes_fused = [] + scores_fused = [] + for cl, sl in zip(clusters, scores): # each frame + for c, s in zip(cl, sl): # frame's cluster + # reverse direction for non-dominant direction of boxes + dirs = c[:, -1] + max_score_idx = torch.argmax(s) + dirs_diff = torch.abs(dirs - dirs[max_score_idx].item()) + lt_pi = (dirs_diff > pi).int() + dirs_diff = dirs_diff * (1 - lt_pi) + ( + 2 * pi - dirs_diff) * lt_pi + score_lt_half_pi = s[dirs_diff > pi / 2].sum() # larger than + score_set_half_pi = s[ + dirs_diff <= pi / 2].sum() # small equal than + # select larger scored direction as final direction + if score_lt_half_pi <= score_set_half_pi: + dirs[dirs_diff > pi / 2] += pi + else: + dirs[dirs_diff <= pi / 2] += pi + dirs = limit_period(dirs) + s_normalized = s / s.sum() + sint = torch.sin(dirs) * s_normalized + cost = torch.cos(dirs) * s_normalized + theta = torch.atan2(sint.sum(), cost.sum()).view(1, ) + center_dim = c[:, :-1] * s_normalized[:, None] + + boxes_fused.append(torch.cat([center_dim.sum(dim=0), theta])) + s_sorted = torch.sort(s, descending=True).values + s_fused = 0 + for i, ss in enumerate(s_sorted): + s_fused += ss ** (i + 1) + s_fused = torch.tensor([min(s_fused, 1.0)], device=s.device) + scores_fused.append(s_fused) + + assert len(boxes_fused) > 0 + boxes_fused = torch.stack(boxes_fused, dim=0) + len_records = [len(c) for c in clusters] # each frame + boxes_fused = [ + boxes_fused[sum(len_records[:i]):sum(len_records[:i]) + l] for i, l + in enumerate(len_records)] + scores_fused = torch.stack(scores_fused, dim=0) + scores_fused = [ + scores_fused[sum(len_records[:i]):sum(len_records[:i]) + l] for + i, l in enumerate(len_records)] + + for i in range(len(boxes_fused)): + corners3d = boxes_to_corners_3d(boxes_fused[i], order='hwl') + mask = get_mask_for_boxes_within_range_torch(corners3d, self.pc_range) + boxes_fused[i] = boxes_fused[i][mask] + scores_fused[i] = scores_fused[i][mask] + + return boxes_fused, scores_fused + + def merge_keypoints(self, data_dict): + # merge keypoints + kpts_feat_out = [] + kpts_coor_out = [] + kpts_coor_out_ego = [] + keypoints_features = data_dict['point_features'] # sum(record_len) + keypoints_coords = data_dict['point_coords'] # [[N,3],...] 
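# A minimal, self-contained sketch (not the checkpointed module itself) of the
# score-weighted fusion that cluster_fusion above applies to one cluster of
# proposals: center and dims are averaged with normalized scores, yaw is fused
# as a weighted circular mean via atan2, and the fused confidence is the sum of
# the descendingly sorted scores raised to increasing powers, clamped to 1.
# The direction-flip handling for opposite-facing boxes is omitted, and the toy
# boxes below are illustrative values only.
import torch

def fuse_one_cluster(boxes, scores):
    # boxes: [N, 7] as [x, y, z, dim1, dim2, dim3, yaw]; scores: [N]
    w = scores / scores.sum()                        # normalized weights
    yaw = boxes[:, -1]
    theta = torch.atan2((torch.sin(yaw) * w).sum(),
                        (torch.cos(yaw) * w).sum())  # weighted circular mean
    center_dim = (boxes[:, :-1] * w[:, None]).sum(dim=0)
    s_sorted = torch.sort(scores, descending=True).values
    s_fused = sum(s ** (i + 1) for i, s in enumerate(s_sorted))
    return torch.cat([center_dim, theta.view(1)]), torch.clamp(s_fused, max=1.0)

boxes = torch.tensor([[10.0, 2.0, 0.0, 1.5, 1.9, 4.2, 0.05],
                      [10.3, 2.1, 0.0, 1.6, 2.0, 4.0, -0.02]])
scores = torch.tensor([0.9, 0.6])
box_fused, score_fused = fuse_one_cluster(boxes, scores)
# score_fused = min(0.9**1 + 0.6**2, 1.0) = 1.0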
+ idx = 0 + record_len = data_dict['record_len'] + lidar_poses = data_dict['lidar_pose'].cpu().numpy() + for l in data_dict['record_len']: + # Added by Yifan Lu + # if not project first, first transform the keypoints coords + if data_dict['proj_first'] is False: + kpts_coor_cur = [] + for agent_id in range(0, l): + tfm = x1_to_x2(lidar_poses[idx+agent_id], lidar_poses[idx]) + tfm = torch.from_numpy(tfm).to(keypoints_coords[0].device).float() + keypoints_coords[idx+agent_id][:, :3] = project_points_by_matrix_torch(keypoints_coords[idx+agent_id][:,:3], tfm) + + kpts_coor_out_ego.append( + torch.cat(keypoints_coords[idx:l + idx], dim=0) + ) + + kpts_coor_out.append( + torch.cat(keypoints_coords[idx:l + idx], dim=0)) + kpts_feat_out.append( + torch.cat(keypoints_features[idx:l + idx], dim=0)) + idx += l + data_dict['point_features'] = kpts_feat_out + data_dict['point_coords'] = kpts_coor_out + + if data_dict['proj_first'] is False: + data_dict['point_coords'] = kpts_coor_out_ego diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/matcher_v2.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/matcher_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..c24c2cc9fa86f8b07eca9bc3b058791c7071b454 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/matcher_v2.py @@ -0,0 +1,245 @@ +""" + A new version of proposal matcher. + It will collect voxel features, instead of keypoint features. + TODO: Add agent-object pose graph optimization +""" + +import torch +from torch import nn +import numpy as np +import spconv +from collections import OrderedDict +import opencood.utils.spconv_utils as spconv_utils +from opencood.pcdet_utils.iou3d_nms.iou3d_nms_utils import boxes_iou3d_gpu +from opencood.utils.box_utils import corner_to_center_torch, boxes_to_corners_3d, project_box3d, get_mask_for_boxes_within_range_torch +from opencood.utils.transformation_utils import x1_to_x2 +from opencood.utils.common_utils import limit_period +from icecream import ic + + +class MatcherV2(nn.Module): + + def __init__(self, cfg, pc_range): + super(MatcherV2, self).__init__() + self.order = cfg['order'] + self.voxel_size = cfg['voxel_size'] + self.feature_source = cfg['feature_source'] # ['x_conv2', 'x_conv3', 'x_conv4'] + self.pc_range = pc_range + self.sp_wraper = spconv_utils.warpSparseTensor() + self.sp_merger = spconv_utils.MergeDuplicate("max") + + @torch.no_grad() + def forward(self, data_dict): + clusters, scores = self.clustering(data_dict) + data_dict['boxes_fused'], data_dict[ + 'scores_fused'] = self.cluster_fusion(clusters, scores) + self.collect_voxel_feature(data_dict) + return data_dict + + def clustering(self, data_dict): + """ + Assign predicted boxes to clusters according to their ious with each other + """ + clusters_batch = [] + scores_batch = [] + record_len = [int(cavnum) for cavnum in data_dict['record_len']] + lidar_poses = data_dict['lidar_pose'].cpu().numpy() + + # iterate each frame + for i, cavnum in enumerate(record_len): + cur_boxes_list = data_dict['det_boxes'][sum(record_len[:i]):sum(record_len[:i]) + cavnum] + cur_boxes_list_ego = [] + # preserve ego boxes + cur_boxes_list_ego.append(cur_boxes_list[0]) + # transform box to ego coordinate. 
[x,y,z,h,w,l,yaw] + for agent_id in range(1, cavnum): + tfm = x1_to_x2(lidar_poses[sum(record_len[:i])+agent_id], + lidar_poses[sum(record_len[:i])]) + tfm = torch.from_numpy(tfm).to(cur_boxes_list[0].device).float() + cur_boxes = cur_boxes_list[agent_id] + cur_corners = boxes_to_corners_3d(cur_boxes, order=self.order) + cur_corners_ego = project_box3d(cur_corners, tfm) + cur_boxes_ego = corner_to_center_torch(cur_corners_ego, order=self.order) + cur_boxes_list_ego.append(cur_boxes_ego) + + cur_boxes_list = cur_boxes_list_ego + + cur_scores_list = data_dict['det_scores'][sum(record_len[:i]):sum(record_len[:i]) + cavnum] + cur_boxes_list = [b for b in cur_boxes_list if len(b) > 0] + cur_scores_list = [s for s in cur_scores_list if len(s) > 0] + + if len(cur_scores_list) == 0: + clusters_batch.append([torch.Tensor([0.0, 0.0, 0.0, 1.6, 2.0, 4.0, 0]). # + to(torch.device('cuda:0')).view(1, 7)]) + scores_batch.append([torch.Tensor([0.01]).to(torch.device('cuda:0')).view(-1)]) + continue + + pred_boxes_cat = torch.cat(cur_boxes_list, dim=0) + pred_boxes_cat[:, -1] = limit_period(pred_boxes_cat[:, -1]) + pred_scores_cat = torch.cat(cur_scores_list, dim=0) + + ious = boxes_iou3d_gpu(pred_boxes_cat, pred_boxes_cat) + cluster_indices = torch.zeros(len(ious)).int() + cur_cluster_id = 1 + + # cluster proposals + while torch.any(cluster_indices == 0): + cur_idx = torch.where(cluster_indices == 0)[0][0] # find the idx of the first pred which is not assigned yet + cluster_indices[torch.where(ious[cur_idx] > 0.1)[0]] = cur_cluster_id + cur_cluster_id += 1 + + clusters = [] + scores = [] + + for j in range(1, cur_cluster_id): + clusters.append(pred_boxes_cat[cluster_indices==j]) + scores.append(pred_scores_cat[cluster_indices==j]) + + clusters_batch.append(clusters) + scores_batch.append(scores) + + return clusters_batch, scores_batch + + def cluster_fusion(self, clusters, scores): + """ + Merge boxes in each cluster with scores as weights for merging. + TODO: change to select the proposal with highest score? 
And then adjust the proposal + """ + boxes_fused = [] + scores_fused = [] + for cl, sl in zip(clusters, scores): # cl, sl are clusters and scores within one sample + for c, s in zip(cl, sl): # one sample (cl) has many clusters (c), c,s,a correspond to one cluster + # reverse direction for non-dominant direction of boxes + dirs = c[:, -1] + max_score_idx = torch.argmax(s) + dirs_diff = torch.abs(dirs - dirs[max_score_idx].item()) + lt_pi = (dirs_diff > np.pi).int() + dirs_diff = dirs_diff * (1 - lt_pi) + ( + 2 * np.pi - dirs_diff) * lt_pi + score_lt_half_pi = s[dirs_diff > np.pi / 2].sum() # larger than + score_set_half_pi = s[ + dirs_diff <= np.pi / 2].sum() # small equal than + # select larger scored direction as final direction + if score_lt_half_pi <= score_set_half_pi: + dirs[dirs_diff > np.pi / 2] += np.pi + else: + dirs[dirs_diff <= np.pi / 2] += np.pi + + dirs = limit_period(dirs) + s_normalized = s / s.sum() + sint = torch.sin(dirs) * s_normalized + cost = torch.cos(dirs) * s_normalized + theta = torch.atan2(sint.sum(), cost.sum()).view(1, ) + center_dim = c[:, :-1] * s_normalized[:, None] + + boxes_fused.append(torch.cat([center_dim.sum(dim=0), theta])) + s_sorted = torch.sort(s, descending=True).values + s_fused = 0 + for i, ss in enumerate(s_sorted): + s_fused += ss ** (i + 1) + s_fused = torch.tensor([min(s_fused, 1.0)], device=s.device) + scores_fused.append(s_fused) + + assert len(boxes_fused) > 0 + boxes_fused = torch.stack(boxes_fused, dim=0) + box_num_sample = [len(c) for c in clusters] # in a batch, each sample has how many boxes + boxes_fused = [ + boxes_fused[sum(box_num_sample[:i]):sum(box_num_sample[:i]) + l] for i, l + in enumerate(box_num_sample)] + scores_fused = torch.stack(scores_fused, dim=0) + scores_fused = [ + scores_fused[sum(box_num_sample[:i]):sum(box_num_sample[:i]) + l] for + i, l in enumerate(box_num_sample)] + + for i in range(len(boxes_fused)): + corners3d = boxes_to_corners_3d(boxes_fused[i], order=self.order) + mask = get_mask_for_boxes_within_range_torch(corners3d, self.pc_range) + boxes_fused[i] = boxes_fused[i][mask] + scores_fused[i] = scores_fused[i][mask] + + return boxes_fused, scores_fused + + def retrieve_cav_sp_feature(self, sp_feature, agent_pos): + features = sp_feature.features + indices = sp_feature.indices + mask = indices[:, 0] == agent_pos + + new_indices = indices.clone() + new_indices[:, 0] = 0 + + return spconv.SparseConvTensor(features[mask], new_indices[mask], sp_feature.spatial_shape, batch_size=1) + + def collect_voxel_feature(self, data_dict): + """ + 1. collect features by feauture_source + 2. convert sparse features to dense features + 3. warp dense feature map and merge them + 4. 
convert dense feature map to sparse + """ + + multi_scale_3d_features = data_dict['multi_scale_3d_features'] # sum(record_len), but SparseConvTensor + multi_scale_3d_stride = data_dict['multi_scale_3d_strides'] + data_dict['multi_scale_3d_features_fused'] = OrderedDict() + lidar_poses = data_dict['lidar_pose'].cpu().numpy() + device = data_dict['lidar_pose'].device + + for srcname in self.feature_source: + start_agent_pos = 0 + sp_feature = multi_scale_3d_features[srcname] + stride = multi_scale_3d_stride[srcname] + voxel_size = torch.tensor(self.voxel_size).to(device) + voxel_size *= stride + sp_tensor_fused_list = [] # each sample + # ic(srcname) + # ic(sp_feature.indices) + + for idx, cavnum in enumerate(data_dict['record_len']): + # each sample + sp_tensor_cav_list = [self.retrieve_cav_sp_feature(sp_feature, start_agent_pos)] # each cav + + for agent_id in range(1, cavnum): + sp_tensor_cav = self.retrieve_cav_sp_feature(sp_feature, start_agent_pos+agent_id) + + if data_dict['proj_first'] is False: + tfm = x1_to_x2(lidar_poses[start_agent_pos+agent_id], lidar_poses[start_agent_pos]) + tfm = torch.from_numpy(tfm).to(device) # cav_to_ego + sp_tensor_warp = self.sp_wraper(sp_tensor_cav, tfm, voxel_size, self.pc_range) + sp_tensor_cav = sp_tensor_warp + + sp_tensor_cav_list.append(sp_tensor_cav) + + sp_tensor_fused = spconv_utils.fuseSparseTensor(sp_tensor_cav_list) # only fuse + sp_tensor_fused = self.sp_merger(sp_tensor_fused) + sp_tensor_fused.indices[:, 0] = idx # batch_idx set to sample idx + + # sp_tensor_fused = self.retrieve_cav_sp_feature(sp_feature, start_agent_pos) + # sp_tensor_fused.indices[:, 0] = idx + + sp_tensor_fused_list.append(sp_tensor_fused) + + start_agent_pos += cavnum + + + + + new_features = torch.cat([x.features for x in sp_tensor_fused_list], dim=0) + new_indice = torch.cat([x.indices for x in sp_tensor_fused_list], dim=0) + features_fused = spconv.SparseConvTensor(new_features, new_indice, sp_tensor_fused_list[0].spatial_shape, + len(data_dict['record_len']), sp_tensor_fused_list[0].grid) + + data_dict['multi_scale_3d_features_fused'][srcname] = features_fused + + # ic("test dense feature") + + # # ic(features_fused.features) + # ic(features_fused.indices.shape) + # ic(features_fused.features.shape) + # ic(features_fused.spatial_shape) + # ic(features_fused.indices[:,1].min()) + # ic(features_fused.indices[:,2].min()) + # ic(features_fused.indices[:,3].min()) + # ic(features_fused.indices[:,1].max()) + # ic(features_fused.indices[:,2].max()) + # ic(features_fused.indices[:,3].max()) + # # dense_feature = features_fused.dense() + # # ic(dense_feature.shape) diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/matcher_v3.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/matcher_v3.py new file mode 100644 index 0000000000000000000000000000000000000000..077c9d25528a34a151c033323631337e9869d3a1 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/matcher_v3.py @@ -0,0 +1,244 @@ +""" + A new version of proposal matcher. + It will collect BEV features, instead of keypoint features. 
+ TODO: Add agent-object pose graph optimization + +""" + +import torch +from torch import nn +import numpy as np +import spconv +from collections import OrderedDict +from opencood.pcdet_utils.iou3d_nms.iou3d_nms_utils import boxes_iou3d_gpu +from opencood.utils.box_utils import corner_to_center_torch, boxes_to_corners_3d, project_box3d, get_mask_for_boxes_within_range_torch +from opencood.utils.transformation_utils import x1_to_x2 +from opencood.utils.common_utils import limit_period +from icecream import ic +from itertools import compress + + +class MatcherV3(nn.Module): + def __init__(self, cfg, pc_range): + super(MatcherV3, self).__init__() + self.order = cfg['order'] + self.pc_range = pc_range + self.enlarge_ratio = cfg.get("enlarge_ratio", 1) + + @torch.no_grad() + def forward(self, data_dict): + clusters, scores, agentid_batch, view_vector_batch = self.clustering(data_dict) + data_dict['boxes_fused'], data_dict['scores_fused'], data_dict['agentid_fused'], data_dict['view_vector_fused'] \ + = self.cluster_fusion(clusters, scores, agentid_batch, view_vector_batch) + + self.get_roi_from_box(data_dict) # ['roi_fused'] + return data_dict + + def clustering(self, data_dict): + """ + Assign predicted boxes to clusters according to their ious with each other. + Assign the order to boxes, belong to which agent + + Returns: + clusters_batch: [batch1, batch2, batch3, ...], + where batch1 = [[box1_in_cluster1, box2_in_cluster1, box3_in_cluster1], [box1_in_cluster2, box2_in_cluster2], ...] + """ + clusters_batch = [] + scores_batch = [] + agentid_batch = [] + view_vector_batch = [] + + record_len = [int(cavnum) for cavnum in data_dict['record_len']] + lidar_poses = data_dict['lidar_pose'].cpu().numpy() + + # iterate each frame + for i, cavnum in enumerate(record_len): + cur_boxes_list = data_dict['det_boxes'][sum(record_len[:i]):sum(record_len[:i]) + cavnum] # within one sample, different cav + cur_boxes_list_ego = [] + cur_agentid_list = [] + cur_view_vector_list = [] + # preserve ego boxes + cur_boxes_list_ego.append(cur_boxes_list[0]) + cur_agentid_list.append(torch.tensor([sum(record_len[:i]) + 0] * len(cur_boxes_list[0]))) + + ### view vector #### + cur_boxes = cur_boxes_list[0] + view_angle = torch.atan2(cur_boxes[:, 1], cur_boxes[:, 0]) - cur_boxes[:,6] # view angle + view_angle = limit_period(view_angle) # normalized view angle + distance = (cur_boxes[:, 0] ** 2 + cur_boxes[:, 1] ** 2) ** 0.5 + view_vector = torch.stack([view_angle, distance], dim=-1) # [proposalnum, 2] + cur_view_vector_list.append(view_vector) + #################### + + # transform box to ego coordinate. 
[x,y,z,h,w,l,yaw] + # especially proj first is false + for agent_id in range(1, cavnum): + tfm = x1_to_x2(lidar_poses[sum(record_len[:i])+agent_id], + lidar_poses[sum(record_len[:i])]) + tfm = torch.from_numpy(tfm).to(cur_boxes_list[0].device).float() + cur_boxes = cur_boxes_list[agent_id] + cur_corners = boxes_to_corners_3d(cur_boxes, order=self.order) + + ### view vector #### + view_angle = torch.atan2(cur_boxes[:, 1], cur_boxes[:, 0]) - cur_boxes[:,6] # view angle + view_angle = limit_period(view_angle) # normalized view angle + distance = (cur_boxes[:, 0] ** 2 + cur_boxes[:, 1] ** 2) ** 0.5 + view_vector = torch.stack([view_angle, distance], dim=-1) # [proposalnum, 2] + #################### + + cur_corners_ego = project_box3d(cur_corners, tfm) + cur_boxes_ego = corner_to_center_torch(cur_corners_ego, order=self.order) + cur_boxes_list_ego.append(cur_boxes_ego) + cur_agentid_list.append(torch.tensor([sum(record_len[:i]) + agent_id] * len(cur_boxes_ego))) + cur_view_vector_list.append(view_vector) + + + cur_boxes_list = cur_boxes_list_ego + cur_scores_list = data_dict['det_scores'][sum(record_len[:i]):sum(record_len[:i]) + cavnum] + + + cur_boxes_list = [b for b in cur_boxes_list if len(b) > 0] + cur_scores_list = [s for s in cur_scores_list if len(s) > 0] + cur_agentid_list = [a for a in cur_agentid_list if len(a) > 0] + cur_view_vector_list = [v for v in cur_view_vector_list if len(v) > 0] + + if len(cur_scores_list) == 0: + clusters_batch.append([torch.Tensor([0.0, 0.0, 0.0, 1.6, 2.0, 4.0, 0]). # hwl + to(torch.device('cuda')).view(1, 7)]) + scores_batch.append([torch.Tensor([0.01]).to(torch.device('cuda')).view(-1)]) + agentid_batch.append([torch.tensor([0]).to(torch.device('cuda')).view(-1)]) + view_vector_batch.append([torch.tensor([[0, 0]]).to(torch.device('cuda'))]) + continue + + pred_boxes_cat = torch.cat(cur_boxes_list, dim=0) + pred_boxes_cat[:, -1] = limit_period(pred_boxes_cat[:, -1]) + pred_scores_cat = torch.cat(cur_scores_list, dim=0) + agentid_cat = torch.cat(cur_agentid_list, dim=0).to(torch.long) + view_vector_cat = torch.cat(cur_view_vector_list, dim=0) + + ious = boxes_iou3d_gpu(pred_boxes_cat, pred_boxes_cat) + cluster_indices = torch.zeros(len(ious)).int() + cur_cluster_id = 1 + + # cluster proposals + while torch.any(cluster_indices == 0): + cur_idx = torch.where(cluster_indices == 0)[0][0] # find the idx of the first pred which is not assigned yet + cluster_indices[torch.where(ious[cur_idx] > 0.1)[0]] = cur_cluster_id + cur_cluster_id += 1 + + clusters = [] + scores = [] + agentid = [] + view_vector = [] + + for j in range(1, cur_cluster_id): + clusters.append(pred_boxes_cat[cluster_indices==j]) # shape: [[num_in_cluster, 7], ... ] + scores.append(pred_scores_cat[cluster_indices==j]) # shape: [[num_in_cluster,], ...] + agentid.append(agentid_cat[cluster_indices==j]) # shape: [[num_in_cluster,], ...] + view_vector.append(view_vector_cat[cluster_indices==j]) # shape [[num_in_cluster, 2],...] + + clusters_batch.append(clusters) # shape: [[[num_in_cluster, 7], ...], ... ] + scores_batch.append(scores) # shape: [[[num_in_cluster,], ...], ... ] + agentid_batch.append(agentid) # shape: [[[num_in_cluster,], ...], ...] + view_vector_batch.append(view_vector) # shape [[[num_in_cluster, 2], ...], ...] + + return clusters_batch, scores_batch, agentid_batch, view_vector_batch + + def cluster_fusion(self, clusters, scores, agentid, view_vector): + """ + Merge boxes in each cluster with scores as weights for merging. 
+ """ + boxes_fused = [] + scores_fused = [] + agentid_fused = [] + view_vector_fused = [] + for cl, sl, al, vl in zip(clusters, scores, agentid, view_vector): # cl, sl are clusters and scores within one sample + for c, s, a, v in zip(cl, sl, al, vl): # one sample (cl) has many clusters (c), c,s,a correspond to one cluster. + # reverse direction for non-dominant direction of boxes + dirs = c[:, -1] + max_score_idx = torch.argmax(s) + dirs_diff = torch.abs(dirs - dirs[max_score_idx].item()) + lt_pi = (dirs_diff > np.pi).int() + dirs_diff = dirs_diff * (1 - lt_pi) + ( + 2 * np.pi - dirs_diff) * lt_pi + score_lt_half_pi = s[dirs_diff > np.pi / 2].sum() # larger than + score_set_half_pi = s[dirs_diff <= np.pi / 2].sum() # small equal than + # select larger scored direction as final direction + if score_lt_half_pi <= score_set_half_pi: + dirs[dirs_diff > np.pi / 2] += np.pi + else: + dirs[dirs_diff <= np.pi / 2] += np.pi + + dirs = limit_period(dirs) + s_normalized = s / s.sum() + sint = torch.sin(dirs) * s_normalized + cost = torch.cos(dirs) * s_normalized + theta = torch.atan2(sint.sum(), cost.sum()).view(1, ) + center_dim = c[:, :-1] * s_normalized[:, None] + + boxes_fused.append(torch.cat([center_dim.sum(dim=0), theta])) + # s_sorted = torch.sort(s, descending=True).values + # s_fused = 0 + # for i, ss in enumerate(s_sorted): + # s_fused += ss ** (i + 1) + # s_fused = torch.tensor([min(s_fused, 1.0)], device=s.device) + s_fused = torch.max(s) + + scores_fused.append(s_fused) # content: [s_cluster0, s_cluster1, ...] + agentid_fused.append(a) # content [[id1,id2], [id1, id2, id3], ...] + view_vector_fused.append(v) # shape [[2, 2], [3, 2], ...] + + assert len(boxes_fused) > 0 + boxes_fused = torch.stack(boxes_fused, dim=0) + box_num_sample = [len(c) for c in clusters] # in a batch, each sample has how many clusters + + boxes_fused = [boxes_fused[sum(box_num_sample[:i]):sum(box_num_sample[:i]) + l] for + i, l in enumerate(box_num_sample)] # shape [[num_of_cluster_in_sample1, 7], [num_of_cluster_in_sample2, 7], ...] + + scores_fused = torch.stack(scores_fused, dim=0) + scores_fused = [scores_fused[sum(box_num_sample[:i]):sum(box_num_sample[:i]) + l] for + i, l in enumerate(box_num_sample)] # shape [[num_of_cluster_in_sample1,], [num_of_cluster_in_sample2,], ...] + + agentid_fused = [agentid_fused[sum(box_num_sample[:i]):sum(box_num_sample[:i]) + l] for + i, l in enumerate(box_num_sample)] # content [[[id1,id2], [id1, id2, id3], ... ], [sample2 content], ...] + + view_vector_fused = [view_vector_fused[sum(box_num_sample[:i]):sum(box_num_sample[:i]) + l] for + i, l in enumerate(box_num_sample)] # shape [[ [2,2], [3,2], ...], [sample2 content], ...] + + for i in range(len(boxes_fused)): + corners3d = boxes_to_corners_3d(boxes_fused[i], order=self.order) + mask = get_mask_for_boxes_within_range_torch(corners3d, self.pc_range) + boxes_fused[i] = boxes_fused[i][mask] + scores_fused[i] = scores_fused[i][mask] + agentid_fused[i] = list(compress(agentid_fused[i], mask)) + view_vector_fused[i] = list(compress(view_vector_fused[i], mask)) + + return boxes_fused, scores_fused, agentid_fused, view_vector_fused + + def get_roi_from_box(self, data_dict): + feature_shape = data_dict['feature_shape'] # [H,W] + grid_size_H = (self.pc_range[4] - self.pc_range[1]) / feature_shape[0] + grid_size_W = (self.pc_range[3] - self.pc_range[0]) / feature_shape[1] + + boxes_fused_list = data_dict['boxes_fused'] # [sample1, sample2, ...] 
+ roi_list = [] + + for boxes_fused in boxes_fused_list: + # boxes_fused shape [N, 7], hwl order + # we omit the angle in the naive version + grid_center_x = (boxes_fused[:,0] - self.pc_range[0]) / grid_size_W + grid_center_y = (boxes_fused[:,1] - self.pc_range[1]) / grid_size_H + grid_offset_x = boxes_fused[:, -2] / 2 / grid_size_W + grid_offset_y = boxes_fused[:, -3] / 2 / grid_size_H + 1 # enlarge + + + xmin = (grid_center_x - grid_offset_x * self.enlarge_ratio).clamp(min=0) + xmax = (grid_center_x + grid_offset_x * self.enlarge_ratio).clamp(max=feature_shape[1] - 1) + ymin = (grid_center_y - grid_offset_y * self.enlarge_ratio).clamp(min=0) + ymax = (grid_center_y + grid_offset_y * self.enlarge_ratio).clamp(max=feature_shape[0] - 1) + + roi = torch.stack([xmin, xmax, ymin, ymax], dim=-1).to(torch.long) # [boxnum, 4] + + roi_list.append(roi) + + data_dict['roi_fused'] = roi_list # shape [[num_of_cluster_in_sample1, 4], [num_of_cluster_in_sample2, 4], ...] \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/mean_vfe.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/mean_vfe.py new file mode 100644 index 0000000000000000000000000000000000000000..9231578bb86d11b48cfd43ec8ae9cb7bdd7fa683 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/mean_vfe.py @@ -0,0 +1,33 @@ +import torch +import torch.nn as nn + +class MeanVFE(nn.Module): + def __init__(self, model_cfg, num_point_features, **kwargs): + super().__init__() + self.model_cfg = model_cfg + self.num_point_features = num_point_features + + def get_output_feature_dim(self): + return self.num_point_features + + def forward(self, batch_dict, **kwargs): + """ + Args: + batch_dict: + voxels: (num_voxels, max_points_per_voxel, C) + voxel_num_points: optional (num_voxels) + **kwargs: + + Returns: + vfe_features: (num_voxels, C) + """ + voxel_features, voxel_num_points = batch_dict['voxel_features'], \ + batch_dict['voxel_num_points'] + points_mean = voxel_features[:, :, :].sum(dim=1, keepdim=False) + normalizer = torch.clamp_min(voxel_num_points.view(-1, 1), min=1.0).\ + type_as(voxel_features) + points_mean = points_mean / normalizer + batch_dict['voxel_features'] = points_mean.contiguous() + + return batch_dict + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/ms_deform_attn.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/ms_deform_attn.py new file mode 100644 index 0000000000000000000000000000000000000000..2f43ed689912d845206d587baee2cc2a56780622 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/ms_deform_attn.py @@ -0,0 +1,115 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import warnings +import math + +import torch +from torch import nn +import torch.nn.functional as F +from torch.nn.init import xavier_uniform_, constant_ + +from .functions import MSDeformAttnFunction + + +def _is_power_of_2(n): + if (not isinstance(n, int)) or (n < 0): + raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))) + return (n & (n-1) == 0) and n != 0 + + +class MSDeformAttn(nn.Module): + def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4): + """ + Multi-Scale Deformable Attention Module + :param d_model hidden dimension + :param n_levels number of feature levels + :param n_heads number of attention heads + :param n_points number of sampling points per attention head per feature level + """ + super().__init__() + if d_model % n_heads != 0: + raise ValueError('d_model must be divisible by n_heads, but got {} and {}'.format(d_model, n_heads)) + _d_per_head = d_model // n_heads + # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation + if not _is_power_of_2(_d_per_head): + warnings.warn("You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 " + "which is more efficient in our CUDA implementation.") + + self.im2col_step = 64 + + self.d_model = d_model + self.n_levels = n_levels + self.n_heads = n_heads + self.n_points = n_points + + self.sampling_offsets = nn.Linear(d_model, n_heads * n_levels * n_points * 2) + self.attention_weights = nn.Linear(d_model, n_heads * n_levels * n_points) + self.value_proj = nn.Linear(d_model, d_model) + self.output_proj = nn.Linear(d_model, d_model) + + self._reset_parameters() + + def _reset_parameters(self): + constant_(self.sampling_offsets.weight.data, 0.) + thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads) + grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) + grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2).repeat(1, self.n_levels, self.n_points, 1) + for i in range(self.n_points): + grid_init[:, :, i, :] *= i + 1 + with torch.no_grad(): + self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1)) + constant_(self.attention_weights.weight.data, 0.) + constant_(self.attention_weights.bias.data, 0.) + xavier_uniform_(self.value_proj.weight.data) + constant_(self.value_proj.bias.data, 0.) + xavier_uniform_(self.output_proj.weight.data) + constant_(self.output_proj.bias.data, 0.) 
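# An illustrative, standalone sketch of what the sampling_offsets bias
# initialization in _reset_parameters above produces, using the module's default
# head/level/point counts: each head gets a distinct planar direction whose
# longer component is normalized to 1, and the k-th sampling point of that head
# starts (k+1) cells away along that direction at every feature level.
import math
import torch

n_heads, n_levels, n_points = 8, 4, 4
thetas = torch.arange(n_heads, dtype=torch.float32) * (2.0 * math.pi / n_heads)
grid = torch.stack([thetas.cos(), thetas.sin()], -1)        # [n_heads, 2]
grid = grid / grid.abs().max(-1, keepdim=True)[0]           # longest axis -> 1
grid = grid.view(n_heads, 1, 1, 2).repeat(1, n_levels, n_points, 1)
for k in range(n_points):
    grid[:, :, k, :] *= k + 1                               # farther points per ring
bias = grid.view(-1)   # [n_heads * n_levels * n_points * 2], copied into the bias
# e.g. head 0 points along +x: grid[0, 0] == [[1, 0], [2, 0], [3, 0], [4, 0]]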
+ + def forward(self, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask=None): + """ + :param query (N, Length_{query}, C) + :param reference_points (N, Length_{query}, n_levels, 2), range in [0, 1], top-left (0,0), bottom-right (1, 1), including padding area + or (N, Length_{query}, n_levels, 4), add additional (w, h) to form reference boxes + :param input_flatten (N, \sum_{l=0}^{L-1} H_l \cdot W_l, C) + :param input_spatial_shapes (n_levels, 2), [(H_0, W_0), (H_1, W_1), ..., (H_{L-1}, W_{L-1})] + :param input_level_start_index (n_levels, ), [0, H_0*W_0, H_0*W_0+H_1*W_1, H_0*W_0+H_1*W_1+H_2*W_2, ..., H_0*W_0+H_1*W_1+...+H_{L-1}*W_{L-1}] + :param input_padding_mask (N, \sum_{l=0}^{L-1} H_l \cdot W_l), True for padding elements, False for non-padding elements + + :return output (N, Length_{query}, C) + """ + N, Len_q, _ = query.shape + N, Len_in, _ = input_flatten.shape + assert (input_spatial_shapes[:, 0] * input_spatial_shapes[:, 1]).sum() == Len_in + + value = self.value_proj(input_flatten) + if input_padding_mask is not None: + value = value.masked_fill(input_padding_mask[..., None], float(0)) + value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads) + sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points, 2) + attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points) + attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points) + # N, Len_q, n_heads, n_levels, n_points, 2 + if reference_points.shape[-1] == 2: + offset_normalizer = torch.stack([input_spatial_shapes[..., 1], input_spatial_shapes[..., 0]], -1) + sampling_locations = reference_points[:, :, None, :, None, :] \ + + sampling_offsets / offset_normalizer[None, None, None, :, None, :] + elif reference_points.shape[-1] == 4: + sampling_locations = reference_points[:, :, None, :, None, :2] \ + + sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 2:] * 0.5 + else: + raise ValueError( + 'Last dim of reference_points must be 2 or 4, but get {} instead.'.format(reference_points.shape[-1])) + output = MSDeformAttnFunction.apply( + value, input_spatial_shapes, input_level_start_index, sampling_locations, attention_weights, self.im2col_step) + output = self.output_proj(output) + return output diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/mswin.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/mswin.py new file mode 100644 index 0000000000000000000000000000000000000000..1af51b95205e7664b9c2c78ebef64e74a6eee390 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/mswin.py @@ -0,0 +1,123 @@ +""" +Multi-scale window transformer +""" +import torch +import torch.nn as nn +import numpy as np + +from einops import rearrange +from opencood.models.sub_modules.split_attn import SplitAttn + + +def get_relative_distances(window_size): + indices = torch.tensor(np.array( + [[x, y] for x in range(window_size) for y in range(window_size)])) + distances = indices[None, :, :] - indices[:, None, :] + return distances + + +class BaseWindowAttention(nn.Module): + def __init__(self, dim, heads, dim_head, drop_out, window_size, + relative_pos_embedding): + super().__init__() + inner_dim = dim_head * heads + + self.heads = heads + self.scale = dim_head ** -0.5 + self.window_size = window_size + 
self.relative_pos_embedding = relative_pos_embedding + + self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False) + + if self.relative_pos_embedding: + self.relative_indices = get_relative_distances(window_size) + \ + window_size - 1 + self.pos_embedding = nn.Parameter(torch.randn(2 * window_size - 1, + 2 * window_size - 1)) + else: + self.pos_embedding = nn.Parameter(torch.randn(window_size ** 2, + window_size ** 2)) + + self.to_out = nn.Sequential( + nn.Linear(inner_dim, dim), + nn.Dropout(drop_out) + ) + + def forward(self, x): + b, l, h, w, c, m = *x.shape, self.heads + + qkv = self.to_qkv(x).chunk(3, dim=-1) + new_h = h // self.window_size + new_w = w // self.window_size + + # q : (b, l, m, new_h*new_w, window_size^2, c_head) + q, k, v = map( + lambda t: rearrange(t, + 'b l (new_h w_h) (new_w w_w) (m c) -> b l m (new_h new_w) (w_h w_w) c', + m=m, w_h=self.window_size, + w_w=self.window_size), qkv) + # b l m h window_size window_size + dots = torch.einsum('b l m h i c, b l m h j c -> b l m h i j', + q, k, ) * self.scale + # consider prior knowledge of the local window + if self.relative_pos_embedding: + dots += self.pos_embedding[self.relative_indices[:, :, 0], + self.relative_indices[:, :, 1]] + else: + dots += self.pos_embedding + + attn = dots.softmax(dim=-1) + + out = torch.einsum('b l m h i j, b l m h j c -> b l m h i c', attn, v) + # b l h w c + out = rearrange(out, + 'b l m (new_h new_w) (w_h w_w) c -> b l (new_h w_h) (new_w w_w) (m c)', + m=self.heads, w_h=self.window_size, + w_w=self.window_size, + new_w=new_w, new_h=new_h) + out = self.to_out(out) + + return out + + +class PyramidWindowAttention(nn.Module): + def __init__(self, dim, heads, dim_heads, drop_out, window_size, + relative_pos_embedding, fuse_method='naive'): + super().__init__() + + assert isinstance(window_size, list) + assert isinstance(heads, list) + assert isinstance(dim_heads, list) + assert len(dim_heads) == len(heads) + + self.pwmsa = nn.ModuleList([]) + + for (head, dim_head, ws) in zip(heads, dim_heads, window_size): + self.pwmsa.append(BaseWindowAttention(dim, + head, + dim_head, + drop_out, + ws, + relative_pos_embedding)) + self.fuse_mehod = fuse_method + if fuse_method == 'split_attn': + self.split_attn = SplitAttn(256) + elif fuse_method == 'split_attn128': + self.split_attn = SplitAttn(128) + elif fuse_method == 'split_attn64': + self.split_attn = SplitAttn(64) + + def forward(self, x): + output = None + # naive fusion will just sum up all window attention output and do a + # mean + if self.fuse_mehod == 'naive': + for wmsa in self.pwmsa: + output = wmsa(x) if output is None else output + wmsa(x) + return output / len(self.pwmsa) + + elif self.fuse_mehod.startswith('split_attn'): + window_list = [] + for wmsa in self.pwmsa: + window_list.append(wmsa(x)) + return self.split_attn(window_list) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/naive_compress.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/naive_compress.py new file mode 100644 index 0000000000000000000000000000000000000000..49842af9ce56bb9581ef4438d65b9ff23a6facf3 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/naive_compress.py @@ -0,0 +1,32 @@ +import torch +import torch.nn as nn + + +class NaiveCompressor(nn.Module): + """ + A very naive compression that only compress on the channel. 
+ """ + def __init__(self, input_dim, compress_raito): + super().__init__() + self.encoder = nn.Sequential( + nn.Conv2d(input_dim, input_dim//compress_raito, kernel_size=3, + stride=1, padding=1), + nn.BatchNorm2d(input_dim//compress_raito, eps=1e-3, momentum=0.01), + nn.ReLU() + ) + self.decoder = nn.Sequential( + nn.Conv2d(input_dim//compress_raito, input_dim, kernel_size=3, + stride=1, padding=1), + nn.BatchNorm2d(input_dim, eps=1e-3, momentum=0.01), + nn.ReLU(), + nn.Conv2d(input_dim, input_dim, kernel_size=3, stride=1, padding=1), + nn.BatchNorm2d(input_dim, eps=1e-3, + momentum=0.01), + nn.ReLU() + ) + + def forward(self, x): + x = self.encoder(x) + x = self.decoder(x) + + return x \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/pillar_vfe.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/pillar_vfe.py new file mode 100644 index 0000000000000000000000000000000000000000..19fabe03682edde39302dfd69c7de08ad7f9fca9 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/pillar_vfe.py @@ -0,0 +1,155 @@ +""" +Pillar VFE, credits to OpenPCDet. +""" + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class PFNLayer(nn.Module): + def __init__(self, + in_channels, + out_channels, + use_norm=True, + last_layer=False): + super().__init__() + + self.last_vfe = last_layer + self.use_norm = use_norm + if not self.last_vfe: + out_channels = out_channels // 2 + + if self.use_norm: + self.linear = nn.Linear(in_channels, out_channels, bias=False) + self.norm = nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01) + else: + self.linear = nn.Linear(in_channels, out_channels, bias=True) + + self.part = 50000 + + def forward(self, inputs): + if inputs.shape[0] > self.part: + # nn.Linear performs randomly when batch size is too large + num_parts = inputs.shape[0] // self.part + part_linear_out = [self.linear( + inputs[num_part * self.part:(num_part + 1) * self.part]) + for num_part in range(num_parts + 1)] + x = torch.cat(part_linear_out, dim=0) + else: + x = self.linear(inputs) + torch.backends.cudnn.enabled = False + x = self.norm(x.permute(0, 2, 1)).permute(0, 2, + 1) if self.use_norm else x + torch.backends.cudnn.enabled = True + x = F.relu(x) + x_max = torch.max(x, dim=1, keepdim=True)[0] + + if self.last_vfe: + return x_max + else: + x_repeat = x_max.repeat(1, inputs.shape[1], 1) + x_concatenated = torch.cat([x, x_repeat], dim=2) + return x_concatenated + + +class PillarVFE(nn.Module): + def __init__(self, model_cfg, num_point_features, voxel_size, + point_cloud_range): + super().__init__() + self.model_cfg = model_cfg + + self.use_norm = self.model_cfg['use_norm'] + self.with_distance = self.model_cfg['with_distance'] + + self.use_absolute_xyz = self.model_cfg['use_absolute_xyz'] + num_point_features += 6 if self.use_absolute_xyz else 3 + if self.with_distance: + num_point_features += 1 + + self.num_filters = self.model_cfg['num_filters'] + assert len(self.num_filters) > 0 + num_filters = [num_point_features] + list(self.num_filters) + + pfn_layers = [] + for i in range(len(num_filters) - 1): + in_filters = num_filters[i] + out_filters = num_filters[i + 1] + pfn_layers.append( + PFNLayer(in_filters, out_filters, self.use_norm, + last_layer=(i >= len(num_filters) - 2)) + ) + self.pfn_layers = nn.ModuleList(pfn_layers) + + self.voxel_x = voxel_size[0] + self.voxel_y = voxel_size[1] + self.voxel_z = voxel_size[2] + self.x_offset = self.voxel_x / 2 + 
point_cloud_range[0] + self.y_offset = self.voxel_y / 2 + point_cloud_range[1] + self.z_offset = self.voxel_z / 2 + point_cloud_range[2] + + def get_output_feature_dim(self): + return self.num_filters[-1] + + @staticmethod + def get_paddings_indicator(actual_num, max_num, axis=0): + actual_num = torch.unsqueeze(actual_num, axis + 1) + max_num_shape = [1] * len(actual_num.shape) + max_num_shape[axis + 1] = -1 + max_num = torch.arange(max_num, + dtype=torch.int, + device=actual_num.device).view(max_num_shape) + paddings_indicator = actual_num.int() > max_num + return paddings_indicator + + def forward(self, batch_dict): + """encoding voxel feature using point-pillar method + Args: + voxel_features: [M, 32, 4] + voxel_num_points: [M,] + voxel_coords: [M, 4] + Returns: + features: [M,64], after PFN + """ + voxel_features, voxel_num_points, coords = \ + batch_dict['voxel_features'], batch_dict['voxel_num_points'], \ + batch_dict['voxel_coords'] + + points_mean = \ + voxel_features[:, :, :3].sum(dim=1, keepdim=True) / \ + voxel_num_points.type_as(voxel_features).view(-1, 1, 1) + f_cluster = voxel_features[:, :, :3] - points_mean + + f_center = torch.zeros_like(voxel_features[:, :, :3]) + f_center[:, :, 0] = voxel_features[:, :, 0] - ( + coords[:, 3].to(voxel_features.dtype).unsqueeze( + 1) * self.voxel_x + self.x_offset) + f_center[:, :, 1] = voxel_features[:, :, 1] - ( + coords[:, 2].to(voxel_features.dtype).unsqueeze( + 1) * self.voxel_y + self.y_offset) + f_center[:, :, 2] = voxel_features[:, :, 2] - ( + coords[:, 1].to(voxel_features.dtype).unsqueeze( + 1) * self.voxel_z + self.z_offset) + + if self.use_absolute_xyz: + features = [voxel_features, f_cluster, f_center] + else: + features = [voxel_features[..., 3:], f_cluster, f_center] + + if self.with_distance: + points_dist = torch.norm(voxel_features[:, :, :3], 2, 2, + keepdim=True) + features.append(points_dist) + features = torch.cat(features, dim=-1) + + voxel_count = features.shape[1] + mask = self.get_paddings_indicator(voxel_num_points, voxel_count, + axis=0) + mask = torch.unsqueeze(mask, -1).type_as(voxel_features) + features *= mask + for pfn in self.pfn_layers: + features = pfn(features) + features = features.squeeze() + batch_dict['pillar_features'] = features + + return batch_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/point_pillar_scatter.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/point_pillar_scatter.py new file mode 100644 index 0000000000000000000000000000000000000000..e74d14f844f90e9814117aa640536028dd9ae2fd --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/point_pillar_scatter.py @@ -0,0 +1,73 @@ +import torch +import torch.nn as nn + + +class PointPillarScatter(nn.Module): + def __init__(self, model_cfg): + super().__init__() + + self.model_cfg = model_cfg + self.num_bev_features = self.model_cfg['num_features'] + self.nx, self.ny, self.nz = model_cfg['grid_size'] # [704, 200, 1] + + assert self.nz == 1 + + def forward(self, batch_dict): + """ 将生成的pillar按照坐标索引还原到原空间中 + Args: + pillar_features:(M, 64) + coords:(M, 4) 第一维是batch_index + + Returns: + batch_spatial_features:(4, 64, H, W) + + |-------| + | | |-------------| + | | -> | * | + | | | | + | * | |-------------| + |-------| + + Lidar Point Cloud Feature Map + x-axis up Along with W + y-axis right Along with H + + Something like clockwise rotation of 90 degree. 
+ + """ + pillar_features, coords = batch_dict['pillar_features'], batch_dict[ + 'voxel_coords'] + batch_spatial_features = [] + batch_size = coords[:, 0].max().int().item() + 1 + + for batch_idx in range(batch_size): + spatial_feature = torch.zeros( + self.num_bev_features, + self.nz * self.nx * self.ny, + dtype=pillar_features.dtype, + device=pillar_features.device) + # batch_index的mask + batch_mask = coords[:, 0] == batch_idx + # 根据mask提取坐标 + this_coords = coords[batch_mask, :] # (batch_idx_voxel,4) # zyx order, x in [0,706], y in [0,200] + # 这里的坐标是b,z,y和x的形式,且只有一层,因此计算索引的方式如下 + indices = this_coords[:, 1] + this_coords[:, 2] * self.nx + this_coords[:, 3] + # 转换数据类型 + indices = indices.type(torch.long) + # 根据mask提取pillar_features + pillars = pillar_features[batch_mask, :] # (batch_idx_voxel,64) + pillars = pillars.t() # (64,batch_idx_voxel) + # 在索引位置填充pillars + spatial_feature[:, indices] = pillars + # 将空间特征加入list,每个元素为(64, self.nz * self.nx * self.ny) + batch_spatial_features.append(spatial_feature) + + batch_spatial_features = \ + torch.stack(batch_spatial_features, 0) + batch_spatial_features = \ + batch_spatial_features.view(batch_size, self.num_bev_features * + self.nz, self.ny, self.nx) # It put y axis(in lidar frame) as image height. [..., 200, 704] + batch_dict['spatial_features'] = batch_spatial_features + + return batch_dict + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/pose_graph_optim.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/pose_graph_optim.py new file mode 100644 index 0000000000000000000000000000000000000000..054bd8d23ee365e186e58e3ec2a5e42b8bc28802 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/pose_graph_optim.py @@ -0,0 +1,148 @@ +""" +This is pose graph optimizer, using g2o (bind to C++) +""" +import g2o +import numpy as np + +class PoseGraphOptimization2D(g2o.SparseOptimizer): + def __init__(self, verbose=False): + super().__init__() + # solver = g2o.BlockSolverSE2(g2o.LinearSolverCholmodSE2()) + solver = g2o.BlockSolverSE2(g2o.LinearSolverDenseSE2()) + solver = g2o.OptimizationAlgorithmLevenberg(solver) + super().set_algorithm(solver) + super().set_verbose(verbose) + + def optimize(self, max_iterations=1000): + super().initialize_optimization() + super().optimize(max_iterations) + + + def add_vertex(self, id, pose, fixed=False, SE2=True): + if SE2: + v = g2o.VertexSE2() + else: + v = g2o.VertexPointXY() + v.set_estimate(pose) + v.set_id(id) + v.set_fixed(fixed) + super().add_vertex(v) + + + def add_edge(self, vertices, measurement, + information=np.identity(3), + robust_kernel=None, SE2 = True): + """ + Args: + measurement: g2o.SE2 + """ + if SE2: + edge = g2o.EdgeSE2() + else: + edge = g2o.EdgeSE2PointXY() + + for i, v in enumerate(vertices): + if isinstance(v, int): + v = self.vertex(v) + edge.set_vertex(i, v) + + edge.set_measurement(measurement) # relative pose shape [3, 1] / [2, 1] + edge.set_information(information) # importance of each component shape [3, 3] / [2, 2] + if robust_kernel is not None: + edge.set_robust_kernel(robust_kernel) + super().add_edge(edge) + + def get_pose(self, id): + return self.vertex(id).estimate() + + +class PoseGraphOptimization(g2o.SparseOptimizer): + def __init__(self): + super().__init__() + solver = g2o.BlockSolverSE3(g2o.LinearSolverCholmodSE3()) + solver = g2o.OptimizationAlgorithmLevenberg(solver) + super().set_algorithm(solver) + super().set_verbose(True) + + def optimize(self, max_iterations=50): + 
super().initialize_optimization() + super().optimize(max_iterations) + + def add_vertex(self, id, pose, fixed=False): + v_se3 = g2o.VertexSE3() + v_se3.set_estimate(pose) + v_se3.set_id(id) + v_se3.set_fixed(fixed) + super().add_vertex(v_se3) + + def add_edge(self, vertices, measurement, + information=np.identity(6), + robust_kernel=None): + + edge = g2o.EdgeSE3() + for i, v in enumerate(vertices): + if isinstance(v, int): + v = self.vertex(v) + edge.set_vertex(i, v) + + edge.set_measurement(measurement) # relative pose, shape [4, 4] + edge.set_information(information) # importance of each component, shape [6, 6] + if robust_kernel is not None: + edge.set_robust_kernel(robust_kernel) + super().add_edge(edge) + + def get_pose(self, id): + return self.vertex(id).estimate() + + +if __name__ == "__main__": + pgo = PoseGraphOptimization() + + with open("/GPFS/rhome/yifanlu/workspace/g2o_test/noise.g2o","r") as f: + for line in f: + if line.startswith("VERTEX_SE3:QUAT"): + vertex_content = line.split(" ",1)[1] + vertex_content_array = np.fromstring(vertex_content, dtype=float, sep=" ") + ids = int(vertex_content_array[0]) + index = [0,1,2,6,3,4,5] + pose_array = vertex_content_array[1:][index] + + pose = np.eye(4) + pose[:3,3] = pose_array[:3] + pose[:3,:3] = g2o.Quaternion(pose_array[3:]).matrix() + pose = g2o.Isometry3d(pose) + + fixed = True if ids==6 else False + # fixed = False + pgo.add_vertex(id=ids, pose=pose, fixed=fixed) + + elif line.startswith("EDGE_SE3:QUAT"): + edge_content = line.split(" ", 1)[1] + edge_content_array = np.fromstring(edge_content, dtype=float, sep=" ") + + edge = [int(v) for v in edge_content_array[:2]] + index = [0,1,2,6,3,4,5] + pose_array = edge_content_array[2:2+7][index] + information_array = edge_content_array[2+7:] + + pose = np.eye(4) + pose[:3,3] = pose_array[:3] + pose[:3,:3] = g2o.Quaternion(pose_array[3:]).matrix() + pose = g2o.Isometry3d(pose) + + information = np.eye(6) + information[0,0] = information_array[0] + information[1,1] = information_array[6] + information[2,2] = information_array[11] + information[3,3] = information_array[15] + information[4,4] = information_array[18] + information[5,5] = information_array[20] + + pgo.add_edge(edge, pose, information) + + + print('num vertices:', len(pgo.vertices())) + print('num edges:', len(pgo.edges()), end='\n\n') + pgo.optimize() + + # pgo.save("out_pose_graph2.g2o") diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/refactor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/refactor.py new file mode 100644 index 0000000000000000000000000000000000000000..2f3d93913160fc224e980d92ecf8a4c87faeddbc --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/refactor.py @@ -0,0 +1,42 @@ +import torch +import torch.nn as nn +import numpy as np +from icecream import ic + +def flatten(l): + return [item for sublist in l for item in sublist] + +def refactor(batch_dict, lidar_agent_indicator): + agent_num = len(lidar_agent_indicator) + proposal_agentids_sample_list = batch_dict['agentid_fused'] # [sample1, sample2, ..., sample{batchnum}] + + lidar_matrix_list = [] + camera_matrix_list = [] + + # scatter agentid + for proposal_agentids_list in proposal_agentids_sample_list: # [[0,1,2],[1,2],[0,2],...] 
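# A toy, self-contained sketch (hypothetical values) of the per-sample matrices
# this loop builds: with 3 agents, lidar_agent_indicator = [1, 0, 1], and two
# fused proposals contributed by agents {0, 1} and {2}, the dense
# proposal-to-agent assignment matrix is split into a lidar part and a camera
# part by masking columns.
import numpy as np
import torch

lidar_agent_indicator = torch.tensor([1, 0, 1])            # agent 1 is camera-only
proposal_agentids_list = [torch.tensor([0, 1]), torch.tensor([2])]
agent_num = len(lidar_agent_indicator)
proposal_num = len(proposal_agentids_list)

rows = [i for i, ids in enumerate(proposal_agentids_list) for _ in ids]
cols = torch.cat(proposal_agentids_list).tolist()
indice = np.array([rows, cols], dtype=np.int64)
value = np.ones(len(rows), dtype=np.int64)
assign = torch.sparse_coo_tensor(indice, value, (proposal_num, agent_num)).to_dense()
# assign == [[1, 1, 0],
#            [0, 0, 1]]
lidar_matrix = assign * lidar_agent_indicator               # [[1, 0, 0], [0, 0, 1]]
camera_matrix = assign * (1 - lidar_agent_indicator)        # [[0, 1, 0], [0, 0, 0]]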
+ proposal_num = len(proposal_agentids_list) + + sp_row = [[i]*len(proposal_agentids_list[i]) for i in range(len(proposal_agentids_list))] + sp_row = flatten(sp_row) + sp_col = torch.cat(proposal_agentids_list).tolist() + + indice = np.array([sp_row, sp_col], dtype=np.int32) + value = np.ones_like(sp_row) + + lidar_matrix = torch.sparse_coo_tensor(indice, value, (proposal_num, agent_num), device=lidar_agent_indicator.device).to_dense() + camera_matrix = torch.sparse_coo_tensor(indice, value, (proposal_num, agent_num), device=lidar_agent_indicator.device).to_dense() + + lidar_mask = (lidar_agent_indicator) + camera_mask = (1 - lidar_agent_indicator) + + lidar_matrix *= lidar_mask + camera_matrix *= camera_mask + + lidar_matrix_list.append(lidar_matrix) + camera_matrix_list.append(camera_matrix) + + batch_dict['lidar_matrix_list'] = lidar_matrix_list + batch_dict['camera_matrix_list'] = camera_matrix_list + + return batch_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/resblock.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/resblock.py new file mode 100644 index 0000000000000000000000000000000000000000..f14305e65425cf1ac3ba7ee3250181884c283b09 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/resblock.py @@ -0,0 +1,372 @@ +import torch +from torch import Tensor +import torch.nn as nn +from typing import Type, Any, Callable, Union, List, Optional + + +__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', + 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', + 'wide_resnet50_2', 'wide_resnet101_2'] + + +def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d: + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=dilation, groups=groups, bias=False, dilation=dilation) + + +def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d: + """1x1 convolution""" + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) + + +class BasicBlock(nn.Module): + expansion: int = 1 + + def __init__( + self, + inplanes: int, + planes: int, + stride: int = 1, + downsample: Optional[nn.Module] = None, + groups: int = 1, + base_width: int = 64, + dilation: int = 1, + norm_layer: Optional[Callable[..., nn.Module]] = None + ) -> None: + super(BasicBlock, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + if groups != 1 or base_width != 64: + raise ValueError('BasicBlock only supports groups=1 and base_width=64') + if dilation > 1: + raise NotImplementedError("Dilation > 1 not supported in BasicBlock") + # Both self.conv1 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = norm_layer(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = norm_layer(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x: Tensor) -> Tensor: + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) + # while original implementation 
places the stride at the first 1x1 convolution(self.conv1) + # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. + # This variant is also known as ResNet V1.5 and improves accuracy according to + # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. + + expansion: int = 4 # original 4 + + def __init__( + self, + inplanes: int, + planes: int, + stride: int = 1, + downsample: Optional[nn.Module] = None, + groups: int = 1, + base_width: int = 64, + dilation: int = 1, + norm_layer: Optional[Callable[..., nn.Module]] = None + ) -> None: + super(Bottleneck, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + width = int(planes * (base_width / 64.)) * groups + # Both self.conv2 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv1x1(inplanes, width) + self.bn1 = norm_layer(width) + self.conv2 = conv3x3(width, width, stride, groups, dilation) + self.bn2 = norm_layer(width) + self.conv3 = conv1x1(width, planes * self.expansion) + self.bn3 = norm_layer(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x: Tensor) -> Tensor: + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class ResNetModified(nn.Module): + + def __init__( + self, + block: Type[Union[BasicBlock, Bottleneck]], + layers: List[int], # number of block in one layer + layer_strides: List[int], # stride after one layer + num_filters: List[int], # feature dim + zero_init_residual: bool = False, + groups: int = 1, + width_per_group: int = 64, + replace_stride_with_dilation: Optional[List[bool]] = None, + norm_layer: Optional[Callable[..., nn.Module]] = None, + inplanes = 64 + ) -> None: + super(ResNetModified, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + self._norm_layer = norm_layer + + self.inplanes = inplanes + self.dilation = 1 + if replace_stride_with_dilation is None: + # each element in the tuple indicates if we should replace + # the 2x2 stride with a dilated convolution instead + replace_stride_with_dilation = [False, False, False] + if len(replace_stride_with_dilation) != 3: + raise ValueError("replace_stride_with_dilation should be None " + "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) + self.groups = groups + self.base_width = width_per_group + + self.layernum = len(num_filters) + for i in range(self.layernum): + self.__setattr__(f"layer{i}", self._make_layer(block, num_filters[i], layers[i], stride=layer_strides[i])) + + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + # Zero-initialize the last BN in each residual branch, + # so that the residual branch starts with zeros, and each residual block behaves like an identity. 
+ # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 + if zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + nn.init.constant_(m.bn3.weight, 0) # type: ignore[arg-type] + elif isinstance(m, BasicBlock): + nn.init.constant_(m.bn2.weight, 0) # type: ignore[arg-type] + + def _make_layer(self, block: Type[Union[BasicBlock, Bottleneck]], planes: int, blocks: int, + stride: int = 1, dilate: bool = False) -> nn.Sequential: + norm_layer = self._norm_layer + downsample = None + previous_dilation = self.dilation + if dilate: + self.dilation *= stride + stride = 1 + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + conv1x1(self.inplanes, planes * block.expansion, stride), + norm_layer(planes * block.expansion), + ) + + layers = [] + # if stride != 1, the first block will downsample the feature map + # plane is the feature dim + # if Bottleneck, then the output dim is planes * block.expansion(4) + layers.append(block(self.inplanes, planes, stride, downsample, self.groups, + self.base_width, previous_dilation, norm_layer)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes, groups=self.groups, + base_width=self.base_width, dilation=self.dilation, + norm_layer=norm_layer)) + + return nn.Sequential(*layers) + + def _forward_impl(self, x: Tensor, return_interm: bool = True): + # See note [TorchScript super()] + interm_features = [] + for i in range(self.layernum): + x = eval(f"self.layer{i}")(x) + interm_features.append(x) + + if return_interm: + return interm_features + return x + + def forward(self, x: Tensor): + return self._forward_impl(x) + + +def _resnet( + arch: str, + block: Type[Union[BasicBlock, Bottleneck]], + layers: List[int], + pretrained: bool, + progress: bool, + **kwargs: Any +) -> ResNetModified: + model = ResNetModified(block, layers, **kwargs) + + return model + + +def resnet18(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNetModified: + r"""ResNet-18 model from + `"Deep Residual Learning for Image Recognition" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress, + **kwargs) + + +def resnet34(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNetModified: + r"""ResNet-34 model from + `"Deep Residual Learning for Image Recognition" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress, + **kwargs) + + +def resnet50(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNetModified: + r"""ResNet-50 model from + `"Deep Residual Learning for Image Recognition" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress, + **kwargs) + + +def resnet101(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNetModified: + r"""ResNet-101 model from + `"Deep Residual Learning for Image Recognition" `_. 
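# --- Illustrative sketch (not part of the patch): building the modified ResNet ---
# ResNetModified above takes per-stage block counts, strides and widths instead of the
# fixed torchvision layout and returns the intermediate feature map of every stage.
# A minimal usage sketch, assuming BasicBlock/ResNetModified from this file are in
# scope; the input size (4, 64, 200, 704) is only an example.
import torch

backbone = ResNetModified(
    BasicBlock,
    layers=[3, 4, 5],            # blocks per stage
    layer_strides=[2, 2, 2],     # stride applied by the first block of each stage
    num_filters=[64, 128, 256],  # output channels of each stage
)
x = torch.randn(4, 64, 200, 704)   # (B, C_in = inplanes, H, W)
feats = backbone(x)                # list of per-stage features
# Expected shapes: (4, 64, 100, 352), (4, 128, 50, 176), (4, 256, 25, 88)
for f in feats:
    print(f.shape)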
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress, + **kwargs) + + +def resnet152(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNetModified: + r"""ResNet-152 model from + `"Deep Residual Learning for Image Recognition" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress, + **kwargs) + + +def resnext50_32x4d(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNetModified: + r"""ResNeXt-50 32x4d model from + `"Aggregated Residual Transformation for Deep Neural Networks" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['groups'] = 32 + kwargs['width_per_group'] = 4 + return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3], + pretrained, progress, **kwargs) + + +def resnext101_32x8d(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNetModified: + r"""ResNeXt-101 32x8d model from + `"Aggregated Residual Transformation for Deep Neural Networks" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['groups'] = 32 + kwargs['width_per_group'] = 8 + return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3], + pretrained, progress, **kwargs) + + +def wide_resnet50_2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNetModified: + r"""Wide ResNet-50-2 model from + `"Wide Residual Networks" `_. + + The model is the same as ResNet except for the bottleneck number of channels + which is twice larger in every block. The number of channels in outer 1x1 + convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048 + channels, and in Wide ResNet-50-2 has 2048-1024-2048. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['width_per_group'] = 64 * 2 + return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3], + pretrained, progress, **kwargs) + + +def wide_resnet101_2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNetModified: + r"""Wide ResNet-101-2 model from + `"Wide Residual Networks" `_. + + The model is the same as ResNet except for the bottleneck number of channels + which is twice larger in every block. The number of channels in outer 1x1 + convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048 + channels, and in Wide ResNet-50-2 has 2048-1024-2048. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['width_per_group'] = 64 * 2 + return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3], + pretrained, progress, **kwargs) + + + + +if __name__=="__main__": + model = ResNetModified(BasicBlock, [3,4,5]) + input = torch.randn(4,64,200,704) + output = model(input) + from icecream import ic + for out in output: + ic(out.shape) diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/roi_head.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/roi_head.py new file mode 100644 index 0000000000000000000000000000000000000000..d8606c19de8b4c4e1e4c8bffe7bc8c1e50b926a9 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/roi_head.py @@ -0,0 +1,286 @@ +import copy +from icecream import ic +import torch.nn as nn +import torch +import numpy as np +from opencood.pcdet_utils.pointnet2.pointnet2_stack import \ + pointnet2_modules as pointnet2_stack_modules +from opencood.utils import common_utils +from opencood.pcdet_utils.iou3d_nms.iou3d_nms_utils import boxes_iou3d_gpu +from opencood.utils import box_utils + + +class RoIHead(nn.Module): + def __init__(self, model_cfg): + super().__init__() + self.model_cfg = model_cfg + input_channels = model_cfg['in_channels'] + self.code_size = 7 + + mlps = copy.copy(self.model_cfg['roi_grid_pool']['mlps']) + for k in range(len(mlps)): + mlps[k] = [input_channels] + mlps[k] + + self.roi_grid_pool_layer = pointnet2_stack_modules.StackSAModuleMSG( + radii=self.model_cfg['roi_grid_pool']['pool_radius'], + nsamples=self.model_cfg['roi_grid_pool']['n_sample'], + mlps=mlps, + use_xyz=True, + pool_method=self.model_cfg['roi_grid_pool']['pool_method'], + ) + + grid_size = self.model_cfg['roi_grid_pool']['grid_size'] + self.grid_size = grid_size + c_out = sum([x[-1] for x in mlps]) + pre_channel = grid_size * grid_size * grid_size * c_out + fc_layers = [self.model_cfg['n_fc_neurons']] * 2 + self.shared_fc_layers, pre_channel = self._make_fc_layers(pre_channel, + fc_layers) + + self.cls_layers, pre_channel = self._make_fc_layers(pre_channel, + fc_layers, + output_channels= + self.model_cfg[ + 'num_cls']) + self.iou_layers, _ = self._make_fc_layers(pre_channel, fc_layers, + output_channels= + self.model_cfg['num_cls']) + self.reg_layers, _ = self._make_fc_layers(pre_channel, fc_layers, + output_channels= + self.model_cfg[ + 'num_cls'] * 7) + + self._init_weights(weight_init='xavier') + + def _init_weights(self, weight_init='xavier'): + if weight_init == 'kaiming': + init_func = nn.init.kaiming_normal_ + elif weight_init == 'xavier': + init_func = nn.init.xavier_normal_ + elif weight_init == 'normal': + init_func = nn.init.normal_ + else: + raise NotImplementedError + + for m in self.modules(): + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d): + if weight_init == 'normal': + init_func(m.weight, mean=0, std=0.001) + else: + init_func(m.weight) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + nn.init.normal_(self.reg_layers[-1].weight, mean=0, std=0.001) + + def _make_fc_layers(self, input_channels, fc_list, output_channels=None): + fc_layers = [] + pre_channel = input_channels + for k in range(len(fc_list)): + fc_layers.extend([ + nn.Conv1d(pre_channel, fc_list[k], kernel_size=1, bias=False), + # nn.BatchNorm1d(fc_list[k]), + nn.ReLU() + ]) + pre_channel = fc_list[k] + if self.model_cfg['dp_ratio'] > 
0: + fc_layers.append(nn.Dropout(self.model_cfg['dp_ratio'])) + if output_channels is not None: + fc_layers.append( + nn.Conv1d(pre_channel, output_channels, kernel_size=1, + bias=True)) + fc_layers = nn.Sequential(*fc_layers) + return fc_layers, pre_channel + + def get_global_grid_points_of_roi(self, rois): + rois = rois.view(-1, rois.shape[-1]) + batch_size_rcnn = rois.shape[0] + + # (B, 6x6x6, 3) + local_roi_grid_points = self.get_dense_grid_points(rois, + batch_size_rcnn, + self.grid_size) + global_roi_grid_points = common_utils.rotate_points_along_z( + local_roi_grid_points.clone(), rois[:, 6] + ).squeeze(dim=1) + global_center = rois[:, 0:3].clone() + global_roi_grid_points += global_center.unsqueeze(dim=1) + return global_roi_grid_points, local_roi_grid_points + + @staticmethod + def get_dense_grid_points(rois, batch_size_rcnn, grid_size): + """ + Get the local coordinates of each grid point of a roi in the coordinate + system of the roi(origin lies in the center of this roi. + """ + faked_features = rois.new_ones((grid_size, grid_size, grid_size)) + dense_idx = torch.stack(torch.where(faked_features), + dim=1) # (N, 3) [x_idx, y_idx, z_idx] + dense_idx = dense_idx.repeat(batch_size_rcnn, 1, + 1).float() # (B, 6x6x6, 3) + + local_roi_size = rois.view(batch_size_rcnn, -1)[:, 3:6] + roi_grid_points = ( + dense_idx + 0.5) / grid_size * local_roi_size.unsqueeze( + dim=1) \ + - (local_roi_size.unsqueeze( + dim=1) / 2) # (B, 6x6x6, 3) + return roi_grid_points + + def assign_targets(self, batch_dict): + batch_dict['rcnn_label_dict'] = { + 'rois': [], + 'gt_of_rois': [], + 'gt_of_rois_src': [], + 'cls_tgt': [], + 'reg_tgt': [], + 'iou_tgt': [], + 'rois_anchor': [], + 'record_len': [], + 'rois_scores_stage1': [] + } + pred_boxes = batch_dict['boxes_fused'] + pred_scores = batch_dict['scores_fused'] + gt_boxes = [b[m][:, [0, 1, 2, 5, 4, 3, 6]].float() for b, m in + zip(batch_dict['object_bbx_center'], + batch_dict['object_bbx_mask'].bool())] # hwl -> lwh order + for rois, scores, gts in zip(pred_boxes, pred_scores, gt_boxes): # each frame + rois = rois[:, [0, 1, 2, 5, 4, 3, 6]] # hwl -> lwh + if gts.shape[0] == 0: + gts = rois.clone() + + ious = boxes_iou3d_gpu(rois, gts) + max_ious, gt_inds = ious.max(dim=1) + gt_of_rois = gts[gt_inds] + rcnn_labels = (max_ious > 0.3).float() + mask = torch.logical_not(rcnn_labels.bool()) + + # set negative samples back to rois, no correction in stage2 for them + gt_of_rois[mask] = rois[mask] + gt_of_rois_src = gt_of_rois.clone().detach() + + # canoical transformation + roi_center = rois[:, 0:3] + # TODO: roi_ry > 0 in pcdet + roi_ry = rois[:, 6] % (2 * np.pi) + gt_of_rois[:, 0:3] = gt_of_rois[:, 0:3] - roi_center + gt_of_rois[:, 6] = gt_of_rois[:, 6] - roi_ry + + # transfer LiDAR coords to local coords + gt_of_rois = common_utils.rotate_points_along_z( + points=gt_of_rois.view(-1, 1, gt_of_rois.shape[-1]), + angle=-roi_ry.view(-1) + ).view(-1, gt_of_rois.shape[-1]) + + # flip orientation if rois have opposite orientation + heading_label = (gt_of_rois[:, 6] + ( + torch.div(torch.abs(gt_of_rois[:, 6].min()), + (2 * np.pi), rounding_mode='trunc') + + 1) * 2 * np.pi) % (2 * np.pi) # 0 ~ 2pi + opposite_flag = (heading_label > np.pi * 0.5) & ( + heading_label < np.pi * 1.5) + + # (0 ~ pi/2, 3pi/2 ~ 2pi) + heading_label[opposite_flag] = (heading_label[ + opposite_flag] + np.pi) % ( + 2 * np.pi) + flag = heading_label > np.pi + heading_label[flag] = heading_label[ + flag] - np.pi * 2 # (-pi/2, pi/2) + heading_label = torch.clamp(heading_label, min=-np.pi / 2, + 
max=np.pi / 2) + gt_of_rois[:, 6] = heading_label + + # generate regression target + rois_anchor = rois.clone().detach().view(-1, self.code_size) + rois_anchor[:, 0:3] = 0 + rois_anchor[:, 6] = 0 + + reg_targets = box_utils.box_encode( + gt_of_rois.view(-1, self.code_size), rois_anchor + ) + + batch_dict['rcnn_label_dict']['rois'].append(rois) + batch_dict['rcnn_label_dict']['rois_scores_stage1'].append(scores) + batch_dict['rcnn_label_dict']['gt_of_rois'].append(gt_of_rois) + batch_dict['rcnn_label_dict']['gt_of_rois_src'].append( + gt_of_rois_src) + batch_dict['rcnn_label_dict']['cls_tgt'].append(rcnn_labels) + batch_dict['rcnn_label_dict']['reg_tgt'].append(reg_targets) + batch_dict['rcnn_label_dict']['iou_tgt'].append(max_ious) + batch_dict['rcnn_label_dict']['rois_anchor'].append(rois_anchor) + batch_dict['rcnn_label_dict']['record_len'].append(rois.shape[0]) + + + # cat list to tensor + for k, v in batch_dict['rcnn_label_dict'].items(): + if k == 'record_len': + continue + batch_dict['rcnn_label_dict'][k] = torch.cat(v, dim=0) + + return batch_dict + + def roi_grid_pool(self, batch_dict): + batch_size = len(batch_dict['record_len']) + rois = batch_dict['rcnn_label_dict']['rois'] + point_coords = batch_dict['point_coords'] + point_features = batch_dict['point_features'] + label_record_len = batch_dict['rcnn_label_dict']['record_len'] + + point_features = torch.cat(point_features, dim=0) + # (BxN, 6x6x6, 3) + global_roi_grid_points, local_roi_grid_points = \ + self.get_global_grid_points_of_roi(rois) + # (B, Nx6x6x6, 3) + global_roi_grid_points = global_roi_grid_points.view(batch_size, -1, 3) + + xyz = torch.cat(point_coords, dim=0) + xyz_batch_cnt = xyz.new_zeros(batch_size).int() + for bs_idx in range(batch_size): + xyz_batch_cnt[bs_idx] = len(point_coords[bs_idx]) + new_xyz = global_roi_grid_points.view(-1, 3) + new_xyz_batch_cnt = xyz.new_zeros(batch_size).int() + for bs_idx in range(batch_size): + new_xyz_batch_cnt[bs_idx] = label_record_len[ + bs_idx] * self.grid_size ** 3 + + pooled_points, pooled_features = self.roi_grid_pool_layer( + xyz=xyz[:, :3].contiguous(), + xyz_batch_cnt=xyz_batch_cnt, + new_xyz=new_xyz[:, :3].contiguous(), + new_xyz_batch_cnt=new_xyz_batch_cnt, + features=point_features.contiguous(), # weighted point features + ) # (M1 + M2 ..., C) + # (BxN, 6x6x6, C) + pooled_features = pooled_features.view(-1, self.grid_size ** 3, + pooled_features.shape[-1]) + + return pooled_features + + def forward(self, batch_dict): + batch_dict = self.assign_targets(batch_dict) + # RoI aware pooling + pooled_features = self.roi_grid_pool(batch_dict) # (BxN, 6x6x6, C) + + batch_size_rcnn = pooled_features.shape[0] + pooled_features = pooled_features.permute(0, 2, 1). 
\ + contiguous().view(batch_size_rcnn, -1, self.grid_size, + self.grid_size, + self.grid_size) # (BxN, C, 6, 6, 6) + shared_features = self.shared_fc_layers( + pooled_features.view(batch_size_rcnn, -1, 1)) + rcnn_cls = self.cls_layers(shared_features).transpose(1, + 2).contiguous().squeeze( + dim=1) # (B, 1 or 2) + rcnn_iou = self.iou_layers(shared_features).transpose(1, + 2).contiguous().squeeze( + dim=1) # (B, 1) + rcnn_reg = self.reg_layers(shared_features).transpose(1, + 2).contiguous().squeeze( + dim=1) # (B, C) + + batch_dict['stage2_out'] = { + 'rcnn_cls': rcnn_cls, + 'rcnn_iou': rcnn_iou, + 'rcnn_reg': rcnn_reg, + } + return batch_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/sparse_backbone_3d.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/sparse_backbone_3d.py new file mode 100644 index 0000000000000000000000000000000000000000..874b4d9523ad897097d3628fb3888454f8337d13 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/sparse_backbone_3d.py @@ -0,0 +1,146 @@ +from functools import partial + +import spconv +import torch.nn as nn + +try: # spconv1 + from spconv import SparseSequential, SubMConv3d, SparseConv3d, SparseInverseConv3d, SparseConvTensor +except: # spconv2 + from spconv.pytorch import SparseSequential, SubMConv3d, SparseConv3d, SparseInverseConv3d, SparseConvTensor + +def post_act_block(in_channels, out_channels, kernel_size, indice_key=None, stride=1, padding=0, + conv_type='subm', norm_fn=None): + + if conv_type == 'subm': + conv = SubMConv3d(in_channels, out_channels, kernel_size, bias=False, indice_key=indice_key) + elif conv_type == 'spconv': + conv = SparseConv3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, + bias=False, indice_key=indice_key) + elif conv_type == 'inverseconv': + conv = SparseInverseConv3d(in_channels, out_channels, kernel_size, indice_key=indice_key, bias=False) + else: + raise NotImplementedError + + m = SparseSequential( + conv, + norm_fn(out_channels), + nn.ReLU(), + ) + + return m + + +class VoxelBackBone8x(nn.Module): + def __init__(self, model_cfg, input_channels, grid_size, **kwargs): + super().__init__() + self.model_cfg = model_cfg + norm_fn = partial(nn.BatchNorm1d, eps=1e-3, momentum=0.01) + + self.sparse_shape = grid_size[::-1] + [1, 0, 0] + + self.conv_input = SparseSequential( + SubMConv3d(input_channels, 16, 3, padding=1, bias=False, indice_key='subm1'), + norm_fn(16), + nn.ReLU(), + ) + block = post_act_block + + self.conv1 = SparseSequential( + block(16, 16, 3, norm_fn=norm_fn, padding=1, indice_key='subm1'), + ) + + self.conv2 = SparseSequential( + # [1600, 1408, 41] <- [800, 704, 21] + block(16, 32, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv2', conv_type='spconv'), + block(32, 32, 3, norm_fn=norm_fn, padding=1, indice_key='subm2'), + block(32, 32, 3, norm_fn=norm_fn, padding=1, indice_key='subm2'), + ) + + self.conv3 = SparseSequential( + # [800, 704, 21] <- [400, 352, 11] + block(32, 64, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv3', conv_type='spconv'), + block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm3'), + block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm3'), + ) + + self.conv4 = SparseSequential( + # [400, 352, 11] <- [200, 176, 5] + block(64, 64, 3, norm_fn=norm_fn, stride=2, padding=(0, 1, 1), indice_key='spconv4', conv_type='spconv'), + block(64, 64, 3, norm_fn=norm_fn, padding=1, 
indice_key='subm4'), + block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm4'), + ) + + last_pad = 0 + if 'num_features_out' in self.model_cfg: + self.num_point_features = self.model_cfg['num_features_out'] + else: + self.num_point_features = 128 + self.conv_out = SparseSequential( + # [200, 150, 5] -> [200, 150, 2] + SparseConv3d(64, self.num_point_features, (3, 1, 1), stride=(2, 1, 1), padding=last_pad, + bias=False, indice_key='spconv_down2'), + norm_fn(self.num_point_features), + nn.ReLU(), + ) + + self.backbone_channels = { + 'x_conv1': 16, + 'x_conv2': 32, + 'x_conv3': 64, + 'x_conv4': 64 + } + + def forward(self, batch_dict): + """ + Args: + batch_dict: + batch_size: int + vfe_features: (num_voxels, C) + voxel_coords: (num_voxels, 4), [batch_idx, z_idx, y_idx, x_idx] + Returns: + batch_dict: + encoded_spconv_tensor: sparse tensor + """ + voxel_features, voxel_coords = batch_dict['voxel_features'], \ + batch_dict['voxel_coords'] + batch_size = batch_dict['batch_size'] + input_sp_tensor = SparseConvTensor( + features=voxel_features, + indices=voxel_coords.int(), + spatial_shape=self.sparse_shape, + batch_size=batch_size + ) + + x = self.conv_input(input_sp_tensor) + + x_conv1 = self.conv1(x) + x_conv2 = self.conv2(x_conv1) + x_conv3 = self.conv3(x_conv2) + x_conv4 = self.conv4(x_conv3) + + # for detection head + # [200, 176, 5] -> [200, 176, 2] + out = self.conv_out(x_conv4) + + batch_dict.update({ + 'encoded_spconv_tensor': out, + 'encoded_spconv_tensor_stride': 8 + }) + batch_dict.update({ + 'multi_scale_3d_features': { + 'x_conv1': x_conv1, + 'x_conv2': x_conv2, + 'x_conv3': x_conv3, + 'x_conv4': x_conv4, + } + }) + batch_dict.update({ + 'multi_scale_3d_strides': { + 'x_conv1': 1, + 'x_conv2': 2, + 'x_conv3': 4, + 'x_conv4': 8, + } + }) + + return batch_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/split_attn.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/split_attn.py new file mode 100644 index 0000000000000000000000000000000000000000..5390b16c494e07bf1e05e06db0279ad8122b6bad --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/split_attn.py @@ -0,0 +1,63 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class RadixSoftmax(nn.Module): + def __init__(self, radix, cardinality): + super(RadixSoftmax, self).__init__() + self.radix = radix + self.cardinality = cardinality + + def forward(self, x): + # x: (B, L, 1, 1, 3C) + batch = x.size(0) + cav_num = x.size(1) + + if self.radix > 1: + # x: (B, L, 1, 3, C) + x = x.view(batch, + cav_num, + self.cardinality, self.radix, -1) + x = F.softmax(x, dim=3) + # B, 3LC + x = x.reshape(batch, -1) + else: + x = torch.sigmoid(x) + return x + + +class SplitAttn(nn.Module): + def __init__(self, input_dim): + super(SplitAttn, self).__init__() + self.input_dim = input_dim + + self.fc1 = nn.Linear(input_dim, input_dim, bias=False) + self.bn1 = nn.LayerNorm(input_dim) + self.act1 = nn.ReLU() + self.fc2 = nn.Linear(input_dim, input_dim * 3, bias=False) + + self.rsoftmax = RadixSoftmax(3, 1) + + def forward(self, window_list): + # window list: [(B, L, H, W, C) * 3] + assert len(window_list) == 3, 'only 3 windows are supported' + + sw, mw, bw = window_list[0], window_list[1], window_list[2] + B, L = sw.shape[0], sw.shape[1] + + # global average pooling, B, L, H, W, C + x_gap = sw + mw + bw + # B, L, 1, 1, C + x_gap = x_gap.mean((2, 3), keepdim=True) + x_gap = 
self.act1(self.bn1(self.fc1(x_gap))) + # B, L, 1, 1, 3C + x_attn = self.fc2(x_gap) + # B L 1 1 3C + x_attn = self.rsoftmax(x_attn).view(B, L, 1, 1, -1) + + out = sw * x_attn[:, :, :, :, 0:self.input_dim] + \ + mw * x_attn[:, :, :, :, self.input_dim:2*self.input_dim] +\ + bw * x_attn[:, :, :, :, self.input_dim*2:] + + return out \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/torch_transformation_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/torch_transformation_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f96e301a760af673d915fec7a5d709ef34c3c3db --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/torch_transformation_utils.py @@ -0,0 +1,443 @@ +""" +torch_transformation_utils.py +""" +import os + +import torch +import torch.nn.functional as F +import numpy as np +import matplotlib.pyplot as plt +from icecream import ic + +def get_roi_and_cav_mask(shape, cav_mask, spatial_correction_matrix, + discrete_ratio, downsample_rate): + """ + Get mask for the combination of cav_mask and rorated ROI mask. + Parameters + ---------- + shape : tuple + Shape of (B, L, H, W, C). + cav_mask : torch.Tensor + Shape of (B, L). + spatial_correction_matrix : torch.Tensor + Shape of (B, L, 4, 4) + discrete_ratio : float + Discrete ratio. + downsample_rate : float + Downsample rate. + + Returns + ------- + com_mask : torch.Tensor + Combined mask with shape (B, H, W, L, 1). + + """ + B, L, H, W, C = shape + C = 1 + # (B,L,4,4) + dist_correction_matrix = get_discretized_transformation_matrix( + spatial_correction_matrix, discrete_ratio, + downsample_rate) + # (B*L,2,3) + T = get_transformation_matrix( + dist_correction_matrix.reshape(-1, 2, 3), (H, W)) + # (B,L,1,H,W) + roi_mask = get_rotated_roi((B, L, C, H, W), T) + # (B,L,1,H,W) + com_mask = combine_roi_and_cav_mask(roi_mask, cav_mask) + # (B,H,W,1,L) + com_mask = com_mask.permute(0,3,4,2,1) + return com_mask + + +def combine_roi_and_cav_mask(roi_mask, cav_mask): + """ + Combine ROI mask and CAV mask + + Parameters + ---------- + roi_mask : torch.Tensor + Mask for ROI region after considering the spatial transformation/correction. + cav_mask : torch.Tensor + Mask for CAV to remove padded 0. + + Returns + ------- + com_mask : torch.Tensor + Combined mask. + """ + # (B, L, 1, 1, 1) + cav_mask = cav_mask.unsqueeze(2).unsqueeze(3).unsqueeze(4) + # (B, L, C, H, W) + cav_mask = cav_mask.expand(roi_mask.shape) + # (B, L, C, H, W) + com_mask = roi_mask * cav_mask + return com_mask + + +def get_rotated_roi(shape, correction_matrix): + """ + Get rorated ROI mask. + + Parameters + ---------- + shape : tuple + Shape of (B,L,C,H,W). + correction_matrix : torch.Tensor + Correction matrix with shape (N,2,3). + + Returns + ------- + roi_mask : torch.Tensor + Roated ROI mask with shape (N,2,3). + + """ + B, L, C, H, W = shape + # To reduce the computation, we only need to calculate the + # mask for the first channel. + # (B,L,1,H,W) + x = torch.ones((B, L, 1, H, W)).to(correction_matrix.dtype).to( + correction_matrix.device) + # (B*L,1,H,W) + roi_mask = warp_affine(x.reshape(-1, 1, H, W), correction_matrix, + dsize=(H, W), mode="nearest") + # (B,L,C,H,W) + roi_mask = torch.repeat_interleave(roi_mask, C, dim=1).reshape(B, L, C, H, + W) + return roi_mask + + +def get_discretized_transformation_matrix(matrix, discrete_ratio, + downsample_rate): + """ + Get disretized transformation matrix. 
+ Parameters + ---------- + matrix : torch.Tensor + Shape -- (B, L, 4, 4) where B is the batch size, L is the max cav + number. + discrete_ratio : float + Discrete ratio. + downsample_rate : float or int + downsample_rate + + discrete_ratio * downsample_rate = ___ meter one pixel, in the current feature map. + + Returns + ------- + matrix : torch.Tensor + Output transformation matrix in 2D with shape (B, L, 2, 3), + including 2D transformation and 2D rotation. + transformation is pixel level + + """ + matrix = matrix[:, :, [0, 1], :][:, :, :, [0, 1, 3]] + # normalize the x,y transformation + matrix[:, :, :, -1] = matrix[:, :, :, -1] \ + / (discrete_ratio * downsample_rate) + + return matrix.type(dtype=torch.float) + + +def _torch_inverse_cast(input): + r""" + Helper function to make torch.inverse work with other than fp32/64. + The function torch.inverse is only implemented for fp32/64 which makes + impossible to be used by fp16 or others. What this function does, + is cast input data type to fp32, apply torch.inverse, + and cast back to the input dtype. + Args: + input : torch.Tensor + Tensor to be inversed. + + Returns: + out : torch.Tensor + Inversed Tensor. + + """ + dtype = input.dtype + if dtype not in (torch.float32, torch.float64): + dtype = torch.float32 + out = torch.inverse(input.to(dtype)).to(input.dtype) + return out + + +def normal_transform_pixel( + height, width, device, dtype, eps=1e-14): + r""" + Compute the normalization matrix from image size in pixels to [-1, 1]. + Args: + height : int + Image height. + width : int + Image width. + device : torch.device + Output tensor devices. + dtype : torch.dtype + Output tensor data type. + eps : float + Epsilon to prevent divide-by-zero errors. + + Returns: + tr_mat : torch.Tensor + Normalized transform with shape :math:`(1, 3, 3)`. + """ + tr_mat = torch.tensor( + [[1.0, 0.0, -1.0], [0.0, 1.0, -1.0], [0.0, 0.0, 1.0]], device=device, + dtype=dtype) # 3x3 + + # prevent divide by zero bugs + width_denom = eps if width == 1 else width - 1.0 + height_denom = eps if height == 1 else height - 1.0 + + tr_mat[0, 0] = tr_mat[0, 0] * 2.0 / width_denom + tr_mat[1, 1] = tr_mat[1, 1] * 2.0 / height_denom + + return tr_mat.unsqueeze(0) # 1x3x3 + + +def eye_like(n, B, device, dtype): + r""" + Return a 2-D tensor with ones on the diagonal and + zeros elsewhere with the same batch size as the input. + Args: + n : int + The number of rows :math:`(n)`. + B : int + Btach size. + device : torch.device + Devices of the output tensor. + dtype : torch.dtype + Data type of the output tensor. + + Returns: + The identity matrix with the shape :math:`(B, n, n)`. + """ + + identity = torch.eye(n, device=device, dtype=dtype) + return identity[None].repeat(B, 1, 1) + + +def normalize_homography(dst_pix_trans_src_pix, dsize_src, dsize_dst=None): + r""" + Normalize a given homography in pixels to [-1, 1]. + Args: + dst_pix_trans_src_pix : torch.Tensor + Homography/ies from source to destination to be normalized with + shape :math:`(B, 3, 3)`. + dsize_src : Tuple[int, int] + Size of the source image (height, width). + dsize_dst : Tuple[int, int] + Size of the destination image (height, width). + + Returns: + dst_norm_trans_src_norm : torch.Tensor + The normalized homography of shape :math:`(B, 3, 3)`. 
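# --- Illustrative sketch (not part of the patch): pixel -> [-1, 1] normalization ---
# normal_transform_pixel above builds the 3x3 matrix that maps pixel coordinates to the
# [-1, 1] range expected by F.grid_sample; normalize_homography conjugates a pixel-space
# homography with these matrices on both sides. A quick check with an illustrative
# 100 x 200 image, assuming the functions in this file are in scope:
import torch

H, W = 100, 200
T = normal_transform_pixel(H, W, device='cpu', dtype=torch.float32)  # (1, 3, 3)
corners_pix = torch.tensor([[0.0, 0.0, 1.0],          # top-left pixel
                            [W - 1.0, H - 1.0, 1.0]])  # bottom-right pixel
corners_norm = (T[0] @ corners_pix.T).T
# corners_norm[:, :2] is approximately [[-1, -1], [1, 1]]
print(corners_norm[:, :2])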
+ """ + if dsize_dst is None: + dsize_dst = dsize_src + # source and destination sizes + src_h, src_w = dsize_src + dst_h, dst_w = dsize_dst + device = dst_pix_trans_src_pix.device + dtype = dst_pix_trans_src_pix.dtype + # compute the transformation pixel/norm for src/dst + src_norm_trans_src_pix = normal_transform_pixel(src_h, src_w, device, + dtype).to( + dst_pix_trans_src_pix) + + src_pix_trans_src_norm = _torch_inverse_cast(src_norm_trans_src_pix) + dst_norm_trans_dst_pix = normal_transform_pixel(dst_h, dst_w, device, + dtype).to( + dst_pix_trans_src_pix) + # compute chain transformations + dst_norm_trans_src_norm: torch.Tensor = dst_norm_trans_dst_pix @ ( + dst_pix_trans_src_pix @ src_pix_trans_src_norm) + return dst_norm_trans_src_norm + + +def get_rotation_matrix2d(M, dsize): + r""" + Return rotation matrix for torch.affine_grid based on transformation matrix. + Args: + M : torch.Tensor + Transformation matrix with shape :math:`(B, 2, 3)`. + dsize : Tuple[int, int] + Size of the source image (height, width). + + Returns: + R : torch.Tensor + Rotation matrix with shape :math:`(B, 2, 3)`. + """ + H, W = dsize + B = M.shape[0] + center = torch.Tensor([W / 2, H / 2]).to(M.dtype).to(M.device).unsqueeze(0) + shift_m = eye_like(3, B, M.device, M.dtype) + shift_m[:, :2, 2] = center + + shift_m_inv = eye_like(3, B, M.device, M.dtype) + shift_m_inv[:, :2, 2] = -center + + rotat_m = eye_like(3, B, M.device, M.dtype) + rotat_m[:, :2, :2] = M[:, :2, :2] + affine_m = shift_m @ rotat_m @ shift_m_inv + return affine_m[:, :2, :] # Bx2x3 + + +def get_transformation_matrix(M, dsize): + r""" + Return transformation matrix for torch.affine_grid. + Args: + M : torch.Tensor + Transformation matrix with shape :math:`(N, 2, 3)`. + dsize : Tuple[int, int] + Size of the source image (height, width). + + Returns: + T : torch.Tensor + Transformation matrix with shape :math:`(N, 2, 3)`. + """ + T = get_rotation_matrix2d(M, dsize) + T[..., 2] += M[..., 2] + return T + + +def convert_affinematrix_to_homography(A): + r""" + Convert to homography coordinates + Args: + A : torch.Tensor + The affine matrix with shape :math:`(B,2,3)`. + + Returns: + H : torch.Tensor + The homography matrix with shape of :math:`(B,3,3)`. + """ + H: torch.Tensor = torch.nn.functional.pad(A, [0, 0, 0, 1], "constant", + value=0.0) + H[..., -1, -1] += 1.0 + return H + + +def warp_affine_simple(src, M, dsize, + mode='bilinear', + padding_mode='zeros', + align_corners=False): + + B, C, H, W = src.size() + grid = F.affine_grid(M, + [B, C, dsize[0], dsize[1]], + align_corners=align_corners).to(src) + return F.grid_sample(src, grid, align_corners=align_corners) + +def warp_affine( + src, M, dsize, + mode='bilinear', + padding_mode='zeros', + align_corners=True): + r""" + Transform the src based on transformation matrix M. + Args: + src : torch.Tensor + Input feature map with shape :math:`(B,C,H,W)`. + M : torch.Tensor + Transformation matrix with shape :math:`(B,2,3)`. + dsize : tuple + Tuple of output image H_out and W_out. + mode : str + Interpolation methods for F.grid_sample. + padding_mode : str + Padding methods for F.grid_sample. + align_corners : boolean + Parameter of F.affine_grid. + + Returns: + Transformed features with shape :math:`(B,C,H,W)`. 
+ """ + + B, C, H, W = src.size() + + # we generate a 3x3 transformation matrix from 2x3 affine + M_3x3 = convert_affinematrix_to_homography(M) + dst_norm_trans_src_norm = normalize_homography(M_3x3, (H, W), dsize) + + # src_norm_trans_dst_norm = torch.inverse(dst_norm_trans_src_norm) + src_norm_trans_dst_norm = _torch_inverse_cast(dst_norm_trans_src_norm) + + grid = F.affine_grid(src_norm_trans_dst_norm[:, :2, :], + [B, C, dsize[0], dsize[1]], + align_corners=align_corners) + + return F.grid_sample(src.half() if grid.dtype==torch.half else src, + grid, align_corners=align_corners, mode=mode, + padding_mode=padding_mode) + + +class Test: + """ + Test the transformation in this file. + The methods in this class are not supposed to be used outside of this file. + """ + + def __init__(self): + pass + + @staticmethod + def load_img(): + torch.manual_seed(0) + x = torch.randn(1, 5, 16, 400, 200) * 100 + # x = torch.ones(1, 5, 16, 400, 200) + return x + + @staticmethod + def load_raw_transformation_matrix(N): + a = 90 / 180 * np.pi + matrix = torch.Tensor([[np.cos(a), -np.sin(a), 10], + [np.sin(a), np.cos(a), 10]]) + matrix = torch.repeat_interleave(matrix.unsqueeze(0).unsqueeze(0), N, + dim=1) + return matrix + + @staticmethod + def load_raw_transformation_matrix2(N, alpha): + a = alpha / 180 * np.pi + matrix = torch.Tensor([[np.cos(a), -np.sin(a), 0, 0], + [np.sin(a), np.cos(a), 0, 0]]) + matrix = torch.repeat_interleave(matrix.unsqueeze(0).unsqueeze(0), N, + dim=1) + return matrix + + @staticmethod + def test(): + img = Test.load_img() + B, L, C, H, W = img.shape + raw_T = Test.load_raw_transformation_matrix(5) + T = get_transformation_matrix(raw_T.reshape(-1, 2, 3), (H, W)) + img_rot = warp_affine(img.reshape(-1, C, H, W), T, (H, W)) + print(img_rot[0, 0, :, :]) + plt.matshow(img_rot[0, 0, :, :]) + plt.show() + + @staticmethod + def test_combine_roi_and_cav_mask(): + B = 2 + L = 5 + C = 16 + H = 300 + W = 400 + # 2, 5 + cav_mask = torch.Tensor([[1, 1, 1, 0, 0], [1, 0, 0, 0, 0]]) + x = torch.zeros(B, L, C, H, W) + correction_matrix = Test.load_raw_transformation_matrix2(5, 10) + correction_matrix = torch.cat([correction_matrix, correction_matrix], + dim=0) + mask = get_roi_and_cav_mask((B, L, H, W, C), cav_mask, + correction_matrix, 0.4, 4) + plt.matshow(mask[0, :, :, 0, 0]) + plt.show() + + + +if __name__ == "__main__": + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' + Test.test_combine_roi_and_cav_mask() diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/v2v_robust_module.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/v2v_robust_module.py new file mode 100644 index 0000000000000000000000000000000000000000..56ab92444117cc2ac3f8efb9bd11bd8bd37e443f --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/v2v_robust_module.py @@ -0,0 +1,403 @@ +from icecream import ic +import torch +import math +import torch.nn as nn +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple +from opencood.utils.transformation_utils import pose_to_tfm, tfm_to_pose_torch, tfm_to_xycs_torch, xycs_to_tfm_torch + +def regroup(x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + +# Part1, Pose Regression Module + +class PoseRegression(nn.Module): + """ + Args: + in_ch: 2*C + + forward: + x: [N,2C,H,W] concatenated feature + + Returns: + [N, 3]: x, y, yaw + + """ + def __init__(self, in_ch=512, 
hidden_ch=256): + super(PoseRegression, self).__init__() + self.model = nn.Sequential( + nn.Conv2d(in_ch, hidden_ch, kernel_size=(3, 3), padding=1), + nn.LeakyReLU(negative_slope=0.01), + nn.MaxPool2d(kernel_size=2, stride=2, padding=0), + nn.Conv2d(hidden_ch, hidden_ch, kernel_size=(3, 3), padding=1), + nn.LeakyReLU(negative_slope=0.01), + nn.MaxPool2d(kernel_size=2, stride=2, padding=0), + nn.Conv2d(hidden_ch, hidden_ch, kernel_size=(3, 3), padding=1), + nn.LeakyReLU(negative_slope=0.01), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.Conv2d( + hidden_ch, hidden_ch, kernel_size=(3, 3), stride=(2, 2), padding=1 + ), + nn.LeakyReLU(negative_slope=0.01), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.AdaptiveAvgPool2d(output_size=1), + nn.Flatten(), + nn.Linear(in_features=hidden_ch, out_features=hidden_ch, bias=True), + nn.LeakyReLU(negative_slope=0.01), + nn.Linear(in_features=hidden_ch, out_features=hidden_ch, bias=True), + nn.LeakyReLU(negative_slope=0.01), + nn.Linear(in_features=hidden_ch, out_features=3, bias=True), + ) + + def forward(self, x): + + pose_reg = self.model(x) + return pose_reg + + + +class PoseRegressionWraper(nn.Module): + """ + Args: + features: [sum(cav), C, H, W], + record_len: list + pairwise_t_matrix: [B, L, L, 4, 4], original pairwise_t_matrix, noise contains + Retuens: + pairwise_t_matrix_new: [B, L, L, 4, 4], the relative pose after correction. + """ + def __init__(self, in_ch, hidden_ch, affine_parameter): + super(PoseRegressionWraper, self).__init__() + self.pose_regression = PoseRegression( + in_ch=in_ch, hidden_ch=hidden_ch + ) + self.H = affine_parameter['H'] + self.W = affine_parameter['W'] + self.downsample_rate = affine_parameter['downsample_rate'] + self.discrete_ratio = affine_parameter['discrete_ratio'] + + def forward(self, features, record_len, pairwise_t_matrix): + _, C, H, W = features.shape + B, L = pairwise_t_matrix.shape[:2] + split_x = regroup(features, record_len) + pairwise_t_matrix_new = torch.eye(4, device=pairwise_t_matrix.device).view(1,1,1,4,4).repeat(B,L,L,1,1) + pose_corr_matrix = torch.zeros((B,L,L,3),device=pairwise_t_matrix.device) + for b in range(B): + N = record_len[b] + agent_features = split_x[b] + for i in range(N): + t_matrix = pairwise_t_matrix[b] + t_matrix = t_matrix[:,:,[0, 1],:][:,:,:,[0, 1, 3]] # [L, L, 2, 3] + t_matrix[...,0,1] = t_matrix[...,0,1] * H / W + t_matrix[...,1,0] = t_matrix[...,1,0] * W / H + t_matrix[...,0,2] = t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + t_matrix[...,1,2] = t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + # (N,C,H,W) + neighbors = warp_affine_simple(agent_features, t_matrix[i, :N, :, :], (self.H, self.W)) + # (N,C,H,W) + ego_agent_feature = agent_features[i].unsqueeze(0).repeat(N, 1, 1, 1) + # (N,2C,H,W) + neighbor_feature = torch.cat( + [neighbors, ego_agent_feature], dim=1) + # (N,3) + pose_corr = self.pose_regression(neighbor_feature) + pose_corr_matrix[b,i,:N] = pose_corr + + # (N, 4, 4) + pose_corr_tfm = pose_to_tfm(pose_corr) + pairwise_t_matrix_new[b,i,:N] = pose_corr_tfm @ pairwise_t_matrix[b,i,:N] + + return pose_corr_matrix, pairwise_t_matrix_new + + + +# Part 2, Global Consistent Module +def get_intersection(pairwise_t_matrix, affine_parameter): + """ get intersection from pairwise_t_matrix + + Args: + pairwise_t_matrix: torch.Tensor, shape [L, L, 4, 4] + pairwise transformation matrix for one frame. + pairwise_t_matrix[i,j] = Tji, i is ego + affine_parameter: dict + H, W, etc. 
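# --- Illustrative sketch (not part of the patch): pose-error regression head ---
# PoseRegression above consumes the ego feature concatenated channel-wise with a
# neighbor's warped feature and regresses an (x, y, yaw) correction; the wrapper then
# turns that correction into an updated pairwise transform via pose_to_tfm. A minimal
# call with illustrative sizes (C = 256, so in_ch = 2C = 512):
import torch

net = PoseRegression(in_ch=512, hidden_ch=256)
ego_and_neighbor = torch.randn(3, 512, 100, 252)  # (N, 2C, H, W), sizes illustrative
pose_corr = net(ego_and_neighbor)                 # (N, 3): x, y, yaw correction
print(pose_corr.shape)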
+ + + Returns: + intersection: torch.Tensor, shape [L, L] + """ + H = affine_parameter['H'] + W = affine_parameter['W'] + downsample_rate = affine_parameter['downsample_rate'] + discrete_ratio = affine_parameter['discrete_ratio'] + intersections = [] + + L = pairwise_t_matrix.shape[0] + one_tensor = torch.zeros((L,1,H,W), device=pairwise_t_matrix.device) + for i in range(L): + t_matrix = pairwise_t_matrix[:,:,[0, 1],:][:,:,:,[0, 1, 3]] # [L, L, 2, 3] + t_matrix[...,0,1] = t_matrix[...,0,1] * H / W + t_matrix[...,1,0] = t_matrix[...,1,0] * W / H + t_matrix[...,0,2] = t_matrix[...,0,2] / (downsample_rate * discrete_ratio * W) * 2 + t_matrix[...,1,2] = t_matrix[...,1,2] / (downsample_rate * discrete_ratio * H) * 2 + + # [L,1,H,W] + neighbors = warp_affine_simple(one_tensor, t_matrix[i, :L, :, :], (H, W)) + intersection = torch.sum(neighbors, dim=[1,2,3]) / (H * W) # [L,] + intersections.append(intersection) + + # [L, L], intersections[i,:], ego is i + intersections = torch.stack(intersections) + + # if intersection is zero, may meet nan later. + eps = 0.01 + intersections += eps + + return intersections + + + + +def WeightedMLE(pose, pairwise_t_matrix, weight): + """ Weighted MLE for estimate mu and sigma of multivariate student t distribution. + simutanously for all nodes + Args: + pose: [N,3] + pairwise_t_matrix: [L, L, 4, 4] + weight: [L, L] + + Returns: + pose_mu: [N, 3] , but [N, 4] now + pose_sigma: [N, 3, 3], but [N, 4] now + """ + + N = pose.shape[0] + mu_list = [] + sigma_list = [] + + for i in range(N): + + neighbor_ids = list(range(N)) + neighbor_ids.remove(i) + + weights = weight[i,neighbor_ids].repeat(2) # [2(N-1)] + relative_pose1 = pairwise_t_matrix[i,neighbor_ids] # [N-1, 4, 4] Tji + relative_pose2 = pairwise_t_matrix[neighbor_ids,i] # [N-1, 4, 4] Tij + relative_pose2 = torch.inverse(relative_pose2) + relative_pose = torch.cat([relative_pose1,relative_pose2], dim=0) # [2(N-1), 4, 4] + + tfm = pose_to_tfm(pose[neighbor_ids]).repeat(2,1,1) # [2(N-1), 4, 4] + samples = tfm @ relative_pose # [2(N-1), 4, 4] + # here is one problem, -179 and +179 degree. They are close actually. + # so we use cos and sin to replace angle + samples = tfm_to_xycs_torch(samples).to(torch.float64) # [N, 4] + + + mu = samples.median(0).values + Sigma = torch.eye(4, device=pose.device, dtype=torch.float64) + small_identity = torch.eye(4, device=pose.device, dtype=torch.float64) * 0.05 + + diff = mu[None] - samples + + v = 2 + for _ in range(15): + eta = (v + mu.size(0)) / ( + v + torch.einsum("ni,ij,nj->n", diff, Sigma.inverse(), diff) + ) + mu = torch.einsum("n,n,ni->i", weights, eta, samples) / (weights * eta).sum() + diff = mu[None] - samples + # Sigma = torch.einsum('n,n,ni,nj->ij', weights, w, diff, diff) / weights.sum() + Sigma = ( + torch.einsum("n,ni,nj->ij", eta, diff, diff) / diff.size(0) + small_identity + ) + + mu_list.append(mu.to(torch.float32)) + sigma_list.append(Sigma.to(torch.float32)) + + pose_mu = torch.stack(mu_list) + pose_sigma = torch.stack(sigma_list) + + return pose_mu, pose_sigma + + +def WeightedEM(lidar_pose, pairwise_t_matrix, intersection): + """Weighted EM algorithm, for a single frame, not batch data + Args: + lidar_pose : torch.Tenosr + shape [N, 3] + pairwise_t_matrix: torch.Tensor + shape [L, L, 4, 4] + intersection: torch.Tensor + shape [L, L] + + Returns: + pose_mu : torch.Tensor + new lidar pose after correction. 
shape [N, 3] + """ + num_iters = 10 + pose = lidar_pose + weight = torch.ones_like(intersection, device=intersection.device) + + for k in range(num_iters): + pose_mu, pose_sigma = WeightedMLE(pose, pairwise_t_matrix, weight) # [N, 4], [N, 4, 4] + weight = update_weight(pose_mu, pose_sigma, pairwise_t_matrix, intersection) + + N = lidar_pose.shape[0] + pose_new = torch.zeros((N,3), device=lidar_pose.device, dtype=lidar_pose.dtype) + pose_new[:,:2] = pose_mu[:,:2] + pose_new[:,2] = torch.rad2deg(torch.atan2(pose_mu[:,3], pose_mu[:,2])) # sin, cos + + return pose_new + +def update_weight(pose_mu, pose_sigma, pairwise_t_matrix, intersection): + """ using the close form to update weight w. + Args: + pose_mu: [N,3], but [N, 4] now + pose_sigma: [N, 3, 3], but [N, 4, 4] now + pairwise_t_matrix: [L,L,4,4] + interesection: [L, L] + """ + k = 120 + df = 2 # degree of freedom + L = intersection.shape[0] + N = pose_mu.shape[0] + weight = torch.zeros_like(intersection, device=intersection.device) + for i in range(N): + for j in range(N): + if i!=j: + pose_estimate1 = xycs_to_tfm_torch(pose_mu[[j]])[0] @ pairwise_t_matrix[i,j] # [4,4] + pose_estimate2 = xycs_to_tfm_torch(pose_mu[[i]])[0] @ torch.inverse(pairwise_t_matrix[i,j]) # [4,4] + pose_estimate = torch.stack([pose_estimate1, pose_estimate2]) # [2, 4, 4] + pose_estimate = tfm_to_xycs_torch(pose_estimate) # [2, 4] + weight[i,j] = k * intersection[i,j] / (k - log_t(pose_estimate, pose_mu[i], pose_sigma[i], df).sum()) + + return weight + + + +def log_t(x, mu, Sigma, df): + """ log pdf of t distribution + Args: + x: [N, 3] + mu: [3,] + Sigma: [3,3] + df: int, degree of freedom + + Returns: + log_pdf: log of the pdf + """ + + assert len(x.shape) == 2 + n, p = x.shape + # assert mu.shape[0] == p # for now, allow multiple mu + assert Sigma.shape == (p, p) + + v = torch.as_tensor(df, dtype=x.dtype, device=x.device) + p = torch.as_tensor(p, dtype=x.dtype, device=x.device) + pi = torch.tensor(math.pi, dtype=x.dtype, device=x.device) + half_v = v / 2.0 + half_p = p / 2.0 + + log_num = (half_v + half_p).lgamma() + log_denom = half_v.lgamma() + half_p * (v.log() + pi.log()) + 0.5 * Sigma.logdet() + + d = x - mu + log_val = -(half_p + half_v) * torch.log( + 1 + torch.einsum("ni,ij,nj->n", d, Sigma.inverse(), d) / v + ) + + log_pdf = log_num - log_denom + log_val + + return log_pdf + + +# Part 3, Attention Module + +class Attention(nn.Module): + """ + Args: + in_ch: 2*C + + forward: + x: [N,2C,H,W] concatenated feature + + """ + def __init__(self, in_ch, hidden_ch=160): + super(Attention, self).__init__() + self.model = nn.Sequential( + nn.Conv2d(in_ch, hidden_ch, 3, 1, 1), + nn.LeakyReLU(negative_slope=0.01), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.Conv2d(hidden_ch, hidden_ch, 3, 1, 1), + nn.LeakyReLU(negative_slope=0.01), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.AdaptiveMaxPool2d(output_size=1), + nn.Flatten(), + nn.Linear(in_features=hidden_ch, out_features=1, bias=True), + nn.Sigmoid(), + ) + + def forward(self, x): + out = self.model(x) + return out + +class AttentionWrapper(nn.Module): + """ wrapper of attention scoring + Args: + features: [sum(cav), C, H, W], + record_len: list + pairwise_t_matrix: [B, L, L, 4, 4], original pairwise_t_matrix, noise contains + Retuens: + pairwise_score: [B, L, L] + pairwise_score[i,j], ego is i. 
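# --- Illustrative sketch (not part of the patch): multivariate Student-t log-pdf ---
# log_t above scores pose samples under a multivariate t distribution with `df` degrees
# of freedom; update_weight uses it to down-weight inconsistent relative poses. A
# minimal call on toy data (float64, matching what WeightedMLE produces):
import torch

x = torch.randn(5, 4, dtype=torch.float64)  # 5 samples in (x, y, cos, sin) form
mu = torch.zeros(4, dtype=torch.float64)
Sigma = torch.eye(4, dtype=torch.float64)
log_pdf = log_t(x, mu, Sigma, df=2)         # shape (5,), one log-density per sample
print(log_pdf)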
+ """ + def __init__(self, in_ch, hidden_ch, affine_parameter, learnable_alpha=True): + super(AttentionWrapper, self).__init__() + self.attention_net = Attention(in_ch, hidden_ch) + self.H = affine_parameter['H'] + self.W = affine_parameter['W'] + self.downsample_rate = affine_parameter['downsample_rate'] + self.discrete_ratio = affine_parameter['discrete_ratio'] + if learnable_alpha: + self.alpha = nn.Parameter(torch.Tensor([0.15])) + else: + self.alpha = 0.35 + + def forward(self, features, record_len, pairwise_t_matrix): + _, C, H, W = features.shape + B, L = pairwise_t_matrix.shape[:2] + split_x = regroup(features, record_len) + pairwise_score = torch.zeros((B, L, L), device=features.device) + # mask = torch.eye(L, device=features.device).expand(B,L,L) + + + for b in range(B): + N = record_len[b] + agent_features = split_x[b] + for i in range(N): + t_matrix = pairwise_t_matrix[b] + t_matrix = t_matrix[:,:,[0, 1],:][:,:,:,[0, 1, 3]] # [L, L, 2, 3] + t_matrix[...,0,1] = t_matrix[...,0,1] * self.H / self.W + t_matrix[...,1,0] = t_matrix[...,1,0] * self.W / self.H + t_matrix[...,0,2] = t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + t_matrix[...,1,2] = t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + # (N,C,H,W) + neighbors = warp_affine_simple(agent_features, t_matrix[i, :N, :, :], (self.H, self.W)) + # (N,C,H,W) + ego_agent_feature = agent_features[i].unsqueeze(0).repeat(N, 1, 1, 1) + # (N,2C,H,W) + neighbor_feature = torch.cat( + [neighbors, ego_agent_feature], dim=1) + # (N,1) + pairwise_score[b,i,:N] = self.attention_net(neighbor_feature).flatten() + + # pairwise_score *= mask + + scores = pairwise_score + eps = 1e-4 + # pairwise_score (B, L, L). pairwise_score[b,i,j] is agent j' feature warping to agent i's coordinate + # weight (B, L, L), normalized at dim=2 + weight = pairwise_score / (torch.sum(pairwise_score, dim=2, keepdim=True) + self.alpha + eps) + + return scores, weight diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/v2xvit_basic.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/v2xvit_basic.py new file mode 100644 index 0000000000000000000000000000000000000000..6f138232c39ec62537a12b55bba9a57704248c03 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/v2xvit_basic.py @@ -0,0 +1,193 @@ +import math +import torch +import torch.nn as nn + +from opencood.models.sub_modules.base_transformer import * +from opencood.models.sub_modules.hmsa import * +from opencood.models.sub_modules.mswin import * +from opencood.models.sub_modules.torch_transformation_utils import \ + get_transformation_matrix, warp_affine, get_roi_and_cav_mask, \ + get_discretized_transformation_matrix + + +class STTF(nn.Module): + def __init__(self, args): + super(STTF, self).__init__() + self.discrete_ratio = args['voxel_size'][0] + self.downsample_rate = args['downsample_rate'] + + def forward(self, x, mask, spatial_correction_matrix): + x = x.permute(0, 1, 4, 2, 3) + dist_correction_matrix = get_discretized_transformation_matrix( + spatial_correction_matrix, self.discrete_ratio, + self.downsample_rate) + # Only compensate non-ego vehicles + B, L, C, H, W = x.shape + + T = get_transformation_matrix( + dist_correction_matrix[:, 1:, :, :].reshape(-1, 2, 3), (H, W)) + cav_features = warp_affine(x[:, 1:, :, :, :].reshape(-1, C, H, W), T, + (H, W)) + cav_features = cav_features.reshape(B, -1, C, H, W) + x = torch.cat([x[:, 0, :, :, :].unsqueeze(1), 
cav_features], dim=1) + x = x.permute(0, 1, 3, 4, 2) + return x + + +class RelTemporalEncoding(nn.Module): + """ + Implement the Temporal Encoding (Sinusoid) function. + """ + + def __init__(self, n_hid, RTE_ratio, max_len=100, dropout=0.2): + super(RelTemporalEncoding, self).__init__() + position = torch.arange(0., max_len).unsqueeze(1) + div_term = torch.exp(torch.arange(0, n_hid, 2) * + -(math.log(10000.0) / n_hid)) + emb = nn.Embedding(max_len, n_hid) + emb.weight.data[:, 0::2] = torch.sin(position * div_term) / math.sqrt( + n_hid) + emb.weight.data[:, 1::2] = torch.cos(position * div_term) / math.sqrt( + n_hid) + emb.requires_grad = False + self.RTE_ratio = RTE_ratio + self.emb = emb + self.lin = nn.Linear(n_hid, n_hid) + + def forward(self, x, t): + # When t has unit of 50ms, rte_ratio=1. + # So we can train on 100ms but test on 50ms + return x + self.lin(self.emb(t * self.RTE_ratio)).unsqueeze( + 0).unsqueeze(1) + + +class RTE(nn.Module): + def __init__(self, dim, RTE_ratio=2): + super(RTE, self).__init__() + self.RTE_ratio = RTE_ratio + + self.emb = RelTemporalEncoding(dim, RTE_ratio=self.RTE_ratio) + + def forward(self, x, dts): + # x: (B,L,H,W,C) + # dts: (B,L) + rte_batch = [] + for b in range(x.shape[0]): + rte_list = [] + for i in range(x.shape[1]): + rte_list.append( + self.emb(x[b, i, :, :, :], dts[b, i]).unsqueeze(0)) + rte_batch.append(torch.cat(rte_list, dim=0).unsqueeze(0)) + return torch.cat(rte_batch, dim=0) + + +class V2XFusionBlock(nn.Module): + def __init__(self, num_blocks, cav_att_config, pwindow_config): + super().__init__() + # first multi-agent attention and then multi-window attention + self.layers = nn.ModuleList([]) + self.num_blocks = num_blocks + + for _ in range(num_blocks): + att = HGTCavAttention(cav_att_config['dim'], + heads=cav_att_config['heads'], + dim_head=cav_att_config['dim_head'], + dropout=cav_att_config['dropout']) if \ + cav_att_config['use_hetero'] else \ + CavAttention(cav_att_config['dim'], + heads=cav_att_config['heads'], + dim_head=cav_att_config['dim_head'], + dropout=cav_att_config['dropout']) + self.layers.append(nn.ModuleList([ + PreNorm(cav_att_config['dim'], att), + PreNorm(cav_att_config['dim'], + PyramidWindowAttention(pwindow_config['dim'], + heads=pwindow_config['heads'], + dim_heads=pwindow_config[ + 'dim_head'], + drop_out=pwindow_config[ + 'dropout'], + window_size=pwindow_config[ + 'window_size'], + relative_pos_embedding= + pwindow_config[ + 'relative_pos_embedding'], + fuse_method=pwindow_config[ + 'fusion_method']))])) + + def forward(self, x, mask, prior_encoding): + for cav_attn, pwindow_attn in self.layers: + x = cav_attn(x, mask=mask, prior_encoding=prior_encoding) + x + x = pwindow_attn(x) + x + return x + + +class V2XTEncoder(nn.Module): + def __init__(self, args): + super().__init__() + + cav_att_config = args['cav_att_config'] + pwindow_att_config = args['pwindow_att_config'] + feed_config = args['feed_forward'] + + num_blocks = args['num_blocks'] + depth = args['depth'] + mlp_dim = feed_config['mlp_dim'] + dropout = feed_config['dropout'] + + self.downsample_rate = args['sttf']['downsample_rate'] + self.discrete_ratio = args['sttf']['voxel_size'][0] + self.use_roi_mask = args['use_roi_mask'] + self.use_RTE = cav_att_config['use_RTE'] + self.RTE_ratio = cav_att_config['RTE_ratio'] + self.sttf = STTF(args['sttf']) + # adjust the channel numbers from 256+3 -> 256 + self.prior_feed = nn.Linear(cav_att_config['dim'] + 3, + cav_att_config['dim']) + self.layers = nn.ModuleList([]) + if self.use_RTE: + self.rte = 
RTE(cav_att_config['dim'], self.RTE_ratio) + for _ in range(depth): + self.layers.append(nn.ModuleList([ + V2XFusionBlock(num_blocks, cav_att_config, pwindow_att_config), + PreNorm(cav_att_config['dim'], + FeedForward(cav_att_config['dim'], mlp_dim, + dropout=dropout)) + ])) + + def forward(self, x, mask, spatial_correction_matrix): + + # transform the features to the current timestamp + # velocity, time_delay, infra + # (B,L,H,W,3) + prior_encoding = x[..., -3:] + # (B,L,H,W,C) + x = x[..., :-3] + if self.use_RTE: + # dt: (B,L) + dt = prior_encoding[:, :, 0, 0, 1].to(torch.int) + x = self.rte(x, dt) + x = self.sttf(x, mask, spatial_correction_matrix) + com_mask = mask.unsqueeze(1).unsqueeze(2).unsqueeze( + 3) if not self.use_roi_mask else get_roi_and_cav_mask(x.shape, + mask, + spatial_correction_matrix, + self.discrete_ratio, + self.downsample_rate) + for attn, ff in self.layers: + x = attn(x, mask=com_mask, prior_encoding=prior_encoding) + x = ff(x) + x + return x + + +class V2XTransformer(nn.Module): + def __init__(self, args): + super(V2XTransformer, self).__init__() + + encoder_args = args['encoder'] + self.encoder = V2XTEncoder(encoder_args) + + def forward(self, x, mask, spatial_correction_matrix): + output = self.encoder(x, mask, spatial_correction_matrix) + output = output[:, 0] + return output \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/view_embedding.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/view_embedding.py new file mode 100644 index 0000000000000000000000000000000000000000..3826feec5de3de077ec7a40d1c67f63799067ad8 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/view_embedding.py @@ -0,0 +1,328 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from icecream import ic +import numpy as np +from opencood.pcdet_utils.roiaware_pool3d.roiaware_pool3d_utils import points_in_boxes_gpu + +def all_pair_l2(A, B): + """ All pair L2 distance for A and B + Args: + A : np.ndarray + shape [N_A, D] + B : np.ndarray + shape [N_B, D] + Returns: + C : np.ndarray + shape [N_A, N_B] + """ + TwoAB = 2*A@B.T # [N_A, N_B] + C = torch.sqrt( + torch.sum(A * A, 1, keepdim=True).repeat_interleave(TwoAB.shape[1], dim=1) \ + + torch.sum(B * B, 1, keepdim=True).T.repeat_interleave(TwoAB.shape[0], dim=0) \ + - TwoAB + ) + return C + +def bilinear_interpolate_torch(im, x, y): + """ + .--------> x + | + | + | + v y + + x0y0 ------ x1 + | | + | | + | | + | | + y1 ------- x1y1 + + Args: + im: (H, W, C) [y, x] + x: (N) + y: (N) + Returns: + """ + x0 = torch.floor(x).long() + x1 = x0 + 1 + + y0 = torch.floor(y).long() + y1 = y0 + 1 + + x0 = torch.clamp(x0, 0, im.shape[1] - 1) + x1 = torch.clamp(x1, 0, im.shape[1] - 1) + y0 = torch.clamp(y0, 0, im.shape[0] - 1) + y1 = torch.clamp(y1, 0, im.shape[0] - 1) + + Ia = im[y0, x0] + Ib = im[y1, x0] + Ic = im[y0, x1] + Id = im[y1, x1] + + wa = (x1.type_as(x) - x) * (y1.type_as(y) - y) + wb = (x1.type_as(x) - x) * (y - y0.type_as(y)) + wc = (x - x0.type_as(x)) * (y1.type_as(y) - y) + wd = (x - x0.type_as(x)) * (y - y0.type_as(y)) + ans = torch.t((torch.t(Ia) * wa)) + torch.t(torch.t(Ib) * wb) + torch.t(torch.t(Ic) * wc) + torch.t(torch.t(Id) * wd) + return ans + +def boxes_to_tfm(box3d): + with torch.no_grad(): + cos_theta = torch.cos(box3d[:, -1]) + sin_theta = torch.sin(box3d[:, -1]) + pos_x = box3d[:, 0] + pos_y = box3d[:, 1] + T_ego_obj_row1 = torch.stack([cos_theta, -sin_theta, pos_x], dim=-1) # [N, 
3] + T_ego_obj_row2 = torch.stack([sin_theta, cos_theta, pos_y], dim=-1) + T_ego_obj_row3 = torch.tensor([0,0,1.], device=T_ego_obj_row1.device).expand(T_ego_obj_row1.shape) + T_ego_obj = torch.stack([T_ego_obj_row1, T_ego_obj_row2, T_ego_obj_row3], dim=1) + return T_ego_obj + +def get_poi(pred_box3d_list, order, num_of_sample): + """ + get point of interest + + Frist, Divide the area of ego agent + .--------> x + | + | + | + v y + + 0 | 1 | 2 + -------+---------+------- + 3 | (obj) 4 | 5 + -------+---------+------- + 6 | 7 | 8 + + + Inputs: + pred_box3d_list: [[shape: N1, 7], [shape: N2, 7], ...], angle in rad + Returns + ego_partition_list: [[shape: N1], [shape: N2], ...] + """ + poi_list = [] + poi_norm_in_obj = [] + poi_valid_mask_list = [] + for box3d in pred_box3d_list: + T_ego_obj = boxes_to_tfm(box3d) # [N_box, 3, 3] + + T_obj_ego = torch.linalg.inv(T_ego_obj) + x_obj_ego = T_obj_ego[:, 0, 2] + y_obj_ego = T_obj_ego[:, 1, 2] + + hwl = box3d[:, 3:6] if order == "hwl" else box3d[:, [5,4,3]] + ego_in_left = (x_obj_ego < - hwl[:, 2]/2).int().view(-1,1) # [N_box, 1] + ego_in_right = (x_obj_ego > - hwl[:, 2]/2).int().view(-1,1) + ego_in_up = (y_obj_ego < - hwl[:, 1]/2).int().view(-1,1) + ego_in_down = (y_obj_ego > hwl[:, 1]/2).int().view(-1,1) + + poi_norm = torch.rand((box3d.shape[0], num_of_sample, 2), device=box3d.device) * 2 - 1 # range [-1, 1] + + ego_in_left_poi_deprecated_mask = (poi_norm[..., 0] > 0.6).int() # [N_box, num_of_sample] + ego_in_right_poi_deprecated_mask = (poi_norm[..., 0] < -0.6).int() + ego_in_up_poi_deprecated_mask = (poi_norm[..., 1] > 0.6).int() + ego_in_down_poi_deprecated_mask = (poi_norm[..., 1] < -0.6).int() + + # filter poi + ego_in_left = (x_obj_ego < - hwl[:, 2]/2).int().view(-1,1) # [N_box, 1] + # [N_box, num_of_sample] + poi_deprecated_mask = ego_in_left * ego_in_left_poi_deprecated_mask + \ + ego_in_right * ego_in_right_poi_deprecated_mask + \ + ego_in_up * ego_in_up_poi_deprecated_mask + \ + ego_in_down * ego_in_down_poi_deprecated_mask + poi_deprecated_mask = poi_deprecated_mask > 1 + poi_valid_mask = poi_deprecated_mask == 0 + + poi_exact_pos_in_obj_coor = poi_norm * hwl[:, [2,1]].view(box3d.shape[0], 1, 2) # [N_box, num_of_sample ,2] + poi_exact_pos_in_obj_coor_homo = F.pad(poi_exact_pos_in_obj_coor, (0,1), 'constant', 1) # [N_box, num_of_sample, 3] + poi_exact_pos_in_ego_coor = torch.bmm(T_ego_obj, poi_exact_pos_in_obj_coor_homo.permute(0, 2, 1)) # [N_box, 3, num_of_sample] + poi_exact_pos_in_ego_coor = poi_exact_pos_in_ego_coor.permute(0, 2, 1) # [N_box, num_of_sample, 3] + poi_exact_pos_in_ego_coor = poi_exact_pos_in_ego_coor[..., :2] # [N_box, num_of_sample, 2] + + poi_list.append(poi_exact_pos_in_ego_coor) + poi_valid_mask_list.append(poi_valid_mask) + poi_norm_in_obj.append(poi_norm) + + return poi_list, poi_norm_in_obj, poi_valid_mask_list + +class PoiExtractor(nn.Module): + def __init__(self, args): + super().__init__() + self.pc_range = args['pc_range'] + self.bev_stride = args['stride'] + self.voxel_size= args['voxel_size'][0] + self.grid_size = self.voxel_size * self.bev_stride + self.order = args['order'] + self.sample_num = args['sample_num'] # 20 may be ok + self.feat_dim = args['feat_dim'] # 64 + + # learn from relative position (poi_norm) to feature + self.emb = Embedding(2, self.feat_dim, args['N_freqs']) + self.alpha = nn.Parameter(torch.tensor([0.5])) + + # preset grids + grid_x = torch.linspace(self.pc_range[0] + self.grid_size/2, self.pc_range[3] - self.grid_size/2, steps = 
int((self.pc_range[3]-self.pc_range[0])//self.grid_size), device='cuda') + grid_y = torch.linspace(self.pc_range[1] + self.grid_size/2, self.pc_range[4] - self.grid_size/2, steps = int((self.pc_range[4]-self.pc_range[1])//self.grid_size), device='cuda') + + self.grid_x_idx = torch.arange(int((self.pc_range[3]-self.pc_range[0])//self.grid_size), device='cuda') + self.grid_y_idx = torch.arange(int((self.pc_range[4]-self.pc_range[1])//self.grid_size), device='cuda') + self.bev_grid_idx = torch.cartesian_prod(self.grid_x_idx, self.grid_y_idx) # [num_of_grid, 2] + + self.bev_grid_points = torch.cartesian_prod(grid_x, grid_y) # [num_of_grid, 2] + self.bev_grid_points_xyz = F.pad(self.bev_grid_points, (0,1), mode='constant', value=1) # x,y,z, [num_of_grid, 3] + + + def forward(self, heter_feature_2d, pred_box3d_list, lidar_agent_indicator, inferring=False): + bs = heter_feature_2d.shape[0] + # poi_list [[N_box1, num_of_sample, 2], ...] + # poi_norm_in_obj [[N_box1, num_of_sample, 2], ...] + # poi_valid_mask_list [[N_box1, num_of_sample]] + + lidar_pred_box3d_list = [x for i, x in enumerate(pred_box3d_list) if lidar_agent_indicator[i]] + poi_list, poi_norm_in_obj, poi_valid_mask_list = get_poi(lidar_pred_box3d_list, self.order, self.sample_num) + + # learning. only within lidar agent + poi_feature_pred, poi_feature_gt = \ + self.learning(heter_feature_2d[lidar_agent_indicator==1], poi_list, poi_norm_in_obj, poi_valid_mask_list) + + if inferring: + heter_feature_2d_pred, heter_feature_2d_pred_mask = self.inferring(heter_feature_2d, pred_box3d_list) + heter_feature_2d = heter_feature_2d * (1 - heter_feature_2d_pred_mask) + \ + heter_feature_2d * (heter_feature_2d_pred_mask) * self.alpha + \ + heter_feature_2d_pred * (heter_feature_2d_pred_mask) * (1 - self.alpha) + + return heter_feature_2d, poi_feature_pred, poi_feature_gt + + + def learning(self, lidar_feature_2d, poi_list, poi_norm_in_obj, poi_valid_mask_list): + poi_feature_list = [] + poi_norm_valid_list = [] + + # learning + for i, (poi, poi_norm, mask) in enumerate(zip(poi_list, poi_norm_in_obj, poi_valid_mask_list)): + x_idxs = (poi[..., 0] - self.pc_range[0]) / self.grid_size + 0.5 # [N_box1, num_of_sample] + y_idxs = (poi[..., 1] - self.pc_range[1]) / self.grid_size + 0.5 # [N_box1, num_of_sample] + cur_x_idxs = x_idxs[mask == 1] # [N_poi, ] + cur_y_idxs = y_idxs[mask == 1] # [N_poi, ] + + cur_bev_feature = lidar_feature_2d[i].permute(1, 2, 0) # [H, W, C] + poi_feature = bilinear_interpolate_torch(cur_bev_feature, cur_x_idxs, cur_y_idxs) # [N_poi, C] + poi_norm_valid = poi_norm[mask == 1] # [N_poi, 2] + + poi_feature_list.append(poi_feature) + poi_norm_valid_list.append(poi_norm_valid) + + poi_feature_gt = torch.cat(poi_feature_list) # [sum(N_poi), C] + poi_norm = torch.cat(poi_norm_valid_list) # [sum(N_poi), 2] + poi_feature_pred = self.emb(poi_norm) + + return poi_feature_pred, poi_feature_gt + + + def inferring(self, heter_feature_2d, pred_box3d_list): + max_len = max([len(pred_box3d) for pred_box3d in pred_box3d_list]) + pred_box3d_tensor = torch.zeros((heter_feature_2d.shape[0], max_len, 7), device=heter_feature_2d.device) # [B, max_box_num, 7] + heter_feature_pred = torch.zeros_like(heter_feature_2d, device=heter_feature_2d.device) + heter_feature_pred_mask = torch.zeros((heter_feature_2d.shape[0], 1, heter_feature_2d.shape[2], heter_feature_2d.shape[3]), \ + device=heter_feature_2d.device) + + for i, pred_box3d in enumerate(pred_box3d_list): + pred_box3d_copy = pred_box3d.clone() + pred_box3d_copy[:, 2] = 1 # move the z center to 1 
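+            # (editor's sketch) points_in_boxes_gpu below expects boxes as
+            # [x, y, z, dx, dy, dz, heading], so "hwl"-ordered boxes are re-indexed
+            # to [l, w, h]. Illustration with a hypothetical box h=1.5, w=2.0, l=4.0:
+            #   box = torch.tensor([[0., 0., 1., 1.5, 2.0, 4.0, 0.]])  # hwl order
+            #   box[:, [3, 4, 5]] = box[:, [5, 4, 3]]  # -> dx=4.0, dy=2.0, dz=1.5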
+ if self.order == "hwl": + pred_box3d_copy[:, [3,4,5]] = pred_box3d_copy[:, [5,4,3]] # -> dx dy dz + + pred_box3d_tensor[i,:len(pred_box3d)] = pred_box3d_copy + + bev_grid_points = self.bev_grid_points_xyz.expand(heter_feature_2d.shape[0], -1, -1) # [B, num_of_grid, 3] + masks = points_in_boxes_gpu(bev_grid_points, pred_box3d_tensor) # [B, num_of_grid] + + for i, mask in enumerate(masks): + pred_box3d = pred_box3d_list[i] + if pred_box3d.shape[0] == 0 or sum(mask > 0) == 0: + continue + T_ego_objs = boxes_to_tfm(pred_box3d) + T_objs_ego = torch.linalg.inv(T_ego_objs) # [N_box, 3, 3] + object_xy_coor = pred_box3d[:, :2] # [N_box, 2] + bev_grid_xy_coor = bev_grid_points[i][..., :2][mask > 0] # [num_of_valid_grid, 2] + + # assign grid to object + grid_object_l2dis = all_pair_l2(bev_grid_xy_coor, object_xy_coor) + grid_in_which_object = torch.argmin(grid_object_l2dis, dim=1) # shape [num_of_valid_grid,], value within [0, N_box) + T_objs_ego_for_the_grid = T_objs_ego[grid_in_which_object] # [num_of_valid_grid, 3, 3] + + object_size_for_the_grid = pred_box3d[grid_in_which_object][:,[5,4]] if self.order=='hwl' \ + else pred_box3d[grid_in_which_object][:,[3,4]] # [num_of_valid_grid, 2] + + # get pos in object coord. + bev_grid_xy_homo = F.pad(bev_grid_xy_coor, (0,1), 'constant', 1).unsqueeze(-1) # [num_of_valid_grid, 3, 1] + grid_in_obj_coor = torch.bmm(T_objs_ego_for_the_grid, bev_grid_xy_homo) # [num_of_valid_grid, 3, 1] + grid_in_obj_xy_coor = grid_in_obj_coor[:,:2,0] # [num_of_valid_grid, 2] + grid_in_obj_xy_norm = grid_in_obj_xy_coor / object_size_for_the_grid # [num_of_valid_grid, 2] + + feature_idx = self.bev_grid_idx[mask > 0] # [num_of_valid_grid, 2] + features = self.emb(grid_in_obj_xy_norm) # [num_of_valid_grid, 64] + + heter_feature_pred[i, :, feature_idx[:, 1], feature_idx[:, 0]] = features.T + heter_feature_pred_mask[i, 0, feature_idx[:, 1], feature_idx[:, 0]] = 1 + + return heter_feature_pred, heter_feature_pred_mask + + + + + + +class Embedding(nn.Module): + def __init__(self, in_channels, out_channels=64, N_freqs=8, logscale=True): + """ + Defines a function that embeds x to (x, sin(2^k x), cos(2^k x), ...) + """ + super(Embedding, self).__init__() + self.N_freqs = N_freqs + self.in_channels = in_channels + self.funcs = [torch.sin, torch.cos] + self.mlp_in_channels = in_channels*(len(self.funcs)*N_freqs + 1) # 2 * 8 * 2 + 2 = 34 + self.mlp_inter_channels = out_channels * 2 + self.mlp_out_channels = out_channels + + self.mlp_layers = [nn.Linear(self.mlp_in_channels, self.mlp_inter_channels)] + for i in range(4): + self.mlp_layers.append(nn.ReLU(inplace=True)) + self.mlp_layers.append(nn.Linear(self.mlp_inter_channels, self.mlp_inter_channels)) + self.mlp_layers.append(nn.ReLU(inplace=True)) + self.mlp_layers.append(nn.Linear(self.mlp_inter_channels, self.mlp_out_channels)) + + self.mlp_layers = nn.Sequential(*self.mlp_layers) + + + if logscale: + self.freq_bands = 2**torch.linspace(0, N_freqs-1, N_freqs) + else: + self.freq_bands = torch.linspace(1, 2**(N_freqs-1), N_freqs) + + def forward(self, x): + """ + Embeds x to (x, sin(2^k x), cos(2^k x), ...) 
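+        (e.g. with in_channels=2 and N_freqs=8 this yields 2 * (2*8 + 1) = 34
+        encoded values per point, i.e. self.mlp_in_channels, which the small MLP
+        then projects to out_channels)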
+ Different from the paper, "x" is also in the output + See https://github.com/bmild/nerf/issues/12 + + Inputs: + x: (B, self.in_channels) + + Outputs: + out: (B, self.out_channels) + """ + out = [x] + for freq in self.freq_bands: + for func in self.funcs: + out += [func(freq*x)] + + out = torch.cat(out, -1) + out = self.mlp_layers(out) + + return out + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/voxel_rcnn_head.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/voxel_rcnn_head.py new file mode 100644 index 0000000000000000000000000000000000000000..5ce9a51c05cece04d18a143c77082ce9bd6767d2 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/voxel_rcnn_head.py @@ -0,0 +1,379 @@ +import torch +import numpy as np +import torch.nn as nn +from opencood.pcdet_utils.pointnet2.pointnet2_stack import voxel_pool_modules as voxelpool_stack_modules +from opencood.utils import common_utils +from opencood.pcdet_utils.iou3d_nms.iou3d_nms_utils import boxes_iou3d_gpu +from opencood.utils import box_utils +from icecream import ic +from copy import deepcopy + +class VoxelRCNNHead(nn.Module): + def __init__(self, model_cfg, backbone_channels): + super().__init__() + self.model_cfg = model_cfg # 模型配置 + self.voxel_size = model_cfg['voxel_size'] # voxel大小 + self.pool_cfg = model_cfg['pool_cfg'] + self.point_cloud_range = model_cfg['pc_range'] + self.grid_size = self.pool_cfg['grid_size'] # 6 + self.feature_source = self.pool_cfg['feature_source'] + self.code_size = 7 + + c_out = 0 + self.roi_grid_pool_layers = nn.ModuleList() # 初始化ROI网格池化层MuduleList + + for src_name in self.feature_source: # FEATURES_SOURCE: ['x_conv2', 'x_conv3', 'x_conv4'] + layer_cfg = self.pool_cfg['pool_layers'][src_name] + mlps = deepcopy(layer_cfg['mlps']) # 根据特征层获取MLP参数 + + for k in range(len(mlps)): # MLPS: [[32, 32]] 长度为1 + # backbone_channels: {'x_conv1':16, 'x_conv2':32, 'x_conv3':64, 'x_conv4':64} + mlps[k] = [backbone_channels[src_name]] + mlps[k] # 计算MLP层输入输出维度,在最前面增加一个值eg:[[32,32,32]] + + pool_layer = voxelpool_stack_modules.NeighborVoxelSAModuleMSG( + query_ranges=layer_cfg['query_ranges'], # 查询范围 + nsamples=layer_cfg['nsample'], # 采样数量 + radii=layer_cfg['pool_radius'], # 池化半径 0.4->0.8->1.6 + mlps=mlps, # mlp层 + pool_method=layer_cfg['pool_method'], # 池化方法 + ) + # 将池化层添加到ROI网格池化层MuduleList + self.roi_grid_pool_layers.append(pool_layer) + + c_out += sum([x[-1] for x in mlps]) # 取mlps最后的输出维度 32->64->96 + + # c_out = sum([x[-1] for x in mlps]) + pre_channel = self.grid_size * self.grid_size * self.grid_size * c_out # 20736=6*6*6*96 + + + fc_layers = [self.model_cfg['n_fc_neurons']] * 2 + self.shared_fc_layers, pre_channel = self._make_fc_layers(pre_channel, + fc_layers) + + self.cls_layers, pre_channel = self._make_fc_layers(pre_channel, + fc_layers, + output_channels=1) + self.iou_layers, _ = self._make_fc_layers(pre_channel, fc_layers, + output_channels=1) + self.reg_layers, _ = self._make_fc_layers(pre_channel, fc_layers, + output_channels=7) + self._init_weights(weight_init='xavier') + + + def _init_weights(self, weight_init='xavier'): + if weight_init == 'kaiming': + init_func = nn.init.kaiming_normal_ + elif weight_init == 'xavier': + init_func = nn.init.xavier_normal_ + elif weight_init == 'normal': + init_func = nn.init.normal_ + else: + raise NotImplementedError + + for m in self.modules(): + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d): + if weight_init == 'normal': + init_func(m.weight, mean=0, std=0.001) 
+ else: + init_func(m.weight) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + nn.init.normal_(self.reg_layers[-1].weight, mean=0, std=0.001) + + + def _make_fc_layers(self, input_channels, fc_list, output_channels=None): + fc_layers = [] + pre_channel = input_channels + for k in range(len(fc_list)): + fc_layers.extend([ + nn.Conv1d(pre_channel, fc_list[k], kernel_size=1, bias=False), + # nn.BatchNorm1d(fc_list[k]), + nn.ReLU() + ]) + pre_channel = fc_list[k] + if self.model_cfg['dp_ratio'] > 0: + fc_layers.append(nn.Dropout(self.model_cfg['dp_ratio'])) + if output_channels is not None: + fc_layers.append( + nn.Conv1d(pre_channel, output_channels, kernel_size=1, + bias=True)) + fc_layers = nn.Sequential(*fc_layers) + return fc_layers, pre_channel + + def roi_grid_pool(self, batch_dict): + """ + roi_grid_pooling happens after box fusion and voxel feature merges + + Args: + batch_dict: + batch_size: + rois: (sum(rois), 7 + C) + point_coords: (num_points, 4) [bs_idx, x, y, z] + point_features: (num_points, C) + point_cls_scores: (N1 + N2 + N3 + ..., 1) + point_part_offset: (N1 + N2 + N3 + ..., 3) + Returns: + + """ + + batch_size = len(batch_dict['record_len']) + rois = batch_dict['rcnn_label_dict']['rois'] # already lwh order + label_record_len = batch_dict['rcnn_label_dict']['record_len'] + with_vf_transform = batch_dict.get('with_voxel_feature_transform', False) # False + + + # 1.计算roi网格点全局点云坐标(旋转+roi中心点平移) + roi_grid_xyz, _ = self.get_global_grid_points_of_roi( + rois, grid_size=self.grid_size + ) # (BxN, 6x6x6, 3) --> (1024, 216, 3) + # roi_grid_xyz: (B, Nx6x6x6, 3) + roi_grid_xyz = roi_grid_xyz.view(-1, 3) # (sum(proposal)*6*6*6, 3) + + # 2.compute the voxel coordinates of grid points + roi_grid_coords_x = torch.div((roi_grid_xyz[:, 0:1] - self.point_cloud_range[0]), self.voxel_size[0], rounding_mode='floor') + roi_grid_coords_y = torch.div((roi_grid_xyz[:, 1:2] - self.point_cloud_range[1]), self.voxel_size[1], rounding_mode='floor') + roi_grid_coords_z = torch.div((roi_grid_xyz[:, 2:3] - self.point_cloud_range[2]), self.voxel_size[2], rounding_mode='floor') + + roi_grid_coords = torch.cat([roi_grid_coords_x, roi_grid_coords_y, roi_grid_coords_z], dim=-1) # 整数坐标 --> (sum(proposal)*6*6*6, 3) + + # 3.逐帧赋值batch index + batch_idx = rois.new_zeros(roi_grid_coords.shape[0], 1) + idx_start = 0 + for bs_idx in range(batch_size): + batch_idx[idx_start:idx_start+label_record_len[bs_idx] * self.grid_size ** 3] = bs_idx + idx_start += label_record_len[bs_idx] * self.grid_size ** 3 + + # 4.计算每帧roi grid的有效坐标点数(虚拟特征点数) + roi_grid_batch_cnt = rois.new_zeros(batch_size).int() + for bs_idx in range(batch_size): + roi_grid_batch_cnt[bs_idx] = label_record_len[bs_idx] * self.grid_size ** 3 + + pooled_features_list = [] + for k, src_name in enumerate(self.feature_source): + pool_layer = self.roi_grid_pool_layers[k] # 获取第k个池化层 + cur_stride = batch_dict['multi_scale_3d_strides'][src_name] # 获取该层下采样步长 + cur_sp_tensors = batch_dict['multi_scale_3d_features_fused'][src_name] # 获取该层稀疏特征 + + # 1.compute voxel center xyz and batch_cnt + cur_coords = cur_sp_tensors.indices # 提取有效voxel的坐标 --> (204916, 4) + cur_voxel_xyz = common_utils.get_voxel_centers( + cur_coords[:, 1:4], # 第0维是batch index + downsample_times=cur_stride, # 下采样倍数 + voxel_size=self.voxel_size, # voxel大小 + point_cloud_range=self.point_cloud_range # 点云范围 + ) # 有效voxle中心点云坐标 --> (204916, 3) + + # 2.统计每帧点云的有效坐标数 + cur_voxel_xyz_batch_cnt = cur_voxel_xyz.new_zeros(batch_size).int() + for bs_idx in range(batch_size): + 
cur_voxel_xyz_batch_cnt[bs_idx] = (cur_coords[:, 0] == bs_idx).sum() + + # 3.get voxel2point tensor 计算空间voxel坐标与voxel特征之间的索引 + v2p_ind_tensor = common_utils.generate_voxel2pinds(cur_sp_tensors) # (8, 21, 800, 704) + + # 4.compute the grid coordinates in this scale, in [batch_idx, x y z] order + cur_roi_grid_coords = torch.div(roi_grid_coords, cur_stride, rounding_mode='floor') # 计算下采样后的网格坐标 (sum(proposal)*6*6*6,3) + cur_roi_grid_coords = torch.cat([batch_idx, cur_roi_grid_coords], dim=-1) # 将batch index与roi grid coord拼接 --> (sum(proposal)*6*6*6,4) + cur_roi_grid_coords = cur_roi_grid_coords.int() # 转化为整数 + + + # ic(cur_voxel_xyz.contiguous()) + # ic(cur_voxel_xyz.contiguous().shape) + # ic(cur_voxel_xyz_batch_cnt) + + # ic(roi_grid_xyz.contiguous().view(-1, 3)) + # ic(roi_grid_xyz.contiguous().view(-1, 3).shape) + # ic(roi_grid_batch_cnt) + + # ic(cur_roi_grid_coords.contiguous().view(-1, 4)) + # ic(cur_roi_grid_coords.contiguous().view(-1, 4).shape) + # ic(cur_sp_tensors.features.contiguous()) + # ic(v2p_ind_tensor) + # ic("___________") + + + # 5.voxel neighbor aggregation + pooled_features = pool_layer( + xyz=cur_voxel_xyz.contiguous(), # voxle中心点云坐标 + xyz_batch_cnt=cur_voxel_xyz_batch_cnt, # 每帧点云有效坐标的个数 + new_xyz=roi_grid_xyz.contiguous().view(-1, 3), # roi grid点云坐标 + new_xyz_batch_cnt=roi_grid_batch_cnt, # 每个roi grid中有效坐标个数 + new_coords=cur_roi_grid_coords.contiguous().view(-1, 4), # 在该特征层上的roi voxle坐标 + features=cur_sp_tensors.features.contiguous(), # 稀疏特征 + voxel2point_indices=v2p_ind_tensor # 空间voxle坐标与voxle特征之间的索引(对应关系) + ) + + + # 6.改变特征维度,并加入池化特征list + pooled_features = pooled_features.view( + -1, self.grid_size ** 3, + pooled_features.shape[-1] + ) # (sum(rcnn_proposal), 6x6x6, C) --> (1024, 216, 32) + pooled_features_list.append(pooled_features) + + ms_pooled_features = torch.cat(pooled_features_list, dim=-1) + + return ms_pooled_features # (sum(rcnn_proposal), 6x6x6, C) --> (1024, 216, 32) + + + def get_global_grid_points_of_roi(self, rois, grid_size): + """ + 计算roi网格点全局点云坐标(旋转+roi中心点平移) + Args: + rois:(1024, 7) + grid_size:6 + Returns: + global_roi_grid_points, local_roi_grid_points: (1024, 216, 3) + """ + rois = rois.view(-1, rois.shape[-1]) + batch_size_rcnn = rois.shape[0] + + local_roi_grid_points = self.get_dense_grid_points(rois, batch_size_rcnn, grid_size) # (B, 6x6x6, 3) --> (1024, 216, 3) + global_roi_grid_points = common_utils.rotate_points_along_z( + local_roi_grid_points.clone(), rois[:, 6] + ).squeeze(dim=1) # (1024, 216, 3) 前3维沿着z轴旋转 + global_center = rois[:, 0:3].clone() # 提取roi的中心坐标 (1024,3) + global_roi_grid_points += global_center.unsqueeze(dim=1) # 将box平移到roi的中心 (1024, 216, 3) + return global_roi_grid_points, local_roi_grid_points + + @staticmethod + def get_dense_grid_points(rois, batch_size_rcnn, grid_size): + """ + 根据roi的长宽高计算稠密的虚拟点云坐标(roi box划分为6x6x6的网格坐标) + Args: + rois:(1024, 7) + batch_size_rcnn:1024 + grid_size:6 + Returns: + roi_grid_points: (1024, 216, 3) + """ + faked_features = rois.new_ones((grid_size, grid_size, grid_size)) # 初始化一个全1的6x6x6的伪特征 + dense_idx = faked_features.nonzero() # (N, 3) [x_idx, y_idx, z_idx] --> (216,3) + dense_idx = dense_idx.repeat(batch_size_rcnn, 1, 1).float() # (B, 6x6x6, 3) --> (1024, 216, 3) + + local_roi_size = rois.view(batch_size_rcnn, -1)[:, 3:6] # 取出roi的长宽高(1024,3) + # ROI网格点坐标:先平移0.5个单位,然后归一化,再乘roi的大小,最后将原点移动中心 + # (1024,216,3) / (1024,1,3) - (1024,1,3) + roi_grid_points = (dense_idx + 0.5) / grid_size * local_roi_size.unsqueeze(dim=1) \ + - (local_roi_size.unsqueeze(dim=1) / 2) # (B, 6x6x6, 3) + return 
roi_grid_points # (1024, 216, 3) + + + def forward(self, batch_dict): + batch_dict = self.assign_targets(batch_dict) + # RoI aware pooling + pooled_features = self.roi_grid_pool(batch_dict) # (BxN, 6x6x6, C) + + + batch_size_rcnn = pooled_features.shape[0] + pooled_features = pooled_features.permute(0, 2, 1). \ + contiguous().view(batch_size_rcnn, -1, self.grid_size, + self.grid_size, + self.grid_size) # (BxN, C, 6, 6, 6) + + shared_features = self.shared_fc_layers( + pooled_features.view(batch_size_rcnn, -1, 1)) + rcnn_cls = self.cls_layers(shared_features).transpose(1,2).contiguous().squeeze(dim=1) # (B, 1 or 2) + rcnn_iou = self.iou_layers(shared_features).transpose(1,2).contiguous().squeeze(dim=1) # (B, 1) + rcnn_reg = self.reg_layers(shared_features).transpose(1,2).contiguous().squeeze(dim=1) # (B, C) + + batch_dict['stage2_out'] = { + 'rcnn_cls': rcnn_cls, + 'rcnn_iou': rcnn_iou, + 'rcnn_reg': rcnn_reg, + } + + return batch_dict + + def assign_targets(self, batch_dict): + batch_dict['rcnn_label_dict'] = { + 'rois': [], + 'gt_of_rois': [], + 'gt_of_rois_src': [], + 'cls_tgt': [], + 'reg_tgt': [], + 'iou_tgt': [], + 'rois_anchor': [], + 'record_len': [], + 'rois_scores_stage1': [] + } + pred_boxes = batch_dict['boxes_fused'] + pred_scores = batch_dict['scores_fused'] + gt_boxes = [b[m][:, [0, 1, 2, 5, 4, 3, 6]].float() for b, m in + zip(batch_dict['object_bbx_center'], + batch_dict['object_bbx_mask'].bool())] # hwl -> lwh order + for rois, scores, gts in zip(pred_boxes, pred_scores, gt_boxes): # each frame + rois = rois[:, [0, 1, 2, 5, 4, 3, 6]] # hwl -> lwh + if gts.shape[0] == 0: + gts = rois.clone() + + ious = boxes_iou3d_gpu(rois, gts) + max_ious, gt_inds = ious.max(dim=1) + gt_of_rois = gts[gt_inds] + rcnn_labels = (max_ious > 0.3).float() + mask = torch.logical_not(rcnn_labels.bool()) + + # set negative samples back to rois, no correction in stage2 for them + gt_of_rois[mask] = rois[mask] + gt_of_rois_src = gt_of_rois.clone().detach() + + # canoical transformation + roi_center = rois[:, 0:3] + # TODO: roi_ry > 0 in pcdet + roi_ry = rois[:, 6] % (2 * np.pi) + gt_of_rois[:, 0:3] = gt_of_rois[:, 0:3] - roi_center + gt_of_rois[:, 6] = gt_of_rois[:, 6] - roi_ry + + # transfer LiDAR coords to local coords + gt_of_rois = common_utils.rotate_points_along_z( + points=gt_of_rois.view(-1, 1, gt_of_rois.shape[-1]), + angle=-roi_ry.view(-1) + ).view(-1, gt_of_rois.shape[-1]) + + # flip orientation if rois have opposite orientation + heading_label = (gt_of_rois[:, 6] + ( + torch.div(torch.abs(gt_of_rois[:, 6].min()), + (2 * np.pi), rounding_mode='trunc') + + 1) * 2 * np.pi) % (2 * np.pi) # 0 ~ 2pi + opposite_flag = (heading_label > np.pi * 0.5) & ( + heading_label < np.pi * 1.5) + + # (0 ~ pi/2, 3pi/2 ~ 2pi) + heading_label[opposite_flag] = (heading_label[ + opposite_flag] + np.pi) % ( + 2 * np.pi) + flag = heading_label > np.pi + heading_label[flag] = heading_label[ + flag] - np.pi * 2 # (-pi/2, pi/2) + heading_label = torch.clamp(heading_label, min=-np.pi / 2, + max=np.pi / 2) + gt_of_rois[:, 6] = heading_label + + # generate regression target + rois_anchor = rois.clone().detach().view(-1, self.code_size) + rois_anchor[:, 0:3] = 0 + rois_anchor[:, 6] = 0 + + reg_targets = box_utils.box_encode( + gt_of_rois.view(-1, self.code_size), rois_anchor + ) + + batch_dict['rcnn_label_dict']['rois'].append(rois) + batch_dict['rcnn_label_dict']['rois_scores_stage1'].append(scores.flatten()) + batch_dict['rcnn_label_dict']['gt_of_rois'].append(gt_of_rois) + 
batch_dict['rcnn_label_dict']['gt_of_rois_src'].append( + gt_of_rois_src) + batch_dict['rcnn_label_dict']['cls_tgt'].append(rcnn_labels) + batch_dict['rcnn_label_dict']['reg_tgt'].append(reg_targets) + batch_dict['rcnn_label_dict']['iou_tgt'].append(max_ious) + batch_dict['rcnn_label_dict']['rois_anchor'].append(rois_anchor) + batch_dict['rcnn_label_dict']['record_len'].append(rois.shape[0]) + + + # cat list to tensor + for k, v in batch_dict['rcnn_label_dict'].items(): + if k == 'record_len': + continue + batch_dict['rcnn_label_dict'][k] = torch.cat(v, dim=0) + + return batch_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/voxel_roi_pooling.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/voxel_roi_pooling.py new file mode 100644 index 0000000000000000000000000000000000000000..9d7e995c5df52407c96f12c911f9f13b4eb4790b --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/voxel_roi_pooling.py @@ -0,0 +1,182 @@ +import torch +import torch.nn as nn +from opencood.pcdet_utils.pointnet2.pointnet2_stack import voxel_pool_modules as voxelpool_stack_modules +from opencood.utils import common_utils + + +class VoxelRoIPooling(nn.Module): + def __init__(self, backbone_channels, model_cfg, voxel_size, point_cloud_range, **kwargs): + super().__init__() + self.model_cfg = model_cfg + layer_cfg = self.model_cfg['pool_layers'] + self.point_cloud_range = point_cloud_range + self.voxel_size = voxel_size + self.grid_size = model_cfg['grid_size'] + + c_out = 0 + self.roi_grid_pool_layers = nn.ModuleList() + for src_name in layer_cfg['features_source']: + mlps = layer_cfg[src_name]['mlps'] + for k in range(len(mlps)): + mlps[k] = [backbone_channels[src_name]] + mlps[k] + pool_layer = voxelpool_stack_modules.NeighborVoxelSAModuleMSG( + query_ranges=layer_cfg[src_name]['query_ranges'], + nsamples=layer_cfg[src_name]['nsample'], + radii=layer_cfg[src_name]['pool_radius'], + mlps=mlps, + pool_method=layer_cfg[src_name]['pool_method'], + ) + + self.roi_grid_pool_layers.append(pool_layer) + + c_out += sum([x[-1] for x in mlps]) + + self.init_weights() + + def init_weights(self): + init_func = nn.init.xavier_normal_ + for module_list in [self.shared_fc_layer, self.cls_fc_layers, self.reg_fc_layers]: + for m in module_list.modules(): + if isinstance(m, nn.Linear): + init_func(m.weight) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + nn.init.normal_(self.cls_pred_layer.weight, 0, 0.01) + nn.init.constant_(self.cls_pred_layer.bias, 0) + nn.init.normal_(self.reg_pred_layer.weight, mean=0, std=0.001) + nn.init.constant_(self.reg_pred_layer.bias, 0) + + + def roi_grid_pool(self, batch_dict): + """ + Args: + batch_dict: + batch_size: + rois: (B, num_rois, 7 + C) + point_coords: (num_points, 4) [bs_idx, x, y, z] + point_features: (num_points, C) + point_cls_scores: (N1 + N2 + N3 + ..., 1) + point_part_offset: (N1 + N2 + N3 + ..., 3) + Returns: + """ + rois = batch_dict['rois'] + batch_size = batch_dict['batch_size'] + with_vf_transform = batch_dict.get('with_voxel_feature_transform', False) + + roi_grid_xyz, _ = self.get_global_grid_points_of_roi( + rois, grid_size=self.grid_size + ) # (BxN, 6x6x6, 3) + # roi_grid_xyz: (B, Nx6x6x6, 3) + roi_grid_xyz = roi_grid_xyz.view(batch_size, -1, 3) + + # compute the voxel coordinates of grid points + roi_grid_coords_x = (roi_grid_xyz[:, :, 0:1] - self.point_cloud_range[0]) // self.voxel_size[0] + roi_grid_coords_y = (roi_grid_xyz[:, :, 1:2] 
- self.point_cloud_range[1]) // self.voxel_size[1] + roi_grid_coords_z = (roi_grid_xyz[:, :, 2:3] - self.point_cloud_range[2]) // self.voxel_size[2] + # roi_grid_coords: (B, Nx6x6x6, 3) + roi_grid_coords = torch.cat([roi_grid_coords_x, roi_grid_coords_y, roi_grid_coords_z], dim=-1) + + batch_idx = rois.new_zeros(batch_size, roi_grid_coords.shape[1], 1) + for bs_idx in range(batch_size): + batch_idx[bs_idx, :, 0] = bs_idx + + roi_grid_batch_cnt = rois.new_zeros(batch_size).int().fill_(roi_grid_coords.shape[1]) + + pooled_features_list = [] + for k, src_name in enumerate(self.model_cfg['features_source']): + pool_layer = self.roi_grid_pool_layers[k] + cur_stride = batch_dict['multi_scale_3d_strides'][src_name] + cur_sp_tensors = batch_dict['multi_scale_3d_features'][src_name] + + if with_vf_transform: + cur_sp_tensors = batch_dict['multi_scale_3d_features_post'][src_name] + else: + cur_sp_tensors = batch_dict['multi_scale_3d_features'][src_name] + + # compute voxel center xyz and batch_cnt + cur_coords = cur_sp_tensors.indices + cur_voxel_xyz = common_utils.get_voxel_centers( + cur_coords[:, 1:4], + downsample_times=cur_stride, + voxel_size=self.voxel_size, + point_cloud_range=self.point_cloud_range + ) + cur_voxel_xyz_batch_cnt = cur_voxel_xyz.new_zeros(batch_size).int() + for bs_idx in range(batch_size): + cur_voxel_xyz_batch_cnt[bs_idx] = (cur_coords[:, 0] == bs_idx).sum() + # get voxel2point tensor + v2p_ind_tensor = common_utils.generate_voxel2pinds(cur_sp_tensors) + # compute the grid coordinates in this scale, in [batch_idx, x y z] order + cur_roi_grid_coords = roi_grid_coords // cur_stride + cur_roi_grid_coords = torch.cat([batch_idx, cur_roi_grid_coords], dim=-1) + cur_roi_grid_coords = cur_roi_grid_coords.int() + # voxel neighbor aggregation + pooled_features = pool_layer( + xyz=cur_voxel_xyz.contiguous(), + xyz_batch_cnt=cur_voxel_xyz_batch_cnt, + new_xyz=roi_grid_xyz.contiguous().view(-1, 3), + new_xyz_batch_cnt=roi_grid_batch_cnt, + new_coords=cur_roi_grid_coords.contiguous().view(-1, 4), + features=cur_sp_tensors.features.contiguous(), + voxel2point_indices=v2p_ind_tensor + ) + + pooled_features = pooled_features.view( + -1, self.grid_size ** 3, + pooled_features.shape[-1] + ) # (BxN, 6x6x6, C) + pooled_features_list.append(pooled_features) + + ms_pooled_features = torch.cat(pooled_features_list, dim=-1) + + return ms_pooled_features + + + def get_global_grid_points_of_roi(self, rois, grid_size): + rois = rois.view(-1, rois.shape[-1]) + batch_size_rcnn = rois.shape[0] + + local_roi_grid_points = self.get_dense_grid_points(rois, batch_size_rcnn, grid_size) # (B, 6x6x6, 3) + global_roi_grid_points = common_utils.rotate_points_along_z( + local_roi_grid_points.clone(), rois[:, 6] + ).squeeze(dim=1) + global_center = rois[:, 0:3].clone() + global_roi_grid_points += global_center.unsqueeze(dim=1) + return global_roi_grid_points, local_roi_grid_points + + @staticmethod + def get_dense_grid_points(rois, batch_size_rcnn, grid_size): + faked_features = rois.new_ones((grid_size, grid_size, grid_size)) + dense_idx = faked_features.nonzero() # (N, 3) [x_idx, y_idx, z_idx] + dense_idx = dense_idx.repeat(batch_size_rcnn, 1, 1).float() # (B, 6x6x6, 3) + + local_roi_size = rois.view(batch_size_rcnn, -1)[:, 3:6] + roi_grid_points = (dense_idx + 0.5) / grid_size * local_roi_size.unsqueeze(dim=1) \ + - (local_roi_size.unsqueeze(dim=1) / 2) # (B, 6x6x6, 3) + return roi_grid_points + + def forward(self, batch_dict): + """ + :param input_data: input dict + :return: + """ + + """ + Move those part to 
roi heads + + # targets_dict = self.proposal_layer( + # batch_dict, nms_config=self.model_cfg.NMS_CONFIG['TRAIN' if self.training else 'TEST'] + # ) + # if self.training: + # targets_dict = self.assign_targets(batch_dict) + # batch_dict['rois'] = targets_dict['rois'] + # batch_dict['roi_labels'] = targets_dict['roi_labels'] + """ + + + # RoI aware pooling + pooled_features = self.roi_grid_pool(batch_dict) # (BxN, 6x6x6, C) + batch_dict['pooled_features'] = pooled_features + + return batch_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/vsa.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/vsa.py new file mode 100644 index 0000000000000000000000000000000000000000..42293b252f037b77fc2fa6ab0a49fc642fd070c2 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/vsa.py @@ -0,0 +1,274 @@ +import copy +import random + +import torch +import torch.nn as nn + +from opencood.pcdet_utils.pointnet2.pointnet2_stack import pointnet2_modules as pointnet2_stack_modules +from opencood.pcdet_utils.pointnet2.pointnet2_stack import pointnet2_utils as pointnet2_stack_utils +from opencood.pcdet_utils.roiaware_pool3d.roiaware_pool3d_utils import points_in_boxes_gpu +from opencood.utils import common_utils + + +def bilinear_interpolate_torch(im, x, y): + """ + Args: + im: (H, W, C) [y, x] + x: (N) + y: (N) + Returns: + """ + x0 = torch.floor(x).long() + x1 = x0 + 1 + + y0 = torch.floor(y).long() + y1 = y0 + 1 + + x0 = torch.clamp(x0, 0, im.shape[1] - 1) + x1 = torch.clamp(x1, 0, im.shape[1] - 1) + y0 = torch.clamp(y0, 0, im.shape[0] - 1) + y1 = torch.clamp(y1, 0, im.shape[0] - 1) + + Ia = im[y0, x0] + Ib = im[y1, x0] + Ic = im[y0, x1] + Id = im[y1, x1] + + wa = (x1.type_as(x) - x) * (y1.type_as(y) - y) + wb = (x1.type_as(x) - x) * (y - y0.type_as(y)) + wc = (x - x0.type_as(x)) * (y1.type_as(y) - y) + wd = (x - x0.type_as(x)) * (y - y0.type_as(y)) + ans = torch.t((torch.t(Ia) * wa)) + torch.t(torch.t(Ib) * wb) + torch.t(torch.t(Ic) * wc) + torch.t(torch.t(Id) * wd) + return ans + + +class VoxelSetAbstraction(nn.Module): + def __init__(self, model_cfg, voxel_size, point_cloud_range, num_bev_features=None, + num_rawpoint_features=None, **kwargs): + super().__init__() + self.model_cfg = model_cfg + self.voxel_size = voxel_size + self.point_cloud_range = point_cloud_range + + SA_cfg = self.model_cfg['sa_layer'] + + self.SA_layers = nn.ModuleList() + self.SA_layer_names = [] + self.downsample_times_map = {} + c_in = 0 + for src_name in self.model_cfg['features_source']: + if src_name in ['bev', 'raw_points']: + continue + self.downsample_times_map[src_name] = SA_cfg[src_name]['downsample_factor'] + mlps = copy.copy(SA_cfg[src_name]['mlps']) + for k in range(len(mlps)): + mlps[k] = [mlps[k][0]] + mlps[k] + cur_layer = pointnet2_stack_modules.StackSAModuleMSG( + radii=SA_cfg[src_name]['pool_radius'], + nsamples=SA_cfg[src_name]['n_sample'], + mlps=mlps, + use_xyz=True, + pool_method='max_pool', + ) + self.SA_layers.append(cur_layer) + self.SA_layer_names.append(src_name) + + c_in += sum([x[-1] for x in mlps]) + + if 'bev' in self.model_cfg['features_source']: + c_bev = num_bev_features + c_in += c_bev + + if 'raw_points' in self.model_cfg['features_source']: + mlps = copy.copy(SA_cfg['raw_points']['mlps']) + for k in range(len(mlps)): + mlps[k] = [num_rawpoint_features - 3] + mlps[k] + + self.SA_rawpoints = pointnet2_stack_modules.StackSAModuleMSG( + radii=SA_cfg['raw_points']['pool_radius'], + 
nsamples=SA_cfg['raw_points']['n_sample'], + mlps=mlps, + use_xyz=True, + pool_method='max_pool' + ) + c_in += sum([x[-1] for x in mlps]) + + self.vsa_point_feature_fusion = nn.Sequential( + nn.Linear(c_in, self.model_cfg['num_out_features'], bias=False), + nn.BatchNorm1d(self.model_cfg['num_out_features']), + nn.ReLU(), + ) + self.num_point_features = self.model_cfg['num_out_features'] + self.num_point_features_before_fusion = c_in + + def interpolate_from_bev_features(self, keypoints, bev_features, batch_size, bev_stride): + x_idxs = (keypoints[:, :, 0] - self.point_cloud_range[0]) / self.voxel_size[0] + y_idxs = (keypoints[:, :, 1] - self.point_cloud_range[1]) / self.voxel_size[1] + x_idxs = x_idxs / bev_stride + y_idxs = y_idxs / bev_stride + + point_bev_features_list = [] + for k in range(batch_size): + cur_x_idxs = x_idxs[k] + cur_y_idxs = y_idxs[k] + cur_bev_features = bev_features[k].permute(1, 2, 0) # (H, W, C) + point_bev_features = bilinear_interpolate_torch(cur_bev_features, cur_x_idxs, cur_y_idxs) + point_bev_features_list.append(point_bev_features.unsqueeze(dim=0)) + + point_bev_features = torch.cat(point_bev_features_list, dim=0) # (B, N, C0) + return point_bev_features + + def get_sampled_points(self, batch_dict): + batch_size = batch_dict['batch_size'] + if self.model_cfg['point_source'] == 'raw_points': + src_points = batch_dict['origin_lidar_for_vsa'][:, 1:] + batch_indices = batch_dict['origin_lidar_for_vsa'][:, 0].long() + elif self.model_cfg['point_source'] == 'voxel_centers': + src_points = common_utils.get_voxel_centers( + batch_dict['voxel_coords'][:, 1:4], + downsample_times=1, + voxel_size=self.voxel_size, + point_cloud_range=self.point_cloud_range + ) + batch_indices = batch_dict['voxel_coords'][:, 0].long() + else: + raise NotImplementedError + + keypoints_batch = torch.randn((batch_size, self.model_cfg['num_keypoints'], 4), device=src_points.device) + keypoints_batch[..., 0] = keypoints_batch[..., 0] * 140 + keypoints_batch[..., 1] = keypoints_batch[..., 0] * 40 + # points with height flag 10 are padding/invalid, for later filtering + keypoints_batch[..., 2] = 10.0 + for bs_idx in range(batch_size): + bs_mask = (batch_indices == bs_idx) + sampled_points = src_points[bs_mask].unsqueeze(dim=0) # (1, N, 3) + # sample points with FPS + # some cropped pcd may have very few points, select various number + # of points to ensure similar sample density + # 50000 is approximately the number of points in one full pcd + num_kpts = int(self.model_cfg['num_keypoints'] * sampled_points.shape[1] / 50000) + 1 + num_kpts = min(num_kpts, self.model_cfg['num_keypoints']) + cur_pt_idxs = pointnet2_stack_utils.furthest_point_sample( + sampled_points[:, :, 0:3].contiguous(), num_kpts + ).long() + + if sampled_points.shape[1] < num_kpts: + empty_num = num_kpts - sampled_points.shape[1] + cur_pt_idxs[0, -empty_num:] = cur_pt_idxs[0, :empty_num] + + keypoints = sampled_points[0][cur_pt_idxs[0]].unsqueeze(dim=0) + + keypoints_batch[bs_idx, :len(keypoints[0]), :] = keypoints + + # keypoints = torch.cat(keypoints_list, dim=0) # (B, M, 3) + return keypoints_batch + + def forward(self, batch_dict): + """ + Args: + batch_dict: + batch_size: + keypoints: (B, num_keypoints, 3) + multi_scale_3d_features: { + 'x_conv4': ... + } + points: optional (N, 1 + 3 + C) [bs_idx, x, y, z, ...] 
+ spatial_features: optional + spatial_features_stride: optional + Returns: + point_features: (N, C) + point_coords: (N, 4) + """ + keypoints = self.get_sampled_points(batch_dict) # BxNx4 + kpt_mask1 = torch.logical_and(keypoints[..., 2] > -2.8, keypoints[..., 2] < 1.0) + kpt_mask2 = None + # Only select the points that are in the predicted bounding boxes + if 'det_boxes' in batch_dict: + dets_list = batch_dict['det_boxes'] + max_len = max([len(dets) for dets in dets_list]) + boxes = torch.zeros((len(dets_list), max_len, 7), dtype=dets_list[0].dtype, + device=dets_list[0].device) + for i, dets in enumerate(dets_list): + dets = dets[:, [0,1,2,5,4,3,6]] # hwl -> lwh + if len(dets)==0: + continue + cur_dets = dets.clone() + if self.model_cfg['enlarge_selection_boxes']: + cur_dets[:, 3:6] += 0.5 + boxes[i, :len(dets)] = cur_dets + # mask out some keypoints to spare the GPU storage + kpt_mask2 = points_in_boxes_gpu(keypoints[..., :3], boxes) >= 0 + + kpt_mask = torch.logical_and(kpt_mask1, kpt_mask2) if kpt_mask2 is not None else kpt_mask1 + # Ensure there are more than 2 points are selected to satisfy the + # condition of batch norm in the FC layers of feature fusion module + if (kpt_mask).sum() < 2: + kpt_mask[0, torch.randint(0, 1024, (2,))] = True + + + point_features_list = [] + if 'bev' in self.model_cfg['features_source']: + point_bev_features = self.interpolate_from_bev_features( + keypoints[..., :3], batch_dict['spatial_features'], batch_dict['batch_size'], + bev_stride=batch_dict['spatial_features_stride'] + ) + point_features_list.append(point_bev_features[kpt_mask]) + + batch_size, num_keypoints, _ = keypoints.shape + + new_xyz = keypoints[kpt_mask] + new_xyz_batch_cnt = torch.tensor([(mask).sum() for mask in kpt_mask], device=new_xyz.device).int() + + if 'raw_points' in self.model_cfg['features_source']: + raw_points = batch_dict['origin_lidar_for_vsa'] + xyz = raw_points[:, 1:4] + xyz_batch_cnt = xyz.new_zeros(batch_size).int() + indices = raw_points[:, 0].long() + for bs_idx in range(batch_size): + xyz_batch_cnt[bs_idx] = (indices == bs_idx).sum() + point_features = None + + pooled_points, pooled_features = self.SA_rawpoints( + xyz=xyz.contiguous(), + xyz_batch_cnt=xyz_batch_cnt, + new_xyz=new_xyz[:, :3].contiguous(), + new_xyz_batch_cnt=new_xyz_batch_cnt, + features=point_features, + ) + point_features_list.append(pooled_features) + + for k, src_name in enumerate(self.SA_layer_names): + cur_coords = batch_dict['multi_scale_3d_features'][src_name].indices + xyz = common_utils.get_voxel_centers( + cur_coords[:, 1:4], + downsample_times=self.downsample_times_map[src_name], + voxel_size=self.voxel_size, + point_cloud_range=self.point_cloud_range + ) + xyz_batch_cnt = xyz.new_zeros(batch_size).int() + for bs_idx in range(batch_size): + xyz_batch_cnt[bs_idx] = (cur_coords[:, 0] == bs_idx).sum() + + pooled_points, pooled_features = self.SA_layers[k]( + xyz=xyz.contiguous(), + xyz_batch_cnt=xyz_batch_cnt, + new_xyz=new_xyz[:, :3].contiguous(), + new_xyz_batch_cnt=new_xyz_batch_cnt, + features=batch_dict['multi_scale_3d_features'][src_name].features.contiguous(), + ) + + point_features_list.append(pooled_features) + + point_features = torch.cat(point_features_list, dim=1) + batch_dict['point_features_before_fusion'] = point_features.view(-1, point_features.shape[-1]) # torch.Size([373, 640]) + point_features = self.vsa_point_feature_fusion(point_features.view(-1, point_features.shape[-1])) # (0): Linear(in_features=512, out_features=32, bias=False) + + cur_idx = 0 + 
batch_dict['point_features'] = [] + batch_dict['point_coords'] = [] + for num in new_xyz_batch_cnt: + batch_dict['point_features'].append(point_features[cur_idx:cur_idx + num]) + batch_dict['point_coords'].append(new_xyz[cur_idx:cur_idx + num]) + cur_idx += num + + return batch_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/voxel_net.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/voxel_net.py new file mode 100644 index 0000000000000000000000000000000000000000..78116a2c5f16ffad61b738701a543b8e5bd84433 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/voxel_net.py @@ -0,0 +1,234 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , OpenPCDet +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch.nn as nn +import torch.nn.functional as F +import torch +from torch.autograd import Variable + +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.utils.common_utils import torch_tensor_to_numpy + + +# conv2d + bn + relu +class Conv2d(nn.Module): + + def __init__(self, in_channels, out_channels, k, s, p, activation=True, + batch_norm=True): + super(Conv2d, self).__init__() + self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=k, + stride=s, padding=p) + if batch_norm: + self.bn = nn.BatchNorm2d(out_channels) + else: + self.bn = None + self.activation = activation + + def forward(self, x): + x = self.conv(x) + if self.bn is not None: + x = self.bn(x) + if self.activation: + return F.relu(x, inplace=True) + else: + return x + + +# conv3d + bn + relu +class Conv3d(nn.Module): + + def __init__(self, in_channels, out_channels, k, s, p, batch_norm=True): + super(Conv3d, self).__init__() + self.conv = nn.Conv3d(in_channels, out_channels, kernel_size=k, + stride=s, padding=p) + if batch_norm: + self.bn = nn.BatchNorm3d(out_channels) + else: + self.bn = None + + def forward(self, x): + x = self.conv(x) + if self.bn is not None: + x = self.bn(x) + + return F.relu(x, inplace=True) + + +# Fully Connected Network +class FCN(nn.Module): + + def __init__(self, cin, cout): + super(FCN, self).__init__() + self.cout = cout + self.linear = nn.Linear(cin, cout) + self.bn = nn.BatchNorm1d(cout) + + def forward(self, x): + # KK is the stacked k across batch + kk, t, _ = x.shape + x = self.linear(x.view(kk * t, -1)) + x = F.relu(self.bn(x)) + return x.view(kk, t, -1) + + +# Voxel Feature Encoding layer +class VFE(nn.Module): + + def __init__(self, cin, cout, T): + super(VFE, self).__init__() + assert cout % 2 == 0 + self.units = cout // 2 + self.fcn = FCN(cin, self.units) + self.T = T + + def forward(self, x, mask): + # point-wise feature + pwf = self.fcn(x) + # locally aggregated feature + laf = torch.max(pwf, 1)[0].unsqueeze(1).repeat(1, self.T, 1) + # point-wise concat feature + pwcf = torch.cat((pwf, laf), dim=2) + # apply mask + mask = mask.unsqueeze(2).repeat(1, 1, self.units * 2) + pwcf = pwcf * mask.float() + + return pwcf + + +# Stacked Voxel Feature Encoding +class SVFE(nn.Module): + + def __init__(self, T): + super(SVFE, self).__init__() + self.vfe_1 = VFE(7, 32, T) + self.vfe_2 = VFE(32, 128, T) + self.fcn = FCN(128, 128) + + def forward(self, x): + mask = torch.ne(torch.max(x, 2)[0], 0) + x = self.vfe_1(x, mask) + x = self.vfe_2(x, mask) + x = self.fcn(x) + # element-wise max pooling + x = torch.max(x, 1)[0] + return x + + +# Convolutional Middle Layer +class CML(nn.Module): + def __init__(self): + super(CML, self).__init__() + self.conv3d_1 = Conv3d(64, 64, 
3, s=(2, 1, 1), p=(1, 1, 1)) + self.conv3d_2 = Conv3d(64, 64, 3, s=(1, 1, 1), p=(0, 1, 1)) + self.conv3d_3 = Conv3d(64, 64, 3, s=(2, 1, 1), p=(1, 1, 1)) + + def forward(self, x): + x = self.conv3d_1(x) + x = self.conv3d_2(x) + x = self.conv3d_3(x) + return x + + +# Region Proposal Network +class RPN(nn.Module): + def __init__(self, anchor_num=2): + super(RPN, self).__init__() + self.anchor_num = anchor_num + + self.block_1 = [Conv2d(128, 128, 3, 2, 1)] + self.block_1 += [Conv2d(128, 128, 3, 1, 1) for _ in range(3)] + self.block_1 = nn.Sequential(*self.block_1) + + self.block_2 = [Conv2d(128, 128, 3, 2, 1)] + self.block_2 += [Conv2d(128, 128, 3, 1, 1) for _ in range(5)] + self.block_2 = nn.Sequential(*self.block_2) + + self.block_3 = [Conv2d(128, 256, 3, 2, 1)] + self.block_3 += [nn.Conv2d(256, 256, 3, 1, 1) for _ in range(5)] + self.block_3 = nn.Sequential(*self.block_3) + + self.deconv_1 = nn.Sequential(nn.ConvTranspose2d(256, 256, 4, 4, 0), + nn.BatchNorm2d(256)) + self.deconv_2 = nn.Sequential(nn.ConvTranspose2d(128, 256, 2, 2, 0), + nn.BatchNorm2d(256)) + self.deconv_3 = nn.Sequential(nn.ConvTranspose2d(128, 256, 1, 1, 0), + nn.BatchNorm2d(256)) + + self.score_head = Conv2d(768, self.anchor_num, 1, 1, 0, + activation=False, batch_norm=False) + self.reg_head = Conv2d(768, 7 * self.anchor_num, 1, 1, 0, + activation=False, batch_norm=False) + + def forward(self, x): + x = self.block_1(x) + x_skip_1 = x + x = self.block_2(x) + x_skip_2 = x + x = self.block_3(x) + x_0 = self.deconv_1(x) + x_1 = self.deconv_2(x_skip_2) + x_2 = self.deconv_3(x_skip_1) + x = torch.cat((x_0, x_1, x_2), 1) + return self.score_head(x), self.reg_head(x) + + +class VoxelNet(nn.Module): + def __init__(self, args): + super(VoxelNet, self).__init__() + self.svfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + + # self.svfe = SVFE(args['T']) + self.cml = CML() + self.rpn = RPN(args['anchor_num']) + + self.N = args['N'] + self.D = args['D'] + self.H = args['H'] + self.W = args['W'] + self.T = args['T'] + self.anchor_num = args['anchor_num'] + + def voxel_indexing(self, sparse_features, coords): + dim = sparse_features.shape[-1] + + dense_feature = Variable( + torch.zeros(dim, self.N, self.D, self.H, self.W).cuda()) + + dense_feature[:, coords[:, 0], coords[:, 1], coords[:, 2], + coords[:, 3]] = sparse_features.transpose(0, 1) + + return dense_feature.transpose(0, 1) + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points} + + # feature learning network + vwfs = self.svfe(batch_dict)['pillar_features'] + + voxel_coords = torch_tensor_to_numpy(voxel_coords) + vwfs = self.voxel_indexing(vwfs, voxel_coords) + + # convolutional middle network + vwfs = self.cml(vwfs) + + # region proposal network + + # merge the depth and feature dim into one, output probability score + # map and regression map + psm, rm = self.rpn(vwfs.view(self.N, -1, self.H, self.W)) + + output_dict = {'psm': psm, + 'rm': rm} + + return output_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/voxel_net_intermediate.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/voxel_net_intermediate.py new file mode 100644 index 
0000000000000000000000000000000000000000..fec3aa3157554322033f9cc5331fe9f8f7485221 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/voxel_net_intermediate.py @@ -0,0 +1,203 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +""" +VoxelNet for intermediate fusion +""" +import torch.nn as nn +import torch.nn.functional as F +import torch +import numpy as np +from torch.autograd import Variable + +from opencood.models.voxel_net import RPN, CML +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.utils.common_utils import torch_tensor_to_numpy +from opencood.models.fuse_modules.self_attn import AttFusion +from opencood.models.sub_modules.auto_encoder import AutoEncoder + + +# conv2d + bn + relu +class Conv2d(nn.Module): + + def __init__(self, in_channels, out_channels, k, s, p, activation=True, + batch_norm=True, bias=True): + super(Conv2d, self).__init__() + self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=k, + stride=s, padding=p, bias=bias) + if batch_norm: + self.bn = nn.BatchNorm2d(out_channels) + else: + self.bn = None + self.activation = activation + + def forward(self, x): + x = self.conv(x) + if self.bn is not None: + x = self.bn(x) + if self.activation: + return F.relu(x, inplace=True) + else: + return x + + +class NaiveFusion(nn.Module): + + def __init__(self): + super(NaiveFusion, self).__init__() + self.conv1 = Conv2d(128 * 5, 256, 3, 1, 1, + batch_norm=False, bias=False) + self.conv2 = Conv2d(256, 128, 3, 1, 1) + + def forward(self, x): + x = self.conv1(x) + x = self.conv2(x) + + return x + + +class VoxelNetIntermediate(nn.Module): + def __init__(self, args): + super(VoxelNetIntermediate, self).__init__() + self.svfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + + self.proj_first = True + if ('proj_first' in args) and (args['proj_first'] is False): + self.proj_first = False + + self.cml = CML() + self.fusion_net = AttFusion(128) + self.rpn = RPN(args['anchor_num']) + + self.N = args['N'] + self.D = args['D'] + self.H = args['H'] + self.W = args['W'] + self.T = args['T'] + self.anchor_num = args['anchor_num'] + + self.compression = False + if 'compression' in args and args['compression'] > 0: + self.compression = True + self.compression_layer = AutoEncoder(128, args['compression']) + + def voxel_indexing(self, sparse_features, coords): + dim = sparse_features.shape[-1] + + dense_feature = Variable( + torch.zeros(dim, self.N, self.D, self.H, self.W).cuda()) + + dense_feature[:, coords[:, 0], coords[:, 1], coords[:, 2], + coords[:, 3]] = sparse_features.transpose(0, 1) + + return dense_feature.transpose(0, 1) + + def regroup(self, dense_feature, record_len): + """ + Regroup the data based on the record_len. + + Parameters + ---------- + dense_feature : torch.Tensor + N, C, H, W + record_len : list + [sample1_len, sample2_len, ...] 
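+            e.g. record_len = [2, 3] splits a (5, C, H, W) input into groups of
+            2 and 3 CAV features; each group is zero-padded to the maximum of
+            5 CAVs and flattened to (1, 5C, H, W) before concatenation.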
+ + Returns + ------- + regroup_feature : torch.Tensor + B, 5C, H, W + """ + cum_sum_len = list(np.cumsum(record_len)) + split_features = torch.tensor_split(dense_feature, + cum_sum_len[:-1]) + regroup_features = [] + + for split_feature in split_features: + # M, C, H, W + feature_shape = split_feature.shape + + # the maximum M is 5 as most 5 cavs + padding_len = 5 - feature_shape[0] + padding_tensor = torch.zeros(padding_len, feature_shape[1], + feature_shape[2], feature_shape[3]) + padding_tensor = padding_tensor.to(split_feature.device) + + split_feature = torch.cat([split_feature, padding_tensor], + dim=0) + + # 1, 5C, H, W + split_feature = split_feature.view(-1, + feature_shape[2], + feature_shape[3]).unsqueeze(0) + regroup_features.append(split_feature) + + # B, 5C, H, W + regroup_features = torch.cat(regroup_features, dim=0) + + return regroup_features + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'pairwise_t_matrix': pairwise_t_matrix} + + if voxel_coords.is_cuda: + record_len_tmp = record_len.cpu() + + record_len_tmp = list(record_len_tmp.numpy()) + + self.N = sum(record_len_tmp) + + # feature learning network + vwfs = self.svfe(batch_dict)['pillar_features'] + + voxel_coords = torch_tensor_to_numpy(voxel_coords) + vwfs = self.voxel_indexing(vwfs, voxel_coords) + + # convolutional middle network + vwfs = self.cml(vwfs) + # convert from 3d to 2d N C H W + vmfs = vwfs.view(self.N, -1, self.H, self.W) + + # compression layer + if self.compression: + vmfs = self.compression_layer(vmfs) + + # pairwise_t_matrix + # project_first must be right + # have not check project_first=False + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + + if not self.proj_first: + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * self.H / self.W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * self.W / self.H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / self.W * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / self.H * 2 + + + # information naive fusion + vmfs_fusion = self.fusion_net(vmfs, record_len, pairwise_t_matrix) + + # region proposal network + # merge the depth and feature dim into one, output probability score + # map and regression map + psm, rm = self.rpn(vmfs_fusion) + + output_dict = {'psm': psm, + 'rm': rm} + + return output_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__init__.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/__init__.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7e7ef00135af613c65097efb7d1715ace89e4e83 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/__init__.cpython-37.pyc differ diff --git 
a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/box_utils.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/box_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3794450e0618ae83c48f5c51fd31fc166842074e Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/box_utils.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/camera_utils.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/camera_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4963ec68151a79563b87312eaa75434b5efd22a6 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/camera_utils.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/common_utils.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/common_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0340bf3a78476f191af36b10c02acb262a934625 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/common_utils.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/eval_utils.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/eval_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4ab71a410cdb71dbf8db703395987c7551e90a66 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/eval_utils.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/heter_utils.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/heter_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a56d98944bbad90a634fd510128843b299fde59f Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/heter_utils.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/pcd_utils.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/pcd_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e22e2505e142e8dbcdf442dc28a593dbc19118a5 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/pcd_utils.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/pose_utils.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/pose_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b3d52d369d81a134570672cfd4293f4c561cb775 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/pose_utils.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/transformation_utils.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/transformation_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7c9765efe9edb70249e07a5682722cbdca1438b6 Binary files /dev/null and 
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/transformation_utils.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/bessel_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/bessel_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..12140e1143b0c1f7234c4576b32a305f5cbf8689 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/bessel_utils.py @@ -0,0 +1,288 @@ +""" +Pytorch implementation of the logarithm of the modified Bessel function +of the 1st kind I(nu, z). +Based mainly on scipy and extends the definition domain compared to +scipy methods (which can easily provide infinite values). The extension +is done using numerical approximations involving the ratio of Bessel +functions: +- https://arxiv.org/pdf/1606.02008.pdf (Theorems 5,6) +- https://arxiv.org/pdf/1902.02603.pdf (Appendix) +Jean-Remy Conti +2021 +""" + +import torch +from scipy import special, pi + + +def logbessel_I_scipy(nu, z, check = True): + ''' + Pytorch version of scipy computation of modified Bessel functions + of the 1st kind I(nu,z). + Parameters + ---------- + nu: positive int, float + Order of modified Bessel function of 1st kind. + z: int/float or tensor, shape (N,) + Argument of Bessel function. + check: bool + If True, check if argument of log is non zero. + + Return + ------ + result: tensor, shape (N,) + ''' + if not isinstance(z, torch.Tensor): + z = torch.tensor(z) + z = z.reshape(-1) + + result = special.ive(nu, z) + if check: + assert len(result[ result == 0]) == 0, ('Bessel functions take ' + + 'value 0 for z = {}'.format( + z[ result == 0])) + result = torch.log(result) + z + return result + + + +def logbessel_I_asymptotic(nu, z): + ''' + Asymptotic branches of the modified Bessel function of the 1st kind. + https://arxiv.org/pdf/1902.02603.pdf + Parameters + ---------- + nu: positive int, float + Order of modified Bessel function of 1st kind. + z: tensor, shape (N,) + Argument of Bessel function. + + Return + ------ + result: tensor, shape (N,) + ''' + z = z.double() + eta = (nu + 0.5)/(2* (nu+1) ) + result = torch.zeros(z.shape[0]).double() + + result[ z <= nu ] = ( + nu*torch.log(z[ z <= nu ]) + eta*z[ z <= nu ] + - (nu+eta)*torch.log(torch.tensor(2.)) + - torch.log( torch.tensor(special.gamma(nu+1)) ) + ) + + result[ z > nu ] = ( + z[ z > nu ] - 0.5*torch.log(z[ z > nu ]) + - 0.5*torch.log(torch.tensor(2*pi)) + ) + return result + +def B(alpha, nu, z): + ''' + https://arxiv.org/pdf/1606.02008.pdf + ''' + nu = nu.reshape(1,-1) + z = z.reshape(-1,1) + lamda = nu + float(alpha-1)/2. + delta = nu-0.5 + lamda / (2*torch.sqrt(lamda**2 + z**2)) + return z / (delta + torch.sqrt(delta**2 + z**2) ) +def B_tilde(alpha, nu, z): + ''' + https://arxiv.org/pdf/1606.02008.pdf + ''' + nu = nu.reshape(1,-1) + z = z.reshape(-1,1) + sigma = nu + float(alpha+1)/2. + delta_p = nu + 0.5 + sigma/(2*torch.sqrt(sigma**2 + z**2)) + delta_m = nu - 0.5 - sigma/(2*torch.sqrt(sigma**2 + z**2)) + return z/( delta_m + torch.sqrt(delta_p**2 + z**2)) +def lb_Ak(nu, z): + ''' + Lower-bound for the ratio of modified Bessel functions of 1st kind. + https://arxiv.org/pdf/1606.02008.pdf (Theorems 5 and 6). + ''' + assert torch.all(nu >= 0) + nu = nu.reshape(1,-1) + z = z.reshape(-1,1) + return B_tilde(0, nu, z) +def ub_Ak(nu, z): + ''' + Upper-bound for the ratio of modified Bessel functions of 1st kind. + https://arxiv.org/pdf/1606.02008.pdf (Theorems 5 and 6). 
+ Return + ------ + ub: tensor, shape (z.shape[0], nu.shape[0]) + Upper-bound for Ak(nu, z). + ''' + assert torch.all(nu >= 0) + nu = nu.reshape(1,-1) + z = z.reshape(-1,1) + + ub = torch.zeros(z.shape[0], nu.shape[1]) + ub[:, nu.reshape(-1) >= 0.5] = torch.min(B(0, nu[ nu >= 0.5 ], z), + B_tilde(2, nu[ nu >= 0.5 ], z)) + ub[:, nu.reshape(-1) < 0.5] = B_tilde(2, nu[ nu < 0.5 ], z) + return ub + +def Ak_approx(nu, z): + ''' + Approximation of ratio of modified Bessel functions of 1st kind. + https://arxiv.org/pdf/1902.02603.pdf + Parameters + ---------- + nu: tensor, shape (N0,) + Order of modified Bessel functions of 1st kind. + z: tensor, shape (N1,) + Argument of Bessel function. Positive values only. + + Return + ------ + tensor, shape (N1, N0) + ''' + return 0.5*(lb_Ak(nu, z) + ub_Ak(nu, z)) + + +def logbessel_I_approx(nu, z): + ''' + Approximation of the logarithm of the modified Bessel function of + 1st kind I(nu, z) using their ratio. + https://arxiv.org/pdf/1902.02603.pdf + Parameters + ---------- + nu: positive int, float + Order of modified Bessel function of 1st kind. + z: tensor, shape (N,) + Argument of Bessel function. Positive values only. + Return + ------ + approx: tensor, shape (N,) + ''' + assert nu >= 0 + approx = logbessel_I_scipy(nu-int(nu), z, check= True) + nu_v = nu - torch.arange(0, int(nu)) + A = Ak_approx(nu_v, z) + approx += torch.log(A).sum(axis= 1) + return approx + + +def logbessel_I(nu, z, fast = False, check = True): + ''' + Pytorch implementation of the logarithm of the modified Bessel + function of the 1st kind I(nu, z). + Based mainly on scipy and extends the definition domain compared to + scipy methods (which can easily provide infinite values). The + extension is done using numerical approximations involving the ratio + of Bessel functions: + - https://arxiv.org/pdf/1606.02008.pdf (Theorems 5,6) + - https://arxiv.org/pdf/1902.02603.pdf (Appendix) + Parameters + ---------- + nu: positive int, float + Order of modified Bessel function of 1st kind. + z: int/float or tensor, shape (N,) + Argument of Bessel function. + fast: bool + If True, use asymptotic behavior as approximation when main + scipy method is not tractable. If False, use tight bounds for + the ratio of Bessel functions: + https://arxiv.org/pdf/1902.02603.pdf + check: bool + If True, check if argument of log is non zero and not NaN. 
+ + Return + ------ + result: tensor, shape (N,) + ''' + if not isinstance(z, torch.Tensor): + z = torch.tensor(z) + z = z.reshape(-1) + + result = special.ive(nu, z) + # Indices for which scipy.special.ive is wrong + bad_idx = torch.arange(result.shape[0])[ result == 0] + result = torch.log(result) + z + if fast: + result[ bad_idx ] = logbessel_I_asymptotic(nu, z[ bad_idx ]) + else: + result[ bad_idx ] = logbessel_I_approx(nu, z[ bad_idx ]) + + if check: + # If problem with assertion, use a better defined init in + # logbessel_approx + assert len(result[ torch.isnan(result) ]) == 0, ('Bessel functions take ' + + 'NaN value for z = {}'.format( + z[ torch.isnan(result) ])) + assert len(result[ torch.isinf(result) ]) == 0, ('Bessel functions take ' + + 'inf value for z = {}'.format( + z[ torch.isinf(result) ])) + return result + + + +if __name__ == "__main__": + + import time + import matplotlib.pyplot as plt + + # ------- Single test ------- # + print(logbessel_I(nu= 10000, z= 10000)) + + + # ------- Vectorized tests ------- # + nu = 1000 + z = torch.arange(1, 200001) + + # Scipy + print('------\nScipy adaptation:') + start = time.time() + scipy_adapt = logbessel_I_scipy(nu, z, check= False) + end = time.time() + print('Computation time: ', "%.4f"%(end-start), 's') + + # Asymptotic + print('------\nAsymptotic computation:') + start = time.time() + asympt = logbessel_I_asymptotic(nu, z) + end = time.time() + print('Computation time: ', "%.4f"%(end-start), 's') + + # Approximation using ratios of Bessel functions + print('------\nApproximation via ratios of Bessel functions:') + start = time.time() + approx = logbessel_I_approx(nu, z) + end = time.time() + print('Computation time: ', "%.4f"%(end-start), 's') + + # Fast extension of scipy + print('------\nFast method (proposed):') + start = time.time() + logbessels_fast = logbessel_I(nu, z, fast= True, check= False) + end = time.time() + print('Computation time: ', "%.4f"%(end-start), 's') + + # Precise extension of scipy + print('------\nPrecise method (proposed):') + start = time.time() + logbessels = logbessel_I(nu, z, fast= False, check= False) + end = time.time() + print('Computation time: ', "%.4f"%(end-start), 's') + + + # ------- Plots ------- # + linewidth = 4 + + # Plot different methods + plt.plot(z, scipy_adapt, '-', label='scipy', linewidth = linewidth) + plt.plot(z, asympt, '--', label='asymptotic', linewidth = linewidth) + plt.plot(z, logbessels, '--', label='ours', linewidth = linewidth) + plt.xlabel(r'$z$') + plt.title(r'$\log[I(\nu = {}, z)]$'.format(nu)) + plt.legend() + + # Plot relative error of approximation + plt.figure() + plt.plot(z, torch.abs((approx - scipy_adapt)/scipy_adapt), '-', label=None, linewidth = linewidth) + plt.xlabel(r'$z$') + plt.title(r'Relative error of approximation for $\log[I(\nu = {}, z)]$'.format(nu)) + + plt.show() \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/box_overlaps.c b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/box_overlaps.c new file mode 100644 index 0000000000000000000000000000000000000000..e626057643ea66b5248e6e6b10d38d69ba553a45 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/box_overlaps.c @@ -0,0 +1,13565 @@ +/* Generated by Cython 3.0.11 */ + +/* BEGIN: Cython Metadata +{ + "distutils": { + "depends": [], + "name": "opencood.utils.box_overlaps", + "sources": [ + "opencood/utils/box_overlaps.pyx" + ] + }, + "module_name": "opencood.utils.box_overlaps" +} +END: Cython Metadata */ + 
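Note: box_overlaps.c above this point is auto-generated by Cython 3.0.11 from opencood/utils/box_overlaps.pyx, as recorded in the metadata block at the top of the file. For orientation only, the sketch below shows how such an extension is typically compiled; the setup file name and build options are assumptions, not the project's actual build configuration.

# setup_box_overlaps.py -- hypothetical build sketch, not part of this repository.
# Assumes Cython and NumPy are installed; names and paths mirror the Cython metadata above.
from setuptools import Extension, setup
from Cython.Build import cythonize
import numpy as np

extensions = [
    Extension(
        name="opencood.utils.box_overlaps",           # module_name from the metadata
        sources=["opencood/utils/box_overlaps.pyx"],  # source listed in the metadata
        include_dirs=[np.get_include()],              # the generated C includes numpy headers
    ),
]

setup(ext_modules=cythonize(extensions, language_level=3))

Running python setup_box_overlaps.py build_ext --inplace would compile the extension in place as an importable opencood.utils.box_overlaps module; the .pyx source itself is not part of this diff.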
+#ifndef PY_SSIZE_T_CLEAN +#define PY_SSIZE_T_CLEAN +#endif /* PY_SSIZE_T_CLEAN */ +#if defined(CYTHON_LIMITED_API) && 0 + #ifndef Py_LIMITED_API + #if CYTHON_LIMITED_API+0 > 0x03030000 + #define Py_LIMITED_API CYTHON_LIMITED_API + #else + #define Py_LIMITED_API 0x03030000 + #endif + #endif +#endif + +#include "Python.h" +#ifndef Py_PYTHON_H + #error Python headers needed to compile C extensions, please install development version of Python. +#elif PY_VERSION_HEX < 0x02070000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000) + #error Cython requires Python 2.7+ or Python 3.3+. +#else +#if defined(CYTHON_LIMITED_API) && CYTHON_LIMITED_API +#define __PYX_EXTRA_ABI_MODULE_NAME "limited" +#else +#define __PYX_EXTRA_ABI_MODULE_NAME "" +#endif +#define CYTHON_ABI "3_0_11" __PYX_EXTRA_ABI_MODULE_NAME +#define __PYX_ABI_MODULE_NAME "_cython_" CYTHON_ABI +#define __PYX_TYPE_MODULE_PREFIX __PYX_ABI_MODULE_NAME "." +#define CYTHON_HEX_VERSION 0x03000BF0 +#define CYTHON_FUTURE_DIVISION 1 +#include +#ifndef offsetof + #define offsetof(type, member) ( (size_t) & ((type*)0) -> member ) +#endif +#if !defined(_WIN32) && !defined(WIN32) && !defined(MS_WINDOWS) + #ifndef __stdcall + #define __stdcall + #endif + #ifndef __cdecl + #define __cdecl + #endif + #ifndef __fastcall + #define __fastcall + #endif +#endif +#ifndef DL_IMPORT + #define DL_IMPORT(t) t +#endif +#ifndef DL_EXPORT + #define DL_EXPORT(t) t +#endif +#define __PYX_COMMA , +#ifndef HAVE_LONG_LONG + #define HAVE_LONG_LONG +#endif +#ifndef PY_LONG_LONG + #define PY_LONG_LONG LONG_LONG +#endif +#ifndef Py_HUGE_VAL + #define Py_HUGE_VAL HUGE_VAL +#endif +#define __PYX_LIMITED_VERSION_HEX PY_VERSION_HEX +#if defined(GRAALVM_PYTHON) + /* For very preliminary testing purposes. Most variables are set the same as PyPy. 
+ The existence of this section does not imply that anything works or is even tested */ + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_LIMITED_API 0 + #define CYTHON_COMPILING_IN_GRAAL 1 + #define CYTHON_COMPILING_IN_NOGIL 0 + #undef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 0 + #undef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 0 + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #if PY_VERSION_HEX < 0x03050000 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #undef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 0 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #undef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 1 + #undef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 0 + #undef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 0 + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL 0 + #undef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS (PY_MAJOR_VERSION >= 3) + #endif + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #undef CYTHON_USE_MODULE_STATE + #define CYTHON_USE_MODULE_STATE 0 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 + #endif + #undef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 0 +#elif defined(PYPY_VERSION) + #define CYTHON_COMPILING_IN_PYPY 1 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_LIMITED_API 0 + #define CYTHON_COMPILING_IN_GRAAL 0 + #define CYTHON_COMPILING_IN_NOGIL 0 + #undef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 0 + #ifndef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 0 + #endif + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #if PY_VERSION_HEX < 0x03050000 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #undef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 0 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #undef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 1 + #undef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 0 + #undef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 0 + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL 0 + #undef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS (PY_MAJOR_VERSION >= 3) + 
#endif + #if PY_VERSION_HEX < 0x03090000 + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #elif !defined(CYTHON_PEP489_MULTI_PHASE_INIT) + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #endif + #undef CYTHON_USE_MODULE_STATE + #define CYTHON_USE_MODULE_STATE 0 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE (PY_VERSION_HEX >= 0x030400a1 && PYPY_VERSION_NUM >= 0x07030C00) + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 + #endif + #undef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 0 +#elif defined(CYTHON_LIMITED_API) + #ifdef Py_LIMITED_API + #undef __PYX_LIMITED_VERSION_HEX + #define __PYX_LIMITED_VERSION_HEX Py_LIMITED_API + #endif + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_LIMITED_API 1 + #define CYTHON_COMPILING_IN_GRAAL 0 + #define CYTHON_COMPILING_IN_NOGIL 0 + #undef CYTHON_CLINE_IN_TRACEBACK + #define CYTHON_CLINE_IN_TRACEBACK 0 + #undef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 0 + #undef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 1 + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #undef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 0 + #ifndef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #endif + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #undef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 0 + #undef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 0 + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL 0 + #undef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS 1 + #endif + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #undef CYTHON_USE_MODULE_STATE + #define CYTHON_USE_MODULE_STATE 1 + #ifndef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 + #endif + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 + #endif + #undef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 0 +#elif defined(Py_GIL_DISABLED) || defined(Py_NOGIL) + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_LIMITED_API 0 + #define CYTHON_COMPILING_IN_GRAAL 0 + #define CYTHON_COMPILING_IN_NOGIL 1 + #ifndef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 1 + #endif + #ifndef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 0 + #endif + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #ifndef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #ifndef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #endif + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #ifndef 
CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 1 + #endif + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #ifndef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 1 + #endif + #ifndef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 1 + #endif + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL 0 + #ifndef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL 1 + #endif + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS 1 + #endif + #ifndef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #endif + #ifndef CYTHON_USE_MODULE_STATE + #define CYTHON_USE_MODULE_STATE 0 + #endif + #ifndef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 1 + #endif + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 1 + #endif + #ifndef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 0 + #endif +#else + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_CPYTHON 1 + #define CYTHON_COMPILING_IN_LIMITED_API 0 + #define CYTHON_COMPILING_IN_GRAAL 0 + #define CYTHON_COMPILING_IN_NOGIL 0 + #ifndef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 1 + #endif + #ifndef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 0 + #endif + #ifndef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 1 + #endif + #if PY_MAJOR_VERSION < 3 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #ifndef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 1 + #endif + #ifndef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 1 + #endif + #ifndef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 1 + #endif + #if PY_VERSION_HEX < 0x030300F0 || PY_VERSION_HEX >= 0x030B00A2 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #elif !defined(CYTHON_USE_UNICODE_WRITER) + #define CYTHON_USE_UNICODE_WRITER 1 + #endif + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #ifndef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 1 + #endif + #ifndef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 1 + #endif + #ifndef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 1 + #endif + #ifndef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL (PY_MAJOR_VERSION < 3 || PY_VERSION_HEX >= 0x03060000 && PY_VERSION_HEX < 0x030C00A6) + #endif + #ifndef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL (PY_VERSION_HEX >= 0x030700A1) + #endif + #ifndef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 1 + #endif + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS 1 + #endif + #if PY_VERSION_HEX < 0x03050000 + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #elif !defined(CYTHON_PEP489_MULTI_PHASE_INIT) + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #endif + #ifndef CYTHON_USE_MODULE_STATE + #define CYTHON_USE_MODULE_STATE 0 + #endif + #if PY_VERSION_HEX < 0x030400a1 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 + #elif 
!defined(CYTHON_USE_TP_FINALIZE) + #define CYTHON_USE_TP_FINALIZE 1 + #endif + #if PY_VERSION_HEX < 0x030600B1 + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #elif !defined(CYTHON_USE_DICT_VERSIONS) + #define CYTHON_USE_DICT_VERSIONS (PY_VERSION_HEX < 0x030C00A5) + #endif + #if PY_VERSION_HEX < 0x030700A3 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #elif !defined(CYTHON_USE_EXC_INFO_STACK) + #define CYTHON_USE_EXC_INFO_STACK 1 + #endif + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 1 + #endif + #ifndef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 1 + #endif +#endif +#if !defined(CYTHON_FAST_PYCCALL) +#define CYTHON_FAST_PYCCALL (CYTHON_FAST_PYCALL && PY_VERSION_HEX >= 0x030600B1) +#endif +#if !defined(CYTHON_VECTORCALL) +#define CYTHON_VECTORCALL (CYTHON_FAST_PYCCALL && PY_VERSION_HEX >= 0x030800B1) +#endif +#define CYTHON_BACKPORT_VECTORCALL (CYTHON_METH_FASTCALL && PY_VERSION_HEX < 0x030800B1) +#if CYTHON_USE_PYLONG_INTERNALS + #if PY_MAJOR_VERSION < 3 + #include "longintrepr.h" + #endif + #undef SHIFT + #undef BASE + #undef MASK + #ifdef SIZEOF_VOID_P + enum { __pyx_check_sizeof_voidp = 1 / (int)(SIZEOF_VOID_P == sizeof(void*)) }; + #endif +#endif +#ifndef __has_attribute + #define __has_attribute(x) 0 +#endif +#ifndef __has_cpp_attribute + #define __has_cpp_attribute(x) 0 +#endif +#ifndef CYTHON_RESTRICT + #if defined(__GNUC__) + #define CYTHON_RESTRICT __restrict__ + #elif defined(_MSC_VER) && _MSC_VER >= 1400 + #define CYTHON_RESTRICT __restrict + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define CYTHON_RESTRICT restrict + #else + #define CYTHON_RESTRICT + #endif +#endif +#ifndef CYTHON_UNUSED + #if defined(__cplusplus) + /* for clang __has_cpp_attribute(maybe_unused) is true even before C++17 + * but leads to warnings with -pedantic, since it is a C++17 feature */ + #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) + #if __has_cpp_attribute(maybe_unused) + #define CYTHON_UNUSED [[maybe_unused]] + #endif + #endif + #endif +#endif +#ifndef CYTHON_UNUSED +# if defined(__GNUC__) +# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) +# define CYTHON_UNUSED __attribute__ ((__unused__)) +# else +# define CYTHON_UNUSED +# endif +# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER)) +# define CYTHON_UNUSED __attribute__ ((__unused__)) +# else +# define CYTHON_UNUSED +# endif +#endif +#ifndef CYTHON_UNUSED_VAR +# if defined(__cplusplus) + template void CYTHON_UNUSED_VAR( const T& ) { } +# else +# define CYTHON_UNUSED_VAR(x) (void)(x) +# endif +#endif +#ifndef CYTHON_MAYBE_UNUSED_VAR + #define CYTHON_MAYBE_UNUSED_VAR(x) CYTHON_UNUSED_VAR(x) +#endif +#ifndef CYTHON_NCP_UNUSED +# if CYTHON_COMPILING_IN_CPYTHON +# define CYTHON_NCP_UNUSED +# else +# define CYTHON_NCP_UNUSED CYTHON_UNUSED +# endif +#endif +#ifndef CYTHON_USE_CPP_STD_MOVE + #if defined(__cplusplus) && (\ + __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1600)) + #define CYTHON_USE_CPP_STD_MOVE 1 + #else + #define CYTHON_USE_CPP_STD_MOVE 0 + #endif +#endif +#define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None) +#ifdef _MSC_VER + #ifndef _MSC_STDINT_H_ + #if _MSC_VER < 1300 + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; + #else + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 
uint32_t; + #endif + #endif + #if _MSC_VER < 1300 + #ifdef _WIN64 + typedef unsigned long long __pyx_uintptr_t; + #else + typedef unsigned int __pyx_uintptr_t; + #endif + #else + #ifdef _WIN64 + typedef unsigned __int64 __pyx_uintptr_t; + #else + typedef unsigned __int32 __pyx_uintptr_t; + #endif + #endif +#else + #include + typedef uintptr_t __pyx_uintptr_t; +#endif +#ifndef CYTHON_FALLTHROUGH + #if defined(__cplusplus) + /* for clang __has_cpp_attribute(fallthrough) is true even before C++17 + * but leads to warnings with -pedantic, since it is a C++17 feature */ + #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) + #if __has_cpp_attribute(fallthrough) + #define CYTHON_FALLTHROUGH [[fallthrough]] + #endif + #endif + #ifndef CYTHON_FALLTHROUGH + #if __has_cpp_attribute(clang::fallthrough) + #define CYTHON_FALLTHROUGH [[clang::fallthrough]] + #elif __has_cpp_attribute(gnu::fallthrough) + #define CYTHON_FALLTHROUGH [[gnu::fallthrough]] + #endif + #endif + #endif + #ifndef CYTHON_FALLTHROUGH + #if __has_attribute(fallthrough) + #define CYTHON_FALLTHROUGH __attribute__((fallthrough)) + #else + #define CYTHON_FALLTHROUGH + #endif + #endif + #if defined(__clang__) && defined(__apple_build_version__) + #if __apple_build_version__ < 7000000 + #undef CYTHON_FALLTHROUGH + #define CYTHON_FALLTHROUGH + #endif + #endif +#endif +#ifdef __cplusplus + template + struct __PYX_IS_UNSIGNED_IMPL {static const bool value = T(0) < T(-1);}; + #define __PYX_IS_UNSIGNED(type) (__PYX_IS_UNSIGNED_IMPL::value) +#else + #define __PYX_IS_UNSIGNED(type) (((type)-1) > 0) +#endif +#if CYTHON_COMPILING_IN_PYPY == 1 + #define __PYX_NEED_TP_PRINT_SLOT (PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x030A0000) +#else + #define __PYX_NEED_TP_PRINT_SLOT (PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000) +#endif +#define __PYX_REINTERPRET_FUNCION(func_pointer, other_pointer) ((func_pointer)(void(*)(void))(other_pointer)) + +#ifndef CYTHON_INLINE + #if defined(__clang__) + #define CYTHON_INLINE __inline__ __attribute__ ((__unused__)) + #elif defined(__GNUC__) + #define CYTHON_INLINE __inline__ + #elif defined(_MSC_VER) + #define CYTHON_INLINE __inline + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define CYTHON_INLINE inline + #else + #define CYTHON_INLINE + #endif +#endif + +#define __PYX_BUILD_PY_SSIZE_T "n" +#define CYTHON_FORMAT_SSIZE_T "z" +#if PY_MAJOR_VERSION < 3 + #define __Pyx_BUILTIN_MODULE_NAME "__builtin__" + #define __Pyx_DefaultClassType PyClass_Type + #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) +#else + #define __Pyx_BUILTIN_MODULE_NAME "builtins" + #define __Pyx_DefaultClassType PyType_Type +#if CYTHON_COMPILING_IN_LIMITED_API + static CYTHON_INLINE PyObject* __Pyx_PyCode_New(int a, int p, int k, int l, int s, int f, + PyObject *code, PyObject *c, PyObject* n, PyObject *v, + PyObject *fv, PyObject *cell, PyObject* fn, + PyObject *name, int fline, PyObject *lnos) { + PyObject *exception_table = NULL; + PyObject *types_module=NULL, *code_type=NULL, *result=NULL; + #if __PYX_LIMITED_VERSION_HEX < 0x030B0000 + PyObject *version_info; + PyObject *py_minor_version = NULL; + #endif + long minor_version = 0; + PyObject *type, *value, *traceback; + PyErr_Fetch(&type, &value, &traceback); + #if __PYX_LIMITED_VERSION_HEX >= 0x030B0000 + minor_version = 11; + #else + if (!(version_info = PySys_GetObject("version_info"))) goto end; + if 
(!(py_minor_version = PySequence_GetItem(version_info, 1))) goto end; + minor_version = PyLong_AsLong(py_minor_version); + Py_DECREF(py_minor_version); + if (minor_version == -1 && PyErr_Occurred()) goto end; + #endif + if (!(types_module = PyImport_ImportModule("types"))) goto end; + if (!(code_type = PyObject_GetAttrString(types_module, "CodeType"))) goto end; + if (minor_version <= 7) { + (void)p; + result = PyObject_CallFunction(code_type, "iiiiiOOOOOOiOO", a, k, l, s, f, code, + c, n, v, fn, name, fline, lnos, fv, cell); + } else if (minor_version <= 10) { + result = PyObject_CallFunction(code_type, "iiiiiiOOOOOOiOO", a,p, k, l, s, f, code, + c, n, v, fn, name, fline, lnos, fv, cell); + } else { + if (!(exception_table = PyBytes_FromStringAndSize(NULL, 0))) goto end; + result = PyObject_CallFunction(code_type, "iiiiiiOOOOOOOiOO", a,p, k, l, s, f, code, + c, n, v, fn, name, name, fline, lnos, exception_table, fv, cell); + } + end: + Py_XDECREF(code_type); + Py_XDECREF(exception_table); + Py_XDECREF(types_module); + if (type) { + PyErr_Restore(type, value, traceback); + } + return result; + } + #ifndef CO_OPTIMIZED + #define CO_OPTIMIZED 0x0001 + #endif + #ifndef CO_NEWLOCALS + #define CO_NEWLOCALS 0x0002 + #endif + #ifndef CO_VARARGS + #define CO_VARARGS 0x0004 + #endif + #ifndef CO_VARKEYWORDS + #define CO_VARKEYWORDS 0x0008 + #endif + #ifndef CO_ASYNC_GENERATOR + #define CO_ASYNC_GENERATOR 0x0200 + #endif + #ifndef CO_GENERATOR + #define CO_GENERATOR 0x0020 + #endif + #ifndef CO_COROUTINE + #define CO_COROUTINE 0x0080 + #endif +#elif PY_VERSION_HEX >= 0x030B0000 + static CYTHON_INLINE PyCodeObject* __Pyx_PyCode_New(int a, int p, int k, int l, int s, int f, + PyObject *code, PyObject *c, PyObject* n, PyObject *v, + PyObject *fv, PyObject *cell, PyObject* fn, + PyObject *name, int fline, PyObject *lnos) { + PyCodeObject *result; + PyObject *empty_bytes = PyBytes_FromStringAndSize("", 0); + if (!empty_bytes) return NULL; + result = + #if PY_VERSION_HEX >= 0x030C0000 + PyUnstable_Code_NewWithPosOnlyArgs + #else + PyCode_NewWithPosOnlyArgs + #endif + (a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, name, fline, lnos, empty_bytes); + Py_DECREF(empty_bytes); + return result; + } +#elif PY_VERSION_HEX >= 0x030800B2 && !CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_NewWithPosOnlyArgs(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) +#else + #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) +#endif +#endif +#if PY_VERSION_HEX >= 0x030900A4 || defined(Py_IS_TYPE) + #define __Pyx_IS_TYPE(ob, type) Py_IS_TYPE(ob, type) +#else + #define __Pyx_IS_TYPE(ob, type) (((const PyObject*)ob)->ob_type == (type)) +#endif +#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_Is) + #define __Pyx_Py_Is(x, y) Py_Is(x, y) +#else + #define __Pyx_Py_Is(x, y) ((x) == (y)) +#endif +#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsNone) + #define __Pyx_Py_IsNone(ob) Py_IsNone(ob) +#else + #define __Pyx_Py_IsNone(ob) __Pyx_Py_Is((ob), Py_None) +#endif +#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsTrue) + #define __Pyx_Py_IsTrue(ob) Py_IsTrue(ob) +#else + #define __Pyx_Py_IsTrue(ob) __Pyx_Py_Is((ob), Py_True) +#endif +#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsFalse) + #define __Pyx_Py_IsFalse(ob) Py_IsFalse(ob) +#else + #define __Pyx_Py_IsFalse(ob) __Pyx_Py_Is((ob), Py_False) +#endif +#define 
__Pyx_NoneAsNull(obj) (__Pyx_Py_IsNone(obj) ? NULL : (obj)) +#if PY_VERSION_HEX >= 0x030900F0 && !CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyObject_GC_IsFinalized(o) PyObject_GC_IsFinalized(o) +#else + #define __Pyx_PyObject_GC_IsFinalized(o) _PyGC_FINALIZED(o) +#endif +#ifndef CO_COROUTINE + #define CO_COROUTINE 0x80 +#endif +#ifndef CO_ASYNC_GENERATOR + #define CO_ASYNC_GENERATOR 0x200 +#endif +#ifndef Py_TPFLAGS_CHECKTYPES + #define Py_TPFLAGS_CHECKTYPES 0 +#endif +#ifndef Py_TPFLAGS_HAVE_INDEX + #define Py_TPFLAGS_HAVE_INDEX 0 +#endif +#ifndef Py_TPFLAGS_HAVE_NEWBUFFER + #define Py_TPFLAGS_HAVE_NEWBUFFER 0 +#endif +#ifndef Py_TPFLAGS_HAVE_FINALIZE + #define Py_TPFLAGS_HAVE_FINALIZE 0 +#endif +#ifndef Py_TPFLAGS_SEQUENCE + #define Py_TPFLAGS_SEQUENCE 0 +#endif +#ifndef Py_TPFLAGS_MAPPING + #define Py_TPFLAGS_MAPPING 0 +#endif +#ifndef METH_STACKLESS + #define METH_STACKLESS 0 +#endif +#if PY_VERSION_HEX <= 0x030700A3 || !defined(METH_FASTCALL) + #ifndef METH_FASTCALL + #define METH_FASTCALL 0x80 + #endif + typedef PyObject *(*__Pyx_PyCFunctionFast) (PyObject *self, PyObject *const *args, Py_ssize_t nargs); + typedef PyObject *(*__Pyx_PyCFunctionFastWithKeywords) (PyObject *self, PyObject *const *args, + Py_ssize_t nargs, PyObject *kwnames); +#else + #if PY_VERSION_HEX >= 0x030d00A4 + # define __Pyx_PyCFunctionFast PyCFunctionFast + # define __Pyx_PyCFunctionFastWithKeywords PyCFunctionFastWithKeywords + #else + # define __Pyx_PyCFunctionFast _PyCFunctionFast + # define __Pyx_PyCFunctionFastWithKeywords _PyCFunctionFastWithKeywords + #endif +#endif +#if CYTHON_METH_FASTCALL + #define __Pyx_METH_FASTCALL METH_FASTCALL + #define __Pyx_PyCFunction_FastCall __Pyx_PyCFunctionFast + #define __Pyx_PyCFunction_FastCallWithKeywords __Pyx_PyCFunctionFastWithKeywords +#else + #define __Pyx_METH_FASTCALL METH_VARARGS + #define __Pyx_PyCFunction_FastCall PyCFunction + #define __Pyx_PyCFunction_FastCallWithKeywords PyCFunctionWithKeywords +#endif +#if CYTHON_VECTORCALL + #define __pyx_vectorcallfunc vectorcallfunc + #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET PY_VECTORCALL_ARGUMENTS_OFFSET + #define __Pyx_PyVectorcall_NARGS(n) PyVectorcall_NARGS((size_t)(n)) +#elif CYTHON_BACKPORT_VECTORCALL + typedef PyObject *(*__pyx_vectorcallfunc)(PyObject *callable, PyObject *const *args, + size_t nargsf, PyObject *kwnames); + #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET ((size_t)1 << (8 * sizeof(size_t) - 1)) + #define __Pyx_PyVectorcall_NARGS(n) ((Py_ssize_t)(((size_t)(n)) & ~__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET)) +#else + #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET 0 + #define __Pyx_PyVectorcall_NARGS(n) ((Py_ssize_t)(n)) +#endif +#if PY_MAJOR_VERSION >= 0x030900B1 +#define __Pyx_PyCFunction_CheckExact(func) PyCFunction_CheckExact(func) +#else +#define __Pyx_PyCFunction_CheckExact(func) PyCFunction_Check(func) +#endif +#define __Pyx_CyOrPyCFunction_Check(func) PyCFunction_Check(func) +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_CyOrPyCFunction_GET_FUNCTION(func) (((PyCFunctionObject*)(func))->m_ml->ml_meth) +#elif !CYTHON_COMPILING_IN_LIMITED_API +#define __Pyx_CyOrPyCFunction_GET_FUNCTION(func) PyCFunction_GET_FUNCTION(func) +#endif +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_CyOrPyCFunction_GET_FLAGS(func) (((PyCFunctionObject*)(func))->m_ml->ml_flags) +static CYTHON_INLINE PyObject* __Pyx_CyOrPyCFunction_GET_SELF(PyObject *func) { + return (__Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_STATIC) ? 
NULL : ((PyCFunctionObject*)func)->m_self; +} +#endif +static CYTHON_INLINE int __Pyx__IsSameCFunction(PyObject *func, void *cfunc) { +#if CYTHON_COMPILING_IN_LIMITED_API + return PyCFunction_Check(func) && PyCFunction_GetFunction(func) == (PyCFunction) cfunc; +#else + return PyCFunction_Check(func) && PyCFunction_GET_FUNCTION(func) == (PyCFunction) cfunc; +#endif +} +#define __Pyx_IsSameCFunction(func, cfunc) __Pyx__IsSameCFunction(func, cfunc) +#if __PYX_LIMITED_VERSION_HEX < 0x030900B1 + #define __Pyx_PyType_FromModuleAndSpec(m, s, b) ((void)m, PyType_FromSpecWithBases(s, b)) + typedef PyObject *(*__Pyx_PyCMethod)(PyObject *, PyTypeObject *, PyObject *const *, size_t, PyObject *); +#else + #define __Pyx_PyType_FromModuleAndSpec(m, s, b) PyType_FromModuleAndSpec(m, s, b) + #define __Pyx_PyCMethod PyCMethod +#endif +#ifndef METH_METHOD + #define METH_METHOD 0x200 +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Malloc) + #define PyObject_Malloc(s) PyMem_Malloc(s) + #define PyObject_Free(p) PyMem_Free(p) + #define PyObject_Realloc(p) PyMem_Realloc(p) +#endif +#if CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0) + #define __Pyx_PyFrame_SetLineNumber(frame, lineno) +#else + #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0) + #define __Pyx_PyFrame_SetLineNumber(frame, lineno) (frame)->f_lineno = (lineno) +#endif +#if CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_PyThreadState_Current PyThreadState_Get() +#elif !CYTHON_FAST_THREAD_STATE + #define __Pyx_PyThreadState_Current PyThreadState_GET() +#elif PY_VERSION_HEX >= 0x030d00A1 + #define __Pyx_PyThreadState_Current PyThreadState_GetUnchecked() +#elif PY_VERSION_HEX >= 0x03060000 + #define __Pyx_PyThreadState_Current _PyThreadState_UncheckedGet() +#elif PY_VERSION_HEX >= 0x03000000 + #define __Pyx_PyThreadState_Current PyThreadState_GET() +#else + #define __Pyx_PyThreadState_Current _PyThreadState_Current +#endif +#if CYTHON_COMPILING_IN_LIMITED_API +static CYTHON_INLINE void *__Pyx_PyModule_GetState(PyObject *op) +{ + void *result; + result = PyModule_GetState(op); + if (!result) + Py_FatalError("Couldn't find the module state"); + return result; +} +#endif +#define __Pyx_PyObject_GetSlot(obj, name, func_ctype) __Pyx_PyType_GetSlot(Py_TYPE(obj), name, func_ctype) +#if CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_PyType_GetSlot(type, name, func_ctype) ((func_ctype) PyType_GetSlot((type), Py_##name)) +#else + #define __Pyx_PyType_GetSlot(type, name, func_ctype) ((type)->name) +#endif +#if PY_VERSION_HEX < 0x030700A2 && !defined(PyThread_tss_create) && !defined(Py_tss_NEEDS_INIT) +#include "pythread.h" +#define Py_tss_NEEDS_INIT 0 +typedef int Py_tss_t; +static CYTHON_INLINE int PyThread_tss_create(Py_tss_t *key) { + *key = PyThread_create_key(); + return 0; +} +static CYTHON_INLINE Py_tss_t * PyThread_tss_alloc(void) { + Py_tss_t *key = (Py_tss_t *)PyObject_Malloc(sizeof(Py_tss_t)); + *key = Py_tss_NEEDS_INIT; + return key; +} +static CYTHON_INLINE void PyThread_tss_free(Py_tss_t *key) { + PyObject_Free(key); +} +static CYTHON_INLINE int PyThread_tss_is_created(Py_tss_t *key) { + return *key != Py_tss_NEEDS_INIT; +} +static CYTHON_INLINE void PyThread_tss_delete(Py_tss_t *key) { + PyThread_delete_key(*key); + *key = Py_tss_NEEDS_INIT; +} +static CYTHON_INLINE int PyThread_tss_set(Py_tss_t *key, void *value) { + return PyThread_set_key_value(*key, value); +} +static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) { + return PyThread_get_key_value(*key); +} 
+#endif +#if PY_MAJOR_VERSION < 3 + #if CYTHON_COMPILING_IN_PYPY + #if PYPY_VERSION_NUM < 0x07030600 + #if defined(__cplusplus) && __cplusplus >= 201402L + [[deprecated("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6")]] + #elif defined(__GNUC__) || defined(__clang__) + __attribute__ ((__deprecated__("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6"))) + #elif defined(_MSC_VER) + __declspec(deprecated("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6")) + #endif + static CYTHON_INLINE int PyGILState_Check(void) { + return 0; + } + #else // PYPY_VERSION_NUM < 0x07030600 + #endif // PYPY_VERSION_NUM < 0x07030600 + #else + static CYTHON_INLINE int PyGILState_Check(void) { + PyThreadState * tstate = _PyThreadState_Current; + return tstate && (tstate == PyGILState_GetThisThreadState()); + } + #endif +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030d0000 || defined(_PyDict_NewPresized) +#define __Pyx_PyDict_NewPresized(n) ((n <= 8) ? PyDict_New() : _PyDict_NewPresized(n)) +#else +#define __Pyx_PyDict_NewPresized(n) PyDict_New() +#endif +#if PY_MAJOR_VERSION >= 3 || CYTHON_FUTURE_DIVISION + #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y) +#else + #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y) +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX > 0x030600B4 && PY_VERSION_HEX < 0x030d0000 && CYTHON_USE_UNICODE_INTERNALS +#define __Pyx_PyDict_GetItemStrWithError(dict, name) _PyDict_GetItem_KnownHash(dict, name, ((PyASCIIObject *) name)->hash) +static CYTHON_INLINE PyObject * __Pyx_PyDict_GetItemStr(PyObject *dict, PyObject *name) { + PyObject *res = __Pyx_PyDict_GetItemStrWithError(dict, name); + if (res == NULL) PyErr_Clear(); + return res; +} +#elif PY_MAJOR_VERSION >= 3 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07020000) +#define __Pyx_PyDict_GetItemStrWithError PyDict_GetItemWithError +#define __Pyx_PyDict_GetItemStr PyDict_GetItem +#else +static CYTHON_INLINE PyObject * __Pyx_PyDict_GetItemStrWithError(PyObject *dict, PyObject *name) { +#if CYTHON_COMPILING_IN_PYPY + return PyDict_GetItem(dict, name); +#else + PyDictEntry *ep; + PyDictObject *mp = (PyDictObject*) dict; + long hash = ((PyStringObject *) name)->ob_shash; + assert(hash != -1); + ep = (mp->ma_lookup)(mp, name, hash); + if (ep == NULL) { + return NULL; + } + return ep->me_value; +#endif +} +#define __Pyx_PyDict_GetItemStr PyDict_GetItem +#endif +#if CYTHON_USE_TYPE_SLOTS + #define __Pyx_PyType_GetFlags(tp) (((PyTypeObject *)tp)->tp_flags) + #define __Pyx_PyType_HasFeature(type, feature) ((__Pyx_PyType_GetFlags(type) & (feature)) != 0) + #define __Pyx_PyObject_GetIterNextFunc(obj) (Py_TYPE(obj)->tp_iternext) +#else + #define __Pyx_PyType_GetFlags(tp) (PyType_GetFlags((PyTypeObject *)tp)) + #define __Pyx_PyType_HasFeature(type, feature) PyType_HasFeature(type, feature) + #define __Pyx_PyObject_GetIterNextFunc(obj) PyIter_Next +#endif +#if CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_SetItemOnTypeDict(tp, k, v) PyObject_GenericSetAttr((PyObject*)tp, k, v) +#else + #define __Pyx_SetItemOnTypeDict(tp, k, v) PyDict_SetItem(tp->tp_dict, k, v) +#endif +#if CYTHON_USE_TYPE_SPECS && PY_VERSION_HEX >= 0x03080000 +#define __Pyx_PyHeapTypeObject_GC_Del(obj) {\ + PyTypeObject *type = Py_TYPE((PyObject*)obj);\ + assert(__Pyx_PyType_HasFeature(type, 
Py_TPFLAGS_HEAPTYPE));\ + PyObject_GC_Del(obj);\ + Py_DECREF(type);\ +} +#else +#define __Pyx_PyHeapTypeObject_GC_Del(obj) PyObject_GC_Del(obj) +#endif +#if CYTHON_COMPILING_IN_LIMITED_API + #define CYTHON_PEP393_ENABLED 1 + #define __Pyx_PyUnicode_READY(op) (0) + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GetLength(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_ReadChar(u, i) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((void)u, 1114111U) + #define __Pyx_PyUnicode_KIND(u) ((void)u, (0)) + #define __Pyx_PyUnicode_DATA(u) ((void*)u) + #define __Pyx_PyUnicode_READ(k, d, i) ((void)k, PyUnicode_ReadChar((PyObject*)(d), i)) + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GetLength(u)) +#elif PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND) + #define CYTHON_PEP393_ENABLED 1 + #if PY_VERSION_HEX >= 0x030C0000 + #define __Pyx_PyUnicode_READY(op) (0) + #else + #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\ + 0 : _PyUnicode_Ready((PyObject *)(op))) + #endif + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) PyUnicode_MAX_CHAR_VALUE(u) + #define __Pyx_PyUnicode_KIND(u) ((int)PyUnicode_KIND(u)) + #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u) + #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i) + #define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, i, (Py_UCS4) ch) + #if PY_VERSION_HEX >= 0x030C0000 + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u)) + #else + #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000 + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length)) + #else + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u))) + #endif + #endif +#else + #define CYTHON_PEP393_ENABLED 0 + #define PyUnicode_1BYTE_KIND 1 + #define PyUnicode_2BYTE_KIND 2 + #define PyUnicode_4BYTE_KIND 4 + #define __Pyx_PyUnicode_READY(op) (0) + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i])) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((sizeof(Py_UNICODE) == 2) ? 
65535U : 1114111U) + #define __Pyx_PyUnicode_KIND(u) ((int)sizeof(Py_UNICODE)) + #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u)) + #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i])) + #define __Pyx_PyUnicode_WRITE(k, d, i, ch) (((void)(k)), ((Py_UNICODE*)d)[i] = (Py_UNICODE) ch) + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_SIZE(u)) +#endif +#if CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b) +#else + #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\ + PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b)) +#endif +#if CYTHON_COMPILING_IN_PYPY + #if !defined(PyUnicode_DecodeUnicodeEscape) + #define PyUnicode_DecodeUnicodeEscape(s, size, errors) PyUnicode_Decode(s, size, "unicode_escape", errors) + #endif + #if !defined(PyUnicode_Contains) || (PY_MAJOR_VERSION == 2 && PYPY_VERSION_NUM < 0x07030500) + #undef PyUnicode_Contains + #define PyUnicode_Contains(u, s) PySequence_Contains(u, s) + #endif + #if !defined(PyByteArray_Check) + #define PyByteArray_Check(obj) PyObject_TypeCheck(obj, &PyByteArray_Type) + #endif + #if !defined(PyObject_Format) + #define PyObject_Format(obj, fmt) PyObject_CallMethod(obj, "__format__", "O", fmt) + #endif +#endif +#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyString_Check(b) && !PyString_CheckExact(b)))) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b)) +#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyUnicode_Check(b) && !PyUnicode_CheckExact(b)))) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b)) +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b) +#else + #define __Pyx_PyString_Format(a, b) PyString_Format(a, b) +#endif +#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII) + #define PyObject_ASCII(o) PyObject_Repr(o) +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBaseString_Type PyUnicode_Type + #define PyStringObject PyUnicodeObject + #define PyString_Type PyUnicode_Type + #define PyString_Check PyUnicode_Check + #define PyString_CheckExact PyUnicode_CheckExact +#ifndef PyObject_Unicode + #define PyObject_Unicode PyObject_Str +#endif +#endif +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj) + #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj) +#else + #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj)) + #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj)) +#endif +#if CYTHON_COMPILING_IN_CPYTHON + #define __Pyx_PySequence_ListKeepNew(obj)\ + (likely(PyList_CheckExact(obj) && Py_REFCNT(obj) == 1) ? 
__Pyx_NewRef(obj) : PySequence_List(obj)) +#else + #define __Pyx_PySequence_ListKeepNew(obj) PySequence_List(obj) +#endif +#ifndef PySet_CheckExact + #define PySet_CheckExact(obj) __Pyx_IS_TYPE(obj, &PySet_Type) +#endif +#if PY_VERSION_HEX >= 0x030900A4 + #define __Pyx_SET_REFCNT(obj, refcnt) Py_SET_REFCNT(obj, refcnt) + #define __Pyx_SET_SIZE(obj, size) Py_SET_SIZE(obj, size) +#else + #define __Pyx_SET_REFCNT(obj, refcnt) Py_REFCNT(obj) = (refcnt) + #define __Pyx_SET_SIZE(obj, size) Py_SIZE(obj) = (size) +#endif +#if CYTHON_ASSUME_SAFE_MACROS + #define __Pyx_PySequence_ITEM(o, i) PySequence_ITEM(o, i) + #define __Pyx_PySequence_SIZE(seq) Py_SIZE(seq) + #define __Pyx_PyTuple_SET_ITEM(o, i, v) (PyTuple_SET_ITEM(o, i, v), (0)) + #define __Pyx_PyList_SET_ITEM(o, i, v) (PyList_SET_ITEM(o, i, v), (0)) + #define __Pyx_PyTuple_GET_SIZE(o) PyTuple_GET_SIZE(o) + #define __Pyx_PyList_GET_SIZE(o) PyList_GET_SIZE(o) + #define __Pyx_PySet_GET_SIZE(o) PySet_GET_SIZE(o) + #define __Pyx_PyBytes_GET_SIZE(o) PyBytes_GET_SIZE(o) + #define __Pyx_PyByteArray_GET_SIZE(o) PyByteArray_GET_SIZE(o) +#else + #define __Pyx_PySequence_ITEM(o, i) PySequence_GetItem(o, i) + #define __Pyx_PySequence_SIZE(seq) PySequence_Size(seq) + #define __Pyx_PyTuple_SET_ITEM(o, i, v) PyTuple_SetItem(o, i, v) + #define __Pyx_PyList_SET_ITEM(o, i, v) PyList_SetItem(o, i, v) + #define __Pyx_PyTuple_GET_SIZE(o) PyTuple_Size(o) + #define __Pyx_PyList_GET_SIZE(o) PyList_Size(o) + #define __Pyx_PySet_GET_SIZE(o) PySet_Size(o) + #define __Pyx_PyBytes_GET_SIZE(o) PyBytes_Size(o) + #define __Pyx_PyByteArray_GET_SIZE(o) PyByteArray_Size(o) +#endif +#if __PYX_LIMITED_VERSION_HEX >= 0x030d00A1 + #define __Pyx_PyImport_AddModuleRef(name) PyImport_AddModuleRef(name) +#else + static CYTHON_INLINE PyObject *__Pyx_PyImport_AddModuleRef(const char *name) { + PyObject *module = PyImport_AddModule(name); + Py_XINCREF(module); + return module; + } +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyIntObject PyLongObject + #define PyInt_Type PyLong_Type + #define PyInt_Check(op) PyLong_Check(op) + #define PyInt_CheckExact(op) PyLong_CheckExact(op) + #define __Pyx_Py3Int_Check(op) PyLong_Check(op) + #define __Pyx_Py3Int_CheckExact(op) PyLong_CheckExact(op) + #define PyInt_FromString PyLong_FromString + #define PyInt_FromUnicode PyLong_FromUnicode + #define PyInt_FromLong PyLong_FromLong + #define PyInt_FromSize_t PyLong_FromSize_t + #define PyInt_FromSsize_t PyLong_FromSsize_t + #define PyInt_AsLong PyLong_AsLong + #define PyInt_AS_LONG PyLong_AS_LONG + #define PyInt_AsSsize_t PyLong_AsSsize_t + #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask + #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask + #define PyNumber_Int PyNumber_Long +#else + #define __Pyx_Py3Int_Check(op) (PyLong_Check(op) || PyInt_Check(op)) + #define __Pyx_Py3Int_CheckExact(op) (PyLong_CheckExact(op) || PyInt_CheckExact(op)) +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBoolObject PyLongObject +#endif +#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY + #ifndef PyUnicode_InternFromString + #define PyUnicode_InternFromString(s) PyUnicode_FromString(s) + #endif +#endif +#if PY_VERSION_HEX < 0x030200A4 + typedef long Py_hash_t; + #define __Pyx_PyInt_FromHash_t PyInt_FromLong + #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsHash_t +#else + #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t + #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsSsize_t +#endif +#if CYTHON_USE_ASYNC_SLOTS + #if PY_VERSION_HEX >= 0x030500B1 + #define __Pyx_PyAsyncMethodsStruct PyAsyncMethods + 
#define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async) + #else + #define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved)) + #endif +#else + #define __Pyx_PyType_AsAsync(obj) NULL +#endif +#ifndef __Pyx_PyAsyncMethodsStruct + typedef struct { + unaryfunc am_await; + unaryfunc am_aiter; + unaryfunc am_anext; + } __Pyx_PyAsyncMethodsStruct; +#endif + +#if defined(_WIN32) || defined(WIN32) || defined(MS_WINDOWS) + #if !defined(_USE_MATH_DEFINES) + #define _USE_MATH_DEFINES + #endif +#endif +#include +#ifdef NAN +#define __PYX_NAN() ((float) NAN) +#else +static CYTHON_INLINE float __PYX_NAN() { + float value; + memset(&value, 0xFF, sizeof(value)); + return value; +} +#endif +#if defined(__CYGWIN__) && defined(_LDBL_EQ_DBL) +#define __Pyx_truncl trunc +#else +#define __Pyx_truncl truncl +#endif + +#define __PYX_MARK_ERR_POS(f_index, lineno) \ + { __pyx_filename = __pyx_f[f_index]; (void)__pyx_filename; __pyx_lineno = lineno; (void)__pyx_lineno; __pyx_clineno = __LINE__; (void)__pyx_clineno; } +#define __PYX_ERR(f_index, lineno, Ln_error) \ + { __PYX_MARK_ERR_POS(f_index, lineno) goto Ln_error; } + +#ifdef CYTHON_EXTERN_C + #undef __PYX_EXTERN_C + #define __PYX_EXTERN_C CYTHON_EXTERN_C +#elif defined(__PYX_EXTERN_C) + #ifdef _MSC_VER + #pragma message ("Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead.") + #else + #warning Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead. + #endif +#else + #ifdef __cplusplus + #define __PYX_EXTERN_C extern "C" + #else + #define __PYX_EXTERN_C extern + #endif +#endif + +#define __PYX_HAVE__opencood__utils__box_overlaps +#define __PYX_HAVE_API__opencood__utils__box_overlaps +/* Early includes */ +#include +#include + + /* Using NumPy API declarations from "Cython/Includes/numpy/" */ + +#include "numpy/arrayobject.h" +#include "numpy/ndarrayobject.h" +#include "numpy/ndarraytypes.h" +#include "numpy/arrayscalars.h" +#include "numpy/ufuncobject.h" +#ifdef _OPENMP +#include +#endif /* _OPENMP */ + +#if defined(PYREX_WITHOUT_ASSERTIONS) && !defined(CYTHON_WITHOUT_ASSERTIONS) +#define CYTHON_WITHOUT_ASSERTIONS +#endif + +typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding; + const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; + +#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0 +#define __PYX_DEFAULT_STRING_ENCODING_IS_UTF8 0 +#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT (PY_MAJOR_VERSION >= 3 && __PYX_DEFAULT_STRING_ENCODING_IS_UTF8) +#define __PYX_DEFAULT_STRING_ENCODING "" +#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString +#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize +#define __Pyx_uchar_cast(c) ((unsigned char)c) +#define __Pyx_long_cast(x) ((long)x) +#define __Pyx_fits_Py_ssize_t(v, type, is_signed) (\ + (sizeof(type) < sizeof(Py_ssize_t)) ||\ + (sizeof(type) > sizeof(Py_ssize_t) &&\ + likely(v < (type)PY_SSIZE_T_MAX ||\ + v == (type)PY_SSIZE_T_MAX) &&\ + (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\ + v == (type)PY_SSIZE_T_MIN))) ||\ + (sizeof(type) == sizeof(Py_ssize_t) &&\ + (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\ + v == (type)PY_SSIZE_T_MAX))) ) +static CYTHON_INLINE int __Pyx_is_valid_index(Py_ssize_t i, Py_ssize_t limit) { + return (size_t) i < (size_t) limit; +} +#if defined (__cplusplus) && __cplusplus >= 201103L + #include + #define __Pyx_sst_abs(value) std::abs(value) +#elif SIZEOF_INT >= SIZEOF_SIZE_T + 
#define __Pyx_sst_abs(value) abs(value) +#elif SIZEOF_LONG >= SIZEOF_SIZE_T + #define __Pyx_sst_abs(value) labs(value) +#elif defined (_MSC_VER) + #define __Pyx_sst_abs(value) ((Py_ssize_t)_abs64(value)) +#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define __Pyx_sst_abs(value) llabs(value) +#elif defined (__GNUC__) + #define __Pyx_sst_abs(value) __builtin_llabs(value) +#else + #define __Pyx_sst_abs(value) ((value<0) ? -value : value) +#endif +static CYTHON_INLINE Py_ssize_t __Pyx_ssize_strlen(const char *s); +static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject*); +static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length); +static CYTHON_INLINE PyObject* __Pyx_PyByteArray_FromString(const char*); +#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l) +#define __Pyx_PyBytes_FromString PyBytes_FromString +#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*); +#if PY_MAJOR_VERSION < 3 + #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize +#else + #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize +#endif +#define __Pyx_PyBytes_AsWritableString(s) ((char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsWritableSString(s) ((signed char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsWritableUString(s) ((unsigned char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsString(s) ((const char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsSString(s) ((const signed char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsUString(s) ((const unsigned char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyObject_AsWritableString(s) ((char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsWritableSString(s) ((signed char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsWritableUString(s) ((unsigned char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsSString(s) ((const signed char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsUString(s) ((const unsigned char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s) +#define __Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s) +#define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s) +#define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s) +#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s) +#define __Pyx_PyUnicode_FromOrdinal(o) PyUnicode_FromOrdinal((int)o) +#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode +#define __Pyx_NewRef(obj) (Py_INCREF(obj), obj) +#define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None) +static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b); +static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); +static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject*); +static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x); +#define __Pyx_PySequence_Tuple(obj)\ + (likely(PyTuple_CheckExact(obj)) ? 
__Pyx_NewRef(obj) : PySequence_Tuple(obj)) +static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*); +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t); +static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject*); +#if CYTHON_ASSUME_SAFE_MACROS +#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x)) +#else +#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x) +#endif +#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x)) +#if PY_MAJOR_VERSION >= 3 +#define __Pyx_PyNumber_Int(x) (PyLong_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Long(x)) +#else +#define __Pyx_PyNumber_Int(x) (PyInt_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Int(x)) +#endif +#if CYTHON_USE_PYLONG_INTERNALS + #if PY_VERSION_HEX >= 0x030C00A7 + #ifndef _PyLong_SIGN_MASK + #define _PyLong_SIGN_MASK 3 + #endif + #ifndef _PyLong_NON_SIZE_BITS + #define _PyLong_NON_SIZE_BITS 3 + #endif + #define __Pyx_PyLong_Sign(x) (((PyLongObject*)x)->long_value.lv_tag & _PyLong_SIGN_MASK) + #define __Pyx_PyLong_IsNeg(x) ((__Pyx_PyLong_Sign(x) & 2) != 0) + #define __Pyx_PyLong_IsNonNeg(x) (!__Pyx_PyLong_IsNeg(x)) + #define __Pyx_PyLong_IsZero(x) (__Pyx_PyLong_Sign(x) & 1) + #define __Pyx_PyLong_IsPos(x) (__Pyx_PyLong_Sign(x) == 0) + #define __Pyx_PyLong_CompactValueUnsigned(x) (__Pyx_PyLong_Digits(x)[0]) + #define __Pyx_PyLong_DigitCount(x) ((Py_ssize_t) (((PyLongObject*)x)->long_value.lv_tag >> _PyLong_NON_SIZE_BITS)) + #define __Pyx_PyLong_SignedDigitCount(x)\ + ((1 - (Py_ssize_t) __Pyx_PyLong_Sign(x)) * __Pyx_PyLong_DigitCount(x)) + #if defined(PyUnstable_Long_IsCompact) && defined(PyUnstable_Long_CompactValue) + #define __Pyx_PyLong_IsCompact(x) PyUnstable_Long_IsCompact((PyLongObject*) x) + #define __Pyx_PyLong_CompactValue(x) PyUnstable_Long_CompactValue((PyLongObject*) x) + #else + #define __Pyx_PyLong_IsCompact(x) (((PyLongObject*)x)->long_value.lv_tag < (2 << _PyLong_NON_SIZE_BITS)) + #define __Pyx_PyLong_CompactValue(x) ((1 - (Py_ssize_t) __Pyx_PyLong_Sign(x)) * (Py_ssize_t) __Pyx_PyLong_Digits(x)[0]) + #endif + typedef Py_ssize_t __Pyx_compact_pylong; + typedef size_t __Pyx_compact_upylong; + #else + #define __Pyx_PyLong_IsNeg(x) (Py_SIZE(x) < 0) + #define __Pyx_PyLong_IsNonNeg(x) (Py_SIZE(x) >= 0) + #define __Pyx_PyLong_IsZero(x) (Py_SIZE(x) == 0) + #define __Pyx_PyLong_IsPos(x) (Py_SIZE(x) > 0) + #define __Pyx_PyLong_CompactValueUnsigned(x) ((Py_SIZE(x) == 0) ? 0 : __Pyx_PyLong_Digits(x)[0]) + #define __Pyx_PyLong_DigitCount(x) __Pyx_sst_abs(Py_SIZE(x)) + #define __Pyx_PyLong_SignedDigitCount(x) Py_SIZE(x) + #define __Pyx_PyLong_IsCompact(x) (Py_SIZE(x) == 0 || Py_SIZE(x) == 1 || Py_SIZE(x) == -1) + #define __Pyx_PyLong_CompactValue(x)\ + ((Py_SIZE(x) == 0) ? (sdigit) 0 : ((Py_SIZE(x) < 0) ? 
-(sdigit)__Pyx_PyLong_Digits(x)[0] : (sdigit)__Pyx_PyLong_Digits(x)[0])) + typedef sdigit __Pyx_compact_pylong; + typedef digit __Pyx_compact_upylong; + #endif + #if PY_VERSION_HEX >= 0x030C00A5 + #define __Pyx_PyLong_Digits(x) (((PyLongObject*)x)->long_value.ob_digit) + #else + #define __Pyx_PyLong_Digits(x) (((PyLongObject*)x)->ob_digit) + #endif +#endif +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII +#include <string.h> +static int __Pyx_sys_getdefaultencoding_not_ascii; +static int __Pyx_init_sys_getdefaultencoding_params(void) { + PyObject* sys; + PyObject* default_encoding = NULL; + PyObject* ascii_chars_u = NULL; + PyObject* ascii_chars_b = NULL; + const char* default_encoding_c; + sys = PyImport_ImportModule("sys"); + if (!sys) goto bad; + default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL); + Py_DECREF(sys); + if (!default_encoding) goto bad; + default_encoding_c = PyBytes_AsString(default_encoding); + if (!default_encoding_c) goto bad; + if (strcmp(default_encoding_c, "ascii") == 0) { + __Pyx_sys_getdefaultencoding_not_ascii = 0; + } else { + char ascii_chars[128]; + int c; + for (c = 0; c < 128; c++) { + ascii_chars[c] = (char) c; + } + __Pyx_sys_getdefaultencoding_not_ascii = 1; + ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL); + if (!ascii_chars_u) goto bad; + ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL); + if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) { + PyErr_Format( + PyExc_ValueError, + "This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.", + default_encoding_c); + goto bad; + } + Py_DECREF(ascii_chars_u); + Py_DECREF(ascii_chars_b); + } + Py_DECREF(default_encoding); + return 0; +bad: + Py_XDECREF(default_encoding); + Py_XDECREF(ascii_chars_u); + Py_XDECREF(ascii_chars_b); + return -1; +} +#endif +#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3 +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL) +#else +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL) +#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT +#include <string.h> +static char* __PYX_DEFAULT_STRING_ENCODING; +static int __Pyx_init_sys_getdefaultencoding_params(void) { + PyObject* sys; + PyObject* default_encoding = NULL; + char* default_encoding_c; + sys = PyImport_ImportModule("sys"); + if (!sys) goto bad; + default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL); + Py_DECREF(sys); + if (!default_encoding) goto bad; + default_encoding_c = PyBytes_AsString(default_encoding); + if (!default_encoding_c) goto bad; + __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c) + 1); + if (!__PYX_DEFAULT_STRING_ENCODING) goto bad; + strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c); + Py_DECREF(default_encoding); + return 0; +bad: + Py_XDECREF(default_encoding); + return -1; +} +#endif +#endif + + +/* Test for GCC > 2.95 */ +#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))) + #define likely(x) __builtin_expect(!!(x), 1) + #define unlikely(x) __builtin_expect(!!(x), 0) +#else /* !__GNUC__ or GCC < 2.95 */ + #define likely(x) (x) + #define unlikely(x) (x) +#endif /* __GNUC__ */ +static CYTHON_INLINE void __Pyx_pretend_to_initialize(void* ptr) { (void)ptr; } + +#if !CYTHON_USE_MODULE_STATE 
+static PyObject *__pyx_m = NULL; +#endif +static int __pyx_lineno; +static int __pyx_clineno = 0; +static const char * __pyx_cfilenm = __FILE__; +static const char *__pyx_filename; + +/* Header.proto */ +#if !defined(CYTHON_CCOMPLEX) + #if defined(__cplusplus) + #define CYTHON_CCOMPLEX 1 + #elif (defined(_Complex_I) && !defined(_MSC_VER)) || ((defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_COMPLEX__) && !defined(_MSC_VER)) + #define CYTHON_CCOMPLEX 1 + #else + #define CYTHON_CCOMPLEX 0 + #endif +#endif +#if CYTHON_CCOMPLEX + #ifdef __cplusplus + #include <complex> + #else + #include <complex.h> + #endif +#endif +#if CYTHON_CCOMPLEX && !defined(__cplusplus) && defined(__sun__) && defined(__GNUC__) + #undef _Complex_I + #define _Complex_I 1.0fj +#endif + +/* #### Code section: filename_table ### */ + +static const char *__pyx_f[] = { + "opencood/utils/box_overlaps.pyx", + "__init__.pxd", + "type.pxd", +}; +/* #### Code section: utility_code_proto_before_types ### */ +/* ForceInitThreads.proto */ +#ifndef __PYX_FORCE_INIT_THREADS + #define __PYX_FORCE_INIT_THREADS 0 +#endif + +/* BufferFormatStructs.proto */ +struct __Pyx_StructField_; +#define __PYX_BUF_FLAGS_PACKED_STRUCT (1 << 0) +typedef struct { + const char* name; + struct __Pyx_StructField_* fields; + size_t size; + size_t arraysize[8]; + int ndim; + char typegroup; + char is_unsigned; + int flags; +} __Pyx_TypeInfo; +typedef struct __Pyx_StructField_ { + __Pyx_TypeInfo* type; + const char* name; + size_t offset; +} __Pyx_StructField; +typedef struct { + __Pyx_StructField* field; + size_t parent_offset; +} __Pyx_BufFmt_StackElem; +typedef struct { + __Pyx_StructField root; + __Pyx_BufFmt_StackElem* head; + size_t fmt_offset; + size_t new_count, enc_count; + size_t struct_alignment; + int is_complex; + char enc_type; + char new_packmode; + char enc_packmode; + char is_valid_array; +} __Pyx_BufFmt_Context; + +/* #### Code section: numeric_typedefs ### */ + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":736 + * # in Cython to enable them only on the right systems. 
+ * + * ctypedef npy_int8 int8_t # <<<<<<<<<<<<<< + * ctypedef npy_int16 int16_t + * ctypedef npy_int32 int32_t + */ +typedef npy_int8 __pyx_t_5numpy_int8_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":737 + * + * ctypedef npy_int8 int8_t + * ctypedef npy_int16 int16_t # <<<<<<<<<<<<<< + * ctypedef npy_int32 int32_t + * ctypedef npy_int64 int64_t + */ +typedef npy_int16 __pyx_t_5numpy_int16_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":738 + * ctypedef npy_int8 int8_t + * ctypedef npy_int16 int16_t + * ctypedef npy_int32 int32_t # <<<<<<<<<<<<<< + * ctypedef npy_int64 int64_t + * #ctypedef npy_int96 int96_t + */ +typedef npy_int32 __pyx_t_5numpy_int32_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":739 + * ctypedef npy_int16 int16_t + * ctypedef npy_int32 int32_t + * ctypedef npy_int64 int64_t # <<<<<<<<<<<<<< + * #ctypedef npy_int96 int96_t + * #ctypedef npy_int128 int128_t + */ +typedef npy_int64 __pyx_t_5numpy_int64_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":743 + * #ctypedef npy_int128 int128_t + * + * ctypedef npy_uint8 uint8_t # <<<<<<<<<<<<<< + * ctypedef npy_uint16 uint16_t + * ctypedef npy_uint32 uint32_t + */ +typedef npy_uint8 __pyx_t_5numpy_uint8_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":744 + * + * ctypedef npy_uint8 uint8_t + * ctypedef npy_uint16 uint16_t # <<<<<<<<<<<<<< + * ctypedef npy_uint32 uint32_t + * ctypedef npy_uint64 uint64_t + */ +typedef npy_uint16 __pyx_t_5numpy_uint16_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":745 + * ctypedef npy_uint8 uint8_t + * ctypedef npy_uint16 uint16_t + * ctypedef npy_uint32 uint32_t # <<<<<<<<<<<<<< + * ctypedef npy_uint64 uint64_t + * #ctypedef npy_uint96 uint96_t + */ +typedef npy_uint32 __pyx_t_5numpy_uint32_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":746 + * ctypedef npy_uint16 uint16_t + * ctypedef npy_uint32 uint32_t + * ctypedef npy_uint64 uint64_t # <<<<<<<<<<<<<< + * #ctypedef npy_uint96 uint96_t + * #ctypedef npy_uint128 uint128_t + */ +typedef npy_uint64 __pyx_t_5numpy_uint64_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":750 + * #ctypedef npy_uint128 uint128_t + * + * ctypedef npy_float32 float32_t # <<<<<<<<<<<<<< + * ctypedef npy_float64 float64_t + * #ctypedef npy_float80 float80_t + */ +typedef npy_float32 __pyx_t_5numpy_float32_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":751 + * + * ctypedef npy_float32 float32_t + * ctypedef npy_float64 float64_t # <<<<<<<<<<<<<< + * #ctypedef npy_float80 float80_t + * #ctypedef npy_float128 float128_t + */ +typedef npy_float64 __pyx_t_5numpy_float64_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":760 + * # The int types are mapped a bit surprising -- + * # numpy.int corresponds to 'l' and numpy.long to 'q' + * ctypedef npy_long int_t # <<<<<<<<<<<<<< + * ctypedef npy_longlong longlong_t + * + */ +typedef npy_long __pyx_t_5numpy_int_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":761 + * # numpy.int 
corresponds to 'l' and numpy.long to 'q' + * ctypedef npy_long int_t + * ctypedef npy_longlong longlong_t # <<<<<<<<<<<<<< + * + * ctypedef npy_ulong uint_t + */ +typedef npy_longlong __pyx_t_5numpy_longlong_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":763 + * ctypedef npy_longlong longlong_t + * + * ctypedef npy_ulong uint_t # <<<<<<<<<<<<<< + * ctypedef npy_ulonglong ulonglong_t + * + */ +typedef npy_ulong __pyx_t_5numpy_uint_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":764 + * + * ctypedef npy_ulong uint_t + * ctypedef npy_ulonglong ulonglong_t # <<<<<<<<<<<<<< + * + * ctypedef npy_intp intp_t + */ +typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":766 + * ctypedef npy_ulonglong ulonglong_t + * + * ctypedef npy_intp intp_t # <<<<<<<<<<<<<< + * ctypedef npy_uintp uintp_t + * + */ +typedef npy_intp __pyx_t_5numpy_intp_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":767 + * + * ctypedef npy_intp intp_t + * ctypedef npy_uintp uintp_t # <<<<<<<<<<<<<< + * + * ctypedef npy_double float_t + */ +typedef npy_uintp __pyx_t_5numpy_uintp_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":769 + * ctypedef npy_uintp uintp_t + * + * ctypedef npy_double float_t # <<<<<<<<<<<<<< + * ctypedef npy_double double_t + * ctypedef npy_longdouble longdouble_t + */ +typedef npy_double __pyx_t_5numpy_float_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":770 + * + * ctypedef npy_double float_t + * ctypedef npy_double double_t # <<<<<<<<<<<<<< + * ctypedef npy_longdouble longdouble_t + * + */ +typedef npy_double __pyx_t_5numpy_double_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":771 + * ctypedef npy_double float_t + * ctypedef npy_double double_t + * ctypedef npy_longdouble longdouble_t # <<<<<<<<<<<<<< + * + * ctypedef npy_cfloat cfloat_t + */ +typedef npy_longdouble __pyx_t_5numpy_longdouble_t; + +/* "opencood/utils/box_overlaps.pyx":14 + * + * DTYPE = np.float32 + * ctypedef float DTYPE_t # <<<<<<<<<<<<<< + * + * + */ +typedef float __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t; +/* #### Code section: complex_type_declarations ### */ +/* Declarations.proto */ +#if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) + #ifdef __cplusplus + typedef ::std::complex< float > __pyx_t_float_complex; + #else + typedef float _Complex __pyx_t_float_complex; + #endif +#else + typedef struct { float real, imag; } __pyx_t_float_complex; +#endif +static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float, float); + +/* Declarations.proto */ +#if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) + #ifdef __cplusplus + typedef ::std::complex< double > __pyx_t_double_complex; + #else + typedef double _Complex __pyx_t_double_complex; + #endif +#else + typedef struct { double real, imag; } __pyx_t_double_complex; +#endif +static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double, double); + +/* #### Code section: type_declarations ### */ + +/*--- Type declarations ---*/ + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":773 + * ctypedef npy_longdouble longdouble_t + * + * 
ctypedef npy_cfloat cfloat_t # <<<<<<<<<<<<<< + * ctypedef npy_cdouble cdouble_t + * ctypedef npy_clongdouble clongdouble_t + */ +typedef npy_cfloat __pyx_t_5numpy_cfloat_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":774 + * + * ctypedef npy_cfloat cfloat_t + * ctypedef npy_cdouble cdouble_t # <<<<<<<<<<<<<< + * ctypedef npy_clongdouble clongdouble_t + * + */ +typedef npy_cdouble __pyx_t_5numpy_cdouble_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":775 + * ctypedef npy_cfloat cfloat_t + * ctypedef npy_cdouble cdouble_t + * ctypedef npy_clongdouble clongdouble_t # <<<<<<<<<<<<<< + * + * ctypedef npy_cdouble complex_t + */ +typedef npy_clongdouble __pyx_t_5numpy_clongdouble_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":777 + * ctypedef npy_clongdouble clongdouble_t + * + * ctypedef npy_cdouble complex_t # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew1(a): + */ +typedef npy_cdouble __pyx_t_5numpy_complex_t; +/* #### Code section: utility_code_proto ### */ + +/* --- Runtime support code (head) --- */ +/* Refnanny.proto */ +#ifndef CYTHON_REFNANNY + #define CYTHON_REFNANNY 0 +#endif +#if CYTHON_REFNANNY + typedef struct { + void (*INCREF)(void*, PyObject*, Py_ssize_t); + void (*DECREF)(void*, PyObject*, Py_ssize_t); + void (*GOTREF)(void*, PyObject*, Py_ssize_t); + void (*GIVEREF)(void*, PyObject*, Py_ssize_t); + void* (*SetupContext)(const char*, Py_ssize_t, const char*); + void (*FinishContext)(void**); + } __Pyx_RefNannyAPIStruct; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); + #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL; +#ifdef WITH_THREAD + #define __Pyx_RefNannySetupContext(name, acquire_gil)\ + if (acquire_gil) {\ + PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__));\ + PyGILState_Release(__pyx_gilstate_save);\ + } else {\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__));\ + } + #define __Pyx_RefNannyFinishContextNogil() {\ + PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ + __Pyx_RefNannyFinishContext();\ + PyGILState_Release(__pyx_gilstate_save);\ + } +#else + #define __Pyx_RefNannySetupContext(name, acquire_gil)\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__)) + #define __Pyx_RefNannyFinishContextNogil() __Pyx_RefNannyFinishContext() +#endif + #define __Pyx_RefNannyFinishContextNogil() {\ + PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ + __Pyx_RefNannyFinishContext();\ + PyGILState_Release(__pyx_gilstate_save);\ + } + #define __Pyx_RefNannyFinishContext()\ + __Pyx_RefNanny->FinishContext(&__pyx_refnanny) + #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) + #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) + #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) + #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) + #define __Pyx_XINCREF(r) do { if((r) == NULL); else {__Pyx_INCREF(r); }} while(0) + #define __Pyx_XDECREF(r) do { if((r) == NULL); else {__Pyx_DECREF(r); }} while(0) + #define __Pyx_XGOTREF(r) do { if((r) == NULL); else 
{__Pyx_GOTREF(r); }} while(0) + #define __Pyx_XGIVEREF(r) do { if((r) == NULL); else {__Pyx_GIVEREF(r);}} while(0) +#else + #define __Pyx_RefNannyDeclarations + #define __Pyx_RefNannySetupContext(name, acquire_gil) + #define __Pyx_RefNannyFinishContextNogil() + #define __Pyx_RefNannyFinishContext() + #define __Pyx_INCREF(r) Py_INCREF(r) + #define __Pyx_DECREF(r) Py_DECREF(r) + #define __Pyx_GOTREF(r) + #define __Pyx_GIVEREF(r) + #define __Pyx_XINCREF(r) Py_XINCREF(r) + #define __Pyx_XDECREF(r) Py_XDECREF(r) + #define __Pyx_XGOTREF(r) + #define __Pyx_XGIVEREF(r) +#endif +#define __Pyx_Py_XDECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; Py_XDECREF(tmp);\ + } while (0) +#define __Pyx_XDECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; __Pyx_XDECREF(tmp);\ + } while (0) +#define __Pyx_DECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; __Pyx_DECREF(tmp);\ + } while (0) +#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0) +#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0) + +/* PyErrExceptionMatches.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyErr_ExceptionMatches(err) __Pyx_PyErr_ExceptionMatchesInState(__pyx_tstate, err) +static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tstate, PyObject* err); +#else +#define __Pyx_PyErr_ExceptionMatches(err) PyErr_ExceptionMatches(err) +#endif + +/* PyThreadStateGet.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyThreadState_declare PyThreadState *__pyx_tstate; +#define __Pyx_PyThreadState_assign __pyx_tstate = __Pyx_PyThreadState_Current; +#if PY_VERSION_HEX >= 0x030C00A6 +#define __Pyx_PyErr_Occurred() (__pyx_tstate->current_exception != NULL) +#define __Pyx_PyErr_CurrentExceptionType() (__pyx_tstate->current_exception ? 
(PyObject*) Py_TYPE(__pyx_tstate->current_exception) : (PyObject*) NULL) +#else +#define __Pyx_PyErr_Occurred() (__pyx_tstate->curexc_type != NULL) +#define __Pyx_PyErr_CurrentExceptionType() (__pyx_tstate->curexc_type) +#endif +#else +#define __Pyx_PyThreadState_declare +#define __Pyx_PyThreadState_assign +#define __Pyx_PyErr_Occurred() (PyErr_Occurred() != NULL) +#define __Pyx_PyErr_CurrentExceptionType() PyErr_Occurred() +#endif + +/* PyErrFetchRestore.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyErr_Clear() __Pyx_ErrRestore(NULL, NULL, NULL) +#define __Pyx_ErrRestoreWithState(type, value, tb) __Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb) +#define __Pyx_ErrFetchWithState(type, value, tb) __Pyx_ErrFetchInState(PyThreadState_GET(), type, value, tb) +#define __Pyx_ErrRestore(type, value, tb) __Pyx_ErrRestoreInState(__pyx_tstate, type, value, tb) +#define __Pyx_ErrFetch(type, value, tb) __Pyx_ErrFetchInState(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); +static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A6 +#define __Pyx_PyErr_SetNone(exc) (Py_INCREF(exc), __Pyx_ErrRestore((exc), NULL, NULL)) +#else +#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) +#endif +#else +#define __Pyx_PyErr_Clear() PyErr_Clear() +#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) +#define __Pyx_ErrRestoreWithState(type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetchWithState(type, value, tb) PyErr_Fetch(type, value, tb) +#define __Pyx_ErrRestoreInState(tstate, type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetchInState(tstate, type, value, tb) PyErr_Fetch(type, value, tb) +#define __Pyx_ErrRestore(type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetch(type, value, tb) PyErr_Fetch(type, value, tb) +#endif + +/* PyObjectGetAttrStr.proto */ +#if CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name); +#else +#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n) +#endif + +/* PyObjectGetAttrStrNoError.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name); + +/* GetBuiltinName.proto */ +static PyObject *__Pyx_GetBuiltinName(PyObject *name); + +/* GetTopmostException.proto */ +#if CYTHON_USE_EXC_INFO_STACK && CYTHON_FAST_THREAD_STATE +static _PyErr_StackItem * __Pyx_PyErr_GetTopmostException(PyThreadState *tstate); +#endif + +/* SaveResetException.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_ExceptionSave(type, value, tb) __Pyx__ExceptionSave(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#define __Pyx_ExceptionReset(type, value, tb) __Pyx__ExceptionReset(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); +#else +#define __Pyx_ExceptionSave(type, value, tb) PyErr_GetExcInfo(type, value, tb) +#define __Pyx_ExceptionReset(type, value, tb) PyErr_SetExcInfo(type, value, tb) +#endif + +/* GetException.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_GetException(type, value, tb) __Pyx__GetException(__pyx_tstate, type, value, tb) +static int 
__Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#else +static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb); +#endif + +/* PyObjectCall.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw); +#else +#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw) +#endif + +/* RaiseException.proto */ +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause); + +/* TupleAndListFromArray.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyList_FromArray(PyObject *const *src, Py_ssize_t n); +static CYTHON_INLINE PyObject* __Pyx_PyTuple_FromArray(PyObject *const *src, Py_ssize_t n); +#endif + +/* IncludeStringH.proto */ +#include <string.h> + +/* BytesEquals.proto */ +static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals); + +/* UnicodeEquals.proto */ +static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals); + +/* fastcall.proto */ +#if CYTHON_AVOID_BORROWED_REFS + #define __Pyx_Arg_VARARGS(args, i) PySequence_GetItem(args, i) +#elif CYTHON_ASSUME_SAFE_MACROS + #define __Pyx_Arg_VARARGS(args, i) PyTuple_GET_ITEM(args, i) +#else + #define __Pyx_Arg_VARARGS(args, i) PyTuple_GetItem(args, i) +#endif +#if CYTHON_AVOID_BORROWED_REFS + #define __Pyx_Arg_NewRef_VARARGS(arg) __Pyx_NewRef(arg) + #define __Pyx_Arg_XDECREF_VARARGS(arg) Py_XDECREF(arg) +#else + #define __Pyx_Arg_NewRef_VARARGS(arg) arg + #define __Pyx_Arg_XDECREF_VARARGS(arg) +#endif +#define __Pyx_NumKwargs_VARARGS(kwds) PyDict_Size(kwds) +#define __Pyx_KwValues_VARARGS(args, nargs) NULL +#define __Pyx_GetKwValue_VARARGS(kw, kwvalues, s) __Pyx_PyDict_GetItemStrWithError(kw, s) +#define __Pyx_KwargsAsDict_VARARGS(kw, kwvalues) PyDict_Copy(kw) +#if CYTHON_METH_FASTCALL + #define __Pyx_Arg_FASTCALL(args, i) args[i] + #define __Pyx_NumKwargs_FASTCALL(kwds) PyTuple_GET_SIZE(kwds) + #define __Pyx_KwValues_FASTCALL(args, nargs) ((args) + (nargs)) + static CYTHON_INLINE PyObject * __Pyx_GetKwValue_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues, PyObject *s); +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000 + CYTHON_UNUSED static PyObject *__Pyx_KwargsAsDict_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues); + #else + #define __Pyx_KwargsAsDict_FASTCALL(kw, kwvalues) _PyStack_AsDict(kwvalues, kw) + #endif + #define __Pyx_Arg_NewRef_FASTCALL(arg) arg /* no-op, __Pyx_Arg_FASTCALL is direct and this needs + to have the same reference counting */ + #define __Pyx_Arg_XDECREF_FASTCALL(arg) +#else + #define __Pyx_Arg_FASTCALL __Pyx_Arg_VARARGS + #define __Pyx_NumKwargs_FASTCALL __Pyx_NumKwargs_VARARGS + #define __Pyx_KwValues_FASTCALL __Pyx_KwValues_VARARGS + #define __Pyx_GetKwValue_FASTCALL __Pyx_GetKwValue_VARARGS + #define __Pyx_KwargsAsDict_FASTCALL __Pyx_KwargsAsDict_VARARGS + #define __Pyx_Arg_NewRef_FASTCALL(arg) __Pyx_Arg_NewRef_VARARGS(arg) + #define __Pyx_Arg_XDECREF_FASTCALL(arg) __Pyx_Arg_XDECREF_VARARGS(arg) +#endif +#if CYTHON_COMPILING_IN_CPYTHON && CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS +#define __Pyx_ArgsSlice_VARARGS(args, start, stop) __Pyx_PyTuple_FromArray(&__Pyx_Arg_VARARGS(args, start), stop - start) +#define __Pyx_ArgsSlice_FASTCALL(args, start, stop) __Pyx_PyTuple_FromArray(&__Pyx_Arg_FASTCALL(args, start), stop - start) +#else +#define __Pyx_ArgsSlice_VARARGS(args, start, stop) PyTuple_GetSlice(args, start, 
stop) +#define __Pyx_ArgsSlice_FASTCALL(args, start, stop) PyTuple_GetSlice(args, start, stop) +#endif + +/* RaiseArgTupleInvalid.proto */ +static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact, + Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); + +/* RaiseDoubleKeywords.proto */ +static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); + +/* ParseKeywords.proto */ +static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject *const *kwvalues, + PyObject **argnames[], + PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args, + const char* function_name); + +/* ArgTypeTest.proto */ +#define __Pyx_ArgTypeTest(obj, type, none_allowed, name, exact)\ + ((likely(__Pyx_IS_TYPE(obj, type) | (none_allowed && (obj == Py_None)))) ? 1 :\ + __Pyx__ArgTypeTest(obj, type, name, exact)) +static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact); + +/* IsLittleEndian.proto */ +static CYTHON_INLINE int __Pyx_Is_Little_Endian(void); + +/* BufferFormatCheck.proto */ +static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts); +static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx, + __Pyx_BufFmt_StackElem* stack, + __Pyx_TypeInfo* type); + +/* BufferGetAndValidate.proto */ +#define __Pyx_GetBufferAndValidate(buf, obj, dtype, flags, nd, cast, stack)\ + ((obj == Py_None || obj == NULL) ?\ + (__Pyx_ZeroBuffer(buf), 0) :\ + __Pyx__GetBufferAndValidate(buf, obj, dtype, flags, nd, cast, stack)) +static int __Pyx__GetBufferAndValidate(Py_buffer* buf, PyObject* obj, + __Pyx_TypeInfo* dtype, int flags, int nd, int cast, __Pyx_BufFmt_StackElem* stack); +static void __Pyx_ZeroBuffer(Py_buffer* buf); +static CYTHON_INLINE void __Pyx_SafeReleaseBuffer(Py_buffer* info); +static Py_ssize_t __Pyx_minusones[] = { -1, -1, -1, -1, -1, -1, -1, -1 }; +static Py_ssize_t __Pyx_zeros[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + +/* PyDictVersioning.proto */ +#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS +#define __PYX_DICT_VERSION_INIT ((PY_UINT64_T) -1) +#define __PYX_GET_DICT_VERSION(dict) (((PyDictObject*)(dict))->ma_version_tag) +#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var)\ + (version_var) = __PYX_GET_DICT_VERSION(dict);\ + (cache_var) = (value); +#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) {\ + static PY_UINT64_T __pyx_dict_version = 0;\ + static PyObject *__pyx_dict_cached_value = NULL;\ + if (likely(__PYX_GET_DICT_VERSION(DICT) == __pyx_dict_version)) {\ + (VAR) = __pyx_dict_cached_value;\ + } else {\ + (VAR) = __pyx_dict_cached_value = (LOOKUP);\ + __pyx_dict_version = __PYX_GET_DICT_VERSION(DICT);\ + }\ +} +static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj); +static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj); +static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version); +#else +#define __PYX_GET_DICT_VERSION(dict) (0) +#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var) +#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) (VAR) = (LOOKUP); +#endif + +/* GetModuleGlobalName.proto */ +#if CYTHON_USE_DICT_VERSIONS +#define __Pyx_GetModuleGlobalName(var, name) do {\ + static PY_UINT64_T __pyx_dict_version = 0;\ + static PyObject *__pyx_dict_cached_value = NULL;\ + (var) = (likely(__pyx_dict_version == __PYX_GET_DICT_VERSION(__pyx_d))) ?\ + (likely(__pyx_dict_cached_value) ? 
__Pyx_NewRef(__pyx_dict_cached_value) : __Pyx_GetBuiltinName(name)) :\ + __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\ +} while(0) +#define __Pyx_GetModuleGlobalNameUncached(var, name) do {\ + PY_UINT64_T __pyx_dict_version;\ + PyObject *__pyx_dict_cached_value;\ + (var) = __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\ +} while(0) +static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value); +#else +#define __Pyx_GetModuleGlobalName(var, name) (var) = __Pyx__GetModuleGlobalName(name) +#define __Pyx_GetModuleGlobalNameUncached(var, name) (var) = __Pyx__GetModuleGlobalName(name) +static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name); +#endif + +/* ExtTypeTest.proto */ +static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type); + +/* BufferIndexError.proto */ +static void __Pyx_RaiseBufferIndexError(int axis); + +#define __Pyx_BufPtrStrided2d(type, buf, i0, s0, i1, s1) (type)((char*)buf + i0 * s0 + i1 * s1) +/* GetItemInt.proto */ +#define __Pyx_GetItemInt(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_GetItemInt_Fast(o, (Py_ssize_t)i, is_list, wraparound, boundscheck) :\ + (is_list ? (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL) :\ + __Pyx_GetItemInt_Generic(o, to_py_func(i)))) +#define __Pyx_GetItemInt_List(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_GetItemInt_List_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\ + (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL)) +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, + int wraparound, int boundscheck); +#define __Pyx_GetItemInt_Tuple(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_GetItemInt_Tuple_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\ + (PyErr_SetString(PyExc_IndexError, "tuple index out of range"), (PyObject*)NULL)) +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, + int wraparound, int boundscheck); +static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j); +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, + int is_list, int wraparound, int boundscheck); + +/* PyFunctionFastCall.proto */ +#if CYTHON_FAST_PYCALL +#if !CYTHON_VECTORCALL +#define __Pyx_PyFunction_FastCall(func, args, nargs)\ + __Pyx_PyFunction_FastCallDict((func), (args), (nargs), NULL) +static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs); +#endif +#define __Pyx_BUILD_ASSERT_EXPR(cond)\ + (sizeof(char [1 - 2*!(cond)]) - 1) +#ifndef Py_MEMBER_SIZE +#define Py_MEMBER_SIZE(type, member) sizeof(((type *)0)->member) +#endif +#if !CYTHON_VECTORCALL +#if PY_VERSION_HEX >= 0x03080000 + #include "frameobject.h" +#if PY_VERSION_HEX >= 0x030b00a6 && !CYTHON_COMPILING_IN_LIMITED_API + #ifndef Py_BUILD_CORE + #define Py_BUILD_CORE 1 + #endif + #include "internal/pycore_frame.h" +#endif + #define __Pxy_PyFrame_Initialize_Offsets() + #define __Pyx_PyFrame_GetLocalsplus(frame) ((frame)->f_localsplus) +#else + static size_t __pyx_pyframe_localsplus_offset = 0; + #include "frameobject.h" + #define __Pxy_PyFrame_Initialize_Offsets()\ + 
((void)__Pyx_BUILD_ASSERT_EXPR(sizeof(PyFrameObject) == offsetof(PyFrameObject, f_localsplus) + Py_MEMBER_SIZE(PyFrameObject, f_localsplus)),\ + (void)(__pyx_pyframe_localsplus_offset = ((size_t)PyFrame_Type.tp_basicsize) - Py_MEMBER_SIZE(PyFrameObject, f_localsplus))) + #define __Pyx_PyFrame_GetLocalsplus(frame)\ + (assert(__pyx_pyframe_localsplus_offset), (PyObject **)(((char *)(frame)) + __pyx_pyframe_localsplus_offset)) +#endif +#endif +#endif + +/* PyObjectCallMethO.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); +#endif + +/* PyObjectFastCall.proto */ +#define __Pyx_PyObject_FastCall(func, args, nargs) __Pyx_PyObject_FastCallDict(func, args, (size_t)(nargs), NULL) +static CYTHON_INLINE PyObject* __Pyx_PyObject_FastCallDict(PyObject *func, PyObject **args, size_t nargs, PyObject *kwargs); + +/* PyObjectCallOneArg.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); + +/* ObjectGetItem.proto */ +#if CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PyObject *__Pyx_PyObject_GetItem(PyObject *obj, PyObject *key); +#else +#define __Pyx_PyObject_GetItem(obj, key) PyObject_GetItem(obj, key) +#endif + +/* BufferFallbackError.proto */ +static void __Pyx_RaiseBufferFallbackError(void); + +#define __Pyx_BufPtrStrided1d(type, buf, i0, s0) (type)((char*)buf + i0 * s0) +/* PyIntBinop.proto */ +#if !CYTHON_COMPILING_IN_PYPY +static PyObject* __Pyx_PyInt_AddObjC(PyObject *op1, PyObject *op2, long intval, int inplace, int zerodivision_check); +#else +#define __Pyx_PyInt_AddObjC(op1, op2, intval, inplace, zerodivision_check)\ + (inplace ? PyNumber_InPlaceAdd(op1, op2) : PyNumber_Add(op1, op2)) +#endif + +/* SliceObject.proto */ +#define __Pyx_PyObject_DelSlice(obj, cstart, cstop, py_start, py_stop, py_slice, has_cstart, has_cstop, wraparound)\ + __Pyx_PyObject_SetSlice(obj, (PyObject*)NULL, cstart, cstop, py_start, py_stop, py_slice, has_cstart, has_cstop, wraparound) +static CYTHON_INLINE int __Pyx_PyObject_SetSlice( + PyObject* obj, PyObject* value, Py_ssize_t cstart, Py_ssize_t cstop, + PyObject** py_start, PyObject** py_stop, PyObject** py_slice, + int has_cstart, int has_cstop, int wraparound); + +/* SetItemInt.proto */ +#define __Pyx_SetItemInt(o, i, v, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_SetItemInt_Fast(o, (Py_ssize_t)i, v, is_list, wraparound, boundscheck) :\ + (is_list ? 
(PyErr_SetString(PyExc_IndexError, "list assignment index out of range"), -1) :\ + __Pyx_SetItemInt_Generic(o, to_py_func(i), v))) +static int __Pyx_SetItemInt_Generic(PyObject *o, PyObject *j, PyObject *v); +static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObject *v, + int is_list, int wraparound, int boundscheck); + +/* TypeImport.proto */ +#ifndef __PYX_HAVE_RT_ImportType_proto_3_0_11 +#define __PYX_HAVE_RT_ImportType_proto_3_0_11 +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +#include <stdalign.h> +#endif +#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || __cplusplus >= 201103L +#define __PYX_GET_STRUCT_ALIGNMENT_3_0_11(s) alignof(s) +#else +#define __PYX_GET_STRUCT_ALIGNMENT_3_0_11(s) sizeof(void*) +#endif +enum __Pyx_ImportType_CheckSize_3_0_11 { + __Pyx_ImportType_CheckSize_Error_3_0_11 = 0, + __Pyx_ImportType_CheckSize_Warn_3_0_11 = 1, + __Pyx_ImportType_CheckSize_Ignore_3_0_11 = 2 +}; +static PyTypeObject *__Pyx_ImportType_3_0_11(PyObject* module, const char *module_name, const char *class_name, size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_0_11 check_size); +#endif + +/* Import.proto */ +static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level); + +/* ImportDottedModule.proto */ +static PyObject *__Pyx_ImportDottedModule(PyObject *name, PyObject *parts_tuple); +#if PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx_ImportDottedModule_WalkParts(PyObject *module, PyObject *name, PyObject *parts_tuple); +#endif + +/* IncludeStructmemberH.proto */ +#include <structmember.h> + +/* FixUpExtensionType.proto */ +#if CYTHON_USE_TYPE_SPECS +static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject *type); +#endif + +/* FetchSharedCythonModule.proto */ +static PyObject *__Pyx_FetchSharedCythonABIModule(void); + +/* FetchCommonType.proto */ +#if !CYTHON_USE_TYPE_SPECS +static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type); +#else +static PyTypeObject* __Pyx_FetchCommonTypeFromSpec(PyObject *module, PyType_Spec *spec, PyObject *bases); +#endif + +/* PyMethodNew.proto */ +#if CYTHON_COMPILING_IN_LIMITED_API +static PyObject *__Pyx_PyMethod_New(PyObject *func, PyObject *self, PyObject *typ) { + PyObject *typesModule=NULL, *methodType=NULL, *result=NULL; + CYTHON_UNUSED_VAR(typ); + if (!self) + return __Pyx_NewRef(func); + typesModule = PyImport_ImportModule("types"); + if (!typesModule) return NULL; + methodType = PyObject_GetAttrString(typesModule, "MethodType"); + Py_DECREF(typesModule); + if (!methodType) return NULL; + result = PyObject_CallFunctionObjArgs(methodType, func, self, NULL); + Py_DECREF(methodType); + return result; +} +#elif PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx_PyMethod_New(PyObject *func, PyObject *self, PyObject *typ) { + CYTHON_UNUSED_VAR(typ); + if (!self) + return __Pyx_NewRef(func); + return PyMethod_New(func, self); +} +#else + #define __Pyx_PyMethod_New PyMethod_New +#endif + +/* PyVectorcallFastCallDict.proto */ +#if CYTHON_METH_FASTCALL +static CYTHON_INLINE PyObject *__Pyx_PyVectorcall_FastCallDict(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw); +#endif + +/* CythonFunctionShared.proto */ +#define __Pyx_CyFunction_USED +#define __Pyx_CYFUNCTION_STATICMETHOD 0x01 +#define __Pyx_CYFUNCTION_CLASSMETHOD 0x02 +#define __Pyx_CYFUNCTION_CCLASS 0x04 +#define __Pyx_CYFUNCTION_COROUTINE 0x08 +#define __Pyx_CyFunction_GetClosure(f)\ + (((__pyx_CyFunctionObject *) (f))->func_closure) +#if PY_VERSION_HEX < 0x030900B1 || 
CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_CyFunction_GetClassObj(f)\ + (((__pyx_CyFunctionObject *) (f))->func_classobj) +#else + #define __Pyx_CyFunction_GetClassObj(f)\ + ((PyObject*) ((PyCMethodObject *) (f))->mm_class) +#endif +#define __Pyx_CyFunction_SetClassObj(f, classobj)\ + __Pyx__CyFunction_SetClassObj((__pyx_CyFunctionObject *) (f), (classobj)) +#define __Pyx_CyFunction_Defaults(type, f)\ + ((type *)(((__pyx_CyFunctionObject *) (f))->defaults)) +#define __Pyx_CyFunction_SetDefaultsGetter(f, g)\ + ((__pyx_CyFunctionObject *) (f))->defaults_getter = (g) +typedef struct { +#if CYTHON_COMPILING_IN_LIMITED_API + PyObject_HEAD + PyObject *func; +#elif PY_VERSION_HEX < 0x030900B1 + PyCFunctionObject func; +#else + PyCMethodObject func; +#endif +#if CYTHON_BACKPORT_VECTORCALL + __pyx_vectorcallfunc func_vectorcall; +#endif +#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API + PyObject *func_weakreflist; +#endif + PyObject *func_dict; + PyObject *func_name; + PyObject *func_qualname; + PyObject *func_doc; + PyObject *func_globals; + PyObject *func_code; + PyObject *func_closure; +#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + PyObject *func_classobj; +#endif + void *defaults; + int defaults_pyobjects; + size_t defaults_size; + int flags; + PyObject *defaults_tuple; + PyObject *defaults_kwdict; + PyObject *(*defaults_getter)(PyObject *); + PyObject *func_annotations; + PyObject *func_is_coroutine; +} __pyx_CyFunctionObject; +#undef __Pyx_CyOrPyCFunction_Check +#define __Pyx_CyFunction_Check(obj) __Pyx_TypeCheck(obj, __pyx_CyFunctionType) +#define __Pyx_CyOrPyCFunction_Check(obj) __Pyx_TypeCheck2(obj, __pyx_CyFunctionType, &PyCFunction_Type) +#define __Pyx_CyFunction_CheckExact(obj) __Pyx_IS_TYPE(obj, __pyx_CyFunctionType) +static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc); +#undef __Pyx_IsSameCFunction +#define __Pyx_IsSameCFunction(func, cfunc) __Pyx__IsSameCyOrCFunction(func, cfunc) +static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject* op, PyMethodDef *ml, + int flags, PyObject* qualname, + PyObject *closure, + PyObject *module, PyObject *globals, + PyObject* code); +static CYTHON_INLINE void __Pyx__CyFunction_SetClassObj(__pyx_CyFunctionObject* f, PyObject* classobj); +static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *m, + size_t size, + int pyobjects); +static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *m, + PyObject *tuple); +static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *m, + PyObject *dict); +static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *m, + PyObject *dict); +static int __pyx_CyFunction_init(PyObject *module); +#if CYTHON_METH_FASTCALL +static PyObject * __Pyx_CyFunction_Vectorcall_NOARGS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); +static PyObject * __Pyx_CyFunction_Vectorcall_O(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); +static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); +static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); +#if CYTHON_BACKPORT_VECTORCALL +#define __Pyx_CyFunction_func_vectorcall(f) (((__pyx_CyFunctionObject*)f)->func_vectorcall) +#else +#define __Pyx_CyFunction_func_vectorcall(f) (((PyCFunctionObject*)f)->vectorcall) +#endif +#endif + +/* 
CythonFunction.proto */ +static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, + int flags, PyObject* qualname, + PyObject *closure, + PyObject *module, PyObject *globals, + PyObject* code); + +/* CLineInTraceback.proto */ +#ifdef CYTHON_CLINE_IN_TRACEBACK +#define __Pyx_CLineForTraceback(tstate, c_line) (((CYTHON_CLINE_IN_TRACEBACK)) ? c_line : 0) +#else +static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line); +#endif + +/* CodeObjectCache.proto */ +#if !CYTHON_COMPILING_IN_LIMITED_API +typedef struct { + PyCodeObject* code_object; + int code_line; +} __Pyx_CodeObjectCacheEntry; +struct __Pyx_CodeObjectCache { + int count; + int max_count; + __Pyx_CodeObjectCacheEntry* entries; +}; +static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL}; +static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line); +static PyCodeObject *__pyx_find_code_object(int code_line); +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object); +#endif + +/* AddTraceback.proto */ +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename); + +/* BufferStructDeclare.proto */ +typedef struct { + Py_ssize_t shape, strides, suboffsets; +} __Pyx_Buf_DimInfo; +typedef struct { + size_t refcount; + Py_buffer pybuffer; +} __Pyx_Buffer; +typedef struct { + __Pyx_Buffer *rcbuffer; + char *data; + __Pyx_Buf_DimInfo diminfo[8]; +} __Pyx_LocalBuf_ND; + +#if PY_MAJOR_VERSION < 3 + static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags); + static void __Pyx_ReleaseBuffer(Py_buffer *view); +#else + #define __Pyx_GetBuffer PyObject_GetBuffer + #define __Pyx_ReleaseBuffer PyBuffer_Release +#endif + + +/* GCCDiagnostics.proto */ +#if !defined(__INTEL_COMPILER) && defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) +#define __Pyx_HAS_GCC_DIAGNOSTIC +#endif + +/* RealImag.proto */ +#if CYTHON_CCOMPLEX + #ifdef __cplusplus + #define __Pyx_CREAL(z) ((z).real()) + #define __Pyx_CIMAG(z) ((z).imag()) + #else + #define __Pyx_CREAL(z) (__real__(z)) + #define __Pyx_CIMAG(z) (__imag__(z)) + #endif +#else + #define __Pyx_CREAL(z) ((z).real) + #define __Pyx_CIMAG(z) ((z).imag) +#endif +#if defined(__cplusplus) && CYTHON_CCOMPLEX\ + && (defined(_WIN32) || defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 5 || __GNUC__ == 4 && __GNUC_MINOR__ >= 4 )) || __cplusplus >= 201103) + #define __Pyx_SET_CREAL(z,x) ((z).real(x)) + #define __Pyx_SET_CIMAG(z,y) ((z).imag(y)) +#else + #define __Pyx_SET_CREAL(z,x) __Pyx_CREAL(z) = (x) + #define __Pyx_SET_CIMAG(z,y) __Pyx_CIMAG(z) = (y) +#endif + +/* Arithmetic.proto */ +#if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) + #define __Pyx_c_eq_float(a, b) ((a)==(b)) + #define __Pyx_c_sum_float(a, b) ((a)+(b)) + #define __Pyx_c_diff_float(a, b) ((a)-(b)) + #define __Pyx_c_prod_float(a, b) ((a)*(b)) + #define __Pyx_c_quot_float(a, b) ((a)/(b)) + #define __Pyx_c_neg_float(a) (-(a)) + #ifdef __cplusplus + #define __Pyx_c_is_zero_float(z) ((z)==(float)0) + #define __Pyx_c_conj_float(z) (::std::conj(z)) + #if 1 + #define __Pyx_c_abs_float(z) (::std::abs(z)) + #define __Pyx_c_pow_float(a, b) (::std::pow(a, b)) + #endif + #else + #define __Pyx_c_is_zero_float(z) ((z)==0) + #define __Pyx_c_conj_float(z) (conjf(z)) + #if 1 + #define __Pyx_c_abs_float(z) (cabsf(z)) + #define __Pyx_c_pow_float(a, b) (cpowf(a, b)) + #endif + #endif +#else + static CYTHON_INLINE int __Pyx_c_eq_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE 
__pyx_t_float_complex __Pyx_c_sum_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_diff_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prod_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_neg_float(__pyx_t_float_complex); + static CYTHON_INLINE int __Pyx_c_is_zero_float(__pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conj_float(__pyx_t_float_complex); + #if 1 + static CYTHON_INLINE float __Pyx_c_abs_float(__pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_pow_float(__pyx_t_float_complex, __pyx_t_float_complex); + #endif +#endif + +/* Arithmetic.proto */ +#if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) + #define __Pyx_c_eq_double(a, b) ((a)==(b)) + #define __Pyx_c_sum_double(a, b) ((a)+(b)) + #define __Pyx_c_diff_double(a, b) ((a)-(b)) + #define __Pyx_c_prod_double(a, b) ((a)*(b)) + #define __Pyx_c_quot_double(a, b) ((a)/(b)) + #define __Pyx_c_neg_double(a) (-(a)) + #ifdef __cplusplus + #define __Pyx_c_is_zero_double(z) ((z)==(double)0) + #define __Pyx_c_conj_double(z) (::std::conj(z)) + #if 1 + #define __Pyx_c_abs_double(z) (::std::abs(z)) + #define __Pyx_c_pow_double(a, b) (::std::pow(a, b)) + #endif + #else + #define __Pyx_c_is_zero_double(z) ((z)==0) + #define __Pyx_c_conj_double(z) (conj(z)) + #if 1 + #define __Pyx_c_abs_double(z) (cabs(z)) + #define __Pyx_c_pow_double(a, b) (cpow(a, b)) + #endif + #endif +#else + static CYTHON_INLINE int __Pyx_c_eq_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg_double(__pyx_t_double_complex); + static CYTHON_INLINE int __Pyx_c_is_zero_double(__pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj_double(__pyx_t_double_complex); + #if 1 + static CYTHON_INLINE double __Pyx_c_abs_double(__pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow_double(__pyx_t_double_complex, __pyx_t_double_complex); + #endif +#endif + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_int(unsigned int value); + +/* CIntFromPy.proto */ +static CYTHON_INLINE unsigned int __Pyx_PyInt_As_unsigned_int(PyObject *); + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value); + +/* FormatTypeName.proto */ +#if CYTHON_COMPILING_IN_LIMITED_API +typedef PyObject *__Pyx_TypeName; +#define __Pyx_FMT_TYPENAME "%U" +static __Pyx_TypeName __Pyx_PyType_GetName(PyTypeObject* tp); +#define __Pyx_DECREF_TypeName(obj) Py_XDECREF(obj) +#else +typedef const char *__Pyx_TypeName; +#define __Pyx_FMT_TYPENAME "%.200s" +#define __Pyx_PyType_GetName(tp) ((tp)->tp_name) +#define __Pyx_DECREF_TypeName(obj) +#endif + +/* CIntFromPy.proto */ +static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *); + +/* CIntFromPy.proto */ +static 
CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *); + +/* FastTypeChecks.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_TypeCheck(obj, type) __Pyx_IsSubtype(Py_TYPE(obj), (PyTypeObject *)type) +#define __Pyx_TypeCheck2(obj, type1, type2) __Pyx_IsAnySubtype2(Py_TYPE(obj), (PyTypeObject *)type1, (PyTypeObject *)type2) +static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b); +static CYTHON_INLINE int __Pyx_IsAnySubtype2(PyTypeObject *cls, PyTypeObject *a, PyTypeObject *b); +static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject *type); +static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *type1, PyObject *type2); +#else +#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type) +#define __Pyx_TypeCheck2(obj, type1, type2) (PyObject_TypeCheck(obj, (PyTypeObject *)type1) || PyObject_TypeCheck(obj, (PyTypeObject *)type2)) +#define __Pyx_PyErr_GivenExceptionMatches(err, type) PyErr_GivenExceptionMatches(err, type) +#define __Pyx_PyErr_GivenExceptionMatches2(err, type1, type2) (PyErr_GivenExceptionMatches(err, type1) || PyErr_GivenExceptionMatches(err, type2)) +#endif +#define __Pyx_PyErr_ExceptionMatches2(err1, err2) __Pyx_PyErr_GivenExceptionMatches2(__Pyx_PyErr_CurrentExceptionType(), err1, err2) +#define __Pyx_PyException_Check(obj) __Pyx_TypeCheck(obj, PyExc_Exception) + +/* CheckBinaryVersion.proto */ +static unsigned long __Pyx_get_runtime_version(void); +static int __Pyx_check_binary_version(unsigned long ct_version, unsigned long rt_version, int allow_newer); + +/* InitStrings.proto */ +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); + +/* #### Code section: module_declarations ### */ +static CYTHON_INLINE PyObject *__pyx_f_5numpy_7ndarray_4base_base(PyArrayObject *__pyx_v_self); /* proto*/ +static CYTHON_INLINE PyArray_Descr *__pyx_f_5numpy_7ndarray_5descr_descr(PyArrayObject *__pyx_v_self); /* proto*/ +static CYTHON_INLINE int __pyx_f_5numpy_7ndarray_4ndim_ndim(PyArrayObject *__pyx_v_self); /* proto*/ +static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_5shape_shape(PyArrayObject *__pyx_v_self); /* proto*/ +static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_7strides_strides(PyArrayObject *__pyx_v_self); /* proto*/ +static CYTHON_INLINE npy_intp __pyx_f_5numpy_7ndarray_4size_size(PyArrayObject *__pyx_v_self); /* proto*/ +static CYTHON_INLINE char *__pyx_f_5numpy_7ndarray_4data_data(PyArrayObject *__pyx_v_self); /* proto*/ + +/* Module declarations from "libc.string" */ + +/* Module declarations from "libc.stdio" */ + +/* Module declarations from "__builtin__" */ + +/* Module declarations from "cpython.type" */ + +/* Module declarations from "cpython" */ + +/* Module declarations from "cpython.object" */ + +/* Module declarations from "cpython.ref" */ + +/* Module declarations from "numpy" */ + +/* Module declarations from "numpy" */ + +/* Module declarations from "opencood.utils.box_overlaps" */ +/* #### Code section: typeinfo ### */ +static __Pyx_TypeInfo __Pyx_TypeInfo_nn___pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t = { "DTYPE_t", NULL, sizeof(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t), { 0 }, 0, 'R', 0, 0 }; +static __Pyx_TypeInfo __Pyx_TypeInfo_float = { "float", NULL, sizeof(float), { 0 }, 0, 'R', 0, 0 }; +/* #### Code section: before_global_var ### */ +#define __Pyx_MODULE_NAME "opencood.utils.box_overlaps" +extern int __pyx_module_is_main_opencood__utils__box_overlaps; +int __pyx_module_is_main_opencood__utils__box_overlaps = 0; + +/* Implementation 
of "opencood.utils.box_overlaps" */ +/* #### Code section: global_var ### */ +static PyObject *__pyx_builtin_range; +static PyObject *__pyx_builtin_ImportError; +/* #### Code section: string_decls ### */ +static const char __pyx_k_K[] = "K"; +static const char __pyx_k_M[] = "M"; +static const char __pyx_k_N[] = "N"; +static const char __pyx_k_i[] = "i"; +static const char __pyx_k_k[] = "k"; +static const char __pyx_k_m[] = "m"; +static const char __pyx_k_n[] = "n"; +static const char __pyx_k__6[] = "*"; +static const char __pyx_k_ih[] = "ih"; +static const char __pyx_k_iw[] = "iw"; +static const char __pyx_k_np[] = "np"; +static const char __pyx_k_ov[] = "ov"; +static const char __pyx_k_ua[] = "ua"; +static const char __pyx_k__13[] = "?"; +static const char __pyx_k_bi0[] = "bi0"; +static const char __pyx_k_bi1[] = "bi1"; +static const char __pyx_k_bi2[] = "bi2"; +static const char __pyx_k_bi3[] = "bi3"; +static const char __pyx_k_det[] = "det"; +static const char __pyx_k_bit2[] = "bit2"; +static const char __pyx_k_det2[] = "det2"; +static const char __pyx_k_main[] = "__main__"; +static const char __pyx_k_name[] = "__name__"; +static const char __pyx_k_spec[] = "__spec__"; +static const char __pyx_k_test[] = "__test__"; +static const char __pyx_k_DTYPE[] = "DTYPE"; +static const char __pyx_k_boxes[] = "boxes"; +static const char __pyx_k_dtype[] = "dtype"; +static const char __pyx_k_numpy[] = "numpy"; +static const char __pyx_k_range[] = "range"; +static const char __pyx_k_zeros[] = "zeros"; +static const char __pyx_k_import[] = "__import__"; +static const char __pyx_k_thresh[] = "thresh"; +static const char __pyx_k_acc_box[] = "acc_box"; +static const char __pyx_k_float32[] = "float32"; +static const char __pyx_k_box_area[] = "box_area"; +static const char __pyx_k_box_vote[] = "box_vote"; +static const char __pyx_k_dets_NMS[] = "dets_NMS"; +static const char __pyx_k_dets_all[] = "dets_all"; +static const char __pyx_k_intersec[] = "intersec"; +static const char __pyx_k_overlaps[] = "overlaps"; +static const char __pyx_k_acc_score[] = "acc_score"; +static const char __pyx_k_dets_voted[] = "dets_voted"; +static const char __pyx_k_ImportError[] = "ImportError"; +static const char __pyx_k_query_boxes[] = "query_boxes"; +static const char __pyx_k_initializing[] = "_initializing"; +static const char __pyx_k_is_coroutine[] = "_is_coroutine"; +static const char __pyx_k_bbox_overlaps[] = "bbox_overlaps"; +static const char __pyx_k_class_getitem[] = "__class_getitem__"; +static const char __pyx_k_asyncio_coroutines[] = "asyncio.coroutines"; +static const char __pyx_k_bbox_intersections[] = "bbox_intersections"; +static const char __pyx_k_cline_in_traceback[] = "cline_in_traceback"; +static const char __pyx_k_opencood_utils_box_overlaps[] = "opencood.utils.box_overlaps"; +static const char __pyx_k_numpy_core_multiarray_failed_to[] = "numpy.core.multiarray failed to import"; +static const char __pyx_k_opencood_utils_box_overlaps_pyx[] = "opencood/utils/box_overlaps.pyx"; +static const char __pyx_k_numpy_core_umath_failed_to_impor[] = "numpy.core.umath failed to import"; +/* #### Code section: decls ### */ +static PyObject *__pyx_pf_8opencood_5utils_12box_overlaps_bbox_overlaps(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_boxes, PyArrayObject *__pyx_v_query_boxes); /* proto */ +static PyObject *__pyx_pf_8opencood_5utils_12box_overlaps_2bbox_intersections(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_boxes, PyArrayObject *__pyx_v_query_boxes); /* proto */ +static PyObject 
*__pyx_pf_8opencood_5utils_12box_overlaps_4box_vote(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_dets_NMS, PyArrayObject *__pyx_v_dets_all); /* proto */ +/* #### Code section: late_includes ### */ +/* #### Code section: module_state ### */ +typedef struct { + PyObject *__pyx_d; + PyObject *__pyx_b; + PyObject *__pyx_cython_runtime; + PyObject *__pyx_empty_tuple; + PyObject *__pyx_empty_bytes; + PyObject *__pyx_empty_unicode; + #ifdef __Pyx_CyFunction_USED + PyTypeObject *__pyx_CyFunctionType; + #endif + #ifdef __Pyx_FusedFunction_USED + PyTypeObject *__pyx_FusedFunctionType; + #endif + #ifdef __Pyx_Generator_USED + PyTypeObject *__pyx_GeneratorType; + #endif + #ifdef __Pyx_IterableCoroutine_USED + PyTypeObject *__pyx_IterableCoroutineType; + #endif + #ifdef __Pyx_Coroutine_USED + PyTypeObject *__pyx_CoroutineAwaitType; + #endif + #ifdef __Pyx_Coroutine_USED + PyTypeObject *__pyx_CoroutineType; + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + PyTypeObject *__pyx_ptype_7cpython_4type_type; + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + PyTypeObject *__pyx_ptype_5numpy_dtype; + PyTypeObject *__pyx_ptype_5numpy_flatiter; + PyTypeObject *__pyx_ptype_5numpy_broadcast; + PyTypeObject *__pyx_ptype_5numpy_ndarray; + PyTypeObject *__pyx_ptype_5numpy_generic; + PyTypeObject *__pyx_ptype_5numpy_number; + PyTypeObject *__pyx_ptype_5numpy_integer; + PyTypeObject *__pyx_ptype_5numpy_signedinteger; + PyTypeObject *__pyx_ptype_5numpy_unsignedinteger; + PyTypeObject *__pyx_ptype_5numpy_inexact; + PyTypeObject *__pyx_ptype_5numpy_floating; + PyTypeObject *__pyx_ptype_5numpy_complexfloating; + PyTypeObject *__pyx_ptype_5numpy_flexible; + PyTypeObject *__pyx_ptype_5numpy_character; + PyTypeObject *__pyx_ptype_5numpy_ufunc; + #if CYTHON_USE_MODULE_STATE + #endif + PyObject *__pyx_n_s_DTYPE; + PyObject *__pyx_n_s_ImportError; + PyObject *__pyx_n_s_K; + PyObject *__pyx_n_s_M; + PyObject *__pyx_n_s_N; + PyObject *__pyx_n_s__13; + PyObject *__pyx_n_s__6; + PyObject *__pyx_n_s_acc_box; + PyObject *__pyx_n_s_acc_score; + PyObject *__pyx_n_s_asyncio_coroutines; + PyObject *__pyx_n_s_bbox_intersections; + PyObject *__pyx_n_s_bbox_overlaps; + PyObject *__pyx_n_s_bi0; + PyObject *__pyx_n_s_bi1; + PyObject *__pyx_n_s_bi2; + PyObject *__pyx_n_s_bi3; + PyObject *__pyx_n_s_bit2; + PyObject *__pyx_n_s_box_area; + PyObject *__pyx_n_s_box_vote; + PyObject *__pyx_n_s_boxes; + PyObject *__pyx_n_s_class_getitem; + PyObject *__pyx_n_s_cline_in_traceback; + PyObject *__pyx_n_s_det; + PyObject *__pyx_n_s_det2; + PyObject *__pyx_n_s_dets_NMS; + PyObject *__pyx_n_s_dets_all; + PyObject *__pyx_n_s_dets_voted; + PyObject *__pyx_n_s_dtype; + PyObject *__pyx_n_s_float32; + PyObject *__pyx_n_s_i; + PyObject *__pyx_n_s_ih; + PyObject *__pyx_n_s_import; + PyObject *__pyx_n_s_initializing; + PyObject *__pyx_n_s_intersec; + PyObject *__pyx_n_s_is_coroutine; + PyObject *__pyx_n_s_iw; + PyObject *__pyx_n_s_k; + PyObject *__pyx_n_s_m; + PyObject *__pyx_n_s_main; + PyObject *__pyx_n_s_n; + PyObject *__pyx_n_s_name; + PyObject *__pyx_n_s_np; + PyObject *__pyx_n_s_numpy; + PyObject *__pyx_kp_s_numpy_core_multiarray_failed_to; + PyObject *__pyx_kp_s_numpy_core_umath_failed_to_impor; + PyObject *__pyx_n_s_opencood_utils_box_overlaps; + PyObject 
*__pyx_kp_s_opencood_utils_box_overlaps_pyx; + PyObject *__pyx_n_s_ov; + PyObject *__pyx_n_s_overlaps; + PyObject *__pyx_n_s_query_boxes; + PyObject *__pyx_n_s_range; + PyObject *__pyx_n_s_spec; + PyObject *__pyx_n_s_test; + PyObject *__pyx_n_s_thresh; + PyObject *__pyx_n_s_ua; + PyObject *__pyx_n_s_zeros; + PyObject *__pyx_int_0; + PyObject *__pyx_int_1; + PyObject *__pyx_int_4; + PyObject *__pyx_tuple_; + PyObject *__pyx_slice__3; + PyObject *__pyx_slice__5; + PyObject *__pyx_tuple__2; + PyObject *__pyx_tuple__4; + PyObject *__pyx_tuple__7; + PyObject *__pyx_tuple__9; + PyObject *__pyx_tuple__11; + PyObject *__pyx_codeobj__8; + PyObject *__pyx_codeobj__10; + PyObject *__pyx_codeobj__12; +} __pyx_mstate; + +#if CYTHON_USE_MODULE_STATE +#ifdef __cplusplus +namespace { + extern struct PyModuleDef __pyx_moduledef; +} /* anonymous namespace */ +#else +static struct PyModuleDef __pyx_moduledef; +#endif + +#define __pyx_mstate(o) ((__pyx_mstate *)__Pyx_PyModule_GetState(o)) + +#define __pyx_mstate_global (__pyx_mstate(PyState_FindModule(&__pyx_moduledef))) + +#define __pyx_m (PyState_FindModule(&__pyx_moduledef)) +#else +static __pyx_mstate __pyx_mstate_global_static = +#ifdef __cplusplus + {}; +#else + {0}; +#endif +static __pyx_mstate *__pyx_mstate_global = &__pyx_mstate_global_static; +#endif +/* #### Code section: module_state_clear ### */ +#if CYTHON_USE_MODULE_STATE +static int __pyx_m_clear(PyObject *m) { + __pyx_mstate *clear_module_state = __pyx_mstate(m); + if (!clear_module_state) return 0; + Py_CLEAR(clear_module_state->__pyx_d); + Py_CLEAR(clear_module_state->__pyx_b); + Py_CLEAR(clear_module_state->__pyx_cython_runtime); + Py_CLEAR(clear_module_state->__pyx_empty_tuple); + Py_CLEAR(clear_module_state->__pyx_empty_bytes); + Py_CLEAR(clear_module_state->__pyx_empty_unicode); + #ifdef __Pyx_CyFunction_USED + Py_CLEAR(clear_module_state->__pyx_CyFunctionType); + #endif + #ifdef __Pyx_FusedFunction_USED + Py_CLEAR(clear_module_state->__pyx_FusedFunctionType); + #endif + Py_CLEAR(clear_module_state->__pyx_ptype_7cpython_4type_type); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_dtype); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_flatiter); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_broadcast); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_ndarray); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_generic); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_number); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_integer); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_signedinteger); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_unsignedinteger); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_inexact); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_floating); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_complexfloating); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_flexible); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_character); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_ufunc); + Py_CLEAR(clear_module_state->__pyx_n_s_DTYPE); + Py_CLEAR(clear_module_state->__pyx_n_s_ImportError); + Py_CLEAR(clear_module_state->__pyx_n_s_K); + Py_CLEAR(clear_module_state->__pyx_n_s_M); + Py_CLEAR(clear_module_state->__pyx_n_s_N); + Py_CLEAR(clear_module_state->__pyx_n_s__13); + Py_CLEAR(clear_module_state->__pyx_n_s__6); + Py_CLEAR(clear_module_state->__pyx_n_s_acc_box); + Py_CLEAR(clear_module_state->__pyx_n_s_acc_score); + Py_CLEAR(clear_module_state->__pyx_n_s_asyncio_coroutines); + 
Py_CLEAR(clear_module_state->__pyx_n_s_bbox_intersections); + Py_CLEAR(clear_module_state->__pyx_n_s_bbox_overlaps); + Py_CLEAR(clear_module_state->__pyx_n_s_bi0); + Py_CLEAR(clear_module_state->__pyx_n_s_bi1); + Py_CLEAR(clear_module_state->__pyx_n_s_bi2); + Py_CLEAR(clear_module_state->__pyx_n_s_bi3); + Py_CLEAR(clear_module_state->__pyx_n_s_bit2); + Py_CLEAR(clear_module_state->__pyx_n_s_box_area); + Py_CLEAR(clear_module_state->__pyx_n_s_box_vote); + Py_CLEAR(clear_module_state->__pyx_n_s_boxes); + Py_CLEAR(clear_module_state->__pyx_n_s_class_getitem); + Py_CLEAR(clear_module_state->__pyx_n_s_cline_in_traceback); + Py_CLEAR(clear_module_state->__pyx_n_s_det); + Py_CLEAR(clear_module_state->__pyx_n_s_det2); + Py_CLEAR(clear_module_state->__pyx_n_s_dets_NMS); + Py_CLEAR(clear_module_state->__pyx_n_s_dets_all); + Py_CLEAR(clear_module_state->__pyx_n_s_dets_voted); + Py_CLEAR(clear_module_state->__pyx_n_s_dtype); + Py_CLEAR(clear_module_state->__pyx_n_s_float32); + Py_CLEAR(clear_module_state->__pyx_n_s_i); + Py_CLEAR(clear_module_state->__pyx_n_s_ih); + Py_CLEAR(clear_module_state->__pyx_n_s_import); + Py_CLEAR(clear_module_state->__pyx_n_s_initializing); + Py_CLEAR(clear_module_state->__pyx_n_s_intersec); + Py_CLEAR(clear_module_state->__pyx_n_s_is_coroutine); + Py_CLEAR(clear_module_state->__pyx_n_s_iw); + Py_CLEAR(clear_module_state->__pyx_n_s_k); + Py_CLEAR(clear_module_state->__pyx_n_s_m); + Py_CLEAR(clear_module_state->__pyx_n_s_main); + Py_CLEAR(clear_module_state->__pyx_n_s_n); + Py_CLEAR(clear_module_state->__pyx_n_s_name); + Py_CLEAR(clear_module_state->__pyx_n_s_np); + Py_CLEAR(clear_module_state->__pyx_n_s_numpy); + Py_CLEAR(clear_module_state->__pyx_kp_s_numpy_core_multiarray_failed_to); + Py_CLEAR(clear_module_state->__pyx_kp_s_numpy_core_umath_failed_to_impor); + Py_CLEAR(clear_module_state->__pyx_n_s_opencood_utils_box_overlaps); + Py_CLEAR(clear_module_state->__pyx_kp_s_opencood_utils_box_overlaps_pyx); + Py_CLEAR(clear_module_state->__pyx_n_s_ov); + Py_CLEAR(clear_module_state->__pyx_n_s_overlaps); + Py_CLEAR(clear_module_state->__pyx_n_s_query_boxes); + Py_CLEAR(clear_module_state->__pyx_n_s_range); + Py_CLEAR(clear_module_state->__pyx_n_s_spec); + Py_CLEAR(clear_module_state->__pyx_n_s_test); + Py_CLEAR(clear_module_state->__pyx_n_s_thresh); + Py_CLEAR(clear_module_state->__pyx_n_s_ua); + Py_CLEAR(clear_module_state->__pyx_n_s_zeros); + Py_CLEAR(clear_module_state->__pyx_int_0); + Py_CLEAR(clear_module_state->__pyx_int_1); + Py_CLEAR(clear_module_state->__pyx_int_4); + Py_CLEAR(clear_module_state->__pyx_tuple_); + Py_CLEAR(clear_module_state->__pyx_slice__3); + Py_CLEAR(clear_module_state->__pyx_slice__5); + Py_CLEAR(clear_module_state->__pyx_tuple__2); + Py_CLEAR(clear_module_state->__pyx_tuple__4); + Py_CLEAR(clear_module_state->__pyx_tuple__7); + Py_CLEAR(clear_module_state->__pyx_tuple__9); + Py_CLEAR(clear_module_state->__pyx_tuple__11); + Py_CLEAR(clear_module_state->__pyx_codeobj__8); + Py_CLEAR(clear_module_state->__pyx_codeobj__10); + Py_CLEAR(clear_module_state->__pyx_codeobj__12); + return 0; +} +#endif +/* #### Code section: module_state_traverse ### */ +#if CYTHON_USE_MODULE_STATE +static int __pyx_m_traverse(PyObject *m, visitproc visit, void *arg) { + __pyx_mstate *traverse_module_state = __pyx_mstate(m); + if (!traverse_module_state) return 0; + Py_VISIT(traverse_module_state->__pyx_d); + Py_VISIT(traverse_module_state->__pyx_b); + Py_VISIT(traverse_module_state->__pyx_cython_runtime); + Py_VISIT(traverse_module_state->__pyx_empty_tuple); + 
Py_VISIT(traverse_module_state->__pyx_empty_bytes); + Py_VISIT(traverse_module_state->__pyx_empty_unicode); + #ifdef __Pyx_CyFunction_USED + Py_VISIT(traverse_module_state->__pyx_CyFunctionType); + #endif + #ifdef __Pyx_FusedFunction_USED + Py_VISIT(traverse_module_state->__pyx_FusedFunctionType); + #endif + Py_VISIT(traverse_module_state->__pyx_ptype_7cpython_4type_type); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_dtype); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_flatiter); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_broadcast); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_ndarray); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_generic); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_number); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_integer); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_signedinteger); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_unsignedinteger); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_inexact); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_floating); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_complexfloating); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_flexible); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_character); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_ufunc); + Py_VISIT(traverse_module_state->__pyx_n_s_DTYPE); + Py_VISIT(traverse_module_state->__pyx_n_s_ImportError); + Py_VISIT(traverse_module_state->__pyx_n_s_K); + Py_VISIT(traverse_module_state->__pyx_n_s_M); + Py_VISIT(traverse_module_state->__pyx_n_s_N); + Py_VISIT(traverse_module_state->__pyx_n_s__13); + Py_VISIT(traverse_module_state->__pyx_n_s__6); + Py_VISIT(traverse_module_state->__pyx_n_s_acc_box); + Py_VISIT(traverse_module_state->__pyx_n_s_acc_score); + Py_VISIT(traverse_module_state->__pyx_n_s_asyncio_coroutines); + Py_VISIT(traverse_module_state->__pyx_n_s_bbox_intersections); + Py_VISIT(traverse_module_state->__pyx_n_s_bbox_overlaps); + Py_VISIT(traverse_module_state->__pyx_n_s_bi0); + Py_VISIT(traverse_module_state->__pyx_n_s_bi1); + Py_VISIT(traverse_module_state->__pyx_n_s_bi2); + Py_VISIT(traverse_module_state->__pyx_n_s_bi3); + Py_VISIT(traverse_module_state->__pyx_n_s_bit2); + Py_VISIT(traverse_module_state->__pyx_n_s_box_area); + Py_VISIT(traverse_module_state->__pyx_n_s_box_vote); + Py_VISIT(traverse_module_state->__pyx_n_s_boxes); + Py_VISIT(traverse_module_state->__pyx_n_s_class_getitem); + Py_VISIT(traverse_module_state->__pyx_n_s_cline_in_traceback); + Py_VISIT(traverse_module_state->__pyx_n_s_det); + Py_VISIT(traverse_module_state->__pyx_n_s_det2); + Py_VISIT(traverse_module_state->__pyx_n_s_dets_NMS); + Py_VISIT(traverse_module_state->__pyx_n_s_dets_all); + Py_VISIT(traverse_module_state->__pyx_n_s_dets_voted); + Py_VISIT(traverse_module_state->__pyx_n_s_dtype); + Py_VISIT(traverse_module_state->__pyx_n_s_float32); + Py_VISIT(traverse_module_state->__pyx_n_s_i); + Py_VISIT(traverse_module_state->__pyx_n_s_ih); + Py_VISIT(traverse_module_state->__pyx_n_s_import); + Py_VISIT(traverse_module_state->__pyx_n_s_initializing); + Py_VISIT(traverse_module_state->__pyx_n_s_intersec); + Py_VISIT(traverse_module_state->__pyx_n_s_is_coroutine); + Py_VISIT(traverse_module_state->__pyx_n_s_iw); + Py_VISIT(traverse_module_state->__pyx_n_s_k); + Py_VISIT(traverse_module_state->__pyx_n_s_m); + Py_VISIT(traverse_module_state->__pyx_n_s_main); + Py_VISIT(traverse_module_state->__pyx_n_s_n); + Py_VISIT(traverse_module_state->__pyx_n_s_name); + 
Py_VISIT(traverse_module_state->__pyx_n_s_np); + Py_VISIT(traverse_module_state->__pyx_n_s_numpy); + Py_VISIT(traverse_module_state->__pyx_kp_s_numpy_core_multiarray_failed_to); + Py_VISIT(traverse_module_state->__pyx_kp_s_numpy_core_umath_failed_to_impor); + Py_VISIT(traverse_module_state->__pyx_n_s_opencood_utils_box_overlaps); + Py_VISIT(traverse_module_state->__pyx_kp_s_opencood_utils_box_overlaps_pyx); + Py_VISIT(traverse_module_state->__pyx_n_s_ov); + Py_VISIT(traverse_module_state->__pyx_n_s_overlaps); + Py_VISIT(traverse_module_state->__pyx_n_s_query_boxes); + Py_VISIT(traverse_module_state->__pyx_n_s_range); + Py_VISIT(traverse_module_state->__pyx_n_s_spec); + Py_VISIT(traverse_module_state->__pyx_n_s_test); + Py_VISIT(traverse_module_state->__pyx_n_s_thresh); + Py_VISIT(traverse_module_state->__pyx_n_s_ua); + Py_VISIT(traverse_module_state->__pyx_n_s_zeros); + Py_VISIT(traverse_module_state->__pyx_int_0); + Py_VISIT(traverse_module_state->__pyx_int_1); + Py_VISIT(traverse_module_state->__pyx_int_4); + Py_VISIT(traverse_module_state->__pyx_tuple_); + Py_VISIT(traverse_module_state->__pyx_slice__3); + Py_VISIT(traverse_module_state->__pyx_slice__5); + Py_VISIT(traverse_module_state->__pyx_tuple__2); + Py_VISIT(traverse_module_state->__pyx_tuple__4); + Py_VISIT(traverse_module_state->__pyx_tuple__7); + Py_VISIT(traverse_module_state->__pyx_tuple__9); + Py_VISIT(traverse_module_state->__pyx_tuple__11); + Py_VISIT(traverse_module_state->__pyx_codeobj__8); + Py_VISIT(traverse_module_state->__pyx_codeobj__10); + Py_VISIT(traverse_module_state->__pyx_codeobj__12); + return 0; +} +#endif +/* #### Code section: module_state_defines ### */ +#define __pyx_d __pyx_mstate_global->__pyx_d +#define __pyx_b __pyx_mstate_global->__pyx_b +#define __pyx_cython_runtime __pyx_mstate_global->__pyx_cython_runtime +#define __pyx_empty_tuple __pyx_mstate_global->__pyx_empty_tuple +#define __pyx_empty_bytes __pyx_mstate_global->__pyx_empty_bytes +#define __pyx_empty_unicode __pyx_mstate_global->__pyx_empty_unicode +#ifdef __Pyx_CyFunction_USED +#define __pyx_CyFunctionType __pyx_mstate_global->__pyx_CyFunctionType +#endif +#ifdef __Pyx_FusedFunction_USED +#define __pyx_FusedFunctionType __pyx_mstate_global->__pyx_FusedFunctionType +#endif +#ifdef __Pyx_Generator_USED +#define __pyx_GeneratorType __pyx_mstate_global->__pyx_GeneratorType +#endif +#ifdef __Pyx_IterableCoroutine_USED +#define __pyx_IterableCoroutineType __pyx_mstate_global->__pyx_IterableCoroutineType +#endif +#ifdef __Pyx_Coroutine_USED +#define __pyx_CoroutineAwaitType __pyx_mstate_global->__pyx_CoroutineAwaitType +#endif +#ifdef __Pyx_Coroutine_USED +#define __pyx_CoroutineType __pyx_mstate_global->__pyx_CoroutineType +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#define __pyx_ptype_7cpython_4type_type __pyx_mstate_global->__pyx_ptype_7cpython_4type_type +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#define __pyx_ptype_5numpy_dtype __pyx_mstate_global->__pyx_ptype_5numpy_dtype +#define __pyx_ptype_5numpy_flatiter __pyx_mstate_global->__pyx_ptype_5numpy_flatiter +#define __pyx_ptype_5numpy_broadcast __pyx_mstate_global->__pyx_ptype_5numpy_broadcast +#define __pyx_ptype_5numpy_ndarray __pyx_mstate_global->__pyx_ptype_5numpy_ndarray +#define __pyx_ptype_5numpy_generic 
__pyx_mstate_global->__pyx_ptype_5numpy_generic +#define __pyx_ptype_5numpy_number __pyx_mstate_global->__pyx_ptype_5numpy_number +#define __pyx_ptype_5numpy_integer __pyx_mstate_global->__pyx_ptype_5numpy_integer +#define __pyx_ptype_5numpy_signedinteger __pyx_mstate_global->__pyx_ptype_5numpy_signedinteger +#define __pyx_ptype_5numpy_unsignedinteger __pyx_mstate_global->__pyx_ptype_5numpy_unsignedinteger +#define __pyx_ptype_5numpy_inexact __pyx_mstate_global->__pyx_ptype_5numpy_inexact +#define __pyx_ptype_5numpy_floating __pyx_mstate_global->__pyx_ptype_5numpy_floating +#define __pyx_ptype_5numpy_complexfloating __pyx_mstate_global->__pyx_ptype_5numpy_complexfloating +#define __pyx_ptype_5numpy_flexible __pyx_mstate_global->__pyx_ptype_5numpy_flexible +#define __pyx_ptype_5numpy_character __pyx_mstate_global->__pyx_ptype_5numpy_character +#define __pyx_ptype_5numpy_ufunc __pyx_mstate_global->__pyx_ptype_5numpy_ufunc +#if CYTHON_USE_MODULE_STATE +#endif +#define __pyx_n_s_DTYPE __pyx_mstate_global->__pyx_n_s_DTYPE +#define __pyx_n_s_ImportError __pyx_mstate_global->__pyx_n_s_ImportError +#define __pyx_n_s_K __pyx_mstate_global->__pyx_n_s_K +#define __pyx_n_s_M __pyx_mstate_global->__pyx_n_s_M +#define __pyx_n_s_N __pyx_mstate_global->__pyx_n_s_N +#define __pyx_n_s__13 __pyx_mstate_global->__pyx_n_s__13 +#define __pyx_n_s__6 __pyx_mstate_global->__pyx_n_s__6 +#define __pyx_n_s_acc_box __pyx_mstate_global->__pyx_n_s_acc_box +#define __pyx_n_s_acc_score __pyx_mstate_global->__pyx_n_s_acc_score +#define __pyx_n_s_asyncio_coroutines __pyx_mstate_global->__pyx_n_s_asyncio_coroutines +#define __pyx_n_s_bbox_intersections __pyx_mstate_global->__pyx_n_s_bbox_intersections +#define __pyx_n_s_bbox_overlaps __pyx_mstate_global->__pyx_n_s_bbox_overlaps +#define __pyx_n_s_bi0 __pyx_mstate_global->__pyx_n_s_bi0 +#define __pyx_n_s_bi1 __pyx_mstate_global->__pyx_n_s_bi1 +#define __pyx_n_s_bi2 __pyx_mstate_global->__pyx_n_s_bi2 +#define __pyx_n_s_bi3 __pyx_mstate_global->__pyx_n_s_bi3 +#define __pyx_n_s_bit2 __pyx_mstate_global->__pyx_n_s_bit2 +#define __pyx_n_s_box_area __pyx_mstate_global->__pyx_n_s_box_area +#define __pyx_n_s_box_vote __pyx_mstate_global->__pyx_n_s_box_vote +#define __pyx_n_s_boxes __pyx_mstate_global->__pyx_n_s_boxes +#define __pyx_n_s_class_getitem __pyx_mstate_global->__pyx_n_s_class_getitem +#define __pyx_n_s_cline_in_traceback __pyx_mstate_global->__pyx_n_s_cline_in_traceback +#define __pyx_n_s_det __pyx_mstate_global->__pyx_n_s_det +#define __pyx_n_s_det2 __pyx_mstate_global->__pyx_n_s_det2 +#define __pyx_n_s_dets_NMS __pyx_mstate_global->__pyx_n_s_dets_NMS +#define __pyx_n_s_dets_all __pyx_mstate_global->__pyx_n_s_dets_all +#define __pyx_n_s_dets_voted __pyx_mstate_global->__pyx_n_s_dets_voted +#define __pyx_n_s_dtype __pyx_mstate_global->__pyx_n_s_dtype +#define __pyx_n_s_float32 __pyx_mstate_global->__pyx_n_s_float32 +#define __pyx_n_s_i __pyx_mstate_global->__pyx_n_s_i +#define __pyx_n_s_ih __pyx_mstate_global->__pyx_n_s_ih +#define __pyx_n_s_import __pyx_mstate_global->__pyx_n_s_import +#define __pyx_n_s_initializing __pyx_mstate_global->__pyx_n_s_initializing +#define __pyx_n_s_intersec __pyx_mstate_global->__pyx_n_s_intersec +#define __pyx_n_s_is_coroutine __pyx_mstate_global->__pyx_n_s_is_coroutine +#define __pyx_n_s_iw __pyx_mstate_global->__pyx_n_s_iw +#define __pyx_n_s_k __pyx_mstate_global->__pyx_n_s_k +#define __pyx_n_s_m __pyx_mstate_global->__pyx_n_s_m +#define __pyx_n_s_main __pyx_mstate_global->__pyx_n_s_main +#define __pyx_n_s_n __pyx_mstate_global->__pyx_n_s_n 
+#define __pyx_n_s_name __pyx_mstate_global->__pyx_n_s_name +#define __pyx_n_s_np __pyx_mstate_global->__pyx_n_s_np +#define __pyx_n_s_numpy __pyx_mstate_global->__pyx_n_s_numpy +#define __pyx_kp_s_numpy_core_multiarray_failed_to __pyx_mstate_global->__pyx_kp_s_numpy_core_multiarray_failed_to +#define __pyx_kp_s_numpy_core_umath_failed_to_impor __pyx_mstate_global->__pyx_kp_s_numpy_core_umath_failed_to_impor +#define __pyx_n_s_opencood_utils_box_overlaps __pyx_mstate_global->__pyx_n_s_opencood_utils_box_overlaps +#define __pyx_kp_s_opencood_utils_box_overlaps_pyx __pyx_mstate_global->__pyx_kp_s_opencood_utils_box_overlaps_pyx +#define __pyx_n_s_ov __pyx_mstate_global->__pyx_n_s_ov +#define __pyx_n_s_overlaps __pyx_mstate_global->__pyx_n_s_overlaps +#define __pyx_n_s_query_boxes __pyx_mstate_global->__pyx_n_s_query_boxes +#define __pyx_n_s_range __pyx_mstate_global->__pyx_n_s_range +#define __pyx_n_s_spec __pyx_mstate_global->__pyx_n_s_spec +#define __pyx_n_s_test __pyx_mstate_global->__pyx_n_s_test +#define __pyx_n_s_thresh __pyx_mstate_global->__pyx_n_s_thresh +#define __pyx_n_s_ua __pyx_mstate_global->__pyx_n_s_ua +#define __pyx_n_s_zeros __pyx_mstate_global->__pyx_n_s_zeros +#define __pyx_int_0 __pyx_mstate_global->__pyx_int_0 +#define __pyx_int_1 __pyx_mstate_global->__pyx_int_1 +#define __pyx_int_4 __pyx_mstate_global->__pyx_int_4 +#define __pyx_tuple_ __pyx_mstate_global->__pyx_tuple_ +#define __pyx_slice__3 __pyx_mstate_global->__pyx_slice__3 +#define __pyx_slice__5 __pyx_mstate_global->__pyx_slice__5 +#define __pyx_tuple__2 __pyx_mstate_global->__pyx_tuple__2 +#define __pyx_tuple__4 __pyx_mstate_global->__pyx_tuple__4 +#define __pyx_tuple__7 __pyx_mstate_global->__pyx_tuple__7 +#define __pyx_tuple__9 __pyx_mstate_global->__pyx_tuple__9 +#define __pyx_tuple__11 __pyx_mstate_global->__pyx_tuple__11 +#define __pyx_codeobj__8 __pyx_mstate_global->__pyx_codeobj__8 +#define __pyx_codeobj__10 __pyx_mstate_global->__pyx_codeobj__10 +#define __pyx_codeobj__12 __pyx_mstate_global->__pyx_codeobj__12 +/* #### Code section: module_code ### */ + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":250 + * + * @property + * cdef inline PyObject* base(self) nogil: # <<<<<<<<<<<<<< + * """Returns a borrowed reference to the object owning the data/memory. + * """ + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_7ndarray_4base_base(PyArrayObject *__pyx_v_self) { + PyObject *__pyx_r; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":253 + * """Returns a borrowed reference to the object owning the data/memory. + * """ + * return PyArray_BASE(self) # <<<<<<<<<<<<<< + * + * @property + */ + __pyx_r = PyArray_BASE(__pyx_v_self); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":250 + * + * @property + * cdef inline PyObject* base(self) nogil: # <<<<<<<<<<<<<< + * """Returns a borrowed reference to the object owning the data/memory. + * """ + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":256 + * + * @property + * cdef inline dtype descr(self): # <<<<<<<<<<<<<< + * """Returns an owned reference to the dtype of the array. 
+ * """ + */ + +static CYTHON_INLINE PyArray_Descr *__pyx_f_5numpy_7ndarray_5descr_descr(PyArrayObject *__pyx_v_self) { + PyArray_Descr *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyArray_Descr *__pyx_t_1; + __Pyx_RefNannySetupContext("descr", 1); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":259 + * """Returns an owned reference to the dtype of the array. + * """ + * return PyArray_DESCR(self) # <<<<<<<<<<<<<< + * + * @property + */ + __Pyx_XDECREF((PyObject *)__pyx_r); + __pyx_t_1 = PyArray_DESCR(__pyx_v_self); + __Pyx_INCREF((PyObject *)((PyArray_Descr *)__pyx_t_1)); + __pyx_r = ((PyArray_Descr *)__pyx_t_1); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":256 + * + * @property + * cdef inline dtype descr(self): # <<<<<<<<<<<<<< + * """Returns an owned reference to the dtype of the array. + * """ + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_XGIVEREF((PyObject *)__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":262 + * + * @property + * cdef inline int ndim(self) nogil: # <<<<<<<<<<<<<< + * """Returns the number of dimensions in the array. + * """ + */ + +static CYTHON_INLINE int __pyx_f_5numpy_7ndarray_4ndim_ndim(PyArrayObject *__pyx_v_self) { + int __pyx_r; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":265 + * """Returns the number of dimensions in the array. + * """ + * return PyArray_NDIM(self) # <<<<<<<<<<<<<< + * + * @property + */ + __pyx_r = PyArray_NDIM(__pyx_v_self); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":262 + * + * @property + * cdef inline int ndim(self) nogil: # <<<<<<<<<<<<<< + * """Returns the number of dimensions in the array. + * """ + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":268 + * + * @property + * cdef inline npy_intp *shape(self) nogil: # <<<<<<<<<<<<<< + * """Returns a pointer to the dimensions/shape of the array. + * The number of elements matches the number of dimensions of the array (ndim). + */ + +static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_5shape_shape(PyArrayObject *__pyx_v_self) { + npy_intp *__pyx_r; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":273 + * Can return NULL for 0-dimensional arrays. + * """ + * return PyArray_DIMS(self) # <<<<<<<<<<<<<< + * + * @property + */ + __pyx_r = PyArray_DIMS(__pyx_v_self); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":268 + * + * @property + * cdef inline npy_intp *shape(self) nogil: # <<<<<<<<<<<<<< + * """Returns a pointer to the dimensions/shape of the array. + * The number of elements matches the number of dimensions of the array (ndim). + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":276 + * + * @property + * cdef inline npy_intp *strides(self) nogil: # <<<<<<<<<<<<<< + * """Returns a pointer to the strides of the array. + * The number of elements matches the number of dimensions of the array (ndim). 
+ */ + +static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_7strides_strides(PyArrayObject *__pyx_v_self) { + npy_intp *__pyx_r; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":280 + * The number of elements matches the number of dimensions of the array (ndim). + * """ + * return PyArray_STRIDES(self) # <<<<<<<<<<<<<< + * + * @property + */ + __pyx_r = PyArray_STRIDES(__pyx_v_self); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":276 + * + * @property + * cdef inline npy_intp *strides(self) nogil: # <<<<<<<<<<<<<< + * """Returns a pointer to the strides of the array. + * The number of elements matches the number of dimensions of the array (ndim). + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":283 + * + * @property + * cdef inline npy_intp size(self) nogil: # <<<<<<<<<<<<<< + * """Returns the total size (in number of elements) of the array. + * """ + */ + +static CYTHON_INLINE npy_intp __pyx_f_5numpy_7ndarray_4size_size(PyArrayObject *__pyx_v_self) { + npy_intp __pyx_r; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":286 + * """Returns the total size (in number of elements) of the array. + * """ + * return PyArray_SIZE(self) # <<<<<<<<<<<<<< + * + * @property + */ + __pyx_r = PyArray_SIZE(__pyx_v_self); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":283 + * + * @property + * cdef inline npy_intp size(self) nogil: # <<<<<<<<<<<<<< + * """Returns the total size (in number of elements) of the array. + * """ + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":289 + * + * @property + * cdef inline char* data(self) nogil: # <<<<<<<<<<<<<< + * """The pointer to the data buffer as a char*. + * This is provided for legacy reasons to avoid direct struct field access. + */ + +static CYTHON_INLINE char *__pyx_f_5numpy_7ndarray_4data_data(PyArrayObject *__pyx_v_self) { + char *__pyx_r; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":295 + * of `PyArray_DATA()` instead, which returns a 'void*'. + * """ + * return PyArray_BYTES(self) # <<<<<<<<<<<<<< + * + * ctypedef unsigned char npy_bool + */ + __pyx_r = PyArray_BYTES(__pyx_v_self); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":289 + * + * @property + * cdef inline char* data(self) nogil: # <<<<<<<<<<<<<< + * """The pointer to the data buffer as a char*. + * This is provided for legacy reasons to avoid direct struct field access. 
+ */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":779 + * ctypedef npy_cdouble complex_t + * + * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(1, a) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__pyx_v_a) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew1", 1); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":780 + * + * cdef inline object PyArray_MultiIterNew1(a): + * return PyArray_MultiIterNew(1, a) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew2(a, b): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(1, ((void *)__pyx_v_a)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 780, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":779 + * ctypedef npy_cdouble complex_t + * + * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(1, a) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew1", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":782 + * return PyArray_MultiIterNew(1, a) + * + * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(2, a, b) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__pyx_v_a, PyObject *__pyx_v_b) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew2", 1); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":783 + * + * cdef inline object PyArray_MultiIterNew2(a, b): + * return PyArray_MultiIterNew(2, a, b) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(2, ((void *)__pyx_v_a), ((void *)__pyx_v_b)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 783, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":782 + * return PyArray_MultiIterNew(1, a) + * + * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(2, a, b) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew2", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":785 + * return 
PyArray_MultiIterNew(2, a, b) + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(3, a, b, c) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew3", 1); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":786 + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): + * return PyArray_MultiIterNew(3, a, b, c) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(3, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 786, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":785 + * return PyArray_MultiIterNew(2, a, b) + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(3, a, b, c) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew3", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":788 + * return PyArray_MultiIterNew(3, a, b, c) + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(4, a, b, c, d) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew4", 1); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":789 + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): + * return PyArray_MultiIterNew(4, a, b, c, d) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(4, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 789, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":788 + * return PyArray_MultiIterNew(3, a, b, c) + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(4, a, b, c, d) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew4", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* 
"../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":791 + * return PyArray_MultiIterNew(4, a, b, c, d) + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d, PyObject *__pyx_v_e) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew5", 1); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":792 + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): + * return PyArray_MultiIterNew(5, a, b, c, d, e) # <<<<<<<<<<<<<< + * + * cdef inline tuple PyDataType_SHAPE(dtype d): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(5, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d), ((void *)__pyx_v_e)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 792, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":791 + * return PyArray_MultiIterNew(4, a, b, c, d) + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew5", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":794 + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<< + * if PyDataType_HASSUBARRAY(d): + * return d.subarray.shape + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__pyx_v_d) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + __Pyx_RefNannySetupContext("PyDataType_SHAPE", 1); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":795 + * + * cdef inline tuple PyDataType_SHAPE(dtype d): + * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<< + * return d.subarray.shape + * else: + */ + __pyx_t_1 = PyDataType_HASSUBARRAY(__pyx_v_d); + if (__pyx_t_1) { + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":796 + * cdef inline tuple PyDataType_SHAPE(dtype d): + * if PyDataType_HASSUBARRAY(d): + * return d.subarray.shape # <<<<<<<<<<<<<< + * else: + * return () + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(((PyObject*)__pyx_v_d->subarray->shape)); + __pyx_r = ((PyObject*)__pyx_v_d->subarray->shape); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":795 + * + * cdef inline tuple PyDataType_SHAPE(dtype d): + * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<< + * return d.subarray.shape + * else: + */ + } + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":798 + * return 
d.subarray.shape + * else: + * return () # <<<<<<<<<<<<<< + * + * + */ + /*else*/ { + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_empty_tuple); + __pyx_r = __pyx_empty_tuple; + goto __pyx_L0; + } + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":794 + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<< + * if PyDataType_HASSUBARRAY(d): + * return d.subarray.shape + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":975 + * int _import_umath() except -1 + * + * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<< + * Py_INCREF(base) # important to do this before stealing the reference below! + * PyArray_SetBaseObject(arr, base) + */ + +static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_arr, PyObject *__pyx_v_base) { + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":976 + * + * cdef inline void set_array_base(ndarray arr, object base): + * Py_INCREF(base) # important to do this before stealing the reference below! # <<<<<<<<<<<<<< + * PyArray_SetBaseObject(arr, base) + * + */ + Py_INCREF(__pyx_v_base); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":977 + * cdef inline void set_array_base(ndarray arr, object base): + * Py_INCREF(base) # important to do this before stealing the reference below! + * PyArray_SetBaseObject(arr, base) # <<<<<<<<<<<<<< + * + * cdef inline object get_array_base(ndarray arr): + */ + (void)(PyArray_SetBaseObject(__pyx_v_arr, __pyx_v_base)); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":975 + * int _import_umath() except -1 + * + * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<< + * Py_INCREF(base) # important to do this before stealing the reference below! 
+ * PyArray_SetBaseObject(arr, base) + */ + + /* function exit code */ +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":979 + * PyArray_SetBaseObject(arr, base) + * + * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< + * base = PyArray_BASE(arr) + * if base is NULL: + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__pyx_v_arr) { + PyObject *__pyx_v_base; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + __Pyx_RefNannySetupContext("get_array_base", 1); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":980 + * + * cdef inline object get_array_base(ndarray arr): + * base = PyArray_BASE(arr) # <<<<<<<<<<<<<< + * if base is NULL: + * return None + */ + __pyx_v_base = PyArray_BASE(__pyx_v_arr); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":981 + * cdef inline object get_array_base(ndarray arr): + * base = PyArray_BASE(arr) + * if base is NULL: # <<<<<<<<<<<<<< + * return None + * return base + */ + __pyx_t_1 = (__pyx_v_base == NULL); + if (__pyx_t_1) { + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":982 + * base = PyArray_BASE(arr) + * if base is NULL: + * return None # <<<<<<<<<<<<<< + * return base + * + */ + __Pyx_XDECREF(__pyx_r); + __pyx_r = Py_None; __Pyx_INCREF(Py_None); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":981 + * cdef inline object get_array_base(ndarray arr): + * base = PyArray_BASE(arr) + * if base is NULL: # <<<<<<<<<<<<<< + * return None + * return base + */ + } + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":983 + * if base is NULL: + * return None + * return base # <<<<<<<<<<<<<< + * + * # Versions of the import_* functions which are more suitable for + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(((PyObject *)__pyx_v_base)); + __pyx_r = ((PyObject *)__pyx_v_base); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":979 + * PyArray_SetBaseObject(arr, base) + * + * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< + * base = PyArray_BASE(arr) + * if base is NULL: + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":987 + * # Versions of the import_* functions which are more suitable for + * # Cython code. + * cdef inline int import_array() except -1: # <<<<<<<<<<<<<< + * try: + * __pyx_import_array() + */ + +static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + int __pyx_t_4; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyObject *__pyx_t_7 = NULL; + PyObject *__pyx_t_8 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("import_array", 1); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":988 + * # Cython code. 
+ * cdef inline int import_array() except -1: + * try: # <<<<<<<<<<<<<< + * __pyx_import_array() + * except Exception: + */ + { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); + __Pyx_XGOTREF(__pyx_t_1); + __Pyx_XGOTREF(__pyx_t_2); + __Pyx_XGOTREF(__pyx_t_3); + /*try:*/ { + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":989 + * cdef inline int import_array() except -1: + * try: + * __pyx_import_array() # <<<<<<<<<<<<<< + * except Exception: + * raise ImportError("numpy.core.multiarray failed to import") + */ + __pyx_t_4 = _import_array(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 989, __pyx_L3_error) + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":988 + * # Cython code. + * cdef inline int import_array() except -1: + * try: # <<<<<<<<<<<<<< + * __pyx_import_array() + * except Exception: + */ + } + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + goto __pyx_L8_try_end; + __pyx_L3_error:; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":990 + * try: + * __pyx_import_array() + * except Exception: # <<<<<<<<<<<<<< + * raise ImportError("numpy.core.multiarray failed to import") + * + */ + __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0]))); + if (__pyx_t_4) { + __Pyx_AddTraceback("numpy.import_array", __pyx_clineno, __pyx_lineno, __pyx_filename); + if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(1, 990, __pyx_L5_except_error) + __Pyx_XGOTREF(__pyx_t_5); + __Pyx_XGOTREF(__pyx_t_6); + __Pyx_XGOTREF(__pyx_t_7); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":991 + * __pyx_import_array() + * except Exception: + * raise ImportError("numpy.core.multiarray failed to import") # <<<<<<<<<<<<<< + * + * cdef inline int import_umath() except -1: + */ + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 991, __pyx_L5_except_error) + __Pyx_GOTREF(__pyx_t_8); + __Pyx_Raise(__pyx_t_8, 0, 0, 0); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __PYX_ERR(1, 991, __pyx_L5_except_error) + } + goto __pyx_L5_except_error; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":988 + * # Cython code. + * cdef inline int import_array() except -1: + * try: # <<<<<<<<<<<<<< + * __pyx_import_array() + * except Exception: + */ + __pyx_L5_except_error:; + __Pyx_XGIVEREF(__pyx_t_1); + __Pyx_XGIVEREF(__pyx_t_2); + __Pyx_XGIVEREF(__pyx_t_3); + __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); + goto __pyx_L1_error; + __pyx_L8_try_end:; + } + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":987 + * # Versions of the import_* functions which are more suitable for + * # Cython code. 
+ * cdef inline int import_array() except -1: # <<<<<<<<<<<<<< + * try: + * __pyx_import_array() + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_AddTraceback("numpy.import_array", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":993 + * raise ImportError("numpy.core.multiarray failed to import") + * + * cdef inline int import_umath() except -1: # <<<<<<<<<<<<<< + * try: + * _import_umath() + */ + +static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + int __pyx_t_4; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyObject *__pyx_t_7 = NULL; + PyObject *__pyx_t_8 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("import_umath", 1); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":994 + * + * cdef inline int import_umath() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); + __Pyx_XGOTREF(__pyx_t_1); + __Pyx_XGOTREF(__pyx_t_2); + __Pyx_XGOTREF(__pyx_t_3); + /*try:*/ { + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":995 + * cdef inline int import_umath() except -1: + * try: + * _import_umath() # <<<<<<<<<<<<<< + * except Exception: + * raise ImportError("numpy.core.umath failed to import") + */ + __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 995, __pyx_L3_error) + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":994 + * + * cdef inline int import_umath() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + } + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + goto __pyx_L8_try_end; + __pyx_L3_error:; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":996 + * try: + * _import_umath() + * except Exception: # <<<<<<<<<<<<<< + * raise ImportError("numpy.core.umath failed to import") + * + */ + __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0]))); + if (__pyx_t_4) { + __Pyx_AddTraceback("numpy.import_umath", __pyx_clineno, __pyx_lineno, __pyx_filename); + if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(1, 996, __pyx_L5_except_error) + __Pyx_XGOTREF(__pyx_t_5); + __Pyx_XGOTREF(__pyx_t_6); + __Pyx_XGOTREF(__pyx_t_7); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":997 + * _import_umath() + * except Exception: + * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< + * + * cdef inline int import_ufunc() except -1: + */ + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 997, __pyx_L5_except_error) + 
__Pyx_GOTREF(__pyx_t_8); + __Pyx_Raise(__pyx_t_8, 0, 0, 0); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __PYX_ERR(1, 997, __pyx_L5_except_error) + } + goto __pyx_L5_except_error; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":994 + * + * cdef inline int import_umath() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + __pyx_L5_except_error:; + __Pyx_XGIVEREF(__pyx_t_1); + __Pyx_XGIVEREF(__pyx_t_2); + __Pyx_XGIVEREF(__pyx_t_3); + __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); + goto __pyx_L1_error; + __pyx_L8_try_end:; + } + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":993 + * raise ImportError("numpy.core.multiarray failed to import") + * + * cdef inline int import_umath() except -1: # <<<<<<<<<<<<<< + * try: + * _import_umath() + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_AddTraceback("numpy.import_umath", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":999 + * raise ImportError("numpy.core.umath failed to import") + * + * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< + * try: + * _import_umath() + */ + +static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + int __pyx_t_4; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyObject *__pyx_t_7 = NULL; + PyObject *__pyx_t_8 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("import_ufunc", 1); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1000 + * + * cdef inline int import_ufunc() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); + __Pyx_XGOTREF(__pyx_t_1); + __Pyx_XGOTREF(__pyx_t_2); + __Pyx_XGOTREF(__pyx_t_3); + /*try:*/ { + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1001 + * cdef inline int import_ufunc() except -1: + * try: + * _import_umath() # <<<<<<<<<<<<<< + * except Exception: + * raise ImportError("numpy.core.umath failed to import") + */ + __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 1001, __pyx_L3_error) + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1000 + * + * cdef inline int import_ufunc() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + } + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + goto __pyx_L8_try_end; + __pyx_L3_error:; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1002 + * try: + * _import_umath() + * except Exception: # <<<<<<<<<<<<<< + * raise ImportError("numpy.core.umath failed to import") + * + */ + __pyx_t_4 = 
__Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0]))); + if (__pyx_t_4) { + __Pyx_AddTraceback("numpy.import_ufunc", __pyx_clineno, __pyx_lineno, __pyx_filename); + if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(1, 1002, __pyx_L5_except_error) + __Pyx_XGOTREF(__pyx_t_5); + __Pyx_XGOTREF(__pyx_t_6); + __Pyx_XGOTREF(__pyx_t_7); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1003 + * _import_umath() + * except Exception: + * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< + * + * + */ + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 1003, __pyx_L5_except_error) + __Pyx_GOTREF(__pyx_t_8); + __Pyx_Raise(__pyx_t_8, 0, 0, 0); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __PYX_ERR(1, 1003, __pyx_L5_except_error) + } + goto __pyx_L5_except_error; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1000 + * + * cdef inline int import_ufunc() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + __pyx_L5_except_error:; + __Pyx_XGIVEREF(__pyx_t_1); + __Pyx_XGIVEREF(__pyx_t_2); + __Pyx_XGIVEREF(__pyx_t_3); + __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); + goto __pyx_L1_error; + __pyx_L8_try_end:; + } + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":999 + * raise ImportError("numpy.core.umath failed to import") + * + * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< + * try: + * _import_umath() + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_AddTraceback("numpy.import_ufunc", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1006 + * + * + * cdef inline bint is_timedelta64_object(object obj): # <<<<<<<<<<<<<< + * """ + * Cython equivalent of `isinstance(obj, np.timedelta64)` + */ + +static CYTHON_INLINE int __pyx_f_5numpy_is_timedelta64_object(PyObject *__pyx_v_obj) { + int __pyx_r; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1018 + * bool + * """ + * return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = PyObject_TypeCheck(__pyx_v_obj, (&PyTimedeltaArrType_Type)); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1006 + * + * + * cdef inline bint is_timedelta64_object(object obj): # <<<<<<<<<<<<<< + * """ + * Cython equivalent of `isinstance(obj, np.timedelta64)` + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1021 + * + * + * cdef inline bint is_datetime64_object(object obj): # <<<<<<<<<<<<<< + * """ + * Cython equivalent of `isinstance(obj, np.datetime64)` + */ + +static CYTHON_INLINE int __pyx_f_5numpy_is_datetime64_object(PyObject *__pyx_v_obj) { + int __pyx_r; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1033 + * bool + * """ + * return 
PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = PyObject_TypeCheck(__pyx_v_obj, (&PyDatetimeArrType_Type)); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1021 + * + * + * cdef inline bint is_datetime64_object(object obj): # <<<<<<<<<<<<<< + * """ + * Cython equivalent of `isinstance(obj, np.datetime64)` + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1036 + * + * + * cdef inline npy_datetime get_datetime64_value(object obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the int64 value underlying scalar numpy datetime64 object + */ + +static CYTHON_INLINE npy_datetime __pyx_f_5numpy_get_datetime64_value(PyObject *__pyx_v_obj) { + npy_datetime __pyx_r; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1043 + * also needed. That can be found using `get_datetime64_unit`. + * """ + * return (obj).obval # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = ((PyDatetimeScalarObject *)__pyx_v_obj)->obval; + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1036 + * + * + * cdef inline npy_datetime get_datetime64_value(object obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the int64 value underlying scalar numpy datetime64 object + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1046 + * + * + * cdef inline npy_timedelta get_timedelta64_value(object obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the int64 value underlying scalar numpy timedelta64 object + */ + +static CYTHON_INLINE npy_timedelta __pyx_f_5numpy_get_timedelta64_value(PyObject *__pyx_v_obj) { + npy_timedelta __pyx_r; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1050 + * returns the int64 value underlying scalar numpy timedelta64 object + * """ + * return (obj).obval # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = ((PyTimedeltaScalarObject *)__pyx_v_obj)->obval; + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1046 + * + * + * cdef inline npy_timedelta get_timedelta64_value(object obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the int64 value underlying scalar numpy timedelta64 object + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1053 + * + * + * cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the unit part of the dtype for a numpy datetime64 object. + */ + +static CYTHON_INLINE NPY_DATETIMEUNIT __pyx_f_5numpy_get_datetime64_unit(PyObject *__pyx_v_obj) { + NPY_DATETIMEUNIT __pyx_r; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1057 + * returns the unit part of the dtype for a numpy datetime64 object. 
+ * """ + * return (obj).obmeta.base # <<<<<<<<<<<<<< + */ + __pyx_r = ((NPY_DATETIMEUNIT)((PyDatetimeScalarObject *)__pyx_v_obj)->obmeta.base); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1053 + * + * + * cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the unit part of the dtype for a numpy datetime64 object. + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "opencood/utils/box_overlaps.pyx":17 + * + * + * def bbox_overlaps( # <<<<<<<<<<<<<< + * np.ndarray[DTYPE_t, ndim=2] boxes, + * np.ndarray[DTYPE_t, ndim=2] query_boxes): + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_8opencood_5utils_12box_overlaps_1bbox_overlaps(PyObject *__pyx_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +); /*proto*/ +PyDoc_STRVAR(__pyx_doc_8opencood_5utils_12box_overlaps_bbox_overlaps, "\n Parameters\n ----------\n boxes: (N, 4) ndarray of float\n query_boxes: (K, 4) ndarray of float\n Returns\n -------\n overlaps: (N, K) ndarray of overlap between boxes and query_boxes\n "); +static PyMethodDef __pyx_mdef_8opencood_5utils_12box_overlaps_1bbox_overlaps = {"bbox_overlaps", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8opencood_5utils_12box_overlaps_1bbox_overlaps, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8opencood_5utils_12box_overlaps_bbox_overlaps}; +static PyObject *__pyx_pw_8opencood_5utils_12box_overlaps_1bbox_overlaps(PyObject *__pyx_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +) { + PyArrayObject *__pyx_v_boxes = 0; + PyArrayObject *__pyx_v_query_boxes = 0; + #if !CYTHON_METH_FASTCALL + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + #endif + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject* values[2] = {0,0}; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("bbox_overlaps (wrapper)", 0); + #if !CYTHON_METH_FASTCALL + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; + #endif + #endif + __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); + { + PyObject **__pyx_pyargnames[] = {&__pyx_n_s_boxes,&__pyx_n_s_query_boxes,0}; + if (__pyx_kwds) { + Py_ssize_t kw_args; + switch (__pyx_nargs) { + case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + CYTHON_FALLTHROUGH; + case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); + switch (__pyx_nargs) { + case 0: + if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_boxes)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 17, __pyx_L3_error) + else goto __pyx_L5_argtuple_error; + CYTHON_FALLTHROUGH; + case 1: + if (likely((values[1] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_query_boxes)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[1]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 17, 
__pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("bbox_overlaps", 1, 2, 2, 1); __PYX_ERR(0, 17, __pyx_L3_error) + } + } + if (unlikely(kw_args > 0)) { + const Py_ssize_t kwd_pos_args = __pyx_nargs; + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "bbox_overlaps") < 0)) __PYX_ERR(0, 17, __pyx_L3_error) + } + } else if (unlikely(__pyx_nargs != 2)) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + } + __pyx_v_boxes = ((PyArrayObject *)values[0]); + __pyx_v_query_boxes = ((PyArrayObject *)values[1]); + } + goto __pyx_L6_skip; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("bbox_overlaps", 1, 2, 2, __pyx_nargs); __PYX_ERR(0, 17, __pyx_L3_error) + __pyx_L6_skip:; + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_AddTraceback("opencood.utils.box_overlaps.bbox_overlaps", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_boxes), __pyx_ptype_5numpy_ndarray, 1, "boxes", 0))) __PYX_ERR(0, 18, __pyx_L1_error) + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_query_boxes), __pyx_ptype_5numpy_ndarray, 1, "query_boxes", 0))) __PYX_ERR(0, 19, __pyx_L1_error) + __pyx_r = __pyx_pf_8opencood_5utils_12box_overlaps_bbox_overlaps(__pyx_self, __pyx_v_boxes, __pyx_v_query_boxes); + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = NULL; + __pyx_L0:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_8opencood_5utils_12box_overlaps_bbox_overlaps(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_boxes, PyArrayObject *__pyx_v_query_boxes) { + unsigned int __pyx_v_N; + unsigned int __pyx_v_K; + PyArrayObject *__pyx_v_overlaps = 0; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_v_iw; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_v_ih; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_v_box_area; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_v_ua; + unsigned int __pyx_v_k; + unsigned int __pyx_v_n; + __Pyx_LocalBuf_ND __pyx_pybuffernd_boxes; + __Pyx_Buffer __pyx_pybuffer_boxes; + __Pyx_LocalBuf_ND __pyx_pybuffernd_overlaps; + __Pyx_Buffer __pyx_pybuffer_overlaps; + __Pyx_LocalBuf_ND __pyx_pybuffernd_query_boxes; + __Pyx_Buffer __pyx_pybuffer_query_boxes; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + npy_intp *__pyx_t_1; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + PyObject *__pyx_t_5 = NULL; + PyArrayObject *__pyx_t_6 = NULL; + unsigned int __pyx_t_7; + unsigned int __pyx_t_8; + unsigned int __pyx_t_9; + size_t __pyx_t_10; + Py_ssize_t __pyx_t_11; + int __pyx_t_12; + size_t __pyx_t_13; + Py_ssize_t __pyx_t_14; + size_t __pyx_t_15; + Py_ssize_t __pyx_t_16; + size_t __pyx_t_17; + Py_ssize_t __pyx_t_18; + unsigned int __pyx_t_19; + unsigned int __pyx_t_20; + unsigned int __pyx_t_21; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_t_22; + 
__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_t_23; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_t_24; + int __pyx_t_25; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_t_26; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("bbox_overlaps", 1); + __pyx_pybuffer_overlaps.pybuffer.buf = NULL; + __pyx_pybuffer_overlaps.refcount = 0; + __pyx_pybuffernd_overlaps.data = NULL; + __pyx_pybuffernd_overlaps.rcbuffer = &__pyx_pybuffer_overlaps; + __pyx_pybuffer_boxes.pybuffer.buf = NULL; + __pyx_pybuffer_boxes.refcount = 0; + __pyx_pybuffernd_boxes.data = NULL; + __pyx_pybuffernd_boxes.rcbuffer = &__pyx_pybuffer_boxes; + __pyx_pybuffer_query_boxes.pybuffer.buf = NULL; + __pyx_pybuffer_query_boxes.refcount = 0; + __pyx_pybuffernd_query_boxes.data = NULL; + __pyx_pybuffernd_query_boxes.rcbuffer = &__pyx_pybuffer_query_boxes; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_boxes.rcbuffer->pybuffer, (PyObject*)__pyx_v_boxes, &__Pyx_TypeInfo_nn___pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) __PYX_ERR(0, 17, __pyx_L1_error) + } + __pyx_pybuffernd_boxes.diminfo[0].strides = __pyx_pybuffernd_boxes.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_boxes.diminfo[0].shape = __pyx_pybuffernd_boxes.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_boxes.diminfo[1].strides = __pyx_pybuffernd_boxes.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_boxes.diminfo[1].shape = __pyx_pybuffernd_boxes.rcbuffer->pybuffer.shape[1]; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_query_boxes.rcbuffer->pybuffer, (PyObject*)__pyx_v_query_boxes, &__Pyx_TypeInfo_nn___pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) __PYX_ERR(0, 17, __pyx_L1_error) + } + __pyx_pybuffernd_query_boxes.diminfo[0].strides = __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_query_boxes.diminfo[0].shape = __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_query_boxes.diminfo[1].strides = __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_query_boxes.diminfo[1].shape = __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.shape[1]; + + /* "opencood/utils/box_overlaps.pyx":29 + * overlaps: (N, K) ndarray of overlap between boxes and query_boxes + * """ + * cdef unsigned int N = boxes.shape[0] # <<<<<<<<<<<<<< + * cdef unsigned int K = query_boxes.shape[0] + * cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) + */ + __pyx_t_1 = __pyx_f_5numpy_7ndarray_5shape_shape(((PyArrayObject *)__pyx_v_boxes)); if (unlikely(__pyx_t_1 == ((npy_intp *)NULL) && PyErr_Occurred())) __PYX_ERR(0, 29, __pyx_L1_error) + __pyx_v_N = (__pyx_t_1[0]); + + /* "opencood/utils/box_overlaps.pyx":30 + * """ + * cdef unsigned int N = boxes.shape[0] + * cdef unsigned int K = query_boxes.shape[0] # <<<<<<<<<<<<<< + * cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) + * cdef DTYPE_t iw, ih, box_area + */ + __pyx_t_1 = __pyx_f_5numpy_7ndarray_5shape_shape(((PyArrayObject *)__pyx_v_query_boxes)); if (unlikely(__pyx_t_1 == ((npy_intp *)NULL) && PyErr_Occurred())) __PYX_ERR(0, 30, __pyx_L1_error) + __pyx_v_K = (__pyx_t_1[0]); + + /* "opencood/utils/box_overlaps.pyx":31 + * cdef unsigned int N = boxes.shape[0] + * cdef unsigned int K = query_boxes.shape[0] + * cdef 
np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) # <<<<<<<<<<<<<< + * cdef DTYPE_t iw, ih, box_area + * cdef DTYPE_t ua + */ + __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_np); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 31, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_zeros); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 31, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = __Pyx_PyInt_From_unsigned_int(__pyx_v_N); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 31, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_4 = __Pyx_PyInt_From_unsigned_int(__pyx_v_K); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 31, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 31, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GIVEREF(__pyx_t_2); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_2)) __PYX_ERR(0, 31, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_4); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_4)) __PYX_ERR(0, 31, __pyx_L1_error); + __pyx_t_2 = 0; + __pyx_t_4 = 0; + __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 31, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_GIVEREF(__pyx_t_5); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_5)) __PYX_ERR(0, 31, __pyx_L1_error); + __pyx_t_5 = 0; + __pyx_t_5 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 31, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_DTYPE); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 31, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_t_5, __pyx_n_s_dtype, __pyx_t_2) < 0) __PYX_ERR(0, 31, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_4, __pyx_t_5); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 31, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + if (!(likely(((__pyx_t_2) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_2, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 31, __pyx_L1_error) + __pyx_t_6 = ((PyArrayObject *)__pyx_t_2); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_overlaps.rcbuffer->pybuffer, (PyObject*)__pyx_t_6, &__Pyx_TypeInfo_nn___pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t, PyBUF_FORMAT| PyBUF_STRIDES| PyBUF_WRITABLE, 2, 0, __pyx_stack) == -1)) { + __pyx_v_overlaps = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_overlaps.rcbuffer->pybuffer.buf = NULL; + __PYX_ERR(0, 31, __pyx_L1_error) + } else {__pyx_pybuffernd_overlaps.diminfo[0].strides = __pyx_pybuffernd_overlaps.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_overlaps.diminfo[0].shape = __pyx_pybuffernd_overlaps.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_overlaps.diminfo[1].strides = __pyx_pybuffernd_overlaps.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_overlaps.diminfo[1].shape = __pyx_pybuffernd_overlaps.rcbuffer->pybuffer.shape[1]; + } + } + __pyx_t_6 = 0; + __pyx_v_overlaps = ((PyArrayObject *)__pyx_t_2); + __pyx_t_2 = 0; + + /* "opencood/utils/box_overlaps.pyx":35 + * cdef DTYPE_t ua + * cdef unsigned int k, n + * for k in range(K): # <<<<<<<<<<<<<< + * box_area = ( + * (query_boxes[k, 2] - query_boxes[k, 0] + 1) * + */ + __pyx_t_7 = __pyx_v_K; + __pyx_t_8 = __pyx_t_7; + for (__pyx_t_9 = 0; __pyx_t_9 < __pyx_t_8; 
__pyx_t_9+=1) { + __pyx_v_k = __pyx_t_9; + + /* "opencood/utils/box_overlaps.pyx":37 + * for k in range(K): + * box_area = ( + * (query_boxes[k, 2] - query_boxes[k, 0] + 1) * # <<<<<<<<<<<<<< + * (query_boxes[k, 3] - query_boxes[k, 1] + 1) + * ) + */ + __pyx_t_10 = __pyx_v_k; + __pyx_t_11 = 2; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_10 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_11 < 0) { + __pyx_t_11 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_11 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_11 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 37, __pyx_L1_error) + } + __pyx_t_13 = __pyx_v_k; + __pyx_t_14 = 0; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_13 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_14 < 0) { + __pyx_t_14 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_14 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_14 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 37, __pyx_L1_error) + } + + /* "opencood/utils/box_overlaps.pyx":38 + * box_area = ( + * (query_boxes[k, 2] - query_boxes[k, 0] + 1) * + * (query_boxes[k, 3] - query_boxes[k, 1] + 1) # <<<<<<<<<<<<<< + * ) + * for n in range(N): + */ + __pyx_t_15 = __pyx_v_k; + __pyx_t_16 = 3; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_15 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_16 < 0) { + __pyx_t_16 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_16 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_16 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 38, __pyx_L1_error) + } + __pyx_t_17 = __pyx_v_k; + __pyx_t_18 = 1; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 38, __pyx_L1_error) + } + + /* "opencood/utils/box_overlaps.pyx":37 + * for k in range(K): + * box_area = ( + * (query_boxes[k, 2] - query_boxes[k, 0] + 1) * # <<<<<<<<<<<<<< + * (query_boxes[k, 3] - query_boxes[k, 1] + 1) + * ) + */ + __pyx_v_box_area = ((((*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_10, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_11, __pyx_pybuffernd_query_boxes.diminfo[1].strides)) - (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_13, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_14, __pyx_pybuffernd_query_boxes.diminfo[1].strides))) + 1.0) * (((*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_15, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_16, __pyx_pybuffernd_query_boxes.diminfo[1].strides)) - 
(*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_query_boxes.diminfo[1].strides))) + 1.0)); + + /* "opencood/utils/box_overlaps.pyx":40 + * (query_boxes[k, 3] - query_boxes[k, 1] + 1) + * ) + * for n in range(N): # <<<<<<<<<<<<<< + * iw = ( + * min(boxes[n, 2], query_boxes[k, 2]) - + */ + __pyx_t_19 = __pyx_v_N; + __pyx_t_20 = __pyx_t_19; + for (__pyx_t_21 = 0; __pyx_t_21 < __pyx_t_20; __pyx_t_21+=1) { + __pyx_v_n = __pyx_t_21; + + /* "opencood/utils/box_overlaps.pyx":42 + * for n in range(N): + * iw = ( + * min(boxes[n, 2], query_boxes[k, 2]) - # <<<<<<<<<<<<<< + * max(boxes[n, 0], query_boxes[k, 0]) + 1 + * ) + */ + __pyx_t_17 = __pyx_v_k; + __pyx_t_18 = 2; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 42, __pyx_L1_error) + } + __pyx_t_22 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_query_boxes.diminfo[1].strides)); + __pyx_t_17 = __pyx_v_n; + __pyx_t_18 = 2; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 42, __pyx_L1_error) + } + __pyx_t_23 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_boxes.diminfo[1].strides)); + __pyx_t_25 = (__pyx_t_22 < __pyx_t_23); + if (__pyx_t_25) { + __pyx_t_24 = __pyx_t_22; + } else { + __pyx_t_24 = __pyx_t_23; + } + + /* "opencood/utils/box_overlaps.pyx":43 + * iw = ( + * min(boxes[n, 2], query_boxes[k, 2]) - + * max(boxes[n, 0], query_boxes[k, 0]) + 1 # <<<<<<<<<<<<<< + * ) + * if iw > 0: + */ + __pyx_t_17 = __pyx_v_k; + __pyx_t_18 = 0; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 43, __pyx_L1_error) + } + __pyx_t_22 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_query_boxes.diminfo[1].strides)); + __pyx_t_17 = __pyx_v_n; + __pyx_t_18 = 0; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= 
(size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 43, __pyx_L1_error) + } + __pyx_t_23 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_boxes.diminfo[1].strides)); + __pyx_t_25 = (__pyx_t_22 > __pyx_t_23); + if (__pyx_t_25) { + __pyx_t_26 = __pyx_t_22; + } else { + __pyx_t_26 = __pyx_t_23; + } + + /* "opencood/utils/box_overlaps.pyx":42 + * for n in range(N): + * iw = ( + * min(boxes[n, 2], query_boxes[k, 2]) - # <<<<<<<<<<<<<< + * max(boxes[n, 0], query_boxes[k, 0]) + 1 + * ) + */ + __pyx_v_iw = ((__pyx_t_24 - __pyx_t_26) + 1.0); + + /* "opencood/utils/box_overlaps.pyx":45 + * max(boxes[n, 0], query_boxes[k, 0]) + 1 + * ) + * if iw > 0: # <<<<<<<<<<<<<< + * ih = ( + * min(boxes[n, 3], query_boxes[k, 3]) - + */ + __pyx_t_25 = (__pyx_v_iw > 0.0); + if (__pyx_t_25) { + + /* "opencood/utils/box_overlaps.pyx":47 + * if iw > 0: + * ih = ( + * min(boxes[n, 3], query_boxes[k, 3]) - # <<<<<<<<<<<<<< + * max(boxes[n, 1], query_boxes[k, 1]) + 1 + * ) + */ + __pyx_t_17 = __pyx_v_k; + __pyx_t_18 = 3; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 47, __pyx_L1_error) + } + __pyx_t_26 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_query_boxes.diminfo[1].strides)); + __pyx_t_17 = __pyx_v_n; + __pyx_t_18 = 3; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 47, __pyx_L1_error) + } + __pyx_t_24 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_boxes.diminfo[1].strides)); + __pyx_t_25 = (__pyx_t_26 < __pyx_t_24); + if (__pyx_t_25) { + __pyx_t_22 = __pyx_t_26; + } else { + __pyx_t_22 = __pyx_t_24; + } + + /* "opencood/utils/box_overlaps.pyx":48 + * ih = ( + * min(boxes[n, 3], query_boxes[k, 3]) - + * max(boxes[n, 1], query_boxes[k, 1]) + 1 # <<<<<<<<<<<<<< + * ) + * if ih > 0: + */ + __pyx_t_17 = __pyx_v_k; + __pyx_t_18 = 1; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if 
(unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 48, __pyx_L1_error) + } + __pyx_t_26 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_query_boxes.diminfo[1].strides)); + __pyx_t_17 = __pyx_v_n; + __pyx_t_18 = 1; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 48, __pyx_L1_error) + } + __pyx_t_24 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_boxes.diminfo[1].strides)); + __pyx_t_25 = (__pyx_t_26 > __pyx_t_24); + if (__pyx_t_25) { + __pyx_t_23 = __pyx_t_26; + } else { + __pyx_t_23 = __pyx_t_24; + } + + /* "opencood/utils/box_overlaps.pyx":47 + * if iw > 0: + * ih = ( + * min(boxes[n, 3], query_boxes[k, 3]) - # <<<<<<<<<<<<<< + * max(boxes[n, 1], query_boxes[k, 1]) + 1 + * ) + */ + __pyx_v_ih = ((__pyx_t_22 - __pyx_t_23) + 1.0); + + /* "opencood/utils/box_overlaps.pyx":50 + * max(boxes[n, 1], query_boxes[k, 1]) + 1 + * ) + * if ih > 0: # <<<<<<<<<<<<<< + * ua = float( + * (boxes[n, 2] - boxes[n, 0] + 1) * + */ + __pyx_t_25 = (__pyx_v_ih > 0.0); + if (__pyx_t_25) { + + /* "opencood/utils/box_overlaps.pyx":52 + * if ih > 0: + * ua = float( + * (boxes[n, 2] - boxes[n, 0] + 1) * # <<<<<<<<<<<<<< + * (boxes[n, 3] - boxes[n, 1] + 1) + + * box_area - iw * ih + */ + __pyx_t_17 = __pyx_v_n; + __pyx_t_18 = 2; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 52, __pyx_L1_error) + } + __pyx_t_15 = __pyx_v_n; + __pyx_t_16 = 0; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_15 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_16 < 0) { + __pyx_t_16 += __pyx_pybuffernd_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_16 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_16 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 52, __pyx_L1_error) + } + + /* "opencood/utils/box_overlaps.pyx":53 + * ua = float( + * (boxes[n, 2] - boxes[n, 0] + 1) * + * (boxes[n, 3] - boxes[n, 1] + 1) + # <<<<<<<<<<<<<< + * box_area - iw * ih + * ) + */ + __pyx_t_13 = __pyx_v_n; + __pyx_t_14 = 3; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_13 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_14 < 0) { + __pyx_t_14 += __pyx_pybuffernd_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_14 < 0)) __pyx_t_12 = 1; + } 
else if (unlikely(__pyx_t_14 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 53, __pyx_L1_error) + } + __pyx_t_10 = __pyx_v_n; + __pyx_t_11 = 1; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_10 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_11 < 0) { + __pyx_t_11 += __pyx_pybuffernd_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_11 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_11 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 53, __pyx_L1_error) + } + + /* "opencood/utils/box_overlaps.pyx":51 + * ) + * if ih > 0: + * ua = float( # <<<<<<<<<<<<<< + * (boxes[n, 2] - boxes[n, 0] + 1) * + * (boxes[n, 3] - boxes[n, 1] + 1) + + */ + __pyx_v_ua = ((double)((((((*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_boxes.diminfo[1].strides)) - (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_15, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_16, __pyx_pybuffernd_boxes.diminfo[1].strides))) + 1.0) * (((*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_13, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_14, __pyx_pybuffernd_boxes.diminfo[1].strides)) - (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_10, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_11, __pyx_pybuffernd_boxes.diminfo[1].strides))) + 1.0)) + __pyx_v_box_area) - (__pyx_v_iw * __pyx_v_ih))); + + /* "opencood/utils/box_overlaps.pyx":56 + * box_area - iw * ih + * ) + * overlaps[n, k] = iw * ih / ua # <<<<<<<<<<<<<< + * return overlaps + * + */ + __pyx_t_23 = (__pyx_v_iw * __pyx_v_ih); + if (unlikely(__pyx_v_ua == 0)) { + PyErr_SetString(PyExc_ZeroDivisionError, "float division"); + __PYX_ERR(0, 56, __pyx_L1_error) + } + __pyx_t_10 = __pyx_v_n; + __pyx_t_13 = __pyx_v_k; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_10 >= (size_t)__pyx_pybuffernd_overlaps.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_13 >= (size_t)__pyx_pybuffernd_overlaps.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 56, __pyx_L1_error) + } + *__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_overlaps.rcbuffer->pybuffer.buf, __pyx_t_10, __pyx_pybuffernd_overlaps.diminfo[0].strides, __pyx_t_13, __pyx_pybuffernd_overlaps.diminfo[1].strides) = (__pyx_t_23 / __pyx_v_ua); + + /* "opencood/utils/box_overlaps.pyx":50 + * max(boxes[n, 1], query_boxes[k, 1]) + 1 + * ) + * if ih > 0: # <<<<<<<<<<<<<< + * ua = float( + * (boxes[n, 2] - boxes[n, 0] + 1) * + */ + } + + /* "opencood/utils/box_overlaps.pyx":45 + * max(boxes[n, 0], query_boxes[k, 0]) + 1 + * ) + * if iw > 0: # <<<<<<<<<<<<<< + * ih = ( + * min(boxes[n, 3], query_boxes[k, 3]) - + */ + } + } + } + + /* "opencood/utils/box_overlaps.pyx":57 + * ) + * overlaps[n, k] = iw * ih / ua + * return overlaps # <<<<<<<<<<<<<< + * + * def bbox_intersections( + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF((PyObject *)__pyx_v_overlaps); + __pyx_r = ((PyObject 
*)__pyx_v_overlaps); + goto __pyx_L0; + + /* "opencood/utils/box_overlaps.pyx":17 + * + * + * def bbox_overlaps( # <<<<<<<<<<<<<< + * np.ndarray[DTYPE_t, ndim=2] boxes, + * np.ndarray[DTYPE_t, ndim=2] query_boxes): + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_5); + { PyObject *__pyx_type, *__pyx_value, *__pyx_tb; + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_boxes.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_overlaps.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_query_boxes.rcbuffer->pybuffer); + __Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);} + __Pyx_AddTraceback("opencood.utils.box_overlaps.bbox_overlaps", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + goto __pyx_L2; + __pyx_L0:; + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_boxes.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_overlaps.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_query_boxes.rcbuffer->pybuffer); + __pyx_L2:; + __Pyx_XDECREF((PyObject *)__pyx_v_overlaps); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "opencood/utils/box_overlaps.pyx":59 + * return overlaps + * + * def bbox_intersections( # <<<<<<<<<<<<<< + * np.ndarray[DTYPE_t, ndim=2] boxes, + * np.ndarray[DTYPE_t, ndim=2] query_boxes): + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_8opencood_5utils_12box_overlaps_3bbox_intersections(PyObject *__pyx_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +); /*proto*/ +PyDoc_STRVAR(__pyx_doc_8opencood_5utils_12box_overlaps_2bbox_intersections, "\n For each query box compute the intersection ratio covered by boxes\n ----------\n Parameters\n ----------\n boxes: (N, 4) ndarray of float\n query_boxes: (K, 4) ndarray of float\n Returns\n -------\n overlaps: (N, K) ndarray of intersec between boxes and query_boxes\n "); +static PyMethodDef __pyx_mdef_8opencood_5utils_12box_overlaps_3bbox_intersections = {"bbox_intersections", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8opencood_5utils_12box_overlaps_3bbox_intersections, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8opencood_5utils_12box_overlaps_2bbox_intersections}; +static PyObject *__pyx_pw_8opencood_5utils_12box_overlaps_3bbox_intersections(PyObject *__pyx_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +) { + PyArrayObject *__pyx_v_boxes = 0; + PyArrayObject *__pyx_v_query_boxes = 0; + #if !CYTHON_METH_FASTCALL + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + #endif + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject* values[2] = {0,0}; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("bbox_intersections (wrapper)", 0); + #if !CYTHON_METH_FASTCALL + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; + #endif + #endif + __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); + { + PyObject 
**__pyx_pyargnames[] = {&__pyx_n_s_boxes,&__pyx_n_s_query_boxes,0}; + if (__pyx_kwds) { + Py_ssize_t kw_args; + switch (__pyx_nargs) { + case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + CYTHON_FALLTHROUGH; + case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); + switch (__pyx_nargs) { + case 0: + if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_boxes)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 59, __pyx_L3_error) + else goto __pyx_L5_argtuple_error; + CYTHON_FALLTHROUGH; + case 1: + if (likely((values[1] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_query_boxes)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[1]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 59, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("bbox_intersections", 1, 2, 2, 1); __PYX_ERR(0, 59, __pyx_L3_error) + } + } + if (unlikely(kw_args > 0)) { + const Py_ssize_t kwd_pos_args = __pyx_nargs; + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "bbox_intersections") < 0)) __PYX_ERR(0, 59, __pyx_L3_error) + } + } else if (unlikely(__pyx_nargs != 2)) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + } + __pyx_v_boxes = ((PyArrayObject *)values[0]); + __pyx_v_query_boxes = ((PyArrayObject *)values[1]); + } + goto __pyx_L6_skip; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("bbox_intersections", 1, 2, 2, __pyx_nargs); __PYX_ERR(0, 59, __pyx_L3_error) + __pyx_L6_skip:; + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_AddTraceback("opencood.utils.box_overlaps.bbox_intersections", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_boxes), __pyx_ptype_5numpy_ndarray, 1, "boxes", 0))) __PYX_ERR(0, 60, __pyx_L1_error) + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_query_boxes), __pyx_ptype_5numpy_ndarray, 1, "query_boxes", 0))) __PYX_ERR(0, 61, __pyx_L1_error) + __pyx_r = __pyx_pf_8opencood_5utils_12box_overlaps_2bbox_intersections(__pyx_self, __pyx_v_boxes, __pyx_v_query_boxes); + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = NULL; + __pyx_L0:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_8opencood_5utils_12box_overlaps_2bbox_intersections(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_boxes, PyArrayObject *__pyx_v_query_boxes) { + unsigned int __pyx_v_N; + unsigned int __pyx_v_K; + PyArrayObject *__pyx_v_intersec = 0; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_v_iw; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_v_ih; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_v_box_area; + unsigned int __pyx_v_k; + unsigned int 
__pyx_v_n; + __Pyx_LocalBuf_ND __pyx_pybuffernd_boxes; + __Pyx_Buffer __pyx_pybuffer_boxes; + __Pyx_LocalBuf_ND __pyx_pybuffernd_intersec; + __Pyx_Buffer __pyx_pybuffer_intersec; + __Pyx_LocalBuf_ND __pyx_pybuffernd_query_boxes; + __Pyx_Buffer __pyx_pybuffer_query_boxes; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + npy_intp *__pyx_t_1; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + PyObject *__pyx_t_5 = NULL; + PyArrayObject *__pyx_t_6 = NULL; + unsigned int __pyx_t_7; + unsigned int __pyx_t_8; + unsigned int __pyx_t_9; + size_t __pyx_t_10; + Py_ssize_t __pyx_t_11; + int __pyx_t_12; + size_t __pyx_t_13; + Py_ssize_t __pyx_t_14; + size_t __pyx_t_15; + Py_ssize_t __pyx_t_16; + size_t __pyx_t_17; + Py_ssize_t __pyx_t_18; + unsigned int __pyx_t_19; + unsigned int __pyx_t_20; + unsigned int __pyx_t_21; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_t_22; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_t_23; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_t_24; + int __pyx_t_25; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_t_26; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("bbox_intersections", 1); + __pyx_pybuffer_intersec.pybuffer.buf = NULL; + __pyx_pybuffer_intersec.refcount = 0; + __pyx_pybuffernd_intersec.data = NULL; + __pyx_pybuffernd_intersec.rcbuffer = &__pyx_pybuffer_intersec; + __pyx_pybuffer_boxes.pybuffer.buf = NULL; + __pyx_pybuffer_boxes.refcount = 0; + __pyx_pybuffernd_boxes.data = NULL; + __pyx_pybuffernd_boxes.rcbuffer = &__pyx_pybuffer_boxes; + __pyx_pybuffer_query_boxes.pybuffer.buf = NULL; + __pyx_pybuffer_query_boxes.refcount = 0; + __pyx_pybuffernd_query_boxes.data = NULL; + __pyx_pybuffernd_query_boxes.rcbuffer = &__pyx_pybuffer_query_boxes; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_boxes.rcbuffer->pybuffer, (PyObject*)__pyx_v_boxes, &__Pyx_TypeInfo_nn___pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) __PYX_ERR(0, 59, __pyx_L1_error) + } + __pyx_pybuffernd_boxes.diminfo[0].strides = __pyx_pybuffernd_boxes.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_boxes.diminfo[0].shape = __pyx_pybuffernd_boxes.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_boxes.diminfo[1].strides = __pyx_pybuffernd_boxes.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_boxes.diminfo[1].shape = __pyx_pybuffernd_boxes.rcbuffer->pybuffer.shape[1]; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_query_boxes.rcbuffer->pybuffer, (PyObject*)__pyx_v_query_boxes, &__Pyx_TypeInfo_nn___pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) __PYX_ERR(0, 59, __pyx_L1_error) + } + __pyx_pybuffernd_query_boxes.diminfo[0].strides = __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_query_boxes.diminfo[0].shape = __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_query_boxes.diminfo[1].strides = __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_query_boxes.diminfo[1].shape = __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.shape[1]; + + /* "opencood/utils/box_overlaps.pyx":73 + * overlaps: (N, K) ndarray of intersec between boxes and query_boxes + * """ + * cdef unsigned int N = boxes.shape[0] # <<<<<<<<<<<<<< + * cdef unsigned int K = query_boxes.shape[0] + * 
cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) + */ + __pyx_t_1 = __pyx_f_5numpy_7ndarray_5shape_shape(((PyArrayObject *)__pyx_v_boxes)); if (unlikely(__pyx_t_1 == ((npy_intp *)NULL) && PyErr_Occurred())) __PYX_ERR(0, 73, __pyx_L1_error) + __pyx_v_N = (__pyx_t_1[0]); + + /* "opencood/utils/box_overlaps.pyx":74 + * """ + * cdef unsigned int N = boxes.shape[0] + * cdef unsigned int K = query_boxes.shape[0] # <<<<<<<<<<<<<< + * cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) + * cdef DTYPE_t iw, ih, box_area + */ + __pyx_t_1 = __pyx_f_5numpy_7ndarray_5shape_shape(((PyArrayObject *)__pyx_v_query_boxes)); if (unlikely(__pyx_t_1 == ((npy_intp *)NULL) && PyErr_Occurred())) __PYX_ERR(0, 74, __pyx_L1_error) + __pyx_v_K = (__pyx_t_1[0]); + + /* "opencood/utils/box_overlaps.pyx":75 + * cdef unsigned int N = boxes.shape[0] + * cdef unsigned int K = query_boxes.shape[0] + * cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) # <<<<<<<<<<<<<< + * cdef DTYPE_t iw, ih, box_area + * cdef DTYPE_t ua + */ + __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_np); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 75, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_zeros); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 75, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = __Pyx_PyInt_From_unsigned_int(__pyx_v_N); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 75, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_4 = __Pyx_PyInt_From_unsigned_int(__pyx_v_K); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 75, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 75, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GIVEREF(__pyx_t_2); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_2)) __PYX_ERR(0, 75, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_4); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_4)) __PYX_ERR(0, 75, __pyx_L1_error); + __pyx_t_2 = 0; + __pyx_t_4 = 0; + __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 75, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_GIVEREF(__pyx_t_5); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_5)) __PYX_ERR(0, 75, __pyx_L1_error); + __pyx_t_5 = 0; + __pyx_t_5 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 75, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_DTYPE); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 75, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_t_5, __pyx_n_s_dtype, __pyx_t_2) < 0) __PYX_ERR(0, 75, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_4, __pyx_t_5); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 75, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + if (!(likely(((__pyx_t_2) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_2, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 75, __pyx_L1_error) + __pyx_t_6 = ((PyArrayObject *)__pyx_t_2); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_intersec.rcbuffer->pybuffer, (PyObject*)__pyx_t_6, &__Pyx_TypeInfo_nn___pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t, PyBUF_FORMAT| PyBUF_STRIDES| PyBUF_WRITABLE, 2, 0, __pyx_stack) == -1)) { + __pyx_v_intersec = ((PyArrayObject 
*)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_intersec.rcbuffer->pybuffer.buf = NULL; + __PYX_ERR(0, 75, __pyx_L1_error) + } else {__pyx_pybuffernd_intersec.diminfo[0].strides = __pyx_pybuffernd_intersec.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_intersec.diminfo[0].shape = __pyx_pybuffernd_intersec.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_intersec.diminfo[1].strides = __pyx_pybuffernd_intersec.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_intersec.diminfo[1].shape = __pyx_pybuffernd_intersec.rcbuffer->pybuffer.shape[1]; + } + } + __pyx_t_6 = 0; + __pyx_v_intersec = ((PyArrayObject *)__pyx_t_2); + __pyx_t_2 = 0; + + /* "opencood/utils/box_overlaps.pyx":79 + * cdef DTYPE_t ua + * cdef unsigned int k, n + * for k in range(K): # <<<<<<<<<<<<<< + * box_area = ( + * (query_boxes[k, 2] - query_boxes[k, 0] + 1) * + */ + __pyx_t_7 = __pyx_v_K; + __pyx_t_8 = __pyx_t_7; + for (__pyx_t_9 = 0; __pyx_t_9 < __pyx_t_8; __pyx_t_9+=1) { + __pyx_v_k = __pyx_t_9; + + /* "opencood/utils/box_overlaps.pyx":81 + * for k in range(K): + * box_area = ( + * (query_boxes[k, 2] - query_boxes[k, 0] + 1) * # <<<<<<<<<<<<<< + * (query_boxes[k, 3] - query_boxes[k, 1] + 1) + * ) + */ + __pyx_t_10 = __pyx_v_k; + __pyx_t_11 = 2; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_10 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_11 < 0) { + __pyx_t_11 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_11 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_11 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 81, __pyx_L1_error) + } + __pyx_t_13 = __pyx_v_k; + __pyx_t_14 = 0; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_13 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_14 < 0) { + __pyx_t_14 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_14 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_14 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 81, __pyx_L1_error) + } + + /* "opencood/utils/box_overlaps.pyx":82 + * box_area = ( + * (query_boxes[k, 2] - query_boxes[k, 0] + 1) * + * (query_boxes[k, 3] - query_boxes[k, 1] + 1) # <<<<<<<<<<<<<< + * ) + * for n in range(N): + */ + __pyx_t_15 = __pyx_v_k; + __pyx_t_16 = 3; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_15 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_16 < 0) { + __pyx_t_16 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_16 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_16 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 82, __pyx_L1_error) + } + __pyx_t_17 = __pyx_v_k; + __pyx_t_18 = 1; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 82, __pyx_L1_error) + } + + /* "opencood/utils/box_overlaps.pyx":81 + * for k in range(K): + * box_area = ( + * 
(query_boxes[k, 2] - query_boxes[k, 0] + 1) * # <<<<<<<<<<<<<< + * (query_boxes[k, 3] - query_boxes[k, 1] + 1) + * ) + */ + __pyx_v_box_area = ((((*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_10, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_11, __pyx_pybuffernd_query_boxes.diminfo[1].strides)) - (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_13, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_14, __pyx_pybuffernd_query_boxes.diminfo[1].strides))) + 1.0) * (((*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_15, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_16, __pyx_pybuffernd_query_boxes.diminfo[1].strides)) - (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_query_boxes.diminfo[1].strides))) + 1.0)); + + /* "opencood/utils/box_overlaps.pyx":84 + * (query_boxes[k, 3] - query_boxes[k, 1] + 1) + * ) + * for n in range(N): # <<<<<<<<<<<<<< + * iw = ( + * min(boxes[n, 2], query_boxes[k, 2]) - + */ + __pyx_t_19 = __pyx_v_N; + __pyx_t_20 = __pyx_t_19; + for (__pyx_t_21 = 0; __pyx_t_21 < __pyx_t_20; __pyx_t_21+=1) { + __pyx_v_n = __pyx_t_21; + + /* "opencood/utils/box_overlaps.pyx":86 + * for n in range(N): + * iw = ( + * min(boxes[n, 2], query_boxes[k, 2]) - # <<<<<<<<<<<<<< + * max(boxes[n, 0], query_boxes[k, 0]) + 1 + * ) + */ + __pyx_t_17 = __pyx_v_k; + __pyx_t_18 = 2; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 86, __pyx_L1_error) + } + __pyx_t_22 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_query_boxes.diminfo[1].strides)); + __pyx_t_17 = __pyx_v_n; + __pyx_t_18 = 2; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 86, __pyx_L1_error) + } + __pyx_t_23 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_boxes.diminfo[1].strides)); + __pyx_t_25 = (__pyx_t_22 < __pyx_t_23); + if (__pyx_t_25) { + __pyx_t_24 = __pyx_t_22; + } else { + __pyx_t_24 = __pyx_t_23; + } + + /* "opencood/utils/box_overlaps.pyx":87 + * iw = ( + * min(boxes[n, 2], query_boxes[k, 2]) - + * max(boxes[n, 0], query_boxes[k, 0]) + 1 # <<<<<<<<<<<<<< + * ) + * if iw > 0: + */ 
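+ /* Hand-written illustrative sketch (not part of the Cython-generated output):
+  * the buffer-index checks and strided loads that follow expand the .pyx line
+  * quoted in the comment above. The quantity being assembled is, in the
+  * original Python/Cython form,
+  *
+  *     iw = min(boxes[n, 2], query_boxes[k, 2]) - max(boxes[n, 0], query_boxes[k, 0]) + 1
+  *
+  * i.e. the horizontal overlap width under the inclusive (+1) pixel convention
+  * used by bbox_overlaps and bbox_intersections; when iw <= 0 the box pair has
+  * no horizontal overlap and the height/area computation is skipped.
+  */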
+ __pyx_t_17 = __pyx_v_k; + __pyx_t_18 = 0; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 87, __pyx_L1_error) + } + __pyx_t_22 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_query_boxes.diminfo[1].strides)); + __pyx_t_17 = __pyx_v_n; + __pyx_t_18 = 0; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 87, __pyx_L1_error) + } + __pyx_t_23 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_boxes.diminfo[1].strides)); + __pyx_t_25 = (__pyx_t_22 > __pyx_t_23); + if (__pyx_t_25) { + __pyx_t_26 = __pyx_t_22; + } else { + __pyx_t_26 = __pyx_t_23; + } + + /* "opencood/utils/box_overlaps.pyx":86 + * for n in range(N): + * iw = ( + * min(boxes[n, 2], query_boxes[k, 2]) - # <<<<<<<<<<<<<< + * max(boxes[n, 0], query_boxes[k, 0]) + 1 + * ) + */ + __pyx_v_iw = ((__pyx_t_24 - __pyx_t_26) + 1.0); + + /* "opencood/utils/box_overlaps.pyx":89 + * max(boxes[n, 0], query_boxes[k, 0]) + 1 + * ) + * if iw > 0: # <<<<<<<<<<<<<< + * ih = ( + * min(boxes[n, 3], query_boxes[k, 3]) - + */ + __pyx_t_25 = (__pyx_v_iw > 0.0); + if (__pyx_t_25) { + + /* "opencood/utils/box_overlaps.pyx":91 + * if iw > 0: + * ih = ( + * min(boxes[n, 3], query_boxes[k, 3]) - # <<<<<<<<<<<<<< + * max(boxes[n, 1], query_boxes[k, 1]) + 1 + * ) + */ + __pyx_t_17 = __pyx_v_k; + __pyx_t_18 = 3; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 91, __pyx_L1_error) + } + __pyx_t_26 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_query_boxes.diminfo[1].strides)); + __pyx_t_17 = __pyx_v_n; + __pyx_t_18 = 3; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + 
__Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 91, __pyx_L1_error) + } + __pyx_t_24 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_boxes.diminfo[1].strides)); + __pyx_t_25 = (__pyx_t_26 < __pyx_t_24); + if (__pyx_t_25) { + __pyx_t_22 = __pyx_t_26; + } else { + __pyx_t_22 = __pyx_t_24; + } + + /* "opencood/utils/box_overlaps.pyx":92 + * ih = ( + * min(boxes[n, 3], query_boxes[k, 3]) - + * max(boxes[n, 1], query_boxes[k, 1]) + 1 # <<<<<<<<<<<<<< + * ) + * if ih > 0: + */ + __pyx_t_17 = __pyx_v_k; + __pyx_t_18 = 1; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 92, __pyx_L1_error) + } + __pyx_t_26 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_query_boxes.diminfo[1].strides)); + __pyx_t_17 = __pyx_v_n; + __pyx_t_18 = 1; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 92, __pyx_L1_error) + } + __pyx_t_24 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_boxes.diminfo[1].strides)); + __pyx_t_25 = (__pyx_t_26 > __pyx_t_24); + if (__pyx_t_25) { + __pyx_t_23 = __pyx_t_26; + } else { + __pyx_t_23 = __pyx_t_24; + } + + /* "opencood/utils/box_overlaps.pyx":91 + * if iw > 0: + * ih = ( + * min(boxes[n, 3], query_boxes[k, 3]) - # <<<<<<<<<<<<<< + * max(boxes[n, 1], query_boxes[k, 1]) + 1 + * ) + */ + __pyx_v_ih = ((__pyx_t_22 - __pyx_t_23) + 1.0); + + /* "opencood/utils/box_overlaps.pyx":94 + * max(boxes[n, 1], query_boxes[k, 1]) + 1 + * ) + * if ih > 0: # <<<<<<<<<<<<<< + * intersec[n, k] = iw * ih / box_area + * return intersec + */ + __pyx_t_25 = (__pyx_v_ih > 0.0); + if (__pyx_t_25) { + + /* "opencood/utils/box_overlaps.pyx":95 + * ) + * if ih > 0: + * intersec[n, k] = iw * ih / box_area # <<<<<<<<<<<<<< + * return intersec + * + */ + __pyx_t_23 = (__pyx_v_iw * __pyx_v_ih); + if (unlikely(__pyx_v_box_area == 0)) { + PyErr_SetString(PyExc_ZeroDivisionError, "float division"); + __PYX_ERR(0, 95, __pyx_L1_error) + } + __pyx_t_17 = __pyx_v_n; + __pyx_t_15 = __pyx_v_k; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_intersec.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_15 >= (size_t)__pyx_pybuffernd_intersec.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 95, __pyx_L1_error) + } + 
*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_intersec.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_intersec.diminfo[0].strides, __pyx_t_15, __pyx_pybuffernd_intersec.diminfo[1].strides) = (__pyx_t_23 / __pyx_v_box_area); + + /* "opencood/utils/box_overlaps.pyx":94 + * max(boxes[n, 1], query_boxes[k, 1]) + 1 + * ) + * if ih > 0: # <<<<<<<<<<<<<< + * intersec[n, k] = iw * ih / box_area + * return intersec + */ + } + + /* "opencood/utils/box_overlaps.pyx":89 + * max(boxes[n, 0], query_boxes[k, 0]) + 1 + * ) + * if iw > 0: # <<<<<<<<<<<<<< + * ih = ( + * min(boxes[n, 3], query_boxes[k, 3]) - + */ + } + } + } + + /* "opencood/utils/box_overlaps.pyx":96 + * if ih > 0: + * intersec[n, k] = iw * ih / box_area + * return intersec # <<<<<<<<<<<<<< + * + * # Compute bounding box voting + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF((PyObject *)__pyx_v_intersec); + __pyx_r = ((PyObject *)__pyx_v_intersec); + goto __pyx_L0; + + /* "opencood/utils/box_overlaps.pyx":59 + * return overlaps + * + * def bbox_intersections( # <<<<<<<<<<<<<< + * np.ndarray[DTYPE_t, ndim=2] boxes, + * np.ndarray[DTYPE_t, ndim=2] query_boxes): + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_5); + { PyObject *__pyx_type, *__pyx_value, *__pyx_tb; + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_boxes.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_intersec.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_query_boxes.rcbuffer->pybuffer); + __Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);} + __Pyx_AddTraceback("opencood.utils.box_overlaps.bbox_intersections", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + goto __pyx_L2; + __pyx_L0:; + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_boxes.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_intersec.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_query_boxes.rcbuffer->pybuffer); + __pyx_L2:; + __Pyx_XDECREF((PyObject *)__pyx_v_intersec); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "opencood/utils/box_overlaps.pyx":99 + * + * # Compute bounding box voting + * def box_vote( # <<<<<<<<<<<<<< + * np.ndarray[float, ndim=2] dets_NMS, + * np.ndarray[float, ndim=2] dets_all): + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_8opencood_5utils_12box_overlaps_5box_vote(PyObject *__pyx_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +); /*proto*/ +static PyMethodDef __pyx_mdef_8opencood_5utils_12box_overlaps_5box_vote = {"box_vote", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8opencood_5utils_12box_overlaps_5box_vote, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_8opencood_5utils_12box_overlaps_5box_vote(PyObject *__pyx_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +) { + PyArrayObject *__pyx_v_dets_NMS = 0; + PyArrayObject *__pyx_v_dets_all = 0; + #if !CYTHON_METH_FASTCALL + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + #endif + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject* values[2] = {0,0}; + int __pyx_lineno = 
0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("box_vote (wrapper)", 0); + #if !CYTHON_METH_FASTCALL + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; + #endif + #endif + __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); + { + PyObject **__pyx_pyargnames[] = {&__pyx_n_s_dets_NMS,&__pyx_n_s_dets_all,0}; + if (__pyx_kwds) { + Py_ssize_t kw_args; + switch (__pyx_nargs) { + case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + CYTHON_FALLTHROUGH; + case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); + switch (__pyx_nargs) { + case 0: + if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_dets_NMS)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 99, __pyx_L3_error) + else goto __pyx_L5_argtuple_error; + CYTHON_FALLTHROUGH; + case 1: + if (likely((values[1] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_dets_all)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[1]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 99, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("box_vote", 1, 2, 2, 1); __PYX_ERR(0, 99, __pyx_L3_error) + } + } + if (unlikely(kw_args > 0)) { + const Py_ssize_t kwd_pos_args = __pyx_nargs; + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "box_vote") < 0)) __PYX_ERR(0, 99, __pyx_L3_error) + } + } else if (unlikely(__pyx_nargs != 2)) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + } + __pyx_v_dets_NMS = ((PyArrayObject *)values[0]); + __pyx_v_dets_all = ((PyArrayObject *)values[1]); + } + goto __pyx_L6_skip; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("box_vote", 1, 2, 2, __pyx_nargs); __PYX_ERR(0, 99, __pyx_L3_error) + __pyx_L6_skip:; + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_AddTraceback("opencood.utils.box_overlaps.box_vote", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_dets_NMS), __pyx_ptype_5numpy_ndarray, 1, "dets_NMS", 0))) __PYX_ERR(0, 100, __pyx_L1_error) + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_dets_all), __pyx_ptype_5numpy_ndarray, 1, "dets_all", 0))) __PYX_ERR(0, 101, __pyx_L1_error) + __pyx_r = __pyx_pf_8opencood_5utils_12box_overlaps_4box_vote(__pyx_self, __pyx_v_dets_NMS, __pyx_v_dets_all); + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = NULL; + __pyx_L0:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_8opencood_5utils_12box_overlaps_4box_vote(CYTHON_UNUSED 
PyObject *__pyx_self, PyArrayObject *__pyx_v_dets_NMS, PyArrayObject *__pyx_v_dets_all) { + PyArrayObject *__pyx_v_dets_voted = 0; + unsigned int __pyx_v_N; + unsigned int __pyx_v_M; + PyArrayObject *__pyx_v_det = 0; + PyArrayObject *__pyx_v_acc_box = 0; + float __pyx_v_acc_score; + PyArrayObject *__pyx_v_det2 = 0; + float __pyx_v_bi0; + float __pyx_v_bi1; + float __pyx_v_bi3; + float __pyx_v_iw; + float __pyx_v_ih; + float __pyx_v_ua; + float __pyx_v_thresh; + unsigned int __pyx_v_i; + unsigned int __pyx_v_m; + PyObject *__pyx_v_bi2 = NULL; + float __pyx_v_ov; + __Pyx_LocalBuf_ND __pyx_pybuffernd_acc_box; + __Pyx_Buffer __pyx_pybuffer_acc_box; + __Pyx_LocalBuf_ND __pyx_pybuffernd_det; + __Pyx_Buffer __pyx_pybuffer_det; + __Pyx_LocalBuf_ND __pyx_pybuffernd_det2; + __Pyx_Buffer __pyx_pybuffer_det2; + __Pyx_LocalBuf_ND __pyx_pybuffernd_dets_NMS; + __Pyx_Buffer __pyx_pybuffer_dets_NMS; + __Pyx_LocalBuf_ND __pyx_pybuffernd_dets_all; + __Pyx_Buffer __pyx_pybuffer_dets_all; + __Pyx_LocalBuf_ND __pyx_pybuffernd_dets_voted; + __Pyx_Buffer __pyx_pybuffer_dets_voted; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + npy_intp *__pyx_t_3; + PyObject *__pyx_t_4 = NULL; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyArrayObject *__pyx_t_7 = NULL; + unsigned int __pyx_t_8; + unsigned int __pyx_t_9; + unsigned int __pyx_t_10; + PyArrayObject *__pyx_t_11 = NULL; + int __pyx_t_12; + PyObject *__pyx_t_13 = NULL; + PyObject *__pyx_t_14 = NULL; + PyObject *__pyx_t_15 = NULL; + PyArrayObject *__pyx_t_16 = NULL; + unsigned int __pyx_t_17; + unsigned int __pyx_t_18; + unsigned int __pyx_t_19; + PyArrayObject *__pyx_t_20 = NULL; + Py_ssize_t __pyx_t_21; + float __pyx_t_22; + float __pyx_t_23; + float __pyx_t_24; + int __pyx_t_25; + int __pyx_t_26; + Py_ssize_t __pyx_t_27; + Py_ssize_t __pyx_t_28; + Py_ssize_t __pyx_t_29; + Py_ssize_t __pyx_t_30; + Py_ssize_t __pyx_t_31; + Py_ssize_t __pyx_t_32; + Py_ssize_t __pyx_t_33; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("box_vote", 1); + __pyx_pybuffer_dets_voted.pybuffer.buf = NULL; + __pyx_pybuffer_dets_voted.refcount = 0; + __pyx_pybuffernd_dets_voted.data = NULL; + __pyx_pybuffernd_dets_voted.rcbuffer = &__pyx_pybuffer_dets_voted; + __pyx_pybuffer_det.pybuffer.buf = NULL; + __pyx_pybuffer_det.refcount = 0; + __pyx_pybuffernd_det.data = NULL; + __pyx_pybuffernd_det.rcbuffer = &__pyx_pybuffer_det; + __pyx_pybuffer_acc_box.pybuffer.buf = NULL; + __pyx_pybuffer_acc_box.refcount = 0; + __pyx_pybuffernd_acc_box.data = NULL; + __pyx_pybuffernd_acc_box.rcbuffer = &__pyx_pybuffer_acc_box; + __pyx_pybuffer_det2.pybuffer.buf = NULL; + __pyx_pybuffer_det2.refcount = 0; + __pyx_pybuffernd_det2.data = NULL; + __pyx_pybuffernd_det2.rcbuffer = &__pyx_pybuffer_det2; + __pyx_pybuffer_dets_NMS.pybuffer.buf = NULL; + __pyx_pybuffer_dets_NMS.refcount = 0; + __pyx_pybuffernd_dets_NMS.data = NULL; + __pyx_pybuffernd_dets_NMS.rcbuffer = &__pyx_pybuffer_dets_NMS; + __pyx_pybuffer_dets_all.pybuffer.buf = NULL; + __pyx_pybuffer_dets_all.refcount = 0; + __pyx_pybuffernd_dets_all.data = NULL; + __pyx_pybuffernd_dets_all.rcbuffer = &__pyx_pybuffer_dets_all; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_dets_NMS.rcbuffer->pybuffer, (PyObject*)__pyx_v_dets_NMS, &__Pyx_TypeInfo_float, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) __PYX_ERR(0, 99, __pyx_L1_error) + } + 
__pyx_pybuffernd_dets_NMS.diminfo[0].strides = __pyx_pybuffernd_dets_NMS.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_dets_NMS.diminfo[0].shape = __pyx_pybuffernd_dets_NMS.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_dets_NMS.diminfo[1].strides = __pyx_pybuffernd_dets_NMS.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_dets_NMS.diminfo[1].shape = __pyx_pybuffernd_dets_NMS.rcbuffer->pybuffer.shape[1]; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_dets_all.rcbuffer->pybuffer, (PyObject*)__pyx_v_dets_all, &__Pyx_TypeInfo_float, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) __PYX_ERR(0, 99, __pyx_L1_error) + } + __pyx_pybuffernd_dets_all.diminfo[0].strides = __pyx_pybuffernd_dets_all.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_dets_all.diminfo[0].shape = __pyx_pybuffernd_dets_all.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_dets_all.diminfo[1].strides = __pyx_pybuffernd_dets_all.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_dets_all.diminfo[1].shape = __pyx_pybuffernd_dets_all.rcbuffer->pybuffer.shape[1]; + + /* "opencood/utils/box_overlaps.pyx":102 + * np.ndarray[float, ndim=2] dets_NMS, + * np.ndarray[float, ndim=2] dets_all): + * cdef np.ndarray[float, ndim=2] dets_voted = np.zeros((dets_NMS.shape[0], dets_NMS.shape[1]), dtype=np.float32) # <<<<<<<<<<<<<< + * cdef unsigned int N = dets_NMS.shape[0] + * cdef unsigned int M = dets_all.shape[0] + */ + __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_zeros); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __pyx_f_5numpy_7ndarray_5shape_shape(((PyArrayObject *)__pyx_v_dets_NMS)); if (unlikely(__pyx_t_3 == ((npy_intp *)NULL) && PyErr_Occurred())) __PYX_ERR(0, 102, __pyx_L1_error) + __pyx_t_1 = PyInt_FromSsize_t((__pyx_t_3[0])); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = __pyx_f_5numpy_7ndarray_5shape_shape(((PyArrayObject *)__pyx_v_dets_NMS)); if (unlikely(__pyx_t_3 == ((npy_intp *)NULL) && PyErr_Occurred())) __PYX_ERR(0, 102, __pyx_L1_error) + __pyx_t_4 = PyInt_FromSsize_t((__pyx_t_3[1])); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GIVEREF(__pyx_t_1); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_1)) __PYX_ERR(0, 102, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_4); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_4)) __PYX_ERR(0, 102, __pyx_L1_error); + __pyx_t_1 = 0; + __pyx_t_4 = 0; + __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_GIVEREF(__pyx_t_5); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_5)) __PYX_ERR(0, 102, __pyx_L1_error); + __pyx_t_5 = 0; + __pyx_t_5 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_float32); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + if 
(PyDict_SetItem(__pyx_t_5, __pyx_n_s_dtype, __pyx_t_6) < 0) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_4, __pyx_t_5); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 102, __pyx_L1_error) + __pyx_t_7 = ((PyArrayObject *)__pyx_t_6); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_dets_voted.rcbuffer->pybuffer, (PyObject*)__pyx_t_7, &__Pyx_TypeInfo_float, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) { + __pyx_v_dets_voted = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_dets_voted.rcbuffer->pybuffer.buf = NULL; + __PYX_ERR(0, 102, __pyx_L1_error) + } else {__pyx_pybuffernd_dets_voted.diminfo[0].strides = __pyx_pybuffernd_dets_voted.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_dets_voted.diminfo[0].shape = __pyx_pybuffernd_dets_voted.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_dets_voted.diminfo[1].strides = __pyx_pybuffernd_dets_voted.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_dets_voted.diminfo[1].shape = __pyx_pybuffernd_dets_voted.rcbuffer->pybuffer.shape[1]; + } + } + __pyx_t_7 = 0; + __pyx_v_dets_voted = ((PyArrayObject *)__pyx_t_6); + __pyx_t_6 = 0; + + /* "opencood/utils/box_overlaps.pyx":103 + * np.ndarray[float, ndim=2] dets_all): + * cdef np.ndarray[float, ndim=2] dets_voted = np.zeros((dets_NMS.shape[0], dets_NMS.shape[1]), dtype=np.float32) + * cdef unsigned int N = dets_NMS.shape[0] # <<<<<<<<<<<<<< + * cdef unsigned int M = dets_all.shape[0] + * + */ + __pyx_t_3 = __pyx_f_5numpy_7ndarray_5shape_shape(((PyArrayObject *)__pyx_v_dets_NMS)); if (unlikely(__pyx_t_3 == ((npy_intp *)NULL) && PyErr_Occurred())) __PYX_ERR(0, 103, __pyx_L1_error) + __pyx_v_N = (__pyx_t_3[0]); + + /* "opencood/utils/box_overlaps.pyx":104 + * cdef np.ndarray[float, ndim=2] dets_voted = np.zeros((dets_NMS.shape[0], dets_NMS.shape[1]), dtype=np.float32) + * cdef unsigned int N = dets_NMS.shape[0] + * cdef unsigned int M = dets_all.shape[0] # <<<<<<<<<<<<<< + * + * cdef np.ndarray[float, ndim=1] det + */ + __pyx_t_3 = __pyx_f_5numpy_7ndarray_5shape_shape(((PyArrayObject *)__pyx_v_dets_all)); if (unlikely(__pyx_t_3 == ((npy_intp *)NULL) && PyErr_Occurred())) __PYX_ERR(0, 104, __pyx_L1_error) + __pyx_v_M = (__pyx_t_3[0]); + + /* "opencood/utils/box_overlaps.pyx":114 + * cdef float iw, ih, ua + * + * cdef float thresh=0.5 # <<<<<<<<<<<<<< + * + * for i in range(N): + */ + __pyx_v_thresh = 0.5; + + /* "opencood/utils/box_overlaps.pyx":116 + * cdef float thresh=0.5 + * + * for i in range(N): # <<<<<<<<<<<<<< + * det = dets_NMS[i, :] + * acc_box = np.zeros((4), dtype=np.float32) + */ + __pyx_t_8 = __pyx_v_N; + __pyx_t_9 = __pyx_t_8; + for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) { + __pyx_v_i = __pyx_t_10; + + /* "opencood/utils/box_overlaps.pyx":117 + * + * for i in range(N): + * det = dets_NMS[i, :] # <<<<<<<<<<<<<< + * acc_box = np.zeros((4), dtype=np.float32) + * acc_score = 0.0 + */ + __pyx_t_6 = __Pyx_PyInt_From_unsigned_int(__pyx_v_i); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 117, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 117, __pyx_L1_error) + 
__Pyx_GOTREF(__pyx_t_5); + __Pyx_GIVEREF(__pyx_t_6); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_6)) __PYX_ERR(0, 117, __pyx_L1_error); + __Pyx_INCREF(__pyx_slice__3); + __Pyx_GIVEREF(__pyx_slice__3); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_slice__3)) __PYX_ERR(0, 117, __pyx_L1_error); + __pyx_t_6 = 0; + __pyx_t_6 = __Pyx_PyObject_GetItem(((PyObject *)__pyx_v_dets_NMS), __pyx_t_5); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 117, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 117, __pyx_L1_error) + __pyx_t_11 = ((PyArrayObject *)__pyx_t_6); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_det.rcbuffer->pybuffer); + __pyx_t_12 = __Pyx_GetBufferAndValidate(&__pyx_pybuffernd_det.rcbuffer->pybuffer, (PyObject*)__pyx_t_11, &__Pyx_TypeInfo_float, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack); + if (unlikely(__pyx_t_12 < 0)) { + PyErr_Fetch(&__pyx_t_13, &__pyx_t_14, &__pyx_t_15); + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_det.rcbuffer->pybuffer, (PyObject*)__pyx_v_det, &__Pyx_TypeInfo_float, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { + Py_XDECREF(__pyx_t_13); Py_XDECREF(__pyx_t_14); Py_XDECREF(__pyx_t_15); + __Pyx_RaiseBufferFallbackError(); + } else { + PyErr_Restore(__pyx_t_13, __pyx_t_14, __pyx_t_15); + } + __pyx_t_13 = __pyx_t_14 = __pyx_t_15 = 0; + } + __pyx_pybuffernd_det.diminfo[0].strides = __pyx_pybuffernd_det.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_det.diminfo[0].shape = __pyx_pybuffernd_det.rcbuffer->pybuffer.shape[0]; + if (unlikely((__pyx_t_12 < 0))) __PYX_ERR(0, 117, __pyx_L1_error) + } + __pyx_t_11 = 0; + __Pyx_XDECREF_SET(__pyx_v_det, ((PyArrayObject *)__pyx_t_6)); + __pyx_t_6 = 0; + + /* "opencood/utils/box_overlaps.pyx":118 + * for i in range(N): + * det = dets_NMS[i, :] + * acc_box = np.zeros((4), dtype=np.float32) # <<<<<<<<<<<<<< + * acc_score = 0.0 + * + */ + __Pyx_GetModuleGlobalName(__pyx_t_6, __pyx_n_s_np); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 118, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_zeros); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 118, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 118, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_np); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 118, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_float32); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 118, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (PyDict_SetItem(__pyx_t_6, __pyx_n_s_dtype, __pyx_t_2) < 0) __PYX_ERR(0, 118, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = __Pyx_PyObject_Call(__pyx_t_5, __pyx_tuple__4, __pyx_t_6); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 118, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + if (!(likely(((__pyx_t_2) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_2, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 118, __pyx_L1_error) + __pyx_t_16 = ((PyArrayObject *)__pyx_t_2); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_acc_box.rcbuffer->pybuffer); 
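+ /* Rebind the acc_box buffer view to the freshly allocated np.zeros((4), dtype=np.float32) array; on failure, fall back to re-acquiring the previous acc_box buffer before propagating the error. */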
+ __pyx_t_12 = __Pyx_GetBufferAndValidate(&__pyx_pybuffernd_acc_box.rcbuffer->pybuffer, (PyObject*)__pyx_t_16, &__Pyx_TypeInfo_float, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack); + if (unlikely(__pyx_t_12 < 0)) { + PyErr_Fetch(&__pyx_t_15, &__pyx_t_14, &__pyx_t_13); + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_acc_box.rcbuffer->pybuffer, (PyObject*)__pyx_v_acc_box, &__Pyx_TypeInfo_float, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { + Py_XDECREF(__pyx_t_15); Py_XDECREF(__pyx_t_14); Py_XDECREF(__pyx_t_13); + __Pyx_RaiseBufferFallbackError(); + } else { + PyErr_Restore(__pyx_t_15, __pyx_t_14, __pyx_t_13); + } + __pyx_t_15 = __pyx_t_14 = __pyx_t_13 = 0; + } + __pyx_pybuffernd_acc_box.diminfo[0].strides = __pyx_pybuffernd_acc_box.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_acc_box.diminfo[0].shape = __pyx_pybuffernd_acc_box.rcbuffer->pybuffer.shape[0]; + if (unlikely((__pyx_t_12 < 0))) __PYX_ERR(0, 118, __pyx_L1_error) + } + __pyx_t_16 = 0; + __Pyx_XDECREF_SET(__pyx_v_acc_box, ((PyArrayObject *)__pyx_t_2)); + __pyx_t_2 = 0; + + /* "opencood/utils/box_overlaps.pyx":119 + * det = dets_NMS[i, :] + * acc_box = np.zeros((4), dtype=np.float32) + * acc_score = 0.0 # <<<<<<<<<<<<<< + * + * for m in range(M): + */ + __pyx_v_acc_score = 0.0; + + /* "opencood/utils/box_overlaps.pyx":121 + * acc_score = 0.0 + * + * for m in range(M): # <<<<<<<<<<<<<< + * det2 = dets_all[m, :] + * + */ + __pyx_t_17 = __pyx_v_M; + __pyx_t_18 = __pyx_t_17; + for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { + __pyx_v_m = __pyx_t_19; + + /* "opencood/utils/box_overlaps.pyx":122 + * + * for m in range(M): + * det2 = dets_all[m, :] # <<<<<<<<<<<<<< + * + * bi0 = max(det[0], det2[0]) + */ + __pyx_t_2 = __Pyx_PyInt_From_unsigned_int(__pyx_v_m); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 122, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_6 = PyTuple_New(2); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 122, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_GIVEREF(__pyx_t_2); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_2)) __PYX_ERR(0, 122, __pyx_L1_error); + __Pyx_INCREF(__pyx_slice__3); + __Pyx_GIVEREF(__pyx_slice__3); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_6, 1, __pyx_slice__3)) __PYX_ERR(0, 122, __pyx_L1_error); + __pyx_t_2 = 0; + __pyx_t_2 = __Pyx_PyObject_GetItem(((PyObject *)__pyx_v_dets_all), __pyx_t_6); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 122, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + if (!(likely(((__pyx_t_2) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_2, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 122, __pyx_L1_error) + __pyx_t_20 = ((PyArrayObject *)__pyx_t_2); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_det2.rcbuffer->pybuffer); + __pyx_t_12 = __Pyx_GetBufferAndValidate(&__pyx_pybuffernd_det2.rcbuffer->pybuffer, (PyObject*)__pyx_t_20, &__Pyx_TypeInfo_float, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack); + if (unlikely(__pyx_t_12 < 0)) { + PyErr_Fetch(&__pyx_t_13, &__pyx_t_14, &__pyx_t_15); + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_det2.rcbuffer->pybuffer, (PyObject*)__pyx_v_det2, &__Pyx_TypeInfo_float, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { + Py_XDECREF(__pyx_t_13); Py_XDECREF(__pyx_t_14); Py_XDECREF(__pyx_t_15); + __Pyx_RaiseBufferFallbackError(); + } else { + PyErr_Restore(__pyx_t_13, __pyx_t_14, __pyx_t_15); + } + __pyx_t_13 = __pyx_t_14 = __pyx_t_15 = 0; + } + __pyx_pybuffernd_det2.diminfo[0].strides = 
__pyx_pybuffernd_det2.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_det2.diminfo[0].shape = __pyx_pybuffernd_det2.rcbuffer->pybuffer.shape[0]; + if (unlikely((__pyx_t_12 < 0))) __PYX_ERR(0, 122, __pyx_L1_error) + } + __pyx_t_20 = 0; + __Pyx_XDECREF_SET(__pyx_v_det2, ((PyArrayObject *)__pyx_t_2)); + __pyx_t_2 = 0; + + /* "opencood/utils/box_overlaps.pyx":124 + * det2 = dets_all[m, :] + * + * bi0 = max(det[0], det2[0]) # <<<<<<<<<<<<<< + * bi1 = max(det[1], det2[1]) + * bi2 = min(det[2], det2[2]) + */ + __pyx_t_21 = 0; + __pyx_t_12 = -1; + if (__pyx_t_21 < 0) { + __pyx_t_21 += __pyx_pybuffernd_det2.diminfo[0].shape; + if (unlikely(__pyx_t_21 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_21 >= __pyx_pybuffernd_det2.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 124, __pyx_L1_error) + } + __pyx_t_22 = (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det2.rcbuffer->pybuffer.buf, __pyx_t_21, __pyx_pybuffernd_det2.diminfo[0].strides)); + __pyx_t_21 = 0; + __pyx_t_12 = -1; + if (__pyx_t_21 < 0) { + __pyx_t_21 += __pyx_pybuffernd_det.diminfo[0].shape; + if (unlikely(__pyx_t_21 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_21 >= __pyx_pybuffernd_det.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 124, __pyx_L1_error) + } + __pyx_t_23 = (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det.rcbuffer->pybuffer.buf, __pyx_t_21, __pyx_pybuffernd_det.diminfo[0].strides)); + __pyx_t_25 = (__pyx_t_22 > __pyx_t_23); + if (__pyx_t_25) { + __pyx_t_24 = __pyx_t_22; + } else { + __pyx_t_24 = __pyx_t_23; + } + __pyx_v_bi0 = __pyx_t_24; + + /* "opencood/utils/box_overlaps.pyx":125 + * + * bi0 = max(det[0], det2[0]) + * bi1 = max(det[1], det2[1]) # <<<<<<<<<<<<<< + * bi2 = min(det[2], det2[2]) + * bi3 = min(det[3], det2[3]) + */ + __pyx_t_21 = 1; + __pyx_t_12 = -1; + if (__pyx_t_21 < 0) { + __pyx_t_21 += __pyx_pybuffernd_det2.diminfo[0].shape; + if (unlikely(__pyx_t_21 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_21 >= __pyx_pybuffernd_det2.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 125, __pyx_L1_error) + } + __pyx_t_24 = (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det2.rcbuffer->pybuffer.buf, __pyx_t_21, __pyx_pybuffernd_det2.diminfo[0].strides)); + __pyx_t_21 = 1; + __pyx_t_12 = -1; + if (__pyx_t_21 < 0) { + __pyx_t_21 += __pyx_pybuffernd_det.diminfo[0].shape; + if (unlikely(__pyx_t_21 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_21 >= __pyx_pybuffernd_det.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 125, __pyx_L1_error) + } + __pyx_t_22 = (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det.rcbuffer->pybuffer.buf, __pyx_t_21, __pyx_pybuffernd_det.diminfo[0].strides)); + __pyx_t_25 = (__pyx_t_24 > __pyx_t_22); + if (__pyx_t_25) { + __pyx_t_23 = __pyx_t_24; + } else { + __pyx_t_23 = __pyx_t_22; + } + __pyx_v_bi1 = __pyx_t_23; + + /* "opencood/utils/box_overlaps.pyx":126 + * bi0 = max(det[0], det2[0]) + * bi1 = max(det[1], det2[1]) + * bi2 = min(det[2], det2[2]) # <<<<<<<<<<<<<< + * bi3 = min(det[3], det2[3]) + * + */ + __pyx_t_21 = 2; + __pyx_t_12 = -1; + if (__pyx_t_21 < 0) { + __pyx_t_21 += __pyx_pybuffernd_det2.diminfo[0].shape; + if (unlikely(__pyx_t_21 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_21 >= 
__pyx_pybuffernd_det2.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 126, __pyx_L1_error) + } + __pyx_t_23 = (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det2.rcbuffer->pybuffer.buf, __pyx_t_21, __pyx_pybuffernd_det2.diminfo[0].strides)); + __pyx_t_21 = 2; + __pyx_t_12 = -1; + if (__pyx_t_21 < 0) { + __pyx_t_21 += __pyx_pybuffernd_det.diminfo[0].shape; + if (unlikely(__pyx_t_21 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_21 >= __pyx_pybuffernd_det.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 126, __pyx_L1_error) + } + __pyx_t_24 = (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det.rcbuffer->pybuffer.buf, __pyx_t_21, __pyx_pybuffernd_det.diminfo[0].strides)); + __pyx_t_25 = (__pyx_t_23 < __pyx_t_24); + if (__pyx_t_25) { + __pyx_t_22 = __pyx_t_23; + } else { + __pyx_t_22 = __pyx_t_24; + } + __pyx_t_2 = PyFloat_FromDouble(__pyx_t_22); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 126, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_XDECREF_SET(__pyx_v_bi2, __pyx_t_2); + __pyx_t_2 = 0; + + /* "opencood/utils/box_overlaps.pyx":127 + * bi1 = max(det[1], det2[1]) + * bi2 = min(det[2], det2[2]) + * bi3 = min(det[3], det2[3]) # <<<<<<<<<<<<<< + * + * iw = bi2 - bi0 + 1 + */ + __pyx_t_21 = 3; + __pyx_t_12 = -1; + if (__pyx_t_21 < 0) { + __pyx_t_21 += __pyx_pybuffernd_det2.diminfo[0].shape; + if (unlikely(__pyx_t_21 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_21 >= __pyx_pybuffernd_det2.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 127, __pyx_L1_error) + } + __pyx_t_22 = (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det2.rcbuffer->pybuffer.buf, __pyx_t_21, __pyx_pybuffernd_det2.diminfo[0].strides)); + __pyx_t_21 = 3; + __pyx_t_12 = -1; + if (__pyx_t_21 < 0) { + __pyx_t_21 += __pyx_pybuffernd_det.diminfo[0].shape; + if (unlikely(__pyx_t_21 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_21 >= __pyx_pybuffernd_det.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 127, __pyx_L1_error) + } + __pyx_t_23 = (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det.rcbuffer->pybuffer.buf, __pyx_t_21, __pyx_pybuffernd_det.diminfo[0].strides)); + __pyx_t_25 = (__pyx_t_22 < __pyx_t_23); + if (__pyx_t_25) { + __pyx_t_24 = __pyx_t_22; + } else { + __pyx_t_24 = __pyx_t_23; + } + __pyx_v_bi3 = __pyx_t_24; + + /* "opencood/utils/box_overlaps.pyx":129 + * bi3 = min(det[3], det2[3]) + * + * iw = bi2 - bi0 + 1 # <<<<<<<<<<<<<< + * ih = bi3 - bi1 + 1 + * + */ + __pyx_t_2 = PyFloat_FromDouble(__pyx_v_bi0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 129, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_6 = PyNumber_Subtract(__pyx_v_bi2, __pyx_t_2); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 129, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = __Pyx_PyInt_AddObjC(__pyx_t_6, __pyx_int_1, 1, 0, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 129, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_24 = __pyx_PyFloat_AsFloat(__pyx_t_2); if (unlikely((__pyx_t_24 == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 129, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_v_iw = __pyx_t_24; + + /* "opencood/utils/box_overlaps.pyx":130 + * + * iw = bi2 - bi0 + 1 + * ih = bi3 - bi1 + 1 # 
<<<<<<<<<<<<<< + * + * if not (iw > 0 and ih > 0): + */ + __pyx_v_ih = ((__pyx_v_bi3 - __pyx_v_bi1) + 1.0); + + /* "opencood/utils/box_overlaps.pyx":132 + * ih = bi3 - bi1 + 1 + * + * if not (iw > 0 and ih > 0): # <<<<<<<<<<<<<< + * continue + * + */ + __pyx_t_26 = (__pyx_v_iw > 0.0); + if (__pyx_t_26) { + } else { + __pyx_t_25 = __pyx_t_26; + goto __pyx_L8_bool_binop_done; + } + __pyx_t_26 = (__pyx_v_ih > 0.0); + __pyx_t_25 = __pyx_t_26; + __pyx_L8_bool_binop_done:; + __pyx_t_26 = (!__pyx_t_25); + if (__pyx_t_26) { + + /* "opencood/utils/box_overlaps.pyx":133 + * + * if not (iw > 0 and ih > 0): + * continue # <<<<<<<<<<<<<< + * + * ua = (det[2] - det[0] + 1) * (det[3] - det[1] + 1) + (det2[2] - det2[0] + 1) * (det2[3] - det2[1] + 1) - iw * ih + */ + goto __pyx_L5_continue; + + /* "opencood/utils/box_overlaps.pyx":132 + * ih = bi3 - bi1 + 1 + * + * if not (iw > 0 and ih > 0): # <<<<<<<<<<<<<< + * continue + * + */ + } + + /* "opencood/utils/box_overlaps.pyx":135 + * continue + * + * ua = (det[2] - det[0] + 1) * (det[3] - det[1] + 1) + (det2[2] - det2[0] + 1) * (det2[3] - det2[1] + 1) - iw * ih # <<<<<<<<<<<<<< + * ov = iw * ih / ua + * + */ + __pyx_t_21 = 2; + __pyx_t_12 = -1; + if (__pyx_t_21 < 0) { + __pyx_t_21 += __pyx_pybuffernd_det.diminfo[0].shape; + if (unlikely(__pyx_t_21 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_21 >= __pyx_pybuffernd_det.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 135, __pyx_L1_error) + } + __pyx_t_27 = 0; + __pyx_t_12 = -1; + if (__pyx_t_27 < 0) { + __pyx_t_27 += __pyx_pybuffernd_det.diminfo[0].shape; + if (unlikely(__pyx_t_27 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_27 >= __pyx_pybuffernd_det.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 135, __pyx_L1_error) + } + __pyx_t_28 = 3; + __pyx_t_12 = -1; + if (__pyx_t_28 < 0) { + __pyx_t_28 += __pyx_pybuffernd_det.diminfo[0].shape; + if (unlikely(__pyx_t_28 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_28 >= __pyx_pybuffernd_det.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 135, __pyx_L1_error) + } + __pyx_t_29 = 1; + __pyx_t_12 = -1; + if (__pyx_t_29 < 0) { + __pyx_t_29 += __pyx_pybuffernd_det.diminfo[0].shape; + if (unlikely(__pyx_t_29 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_29 >= __pyx_pybuffernd_det.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 135, __pyx_L1_error) + } + __pyx_t_30 = 2; + __pyx_t_12 = -1; + if (__pyx_t_30 < 0) { + __pyx_t_30 += __pyx_pybuffernd_det2.diminfo[0].shape; + if (unlikely(__pyx_t_30 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_30 >= __pyx_pybuffernd_det2.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 135, __pyx_L1_error) + } + __pyx_t_31 = 0; + __pyx_t_12 = -1; + if (__pyx_t_31 < 0) { + __pyx_t_31 += __pyx_pybuffernd_det2.diminfo[0].shape; + if (unlikely(__pyx_t_31 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_31 >= __pyx_pybuffernd_det2.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 135, __pyx_L1_error) + } + __pyx_t_32 = 3; + __pyx_t_12 = -1; + if (__pyx_t_32 < 0) { + __pyx_t_32 += __pyx_pybuffernd_det2.diminfo[0].shape; + if 
(unlikely(__pyx_t_32 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_32 >= __pyx_pybuffernd_det2.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 135, __pyx_L1_error) + } + __pyx_t_33 = 1; + __pyx_t_12 = -1; + if (__pyx_t_33 < 0) { + __pyx_t_33 += __pyx_pybuffernd_det2.diminfo[0].shape; + if (unlikely(__pyx_t_33 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_33 >= __pyx_pybuffernd_det2.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 135, __pyx_L1_error) + } + __pyx_v_ua = ((((((*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det.rcbuffer->pybuffer.buf, __pyx_t_21, __pyx_pybuffernd_det.diminfo[0].strides)) - (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det.rcbuffer->pybuffer.buf, __pyx_t_27, __pyx_pybuffernd_det.diminfo[0].strides))) + 1.0) * (((*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det.rcbuffer->pybuffer.buf, __pyx_t_28, __pyx_pybuffernd_det.diminfo[0].strides)) - (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det.rcbuffer->pybuffer.buf, __pyx_t_29, __pyx_pybuffernd_det.diminfo[0].strides))) + 1.0)) + ((((*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det2.rcbuffer->pybuffer.buf, __pyx_t_30, __pyx_pybuffernd_det2.diminfo[0].strides)) - (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det2.rcbuffer->pybuffer.buf, __pyx_t_31, __pyx_pybuffernd_det2.diminfo[0].strides))) + 1.0) * (((*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det2.rcbuffer->pybuffer.buf, __pyx_t_32, __pyx_pybuffernd_det2.diminfo[0].strides)) - (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det2.rcbuffer->pybuffer.buf, __pyx_t_33, __pyx_pybuffernd_det2.diminfo[0].strides))) + 1.0))) - (__pyx_v_iw * __pyx_v_ih)); + + /* "opencood/utils/box_overlaps.pyx":136 + * + * ua = (det[2] - det[0] + 1) * (det[3] - det[1] + 1) + (det2[2] - det2[0] + 1) * (det2[3] - det2[1] + 1) - iw * ih + * ov = iw * ih / ua # <<<<<<<<<<<<<< + * + * if (ov < thresh): + */ + __pyx_t_24 = (__pyx_v_iw * __pyx_v_ih); + if (unlikely(__pyx_v_ua == 0)) { + PyErr_SetString(PyExc_ZeroDivisionError, "float division"); + __PYX_ERR(0, 136, __pyx_L1_error) + } + __pyx_v_ov = (__pyx_t_24 / __pyx_v_ua); + + /* "opencood/utils/box_overlaps.pyx":138 + * ov = iw * ih / ua + * + * if (ov < thresh): # <<<<<<<<<<<<<< + * continue + * + */ + __pyx_t_26 = (__pyx_v_ov < __pyx_v_thresh); + if (__pyx_t_26) { + + /* "opencood/utils/box_overlaps.pyx":139 + * + * if (ov < thresh): + * continue # <<<<<<<<<<<<<< + * + * acc_box += det2[4] * det2[0:4] + */ + goto __pyx_L5_continue; + + /* "opencood/utils/box_overlaps.pyx":138 + * ov = iw * ih / ua + * + * if (ov < thresh): # <<<<<<<<<<<<<< + * continue + * + */ + } + + /* "opencood/utils/box_overlaps.pyx":141 + * continue + * + * acc_box += det2[4] * det2[0:4] # <<<<<<<<<<<<<< + * acc_score += det2[4] + * + */ + __pyx_t_33 = 4; + __pyx_t_12 = -1; + if (__pyx_t_33 < 0) { + __pyx_t_33 += __pyx_pybuffernd_det2.diminfo[0].shape; + if (unlikely(__pyx_t_33 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_33 >= __pyx_pybuffernd_det2.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 141, __pyx_L1_error) + } + __pyx_t_2 = PyFloat_FromDouble((*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det2.rcbuffer->pybuffer.buf, __pyx_t_33, __pyx_pybuffernd_det2.diminfo[0].strides))); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 141, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + 
__pyx_t_6 = __Pyx_PyObject_GetItem(((PyObject *)__pyx_v_det2), __pyx_slice__5); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 141, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_5 = PyNumber_Multiply(__pyx_t_2, __pyx_t_6); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 141, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = PyNumber_InPlaceAdd(((PyObject *)__pyx_v_acc_box), __pyx_t_5); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 141, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 141, __pyx_L1_error) + __pyx_t_16 = ((PyArrayObject *)__pyx_t_6); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_acc_box.rcbuffer->pybuffer); + __pyx_t_12 = __Pyx_GetBufferAndValidate(&__pyx_pybuffernd_acc_box.rcbuffer->pybuffer, (PyObject*)__pyx_t_16, &__Pyx_TypeInfo_float, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack); + if (unlikely(__pyx_t_12 < 0)) { + PyErr_Fetch(&__pyx_t_15, &__pyx_t_14, &__pyx_t_13); + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_acc_box.rcbuffer->pybuffer, (PyObject*)__pyx_v_acc_box, &__Pyx_TypeInfo_float, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { + Py_XDECREF(__pyx_t_15); Py_XDECREF(__pyx_t_14); Py_XDECREF(__pyx_t_13); + __Pyx_RaiseBufferFallbackError(); + } else { + PyErr_Restore(__pyx_t_15, __pyx_t_14, __pyx_t_13); + } + __pyx_t_15 = __pyx_t_14 = __pyx_t_13 = 0; + } + __pyx_pybuffernd_acc_box.diminfo[0].strides = __pyx_pybuffernd_acc_box.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_acc_box.diminfo[0].shape = __pyx_pybuffernd_acc_box.rcbuffer->pybuffer.shape[0]; + if (unlikely((__pyx_t_12 < 0))) __PYX_ERR(0, 141, __pyx_L1_error) + } + __pyx_t_16 = 0; + __Pyx_DECREF_SET(__pyx_v_acc_box, ((PyArrayObject *)__pyx_t_6)); + __pyx_t_6 = 0; + + /* "opencood/utils/box_overlaps.pyx":142 + * + * acc_box += det2[4] * det2[0:4] + * acc_score += det2[4] # <<<<<<<<<<<<<< + * + * dets_voted[i][0:4] = acc_box / acc_score + */ + __pyx_t_33 = 4; + __pyx_t_12 = -1; + if (__pyx_t_33 < 0) { + __pyx_t_33 += __pyx_pybuffernd_det2.diminfo[0].shape; + if (unlikely(__pyx_t_33 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_33 >= __pyx_pybuffernd_det2.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 142, __pyx_L1_error) + } + __pyx_v_acc_score = (__pyx_v_acc_score + (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det2.rcbuffer->pybuffer.buf, __pyx_t_33, __pyx_pybuffernd_det2.diminfo[0].strides))); + __pyx_L5_continue:; + } + + /* "opencood/utils/box_overlaps.pyx":144 + * acc_score += det2[4] + * + * dets_voted[i][0:4] = acc_box / acc_score # <<<<<<<<<<<<<< + * dets_voted[i][4] = det[4] # Keep the original score + * + */ + __pyx_t_6 = PyFloat_FromDouble(__pyx_v_acc_score); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 144, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_5 = __Pyx_PyNumber_Divide(((PyObject *)__pyx_v_acc_box), __pyx_t_6); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 144, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = __Pyx_GetItemInt(((PyObject *)__pyx_v_dets_voted), __pyx_v_i, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 144, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + if (__Pyx_PyObject_SetSlice(__pyx_t_6, 
__pyx_t_5, 0, 4, NULL, NULL, &__pyx_slice__5, 1, 1, 1) < 0) __PYX_ERR(0, 144, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + + /* "opencood/utils/box_overlaps.pyx":145 + * + * dets_voted[i][0:4] = acc_box / acc_score + * dets_voted[i][4] = det[4] # Keep the original score # <<<<<<<<<<<<<< + * + * return dets_voted + */ + __pyx_t_33 = 4; + __pyx_t_12 = -1; + if (__pyx_t_33 < 0) { + __pyx_t_33 += __pyx_pybuffernd_det.diminfo[0].shape; + if (unlikely(__pyx_t_33 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_33 >= __pyx_pybuffernd_det.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 145, __pyx_L1_error) + } + __pyx_t_5 = PyFloat_FromDouble((*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det.rcbuffer->pybuffer.buf, __pyx_t_33, __pyx_pybuffernd_det.diminfo[0].strides))); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 145, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_6 = __Pyx_GetItemInt(((PyObject *)__pyx_v_dets_voted), __pyx_v_i, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 145, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + if (unlikely((__Pyx_SetItemInt(__pyx_t_6, 4, __pyx_t_5, long, 1, __Pyx_PyInt_From_long, 0, 0, 1) < 0))) __PYX_ERR(0, 145, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + } + + /* "opencood/utils/box_overlaps.pyx":147 + * dets_voted[i][4] = det[4] # Keep the original score + * + * return dets_voted # <<<<<<<<<<<<<< + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF((PyObject *)__pyx_v_dets_voted); + __pyx_r = ((PyObject *)__pyx_v_dets_voted); + goto __pyx_L0; + + /* "opencood/utils/box_overlaps.pyx":99 + * + * # Compute bounding box voting + * def box_vote( # <<<<<<<<<<<<<< + * np.ndarray[float, ndim=2] dets_NMS, + * np.ndarray[float, ndim=2] dets_all): + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + { PyObject *__pyx_type, *__pyx_value, *__pyx_tb; + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_acc_box.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_det.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_det2.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_dets_NMS.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_dets_all.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_dets_voted.rcbuffer->pybuffer); + __Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);} + __Pyx_AddTraceback("opencood.utils.box_overlaps.box_vote", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + goto __pyx_L2; + __pyx_L0:; + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_acc_box.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_det.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_det2.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_dets_NMS.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_dets_all.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_dets_voted.rcbuffer->pybuffer); + __pyx_L2:; + __Pyx_XDECREF((PyObject *)__pyx_v_dets_voted); + __Pyx_XDECREF((PyObject *)__pyx_v_det); + __Pyx_XDECREF((PyObject *)__pyx_v_acc_box); + 
__Pyx_XDECREF((PyObject *)__pyx_v_det2); + __Pyx_XDECREF(__pyx_v_bi2); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyMethodDef __pyx_methods[] = { + {0, 0, 0, 0} +}; +#ifndef CYTHON_SMALL_CODE +#if defined(__clang__) + #define CYTHON_SMALL_CODE +#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) + #define CYTHON_SMALL_CODE __attribute__((cold)) +#else + #define CYTHON_SMALL_CODE +#endif +#endif +/* #### Code section: pystring_table ### */ + +static int __Pyx_CreateStringTabAndInitStrings(void) { + __Pyx_StringTabEntry __pyx_string_tab[] = { + {&__pyx_n_s_DTYPE, __pyx_k_DTYPE, sizeof(__pyx_k_DTYPE), 0, 0, 1, 1}, + {&__pyx_n_s_ImportError, __pyx_k_ImportError, sizeof(__pyx_k_ImportError), 0, 0, 1, 1}, + {&__pyx_n_s_K, __pyx_k_K, sizeof(__pyx_k_K), 0, 0, 1, 1}, + {&__pyx_n_s_M, __pyx_k_M, sizeof(__pyx_k_M), 0, 0, 1, 1}, + {&__pyx_n_s_N, __pyx_k_N, sizeof(__pyx_k_N), 0, 0, 1, 1}, + {&__pyx_n_s__13, __pyx_k__13, sizeof(__pyx_k__13), 0, 0, 1, 1}, + {&__pyx_n_s__6, __pyx_k__6, sizeof(__pyx_k__6), 0, 0, 1, 1}, + {&__pyx_n_s_acc_box, __pyx_k_acc_box, sizeof(__pyx_k_acc_box), 0, 0, 1, 1}, + {&__pyx_n_s_acc_score, __pyx_k_acc_score, sizeof(__pyx_k_acc_score), 0, 0, 1, 1}, + {&__pyx_n_s_asyncio_coroutines, __pyx_k_asyncio_coroutines, sizeof(__pyx_k_asyncio_coroutines), 0, 0, 1, 1}, + {&__pyx_n_s_bbox_intersections, __pyx_k_bbox_intersections, sizeof(__pyx_k_bbox_intersections), 0, 0, 1, 1}, + {&__pyx_n_s_bbox_overlaps, __pyx_k_bbox_overlaps, sizeof(__pyx_k_bbox_overlaps), 0, 0, 1, 1}, + {&__pyx_n_s_bi0, __pyx_k_bi0, sizeof(__pyx_k_bi0), 0, 0, 1, 1}, + {&__pyx_n_s_bi1, __pyx_k_bi1, sizeof(__pyx_k_bi1), 0, 0, 1, 1}, + {&__pyx_n_s_bi2, __pyx_k_bi2, sizeof(__pyx_k_bi2), 0, 0, 1, 1}, + {&__pyx_n_s_bi3, __pyx_k_bi3, sizeof(__pyx_k_bi3), 0, 0, 1, 1}, + {&__pyx_n_s_bit2, __pyx_k_bit2, sizeof(__pyx_k_bit2), 0, 0, 1, 1}, + {&__pyx_n_s_box_area, __pyx_k_box_area, sizeof(__pyx_k_box_area), 0, 0, 1, 1}, + {&__pyx_n_s_box_vote, __pyx_k_box_vote, sizeof(__pyx_k_box_vote), 0, 0, 1, 1}, + {&__pyx_n_s_boxes, __pyx_k_boxes, sizeof(__pyx_k_boxes), 0, 0, 1, 1}, + {&__pyx_n_s_class_getitem, __pyx_k_class_getitem, sizeof(__pyx_k_class_getitem), 0, 0, 1, 1}, + {&__pyx_n_s_cline_in_traceback, __pyx_k_cline_in_traceback, sizeof(__pyx_k_cline_in_traceback), 0, 0, 1, 1}, + {&__pyx_n_s_det, __pyx_k_det, sizeof(__pyx_k_det), 0, 0, 1, 1}, + {&__pyx_n_s_det2, __pyx_k_det2, sizeof(__pyx_k_det2), 0, 0, 1, 1}, + {&__pyx_n_s_dets_NMS, __pyx_k_dets_NMS, sizeof(__pyx_k_dets_NMS), 0, 0, 1, 1}, + {&__pyx_n_s_dets_all, __pyx_k_dets_all, sizeof(__pyx_k_dets_all), 0, 0, 1, 1}, + {&__pyx_n_s_dets_voted, __pyx_k_dets_voted, sizeof(__pyx_k_dets_voted), 0, 0, 1, 1}, + {&__pyx_n_s_dtype, __pyx_k_dtype, sizeof(__pyx_k_dtype), 0, 0, 1, 1}, + {&__pyx_n_s_float32, __pyx_k_float32, sizeof(__pyx_k_float32), 0, 0, 1, 1}, + {&__pyx_n_s_i, __pyx_k_i, sizeof(__pyx_k_i), 0, 0, 1, 1}, + {&__pyx_n_s_ih, __pyx_k_ih, sizeof(__pyx_k_ih), 0, 0, 1, 1}, + {&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1}, + {&__pyx_n_s_initializing, __pyx_k_initializing, sizeof(__pyx_k_initializing), 0, 0, 1, 1}, + {&__pyx_n_s_intersec, __pyx_k_intersec, sizeof(__pyx_k_intersec), 0, 0, 1, 1}, + {&__pyx_n_s_is_coroutine, __pyx_k_is_coroutine, sizeof(__pyx_k_is_coroutine), 0, 0, 1, 1}, + {&__pyx_n_s_iw, __pyx_k_iw, sizeof(__pyx_k_iw), 0, 0, 1, 1}, + {&__pyx_n_s_k, __pyx_k_k, sizeof(__pyx_k_k), 0, 0, 1, 1}, + {&__pyx_n_s_m, __pyx_k_m, sizeof(__pyx_k_m), 0, 0, 1, 1}, + {&__pyx_n_s_main, 
__pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, + {&__pyx_n_s_n, __pyx_k_n, sizeof(__pyx_k_n), 0, 0, 1, 1}, + {&__pyx_n_s_name, __pyx_k_name, sizeof(__pyx_k_name), 0, 0, 1, 1}, + {&__pyx_n_s_np, __pyx_k_np, sizeof(__pyx_k_np), 0, 0, 1, 1}, + {&__pyx_n_s_numpy, __pyx_k_numpy, sizeof(__pyx_k_numpy), 0, 0, 1, 1}, + {&__pyx_kp_s_numpy_core_multiarray_failed_to, __pyx_k_numpy_core_multiarray_failed_to, sizeof(__pyx_k_numpy_core_multiarray_failed_to), 0, 0, 1, 0}, + {&__pyx_kp_s_numpy_core_umath_failed_to_impor, __pyx_k_numpy_core_umath_failed_to_impor, sizeof(__pyx_k_numpy_core_umath_failed_to_impor), 0, 0, 1, 0}, + {&__pyx_n_s_opencood_utils_box_overlaps, __pyx_k_opencood_utils_box_overlaps, sizeof(__pyx_k_opencood_utils_box_overlaps), 0, 0, 1, 1}, + {&__pyx_kp_s_opencood_utils_box_overlaps_pyx, __pyx_k_opencood_utils_box_overlaps_pyx, sizeof(__pyx_k_opencood_utils_box_overlaps_pyx), 0, 0, 1, 0}, + {&__pyx_n_s_ov, __pyx_k_ov, sizeof(__pyx_k_ov), 0, 0, 1, 1}, + {&__pyx_n_s_overlaps, __pyx_k_overlaps, sizeof(__pyx_k_overlaps), 0, 0, 1, 1}, + {&__pyx_n_s_query_boxes, __pyx_k_query_boxes, sizeof(__pyx_k_query_boxes), 0, 0, 1, 1}, + {&__pyx_n_s_range, __pyx_k_range, sizeof(__pyx_k_range), 0, 0, 1, 1}, + {&__pyx_n_s_spec, __pyx_k_spec, sizeof(__pyx_k_spec), 0, 0, 1, 1}, + {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, + {&__pyx_n_s_thresh, __pyx_k_thresh, sizeof(__pyx_k_thresh), 0, 0, 1, 1}, + {&__pyx_n_s_ua, __pyx_k_ua, sizeof(__pyx_k_ua), 0, 0, 1, 1}, + {&__pyx_n_s_zeros, __pyx_k_zeros, sizeof(__pyx_k_zeros), 0, 0, 1, 1}, + {0, 0, 0, 0, 0, 0, 0} + }; + return __Pyx_InitStrings(__pyx_string_tab); +} +/* #### Code section: cached_builtins ### */ +static CYTHON_SMALL_CODE int __Pyx_InitCachedBuiltins(void) { + __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 35, __pyx_L1_error) + __pyx_builtin_ImportError = __Pyx_GetBuiltinName(__pyx_n_s_ImportError); if (!__pyx_builtin_ImportError) __PYX_ERR(1, 991, __pyx_L1_error) + return 0; + __pyx_L1_error:; + return -1; +} +/* #### Code section: cached_constants ### */ + +static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":991 + * __pyx_import_array() + * except Exception: + * raise ImportError("numpy.core.multiarray failed to import") # <<<<<<<<<<<<<< + * + * cdef inline int import_umath() except -1: + */ + __pyx_tuple_ = PyTuple_Pack(1, __pyx_kp_s_numpy_core_multiarray_failed_to); if (unlikely(!__pyx_tuple_)) __PYX_ERR(1, 991, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple_); + __Pyx_GIVEREF(__pyx_tuple_); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":997 + * _import_umath() + * except Exception: + * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< + * + * cdef inline int import_ufunc() except -1: + */ + __pyx_tuple__2 = PyTuple_Pack(1, __pyx_kp_s_numpy_core_umath_failed_to_impor); if (unlikely(!__pyx_tuple__2)) __PYX_ERR(1, 997, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__2); + __Pyx_GIVEREF(__pyx_tuple__2); + + /* "opencood/utils/box_overlaps.pyx":117 + * + * for i in range(N): + * det = dets_NMS[i, :] # <<<<<<<<<<<<<< + * acc_box = np.zeros((4), dtype=np.float32) + * acc_score = 0.0 + */ + __pyx_slice__3 = PySlice_New(Py_None, Py_None, Py_None); if (unlikely(!__pyx_slice__3)) __PYX_ERR(0, 117, 
__pyx_L1_error) + __Pyx_GOTREF(__pyx_slice__3); + __Pyx_GIVEREF(__pyx_slice__3); + + /* "opencood/utils/box_overlaps.pyx":118 + * for i in range(N): + * det = dets_NMS[i, :] + * acc_box = np.zeros((4), dtype=np.float32) # <<<<<<<<<<<<<< + * acc_score = 0.0 + * + */ + __pyx_tuple__4 = PyTuple_Pack(1, __pyx_int_4); if (unlikely(!__pyx_tuple__4)) __PYX_ERR(0, 118, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__4); + __Pyx_GIVEREF(__pyx_tuple__4); + + /* "opencood/utils/box_overlaps.pyx":141 + * continue + * + * acc_box += det2[4] * det2[0:4] # <<<<<<<<<<<<<< + * acc_score += det2[4] + * + */ + __pyx_slice__5 = PySlice_New(__pyx_int_0, __pyx_int_4, Py_None); if (unlikely(!__pyx_slice__5)) __PYX_ERR(0, 141, __pyx_L1_error) + __Pyx_GOTREF(__pyx_slice__5); + __Pyx_GIVEREF(__pyx_slice__5); + + /* "opencood/utils/box_overlaps.pyx":17 + * + * + * def bbox_overlaps( # <<<<<<<<<<<<<< + * np.ndarray[DTYPE_t, ndim=2] boxes, + * np.ndarray[DTYPE_t, ndim=2] query_boxes): + */ + __pyx_tuple__7 = PyTuple_Pack(11, __pyx_n_s_boxes, __pyx_n_s_query_boxes, __pyx_n_s_N, __pyx_n_s_K, __pyx_n_s_overlaps, __pyx_n_s_iw, __pyx_n_s_ih, __pyx_n_s_box_area, __pyx_n_s_ua, __pyx_n_s_k, __pyx_n_s_n); if (unlikely(!__pyx_tuple__7)) __PYX_ERR(0, 17, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__7); + __Pyx_GIVEREF(__pyx_tuple__7); + __pyx_codeobj__8 = (PyObject*)__Pyx_PyCode_New(2, 0, 0, 11, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__7, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_opencood_utils_box_overlaps_pyx, __pyx_n_s_bbox_overlaps, 17, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__8)) __PYX_ERR(0, 17, __pyx_L1_error) + + /* "opencood/utils/box_overlaps.pyx":59 + * return overlaps + * + * def bbox_intersections( # <<<<<<<<<<<<<< + * np.ndarray[DTYPE_t, ndim=2] boxes, + * np.ndarray[DTYPE_t, ndim=2] query_boxes): + */ + __pyx_tuple__9 = PyTuple_Pack(11, __pyx_n_s_boxes, __pyx_n_s_query_boxes, __pyx_n_s_N, __pyx_n_s_K, __pyx_n_s_intersec, __pyx_n_s_iw, __pyx_n_s_ih, __pyx_n_s_box_area, __pyx_n_s_ua, __pyx_n_s_k, __pyx_n_s_n); if (unlikely(!__pyx_tuple__9)) __PYX_ERR(0, 59, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__9); + __Pyx_GIVEREF(__pyx_tuple__9); + __pyx_codeobj__10 = (PyObject*)__Pyx_PyCode_New(2, 0, 0, 11, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__9, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_opencood_utils_box_overlaps_pyx, __pyx_n_s_bbox_intersections, 59, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__10)) __PYX_ERR(0, 59, __pyx_L1_error) + + /* "opencood/utils/box_overlaps.pyx":99 + * + * # Compute bounding box voting + * def box_vote( # <<<<<<<<<<<<<< + * np.ndarray[float, ndim=2] dets_NMS, + * np.ndarray[float, ndim=2] dets_all): + */ + __pyx_tuple__11 = PyTuple_Pack(21, __pyx_n_s_dets_NMS, __pyx_n_s_dets_all, __pyx_n_s_dets_voted, __pyx_n_s_N, __pyx_n_s_M, __pyx_n_s_det, __pyx_n_s_acc_box, __pyx_n_s_acc_score, __pyx_n_s_det2, __pyx_n_s_bi0, __pyx_n_s_bi1, __pyx_n_s_bit2, __pyx_n_s_bi3, __pyx_n_s_iw, __pyx_n_s_ih, __pyx_n_s_ua, __pyx_n_s_thresh, __pyx_n_s_i, __pyx_n_s_m, __pyx_n_s_bi2, __pyx_n_s_ov); if (unlikely(!__pyx_tuple__11)) __PYX_ERR(0, 99, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__11); + __Pyx_GIVEREF(__pyx_tuple__11); + __pyx_codeobj__12 = (PyObject*)__Pyx_PyCode_New(2, 0, 0, 21, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__11, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_opencood_utils_box_overlaps_pyx, 
__pyx_n_s_box_vote, 99, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__12)) __PYX_ERR(0, 99, __pyx_L1_error) + __Pyx_RefNannyFinishContext(); + return 0; + __pyx_L1_error:; + __Pyx_RefNannyFinishContext(); + return -1; +} +/* #### Code section: init_constants ### */ + +static CYTHON_SMALL_CODE int __Pyx_InitConstants(void) { + if (__Pyx_CreateStringTabAndInitStrings() < 0) __PYX_ERR(0, 1, __pyx_L1_error); + __pyx_int_0 = PyInt_FromLong(0); if (unlikely(!__pyx_int_0)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_int_1 = PyInt_FromLong(1); if (unlikely(!__pyx_int_1)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_int_4 = PyInt_FromLong(4); if (unlikely(!__pyx_int_4)) __PYX_ERR(0, 1, __pyx_L1_error) + return 0; + __pyx_L1_error:; + return -1; +} +/* #### Code section: init_globals ### */ + +static CYTHON_SMALL_CODE int __Pyx_InitGlobals(void) { + /* NumpyImportArray.init */ + /* + * Cython has automatically inserted a call to _import_array since + * you didn't include one when you cimported numpy. To disable this + * add the line + * numpy._import_array + */ +#ifdef NPY_FEATURE_VERSION +#ifndef NO_IMPORT_ARRAY +if (unlikely(_import_array() == -1)) { + PyErr_SetString(PyExc_ImportError, "numpy.core.multiarray failed to import " + "(auto-generated because you didn't call 'numpy.import_array()' after cimporting numpy; " + "use 'numpy._import_array' to disable if you are certain you don't need it)."); +} +#endif +#endif + +if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 1, __pyx_L1_error) + + /* InitThreads.init */ + #if defined(WITH_THREAD) && PY_VERSION_HEX < 0x030700F0 +PyEval_InitThreads(); +#endif + +if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 1, __pyx_L1_error) + + return 0; + __pyx_L1_error:; + return -1; +} +/* #### Code section: init_module ### */ + +static CYTHON_SMALL_CODE int __Pyx_modinit_global_init_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_variable_export_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_function_export_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_type_init_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_type_import_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_variable_import_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_function_import_code(void); /*proto*/ + +static int __Pyx_modinit_global_init_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_global_init_code", 0); + /*--- Global init code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_variable_export_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_variable_export_code", 0); + /*--- Variable export code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_function_export_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_function_export_code", 0); + /*--- Function export code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_type_init_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_type_init_code", 0); + /*--- Type init code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_type_import_code(void) { + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__Pyx_modinit_type_import_code", 0); + /*--- Type import code 
---*/ + __pyx_t_1 = PyImport_ImportModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 9, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_ptype_7cpython_4type_type = __Pyx_ImportType_3_0_11(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "type", + #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000 + sizeof(PyTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyTypeObject), + #elif CYTHON_COMPILING_IN_LIMITED_API + sizeof(PyTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyTypeObject), + #else + sizeof(PyHeapTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyHeapTypeObject), + #endif + __Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_7cpython_4type_type) __PYX_ERR(2, 9, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = PyImport_ImportModule("numpy"); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 207, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_ptype_5numpy_dtype = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "dtype", sizeof(PyArray_Descr), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyArray_Descr),__Pyx_ImportType_CheckSize_Ignore_3_0_11); if (!__pyx_ptype_5numpy_dtype) __PYX_ERR(1, 207, __pyx_L1_error) + __pyx_ptype_5numpy_flatiter = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "flatiter", sizeof(PyArrayIterObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyArrayIterObject),__Pyx_ImportType_CheckSize_Ignore_3_0_11); if (!__pyx_ptype_5numpy_flatiter) __PYX_ERR(1, 230, __pyx_L1_error) + __pyx_ptype_5numpy_broadcast = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "broadcast", sizeof(PyArrayMultiIterObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyArrayMultiIterObject),__Pyx_ImportType_CheckSize_Ignore_3_0_11); if (!__pyx_ptype_5numpy_broadcast) __PYX_ERR(1, 234, __pyx_L1_error) + __pyx_ptype_5numpy_ndarray = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "ndarray", sizeof(PyArrayObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyArrayObject),__Pyx_ImportType_CheckSize_Ignore_3_0_11); if (!__pyx_ptype_5numpy_ndarray) __PYX_ERR(1, 243, __pyx_L1_error) + __pyx_ptype_5numpy_generic = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "generic", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_generic) __PYX_ERR(1, 815, __pyx_L1_error) + __pyx_ptype_5numpy_number = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "number", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_number) __PYX_ERR(1, 817, __pyx_L1_error) + __pyx_ptype_5numpy_integer = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "integer", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_integer) __PYX_ERR(1, 819, __pyx_L1_error) + __pyx_ptype_5numpy_signedinteger = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "signedinteger", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_signedinteger) __PYX_ERR(1, 821, __pyx_L1_error) + __pyx_ptype_5numpy_unsignedinteger = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "unsignedinteger", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_unsignedinteger) __PYX_ERR(1, 823, __pyx_L1_error) + __pyx_ptype_5numpy_inexact = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "inexact", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_inexact) __PYX_ERR(1, 
825, __pyx_L1_error) + __pyx_ptype_5numpy_floating = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "floating", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_floating) __PYX_ERR(1, 827, __pyx_L1_error) + __pyx_ptype_5numpy_complexfloating = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "complexfloating", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_complexfloating) __PYX_ERR(1, 829, __pyx_L1_error) + __pyx_ptype_5numpy_flexible = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "flexible", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_flexible) __PYX_ERR(1, 831, __pyx_L1_error) + __pyx_ptype_5numpy_character = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "character", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_character) __PYX_ERR(1, 833, __pyx_L1_error) + __pyx_ptype_5numpy_ufunc = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "ufunc", sizeof(PyUFuncObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyUFuncObject),__Pyx_ImportType_CheckSize_Ignore_3_0_11); if (!__pyx_ptype_5numpy_ufunc) __PYX_ERR(1, 871, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_RefNannyFinishContext(); + return 0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_RefNannyFinishContext(); + return -1; +} + +static int __Pyx_modinit_variable_import_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_variable_import_code", 0); + /*--- Variable import code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_function_import_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_function_import_code", 0); + /*--- Function import code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + + +#if PY_MAJOR_VERSION >= 3 +#if CYTHON_PEP489_MULTI_PHASE_INIT +static PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def); /*proto*/ +static int __pyx_pymod_exec_box_overlaps(PyObject* module); /*proto*/ +static PyModuleDef_Slot __pyx_moduledef_slots[] = { + {Py_mod_create, (void*)__pyx_pymod_create}, + {Py_mod_exec, (void*)__pyx_pymod_exec_box_overlaps}, + {0, NULL} +}; +#endif + +#ifdef __cplusplus +namespace { + struct PyModuleDef __pyx_moduledef = + #else + static struct PyModuleDef __pyx_moduledef = + #endif + { + PyModuleDef_HEAD_INIT, + "box_overlaps", + 0, /* m_doc */ + #if CYTHON_PEP489_MULTI_PHASE_INIT + 0, /* m_size */ + #elif CYTHON_USE_MODULE_STATE + sizeof(__pyx_mstate), /* m_size */ + #else + -1, /* m_size */ + #endif + __pyx_methods /* m_methods */, + #if CYTHON_PEP489_MULTI_PHASE_INIT + __pyx_moduledef_slots, /* m_slots */ + #else + NULL, /* m_reload */ + #endif + #if CYTHON_USE_MODULE_STATE + __pyx_m_traverse, /* m_traverse */ + __pyx_m_clear, /* m_clear */ + NULL /* m_free */ + #else + NULL, /* m_traverse */ + NULL, /* m_clear */ + NULL /* m_free */ + #endif + }; + #ifdef __cplusplus +} /* anonymous namespace */ +#endif +#endif + +#ifndef CYTHON_NO_PYINIT_EXPORT +#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC +#elif PY_MAJOR_VERSION < 3 +#ifdef __cplusplus +#define __Pyx_PyMODINIT_FUNC extern "C" void +#else +#define __Pyx_PyMODINIT_FUNC void +#endif +#else +#ifdef __cplusplus +#define __Pyx_PyMODINIT_FUNC extern "C" PyObject * +#else +#define __Pyx_PyMODINIT_FUNC PyObject * +#endif +#endif 
+ + +#if PY_MAJOR_VERSION < 3 +__Pyx_PyMODINIT_FUNC initbox_overlaps(void) CYTHON_SMALL_CODE; /*proto*/ +__Pyx_PyMODINIT_FUNC initbox_overlaps(void) +#else +__Pyx_PyMODINIT_FUNC PyInit_box_overlaps(void) CYTHON_SMALL_CODE; /*proto*/ +__Pyx_PyMODINIT_FUNC PyInit_box_overlaps(void) +#if CYTHON_PEP489_MULTI_PHASE_INIT +{ + return PyModuleDef_Init(&__pyx_moduledef); +} +static CYTHON_SMALL_CODE int __Pyx_check_single_interpreter(void) { + #if PY_VERSION_HEX >= 0x030700A1 + static PY_INT64_T main_interpreter_id = -1; + PY_INT64_T current_id = PyInterpreterState_GetID(PyThreadState_Get()->interp); + if (main_interpreter_id == -1) { + main_interpreter_id = current_id; + return (unlikely(current_id == -1)) ? -1 : 0; + } else if (unlikely(main_interpreter_id != current_id)) + #else + static PyInterpreterState *main_interpreter = NULL; + PyInterpreterState *current_interpreter = PyThreadState_Get()->interp; + if (!main_interpreter) { + main_interpreter = current_interpreter; + } else if (unlikely(main_interpreter != current_interpreter)) + #endif + { + PyErr_SetString( + PyExc_ImportError, + "Interpreter change detected - this module can only be loaded into one interpreter per process."); + return -1; + } + return 0; +} +#if CYTHON_COMPILING_IN_LIMITED_API +static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *module, const char* from_name, const char* to_name, int allow_none) +#else +static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *moddict, const char* from_name, const char* to_name, int allow_none) +#endif +{ + PyObject *value = PyObject_GetAttrString(spec, from_name); + int result = 0; + if (likely(value)) { + if (allow_none || value != Py_None) { +#if CYTHON_COMPILING_IN_LIMITED_API + result = PyModule_AddObject(module, to_name, value); +#else + result = PyDict_SetItemString(moddict, to_name, value); +#endif + } + Py_DECREF(value); + } else if (PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + } else { + result = -1; + } + return result; +} +static CYTHON_SMALL_CODE PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def) { + PyObject *module = NULL, *moddict, *modname; + CYTHON_UNUSED_VAR(def); + if (__Pyx_check_single_interpreter()) + return NULL; + if (__pyx_m) + return __Pyx_NewRef(__pyx_m); + modname = PyObject_GetAttrString(spec, "name"); + if (unlikely(!modname)) goto bad; + module = PyModule_NewObject(modname); + Py_DECREF(modname); + if (unlikely(!module)) goto bad; +#if CYTHON_COMPILING_IN_LIMITED_API + moddict = module; +#else + moddict = PyModule_GetDict(module); + if (unlikely(!moddict)) goto bad; +#endif + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "loader", "__loader__", 1) < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "origin", "__file__", 1) < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "parent", "__package__", 1) < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "submodule_search_locations", "__path__", 0) < 0)) goto bad; + return module; +bad: + Py_XDECREF(module); + return NULL; +} + + +static CYTHON_SMALL_CODE int __pyx_pymod_exec_box_overlaps(PyObject *__pyx_pyinit_module) +#endif +#endif +{ + int stringtab_initialized = 0; + #if CYTHON_USE_MODULE_STATE + int pystate_addmodule_run = 0; + #endif + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannyDeclarations + #if 
CYTHON_PEP489_MULTI_PHASE_INIT + if (__pyx_m) { + if (__pyx_m == __pyx_pyinit_module) return 0; + PyErr_SetString(PyExc_RuntimeError, "Module 'box_overlaps' has already been imported. Re-initialisation is not supported."); + return -1; + } + #elif PY_MAJOR_VERSION >= 3 + if (__pyx_m) return __Pyx_NewRef(__pyx_m); + #endif + /*--- Module creation code ---*/ + #if CYTHON_PEP489_MULTI_PHASE_INIT + __pyx_m = __pyx_pyinit_module; + Py_INCREF(__pyx_m); + #else + #if PY_MAJOR_VERSION < 3 + __pyx_m = Py_InitModule4("box_overlaps", __pyx_methods, 0, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); + if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) + #elif CYTHON_USE_MODULE_STATE + __pyx_t_1 = PyModule_Create(&__pyx_moduledef); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error) + { + int add_module_result = PyState_AddModule(__pyx_t_1, &__pyx_moduledef); + __pyx_t_1 = 0; /* transfer ownership from __pyx_t_1 to "box_overlaps" pseudovariable */ + if (unlikely((add_module_result < 0))) __PYX_ERR(0, 1, __pyx_L1_error) + pystate_addmodule_run = 1; + } + #else + __pyx_m = PyModule_Create(&__pyx_moduledef); + if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #endif + CYTHON_UNUSED_VAR(__pyx_t_1); + __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) __PYX_ERR(0, 1, __pyx_L1_error) + Py_INCREF(__pyx_d); + __pyx_b = __Pyx_PyImport_AddModuleRef(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_b)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_cython_runtime = __Pyx_PyImport_AddModuleRef((const char *) "cython_runtime"); if (unlikely(!__pyx_cython_runtime)) __PYX_ERR(0, 1, __pyx_L1_error) + if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #if CYTHON_REFNANNY +__Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny"); +if (!__Pyx_RefNanny) { + PyErr_Clear(); + __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny"); + if (!__Pyx_RefNanny) + Py_FatalError("failed to import 'refnanny' module"); +} +#endif + __Pyx_RefNannySetupContext("__Pyx_PyMODINIT_FUNC PyInit_box_overlaps(void)", 0); + if (__Pyx_check_binary_version(__PYX_LIMITED_VERSION_HEX, __Pyx_get_runtime_version(), CYTHON_COMPILING_IN_LIMITED_API) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #ifdef __Pxy_PyFrame_Initialize_Offsets + __Pxy_PyFrame_Initialize_Offsets(); + #endif + __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_empty_unicode = PyUnicode_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_unicode)) __PYX_ERR(0, 1, __pyx_L1_error) + #ifdef __Pyx_CyFunction_USED + if (__pyx_CyFunction_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_FusedFunction_USED + if (__pyx_FusedFunction_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_Coroutine_USED + if (__pyx_Coroutine_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_Generator_USED + if (__pyx_Generator_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_AsyncGen_USED + if (__pyx_AsyncGen_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_StopAsyncIteration_USED + if (__pyx_StopAsyncIteration_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + /*--- Library function declarations ---*/ + /*--- Threads initialization code ---*/ + #if defined(WITH_THREAD) && PY_VERSION_HEX < 0x030700F0 && 
defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS + PyEval_InitThreads(); + #endif + /*--- Initialize various global constants etc. ---*/ + if (__Pyx_InitConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + stringtab_initialized = 1; + if (__Pyx_InitGlobals() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) + if (__Pyx_init_sys_getdefaultencoding_params() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + if (__pyx_module_is_main_opencood__utils__box_overlaps) { + if (PyObject_SetAttr(__pyx_m, __pyx_n_s_name, __pyx_n_s_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + } + #if PY_MAJOR_VERSION >= 3 + { + PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error) + if (!PyDict_GetItemString(modules, "opencood.utils.box_overlaps")) { + if (unlikely((PyDict_SetItemString(modules, "opencood.utils.box_overlaps", __pyx_m) < 0))) __PYX_ERR(0, 1, __pyx_L1_error) + } + } + #endif + /*--- Builtin init code ---*/ + if (__Pyx_InitCachedBuiltins() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + /*--- Constants init code ---*/ + if (__Pyx_InitCachedConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + /*--- Global type/function init code ---*/ + (void)__Pyx_modinit_global_init_code(); + (void)__Pyx_modinit_variable_export_code(); + (void)__Pyx_modinit_function_export_code(); + (void)__Pyx_modinit_type_init_code(); + if (unlikely((__Pyx_modinit_type_import_code() < 0))) __PYX_ERR(0, 1, __pyx_L1_error) + (void)__Pyx_modinit_variable_import_code(); + (void)__Pyx_modinit_function_import_code(); + /*--- Execution code ---*/ + #if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) + if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + + /* "opencood/utils/box_overlaps.pyx":8 + * # -------------------------------------------------------- + * + * import numpy as np # <<<<<<<<<<<<<< + * cimport numpy as np + * from cython.parallel import prange, parallel + */ + __pyx_t_2 = __Pyx_ImportDottedModule(__pyx_n_s_numpy, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 8, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_np, __pyx_t_2) < 0) __PYX_ERR(0, 8, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /* "opencood/utils/box_overlaps.pyx":13 + * + * + * DTYPE = np.float32 # <<<<<<<<<<<<<< + * ctypedef float DTYPE_t + * + */ + __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_np); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 13, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_float32); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 13, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + if (PyDict_SetItem(__pyx_d, __pyx_n_s_DTYPE, __pyx_t_3) < 0) __PYX_ERR(0, 13, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "opencood/utils/box_overlaps.pyx":17 + * + * + * def bbox_overlaps( # <<<<<<<<<<<<<< + * np.ndarray[DTYPE_t, ndim=2] boxes, + * np.ndarray[DTYPE_t, ndim=2] query_boxes): + */ + __pyx_t_3 = __Pyx_CyFunction_New(&__pyx_mdef_8opencood_5utils_12box_overlaps_1bbox_overlaps, 0, __pyx_n_s_bbox_overlaps, NULL, __pyx_n_s_opencood_utils_box_overlaps, __pyx_d, ((PyObject *)__pyx_codeobj__8)); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 17, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_bbox_overlaps, __pyx_t_3) < 0) __PYX_ERR(0, 17, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* 
"opencood/utils/box_overlaps.pyx":59 + * return overlaps + * + * def bbox_intersections( # <<<<<<<<<<<<<< + * np.ndarray[DTYPE_t, ndim=2] boxes, + * np.ndarray[DTYPE_t, ndim=2] query_boxes): + */ + __pyx_t_3 = __Pyx_CyFunction_New(&__pyx_mdef_8opencood_5utils_12box_overlaps_3bbox_intersections, 0, __pyx_n_s_bbox_intersections, NULL, __pyx_n_s_opencood_utils_box_overlaps, __pyx_d, ((PyObject *)__pyx_codeobj__10)); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 59, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_bbox_intersections, __pyx_t_3) < 0) __PYX_ERR(0, 59, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "opencood/utils/box_overlaps.pyx":99 + * + * # Compute bounding box voting + * def box_vote( # <<<<<<<<<<<<<< + * np.ndarray[float, ndim=2] dets_NMS, + * np.ndarray[float, ndim=2] dets_all): + */ + __pyx_t_3 = __Pyx_CyFunction_New(&__pyx_mdef_8opencood_5utils_12box_overlaps_5box_vote, 0, __pyx_n_s_box_vote, NULL, __pyx_n_s_opencood_utils_box_overlaps, __pyx_d, ((PyObject *)__pyx_codeobj__12)); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 99, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_box_vote, __pyx_t_3) < 0) __PYX_ERR(0, 99, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "opencood/utils/box_overlaps.pyx":1 + * # -------------------------------------------------------- # <<<<<<<<<<<<<< + * # Fast R-CNN + * # Copyright (c) 2015 Microsoft + */ + __pyx_t_3 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_3) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /*--- Wrapped vars code ---*/ + + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + if (__pyx_m) { + if (__pyx_d && stringtab_initialized) { + __Pyx_AddTraceback("init opencood.utils.box_overlaps", __pyx_clineno, __pyx_lineno, __pyx_filename); + } + #if !CYTHON_USE_MODULE_STATE + Py_CLEAR(__pyx_m); + #else + Py_DECREF(__pyx_m); + if (pystate_addmodule_run) { + PyObject *tp, *value, *tb; + PyErr_Fetch(&tp, &value, &tb); + PyState_RemoveModule(&__pyx_moduledef); + PyErr_Restore(tp, value, tb); + } + #endif + } else if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ImportError, "init opencood.utils.box_overlaps"); + } + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + #if CYTHON_PEP489_MULTI_PHASE_INIT + return (__pyx_m != NULL) ? 
0 : -1; + #elif PY_MAJOR_VERSION >= 3 + return __pyx_m; + #else + return; + #endif +} +/* #### Code section: cleanup_globals ### */ +/* #### Code section: cleanup_module ### */ +/* #### Code section: main_method ### */ +/* #### Code section: utility_code_pragmas ### */ +#ifdef _MSC_VER +#pragma warning( push ) +/* Warning 4127: conditional expression is constant + * Cython uses constant conditional expressions to allow in inline functions to be optimized at + * compile-time, so this warning is not useful + */ +#pragma warning( disable : 4127 ) +#endif + + + +/* #### Code section: utility_code_def ### */ + +/* --- Runtime support code --- */ +/* Refnanny */ +#if CYTHON_REFNANNY +static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) { + PyObject *m = NULL, *p = NULL; + void *r = NULL; + m = PyImport_ImportModule(modname); + if (!m) goto end; + p = PyObject_GetAttrString(m, "RefNannyAPI"); + if (!p) goto end; + r = PyLong_AsVoidPtr(p); +end: + Py_XDECREF(p); + Py_XDECREF(m); + return (__Pyx_RefNannyAPIStruct *)r; +} +#endif + +/* PyErrExceptionMatches */ +#if CYTHON_FAST_THREAD_STATE +static int __Pyx_PyErr_ExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(tuple); +#if PY_MAJOR_VERSION >= 3 + for (i=0; i= 0x030C00A6 + PyObject *current_exception = tstate->current_exception; + if (unlikely(!current_exception)) return 0; + exc_type = (PyObject*) Py_TYPE(current_exception); + if (exc_type == err) return 1; +#else + exc_type = tstate->curexc_type; + if (exc_type == err) return 1; + if (unlikely(!exc_type)) return 0; +#endif + #if CYTHON_AVOID_BORROWED_REFS + Py_INCREF(exc_type); + #endif + if (unlikely(PyTuple_Check(err))) { + result = __Pyx_PyErr_ExceptionMatchesTuple(exc_type, err); + } else { + result = __Pyx_PyErr_GivenExceptionMatches(exc_type, err); + } + #if CYTHON_AVOID_BORROWED_REFS + Py_DECREF(exc_type); + #endif + return result; +} +#endif + +/* PyErrFetchRestore */ +#if CYTHON_FAST_THREAD_STATE +static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { +#if PY_VERSION_HEX >= 0x030C00A6 + PyObject *tmp_value; + assert(type == NULL || (value != NULL && type == (PyObject*) Py_TYPE(value))); + if (value) { + #if CYTHON_COMPILING_IN_CPYTHON + if (unlikely(((PyBaseExceptionObject*) value)->traceback != tb)) + #endif + PyException_SetTraceback(value, tb); + } + tmp_value = tstate->current_exception; + tstate->current_exception = value; + Py_XDECREF(tmp_value); + Py_XDECREF(type); + Py_XDECREF(tb); +#else + PyObject *tmp_type, *tmp_value, *tmp_tb; + tmp_type = tstate->curexc_type; + tmp_value = tstate->curexc_value; + tmp_tb = tstate->curexc_traceback; + tstate->curexc_type = type; + tstate->curexc_value = value; + tstate->curexc_traceback = tb; + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); +#endif +} +static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { +#if PY_VERSION_HEX >= 0x030C00A6 + PyObject* exc_value; + exc_value = tstate->current_exception; + tstate->current_exception = 0; + *value = exc_value; + *type = NULL; + *tb = NULL; + if (exc_value) { + *type = (PyObject*) Py_TYPE(exc_value); + Py_INCREF(*type); + #if CYTHON_COMPILING_IN_CPYTHON + *tb = ((PyBaseExceptionObject*) exc_value)->traceback; + Py_XINCREF(*tb); + #else + *tb = PyException_GetTraceback(exc_value); + #endif + } +#else + *type = tstate->curexc_type; + *value = tstate->curexc_value; + *tb 
= tstate->curexc_traceback; + tstate->curexc_type = 0; + tstate->curexc_value = 0; + tstate->curexc_traceback = 0; +#endif +} +#endif + +/* PyObjectGetAttrStr */ +#if CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) { + PyTypeObject* tp = Py_TYPE(obj); + if (likely(tp->tp_getattro)) + return tp->tp_getattro(obj, attr_name); +#if PY_MAJOR_VERSION < 3 + if (likely(tp->tp_getattr)) + return tp->tp_getattr(obj, PyString_AS_STRING(attr_name)); +#endif + return PyObject_GetAttr(obj, attr_name); +} +#endif + +/* PyObjectGetAttrStrNoError */ +#if __PYX_LIMITED_VERSION_HEX < 0x030d00A1 +static void __Pyx_PyObject_GetAttrStr_ClearAttributeError(void) { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + if (likely(__Pyx_PyErr_ExceptionMatches(PyExc_AttributeError))) + __Pyx_PyErr_Clear(); +} +#endif +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name) { + PyObject *result; +#if __PYX_LIMITED_VERSION_HEX >= 0x030d00A1 + (void) PyObject_GetOptionalAttr(obj, attr_name, &result); + return result; +#else +#if CYTHON_COMPILING_IN_CPYTHON && CYTHON_USE_TYPE_SLOTS && PY_VERSION_HEX >= 0x030700B1 + PyTypeObject* tp = Py_TYPE(obj); + if (likely(tp->tp_getattro == PyObject_GenericGetAttr)) { + return _PyObject_GenericGetAttrWithDict(obj, attr_name, NULL, 1); + } +#endif + result = __Pyx_PyObject_GetAttrStr(obj, attr_name); + if (unlikely(!result)) { + __Pyx_PyObject_GetAttrStr_ClearAttributeError(); + } + return result; +#endif +} + +/* GetBuiltinName */ +static PyObject *__Pyx_GetBuiltinName(PyObject *name) { + PyObject* result = __Pyx_PyObject_GetAttrStrNoError(__pyx_b, name); + if (unlikely(!result) && !PyErr_Occurred()) { + PyErr_Format(PyExc_NameError, +#if PY_MAJOR_VERSION >= 3 + "name '%U' is not defined", name); +#else + "name '%.200s' is not defined", PyString_AS_STRING(name)); +#endif + } + return result; +} + +/* GetTopmostException */ +#if CYTHON_USE_EXC_INFO_STACK && CYTHON_FAST_THREAD_STATE +static _PyErr_StackItem * +__Pyx_PyErr_GetTopmostException(PyThreadState *tstate) +{ + _PyErr_StackItem *exc_info = tstate->exc_info; + while ((exc_info->exc_value == NULL || exc_info->exc_value == Py_None) && + exc_info->previous_item != NULL) + { + exc_info = exc_info->previous_item; + } + return exc_info; +} +#endif + +/* SaveResetException */ +#if CYTHON_FAST_THREAD_STATE +static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { + #if CYTHON_USE_EXC_INFO_STACK && PY_VERSION_HEX >= 0x030B00a4 + _PyErr_StackItem *exc_info = __Pyx_PyErr_GetTopmostException(tstate); + PyObject *exc_value = exc_info->exc_value; + if (exc_value == NULL || exc_value == Py_None) { + *value = NULL; + *type = NULL; + *tb = NULL; + } else { + *value = exc_value; + Py_INCREF(*value); + *type = (PyObject*) Py_TYPE(exc_value); + Py_INCREF(*type); + *tb = PyException_GetTraceback(exc_value); + } + #elif CYTHON_USE_EXC_INFO_STACK + _PyErr_StackItem *exc_info = __Pyx_PyErr_GetTopmostException(tstate); + *type = exc_info->exc_type; + *value = exc_info->exc_value; + *tb = exc_info->exc_traceback; + Py_XINCREF(*type); + Py_XINCREF(*value); + Py_XINCREF(*tb); + #else + *type = tstate->exc_type; + *value = tstate->exc_value; + *tb = tstate->exc_traceback; + Py_XINCREF(*type); + Py_XINCREF(*value); + Py_XINCREF(*tb); + #endif +} +static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { + #if 
CYTHON_USE_EXC_INFO_STACK && PY_VERSION_HEX >= 0x030B00a4 + _PyErr_StackItem *exc_info = tstate->exc_info; + PyObject *tmp_value = exc_info->exc_value; + exc_info->exc_value = value; + Py_XDECREF(tmp_value); + Py_XDECREF(type); + Py_XDECREF(tb); + #else + PyObject *tmp_type, *tmp_value, *tmp_tb; + #if CYTHON_USE_EXC_INFO_STACK + _PyErr_StackItem *exc_info = tstate->exc_info; + tmp_type = exc_info->exc_type; + tmp_value = exc_info->exc_value; + tmp_tb = exc_info->exc_traceback; + exc_info->exc_type = type; + exc_info->exc_value = value; + exc_info->exc_traceback = tb; + #else + tmp_type = tstate->exc_type; + tmp_value = tstate->exc_value; + tmp_tb = tstate->exc_traceback; + tstate->exc_type = type; + tstate->exc_value = value; + tstate->exc_traceback = tb; + #endif + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); + #endif +} +#endif + +/* GetException */ +#if CYTHON_FAST_THREAD_STATE +static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) +#else +static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb) +#endif +{ + PyObject *local_type = NULL, *local_value, *local_tb = NULL; +#if CYTHON_FAST_THREAD_STATE + PyObject *tmp_type, *tmp_value, *tmp_tb; + #if PY_VERSION_HEX >= 0x030C00A6 + local_value = tstate->current_exception; + tstate->current_exception = 0; + if (likely(local_value)) { + local_type = (PyObject*) Py_TYPE(local_value); + Py_INCREF(local_type); + local_tb = PyException_GetTraceback(local_value); + } + #else + local_type = tstate->curexc_type; + local_value = tstate->curexc_value; + local_tb = tstate->curexc_traceback; + tstate->curexc_type = 0; + tstate->curexc_value = 0; + tstate->curexc_traceback = 0; + #endif +#else + PyErr_Fetch(&local_type, &local_value, &local_tb); +#endif + PyErr_NormalizeException(&local_type, &local_value, &local_tb); +#if CYTHON_FAST_THREAD_STATE && PY_VERSION_HEX >= 0x030C00A6 + if (unlikely(tstate->current_exception)) +#elif CYTHON_FAST_THREAD_STATE + if (unlikely(tstate->curexc_type)) +#else + if (unlikely(PyErr_Occurred())) +#endif + goto bad; + #if PY_MAJOR_VERSION >= 3 + if (local_tb) { + if (unlikely(PyException_SetTraceback(local_value, local_tb) < 0)) + goto bad; + } + #endif + Py_XINCREF(local_tb); + Py_XINCREF(local_type); + Py_XINCREF(local_value); + *type = local_type; + *value = local_value; + *tb = local_tb; +#if CYTHON_FAST_THREAD_STATE + #if CYTHON_USE_EXC_INFO_STACK + { + _PyErr_StackItem *exc_info = tstate->exc_info; + #if PY_VERSION_HEX >= 0x030B00a4 + tmp_value = exc_info->exc_value; + exc_info->exc_value = local_value; + tmp_type = NULL; + tmp_tb = NULL; + Py_XDECREF(local_type); + Py_XDECREF(local_tb); + #else + tmp_type = exc_info->exc_type; + tmp_value = exc_info->exc_value; + tmp_tb = exc_info->exc_traceback; + exc_info->exc_type = local_type; + exc_info->exc_value = local_value; + exc_info->exc_traceback = local_tb; + #endif + } + #else + tmp_type = tstate->exc_type; + tmp_value = tstate->exc_value; + tmp_tb = tstate->exc_traceback; + tstate->exc_type = local_type; + tstate->exc_value = local_value; + tstate->exc_traceback = local_tb; + #endif + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); +#else + PyErr_SetExcInfo(local_type, local_value, local_tb); +#endif + return 0; +bad: + *type = 0; + *value = 0; + *tb = 0; + Py_XDECREF(local_type); + Py_XDECREF(local_value); + Py_XDECREF(local_tb); + return -1; +} + +/* PyObjectCall */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* 
__Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) { + PyObject *result; + ternaryfunc call = Py_TYPE(func)->tp_call; + if (unlikely(!call)) + return PyObject_Call(func, arg, kw); + #if PY_MAJOR_VERSION < 3 + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + #else + if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) + return NULL; + #endif + result = (*call)(func, arg, kw); + Py_LeaveRecursiveCall(); + if (unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return result; +} +#endif + +/* RaiseException */ +#if PY_MAJOR_VERSION < 3 +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) { + __Pyx_PyThreadState_declare + CYTHON_UNUSED_VAR(cause); + Py_XINCREF(type); + if (!value || value == Py_None) + value = NULL; + else + Py_INCREF(value); + if (!tb || tb == Py_None) + tb = NULL; + else { + Py_INCREF(tb); + if (!PyTraceBack_Check(tb)) { + PyErr_SetString(PyExc_TypeError, + "raise: arg 3 must be a traceback or None"); + goto raise_error; + } + } + if (PyType_Check(type)) { +#if CYTHON_COMPILING_IN_PYPY + if (!value) { + Py_INCREF(Py_None); + value = Py_None; + } +#endif + PyErr_NormalizeException(&type, &value, &tb); + } else { + if (value) { + PyErr_SetString(PyExc_TypeError, + "instance exception may not have a separate value"); + goto raise_error; + } + value = type; + type = (PyObject*) Py_TYPE(type); + Py_INCREF(type); + if (!PyType_IsSubtype((PyTypeObject *)type, (PyTypeObject *)PyExc_BaseException)) { + PyErr_SetString(PyExc_TypeError, + "raise: exception class must be a subclass of BaseException"); + goto raise_error; + } + } + __Pyx_PyThreadState_assign + __Pyx_ErrRestore(type, value, tb); + return; +raise_error: + Py_XDECREF(value); + Py_XDECREF(type); + Py_XDECREF(tb); + return; +} +#else +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) { + PyObject* owned_instance = NULL; + if (tb == Py_None) { + tb = 0; + } else if (tb && !PyTraceBack_Check(tb)) { + PyErr_SetString(PyExc_TypeError, + "raise: arg 3 must be a traceback or None"); + goto bad; + } + if (value == Py_None) + value = 0; + if (PyExceptionInstance_Check(type)) { + if (value) { + PyErr_SetString(PyExc_TypeError, + "instance exception may not have a separate value"); + goto bad; + } + value = type; + type = (PyObject*) Py_TYPE(value); + } else if (PyExceptionClass_Check(type)) { + PyObject *instance_class = NULL; + if (value && PyExceptionInstance_Check(value)) { + instance_class = (PyObject*) Py_TYPE(value); + if (instance_class != type) { + int is_subclass = PyObject_IsSubclass(instance_class, type); + if (!is_subclass) { + instance_class = NULL; + } else if (unlikely(is_subclass == -1)) { + goto bad; + } else { + type = instance_class; + } + } + } + if (!instance_class) { + PyObject *args; + if (!value) + args = PyTuple_New(0); + else if (PyTuple_Check(value)) { + Py_INCREF(value); + args = value; + } else + args = PyTuple_Pack(1, value); + if (!args) + goto bad; + owned_instance = PyObject_Call(type, args, NULL); + Py_DECREF(args); + if (!owned_instance) + goto bad; + value = owned_instance; + if (!PyExceptionInstance_Check(value)) { + PyErr_Format(PyExc_TypeError, + "calling %R should have returned an instance of " + "BaseException, not %R", + type, Py_TYPE(value)); + goto bad; + } + } + } else { + PyErr_SetString(PyExc_TypeError, + "raise: exception class must be a 
subclass of BaseException"); + goto bad; + } + if (cause) { + PyObject *fixed_cause; + if (cause == Py_None) { + fixed_cause = NULL; + } else if (PyExceptionClass_Check(cause)) { + fixed_cause = PyObject_CallObject(cause, NULL); + if (fixed_cause == NULL) + goto bad; + } else if (PyExceptionInstance_Check(cause)) { + fixed_cause = cause; + Py_INCREF(fixed_cause); + } else { + PyErr_SetString(PyExc_TypeError, + "exception causes must derive from " + "BaseException"); + goto bad; + } + PyException_SetCause(value, fixed_cause); + } + PyErr_SetObject(type, value); + if (tb) { + #if PY_VERSION_HEX >= 0x030C00A6 + PyException_SetTraceback(value, tb); + #elif CYTHON_FAST_THREAD_STATE + PyThreadState *tstate = __Pyx_PyThreadState_Current; + PyObject* tmp_tb = tstate->curexc_traceback; + if (tb != tmp_tb) { + Py_INCREF(tb); + tstate->curexc_traceback = tb; + Py_XDECREF(tmp_tb); + } +#else + PyObject *tmp_type, *tmp_value, *tmp_tb; + PyErr_Fetch(&tmp_type, &tmp_value, &tmp_tb); + Py_INCREF(tb); + PyErr_Restore(tmp_type, tmp_value, tb); + Py_XDECREF(tmp_tb); +#endif + } +bad: + Py_XDECREF(owned_instance); + return; +} +#endif + +/* TupleAndListFromArray */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE void __Pyx_copy_object_array(PyObject *const *CYTHON_RESTRICT src, PyObject** CYTHON_RESTRICT dest, Py_ssize_t length) { + PyObject *v; + Py_ssize_t i; + for (i = 0; i < length; i++) { + v = dest[i] = src[i]; + Py_INCREF(v); + } +} +static CYTHON_INLINE PyObject * +__Pyx_PyTuple_FromArray(PyObject *const *src, Py_ssize_t n) +{ + PyObject *res; + if (n <= 0) { + Py_INCREF(__pyx_empty_tuple); + return __pyx_empty_tuple; + } + res = PyTuple_New(n); + if (unlikely(res == NULL)) return NULL; + __Pyx_copy_object_array(src, ((PyTupleObject*)res)->ob_item, n); + return res; +} +static CYTHON_INLINE PyObject * +__Pyx_PyList_FromArray(PyObject *const *src, Py_ssize_t n) +{ + PyObject *res; + if (n <= 0) { + return PyList_New(0); + } + res = PyList_New(n); + if (unlikely(res == NULL)) return NULL; + __Pyx_copy_object_array(src, ((PyListObject*)res)->ob_item, n); + return res; +} +#endif + +/* BytesEquals */ +static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals) { +#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API + return PyObject_RichCompareBool(s1, s2, equals); +#else + if (s1 == s2) { + return (equals == Py_EQ); + } else if (PyBytes_CheckExact(s1) & PyBytes_CheckExact(s2)) { + const char *ps1, *ps2; + Py_ssize_t length = PyBytes_GET_SIZE(s1); + if (length != PyBytes_GET_SIZE(s2)) + return (equals == Py_NE); + ps1 = PyBytes_AS_STRING(s1); + ps2 = PyBytes_AS_STRING(s2); + if (ps1[0] != ps2[0]) { + return (equals == Py_NE); + } else if (length == 1) { + return (equals == Py_EQ); + } else { + int result; +#if CYTHON_USE_UNICODE_INTERNALS && (PY_VERSION_HEX < 0x030B0000) + Py_hash_t hash1, hash2; + hash1 = ((PyBytesObject*)s1)->ob_shash; + hash2 = ((PyBytesObject*)s2)->ob_shash; + if (hash1 != hash2 && hash1 != -1 && hash2 != -1) { + return (equals == Py_NE); + } +#endif + result = memcmp(ps1, ps2, (size_t)length); + return (equals == Py_EQ) ? 
(result == 0) : (result != 0); + } + } else if ((s1 == Py_None) & PyBytes_CheckExact(s2)) { + return (equals == Py_NE); + } else if ((s2 == Py_None) & PyBytes_CheckExact(s1)) { + return (equals == Py_NE); + } else { + int result; + PyObject* py_result = PyObject_RichCompare(s1, s2, equals); + if (!py_result) + return -1; + result = __Pyx_PyObject_IsTrue(py_result); + Py_DECREF(py_result); + return result; + } +#endif +} + +/* UnicodeEquals */ +static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals) { +#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API + return PyObject_RichCompareBool(s1, s2, equals); +#else +#if PY_MAJOR_VERSION < 3 + PyObject* owned_ref = NULL; +#endif + int s1_is_unicode, s2_is_unicode; + if (s1 == s2) { + goto return_eq; + } + s1_is_unicode = PyUnicode_CheckExact(s1); + s2_is_unicode = PyUnicode_CheckExact(s2); +#if PY_MAJOR_VERSION < 3 + if ((s1_is_unicode & (!s2_is_unicode)) && PyString_CheckExact(s2)) { + owned_ref = PyUnicode_FromObject(s2); + if (unlikely(!owned_ref)) + return -1; + s2 = owned_ref; + s2_is_unicode = 1; + } else if ((s2_is_unicode & (!s1_is_unicode)) && PyString_CheckExact(s1)) { + owned_ref = PyUnicode_FromObject(s1); + if (unlikely(!owned_ref)) + return -1; + s1 = owned_ref; + s1_is_unicode = 1; + } else if (((!s2_is_unicode) & (!s1_is_unicode))) { + return __Pyx_PyBytes_Equals(s1, s2, equals); + } +#endif + if (s1_is_unicode & s2_is_unicode) { + Py_ssize_t length; + int kind; + void *data1, *data2; + if (unlikely(__Pyx_PyUnicode_READY(s1) < 0) || unlikely(__Pyx_PyUnicode_READY(s2) < 0)) + return -1; + length = __Pyx_PyUnicode_GET_LENGTH(s1); + if (length != __Pyx_PyUnicode_GET_LENGTH(s2)) { + goto return_ne; + } +#if CYTHON_USE_UNICODE_INTERNALS + { + Py_hash_t hash1, hash2; + #if CYTHON_PEP393_ENABLED + hash1 = ((PyASCIIObject*)s1)->hash; + hash2 = ((PyASCIIObject*)s2)->hash; + #else + hash1 = ((PyUnicodeObject*)s1)->hash; + hash2 = ((PyUnicodeObject*)s2)->hash; + #endif + if (hash1 != hash2 && hash1 != -1 && hash2 != -1) { + goto return_ne; + } + } +#endif + kind = __Pyx_PyUnicode_KIND(s1); + if (kind != __Pyx_PyUnicode_KIND(s2)) { + goto return_ne; + } + data1 = __Pyx_PyUnicode_DATA(s1); + data2 = __Pyx_PyUnicode_DATA(s2); + if (__Pyx_PyUnicode_READ(kind, data1, 0) != __Pyx_PyUnicode_READ(kind, data2, 0)) { + goto return_ne; + } else if (length == 1) { + goto return_eq; + } else { + int result = memcmp(data1, data2, (size_t)(length * kind)); + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(owned_ref); + #endif + return (equals == Py_EQ) ? 
(result == 0) : (result != 0); + } + } else if ((s1 == Py_None) & s2_is_unicode) { + goto return_ne; + } else if ((s2 == Py_None) & s1_is_unicode) { + goto return_ne; + } else { + int result; + PyObject* py_result = PyObject_RichCompare(s1, s2, equals); + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(owned_ref); + #endif + if (!py_result) + return -1; + result = __Pyx_PyObject_IsTrue(py_result); + Py_DECREF(py_result); + return result; + } +return_eq: + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(owned_ref); + #endif + return (equals == Py_EQ); +return_ne: + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(owned_ref); + #endif + return (equals == Py_NE); +#endif +} + +/* fastcall */ +#if CYTHON_METH_FASTCALL +static CYTHON_INLINE PyObject * __Pyx_GetKwValue_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues, PyObject *s) +{ + Py_ssize_t i, n = PyTuple_GET_SIZE(kwnames); + for (i = 0; i < n; i++) + { + if (s == PyTuple_GET_ITEM(kwnames, i)) return kwvalues[i]; + } + for (i = 0; i < n; i++) + { + int eq = __Pyx_PyUnicode_Equals(s, PyTuple_GET_ITEM(kwnames, i), Py_EQ); + if (unlikely(eq != 0)) { + if (unlikely(eq < 0)) return NULL; + return kwvalues[i]; + } + } + return NULL; +} +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000 +CYTHON_UNUSED static PyObject *__Pyx_KwargsAsDict_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues) { + Py_ssize_t i, nkwargs = PyTuple_GET_SIZE(kwnames); + PyObject *dict; + dict = PyDict_New(); + if (unlikely(!dict)) + return NULL; + for (i=0; i= 3 + "%s() got multiple values for keyword argument '%U'", func_name, kw_name); + #else + "%s() got multiple values for keyword argument '%s'", func_name, + PyString_AsString(kw_name)); + #endif +} + +/* ParseKeywords */ +static int __Pyx_ParseOptionalKeywords( + PyObject *kwds, + PyObject *const *kwvalues, + PyObject **argnames[], + PyObject *kwds2, + PyObject *values[], + Py_ssize_t num_pos_args, + const char* function_name) +{ + PyObject *key = 0, *value = 0; + Py_ssize_t pos = 0; + PyObject*** name; + PyObject*** first_kw_arg = argnames + num_pos_args; + int kwds_is_tuple = CYTHON_METH_FASTCALL && likely(PyTuple_Check(kwds)); + while (1) { + Py_XDECREF(key); key = NULL; + Py_XDECREF(value); value = NULL; + if (kwds_is_tuple) { + Py_ssize_t size; +#if CYTHON_ASSUME_SAFE_MACROS + size = PyTuple_GET_SIZE(kwds); +#else + size = PyTuple_Size(kwds); + if (size < 0) goto bad; +#endif + if (pos >= size) break; +#if CYTHON_AVOID_BORROWED_REFS + key = __Pyx_PySequence_ITEM(kwds, pos); + if (!key) goto bad; +#elif CYTHON_ASSUME_SAFE_MACROS + key = PyTuple_GET_ITEM(kwds, pos); +#else + key = PyTuple_GetItem(kwds, pos); + if (!key) goto bad; +#endif + value = kwvalues[pos]; + pos++; + } + else + { + if (!PyDict_Next(kwds, &pos, &key, &value)) break; +#if CYTHON_AVOID_BORROWED_REFS + Py_INCREF(key); +#endif + } + name = first_kw_arg; + while (*name && (**name != key)) name++; + if (*name) { + values[name-argnames] = value; +#if CYTHON_AVOID_BORROWED_REFS + Py_INCREF(value); + Py_DECREF(key); +#endif + key = NULL; + value = NULL; + continue; + } +#if !CYTHON_AVOID_BORROWED_REFS + Py_INCREF(key); +#endif + Py_INCREF(value); + name = first_kw_arg; + #if PY_MAJOR_VERSION < 3 + if (likely(PyString_Check(key))) { + while (*name) { + if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key)) + && _PyString_Eq(**name, key)) { + values[name-argnames] = value; +#if CYTHON_AVOID_BORROWED_REFS + value = NULL; +#endif + break; + } + name++; + } + if (*name) continue; + else { + PyObject*** argname = argnames; + while 
(argname != first_kw_arg) { + if ((**argname == key) || ( + (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key)) + && _PyString_Eq(**argname, key))) { + goto arg_passed_twice; + } + argname++; + } + } + } else + #endif + if (likely(PyUnicode_Check(key))) { + while (*name) { + int cmp = ( + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 + (__Pyx_PyUnicode_GET_LENGTH(**name) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 : + #endif + PyUnicode_Compare(**name, key) + ); + if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; + if (cmp == 0) { + values[name-argnames] = value; +#if CYTHON_AVOID_BORROWED_REFS + value = NULL; +#endif + break; + } + name++; + } + if (*name) continue; + else { + PyObject*** argname = argnames; + while (argname != first_kw_arg) { + int cmp = (**argname == key) ? 0 : + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 + (__Pyx_PyUnicode_GET_LENGTH(**argname) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 : + #endif + PyUnicode_Compare(**argname, key); + if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; + if (cmp == 0) goto arg_passed_twice; + argname++; + } + } + } else + goto invalid_keyword_type; + if (kwds2) { + if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad; + } else { + goto invalid_keyword; + } + } + Py_XDECREF(key); + Py_XDECREF(value); + return 0; +arg_passed_twice: + __Pyx_RaiseDoubleKeywordsError(function_name, key); + goto bad; +invalid_keyword_type: + PyErr_Format(PyExc_TypeError, + "%.200s() keywords must be strings", function_name); + goto bad; +invalid_keyword: + #if PY_MAJOR_VERSION < 3 + PyErr_Format(PyExc_TypeError, + "%.200s() got an unexpected keyword argument '%.200s'", + function_name, PyString_AsString(key)); + #else + PyErr_Format(PyExc_TypeError, + "%s() got an unexpected keyword argument '%U'", + function_name, key); + #endif +bad: + Py_XDECREF(key); + Py_XDECREF(value); + return -1; +} + +/* ArgTypeTest */ +static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact) +{ + __Pyx_TypeName type_name; + __Pyx_TypeName obj_type_name; + if (unlikely(!type)) { + PyErr_SetString(PyExc_SystemError, "Missing type object"); + return 0; + } + else if (exact) { + #if PY_MAJOR_VERSION == 2 + if ((type == &PyBaseString_Type) && likely(__Pyx_PyBaseString_CheckExact(obj))) return 1; + #endif + } + else { + if (likely(__Pyx_TypeCheck(obj, type))) return 1; + } + type_name = __Pyx_PyType_GetName(type); + obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); + PyErr_Format(PyExc_TypeError, + "Argument '%.200s' has incorrect type (expected " __Pyx_FMT_TYPENAME + ", got " __Pyx_FMT_TYPENAME ")", name, type_name, obj_type_name); + __Pyx_DECREF_TypeName(type_name); + __Pyx_DECREF_TypeName(obj_type_name); + return 0; +} + +/* IsLittleEndian */ +static CYTHON_INLINE int __Pyx_Is_Little_Endian(void) +{ + union { + uint32_t u32; + uint8_t u8[4]; + } S; + S.u32 = 0x01020304; + return S.u8[0] == 4; +} + +/* BufferFormatCheck */ +static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx, + __Pyx_BufFmt_StackElem* stack, + __Pyx_TypeInfo* type) { + stack[0].field = &ctx->root; + stack[0].parent_offset = 0; + ctx->root.type = type; + ctx->root.name = "buffer dtype"; + ctx->root.offset = 0; + ctx->head = stack; + ctx->head->field = &ctx->root; + ctx->fmt_offset = 0; + ctx->head->parent_offset = 0; + ctx->new_packmode = '@'; + ctx->enc_packmode = '@'; + ctx->new_count = 1; + ctx->enc_count = 0; + ctx->enc_type = 0; + ctx->is_complex = 0; + ctx->is_valid_array = 0; + ctx->struct_alignment = 0; + while 
(type->typegroup == 'S') { + ++ctx->head; + ctx->head->field = type->fields; + ctx->head->parent_offset = 0; + type = type->fields->type; + } +} +static int __Pyx_BufFmt_ParseNumber(const char** ts) { + int count; + const char* t = *ts; + if (*t < '0' || *t > '9') { + return -1; + } else { + count = *t++ - '0'; + while (*t >= '0' && *t <= '9') { + count *= 10; + count += *t++ - '0'; + } + } + *ts = t; + return count; +} +static int __Pyx_BufFmt_ExpectNumber(const char **ts) { + int number = __Pyx_BufFmt_ParseNumber(ts); + if (number == -1) + PyErr_Format(PyExc_ValueError,\ + "Does not understand character buffer dtype format string ('%c')", **ts); + return number; +} +static void __Pyx_BufFmt_RaiseUnexpectedChar(char ch) { + PyErr_Format(PyExc_ValueError, + "Unexpected format string character: '%c'", ch); +} +static const char* __Pyx_BufFmt_DescribeTypeChar(char ch, int is_complex) { + switch (ch) { + case '?': return "'bool'"; + case 'c': return "'char'"; + case 'b': return "'signed char'"; + case 'B': return "'unsigned char'"; + case 'h': return "'short'"; + case 'H': return "'unsigned short'"; + case 'i': return "'int'"; + case 'I': return "'unsigned int'"; + case 'l': return "'long'"; + case 'L': return "'unsigned long'"; + case 'q': return "'long long'"; + case 'Q': return "'unsigned long long'"; + case 'f': return (is_complex ? "'complex float'" : "'float'"); + case 'd': return (is_complex ? "'complex double'" : "'double'"); + case 'g': return (is_complex ? "'complex long double'" : "'long double'"); + case 'T': return "a struct"; + case 'O': return "Python object"; + case 'P': return "a pointer"; + case 's': case 'p': return "a string"; + case 0: return "end"; + default: return "unparsable format string"; + } +} +static size_t __Pyx_BufFmt_TypeCharToStandardSize(char ch, int is_complex) { + switch (ch) { + case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; + case 'h': case 'H': return 2; + case 'i': case 'I': case 'l': case 'L': return 4; + case 'q': case 'Q': return 8; + case 'f': return (is_complex ? 8 : 4); + case 'd': return (is_complex ? 16 : 8); + case 'g': { + PyErr_SetString(PyExc_ValueError, "Python does not define a standard format string size for long double ('g').."); + return 0; + } + case 'O': case 'P': return sizeof(void*); + default: + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } +} +static size_t __Pyx_BufFmt_TypeCharToNativeSize(char ch, int is_complex) { + switch (ch) { + case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; + case 'h': case 'H': return sizeof(short); + case 'i': case 'I': return sizeof(int); + case 'l': case 'L': return sizeof(long); + #ifdef HAVE_LONG_LONG + case 'q': case 'Q': return sizeof(PY_LONG_LONG); + #endif + case 'f': return sizeof(float) * (is_complex ? 2 : 1); + case 'd': return sizeof(double) * (is_complex ? 2 : 1); + case 'g': return sizeof(long double) * (is_complex ? 
2 : 1); + case 'O': case 'P': return sizeof(void*); + default: { + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } + } +} +typedef struct { char c; short x; } __Pyx_st_short; +typedef struct { char c; int x; } __Pyx_st_int; +typedef struct { char c; long x; } __Pyx_st_long; +typedef struct { char c; float x; } __Pyx_st_float; +typedef struct { char c; double x; } __Pyx_st_double; +typedef struct { char c; long double x; } __Pyx_st_longdouble; +typedef struct { char c; void *x; } __Pyx_st_void_p; +#ifdef HAVE_LONG_LONG +typedef struct { char c; PY_LONG_LONG x; } __Pyx_st_longlong; +#endif +static size_t __Pyx_BufFmt_TypeCharToAlignment(char ch, int is_complex) { + CYTHON_UNUSED_VAR(is_complex); + switch (ch) { + case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; + case 'h': case 'H': return sizeof(__Pyx_st_short) - sizeof(short); + case 'i': case 'I': return sizeof(__Pyx_st_int) - sizeof(int); + case 'l': case 'L': return sizeof(__Pyx_st_long) - sizeof(long); +#ifdef HAVE_LONG_LONG + case 'q': case 'Q': return sizeof(__Pyx_st_longlong) - sizeof(PY_LONG_LONG); +#endif + case 'f': return sizeof(__Pyx_st_float) - sizeof(float); + case 'd': return sizeof(__Pyx_st_double) - sizeof(double); + case 'g': return sizeof(__Pyx_st_longdouble) - sizeof(long double); + case 'P': case 'O': return sizeof(__Pyx_st_void_p) - sizeof(void*); + default: + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } +} +/* These are for computing the padding at the end of the struct to align + on the first member of the struct. This will probably the same as above, + but we don't have any guarantees. + */ +typedef struct { short x; char c; } __Pyx_pad_short; +typedef struct { int x; char c; } __Pyx_pad_int; +typedef struct { long x; char c; } __Pyx_pad_long; +typedef struct { float x; char c; } __Pyx_pad_float; +typedef struct { double x; char c; } __Pyx_pad_double; +typedef struct { long double x; char c; } __Pyx_pad_longdouble; +typedef struct { void *x; char c; } __Pyx_pad_void_p; +#ifdef HAVE_LONG_LONG +typedef struct { PY_LONG_LONG x; char c; } __Pyx_pad_longlong; +#endif +static size_t __Pyx_BufFmt_TypeCharToPadding(char ch, int is_complex) { + CYTHON_UNUSED_VAR(is_complex); + switch (ch) { + case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; + case 'h': case 'H': return sizeof(__Pyx_pad_short) - sizeof(short); + case 'i': case 'I': return sizeof(__Pyx_pad_int) - sizeof(int); + case 'l': case 'L': return sizeof(__Pyx_pad_long) - sizeof(long); +#ifdef HAVE_LONG_LONG + case 'q': case 'Q': return sizeof(__Pyx_pad_longlong) - sizeof(PY_LONG_LONG); +#endif + case 'f': return sizeof(__Pyx_pad_float) - sizeof(float); + case 'd': return sizeof(__Pyx_pad_double) - sizeof(double); + case 'g': return sizeof(__Pyx_pad_longdouble) - sizeof(long double); + case 'P': case 'O': return sizeof(__Pyx_pad_void_p) - sizeof(void*); + default: + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } +} +static char __Pyx_BufFmt_TypeCharToGroup(char ch, int is_complex) { + switch (ch) { + case 'c': + return 'H'; + case 'b': case 'h': case 'i': + case 'l': case 'q': case 's': case 'p': + return 'I'; + case '?': case 'B': case 'H': case 'I': case 'L': case 'Q': + return 'U'; + case 'f': case 'd': case 'g': + return (is_complex ? 
'C' : 'R'); + case 'O': + return 'O'; + case 'P': + return 'P'; + default: { + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } + } +} +static void __Pyx_BufFmt_RaiseExpected(__Pyx_BufFmt_Context* ctx) { + if (ctx->head == NULL || ctx->head->field == &ctx->root) { + const char* expected; + const char* quote; + if (ctx->head == NULL) { + expected = "end"; + quote = ""; + } else { + expected = ctx->head->field->type->name; + quote = "'"; + } + PyErr_Format(PyExc_ValueError, + "Buffer dtype mismatch, expected %s%s%s but got %s", + quote, expected, quote, + __Pyx_BufFmt_DescribeTypeChar(ctx->enc_type, ctx->is_complex)); + } else { + __Pyx_StructField* field = ctx->head->field; + __Pyx_StructField* parent = (ctx->head - 1)->field; + PyErr_Format(PyExc_ValueError, + "Buffer dtype mismatch, expected '%s' but got %s in '%s.%s'", + field->type->name, __Pyx_BufFmt_DescribeTypeChar(ctx->enc_type, ctx->is_complex), + parent->type->name, field->name); + } +} +static int __Pyx_BufFmt_ProcessTypeChunk(__Pyx_BufFmt_Context* ctx) { + char group; + size_t size, offset, arraysize = 1; + if (ctx->enc_type == 0) return 0; + if (ctx->head->field->type->arraysize[0]) { + int i, ndim = 0; + if (ctx->enc_type == 's' || ctx->enc_type == 'p') { + ctx->is_valid_array = ctx->head->field->type->ndim == 1; + ndim = 1; + if (ctx->enc_count != ctx->head->field->type->arraysize[0]) { + PyErr_Format(PyExc_ValueError, + "Expected a dimension of size %zu, got %zu", + ctx->head->field->type->arraysize[0], ctx->enc_count); + return -1; + } + } + if (!ctx->is_valid_array) { + PyErr_Format(PyExc_ValueError, "Expected %d dimensions, got %d", + ctx->head->field->type->ndim, ndim); + return -1; + } + for (i = 0; i < ctx->head->field->type->ndim; i++) { + arraysize *= ctx->head->field->type->arraysize[i]; + } + ctx->is_valid_array = 0; + ctx->enc_count = 1; + } + group = __Pyx_BufFmt_TypeCharToGroup(ctx->enc_type, ctx->is_complex); + do { + __Pyx_StructField* field = ctx->head->field; + __Pyx_TypeInfo* type = field->type; + if (ctx->enc_packmode == '@' || ctx->enc_packmode == '^') { + size = __Pyx_BufFmt_TypeCharToNativeSize(ctx->enc_type, ctx->is_complex); + } else { + size = __Pyx_BufFmt_TypeCharToStandardSize(ctx->enc_type, ctx->is_complex); + } + if (ctx->enc_packmode == '@') { + size_t align_at = __Pyx_BufFmt_TypeCharToAlignment(ctx->enc_type, ctx->is_complex); + size_t align_mod_offset; + if (align_at == 0) return -1; + align_mod_offset = ctx->fmt_offset % align_at; + if (align_mod_offset > 0) ctx->fmt_offset += align_at - align_mod_offset; + if (ctx->struct_alignment == 0) + ctx->struct_alignment = __Pyx_BufFmt_TypeCharToPadding(ctx->enc_type, + ctx->is_complex); + } + if (type->size != size || type->typegroup != group) { + if (type->typegroup == 'C' && type->fields != NULL) { + size_t parent_offset = ctx->head->parent_offset + field->offset; + ++ctx->head; + ctx->head->field = type->fields; + ctx->head->parent_offset = parent_offset; + continue; + } + if ((type->typegroup == 'H' || group == 'H') && type->size == size) { + } else { + __Pyx_BufFmt_RaiseExpected(ctx); + return -1; + } + } + offset = ctx->head->parent_offset + field->offset; + if (ctx->fmt_offset != offset) { + PyErr_Format(PyExc_ValueError, + "Buffer dtype mismatch; next field is at offset %" CYTHON_FORMAT_SSIZE_T "d but %" CYTHON_FORMAT_SSIZE_T "d expected", + (Py_ssize_t)ctx->fmt_offset, (Py_ssize_t)offset); + return -1; + } + ctx->fmt_offset += size; + if (arraysize) + ctx->fmt_offset += (arraysize - 1) * size; + --ctx->enc_count; + while (1) { + if 
(field == &ctx->root) { + ctx->head = NULL; + if (ctx->enc_count != 0) { + __Pyx_BufFmt_RaiseExpected(ctx); + return -1; + } + break; + } + ctx->head->field = ++field; + if (field->type == NULL) { + --ctx->head; + field = ctx->head->field; + continue; + } else if (field->type->typegroup == 'S') { + size_t parent_offset = ctx->head->parent_offset + field->offset; + if (field->type->fields->type == NULL) continue; + field = field->type->fields; + ++ctx->head; + ctx->head->field = field; + ctx->head->parent_offset = parent_offset; + break; + } else { + break; + } + } + } while (ctx->enc_count); + ctx->enc_type = 0; + ctx->is_complex = 0; + return 0; +} +static int +__pyx_buffmt_parse_array(__Pyx_BufFmt_Context* ctx, const char** tsp) +{ + const char *ts = *tsp; + int i = 0, number, ndim; + ++ts; + if (ctx->new_count != 1) { + PyErr_SetString(PyExc_ValueError, + "Cannot handle repeated arrays in format string"); + return -1; + } + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return -1; + ndim = ctx->head->field->type->ndim; + while (*ts && *ts != ')') { + switch (*ts) { + case ' ': case '\f': case '\r': case '\n': case '\t': case '\v': continue; + default: break; + } + number = __Pyx_BufFmt_ExpectNumber(&ts); + if (number == -1) return -1; + if (i < ndim && (size_t) number != ctx->head->field->type->arraysize[i]) { + PyErr_Format(PyExc_ValueError, + "Expected a dimension of size %zu, got %d", + ctx->head->field->type->arraysize[i], number); + return -1; + } + if (*ts != ',' && *ts != ')') { + PyErr_Format(PyExc_ValueError, + "Expected a comma in format string, got '%c'", *ts); + return -1; + } + if (*ts == ',') ts++; + i++; + } + if (i != ndim) { + PyErr_Format(PyExc_ValueError, "Expected %d dimension(s), got %d", + ctx->head->field->type->ndim, i); + return -1; + } + if (!*ts) { + PyErr_SetString(PyExc_ValueError, + "Unexpected end of format string, expected ')'"); + return -1; + } + ctx->is_valid_array = 1; + ctx->new_count = 1; + *tsp = ++ts; + return 0; +} +static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts) { + int got_Z = 0; + while (1) { + switch(*ts) { + case 0: + if (ctx->enc_type != 0 && ctx->head == NULL) { + __Pyx_BufFmt_RaiseExpected(ctx); + return NULL; + } + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + if (ctx->head != NULL) { + __Pyx_BufFmt_RaiseExpected(ctx); + return NULL; + } + return ts; + case ' ': + case '\r': + case '\n': + ++ts; + break; + case '<': + if (!__Pyx_Is_Little_Endian()) { + PyErr_SetString(PyExc_ValueError, "Little-endian buffer not supported on big-endian compiler"); + return NULL; + } + ctx->new_packmode = '='; + ++ts; + break; + case '>': + case '!': + if (__Pyx_Is_Little_Endian()) { + PyErr_SetString(PyExc_ValueError, "Big-endian buffer not supported on little-endian compiler"); + return NULL; + } + ctx->new_packmode = '='; + ++ts; + break; + case '=': + case '@': + case '^': + ctx->new_packmode = *ts++; + break; + case 'T': + { + const char* ts_after_sub; + size_t i, struct_count = ctx->new_count; + size_t struct_alignment = ctx->struct_alignment; + ctx->new_count = 1; + ++ts; + if (*ts != '{') { + PyErr_SetString(PyExc_ValueError, "Buffer acquisition: Expected '{' after 'T'"); + return NULL; + } + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ctx->enc_type = 0; + ctx->enc_count = 0; + ctx->struct_alignment = 0; + ++ts; + ts_after_sub = ts; + for (i = 0; i != struct_count; ++i) { + ts_after_sub = __Pyx_BufFmt_CheckString(ctx, ts); + if (!ts_after_sub) return NULL; + } + ts = 
ts_after_sub; + if (struct_alignment) ctx->struct_alignment = struct_alignment; + } + break; + case '}': + { + size_t alignment = ctx->struct_alignment; + ++ts; + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ctx->enc_type = 0; + if (alignment && ctx->fmt_offset % alignment) { + ctx->fmt_offset += alignment - (ctx->fmt_offset % alignment); + } + } + return ts; + case 'x': + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ctx->fmt_offset += ctx->new_count; + ctx->new_count = 1; + ctx->enc_count = 0; + ctx->enc_type = 0; + ctx->enc_packmode = ctx->new_packmode; + ++ts; + break; + case 'Z': + got_Z = 1; + ++ts; + if (*ts != 'f' && *ts != 'd' && *ts != 'g') { + __Pyx_BufFmt_RaiseUnexpectedChar('Z'); + return NULL; + } + CYTHON_FALLTHROUGH; + case '?': case 'c': case 'b': case 'B': case 'h': case 'H': case 'i': case 'I': + case 'l': case 'L': case 'q': case 'Q': + case 'f': case 'd': case 'g': + case 'O': case 'p': + if ((ctx->enc_type == *ts) && (got_Z == ctx->is_complex) && + (ctx->enc_packmode == ctx->new_packmode) && (!ctx->is_valid_array)) { + ctx->enc_count += ctx->new_count; + ctx->new_count = 1; + got_Z = 0; + ++ts; + break; + } + CYTHON_FALLTHROUGH; + case 's': + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ctx->enc_count = ctx->new_count; + ctx->enc_packmode = ctx->new_packmode; + ctx->enc_type = *ts; + ctx->is_complex = got_Z; + ++ts; + ctx->new_count = 1; + got_Z = 0; + break; + case ':': + ++ts; + while(*ts != ':') ++ts; + ++ts; + break; + case '(': + if (__pyx_buffmt_parse_array(ctx, &ts) < 0) return NULL; + break; + default: + { + int number = __Pyx_BufFmt_ExpectNumber(&ts); + if (number == -1) return NULL; + ctx->new_count = (size_t)number; + } + } + } +} + +/* BufferGetAndValidate */ + static CYTHON_INLINE void __Pyx_SafeReleaseBuffer(Py_buffer* info) { + if (unlikely(info->buf == NULL)) return; + if (info->suboffsets == __Pyx_minusones) info->suboffsets = NULL; + __Pyx_ReleaseBuffer(info); +} +static void __Pyx_ZeroBuffer(Py_buffer* buf) { + buf->buf = NULL; + buf->obj = NULL; + buf->strides = __Pyx_zeros; + buf->shape = __Pyx_zeros; + buf->suboffsets = __Pyx_minusones; +} +static int __Pyx__GetBufferAndValidate( + Py_buffer* buf, PyObject* obj, __Pyx_TypeInfo* dtype, int flags, + int nd, int cast, __Pyx_BufFmt_StackElem* stack) +{ + buf->buf = NULL; + if (unlikely(__Pyx_GetBuffer(obj, buf, flags) == -1)) { + __Pyx_ZeroBuffer(buf); + return -1; + } + if (unlikely(buf->ndim != nd)) { + PyErr_Format(PyExc_ValueError, + "Buffer has wrong number of dimensions (expected %d, got %d)", + nd, buf->ndim); + goto fail; + } + if (!cast) { + __Pyx_BufFmt_Context ctx; + __Pyx_BufFmt_Init(&ctx, stack, dtype); + if (!__Pyx_BufFmt_CheckString(&ctx, buf->format)) goto fail; + } + if (unlikely((size_t)buf->itemsize != dtype->size)) { + PyErr_Format(PyExc_ValueError, + "Item size of buffer (%" CYTHON_FORMAT_SSIZE_T "d byte%s) does not match size of '%s' (%" CYTHON_FORMAT_SSIZE_T "d byte%s)", + buf->itemsize, (buf->itemsize > 1) ? "s" : "", + dtype->name, (Py_ssize_t)dtype->size, (dtype->size > 1) ? "s" : ""); + goto fail; + } + if (buf->suboffsets == NULL) buf->suboffsets = __Pyx_minusones; + return 0; +fail:; + __Pyx_SafeReleaseBuffer(buf); + return -1; +} + +/* PyDictVersioning */ + #if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj) { + PyObject *dict = Py_TYPE(obj)->tp_dict; + return likely(dict) ? 
__PYX_GET_DICT_VERSION(dict) : 0; +} +static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj) { + PyObject **dictptr = NULL; + Py_ssize_t offset = Py_TYPE(obj)->tp_dictoffset; + if (offset) { +#if CYTHON_COMPILING_IN_CPYTHON + dictptr = (likely(offset > 0)) ? (PyObject **) ((char *)obj + offset) : _PyObject_GetDictPtr(obj); +#else + dictptr = _PyObject_GetDictPtr(obj); +#endif + } + return (dictptr && *dictptr) ? __PYX_GET_DICT_VERSION(*dictptr) : 0; +} +static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version) { + PyObject *dict = Py_TYPE(obj)->tp_dict; + if (unlikely(!dict) || unlikely(tp_dict_version != __PYX_GET_DICT_VERSION(dict))) + return 0; + return obj_dict_version == __Pyx_get_object_dict_version(obj); +} +#endif + +/* GetModuleGlobalName */ + #if CYTHON_USE_DICT_VERSIONS +static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value) +#else +static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name) +#endif +{ + PyObject *result; +#if !CYTHON_AVOID_BORROWED_REFS +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030500A1 && PY_VERSION_HEX < 0x030d0000 + result = _PyDict_GetItem_KnownHash(__pyx_d, name, ((PyASCIIObject *) name)->hash); + __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version) + if (likely(result)) { + return __Pyx_NewRef(result); + } else if (unlikely(PyErr_Occurred())) { + return NULL; + } +#elif CYTHON_COMPILING_IN_LIMITED_API + if (unlikely(!__pyx_m)) { + return NULL; + } + result = PyObject_GetAttr(__pyx_m, name); + if (likely(result)) { + return result; + } +#else + result = PyDict_GetItem(__pyx_d, name); + __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version) + if (likely(result)) { + return __Pyx_NewRef(result); + } +#endif +#else + result = PyObject_GetItem(__pyx_d, name); + __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version) + if (likely(result)) { + return __Pyx_NewRef(result); + } + PyErr_Clear(); +#endif + return __Pyx_GetBuiltinName(name); +} + +/* ExtTypeTest */ + static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type) { + __Pyx_TypeName obj_type_name; + __Pyx_TypeName type_name; + if (unlikely(!type)) { + PyErr_SetString(PyExc_SystemError, "Missing type object"); + return 0; + } + if (likely(__Pyx_TypeCheck(obj, type))) + return 1; + obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); + type_name = __Pyx_PyType_GetName(type); + PyErr_Format(PyExc_TypeError, + "Cannot convert " __Pyx_FMT_TYPENAME " to " __Pyx_FMT_TYPENAME, + obj_type_name, type_name); + __Pyx_DECREF_TypeName(obj_type_name); + __Pyx_DECREF_TypeName(type_name); + return 0; +} + +/* BufferIndexError */ + static void __Pyx_RaiseBufferIndexError(int axis) { + PyErr_Format(PyExc_IndexError, + "Out of bounds on buffer access (axis %d)", axis); +} + +/* GetItemInt */ + static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j) { + PyObject *r; + if (unlikely(!j)) return NULL; + r = PyObject_GetItem(o, j); + Py_DECREF(j); + return r; +} +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, + CYTHON_NCP_UNUSED int wraparound, + CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + Py_ssize_t wrapped_i = i; + if (wraparound & unlikely(i < 0)) { + wrapped_i += PyList_GET_SIZE(o); + } + if ((!boundscheck) || likely(__Pyx_is_valid_index(wrapped_i, 
PyList_GET_SIZE(o)))) { + PyObject *r = PyList_GET_ITEM(o, wrapped_i); + Py_INCREF(r); + return r; + } + return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); +#else + return PySequence_GetItem(o, i); +#endif +} +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, + CYTHON_NCP_UNUSED int wraparound, + CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + Py_ssize_t wrapped_i = i; + if (wraparound & unlikely(i < 0)) { + wrapped_i += PyTuple_GET_SIZE(o); + } + if ((!boundscheck) || likely(__Pyx_is_valid_index(wrapped_i, PyTuple_GET_SIZE(o)))) { + PyObject *r = PyTuple_GET_ITEM(o, wrapped_i); + Py_INCREF(r); + return r; + } + return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); +#else + return PySequence_GetItem(o, i); +#endif +} +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, int is_list, + CYTHON_NCP_UNUSED int wraparound, + CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS + if (is_list || PyList_CheckExact(o)) { + Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyList_GET_SIZE(o); + if ((!boundscheck) || (likely(__Pyx_is_valid_index(n, PyList_GET_SIZE(o))))) { + PyObject *r = PyList_GET_ITEM(o, n); + Py_INCREF(r); + return r; + } + } + else if (PyTuple_CheckExact(o)) { + Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyTuple_GET_SIZE(o); + if ((!boundscheck) || likely(__Pyx_is_valid_index(n, PyTuple_GET_SIZE(o)))) { + PyObject *r = PyTuple_GET_ITEM(o, n); + Py_INCREF(r); + return r; + } + } else { + PyMappingMethods *mm = Py_TYPE(o)->tp_as_mapping; + PySequenceMethods *sm = Py_TYPE(o)->tp_as_sequence; + if (mm && mm->mp_subscript) { + PyObject *r, *key = PyInt_FromSsize_t(i); + if (unlikely(!key)) return NULL; + r = mm->mp_subscript(o, key); + Py_DECREF(key); + return r; + } + if (likely(sm && sm->sq_item)) { + if (wraparound && unlikely(i < 0) && likely(sm->sq_length)) { + Py_ssize_t l = sm->sq_length(o); + if (likely(l >= 0)) { + i += l; + } else { + if (!PyErr_ExceptionMatches(PyExc_OverflowError)) + return NULL; + PyErr_Clear(); + } + } + return sm->sq_item(o, i); + } + } +#else + if (is_list || !PyMapping_Check(o)) { + return PySequence_GetItem(o, i); + } +#endif + return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); +} + +/* PyFunctionFastCall */ + #if CYTHON_FAST_PYCALL && !CYTHON_VECTORCALL +static PyObject* __Pyx_PyFunction_FastCallNoKw(PyCodeObject *co, PyObject **args, Py_ssize_t na, + PyObject *globals) { + PyFrameObject *f; + PyThreadState *tstate = __Pyx_PyThreadState_Current; + PyObject **fastlocals; + Py_ssize_t i; + PyObject *result; + assert(globals != NULL); + /* XXX Perhaps we should create a specialized + PyFrame_New() that doesn't take locals, but does + take builtins without sanity checking them. 
+ */ + assert(tstate != NULL); + f = PyFrame_New(tstate, co, globals, NULL); + if (f == NULL) { + return NULL; + } + fastlocals = __Pyx_PyFrame_GetLocalsplus(f); + for (i = 0; i < na; i++) { + Py_INCREF(*args); + fastlocals[i] = *args++; + } + result = PyEval_EvalFrameEx(f,0); + ++tstate->recursion_depth; + Py_DECREF(f); + --tstate->recursion_depth; + return result; +} +static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs) { + PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func); + PyObject *globals = PyFunction_GET_GLOBALS(func); + PyObject *argdefs = PyFunction_GET_DEFAULTS(func); + PyObject *closure; +#if PY_MAJOR_VERSION >= 3 + PyObject *kwdefs; +#endif + PyObject *kwtuple, **k; + PyObject **d; + Py_ssize_t nd; + Py_ssize_t nk; + PyObject *result; + assert(kwargs == NULL || PyDict_Check(kwargs)); + nk = kwargs ? PyDict_Size(kwargs) : 0; + #if PY_MAJOR_VERSION < 3 + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) { + return NULL; + } + #else + if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) { + return NULL; + } + #endif + if ( +#if PY_MAJOR_VERSION >= 3 + co->co_kwonlyargcount == 0 && +#endif + likely(kwargs == NULL || nk == 0) && + co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) { + if (argdefs == NULL && co->co_argcount == nargs) { + result = __Pyx_PyFunction_FastCallNoKw(co, args, nargs, globals); + goto done; + } + else if (nargs == 0 && argdefs != NULL + && co->co_argcount == Py_SIZE(argdefs)) { + /* function called with no arguments, but all parameters have + a default value: use default values as arguments .*/ + args = &PyTuple_GET_ITEM(argdefs, 0); + result =__Pyx_PyFunction_FastCallNoKw(co, args, Py_SIZE(argdefs), globals); + goto done; + } + } + if (kwargs != NULL) { + Py_ssize_t pos, i; + kwtuple = PyTuple_New(2 * nk); + if (kwtuple == NULL) { + result = NULL; + goto done; + } + k = &PyTuple_GET_ITEM(kwtuple, 0); + pos = i = 0; + while (PyDict_Next(kwargs, &pos, &k[i], &k[i+1])) { + Py_INCREF(k[i]); + Py_INCREF(k[i+1]); + i += 2; + } + nk = i / 2; + } + else { + kwtuple = NULL; + k = NULL; + } + closure = PyFunction_GET_CLOSURE(func); +#if PY_MAJOR_VERSION >= 3 + kwdefs = PyFunction_GET_KW_DEFAULTS(func); +#endif + if (argdefs != NULL) { + d = &PyTuple_GET_ITEM(argdefs, 0); + nd = Py_SIZE(argdefs); + } + else { + d = NULL; + nd = 0; + } +#if PY_MAJOR_VERSION >= 3 + result = PyEval_EvalCodeEx((PyObject*)co, globals, (PyObject *)NULL, + args, (int)nargs, + k, (int)nk, + d, (int)nd, kwdefs, closure); +#else + result = PyEval_EvalCodeEx(co, globals, (PyObject *)NULL, + args, (int)nargs, + k, (int)nk, + d, (int)nd, closure); +#endif + Py_XDECREF(kwtuple); +done: + Py_LeaveRecursiveCall(); + return result; +} +#endif + +/* PyObjectCallMethO */ + #if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) { + PyObject *self, *result; + PyCFunction cfunc; + cfunc = __Pyx_CyOrPyCFunction_GET_FUNCTION(func); + self = __Pyx_CyOrPyCFunction_GET_SELF(func); + #if PY_MAJOR_VERSION < 3 + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + #else + if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) + return NULL; + #endif + result = cfunc(self, arg); + Py_LeaveRecursiveCall(); + if (unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return 
result; +} +#endif + +/* PyObjectFastCall */ + #if PY_VERSION_HEX < 0x03090000 || CYTHON_COMPILING_IN_LIMITED_API +static PyObject* __Pyx_PyObject_FastCall_fallback(PyObject *func, PyObject **args, size_t nargs, PyObject *kwargs) { + PyObject *argstuple; + PyObject *result = 0; + size_t i; + argstuple = PyTuple_New((Py_ssize_t)nargs); + if (unlikely(!argstuple)) return NULL; + for (i = 0; i < nargs; i++) { + Py_INCREF(args[i]); + if (__Pyx_PyTuple_SET_ITEM(argstuple, (Py_ssize_t)i, args[i]) < 0) goto bad; + } + result = __Pyx_PyObject_Call(func, argstuple, kwargs); + bad: + Py_DECREF(argstuple); + return result; +} +#endif +static CYTHON_INLINE PyObject* __Pyx_PyObject_FastCallDict(PyObject *func, PyObject **args, size_t _nargs, PyObject *kwargs) { + Py_ssize_t nargs = __Pyx_PyVectorcall_NARGS(_nargs); +#if CYTHON_COMPILING_IN_CPYTHON + if (nargs == 0 && kwargs == NULL) { + if (__Pyx_CyOrPyCFunction_Check(func) && likely( __Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_NOARGS)) + return __Pyx_PyObject_CallMethO(func, NULL); + } + else if (nargs == 1 && kwargs == NULL) { + if (__Pyx_CyOrPyCFunction_Check(func) && likely( __Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_O)) + return __Pyx_PyObject_CallMethO(func, args[0]); + } +#endif + #if PY_VERSION_HEX < 0x030800B1 + #if CYTHON_FAST_PYCCALL + if (PyCFunction_Check(func)) { + if (kwargs) { + return _PyCFunction_FastCallDict(func, args, nargs, kwargs); + } else { + return _PyCFunction_FastCallKeywords(func, args, nargs, NULL); + } + } + #if PY_VERSION_HEX >= 0x030700A1 + if (!kwargs && __Pyx_IS_TYPE(func, &PyMethodDescr_Type)) { + return _PyMethodDescr_FastCallKeywords(func, args, nargs, NULL); + } + #endif + #endif + #if CYTHON_FAST_PYCALL + if (PyFunction_Check(func)) { + return __Pyx_PyFunction_FastCallDict(func, args, nargs, kwargs); + } + #endif + #endif + if (kwargs == NULL) { + #if CYTHON_VECTORCALL + #if PY_VERSION_HEX < 0x03090000 + vectorcallfunc f = _PyVectorcall_Function(func); + #else + vectorcallfunc f = PyVectorcall_Function(func); + #endif + if (f) { + return f(func, args, (size_t)nargs, NULL); + } + #elif defined(__Pyx_CyFunction_USED) && CYTHON_BACKPORT_VECTORCALL + if (__Pyx_CyFunction_CheckExact(func)) { + __pyx_vectorcallfunc f = __Pyx_CyFunction_func_vectorcall(func); + if (f) return f(func, args, (size_t)nargs, NULL); + } + #endif + } + if (nargs == 0) { + return __Pyx_PyObject_Call(func, __pyx_empty_tuple, kwargs); + } + #if PY_VERSION_HEX >= 0x03090000 && !CYTHON_COMPILING_IN_LIMITED_API + return PyObject_VectorcallDict(func, args, (size_t)nargs, kwargs); + #else + return __Pyx_PyObject_FastCall_fallback(func, args, (size_t)nargs, kwargs); + #endif +} + +/* PyObjectCallOneArg */ + static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { + PyObject *args[2] = {NULL, arg}; + return __Pyx_PyObject_FastCall(func, args+1, 1 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); +} + +/* ObjectGetItem */ + #if CYTHON_USE_TYPE_SLOTS +static PyObject *__Pyx_PyObject_GetIndex(PyObject *obj, PyObject *index) { + PyObject *runerr = NULL; + Py_ssize_t key_value; + key_value = __Pyx_PyIndex_AsSsize_t(index); + if (likely(key_value != -1 || !(runerr = PyErr_Occurred()))) { + return __Pyx_GetItemInt_Fast(obj, key_value, 0, 1, 1); + } + if (PyErr_GivenExceptionMatches(runerr, PyExc_OverflowError)) { + __Pyx_TypeName index_type_name = __Pyx_PyType_GetName(Py_TYPE(index)); + PyErr_Clear(); + PyErr_Format(PyExc_IndexError, + "cannot fit '" __Pyx_FMT_TYPENAME "' into an index-sized integer", index_type_name); + 
__Pyx_DECREF_TypeName(index_type_name); + } + return NULL; +} +static PyObject *__Pyx_PyObject_GetItem_Slow(PyObject *obj, PyObject *key) { + __Pyx_TypeName obj_type_name; + if (likely(PyType_Check(obj))) { + PyObject *meth = __Pyx_PyObject_GetAttrStrNoError(obj, __pyx_n_s_class_getitem); + if (!meth) { + PyErr_Clear(); + } else { + PyObject *result = __Pyx_PyObject_CallOneArg(meth, key); + Py_DECREF(meth); + return result; + } + } + obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); + PyErr_Format(PyExc_TypeError, + "'" __Pyx_FMT_TYPENAME "' object is not subscriptable", obj_type_name); + __Pyx_DECREF_TypeName(obj_type_name); + return NULL; +} +static PyObject *__Pyx_PyObject_GetItem(PyObject *obj, PyObject *key) { + PyTypeObject *tp = Py_TYPE(obj); + PyMappingMethods *mm = tp->tp_as_mapping; + PySequenceMethods *sm = tp->tp_as_sequence; + if (likely(mm && mm->mp_subscript)) { + return mm->mp_subscript(obj, key); + } + if (likely(sm && sm->sq_item)) { + return __Pyx_PyObject_GetIndex(obj, key); + } + return __Pyx_PyObject_GetItem_Slow(obj, key); +} +#endif + +/* BufferFallbackError */ + static void __Pyx_RaiseBufferFallbackError(void) { + PyErr_SetString(PyExc_ValueError, + "Buffer acquisition failed on assignment; and then reacquiring the old buffer failed too!"); +} + +/* PyIntBinop */ + #if !CYTHON_COMPILING_IN_PYPY +static PyObject* __Pyx_PyInt_AddObjC(PyObject *op1, PyObject *op2, long intval, int inplace, int zerodivision_check) { + CYTHON_MAYBE_UNUSED_VAR(intval); + CYTHON_MAYBE_UNUSED_VAR(inplace); + CYTHON_UNUSED_VAR(zerodivision_check); + #if PY_MAJOR_VERSION < 3 + if (likely(PyInt_CheckExact(op1))) { + const long b = intval; + long x; + long a = PyInt_AS_LONG(op1); + + x = (long)((unsigned long)a + (unsigned long)b); + if (likely((x^a) >= 0 || (x^b) >= 0)) + return PyInt_FromLong(x); + return PyLong_Type.tp_as_number->nb_add(op1, op2); + } + #endif + #if CYTHON_USE_PYLONG_INTERNALS + if (likely(PyLong_CheckExact(op1))) { + const long b = intval; + long a, x; +#ifdef HAVE_LONG_LONG + const PY_LONG_LONG llb = intval; + PY_LONG_LONG lla, llx; +#endif + if (unlikely(__Pyx_PyLong_IsZero(op1))) { + return __Pyx_NewRef(op2); + } + if (likely(__Pyx_PyLong_IsCompact(op1))) { + a = __Pyx_PyLong_CompactValue(op1); + } else { + const digit* digits = __Pyx_PyLong_Digits(op1); + const Py_ssize_t size = __Pyx_PyLong_SignedDigitCount(op1); + switch (size) { + case -2: + if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + a = -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; + #ifdef HAVE_LONG_LONG + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) { + lla = -(PY_LONG_LONG) (((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + goto long_long; + #endif + } + CYTHON_FALLTHROUGH; + case 2: + if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + a = (long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; + #ifdef HAVE_LONG_LONG + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) { + lla = (PY_LONG_LONG) (((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + goto long_long; + #endif + } + CYTHON_FALLTHROUGH; + case -3: + if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + a = -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; + #ifdef HAVE_LONG_LONG + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) { + lla = -(PY_LONG_LONG) 
(((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + goto long_long; + #endif + } + CYTHON_FALLTHROUGH; + case 3: + if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + a = (long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; + #ifdef HAVE_LONG_LONG + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) { + lla = (PY_LONG_LONG) (((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + goto long_long; + #endif + } + CYTHON_FALLTHROUGH; + case -4: + if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { + a = -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; + #ifdef HAVE_LONG_LONG + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) { + lla = -(PY_LONG_LONG) (((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + goto long_long; + #endif + } + CYTHON_FALLTHROUGH; + case 4: + if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { + a = (long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; + #ifdef HAVE_LONG_LONG + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) { + lla = (PY_LONG_LONG) (((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + goto long_long; + #endif + } + CYTHON_FALLTHROUGH; + default: return PyLong_Type.tp_as_number->nb_add(op1, op2); + } + } + x = a + b; + return PyLong_FromLong(x); +#ifdef HAVE_LONG_LONG + long_long: + llx = lla + llb; + return PyLong_FromLongLong(llx); +#endif + + + } + #endif + if (PyFloat_CheckExact(op1)) { + const long b = intval; +#if CYTHON_COMPILING_IN_LIMITED_API + double a = __pyx_PyFloat_AsDouble(op1); +#else + double a = PyFloat_AS_DOUBLE(op1); +#endif + double result; + + PyFPE_START_PROTECT("add", return NULL) + result = ((double)a) + (double)b; + PyFPE_END_PROTECT(result) + return PyFloat_FromDouble(result); + } + return (inplace ? 
PyNumber_InPlaceAdd : PyNumber_Add)(op1, op2); +} +#endif + +/* SliceObject */ + static CYTHON_INLINE int __Pyx_PyObject_SetSlice(PyObject* obj, PyObject* value, + Py_ssize_t cstart, Py_ssize_t cstop, + PyObject** _py_start, PyObject** _py_stop, PyObject** _py_slice, + int has_cstart, int has_cstop, int wraparound) { + __Pyx_TypeName obj_type_name; +#if CYTHON_USE_TYPE_SLOTS + PyMappingMethods* mp; +#if PY_MAJOR_VERSION < 3 + PySequenceMethods* ms = Py_TYPE(obj)->tp_as_sequence; + if (likely(ms && ms->sq_ass_slice)) { + if (!has_cstart) { + if (_py_start && (*_py_start != Py_None)) { + cstart = __Pyx_PyIndex_AsSsize_t(*_py_start); + if ((cstart == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; + } else + cstart = 0; + } + if (!has_cstop) { + if (_py_stop && (*_py_stop != Py_None)) { + cstop = __Pyx_PyIndex_AsSsize_t(*_py_stop); + if ((cstop == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; + } else + cstop = PY_SSIZE_T_MAX; + } + if (wraparound && unlikely((cstart < 0) | (cstop < 0)) && likely(ms->sq_length)) { + Py_ssize_t l = ms->sq_length(obj); + if (likely(l >= 0)) { + if (cstop < 0) { + cstop += l; + if (cstop < 0) cstop = 0; + } + if (cstart < 0) { + cstart += l; + if (cstart < 0) cstart = 0; + } + } else { + if (!PyErr_ExceptionMatches(PyExc_OverflowError)) + goto bad; + PyErr_Clear(); + } + } + return ms->sq_ass_slice(obj, cstart, cstop, value); + } +#else + CYTHON_UNUSED_VAR(wraparound); +#endif + mp = Py_TYPE(obj)->tp_as_mapping; + if (likely(mp && mp->mp_ass_subscript)) +#else + CYTHON_UNUSED_VAR(wraparound); +#endif + { + int result; + PyObject *py_slice, *py_start, *py_stop; + if (_py_slice) { + py_slice = *_py_slice; + } else { + PyObject* owned_start = NULL; + PyObject* owned_stop = NULL; + if (_py_start) { + py_start = *_py_start; + } else { + if (has_cstart) { + owned_start = py_start = PyInt_FromSsize_t(cstart); + if (unlikely(!py_start)) goto bad; + } else + py_start = Py_None; + } + if (_py_stop) { + py_stop = *_py_stop; + } else { + if (has_cstop) { + owned_stop = py_stop = PyInt_FromSsize_t(cstop); + if (unlikely(!py_stop)) { + Py_XDECREF(owned_start); + goto bad; + } + } else + py_stop = Py_None; + } + py_slice = PySlice_New(py_start, py_stop, Py_None); + Py_XDECREF(owned_start); + Py_XDECREF(owned_stop); + if (unlikely(!py_slice)) goto bad; + } +#if CYTHON_USE_TYPE_SLOTS + result = mp->mp_ass_subscript(obj, py_slice, value); +#else + result = value ? PyObject_SetItem(obj, py_slice, value) : PyObject_DelItem(obj, py_slice); +#endif + if (!_py_slice) { + Py_DECREF(py_slice); + } + return result; + } + obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); + PyErr_Format(PyExc_TypeError, + "'" __Pyx_FMT_TYPENAME "' object does not support slice %.10s", + obj_type_name, value ? "assignment" : "deletion"); + __Pyx_DECREF_TypeName(obj_type_name); +bad: + return -1; +} + +/* SetItemInt */ + static int __Pyx_SetItemInt_Generic(PyObject *o, PyObject *j, PyObject *v) { + int r; + if (unlikely(!j)) return -1; + r = PyObject_SetItem(o, j, v); + Py_DECREF(j); + return r; +} +static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObject *v, int is_list, + CYTHON_NCP_UNUSED int wraparound, CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS + if (is_list || PyList_CheckExact(o)) { + Py_ssize_t n = (!wraparound) ? i : ((likely(i >= 0)) ? 
i : i + PyList_GET_SIZE(o)); + if ((!boundscheck) || likely(__Pyx_is_valid_index(n, PyList_GET_SIZE(o)))) { + PyObject* old = PyList_GET_ITEM(o, n); + Py_INCREF(v); + PyList_SET_ITEM(o, n, v); + Py_DECREF(old); + return 1; + } + } else { + PyMappingMethods *mm = Py_TYPE(o)->tp_as_mapping; + PySequenceMethods *sm = Py_TYPE(o)->tp_as_sequence; + if (mm && mm->mp_ass_subscript) { + int r; + PyObject *key = PyInt_FromSsize_t(i); + if (unlikely(!key)) return -1; + r = mm->mp_ass_subscript(o, key, v); + Py_DECREF(key); + return r; + } + if (likely(sm && sm->sq_ass_item)) { + if (wraparound && unlikely(i < 0) && likely(sm->sq_length)) { + Py_ssize_t l = sm->sq_length(o); + if (likely(l >= 0)) { + i += l; + } else { + if (!PyErr_ExceptionMatches(PyExc_OverflowError)) + return -1; + PyErr_Clear(); + } + } + return sm->sq_ass_item(o, i, v); + } + } +#else + if (is_list || !PyMapping_Check(o)) + { + return PySequence_SetItem(o, i, v); + } +#endif + return __Pyx_SetItemInt_Generic(o, PyInt_FromSsize_t(i), v); +} + +/* TypeImport */ + #ifndef __PYX_HAVE_RT_ImportType_3_0_11 +#define __PYX_HAVE_RT_ImportType_3_0_11 +static PyTypeObject *__Pyx_ImportType_3_0_11(PyObject *module, const char *module_name, const char *class_name, + size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_0_11 check_size) +{ + PyObject *result = 0; + char warning[200]; + Py_ssize_t basicsize; + Py_ssize_t itemsize; +#if CYTHON_COMPILING_IN_LIMITED_API + PyObject *py_basicsize; + PyObject *py_itemsize; +#endif + result = PyObject_GetAttrString(module, class_name); + if (!result) + goto bad; + if (!PyType_Check(result)) { + PyErr_Format(PyExc_TypeError, + "%.200s.%.200s is not a type object", + module_name, class_name); + goto bad; + } +#if !CYTHON_COMPILING_IN_LIMITED_API + basicsize = ((PyTypeObject *)result)->tp_basicsize; + itemsize = ((PyTypeObject *)result)->tp_itemsize; +#else + py_basicsize = PyObject_GetAttrString(result, "__basicsize__"); + if (!py_basicsize) + goto bad; + basicsize = PyLong_AsSsize_t(py_basicsize); + Py_DECREF(py_basicsize); + py_basicsize = 0; + if (basicsize == (Py_ssize_t)-1 && PyErr_Occurred()) + goto bad; + py_itemsize = PyObject_GetAttrString(result, "__itemsize__"); + if (!py_itemsize) + goto bad; + itemsize = PyLong_AsSsize_t(py_itemsize); + Py_DECREF(py_itemsize); + py_itemsize = 0; + if (itemsize == (Py_ssize_t)-1 && PyErr_Occurred()) + goto bad; +#endif + if (itemsize) { + if (size % alignment) { + alignment = size % alignment; + } + if (itemsize < (Py_ssize_t)alignment) + itemsize = (Py_ssize_t)alignment; + } + if ((size_t)(basicsize + itemsize) < size) { + PyErr_Format(PyExc_ValueError, + "%.200s.%.200s size changed, may indicate binary incompatibility. " + "Expected %zd from C header, got %zd from PyObject", + module_name, class_name, size, basicsize+itemsize); + goto bad; + } + if (check_size == __Pyx_ImportType_CheckSize_Error_3_0_11 && + ((size_t)basicsize > size || (size_t)(basicsize + itemsize) < size)) { + PyErr_Format(PyExc_ValueError, + "%.200s.%.200s size changed, may indicate binary incompatibility. " + "Expected %zd from C header, got %zd-%zd from PyObject", + module_name, class_name, size, basicsize, basicsize+itemsize); + goto bad; + } + else if (check_size == __Pyx_ImportType_CheckSize_Warn_3_0_11 && (size_t)basicsize > size) { + PyOS_snprintf(warning, sizeof(warning), + "%s.%s size changed, may indicate binary incompatibility. 
" + "Expected %zd from C header, got %zd from PyObject", + module_name, class_name, size, basicsize); + if (PyErr_WarnEx(NULL, warning, 0) < 0) goto bad; + } + return (PyTypeObject *)result; +bad: + Py_XDECREF(result); + return NULL; +} +#endif + +/* Import */ + static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) { + PyObject *module = 0; + PyObject *empty_dict = 0; + PyObject *empty_list = 0; + #if PY_MAJOR_VERSION < 3 + PyObject *py_import; + py_import = __Pyx_PyObject_GetAttrStr(__pyx_b, __pyx_n_s_import); + if (unlikely(!py_import)) + goto bad; + if (!from_list) { + empty_list = PyList_New(0); + if (unlikely(!empty_list)) + goto bad; + from_list = empty_list; + } + #endif + empty_dict = PyDict_New(); + if (unlikely(!empty_dict)) + goto bad; + { + #if PY_MAJOR_VERSION >= 3 + if (level == -1) { + if (strchr(__Pyx_MODULE_NAME, '.') != NULL) { + module = PyImport_ImportModuleLevelObject( + name, __pyx_d, empty_dict, from_list, 1); + if (unlikely(!module)) { + if (unlikely(!PyErr_ExceptionMatches(PyExc_ImportError))) + goto bad; + PyErr_Clear(); + } + } + level = 0; + } + #endif + if (!module) { + #if PY_MAJOR_VERSION < 3 + PyObject *py_level = PyInt_FromLong(level); + if (unlikely(!py_level)) + goto bad; + module = PyObject_CallFunctionObjArgs(py_import, + name, __pyx_d, empty_dict, from_list, py_level, (PyObject *)NULL); + Py_DECREF(py_level); + #else + module = PyImport_ImportModuleLevelObject( + name, __pyx_d, empty_dict, from_list, level); + #endif + } + } +bad: + Py_XDECREF(empty_dict); + Py_XDECREF(empty_list); + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(py_import); + #endif + return module; +} + +/* ImportDottedModule */ + #if PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx__ImportDottedModule_Error(PyObject *name, PyObject *parts_tuple, Py_ssize_t count) { + PyObject *partial_name = NULL, *slice = NULL, *sep = NULL; + if (unlikely(PyErr_Occurred())) { + PyErr_Clear(); + } + if (likely(PyTuple_GET_SIZE(parts_tuple) == count)) { + partial_name = name; + } else { + slice = PySequence_GetSlice(parts_tuple, 0, count); + if (unlikely(!slice)) + goto bad; + sep = PyUnicode_FromStringAndSize(".", 1); + if (unlikely(!sep)) + goto bad; + partial_name = PyUnicode_Join(sep, slice); + } + PyErr_Format( +#if PY_MAJOR_VERSION < 3 + PyExc_ImportError, + "No module named '%s'", PyString_AS_STRING(partial_name)); +#else +#if PY_VERSION_HEX >= 0x030600B1 + PyExc_ModuleNotFoundError, +#else + PyExc_ImportError, +#endif + "No module named '%U'", partial_name); +#endif +bad: + Py_XDECREF(sep); + Py_XDECREF(slice); + Py_XDECREF(partial_name); + return NULL; +} +#endif +#if PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx__ImportDottedModule_Lookup(PyObject *name) { + PyObject *imported_module; +#if PY_VERSION_HEX < 0x030700A1 || (CYTHON_COMPILING_IN_PYPY && PYPY_VERSION_NUM < 0x07030400) + PyObject *modules = PyImport_GetModuleDict(); + if (unlikely(!modules)) + return NULL; + imported_module = __Pyx_PyDict_GetItemStr(modules, name); + Py_XINCREF(imported_module); +#else + imported_module = PyImport_GetModule(name); +#endif + return imported_module; +} +#endif +#if PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx_ImportDottedModule_WalkParts(PyObject *module, PyObject *name, PyObject *parts_tuple) { + Py_ssize_t i, nparts; + nparts = PyTuple_GET_SIZE(parts_tuple); + for (i=1; i < nparts && module; i++) { + PyObject *part, *submodule; +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + part = PyTuple_GET_ITEM(parts_tuple, i); +#else + part = PySequence_ITEM(parts_tuple, i); 
+#endif + submodule = __Pyx_PyObject_GetAttrStrNoError(module, part); +#if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS) + Py_DECREF(part); +#endif + Py_DECREF(module); + module = submodule; + } + if (unlikely(!module)) { + return __Pyx__ImportDottedModule_Error(name, parts_tuple, i); + } + return module; +} +#endif +static PyObject *__Pyx__ImportDottedModule(PyObject *name, PyObject *parts_tuple) { +#if PY_MAJOR_VERSION < 3 + PyObject *module, *from_list, *star = __pyx_n_s__6; + CYTHON_UNUSED_VAR(parts_tuple); + from_list = PyList_New(1); + if (unlikely(!from_list)) + return NULL; + Py_INCREF(star); + PyList_SET_ITEM(from_list, 0, star); + module = __Pyx_Import(name, from_list, 0); + Py_DECREF(from_list); + return module; +#else + PyObject *imported_module; + PyObject *module = __Pyx_Import(name, NULL, 0); + if (!parts_tuple || unlikely(!module)) + return module; + imported_module = __Pyx__ImportDottedModule_Lookup(name); + if (likely(imported_module)) { + Py_DECREF(module); + return imported_module; + } + PyErr_Clear(); + return __Pyx_ImportDottedModule_WalkParts(module, name, parts_tuple); +#endif +} +static PyObject *__Pyx_ImportDottedModule(PyObject *name, PyObject *parts_tuple) { +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030400B1 + PyObject *module = __Pyx__ImportDottedModule_Lookup(name); + if (likely(module)) { + PyObject *spec = __Pyx_PyObject_GetAttrStrNoError(module, __pyx_n_s_spec); + if (likely(spec)) { + PyObject *unsafe = __Pyx_PyObject_GetAttrStrNoError(spec, __pyx_n_s_initializing); + if (likely(!unsafe || !__Pyx_PyObject_IsTrue(unsafe))) { + Py_DECREF(spec); + spec = NULL; + } + Py_XDECREF(unsafe); + } + if (likely(!spec)) { + PyErr_Clear(); + return module; + } + Py_DECREF(spec); + Py_DECREF(module); + } else if (PyErr_Occurred()) { + PyErr_Clear(); + } +#endif + return __Pyx__ImportDottedModule(name, parts_tuple); +} + +/* FixUpExtensionType */ + #if CYTHON_USE_TYPE_SPECS +static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject *type) { +#if PY_VERSION_HEX > 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + CYTHON_UNUSED_VAR(spec); + CYTHON_UNUSED_VAR(type); +#else + const PyType_Slot *slot = spec->slots; + while (slot && slot->slot && slot->slot != Py_tp_members) + slot++; + if (slot && slot->slot == Py_tp_members) { + int changed = 0; +#if !(PY_VERSION_HEX <= 0x030900b1 && CYTHON_COMPILING_IN_CPYTHON) + const +#endif + PyMemberDef *memb = (PyMemberDef*) slot->pfunc; + while (memb && memb->name) { + if (memb->name[0] == '_' && memb->name[1] == '_') { +#if PY_VERSION_HEX < 0x030900b1 + if (strcmp(memb->name, "__weaklistoffset__") == 0) { + assert(memb->type == T_PYSSIZET); + assert(memb->flags == READONLY); + type->tp_weaklistoffset = memb->offset; + changed = 1; + } + else if (strcmp(memb->name, "__dictoffset__") == 0) { + assert(memb->type == T_PYSSIZET); + assert(memb->flags == READONLY); + type->tp_dictoffset = memb->offset; + changed = 1; + } +#if CYTHON_METH_FASTCALL + else if (strcmp(memb->name, "__vectorcalloffset__") == 0) { + assert(memb->type == T_PYSSIZET); + assert(memb->flags == READONLY); +#if PY_VERSION_HEX >= 0x030800b4 + type->tp_vectorcall_offset = memb->offset; +#else + type->tp_print = (printfunc) memb->offset; +#endif + changed = 1; + } +#endif +#else + if ((0)); +#endif +#if PY_VERSION_HEX <= 0x030900b1 && CYTHON_COMPILING_IN_CPYTHON + else if (strcmp(memb->name, "__module__") == 0) { + PyObject *descr; + assert(memb->type == T_OBJECT); + assert(memb->flags == 0 || memb->flags == READONLY); + 
descr = PyDescr_NewMember(type, memb); + if (unlikely(!descr)) + return -1; + if (unlikely(PyDict_SetItem(type->tp_dict, PyDescr_NAME(descr), descr) < 0)) { + Py_DECREF(descr); + return -1; + } + Py_DECREF(descr); + changed = 1; + } +#endif + } + memb++; + } + if (changed) + PyType_Modified(type); + } +#endif + return 0; +} +#endif + +/* FetchSharedCythonModule */ + static PyObject *__Pyx_FetchSharedCythonABIModule(void) { + return __Pyx_PyImport_AddModuleRef((char*) __PYX_ABI_MODULE_NAME); +} + +/* FetchCommonType */ + static int __Pyx_VerifyCachedType(PyObject *cached_type, + const char *name, + Py_ssize_t basicsize, + Py_ssize_t expected_basicsize) { + if (!PyType_Check(cached_type)) { + PyErr_Format(PyExc_TypeError, + "Shared Cython type %.200s is not a type object", name); + return -1; + } + if (basicsize != expected_basicsize) { + PyErr_Format(PyExc_TypeError, + "Shared Cython type %.200s has the wrong size, try recompiling", + name); + return -1; + } + return 0; +} +#if !CYTHON_USE_TYPE_SPECS +static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type) { + PyObject* abi_module; + const char* object_name; + PyTypeObject *cached_type = NULL; + abi_module = __Pyx_FetchSharedCythonABIModule(); + if (!abi_module) return NULL; + object_name = strrchr(type->tp_name, '.'); + object_name = object_name ? object_name+1 : type->tp_name; + cached_type = (PyTypeObject*) PyObject_GetAttrString(abi_module, object_name); + if (cached_type) { + if (__Pyx_VerifyCachedType( + (PyObject *)cached_type, + object_name, + cached_type->tp_basicsize, + type->tp_basicsize) < 0) { + goto bad; + } + goto done; + } + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad; + PyErr_Clear(); + if (PyType_Ready(type) < 0) goto bad; + if (PyObject_SetAttrString(abi_module, object_name, (PyObject *)type) < 0) + goto bad; + Py_INCREF(type); + cached_type = type; +done: + Py_DECREF(abi_module); + return cached_type; +bad: + Py_XDECREF(cached_type); + cached_type = NULL; + goto done; +} +#else +static PyTypeObject *__Pyx_FetchCommonTypeFromSpec(PyObject *module, PyType_Spec *spec, PyObject *bases) { + PyObject *abi_module, *cached_type = NULL; + const char* object_name = strrchr(spec->name, '.'); + object_name = object_name ? object_name+1 : spec->name; + abi_module = __Pyx_FetchSharedCythonABIModule(); + if (!abi_module) return NULL; + cached_type = PyObject_GetAttrString(abi_module, object_name); + if (cached_type) { + Py_ssize_t basicsize; +#if CYTHON_COMPILING_IN_LIMITED_API + PyObject *py_basicsize; + py_basicsize = PyObject_GetAttrString(cached_type, "__basicsize__"); + if (unlikely(!py_basicsize)) goto bad; + basicsize = PyLong_AsSsize_t(py_basicsize); + Py_DECREF(py_basicsize); + py_basicsize = 0; + if (unlikely(basicsize == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; +#else + basicsize = likely(PyType_Check(cached_type)) ? 
((PyTypeObject*) cached_type)->tp_basicsize : -1; +#endif + if (__Pyx_VerifyCachedType( + cached_type, + object_name, + basicsize, + spec->basicsize) < 0) { + goto bad; + } + goto done; + } + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad; + PyErr_Clear(); + CYTHON_UNUSED_VAR(module); + cached_type = __Pyx_PyType_FromModuleAndSpec(abi_module, spec, bases); + if (unlikely(!cached_type)) goto bad; + if (unlikely(__Pyx_fix_up_extension_type_from_spec(spec, (PyTypeObject *) cached_type) < 0)) goto bad; + if (PyObject_SetAttrString(abi_module, object_name, cached_type) < 0) goto bad; +done: + Py_DECREF(abi_module); + assert(cached_type == NULL || PyType_Check(cached_type)); + return (PyTypeObject *) cached_type; +bad: + Py_XDECREF(cached_type); + cached_type = NULL; + goto done; +} +#endif + +/* PyVectorcallFastCallDict */ + #if CYTHON_METH_FASTCALL +static PyObject *__Pyx_PyVectorcall_FastCallDict_kw(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw) +{ + PyObject *res = NULL; + PyObject *kwnames; + PyObject **newargs; + PyObject **kwvalues; + Py_ssize_t i, pos; + size_t j; + PyObject *key, *value; + unsigned long keys_are_strings; + Py_ssize_t nkw = PyDict_GET_SIZE(kw); + newargs = (PyObject **)PyMem_Malloc((nargs + (size_t)nkw) * sizeof(args[0])); + if (unlikely(newargs == NULL)) { + PyErr_NoMemory(); + return NULL; + } + for (j = 0; j < nargs; j++) newargs[j] = args[j]; + kwnames = PyTuple_New(nkw); + if (unlikely(kwnames == NULL)) { + PyMem_Free(newargs); + return NULL; + } + kwvalues = newargs + nargs; + pos = i = 0; + keys_are_strings = Py_TPFLAGS_UNICODE_SUBCLASS; + while (PyDict_Next(kw, &pos, &key, &value)) { + keys_are_strings &= Py_TYPE(key)->tp_flags; + Py_INCREF(key); + Py_INCREF(value); + PyTuple_SET_ITEM(kwnames, i, key); + kwvalues[i] = value; + i++; + } + if (unlikely(!keys_are_strings)) { + PyErr_SetString(PyExc_TypeError, "keywords must be strings"); + goto cleanup; + } + res = vc(func, newargs, nargs, kwnames); +cleanup: + Py_DECREF(kwnames); + for (i = 0; i < nkw; i++) + Py_DECREF(kwvalues[i]); + PyMem_Free(newargs); + return res; +} +static CYTHON_INLINE PyObject *__Pyx_PyVectorcall_FastCallDict(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw) +{ + if (likely(kw == NULL) || PyDict_GET_SIZE(kw) == 0) { + return vc(func, args, nargs, NULL); + } + return __Pyx_PyVectorcall_FastCallDict_kw(func, vc, args, nargs, kw); +} +#endif + +/* CythonFunctionShared */ + #if CYTHON_COMPILING_IN_LIMITED_API +static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc) { + if (__Pyx_CyFunction_Check(func)) { + return PyCFunction_GetFunction(((__pyx_CyFunctionObject*)func)->func) == (PyCFunction) cfunc; + } else if (PyCFunction_Check(func)) { + return PyCFunction_GetFunction(func) == (PyCFunction) cfunc; + } + return 0; +} +#else +static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc) { + return __Pyx_CyOrPyCFunction_Check(func) && __Pyx_CyOrPyCFunction_GET_FUNCTION(func) == (PyCFunction) cfunc; +} +#endif +static CYTHON_INLINE void __Pyx__CyFunction_SetClassObj(__pyx_CyFunctionObject* f, PyObject* classobj) { +#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + __Pyx_Py_XDECREF_SET( + __Pyx_CyFunction_GetClassObj(f), + ((classobj) ? __Pyx_NewRef(classobj) : NULL)); +#else + __Pyx_Py_XDECREF_SET( + ((PyCMethodObject *) (f))->mm_class, + (PyTypeObject*)((classobj) ? 
__Pyx_NewRef(classobj) : NULL)); +#endif +} +static PyObject * +__Pyx_CyFunction_get_doc(__pyx_CyFunctionObject *op, void *closure) +{ + CYTHON_UNUSED_VAR(closure); + if (unlikely(op->func_doc == NULL)) { +#if CYTHON_COMPILING_IN_LIMITED_API + op->func_doc = PyObject_GetAttrString(op->func, "__doc__"); + if (unlikely(!op->func_doc)) return NULL; +#else + if (((PyCFunctionObject*)op)->m_ml->ml_doc) { +#if PY_MAJOR_VERSION >= 3 + op->func_doc = PyUnicode_FromString(((PyCFunctionObject*)op)->m_ml->ml_doc); +#else + op->func_doc = PyString_FromString(((PyCFunctionObject*)op)->m_ml->ml_doc); +#endif + if (unlikely(op->func_doc == NULL)) + return NULL; + } else { + Py_INCREF(Py_None); + return Py_None; + } +#endif + } + Py_INCREF(op->func_doc); + return op->func_doc; +} +static int +__Pyx_CyFunction_set_doc(__pyx_CyFunctionObject *op, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); + if (value == NULL) { + value = Py_None; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->func_doc, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_name(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(context); + if (unlikely(op->func_name == NULL)) { +#if CYTHON_COMPILING_IN_LIMITED_API + op->func_name = PyObject_GetAttrString(op->func, "__name__"); +#elif PY_MAJOR_VERSION >= 3 + op->func_name = PyUnicode_InternFromString(((PyCFunctionObject*)op)->m_ml->ml_name); +#else + op->func_name = PyString_InternFromString(((PyCFunctionObject*)op)->m_ml->ml_name); +#endif + if (unlikely(op->func_name == NULL)) + return NULL; + } + Py_INCREF(op->func_name); + return op->func_name; +} +static int +__Pyx_CyFunction_set_name(__pyx_CyFunctionObject *op, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); +#if PY_MAJOR_VERSION >= 3 + if (unlikely(value == NULL || !PyUnicode_Check(value))) +#else + if (unlikely(value == NULL || !PyString_Check(value))) +#endif + { + PyErr_SetString(PyExc_TypeError, + "__name__ must be set to a string object"); + return -1; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->func_name, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_qualname(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(context); + Py_INCREF(op->func_qualname); + return op->func_qualname; +} +static int +__Pyx_CyFunction_set_qualname(__pyx_CyFunctionObject *op, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); +#if PY_MAJOR_VERSION >= 3 + if (unlikely(value == NULL || !PyUnicode_Check(value))) +#else + if (unlikely(value == NULL || !PyString_Check(value))) +#endif + { + PyErr_SetString(PyExc_TypeError, + "__qualname__ must be set to a string object"); + return -1; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->func_qualname, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_dict(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(context); + if (unlikely(op->func_dict == NULL)) { + op->func_dict = PyDict_New(); + if (unlikely(op->func_dict == NULL)) + return NULL; + } + Py_INCREF(op->func_dict); + return op->func_dict; +} +static int +__Pyx_CyFunction_set_dict(__pyx_CyFunctionObject *op, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); + if (unlikely(value == NULL)) { + PyErr_SetString(PyExc_TypeError, + "function's dictionary may not be deleted"); + return -1; + } + if (unlikely(!PyDict_Check(value))) { + PyErr_SetString(PyExc_TypeError, + "setting function's dictionary to a non-dict"); + return -1; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->func_dict, value); + 
return 0; +} +static PyObject * +__Pyx_CyFunction_get_globals(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(context); + Py_INCREF(op->func_globals); + return op->func_globals; +} +static PyObject * +__Pyx_CyFunction_get_closure(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(op); + CYTHON_UNUSED_VAR(context); + Py_INCREF(Py_None); + return Py_None; +} +static PyObject * +__Pyx_CyFunction_get_code(__pyx_CyFunctionObject *op, void *context) +{ + PyObject* result = (op->func_code) ? op->func_code : Py_None; + CYTHON_UNUSED_VAR(context); + Py_INCREF(result); + return result; +} +static int +__Pyx_CyFunction_init_defaults(__pyx_CyFunctionObject *op) { + int result = 0; + PyObject *res = op->defaults_getter((PyObject *) op); + if (unlikely(!res)) + return -1; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + op->defaults_tuple = PyTuple_GET_ITEM(res, 0); + Py_INCREF(op->defaults_tuple); + op->defaults_kwdict = PyTuple_GET_ITEM(res, 1); + Py_INCREF(op->defaults_kwdict); + #else + op->defaults_tuple = __Pyx_PySequence_ITEM(res, 0); + if (unlikely(!op->defaults_tuple)) result = -1; + else { + op->defaults_kwdict = __Pyx_PySequence_ITEM(res, 1); + if (unlikely(!op->defaults_kwdict)) result = -1; + } + #endif + Py_DECREF(res); + return result; +} +static int +__Pyx_CyFunction_set_defaults(__pyx_CyFunctionObject *op, PyObject* value, void *context) { + CYTHON_UNUSED_VAR(context); + if (!value) { + value = Py_None; + } else if (unlikely(value != Py_None && !PyTuple_Check(value))) { + PyErr_SetString(PyExc_TypeError, + "__defaults__ must be set to a tuple object"); + return -1; + } + PyErr_WarnEx(PyExc_RuntimeWarning, "changes to cyfunction.__defaults__ will not " + "currently affect the values used in function calls", 1); + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->defaults_tuple, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_defaults(__pyx_CyFunctionObject *op, void *context) { + PyObject* result = op->defaults_tuple; + CYTHON_UNUSED_VAR(context); + if (unlikely(!result)) { + if (op->defaults_getter) { + if (unlikely(__Pyx_CyFunction_init_defaults(op) < 0)) return NULL; + result = op->defaults_tuple; + } else { + result = Py_None; + } + } + Py_INCREF(result); + return result; +} +static int +__Pyx_CyFunction_set_kwdefaults(__pyx_CyFunctionObject *op, PyObject* value, void *context) { + CYTHON_UNUSED_VAR(context); + if (!value) { + value = Py_None; + } else if (unlikely(value != Py_None && !PyDict_Check(value))) { + PyErr_SetString(PyExc_TypeError, + "__kwdefaults__ must be set to a dict object"); + return -1; + } + PyErr_WarnEx(PyExc_RuntimeWarning, "changes to cyfunction.__kwdefaults__ will not " + "currently affect the values used in function calls", 1); + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->defaults_kwdict, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_kwdefaults(__pyx_CyFunctionObject *op, void *context) { + PyObject* result = op->defaults_kwdict; + CYTHON_UNUSED_VAR(context); + if (unlikely(!result)) { + if (op->defaults_getter) { + if (unlikely(__Pyx_CyFunction_init_defaults(op) < 0)) return NULL; + result = op->defaults_kwdict; + } else { + result = Py_None; + } + } + Py_INCREF(result); + return result; +} +static int +__Pyx_CyFunction_set_annotations(__pyx_CyFunctionObject *op, PyObject* value, void *context) { + CYTHON_UNUSED_VAR(context); + if (!value || value == Py_None) { + value = NULL; + } else if (unlikely(!PyDict_Check(value))) { + PyErr_SetString(PyExc_TypeError, + "__annotations__ 
must be set to a dict object"); + return -1; + } + Py_XINCREF(value); + __Pyx_Py_XDECREF_SET(op->func_annotations, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_annotations(__pyx_CyFunctionObject *op, void *context) { + PyObject* result = op->func_annotations; + CYTHON_UNUSED_VAR(context); + if (unlikely(!result)) { + result = PyDict_New(); + if (unlikely(!result)) return NULL; + op->func_annotations = result; + } + Py_INCREF(result); + return result; +} +static PyObject * +__Pyx_CyFunction_get_is_coroutine(__pyx_CyFunctionObject *op, void *context) { + int is_coroutine; + CYTHON_UNUSED_VAR(context); + if (op->func_is_coroutine) { + return __Pyx_NewRef(op->func_is_coroutine); + } + is_coroutine = op->flags & __Pyx_CYFUNCTION_COROUTINE; +#if PY_VERSION_HEX >= 0x03050000 + if (is_coroutine) { + PyObject *module, *fromlist, *marker = __pyx_n_s_is_coroutine; + fromlist = PyList_New(1); + if (unlikely(!fromlist)) return NULL; + Py_INCREF(marker); +#if CYTHON_ASSUME_SAFE_MACROS + PyList_SET_ITEM(fromlist, 0, marker); +#else + if (unlikely(PyList_SetItem(fromlist, 0, marker) < 0)) { + Py_DECREF(marker); + Py_DECREF(fromlist); + return NULL; + } +#endif + module = PyImport_ImportModuleLevelObject(__pyx_n_s_asyncio_coroutines, NULL, NULL, fromlist, 0); + Py_DECREF(fromlist); + if (unlikely(!module)) goto ignore; + op->func_is_coroutine = __Pyx_PyObject_GetAttrStr(module, marker); + Py_DECREF(module); + if (likely(op->func_is_coroutine)) { + return __Pyx_NewRef(op->func_is_coroutine); + } +ignore: + PyErr_Clear(); + } +#endif + op->func_is_coroutine = __Pyx_PyBool_FromLong(is_coroutine); + return __Pyx_NewRef(op->func_is_coroutine); +} +#if CYTHON_COMPILING_IN_LIMITED_API +static PyObject * +__Pyx_CyFunction_get_module(__pyx_CyFunctionObject *op, void *context) { + CYTHON_UNUSED_VAR(context); + return PyObject_GetAttrString(op->func, "__module__"); +} +static int +__Pyx_CyFunction_set_module(__pyx_CyFunctionObject *op, PyObject* value, void *context) { + CYTHON_UNUSED_VAR(context); + return PyObject_SetAttrString(op->func, "__module__", value); +} +#endif +static PyGetSetDef __pyx_CyFunction_getsets[] = { + {(char *) "func_doc", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0}, + {(char *) "__doc__", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0}, + {(char *) "func_name", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0}, + {(char *) "__name__", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0}, + {(char *) "__qualname__", (getter)__Pyx_CyFunction_get_qualname, (setter)__Pyx_CyFunction_set_qualname, 0, 0}, + {(char *) "func_dict", (getter)__Pyx_CyFunction_get_dict, (setter)__Pyx_CyFunction_set_dict, 0, 0}, + {(char *) "__dict__", (getter)__Pyx_CyFunction_get_dict, (setter)__Pyx_CyFunction_set_dict, 0, 0}, + {(char *) "func_globals", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0}, + {(char *) "__globals__", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0}, + {(char *) "func_closure", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0}, + {(char *) "__closure__", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0}, + {(char *) "func_code", (getter)__Pyx_CyFunction_get_code, 0, 0, 0}, + {(char *) "__code__", (getter)__Pyx_CyFunction_get_code, 0, 0, 0}, + {(char *) "func_defaults", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0}, + {(char *) "__defaults__", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0}, + {(char *) 
"__kwdefaults__", (getter)__Pyx_CyFunction_get_kwdefaults, (setter)__Pyx_CyFunction_set_kwdefaults, 0, 0}, + {(char *) "__annotations__", (getter)__Pyx_CyFunction_get_annotations, (setter)__Pyx_CyFunction_set_annotations, 0, 0}, + {(char *) "_is_coroutine", (getter)__Pyx_CyFunction_get_is_coroutine, 0, 0, 0}, +#if CYTHON_COMPILING_IN_LIMITED_API + {"__module__", (getter)__Pyx_CyFunction_get_module, (setter)__Pyx_CyFunction_set_module, 0, 0}, +#endif + {0, 0, 0, 0, 0} +}; +static PyMemberDef __pyx_CyFunction_members[] = { +#if !CYTHON_COMPILING_IN_LIMITED_API + {(char *) "__module__", T_OBJECT, offsetof(PyCFunctionObject, m_module), 0, 0}, +#endif +#if CYTHON_USE_TYPE_SPECS + {(char *) "__dictoffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_dict), READONLY, 0}, +#if CYTHON_METH_FASTCALL +#if CYTHON_BACKPORT_VECTORCALL + {(char *) "__vectorcalloffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_vectorcall), READONLY, 0}, +#else +#if !CYTHON_COMPILING_IN_LIMITED_API + {(char *) "__vectorcalloffset__", T_PYSSIZET, offsetof(PyCFunctionObject, vectorcall), READONLY, 0}, +#endif +#endif +#endif +#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API + {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_weakreflist), READONLY, 0}, +#else + {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(PyCFunctionObject, m_weakreflist), READONLY, 0}, +#endif +#endif + {0, 0, 0, 0, 0} +}; +static PyObject * +__Pyx_CyFunction_reduce(__pyx_CyFunctionObject *m, PyObject *args) +{ + CYTHON_UNUSED_VAR(args); +#if PY_MAJOR_VERSION >= 3 + Py_INCREF(m->func_qualname); + return m->func_qualname; +#else + return PyString_FromString(((PyCFunctionObject*)m)->m_ml->ml_name); +#endif +} +static PyMethodDef __pyx_CyFunction_methods[] = { + {"__reduce__", (PyCFunction)__Pyx_CyFunction_reduce, METH_VARARGS, 0}, + {0, 0, 0, 0} +}; +#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API +#define __Pyx_CyFunction_weakreflist(cyfunc) ((cyfunc)->func_weakreflist) +#else +#define __Pyx_CyFunction_weakreflist(cyfunc) (((PyCFunctionObject*)cyfunc)->m_weakreflist) +#endif +static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject *op, PyMethodDef *ml, int flags, PyObject* qualname, + PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) { +#if !CYTHON_COMPILING_IN_LIMITED_API + PyCFunctionObject *cf = (PyCFunctionObject*) op; +#endif + if (unlikely(op == NULL)) + return NULL; +#if CYTHON_COMPILING_IN_LIMITED_API + op->func = PyCFunction_NewEx(ml, (PyObject*)op, module); + if (unlikely(!op->func)) return NULL; +#endif + op->flags = flags; + __Pyx_CyFunction_weakreflist(op) = NULL; +#if !CYTHON_COMPILING_IN_LIMITED_API + cf->m_ml = ml; + cf->m_self = (PyObject *) op; +#endif + Py_XINCREF(closure); + op->func_closure = closure; +#if !CYTHON_COMPILING_IN_LIMITED_API + Py_XINCREF(module); + cf->m_module = module; +#endif + op->func_dict = NULL; + op->func_name = NULL; + Py_INCREF(qualname); + op->func_qualname = qualname; + op->func_doc = NULL; +#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + op->func_classobj = NULL; +#else + ((PyCMethodObject*)op)->mm_class = NULL; +#endif + op->func_globals = globals; + Py_INCREF(op->func_globals); + Py_XINCREF(code); + op->func_code = code; + op->defaults_pyobjects = 0; + op->defaults_size = 0; + op->defaults = NULL; + op->defaults_tuple = NULL; + op->defaults_kwdict = NULL; + op->defaults_getter = NULL; + op->func_annotations = NULL; + op->func_is_coroutine = NULL; +#if 
CYTHON_METH_FASTCALL + switch (ml->ml_flags & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | METH_KEYWORDS | METH_METHOD)) { + case METH_NOARGS: + __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_NOARGS; + break; + case METH_O: + __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_O; + break; + case METH_METHOD | METH_FASTCALL | METH_KEYWORDS: + __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD; + break; + case METH_FASTCALL | METH_KEYWORDS: + __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS; + break; + case METH_VARARGS | METH_KEYWORDS: + __Pyx_CyFunction_func_vectorcall(op) = NULL; + break; + default: + PyErr_SetString(PyExc_SystemError, "Bad call flags for CyFunction"); + Py_DECREF(op); + return NULL; + } +#endif + return (PyObject *) op; +} +static int +__Pyx_CyFunction_clear(__pyx_CyFunctionObject *m) +{ + Py_CLEAR(m->func_closure); +#if CYTHON_COMPILING_IN_LIMITED_API + Py_CLEAR(m->func); +#else + Py_CLEAR(((PyCFunctionObject*)m)->m_module); +#endif + Py_CLEAR(m->func_dict); + Py_CLEAR(m->func_name); + Py_CLEAR(m->func_qualname); + Py_CLEAR(m->func_doc); + Py_CLEAR(m->func_globals); + Py_CLEAR(m->func_code); +#if !CYTHON_COMPILING_IN_LIMITED_API +#if PY_VERSION_HEX < 0x030900B1 + Py_CLEAR(__Pyx_CyFunction_GetClassObj(m)); +#else + { + PyObject *cls = (PyObject*) ((PyCMethodObject *) (m))->mm_class; + ((PyCMethodObject *) (m))->mm_class = NULL; + Py_XDECREF(cls); + } +#endif +#endif + Py_CLEAR(m->defaults_tuple); + Py_CLEAR(m->defaults_kwdict); + Py_CLEAR(m->func_annotations); + Py_CLEAR(m->func_is_coroutine); + if (m->defaults) { + PyObject **pydefaults = __Pyx_CyFunction_Defaults(PyObject *, m); + int i; + for (i = 0; i < m->defaults_pyobjects; i++) + Py_XDECREF(pydefaults[i]); + PyObject_Free(m->defaults); + m->defaults = NULL; + } + return 0; +} +static void __Pyx__CyFunction_dealloc(__pyx_CyFunctionObject *m) +{ + if (__Pyx_CyFunction_weakreflist(m) != NULL) + PyObject_ClearWeakRefs((PyObject *) m); + __Pyx_CyFunction_clear(m); + __Pyx_PyHeapTypeObject_GC_Del(m); +} +static void __Pyx_CyFunction_dealloc(__pyx_CyFunctionObject *m) +{ + PyObject_GC_UnTrack(m); + __Pyx__CyFunction_dealloc(m); +} +static int __Pyx_CyFunction_traverse(__pyx_CyFunctionObject *m, visitproc visit, void *arg) +{ + Py_VISIT(m->func_closure); +#if CYTHON_COMPILING_IN_LIMITED_API + Py_VISIT(m->func); +#else + Py_VISIT(((PyCFunctionObject*)m)->m_module); +#endif + Py_VISIT(m->func_dict); + Py_VISIT(m->func_name); + Py_VISIT(m->func_qualname); + Py_VISIT(m->func_doc); + Py_VISIT(m->func_globals); + Py_VISIT(m->func_code); +#if !CYTHON_COMPILING_IN_LIMITED_API + Py_VISIT(__Pyx_CyFunction_GetClassObj(m)); +#endif + Py_VISIT(m->defaults_tuple); + Py_VISIT(m->defaults_kwdict); + Py_VISIT(m->func_is_coroutine); + if (m->defaults) { + PyObject **pydefaults = __Pyx_CyFunction_Defaults(PyObject *, m); + int i; + for (i = 0; i < m->defaults_pyobjects; i++) + Py_VISIT(pydefaults[i]); + } + return 0; +} +static PyObject* +__Pyx_CyFunction_repr(__pyx_CyFunctionObject *op) +{ +#if PY_MAJOR_VERSION >= 3 + return PyUnicode_FromFormat("<cyfunction %U at %p>", + op->func_qualname, (void *)op); +#else + return PyString_FromFormat("<cyfunction %s at %p>", + PyString_AsString(op->func_qualname), (void *)op); +#endif +} +static PyObject * __Pyx_CyFunction_CallMethod(PyObject *func, PyObject *self, PyObject *arg, PyObject *kw) { +#if CYTHON_COMPILING_IN_LIMITED_API + PyObject *f = ((__pyx_CyFunctionObject*)func)->func; + PyObject *py_name = NULL; +
PyCFunction meth; + int flags; + meth = PyCFunction_GetFunction(f); + if (unlikely(!meth)) return NULL; + flags = PyCFunction_GetFlags(f); + if (unlikely(flags < 0)) return NULL; +#else + PyCFunctionObject* f = (PyCFunctionObject*)func; + PyCFunction meth = f->m_ml->ml_meth; + int flags = f->m_ml->ml_flags; +#endif + Py_ssize_t size; + switch (flags & (METH_VARARGS | METH_KEYWORDS | METH_NOARGS | METH_O)) { + case METH_VARARGS: + if (likely(kw == NULL || PyDict_Size(kw) == 0)) + return (*meth)(self, arg); + break; + case METH_VARARGS | METH_KEYWORDS: + return (*(PyCFunctionWithKeywords)(void*)meth)(self, arg, kw); + case METH_NOARGS: + if (likely(kw == NULL || PyDict_Size(kw) == 0)) { +#if CYTHON_ASSUME_SAFE_MACROS + size = PyTuple_GET_SIZE(arg); +#else + size = PyTuple_Size(arg); + if (unlikely(size < 0)) return NULL; +#endif + if (likely(size == 0)) + return (*meth)(self, NULL); +#if CYTHON_COMPILING_IN_LIMITED_API + py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); + if (!py_name) return NULL; + PyErr_Format(PyExc_TypeError, + "%.200S() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", + py_name, size); + Py_DECREF(py_name); +#else + PyErr_Format(PyExc_TypeError, + "%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", + f->m_ml->ml_name, size); +#endif + return NULL; + } + break; + case METH_O: + if (likely(kw == NULL || PyDict_Size(kw) == 0)) { +#if CYTHON_ASSUME_SAFE_MACROS + size = PyTuple_GET_SIZE(arg); +#else + size = PyTuple_Size(arg); + if (unlikely(size < 0)) return NULL; +#endif + if (likely(size == 1)) { + PyObject *result, *arg0; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + arg0 = PyTuple_GET_ITEM(arg, 0); + #else + arg0 = __Pyx_PySequence_ITEM(arg, 0); if (unlikely(!arg0)) return NULL; + #endif + result = (*meth)(self, arg0); + #if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS) + Py_DECREF(arg0); + #endif + return result; + } +#if CYTHON_COMPILING_IN_LIMITED_API + py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); + if (!py_name) return NULL; + PyErr_Format(PyExc_TypeError, + "%.200S() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)", + py_name, size); + Py_DECREF(py_name); +#else + PyErr_Format(PyExc_TypeError, + "%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)", + f->m_ml->ml_name, size); +#endif + return NULL; + } + break; + default: + PyErr_SetString(PyExc_SystemError, "Bad call flags for CyFunction"); + return NULL; + } +#if CYTHON_COMPILING_IN_LIMITED_API + py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); + if (!py_name) return NULL; + PyErr_Format(PyExc_TypeError, "%.200S() takes no keyword arguments", + py_name); + Py_DECREF(py_name); +#else + PyErr_Format(PyExc_TypeError, "%.200s() takes no keyword arguments", + f->m_ml->ml_name); +#endif + return NULL; +} +static CYTHON_INLINE PyObject *__Pyx_CyFunction_Call(PyObject *func, PyObject *arg, PyObject *kw) { + PyObject *self, *result; +#if CYTHON_COMPILING_IN_LIMITED_API + self = PyCFunction_GetSelf(((__pyx_CyFunctionObject*)func)->func); + if (unlikely(!self) && PyErr_Occurred()) return NULL; +#else + self = ((PyCFunctionObject*)func)->m_self; +#endif + result = __Pyx_CyFunction_CallMethod(func, self, arg, kw); + return result; +} +static PyObject *__Pyx_CyFunction_CallAsMethod(PyObject *func, PyObject *args, PyObject *kw) { + PyObject *result; + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *) func; +#if CYTHON_METH_FASTCALL + 
__pyx_vectorcallfunc vc = __Pyx_CyFunction_func_vectorcall(cyfunc); + if (vc) { +#if CYTHON_ASSUME_SAFE_MACROS + return __Pyx_PyVectorcall_FastCallDict(func, vc, &PyTuple_GET_ITEM(args, 0), (size_t)PyTuple_GET_SIZE(args), kw); +#else + (void) &__Pyx_PyVectorcall_FastCallDict; + return PyVectorcall_Call(func, args, kw); +#endif + } +#endif + if ((cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD)) { + Py_ssize_t argc; + PyObject *new_args; + PyObject *self; +#if CYTHON_ASSUME_SAFE_MACROS + argc = PyTuple_GET_SIZE(args); +#else + argc = PyTuple_Size(args); + if (unlikely(!argc) < 0) return NULL; +#endif + new_args = PyTuple_GetSlice(args, 1, argc); + if (unlikely(!new_args)) + return NULL; + self = PyTuple_GetItem(args, 0); + if (unlikely(!self)) { + Py_DECREF(new_args); +#if PY_MAJOR_VERSION > 2 + PyErr_Format(PyExc_TypeError, + "unbound method %.200S() needs an argument", + cyfunc->func_qualname); +#else + PyErr_SetString(PyExc_TypeError, + "unbound method needs an argument"); +#endif + return NULL; + } + result = __Pyx_CyFunction_CallMethod(func, self, new_args, kw); + Py_DECREF(new_args); + } else { + result = __Pyx_CyFunction_Call(func, args, kw); + } + return result; +} +#if CYTHON_METH_FASTCALL +static CYTHON_INLINE int __Pyx_CyFunction_Vectorcall_CheckArgs(__pyx_CyFunctionObject *cyfunc, Py_ssize_t nargs, PyObject *kwnames) +{ + int ret = 0; + if ((cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD)) { + if (unlikely(nargs < 1)) { + PyErr_Format(PyExc_TypeError, "%.200s() needs an argument", + ((PyCFunctionObject*)cyfunc)->m_ml->ml_name); + return -1; + } + ret = 1; + } + if (unlikely(kwnames) && unlikely(PyTuple_GET_SIZE(kwnames))) { + PyErr_Format(PyExc_TypeError, + "%.200s() takes no keyword arguments", ((PyCFunctionObject*)cyfunc)->m_ml->ml_name); + return -1; + } + return ret; +} +static PyObject * __Pyx_CyFunction_Vectorcall_NOARGS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) +{ + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; + PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; +#if CYTHON_BACKPORT_VECTORCALL + Py_ssize_t nargs = (Py_ssize_t)nargsf; +#else + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); +#endif + PyObject *self; + switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, kwnames)) { + case 1: + self = args[0]; + args += 1; + nargs -= 1; + break; + case 0: + self = ((PyCFunctionObject*)cyfunc)->m_self; + break; + default: + return NULL; + } + if (unlikely(nargs != 0)) { + PyErr_Format(PyExc_TypeError, + "%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", + def->ml_name, nargs); + return NULL; + } + return def->ml_meth(self, NULL); +} +static PyObject * __Pyx_CyFunction_Vectorcall_O(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) +{ + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; + PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; +#if CYTHON_BACKPORT_VECTORCALL + Py_ssize_t nargs = (Py_ssize_t)nargsf; +#else + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); +#endif + PyObject *self; + switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, kwnames)) { + case 1: + self = args[0]; + args += 1; + nargs -= 1; + break; + case 0: + self = ((PyCFunctionObject*)cyfunc)->m_self; + break; + default: + return NULL; + } + if (unlikely(nargs != 1)) { + PyErr_Format(PyExc_TypeError, + "%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)", + def->ml_name, 
nargs); + return NULL; + } + return def->ml_meth(self, args[0]); +} +static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) +{ + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; + PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; +#if CYTHON_BACKPORT_VECTORCALL + Py_ssize_t nargs = (Py_ssize_t)nargsf; +#else + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); +#endif + PyObject *self; + switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, NULL)) { + case 1: + self = args[0]; + args += 1; + nargs -= 1; + break; + case 0: + self = ((PyCFunctionObject*)cyfunc)->m_self; + break; + default: + return NULL; + } + return ((__Pyx_PyCFunctionFastWithKeywords)(void(*)(void))def->ml_meth)(self, args, nargs, kwnames); +} +static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) +{ + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; + PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; + PyTypeObject *cls = (PyTypeObject *) __Pyx_CyFunction_GetClassObj(cyfunc); +#if CYTHON_BACKPORT_VECTORCALL + Py_ssize_t nargs = (Py_ssize_t)nargsf; +#else + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); +#endif + PyObject *self; + switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, NULL)) { + case 1: + self = args[0]; + args += 1; + nargs -= 1; + break; + case 0: + self = ((PyCFunctionObject*)cyfunc)->m_self; + break; + default: + return NULL; + } + return ((__Pyx_PyCMethod)(void(*)(void))def->ml_meth)(self, cls, args, (size_t)nargs, kwnames); +} +#endif +#if CYTHON_USE_TYPE_SPECS +static PyType_Slot __pyx_CyFunctionType_slots[] = { + {Py_tp_dealloc, (void *)__Pyx_CyFunction_dealloc}, + {Py_tp_repr, (void *)__Pyx_CyFunction_repr}, + {Py_tp_call, (void *)__Pyx_CyFunction_CallAsMethod}, + {Py_tp_traverse, (void *)__Pyx_CyFunction_traverse}, + {Py_tp_clear, (void *)__Pyx_CyFunction_clear}, + {Py_tp_methods, (void *)__pyx_CyFunction_methods}, + {Py_tp_members, (void *)__pyx_CyFunction_members}, + {Py_tp_getset, (void *)__pyx_CyFunction_getsets}, + {Py_tp_descr_get, (void *)__Pyx_PyMethod_New}, + {0, 0}, +}; +static PyType_Spec __pyx_CyFunctionType_spec = { + __PYX_TYPE_MODULE_PREFIX "cython_function_or_method", + sizeof(__pyx_CyFunctionObject), + 0, +#ifdef Py_TPFLAGS_METHOD_DESCRIPTOR + Py_TPFLAGS_METHOD_DESCRIPTOR | +#endif +#if (defined(_Py_TPFLAGS_HAVE_VECTORCALL) && CYTHON_METH_FASTCALL) + _Py_TPFLAGS_HAVE_VECTORCALL | +#endif + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, + __pyx_CyFunctionType_slots +}; +#else +static PyTypeObject __pyx_CyFunctionType_type = { + PyVarObject_HEAD_INIT(0, 0) + __PYX_TYPE_MODULE_PREFIX "cython_function_or_method", + sizeof(__pyx_CyFunctionObject), + 0, + (destructor) __Pyx_CyFunction_dealloc, +#if !CYTHON_METH_FASTCALL + 0, +#elif CYTHON_BACKPORT_VECTORCALL + (printfunc)offsetof(__pyx_CyFunctionObject, func_vectorcall), +#else + offsetof(PyCFunctionObject, vectorcall), +#endif + 0, + 0, +#if PY_MAJOR_VERSION < 3 + 0, +#else + 0, +#endif + (reprfunc) __Pyx_CyFunction_repr, + 0, + 0, + 0, + 0, + __Pyx_CyFunction_CallAsMethod, + 0, + 0, + 0, + 0, +#ifdef Py_TPFLAGS_METHOD_DESCRIPTOR + Py_TPFLAGS_METHOD_DESCRIPTOR | +#endif +#if defined(_Py_TPFLAGS_HAVE_VECTORCALL) && CYTHON_METH_FASTCALL + _Py_TPFLAGS_HAVE_VECTORCALL | +#endif + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, + 0, + (traverseproc) __Pyx_CyFunction_traverse, + (inquiry) 
__Pyx_CyFunction_clear, + 0, +#if PY_VERSION_HEX < 0x030500A0 + offsetof(__pyx_CyFunctionObject, func_weakreflist), +#else + offsetof(PyCFunctionObject, m_weakreflist), +#endif + 0, + 0, + __pyx_CyFunction_methods, + __pyx_CyFunction_members, + __pyx_CyFunction_getsets, + 0, + 0, + __Pyx_PyMethod_New, + 0, + offsetof(__pyx_CyFunctionObject, func_dict), + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, +#if PY_VERSION_HEX >= 0x030400a1 + 0, +#endif +#if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) + 0, +#endif +#if __PYX_NEED_TP_PRINT_SLOT + 0, +#endif +#if PY_VERSION_HEX >= 0x030C0000 + 0, +#endif +#if PY_VERSION_HEX >= 0x030d00A4 + 0, +#endif +#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 + 0, +#endif +}; +#endif +static int __pyx_CyFunction_init(PyObject *module) { +#if CYTHON_USE_TYPE_SPECS + __pyx_CyFunctionType = __Pyx_FetchCommonTypeFromSpec(module, &__pyx_CyFunctionType_spec, NULL); +#else + CYTHON_UNUSED_VAR(module); + __pyx_CyFunctionType = __Pyx_FetchCommonType(&__pyx_CyFunctionType_type); +#endif + if (unlikely(__pyx_CyFunctionType == NULL)) { + return -1; + } + return 0; +} +static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *func, size_t size, int pyobjects) { + __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; + m->defaults = PyObject_Malloc(size); + if (unlikely(!m->defaults)) + return PyErr_NoMemory(); + memset(m->defaults, 0, size); + m->defaults_pyobjects = pyobjects; + m->defaults_size = size; + return m->defaults; +} +static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *func, PyObject *tuple) { + __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; + m->defaults_tuple = tuple; + Py_INCREF(tuple); +} +static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *func, PyObject *dict) { + __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; + m->defaults_kwdict = dict; + Py_INCREF(dict); +} +static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *func, PyObject *dict) { + __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; + m->func_annotations = dict; + Py_INCREF(dict); +} + +/* CythonFunction */ + static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, int flags, PyObject* qualname, + PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) { + PyObject *op = __Pyx_CyFunction_Init( + PyObject_GC_New(__pyx_CyFunctionObject, __pyx_CyFunctionType), + ml, flags, qualname, closure, module, globals, code + ); + if (likely(op)) { + PyObject_GC_Track(op); + } + return op; +} + +/* CLineInTraceback */ + #ifndef CYTHON_CLINE_IN_TRACEBACK +static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line) { + PyObject *use_cline; + PyObject *ptype, *pvalue, *ptraceback; +#if CYTHON_COMPILING_IN_CPYTHON + PyObject **cython_runtime_dict; +#endif + CYTHON_MAYBE_UNUSED_VAR(tstate); + if (unlikely(!__pyx_cython_runtime)) { + return c_line; + } + __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); +#if CYTHON_COMPILING_IN_CPYTHON + cython_runtime_dict = _PyObject_GetDictPtr(__pyx_cython_runtime); + if (likely(cython_runtime_dict)) { + __PYX_PY_DICT_LOOKUP_IF_MODIFIED( + use_cline, *cython_runtime_dict, + __Pyx_PyDict_GetItemStr(*cython_runtime_dict, __pyx_n_s_cline_in_traceback)) + } else +#endif + { + PyObject *use_cline_obj = __Pyx_PyObject_GetAttrStrNoError(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback); + if (use_cline_obj) { + use_cline = 
PyObject_Not(use_cline_obj) ? Py_False : Py_True; + Py_DECREF(use_cline_obj); + } else { + PyErr_Clear(); + use_cline = NULL; + } + } + if (!use_cline) { + c_line = 0; + (void) PyObject_SetAttr(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback, Py_False); + } + else if (use_cline == Py_False || (use_cline != Py_True && PyObject_Not(use_cline) != 0)) { + c_line = 0; + } + __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback); + return c_line; +} +#endif + +/* CodeObjectCache */ + #if !CYTHON_COMPILING_IN_LIMITED_API +static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) { + int start = 0, mid = 0, end = count - 1; + if (end >= 0 && code_line > entries[end].code_line) { + return count; + } + while (start < end) { + mid = start + (end - start) / 2; + if (code_line < entries[mid].code_line) { + end = mid; + } else if (code_line > entries[mid].code_line) { + start = mid + 1; + } else { + return mid; + } + } + if (code_line <= entries[mid].code_line) { + return mid; + } else { + return mid + 1; + } +} +static PyCodeObject *__pyx_find_code_object(int code_line) { + PyCodeObject* code_object; + int pos; + if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) { + return NULL; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) { + return NULL; + } + code_object = __pyx_code_cache.entries[pos].code_object; + Py_INCREF(code_object); + return code_object; +} +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) { + int pos, i; + __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries; + if (unlikely(!code_line)) { + return; + } + if (unlikely(!entries)) { + entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry)); + if (likely(entries)) { + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = 64; + __pyx_code_cache.count = 1; + entries[0].code_line = code_line; + entries[0].code_object = code_object; + Py_INCREF(code_object); + } + return; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) { + PyCodeObject* tmp = entries[pos].code_object; + entries[pos].code_object = code_object; + Py_DECREF(tmp); + return; + } + if (__pyx_code_cache.count == __pyx_code_cache.max_count) { + int new_max = __pyx_code_cache.max_count + 64; + entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc( + __pyx_code_cache.entries, ((size_t)new_max) * sizeof(__Pyx_CodeObjectCacheEntry)); + if (unlikely(!entries)) { + return; + } + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = new_max; + } + for (i=__pyx_code_cache.count; i>pos; i--) { + entries[i] = entries[i-1]; + } + entries[pos].code_line = code_line; + entries[pos].code_object = code_object; + __pyx_code_cache.count++; + Py_INCREF(code_object); +} +#endif + +/* AddTraceback */ + #include "compile.h" +#include "frameobject.h" +#include "traceback.h" +#if PY_VERSION_HEX >= 0x030b00a6 && !CYTHON_COMPILING_IN_LIMITED_API + #ifndef Py_BUILD_CORE + #define Py_BUILD_CORE 1 + #endif + #include "internal/pycore_frame.h" +#endif +#if CYTHON_COMPILING_IN_LIMITED_API +static PyObject *__Pyx_PyCode_Replace_For_AddTraceback(PyObject *code, PyObject *scratch_dict, + PyObject *firstlineno, PyObject *name) { + PyObject 
*replace = NULL; + if (unlikely(PyDict_SetItemString(scratch_dict, "co_firstlineno", firstlineno))) return NULL; + if (unlikely(PyDict_SetItemString(scratch_dict, "co_name", name))) return NULL; + replace = PyObject_GetAttrString(code, "replace"); + if (likely(replace)) { + PyObject *result; + result = PyObject_Call(replace, __pyx_empty_tuple, scratch_dict); + Py_DECREF(replace); + return result; + } + PyErr_Clear(); + #if __PYX_LIMITED_VERSION_HEX < 0x030780000 + { + PyObject *compiled = NULL, *result = NULL; + if (unlikely(PyDict_SetItemString(scratch_dict, "code", code))) return NULL; + if (unlikely(PyDict_SetItemString(scratch_dict, "type", (PyObject*)(&PyType_Type)))) return NULL; + compiled = Py_CompileString( + "out = type(code)(\n" + " code.co_argcount, code.co_kwonlyargcount, code.co_nlocals, code.co_stacksize,\n" + " code.co_flags, code.co_code, code.co_consts, code.co_names,\n" + " code.co_varnames, code.co_filename, co_name, co_firstlineno,\n" + " code.co_lnotab)\n", "<dummy>", Py_file_input); + if (!compiled) return NULL; + result = PyEval_EvalCode(compiled, scratch_dict, scratch_dict); + Py_DECREF(compiled); + if (!result) PyErr_Print(); + Py_DECREF(result); + result = PyDict_GetItemString(scratch_dict, "out"); + if (result) Py_INCREF(result); + return result; + } + #else + return NULL; + #endif +} +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename) { + PyObject *code_object = NULL, *py_py_line = NULL, *py_funcname = NULL, *dict = NULL; + PyObject *replace = NULL, *getframe = NULL, *frame = NULL; + PyObject *exc_type, *exc_value, *exc_traceback; + int success = 0; + if (c_line) { + (void) __pyx_cfilenm; + (void) __Pyx_CLineForTraceback(__Pyx_PyThreadState_Current, c_line); + } + PyErr_Fetch(&exc_type, &exc_value, &exc_traceback); + code_object = Py_CompileString("_getframe()", filename, Py_eval_input); + if (unlikely(!code_object)) goto bad; + py_py_line = PyLong_FromLong(py_line); + if (unlikely(!py_py_line)) goto bad; + py_funcname = PyUnicode_FromString(funcname); + if (unlikely(!py_funcname)) goto bad; + dict = PyDict_New(); + if (unlikely(!dict)) goto bad; + { + PyObject *old_code_object = code_object; + code_object = __Pyx_PyCode_Replace_For_AddTraceback(code_object, dict, py_py_line, py_funcname); + Py_DECREF(old_code_object); + } + if (unlikely(!code_object)) goto bad; + getframe = PySys_GetObject("_getframe"); + if (unlikely(!getframe)) goto bad; + if (unlikely(PyDict_SetItemString(dict, "_getframe", getframe))) goto bad; + frame = PyEval_EvalCode(code_object, dict, dict); + if (unlikely(!frame) || frame == Py_None) goto bad; + success = 1; + bad: + PyErr_Restore(exc_type, exc_value, exc_traceback); + Py_XDECREF(code_object); + Py_XDECREF(py_py_line); + Py_XDECREF(py_funcname); + Py_XDECREF(dict); + Py_XDECREF(replace); + if (success) { + PyTraceBack_Here( + (struct _frame*)frame); + } + Py_XDECREF(frame); +} +#else +static PyCodeObject* __Pyx_CreateCodeObjectForTraceback( + const char *funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = NULL; + PyObject *py_funcname = NULL; + #if PY_MAJOR_VERSION < 3 + PyObject *py_srcfile = NULL; + py_srcfile = PyString_FromString(filename); + if (!py_srcfile) goto bad; + #endif + if (c_line) { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); + if (!py_funcname) goto bad; + #else + py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); + if (!py_funcname) goto
bad; + funcname = PyUnicode_AsUTF8(py_funcname); + if (!funcname) goto bad; + #endif + } + else { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromString(funcname); + if (!py_funcname) goto bad; + #endif + } + #if PY_MAJOR_VERSION < 3 + py_code = __Pyx_PyCode_New( + 0, + 0, + 0, + 0, + 0, + 0, + __pyx_empty_bytes, /*PyObject *code,*/ + __pyx_empty_tuple, /*PyObject *consts,*/ + __pyx_empty_tuple, /*PyObject *names,*/ + __pyx_empty_tuple, /*PyObject *varnames,*/ + __pyx_empty_tuple, /*PyObject *freevars,*/ + __pyx_empty_tuple, /*PyObject *cellvars,*/ + py_srcfile, /*PyObject *filename,*/ + py_funcname, /*PyObject *name,*/ + py_line, + __pyx_empty_bytes /*PyObject *lnotab*/ + ); + Py_DECREF(py_srcfile); + #else + py_code = PyCode_NewEmpty(filename, funcname, py_line); + #endif + Py_XDECREF(py_funcname); + return py_code; +bad: + Py_XDECREF(py_funcname); + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(py_srcfile); + #endif + return NULL; +} +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = 0; + PyFrameObject *py_frame = 0; + PyThreadState *tstate = __Pyx_PyThreadState_Current; + PyObject *ptype, *pvalue, *ptraceback; + if (c_line) { + c_line = __Pyx_CLineForTraceback(tstate, c_line); + } + py_code = __pyx_find_code_object(c_line ? -c_line : py_line); + if (!py_code) { + __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); + py_code = __Pyx_CreateCodeObjectForTraceback( + funcname, c_line, py_line, filename); + if (!py_code) { + /* If the code object creation fails, then we should clear the + fetched exception references and propagate the new exception */ + Py_XDECREF(ptype); + Py_XDECREF(pvalue); + Py_XDECREF(ptraceback); + goto bad; + } + __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback); + __pyx_insert_code_object(c_line ? 
-c_line : py_line, py_code); + } + py_frame = PyFrame_New( + tstate, /*PyThreadState *tstate,*/ + py_code, /*PyCodeObject *code,*/ + __pyx_d, /*PyObject *globals,*/ + 0 /*PyObject *locals*/ + ); + if (!py_frame) goto bad; + __Pyx_PyFrame_SetLineNumber(py_frame, py_line); + PyTraceBack_Here(py_frame); +bad: + Py_XDECREF(py_code); + Py_XDECREF(py_frame); +} +#endif + +#if PY_MAJOR_VERSION < 3 +static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags) { + __Pyx_TypeName obj_type_name; + if (PyObject_CheckBuffer(obj)) return PyObject_GetBuffer(obj, view, flags); + obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); + PyErr_Format(PyExc_TypeError, + "'" __Pyx_FMT_TYPENAME "' does not have the buffer interface", + obj_type_name); + __Pyx_DECREF_TypeName(obj_type_name); + return -1; +} +static void __Pyx_ReleaseBuffer(Py_buffer *view) { + PyObject *obj = view->obj; + if (!obj) return; + if (PyObject_CheckBuffer(obj)) { + PyBuffer_Release(view); + return; + } + if ((0)) {} + view->obj = NULL; + Py_DECREF(obj); +} +#endif + + + /* CIntFromPyVerify */ + #define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value)\ + __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0) +#define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value)\ + __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1) +#define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc)\ + {\ + func_type value = func_value;\ + if (sizeof(target_type) < sizeof(func_type)) {\ + if (unlikely(value != (func_type) (target_type) value)) {\ + func_type zero = 0;\ + if (exc && unlikely(value == (func_type)-1 && PyErr_Occurred()))\ + return (target_type) -1;\ + if (is_unsigned && unlikely(value < zero))\ + goto raise_neg_overflow;\ + else\ + goto raise_overflow;\ + }\ + }\ + return (target_type) value;\ + } + +/* Declarations */ + #if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) + #ifdef __cplusplus + static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { + return ::std::complex< float >(x, y); + } + #else + static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { + return x + y*(__pyx_t_float_complex)_Complex_I; + } + #endif +#else + static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { + __pyx_t_float_complex z; + z.real = x; + z.imag = y; + return z; + } +#endif + +/* Arithmetic */ + #if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) +#else + static CYTHON_INLINE int __Pyx_c_eq_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + return (a.real == b.real) && (a.imag == b.imag); + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_sum_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + z.real = a.real + b.real; + z.imag = a.imag + b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_diff_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + z.real = a.real - b.real; + z.imag = a.imag - b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prod_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + z.real = a.real * b.real - a.imag * b.imag; + z.imag = a.real * b.imag + a.imag * b.real; + return z; + } + #if 1 + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + if (b.imag == 0) { + return __pyx_t_float_complex_from_parts(a.real / b.real, a.imag / 
b.real); + } else if (fabsf(b.real) >= fabsf(b.imag)) { + if (b.real == 0 && b.imag == 0) { + return __pyx_t_float_complex_from_parts(a.real / b.real, a.imag / b.imag); + } else { + float r = b.imag / b.real; + float s = (float)(1.0) / (b.real + b.imag * r); + return __pyx_t_float_complex_from_parts( + (a.real + a.imag * r) * s, (a.imag - a.real * r) * s); + } + } else { + float r = b.real / b.imag; + float s = (float)(1.0) / (b.imag + b.real * r); + return __pyx_t_float_complex_from_parts( + (a.real * r + a.imag) * s, (a.imag * r - a.real) * s); + } + } + #else + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + if (b.imag == 0) { + return __pyx_t_float_complex_from_parts(a.real / b.real, a.imag / b.real); + } else { + float denom = b.real * b.real + b.imag * b.imag; + return __pyx_t_float_complex_from_parts( + (a.real * b.real + a.imag * b.imag) / denom, + (a.imag * b.real - a.real * b.imag) / denom); + } + } + #endif + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_neg_float(__pyx_t_float_complex a) { + __pyx_t_float_complex z; + z.real = -a.real; + z.imag = -a.imag; + return z; + } + static CYTHON_INLINE int __Pyx_c_is_zero_float(__pyx_t_float_complex a) { + return (a.real == 0) && (a.imag == 0); + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conj_float(__pyx_t_float_complex a) { + __pyx_t_float_complex z; + z.real = a.real; + z.imag = -a.imag; + return z; + } + #if 1 + static CYTHON_INLINE float __Pyx_c_abs_float(__pyx_t_float_complex z) { + #if !defined(HAVE_HYPOT) || defined(_MSC_VER) + return sqrtf(z.real*z.real + z.imag*z.imag); + #else + return hypotf(z.real, z.imag); + #endif + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_pow_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + float r, lnr, theta, z_r, z_theta; + if (b.imag == 0 && b.real == (int)b.real) { + if (b.real < 0) { + float denom = a.real * a.real + a.imag * a.imag; + a.real = a.real / denom; + a.imag = -a.imag / denom; + b.real = -b.real; + } + switch ((int)b.real) { + case 0: + z.real = 1; + z.imag = 0; + return z; + case 1: + return a; + case 2: + return __Pyx_c_prod_float(a, a); + case 3: + z = __Pyx_c_prod_float(a, a); + return __Pyx_c_prod_float(z, a); + case 4: + z = __Pyx_c_prod_float(a, a); + return __Pyx_c_prod_float(z, z); + } + } + if (a.imag == 0) { + if (a.real == 0) { + return a; + } else if ((b.imag == 0) && (a.real >= 0)) { + z.real = powf(a.real, b.real); + z.imag = 0; + return z; + } else if (a.real > 0) { + r = a.real; + theta = 0; + } else { + r = -a.real; + theta = atan2f(0.0, -1.0); + } + } else { + r = __Pyx_c_abs_float(a); + theta = atan2f(a.imag, a.real); + } + lnr = logf(r); + z_r = expf(lnr * b.real - theta * b.imag); + z_theta = theta * b.real + lnr * b.imag; + z.real = z_r * cosf(z_theta); + z.imag = z_r * sinf(z_theta); + return z; + } + #endif +#endif + +/* Declarations */ + #if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) + #ifdef __cplusplus + static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { + return ::std::complex< double >(x, y); + } + #else + static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { + return x + y*(__pyx_t_double_complex)_Complex_I; + } + #endif +#else + static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { + __pyx_t_double_complex z; + z.real = x; + z.imag = y; + return z; + } +#endif + +/* Arithmetic */ 
+ #if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) +#else + static CYTHON_INLINE int __Pyx_c_eq_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + return (a.real == b.real) && (a.imag == b.imag); + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + z.real = a.real + b.real; + z.imag = a.imag + b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + z.real = a.real - b.real; + z.imag = a.imag - b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + z.real = a.real * b.real - a.imag * b.imag; + z.imag = a.real * b.imag + a.imag * b.real; + return z; + } + #if 1 + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + if (b.imag == 0) { + return __pyx_t_double_complex_from_parts(a.real / b.real, a.imag / b.real); + } else if (fabs(b.real) >= fabs(b.imag)) { + if (b.real == 0 && b.imag == 0) { + return __pyx_t_double_complex_from_parts(a.real / b.real, a.imag / b.imag); + } else { + double r = b.imag / b.real; + double s = (double)(1.0) / (b.real + b.imag * r); + return __pyx_t_double_complex_from_parts( + (a.real + a.imag * r) * s, (a.imag - a.real * r) * s); + } + } else { + double r = b.real / b.imag; + double s = (double)(1.0) / (b.imag + b.real * r); + return __pyx_t_double_complex_from_parts( + (a.real * r + a.imag) * s, (a.imag * r - a.real) * s); + } + } + #else + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + if (b.imag == 0) { + return __pyx_t_double_complex_from_parts(a.real / b.real, a.imag / b.real); + } else { + double denom = b.real * b.real + b.imag * b.imag; + return __pyx_t_double_complex_from_parts( + (a.real * b.real + a.imag * b.imag) / denom, + (a.imag * b.real - a.real * b.imag) / denom); + } + } + #endif + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg_double(__pyx_t_double_complex a) { + __pyx_t_double_complex z; + z.real = -a.real; + z.imag = -a.imag; + return z; + } + static CYTHON_INLINE int __Pyx_c_is_zero_double(__pyx_t_double_complex a) { + return (a.real == 0) && (a.imag == 0); + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj_double(__pyx_t_double_complex a) { + __pyx_t_double_complex z; + z.real = a.real; + z.imag = -a.imag; + return z; + } + #if 1 + static CYTHON_INLINE double __Pyx_c_abs_double(__pyx_t_double_complex z) { + #if !defined(HAVE_HYPOT) || defined(_MSC_VER) + return sqrt(z.real*z.real + z.imag*z.imag); + #else + return hypot(z.real, z.imag); + #endif + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + double r, lnr, theta, z_r, z_theta; + if (b.imag == 0 && b.real == (int)b.real) { + if (b.real < 0) { + double denom = a.real * a.real + a.imag * a.imag; + a.real = a.real / denom; + a.imag = -a.imag / denom; + b.real = -b.real; + } + switch ((int)b.real) { + case 0: + z.real = 1; + z.imag = 0; + return z; + case 1: + return a; + case 2: + return __Pyx_c_prod_double(a, a); + case 3: + z = __Pyx_c_prod_double(a, a); + return __Pyx_c_prod_double(z, a); + case 4: + z = __Pyx_c_prod_double(a, a); + return __Pyx_c_prod_double(z, z); + } + } + if (a.imag 
== 0) { + if (a.real == 0) { + return a; + } else if ((b.imag == 0) && (a.real >= 0)) { + z.real = pow(a.real, b.real); + z.imag = 0; + return z; + } else if (a.real > 0) { + r = a.real; + theta = 0; + } else { + r = -a.real; + theta = atan2(0.0, -1.0); + } + } else { + r = __Pyx_c_abs_double(a); + theta = atan2(a.imag, a.real); + } + lnr = log(r); + z_r = exp(lnr * b.real - theta * b.imag); + z_theta = theta * b.real + lnr * b.imag; + z.real = z_r * cos(z_theta); + z.imag = z_r * sin(z_theta); + return z; + } + #endif +#endif + +/* CIntToPy */ + static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_int(unsigned int value) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const unsigned int neg_one = (unsigned int) -1, const_zero = (unsigned int) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(unsigned int) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(unsigned int) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(unsigned int) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); +#endif + } + } else { + if (sizeof(unsigned int) <= sizeof(long)) { + return PyInt_FromLong((long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(unsigned int) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); +#endif + } + } + { + unsigned char *bytes = (unsigned char *)&value; +#if !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x030d00A4 + if (is_unsigned) { + return PyLong_FromUnsignedNativeBytes(bytes, sizeof(value), -1); + } else { + return PyLong_FromNativeBytes(bytes, sizeof(value), -1); + } +#elif !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030d0000 + int one = 1; int little = (int)*(unsigned char *)&one; + return _PyLong_FromByteArray(bytes, sizeof(unsigned int), + little, !is_unsigned); +#else + int one = 1; int little = (int)*(unsigned char *)&one; + PyObject *from_bytes, *result = NULL; + PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; + from_bytes = PyObject_GetAttrString((PyObject*)&PyLong_Type, "from_bytes"); + if (!from_bytes) return NULL; + py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(unsigned int)); + if (!py_bytes) goto limited_bad; + order_str = PyUnicode_FromString(little ? 
"little" : "big"); + if (!order_str) goto limited_bad; + arg_tuple = PyTuple_Pack(2, py_bytes, order_str); + if (!arg_tuple) goto limited_bad; + if (!is_unsigned) { + kwds = PyDict_New(); + if (!kwds) goto limited_bad; + if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(Py_True))) goto limited_bad; + } + result = PyObject_Call(from_bytes, arg_tuple, kwds); + limited_bad: + Py_XDECREF(kwds); + Py_XDECREF(arg_tuple); + Py_XDECREF(order_str); + Py_XDECREF(py_bytes); + Py_XDECREF(from_bytes); + return result; +#endif + } +} + +/* CIntFromPy */ + static CYTHON_INLINE unsigned int __Pyx_PyInt_As_unsigned_int(PyObject *x) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const unsigned int neg_one = (unsigned int) -1, const_zero = (unsigned int) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if ((sizeof(unsigned int) < sizeof(long))) { + __PYX_VERIFY_RETURN_INT(unsigned int, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (unsigned int) val; + } + } +#endif + if (unlikely(!PyLong_Check(x))) { + unsigned int val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (unsigned int) -1; + val = __Pyx_PyInt_As_unsigned_int(tmp); + Py_DECREF(tmp); + return val; + } + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + if (unlikely(__Pyx_PyLong_IsNeg(x))) { + goto raise_neg_overflow; + } else if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(unsigned int, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_DigitCount(x)) { + case 2: + if ((8 * sizeof(unsigned int) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(unsigned int) >= 2 * PyLong_SHIFT)) { + return (unsigned int) (((((unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0])); + } + } + break; + case 3: + if ((8 * sizeof(unsigned int) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(unsigned int) >= 3 * PyLong_SHIFT)) { + return (unsigned int) (((((((unsigned int)digits[2]) << PyLong_SHIFT) | (unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0])); + } + } + break; + case 4: + if ((8 * sizeof(unsigned int) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(unsigned int) >= 4 * PyLong_SHIFT)) { + return (unsigned int) (((((((((unsigned int)digits[3]) << PyLong_SHIFT) | (unsigned int)digits[2]) << PyLong_SHIFT) | (unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0])); + } + } + break; + } + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 + 
if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (unsigned int) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if ((sizeof(unsigned int) <= sizeof(unsigned long))) { + __PYX_VERIFY_RETURN_INT_EXC(unsigned int, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(unsigned int) <= sizeof(unsigned PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(unsigned int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(unsigned int, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_SignedDigitCount(x)) { + case -2: + if ((8 * sizeof(unsigned int) - 1 > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(unsigned int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(unsigned int) - 1 > 2 * PyLong_SHIFT)) { + return (unsigned int) (((unsigned int)-1)*(((((unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0]))); + } + } + break; + case 2: + if ((8 * sizeof(unsigned int) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(unsigned int) - 1 > 2 * PyLong_SHIFT)) { + return (unsigned int) ((((((unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0]))); + } + } + break; + case -3: + if ((8 * sizeof(unsigned int) - 1 > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(unsigned int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(unsigned int) - 1 > 3 * PyLong_SHIFT)) { + return (unsigned int) (((unsigned int)-1)*(((((((unsigned int)digits[2]) << PyLong_SHIFT) | (unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0]))); + } + } + break; + case 3: + if ((8 * sizeof(unsigned int) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(unsigned int) - 1 > 3 * PyLong_SHIFT)) { + return (unsigned int) ((((((((unsigned int)digits[2]) << PyLong_SHIFT) | (unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0]))); + } + } + break; + case -4: + if ((8 * sizeof(unsigned int) - 1 > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(unsigned int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(unsigned int) - 1 > 4 * PyLong_SHIFT)) { + return (unsigned int) (((unsigned int)-1)*(((((((((unsigned int)digits[3]) << PyLong_SHIFT) | (unsigned int)digits[2]) << PyLong_SHIFT) | (unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0]))); + } + } + break; + 
case 4: + if ((8 * sizeof(unsigned int) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(unsigned int) - 1 > 4 * PyLong_SHIFT)) { + return (unsigned int) ((((((((((unsigned int)digits[3]) << PyLong_SHIFT) | (unsigned int)digits[2]) << PyLong_SHIFT) | (unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0]))); + } + } + break; + } + } +#endif + if ((sizeof(unsigned int) <= sizeof(long))) { + __PYX_VERIFY_RETURN_INT_EXC(unsigned int, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(unsigned int) <= sizeof(PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(unsigned int, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { + unsigned int val; + int ret = -1; +#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API + Py_ssize_t bytes_copied = PyLong_AsNativeBytes( + x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); + if (unlikely(bytes_copied == -1)) { + } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { + goto raise_overflow; + } else { + ret = 0; + } +#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + ret = _PyLong_AsByteArray((PyLongObject *)x, + bytes, sizeof(val), + is_little, !is_unsigned); +#else + PyObject *v; + PyObject *stepval = NULL, *mask = NULL, *shift = NULL; + int bits, remaining_bits, is_negative = 0; + int chunk_size = (sizeof(long) < 8) ? 30 : 62; + if (likely(PyLong_CheckExact(x))) { + v = __Pyx_NewRef(x); + } else { + v = PyNumber_Long(x); + if (unlikely(!v)) return (unsigned int) -1; + assert(PyLong_CheckExact(v)); + } + { + int result = PyObject_RichCompareBool(v, Py_False, Py_LT); + if (unlikely(result < 0)) { + Py_DECREF(v); + return (unsigned int) -1; + } + is_negative = result == 1; + } + if (is_unsigned && unlikely(is_negative)) { + Py_DECREF(v); + goto raise_neg_overflow; + } else if (is_negative) { + stepval = PyNumber_Invert(v); + Py_DECREF(v); + if (unlikely(!stepval)) + return (unsigned int) -1; + } else { + stepval = v; + } + v = NULL; + val = (unsigned int) 0; + mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; + shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; + for (bits = 0; bits < (int) sizeof(unsigned int) * 8 - chunk_size; bits += chunk_size) { + PyObject *tmp, *digit; + long idigit; + digit = PyNumber_And(stepval, mask); + if (unlikely(!digit)) goto done; + idigit = PyLong_AsLong(digit); + Py_DECREF(digit); + if (unlikely(idigit < 0)) goto done; + val |= ((unsigned int) idigit) << bits; + tmp = PyNumber_Rshift(stepval, shift); + if (unlikely(!tmp)) goto done; + Py_DECREF(stepval); stepval = tmp; + } + Py_DECREF(shift); shift = NULL; + Py_DECREF(mask); mask = NULL; + { + long idigit = PyLong_AsLong(stepval); + if (unlikely(idigit < 0)) goto done; + remaining_bits = ((int) sizeof(unsigned int) * 8) - bits - (is_unsigned ? 
0 : 1); + if (unlikely(idigit >= (1L << remaining_bits))) + goto raise_overflow; + val |= ((unsigned int) idigit) << bits; + } + if (!is_unsigned) { + if (unlikely(val & (((unsigned int) 1) << (sizeof(unsigned int) * 8 - 1)))) + goto raise_overflow; + if (is_negative) + val = ~val; + } + ret = 0; + done: + Py_XDECREF(shift); + Py_XDECREF(mask); + Py_XDECREF(stepval); +#endif + if (unlikely(ret)) + return (unsigned int) -1; + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to unsigned int"); + return (unsigned int) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to unsigned int"); + return (unsigned int) -1; +} + +/* CIntToPy */ + static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const long neg_one = (long) -1, const_zero = (long) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(long) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(long) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); +#endif + } + } else { + if (sizeof(long) <= sizeof(long)) { + return PyInt_FromLong((long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); +#endif + } + } + { + unsigned char *bytes = (unsigned char *)&value; +#if !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x030d00A4 + if (is_unsigned) { + return PyLong_FromUnsignedNativeBytes(bytes, sizeof(value), -1); + } else { + return PyLong_FromNativeBytes(bytes, sizeof(value), -1); + } +#elif !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030d0000 + int one = 1; int little = (int)*(unsigned char *)&one; + return _PyLong_FromByteArray(bytes, sizeof(long), + little, !is_unsigned); +#else + int one = 1; int little = (int)*(unsigned char *)&one; + PyObject *from_bytes, *result = NULL; + PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; + from_bytes = PyObject_GetAttrString((PyObject*)&PyLong_Type, "from_bytes"); + if (!from_bytes) return NULL; + py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(long)); + if (!py_bytes) goto limited_bad; + order_str = PyUnicode_FromString(little ? 
"little" : "big"); + if (!order_str) goto limited_bad; + arg_tuple = PyTuple_Pack(2, py_bytes, order_str); + if (!arg_tuple) goto limited_bad; + if (!is_unsigned) { + kwds = PyDict_New(); + if (!kwds) goto limited_bad; + if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(Py_True))) goto limited_bad; + } + result = PyObject_Call(from_bytes, arg_tuple, kwds); + limited_bad: + Py_XDECREF(kwds); + Py_XDECREF(arg_tuple); + Py_XDECREF(order_str); + Py_XDECREF(py_bytes); + Py_XDECREF(from_bytes); + return result; +#endif + } +} + +/* FormatTypeName */ + #if CYTHON_COMPILING_IN_LIMITED_API +static __Pyx_TypeName +__Pyx_PyType_GetName(PyTypeObject* tp) +{ + PyObject *name = __Pyx_PyObject_GetAttrStr((PyObject *)tp, + __pyx_n_s_name); + if (unlikely(name == NULL) || unlikely(!PyUnicode_Check(name))) { + PyErr_Clear(); + Py_XDECREF(name); + name = __Pyx_NewRef(__pyx_n_s__13); + } + return name; +} +#endif + +/* CIntFromPy */ + static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const long neg_one = (long) -1, const_zero = (long) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if ((sizeof(long) < sizeof(long))) { + __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (long) val; + } + } +#endif + if (unlikely(!PyLong_Check(x))) { + long val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (long) -1; + val = __Pyx_PyInt_As_long(tmp); + Py_DECREF(tmp); + return val; + } + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + if (unlikely(__Pyx_PyLong_IsNeg(x))) { + goto raise_neg_overflow; + } else if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(long, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_DigitCount(x)) { + case 2: + if ((8 * sizeof(long) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) >= 2 * PyLong_SHIFT)) { + return (long) (((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + case 3: + if ((8 * sizeof(long) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) >= 3 * PyLong_SHIFT)) { + return (long) (((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + case 4: + if ((8 * sizeof(long) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) >= 4 * PyLong_SHIFT)) { + return (long) (((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | 
(long)digits[0])); + } + } + break; + } + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (long) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if ((sizeof(long) <= sizeof(unsigned long))) { + __PYX_VERIFY_RETURN_INT_EXC(long, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(long) <= sizeof(unsigned PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(long, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(long, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_SignedDigitCount(x)) { + case -2: + if ((8 * sizeof(long) - 1 > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { + return (long) (((long)-1)*(((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 2: + if ((8 * sizeof(long) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { + return (long) ((((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case -3: + if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { + return (long) (((long)-1)*(((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 3: + if ((8 * sizeof(long) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { + return (long) ((((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case -4: + if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 4 * PyLong_SHIFT)) { + return (long) (((long)-1)*(((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 4: + if ((8 * sizeof(long) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned 
long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 4 * PyLong_SHIFT)) { + return (long) ((((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + } + } +#endif + if ((sizeof(long) <= sizeof(long))) { + __PYX_VERIFY_RETURN_INT_EXC(long, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(long) <= sizeof(PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(long, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { + long val; + int ret = -1; +#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API + Py_ssize_t bytes_copied = PyLong_AsNativeBytes( + x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); + if (unlikely(bytes_copied == -1)) { + } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { + goto raise_overflow; + } else { + ret = 0; + } +#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + ret = _PyLong_AsByteArray((PyLongObject *)x, + bytes, sizeof(val), + is_little, !is_unsigned); +#else + PyObject *v; + PyObject *stepval = NULL, *mask = NULL, *shift = NULL; + int bits, remaining_bits, is_negative = 0; + int chunk_size = (sizeof(long) < 8) ? 30 : 62; + if (likely(PyLong_CheckExact(x))) { + v = __Pyx_NewRef(x); + } else { + v = PyNumber_Long(x); + if (unlikely(!v)) return (long) -1; + assert(PyLong_CheckExact(v)); + } + { + int result = PyObject_RichCompareBool(v, Py_False, Py_LT); + if (unlikely(result < 0)) { + Py_DECREF(v); + return (long) -1; + } + is_negative = result == 1; + } + if (is_unsigned && unlikely(is_negative)) { + Py_DECREF(v); + goto raise_neg_overflow; + } else if (is_negative) { + stepval = PyNumber_Invert(v); + Py_DECREF(v); + if (unlikely(!stepval)) + return (long) -1; + } else { + stepval = v; + } + v = NULL; + val = (long) 0; + mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; + shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; + for (bits = 0; bits < (int) sizeof(long) * 8 - chunk_size; bits += chunk_size) { + PyObject *tmp, *digit; + long idigit; + digit = PyNumber_And(stepval, mask); + if (unlikely(!digit)) goto done; + idigit = PyLong_AsLong(digit); + Py_DECREF(digit); + if (unlikely(idigit < 0)) goto done; + val |= ((long) idigit) << bits; + tmp = PyNumber_Rshift(stepval, shift); + if (unlikely(!tmp)) goto done; + Py_DECREF(stepval); stepval = tmp; + } + Py_DECREF(shift); shift = NULL; + Py_DECREF(mask); mask = NULL; + { + long idigit = PyLong_AsLong(stepval); + if (unlikely(idigit < 0)) goto done; + remaining_bits = ((int) sizeof(long) * 8) - bits - (is_unsigned ? 
0 : 1); + if (unlikely(idigit >= (1L << remaining_bits))) + goto raise_overflow; + val |= ((long) idigit) << bits; + } + if (!is_unsigned) { + if (unlikely(val & (((long) 1) << (sizeof(long) * 8 - 1)))) + goto raise_overflow; + if (is_negative) + val = ~val; + } + ret = 0; + done: + Py_XDECREF(shift); + Py_XDECREF(mask); + Py_XDECREF(stepval); +#endif + if (unlikely(ret)) + return (long) -1; + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to long"); + return (long) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to long"); + return (long) -1; +} + +/* CIntFromPy */ + static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const int neg_one = (int) -1, const_zero = (int) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if ((sizeof(int) < sizeof(long))) { + __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (int) val; + } + } +#endif + if (unlikely(!PyLong_Check(x))) { + int val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (int) -1; + val = __Pyx_PyInt_As_int(tmp); + Py_DECREF(tmp); + return val; + } + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + if (unlikely(__Pyx_PyLong_IsNeg(x))) { + goto raise_neg_overflow; + } else if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(int, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_DigitCount(x)) { + case 2: + if ((8 * sizeof(int) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) >= 2 * PyLong_SHIFT)) { + return (int) (((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + case 3: + if ((8 * sizeof(int) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) >= 3 * PyLong_SHIFT)) { + return (int) (((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + case 4: + if ((8 * sizeof(int) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) >= 4 * PyLong_SHIFT)) { + return (int) (((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + } + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + 
return (int) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if ((sizeof(int) <= sizeof(unsigned long))) { + __PYX_VERIFY_RETURN_INT_EXC(int, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(int) <= sizeof(unsigned PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(int, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_SignedDigitCount(x)) { + case -2: + if ((8 * sizeof(int) - 1 > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { + return (int) (((int)-1)*(((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 2: + if ((8 * sizeof(int) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { + return (int) ((((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case -3: + if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { + return (int) (((int)-1)*(((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 3: + if ((8 * sizeof(int) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { + return (int) ((((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case -4: + if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 4 * PyLong_SHIFT)) { + return (int) (((int)-1)*(((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 4: + if ((8 * sizeof(int) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 4 * PyLong_SHIFT)) { + return (int) ((((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + } + 
} +#endif + if ((sizeof(int) <= sizeof(long))) { + __PYX_VERIFY_RETURN_INT_EXC(int, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(int) <= sizeof(PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(int, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { + int val; + int ret = -1; +#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API + Py_ssize_t bytes_copied = PyLong_AsNativeBytes( + x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); + if (unlikely(bytes_copied == -1)) { + } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { + goto raise_overflow; + } else { + ret = 0; + } +#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + ret = _PyLong_AsByteArray((PyLongObject *)x, + bytes, sizeof(val), + is_little, !is_unsigned); +#else + PyObject *v; + PyObject *stepval = NULL, *mask = NULL, *shift = NULL; + int bits, remaining_bits, is_negative = 0; + int chunk_size = (sizeof(long) < 8) ? 30 : 62; + if (likely(PyLong_CheckExact(x))) { + v = __Pyx_NewRef(x); + } else { + v = PyNumber_Long(x); + if (unlikely(!v)) return (int) -1; + assert(PyLong_CheckExact(v)); + } + { + int result = PyObject_RichCompareBool(v, Py_False, Py_LT); + if (unlikely(result < 0)) { + Py_DECREF(v); + return (int) -1; + } + is_negative = result == 1; + } + if (is_unsigned && unlikely(is_negative)) { + Py_DECREF(v); + goto raise_neg_overflow; + } else if (is_negative) { + stepval = PyNumber_Invert(v); + Py_DECREF(v); + if (unlikely(!stepval)) + return (int) -1; + } else { + stepval = v; + } + v = NULL; + val = (int) 0; + mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; + shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; + for (bits = 0; bits < (int) sizeof(int) * 8 - chunk_size; bits += chunk_size) { + PyObject *tmp, *digit; + long idigit; + digit = PyNumber_And(stepval, mask); + if (unlikely(!digit)) goto done; + idigit = PyLong_AsLong(digit); + Py_DECREF(digit); + if (unlikely(idigit < 0)) goto done; + val |= ((int) idigit) << bits; + tmp = PyNumber_Rshift(stepval, shift); + if (unlikely(!tmp)) goto done; + Py_DECREF(stepval); stepval = tmp; + } + Py_DECREF(shift); shift = NULL; + Py_DECREF(mask); mask = NULL; + { + long idigit = PyLong_AsLong(stepval); + if (unlikely(idigit < 0)) goto done; + remaining_bits = ((int) sizeof(int) * 8) - bits - (is_unsigned ? 
0 : 1); + if (unlikely(idigit >= (1L << remaining_bits))) + goto raise_overflow; + val |= ((int) idigit) << bits; + } + if (!is_unsigned) { + if (unlikely(val & (((int) 1) << (sizeof(int) * 8 - 1)))) + goto raise_overflow; + if (is_negative) + val = ~val; + } + ret = 0; + done: + Py_XDECREF(shift); + Py_XDECREF(mask); + Py_XDECREF(stepval); +#endif + if (unlikely(ret)) + return (int) -1; + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to int"); + return (int) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to int"); + return (int) -1; +} + +/* FastTypeChecks */ + #if CYTHON_COMPILING_IN_CPYTHON +static int __Pyx_InBases(PyTypeObject *a, PyTypeObject *b) { + while (a) { + a = __Pyx_PyType_GetSlot(a, tp_base, PyTypeObject*); + if (a == b) + return 1; + } + return b == &PyBaseObject_Type; +} +static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b) { + PyObject *mro; + if (a == b) return 1; + mro = a->tp_mro; + if (likely(mro)) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(mro); + for (i = 0; i < n; i++) { + if (PyTuple_GET_ITEM(mro, i) == (PyObject *)b) + return 1; + } + return 0; + } + return __Pyx_InBases(a, b); +} +static CYTHON_INLINE int __Pyx_IsAnySubtype2(PyTypeObject *cls, PyTypeObject *a, PyTypeObject *b) { + PyObject *mro; + if (cls == a || cls == b) return 1; + mro = cls->tp_mro; + if (likely(mro)) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(mro); + for (i = 0; i < n; i++) { + PyObject *base = PyTuple_GET_ITEM(mro, i); + if (base == (PyObject *)a || base == (PyObject *)b) + return 1; + } + return 0; + } + return __Pyx_InBases(cls, a) || __Pyx_InBases(cls, b); +} +#if PY_MAJOR_VERSION == 2 +static int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject* exc_type2) { + PyObject *exception, *value, *tb; + int res; + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ErrFetch(&exception, &value, &tb); + res = exc_type1 ? 
PyObject_IsSubclass(err, exc_type1) : 0; + if (unlikely(res == -1)) { + PyErr_WriteUnraisable(err); + res = 0; + } + if (!res) { + res = PyObject_IsSubclass(err, exc_type2); + if (unlikely(res == -1)) { + PyErr_WriteUnraisable(err); + res = 0; + } + } + __Pyx_ErrRestore(exception, value, tb); + return res; +} +#else +static CYTHON_INLINE int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject *exc_type2) { + if (exc_type1) { + return __Pyx_IsAnySubtype2((PyTypeObject*)err, (PyTypeObject*)exc_type1, (PyTypeObject*)exc_type2); + } else { + return __Pyx_IsSubtype((PyTypeObject*)err, (PyTypeObject*)exc_type2); + } +} +#endif +static int __Pyx_PyErr_GivenExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { + Py_ssize_t i, n; + assert(PyExceptionClass_Check(exc_type)); + n = PyTuple_GET_SIZE(tuple); +#if PY_MAJOR_VERSION >= 3 + for (i=0; i= 0x030B00A4 + return Py_Version & ~0xFFUL; +#else + const char* rt_version = Py_GetVersion(); + unsigned long version = 0; + unsigned long factor = 0x01000000UL; + unsigned int digit = 0; + int i = 0; + while (factor) { + while ('0' <= rt_version[i] && rt_version[i] <= '9') { + digit = digit * 10 + (unsigned int) (rt_version[i] - '0'); + ++i; + } + version += factor * digit; + if (rt_version[i] != '.') + break; + digit = 0; + factor >>= 8; + ++i; + } + return version; +#endif +} +static int __Pyx_check_binary_version(unsigned long ct_version, unsigned long rt_version, int allow_newer) { + const unsigned long MAJOR_MINOR = 0xFFFF0000UL; + if ((rt_version & MAJOR_MINOR) == (ct_version & MAJOR_MINOR)) + return 0; + if (likely(allow_newer && (rt_version & MAJOR_MINOR) > (ct_version & MAJOR_MINOR))) + return 1; + { + char message[200]; + PyOS_snprintf(message, sizeof(message), + "compile time Python version %d.%d " + "of module '%.100s' " + "%s " + "runtime version %d.%d", + (int) (ct_version >> 24), (int) ((ct_version >> 16) & 0xFF), + __Pyx_MODULE_NAME, + (allow_newer) ? 
"was newer than" : "does not match", + (int) (rt_version >> 24), (int) ((rt_version >> 16) & 0xFF) + ); + return PyErr_WarnEx(NULL, message, 1); + } +} + +/* InitStrings */ + #if PY_MAJOR_VERSION >= 3 +static int __Pyx_InitString(__Pyx_StringTabEntry t, PyObject **str) { + if (t.is_unicode | t.is_str) { + if (t.intern) { + *str = PyUnicode_InternFromString(t.s); + } else if (t.encoding) { + *str = PyUnicode_Decode(t.s, t.n - 1, t.encoding, NULL); + } else { + *str = PyUnicode_FromStringAndSize(t.s, t.n - 1); + } + } else { + *str = PyBytes_FromStringAndSize(t.s, t.n - 1); + } + if (!*str) + return -1; + if (PyObject_Hash(*str) == -1) + return -1; + return 0; +} +#endif +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { + while (t->p) { + #if PY_MAJOR_VERSION >= 3 + __Pyx_InitString(*t, t->p); + #else + if (t->is_unicode) { + *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL); + } else if (t->intern) { + *t->p = PyString_InternFromString(t->s); + } else { + *t->p = PyString_FromStringAndSize(t->s, t->n - 1); + } + if (!*t->p) + return -1; + if (PyObject_Hash(*t->p) == -1) + return -1; + #endif + ++t; + } + return 0; +} + +#include +static CYTHON_INLINE Py_ssize_t __Pyx_ssize_strlen(const char *s) { + size_t len = strlen(s); + if (unlikely(len > (size_t) PY_SSIZE_T_MAX)) { + PyErr_SetString(PyExc_OverflowError, "byte string is too long"); + return -1; + } + return (Py_ssize_t) len; +} +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) { + Py_ssize_t len = __Pyx_ssize_strlen(c_str); + if (unlikely(len < 0)) return NULL; + return __Pyx_PyUnicode_FromStringAndSize(c_str, len); +} +static CYTHON_INLINE PyObject* __Pyx_PyByteArray_FromString(const char* c_str) { + Py_ssize_t len = __Pyx_ssize_strlen(c_str); + if (unlikely(len < 0)) return NULL; + return PyByteArray_FromStringAndSize(c_str, len); +} +static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject* o) { + Py_ssize_t ignore; + return __Pyx_PyObject_AsStringAndSize(o, &ignore); +} +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT +#if !CYTHON_PEP393_ENABLED +static const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { + char* defenc_c; + PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL); + if (!defenc) return NULL; + defenc_c = PyBytes_AS_STRING(defenc); +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + { + char* end = defenc_c + PyBytes_GET_SIZE(defenc); + char* c; + for (c = defenc_c; c < end; c++) { + if ((unsigned char) (*c) >= 128) { + PyUnicode_AsASCIIString(o); + return NULL; + } + } + } +#endif + *length = PyBytes_GET_SIZE(defenc); + return defenc_c; +} +#else +static CYTHON_INLINE const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { + if (unlikely(__Pyx_PyUnicode_READY(o) == -1)) return NULL; +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + if (likely(PyUnicode_IS_ASCII(o))) { + *length = PyUnicode_GET_LENGTH(o); + return PyUnicode_AsUTF8(o); + } else { + PyUnicode_AsASCIIString(o); + return NULL; + } +#else + return PyUnicode_AsUTF8AndSize(o, length); +#endif +} +#endif +#endif +static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) { +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT + if ( +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + __Pyx_sys_getdefaultencoding_not_ascii && +#endif + PyUnicode_Check(o)) { + return __Pyx_PyUnicode_AsStringAndSize(o, length); + } else +#endif +#if 
(!CYTHON_COMPILING_IN_PYPY && !CYTHON_COMPILING_IN_LIMITED_API) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE)) + if (PyByteArray_Check(o)) { + *length = PyByteArray_GET_SIZE(o); + return PyByteArray_AS_STRING(o); + } else +#endif + { + char* result; + int r = PyBytes_AsStringAndSize(o, &result, length); + if (unlikely(r < 0)) { + return NULL; + } else { + return result; + } + } +} +static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) { + int is_true = x == Py_True; + if (is_true | (x == Py_False) | (x == Py_None)) return is_true; + else return PyObject_IsTrue(x); +} +static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject* x) { + int retval; + if (unlikely(!x)) return -1; + retval = __Pyx_PyObject_IsTrue(x); + Py_DECREF(x); + return retval; +} +static PyObject* __Pyx_PyNumber_IntOrLongWrongResultType(PyObject* result, const char* type_name) { + __Pyx_TypeName result_type_name = __Pyx_PyType_GetName(Py_TYPE(result)); +#if PY_MAJOR_VERSION >= 3 + if (PyLong_Check(result)) { + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "__int__ returned non-int (type " __Pyx_FMT_TYPENAME "). " + "The ability to return an instance of a strict subclass of int is deprecated, " + "and may be removed in a future version of Python.", + result_type_name)) { + __Pyx_DECREF_TypeName(result_type_name); + Py_DECREF(result); + return NULL; + } + __Pyx_DECREF_TypeName(result_type_name); + return result; + } +#endif + PyErr_Format(PyExc_TypeError, + "__%.4s__ returned non-%.4s (type " __Pyx_FMT_TYPENAME ")", + type_name, type_name, result_type_name); + __Pyx_DECREF_TypeName(result_type_name); + Py_DECREF(result); + return NULL; +} +static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x) { +#if CYTHON_USE_TYPE_SLOTS + PyNumberMethods *m; +#endif + const char *name = NULL; + PyObject *res = NULL; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x) || PyLong_Check(x))) +#else + if (likely(PyLong_Check(x))) +#endif + return __Pyx_NewRef(x); +#if CYTHON_USE_TYPE_SLOTS + m = Py_TYPE(x)->tp_as_number; + #if PY_MAJOR_VERSION < 3 + if (m && m->nb_int) { + name = "int"; + res = m->nb_int(x); + } + else if (m && m->nb_long) { + name = "long"; + res = m->nb_long(x); + } + #else + if (likely(m && m->nb_int)) { + name = "int"; + res = m->nb_int(x); + } + #endif +#else + if (!PyBytes_CheckExact(x) && !PyUnicode_CheckExact(x)) { + res = PyNumber_Int(x); + } +#endif + if (likely(res)) { +#if PY_MAJOR_VERSION < 3 + if (unlikely(!PyInt_Check(res) && !PyLong_Check(res))) { +#else + if (unlikely(!PyLong_CheckExact(res))) { +#endif + return __Pyx_PyNumber_IntOrLongWrongResultType(res, name); + } + } + else if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_TypeError, + "an integer is required"); + } + return res; +} +static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) { + Py_ssize_t ival; + PyObject *x; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_CheckExact(b))) { + if (sizeof(Py_ssize_t) >= sizeof(long)) + return PyInt_AS_LONG(b); + else + return PyInt_AsSsize_t(b); + } +#endif + if (likely(PyLong_CheckExact(b))) { + #if CYTHON_USE_PYLONG_INTERNALS + if (likely(__Pyx_PyLong_IsCompact(b))) { + return __Pyx_PyLong_CompactValue(b); + } else { + const digit* digits = __Pyx_PyLong_Digits(b); + const Py_ssize_t size = __Pyx_PyLong_SignedDigitCount(b); + switch (size) { + case 2: + if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { + return (Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -2: + if (8 * sizeof(Py_ssize_t) > 2 * 
PyLong_SHIFT) { + return -(Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case 3: + if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { + return (Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -3: + if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case 4: + if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { + return (Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -4: + if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + } + } + #endif + return PyLong_AsSsize_t(b); + } + x = PyNumber_Index(b); + if (!x) return -1; + ival = PyInt_AsSsize_t(x); + Py_DECREF(x); + return ival; +} +static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject* o) { + if (sizeof(Py_hash_t) == sizeof(Py_ssize_t)) { + return (Py_hash_t) __Pyx_PyIndex_AsSsize_t(o); +#if PY_MAJOR_VERSION < 3 + } else if (likely(PyInt_CheckExact(o))) { + return PyInt_AS_LONG(o); +#endif + } else { + Py_ssize_t ival; + PyObject *x; + x = PyNumber_Index(o); + if (!x) return -1; + ival = PyInt_AsLong(x); + Py_DECREF(x); + return ival; + } +} +static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b) { + return b ? __Pyx_NewRef(Py_True) : __Pyx_NewRef(Py_False); +} +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) { + return PyInt_FromSize_t(ival); +} + + +/* #### Code section: utility_code_pragmas_end ### */ +#ifdef _MSC_VER +#pragma warning( pop ) +#endif + + + +/* #### Code section: end ### */ +#endif /* Py_PYTHON_H */ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/box_overlaps.cpython-37m-x86_64-linux-gnu.so b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/box_overlaps.cpython-37m-x86_64-linux-gnu.so new file mode 100755 index 0000000000000000000000000000000000000000..51798ca653806dc7b14d5683cbeab5596a3d24df Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/box_overlaps.cpython-37m-x86_64-linux-gnu.so differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/box_overlaps.pyx b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/box_overlaps.pyx new file mode 100644 index 0000000000000000000000000000000000000000..cd543ce481e3da6a5888bf2a64f8ce5c86e28e90 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/box_overlaps.pyx @@ -0,0 +1,147 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Sergey Karayev +# -------------------------------------------------------- + +import numpy as np +cimport numpy as np +from cython.parallel import prange, parallel + + +DTYPE = np.float32 +ctypedef float DTYPE_t + + +def bbox_overlaps( + np.ndarray[DTYPE_t, ndim=2] boxes, + np.ndarray[DTYPE_t, ndim=2] query_boxes): + """ + Parameters + ---------- + boxes: (N, 4) ndarray of float + query_boxes: (K, 4) ndarray of float + Returns + ------- + overlaps: (N, K) ndarray of overlap between boxes and query_boxes + """ + 
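    # Illustrative note (a minimal sanity check, not from the original source):
    # the "+ 1" terms below assume inclusive integer pixel coordinates, so a
    # box [x1, y1, x2, y2] = [0, 0, 9, 9] spans 10 x 10 pixels with area 100.
    # With that convention, for example:
    #
    #   boxes = np.array([[0, 0,  9,  9]], dtype=np.float32)
    #   query = np.array([[5, 5, 14, 14]], dtype=np.float32)
    #   bbox_overlaps(boxes, query)
    #   # intersection = 5 * 5 = 25, union = 100 + 100 - 25 = 175
    #   # -> array([[0.142857...]], dtype=float32)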
cdef unsigned int N = boxes.shape[0] + cdef unsigned int K = query_boxes.shape[0] + cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) + cdef DTYPE_t iw, ih, box_area + cdef DTYPE_t ua + cdef unsigned int k, n + for k in range(K): + box_area = ( + (query_boxes[k, 2] - query_boxes[k, 0] + 1) * + (query_boxes[k, 3] - query_boxes[k, 1] + 1) + ) + for n in range(N): + iw = ( + min(boxes[n, 2], query_boxes[k, 2]) - + max(boxes[n, 0], query_boxes[k, 0]) + 1 + ) + if iw > 0: + ih = ( + min(boxes[n, 3], query_boxes[k, 3]) - + max(boxes[n, 1], query_boxes[k, 1]) + 1 + ) + if ih > 0: + ua = float( + (boxes[n, 2] - boxes[n, 0] + 1) * + (boxes[n, 3] - boxes[n, 1] + 1) + + box_area - iw * ih + ) + overlaps[n, k] = iw * ih / ua + return overlaps + +def bbox_intersections( + np.ndarray[DTYPE_t, ndim=2] boxes, + np.ndarray[DTYPE_t, ndim=2] query_boxes): + """ + For each query box compute the intersection ratio covered by boxes + ---------- + Parameters + ---------- + boxes: (N, 4) ndarray of float + query_boxes: (K, 4) ndarray of float + Returns + ------- + overlaps: (N, K) ndarray of intersec between boxes and query_boxes + """ + cdef unsigned int N = boxes.shape[0] + cdef unsigned int K = query_boxes.shape[0] + cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) + cdef DTYPE_t iw, ih, box_area + cdef DTYPE_t ua + cdef unsigned int k, n + for k in range(K): + box_area = ( + (query_boxes[k, 2] - query_boxes[k, 0] + 1) * + (query_boxes[k, 3] - query_boxes[k, 1] + 1) + ) + for n in range(N): + iw = ( + min(boxes[n, 2], query_boxes[k, 2]) - + max(boxes[n, 0], query_boxes[k, 0]) + 1 + ) + if iw > 0: + ih = ( + min(boxes[n, 3], query_boxes[k, 3]) - + max(boxes[n, 1], query_boxes[k, 1]) + 1 + ) + if ih > 0: + intersec[n, k] = iw * ih / box_area + return intersec + +# Compute bounding box voting +def box_vote( + np.ndarray[float, ndim=2] dets_NMS, + np.ndarray[float, ndim=2] dets_all): + cdef np.ndarray[float, ndim=2] dets_voted = np.zeros((dets_NMS.shape[0], dets_NMS.shape[1]), dtype=np.float32) + cdef unsigned int N = dets_NMS.shape[0] + cdef unsigned int M = dets_all.shape[0] + + cdef np.ndarray[float, ndim=1] det + cdef np.ndarray[float, ndim=1] acc_box + cdef float acc_score + + cdef np.ndarray[float, ndim=1] det2 + cdef float bi0, bi1, bit2, bi3 + cdef float iw, ih, ua + + cdef float thresh=0.5 + + for i in range(N): + det = dets_NMS[i, :] + acc_box = np.zeros((4), dtype=np.float32) + acc_score = 0.0 + + for m in range(M): + det2 = dets_all[m, :] + + bi0 = max(det[0], det2[0]) + bi1 = max(det[1], det2[1]) + bi2 = min(det[2], det2[2]) + bi3 = min(det[3], det2[3]) + + iw = bi2 - bi0 + 1 + ih = bi3 - bi1 + 1 + + if not (iw > 0 and ih > 0): + continue + + ua = (det[2] - det[0] + 1) * (det[3] - det[1] + 1) + (det2[2] - det2[0] + 1) * (det2[3] - det2[1] + 1) - iw * ih + ov = iw * ih / ua + + if (ov < thresh): + continue + + acc_box += det2[4] * det2[0:4] + acc_score += det2[4] + + dets_voted[i][0:4] = acc_box / acc_score + dets_voted[i][4] = det[4] # Keep the original score + + return dets_voted diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/box_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/box_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..3ea755cef42367ac9cbdbf51e5837a53cc914e05 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/box_utils.py @@ -0,0 +1,1338 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , Hao Xiang , +# License: 
TDG-Attribution-NonCommercial-NoDistrib + + +""" +Bounding box related utility functions +""" +import sys + +import numpy as np + +import torch +import torch.nn.functional as F +import opencood.utils.common_utils as common_utils +from opencood.utils.transformation_utils import x1_to_x2, x_to_world +from pyquaternion import Quaternion +import copy + + +def corner_to_center_torch(corner3d, order='lwh'): + corner3d_ = corner3d.cpu().numpy() + return torch.from_numpy(corner_to_center(corner3d_, order)).to(corner3d.device) + +def corner_to_center(corner3d, order='lwh'): + """ + Convert 8 corners to x, y, z, dx, dy, dz, yaw. + yaw in radians + + Parameters + ---------- + corner3d : np.ndarray + (N, 8, 3) + + order : str, for output. + 'lwh' or 'hwl' + + Returns + ------- + box3d : np.ndarray + (N, 7) + """ + assert corner3d.ndim == 3 + batch_size = corner3d.shape[0] + + xyz = np.mean(corner3d[:, [0, 3, 5, 6], :], axis=1) + h = abs(np.mean(corner3d[:, 4:, 2] - corner3d[:, :4, 2], axis=1, + keepdims=True)) + l = (np.sqrt(np.sum((corner3d[:, 0, [0, 1]] - corner3d[:, 3, [0, 1]]) ** 2, + axis=1, keepdims=True)) + + np.sqrt(np.sum((corner3d[:, 2, [0, 1]] - corner3d[:, 1, [0, 1]]) ** 2, + axis=1, keepdims=True)) + + np.sqrt(np.sum((corner3d[:, 4, [0, 1]] - corner3d[:, 7, [0, 1]]) ** 2, + axis=1, keepdims=True)) + + np.sqrt(np.sum((corner3d[:, 5, [0, 1]] - corner3d[:, 6, [0, 1]]) ** 2, + axis=1, keepdims=True))) / 4 + + w = (np.sqrt( + np.sum((corner3d[:, 0, [0, 1]] - corner3d[:, 1, [0, 1]]) ** 2, axis=1, + keepdims=True)) + + np.sqrt(np.sum((corner3d[:, 2, [0, 1]] - corner3d[:, 3, [0, 1]]) ** 2, + axis=1, keepdims=True)) + + np.sqrt(np.sum((corner3d[:, 4, [0, 1]] - corner3d[:, 5, [0, 1]]) ** 2, + axis=1, keepdims=True)) + + np.sqrt(np.sum((corner3d[:, 6, [0, 1]] - corner3d[:, 7, [0, 1]]) ** 2, + axis=1, keepdims=True))) / 4 + + theta = (np.arctan2(corner3d[:, 1, 1] - corner3d[:, 2, 1], + corner3d[:, 1, 0] - corner3d[:, 2, 0]) + + np.arctan2(corner3d[:, 0, 1] - corner3d[:, 3, 1], + corner3d[:, 0, 0] - corner3d[:, 3, 0]) + + np.arctan2(corner3d[:, 5, 1] - corner3d[:, 6, 1], + corner3d[:, 5, 0] - corner3d[:, 6, 0]) + + np.arctan2(corner3d[:, 4, 1] - corner3d[:, 7, 1], + corner3d[:, 4, 0] - corner3d[:, 7, 0]))[:, + np.newaxis] / 4 + + if order == 'lwh': + return np.concatenate([xyz, l, w, h, theta], axis=1).reshape( + batch_size, 7) + elif order == 'hwl': + return np.concatenate([xyz, h, w, l, theta], axis=1).reshape( + batch_size, 7) + else: + sys.exit('Unknown order') + + +def boxes_to_corners2d(boxes3d, order): + """ + 0 -------- 1 + | | + | | + | | + 3 -------- 2 + Parameters + __________ + boxes3d: np.ndarray or torch.Tensor + (N, 7) [x, y, z, dx, dy, dz, heading], (x, y, z) is the box center. + + order : str + 'lwh' or 'hwl' + + Returns: + corners2d: np.ndarray or torch.Tensor + (N, 4, 3), the 4 corners of the bounding box. + + """ + corners3d = boxes_to_corners_3d(boxes3d, order) + corners2d = corners3d[:, :4, :] + return corners2d + + +def boxes2d_to_corners2d(boxes2d, order="lwh"): + """ + 0 -------- 1 + | | + | | + | | + 3 -------- 2 + Parameters + __________ + boxes2d: np.ndarray or torch.Tensor + (..., 5) [x, y, dx, dy, heading], (x, y) is the box center. + + order : str + 'lwh' or 'hwl' + + Returns: + corners2d: np.ndarray or torch.Tensor + (..., 4, 2), the 4 corners of the bounding box. + + """ + assert order == "lwh", \ + "boxes2d_to_corners_2d only supports lwh order for now." 
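    # Illustrative note (hand-worked, not from the original source): the
    # template below produces the corners in the order of the diagram above,
    # i.e. corner 0 = (+dx/2, -dy/2), 1 = (+dx/2, +dy/2), 2 = (-dx/2, +dy/2),
    # 3 = (-dx/2, -dy/2) in the box frame, before the yaw rotation and the
    # translation to the box center are applied. For an axis-aligned box
    # [x, y, dx, dy, yaw] = [0, 0, 4, 2, 0]:
    #
    #   boxes2d_to_corners2d(torch.tensor([[0., 0., 4., 2., 0.]]))
    #   # -> [[[ 2., -1.], [ 2., 1.], [-2., 1.], [-2., -1.]]]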
+ boxes2d, is_numpy = common_utils.check_numpy_to_torch(boxes2d) + template = boxes2d.new_tensor(( + [1, -1], [1, 1], [-1, 1], [-1, -1] + )) / 2 + input_shape = boxes2d.shape + boxes2d = boxes2d.view(-1, 5) + corners2d = boxes2d[:, None, 2:4].repeat(1, 4, 1) * template[None, :, :] + corners2d = common_utils.rotate_points_along_z_2d(corners2d.view(-1, 2), + boxes2d[:, + 4].repeat_interleave( + 4)).view(-1, 4, + 2) + corners2d += boxes2d[:, None, 0:2] + corners2d = corners2d.view(*(input_shape[:-1]), 4, 2) + return corners2d + + +def boxes_to_corners_3d(boxes3d, order): + """ + 4 -------- 5 + /| /| + 7 -------- 6 . + | | | | + . 0 -------- 1 + |/ |/ + 3 -------- 2 + Parameters + __________ + boxes3d: np.ndarray or torch.Tensor + (N, 7) [x, y, z, l, w, h, heading], or [x, y, z, h, w, l, heading] + + (x, y, z) is the box center. + + order : str + 'lwh' or 'hwl' + + Returns: + corners3d: np.ndarray or torch.Tensor + (N, 8, 3), the 8 corners of the bounding box. + + + opv2v's left hand coord + + ^ z + | + | + | . x + |/ + +-------> y + + """ + + boxes3d, is_numpy = common_utils.check_numpy_to_torch(boxes3d) + boxes3d_ = boxes3d + + if order == 'hwl': + boxes3d_ = boxes3d[:, [0, 1, 2, 5, 4, 3, 6]] + + template = boxes3d_.new_tensor(( + [1, -1, -1], [1, 1, -1], [-1, 1, -1], [-1, -1, -1], + [1, -1, 1], [1, 1, 1], [-1, 1, 1], [-1, -1, 1], + )) / 2 + + corners3d = boxes3d_[:, None, 3:6].repeat(1, 8, 1) * template[None, :, :] + corners3d = common_utils.rotate_points_along_z(corners3d.view(-1, 8, 3), + boxes3d_[:, 6]).view(-1, 8, + 3) + corners3d += boxes3d_[:, None, 0:3] + + return corners3d.numpy() if is_numpy else corners3d + + +def box3d_to_2d(box3d): + """ + Convert 3D bounding box to 2D. + + Parameters + ---------- + box3d : np.ndarray + (n, 8, 3) + + Returns + ------- + box2d : np.ndarray + (n, 4, 2), project 3d to 2d. + """ + box2d = box3d[:, :4, :2] + return box2d + + +def corner2d_to_standup_box(box2d): + """ + Find the minmaxx, minmaxy for each 2d box. (N, 4, 2) -> (N, 4) + x1, y1, x2, y2 + + Parameters + ---------- + box2d : np.ndarray + (n, 4, 2), four corners of the 2d bounding box. + + Returns + ------- + standup_box2d : np.ndarray + (n, 4) + """ + N = box2d.shape[0] + standup_boxes2d = np.zeros((N, 4)) + + standup_boxes2d[:, 0] = np.min(box2d[:, :, 0], axis=1) + standup_boxes2d[:, 1] = np.min(box2d[:, :, 1], axis=1) + standup_boxes2d[:, 2] = np.max(box2d[:, :, 0], axis=1) + standup_boxes2d[:, 3] = np.max(box2d[:, :, 1], axis=1) + + return standup_boxes2d + + +def corner_to_standup_box_torch(box_corner): + """ + Find the minmax x and y for each bounding box. + + Parameters + ---------- + box_corner : torch.Tensor + Shape: (N, 8, 3) or (N, 4) + + Returns + ------- + standup_box2d : torch.Tensor + (n, 4) + """ + N = box_corner.shape[0] + standup_boxes2d = torch.zeros((N, 4)) + + standup_boxes2d = standup_boxes2d.to(box_corner.device) + + standup_boxes2d[:, 0] = torch.min(box_corner[:, :, 0], dim=1).values + standup_boxes2d[:, 1] = torch.min(box_corner[:, :, 1], dim=1).values + standup_boxes2d[:, 2] = torch.max(box_corner[:, :, 0], dim=1).values + standup_boxes2d[:, 3] = torch.max(box_corner[:, :, 1], dim=1).values + + return standup_boxes2d + + +def project_box3d(box3d, transformation_matrix): + """ + Project the 3d bounding box to another coordinate system based on the + transfomration matrix. 
+ + Parameters + ---------- + box3d : torch.Tensor or np.ndarray + 3D bounding box, (N, 8, 3) + + transformation_matrix : torch.Tensor or np.ndarray + Transformation matrix, (4, 4) + + Returns + ------- + projected_box3d : torch.Tensor + The projected bounding box, (N, 8, 3) + """ + assert transformation_matrix.shape == (4, 4) + box3d, is_numpy = \ + common_utils.check_numpy_to_torch(box3d) + transformation_matrix, _ = \ + common_utils.check_numpy_to_torch(transformation_matrix) + + # (N, 3, 8) + box3d_corner = box3d.transpose(1, 2) + # (N, 1, 8) + torch_ones = torch.ones((box3d_corner.shape[0], 1, 8)) + torch_ones = torch_ones.to(box3d_corner.device) + # (N, 4, 8) + box3d_corner = torch.cat((box3d_corner, torch_ones), + dim=1) + # (N, 4, 8) + projected_box3d = torch.matmul(transformation_matrix, + box3d_corner) + # (N, 8, 3) + projected_box3d = projected_box3d[:, :3, :].transpose(1, 2) + + return projected_box3d if not is_numpy else projected_box3d.numpy() + + +def project_points_by_matrix_torch(points, transformation_matrix): + """ + Project the points to another coordinate system based on the + transfomration matrix. + + IT NOT USED. LATTER ONE WITH THE SAME NAME WILL BE USED. + + Parameters + ---------- + points : torch.Tensor + 3D points, (N, 3) + + transformation_matrix : torch.Tensor + Transformation matrix, (4, 4) + + Returns + ------- + projected_points : torch.Tensor + The projected points, (N, 3) + """ + # convert to homogeneous coordinates via padding 1 at the last dimension. + # (N, 4) + points_homogeneous = F.pad(points, (0, 1), mode="constant", value=1) + # (N, 4) + projected_points = torch.einsum("ik, jk->ij", points_homogeneous, + transformation_matrix) + return projected_points[:, :3] + + +def get_mask_for_boxes_within_range_torch(boxes, gt_range): + """ + Generate mask to remove the bounding boxes + outside the range. + + Parameters + ---------- + boxes : torch.Tensor + Groundtruth bbx, shape: N,8,3 or N,4,2 + + gt_range: list + [xmin, ymin, zmin, xmax, ymax, zmax] + Returns + ------- + mask: torch.Tensor + The mask for bounding box -- True means the + bbx is within the range and False means the + bbx is outside the range. + """ + + # mask out the gt bounding box out fixed range (-140, -40, -3, 140, 40 1) + device = boxes.device + boundary_lower_range = \ + torch.Tensor(gt_range[:2]).reshape(1, 1, -1).to(device) + boundary_higher_range = \ + torch.Tensor(gt_range[3:5]).reshape(1, 1, -1).to(device) + + mask = torch.all( + torch.all(boxes[:, :, :2] >= boundary_lower_range, + dim=-1) & \ + torch.all(boxes[:, :, :2] <= boundary_higher_range, + dim=-1), dim=-1) + + return mask + + +def mask_boxes_outside_range_numpy(boxes, limit_range, order, + min_num_corners=8, return_mask=False): + """ + Parameters + ---------- + boxes: np.ndarray + (N, 7) [x, y, z, dx, dy, dz, heading], (x, y, z) is the box center + + limit_range: list + [minx, miny, minz, maxx, maxy, maxz] + + min_num_corners: int + The required minimum number of corners to be considered as in range. + + order : str + 'lwh' or 'hwl' + + return_mask : bool + Whether return the mask. + + Returns + ------- + boxes: np.ndarray + The filtered boxes. 
+ """ + assert boxes.shape[1] == 8 or boxes.shape[1] == 7 + + new_boxes = boxes.copy() + if boxes.shape[1] == 7: + new_boxes = boxes_to_corners_3d(new_boxes, order) + + mask = ((new_boxes >= limit_range[0:3]) & + (new_boxes <= limit_range[3:6])).all(axis=2) + mask = mask.sum(axis=1) >= min_num_corners # (N) + + if return_mask: + return boxes[mask], mask + return boxes[mask] + + +def create_bbx(extent): + """ + Create bounding box with 8 corners under obstacle vehicle reference. + + Parameters + ---------- + extent : list + half length, width and height + + Returns + ------- + bbx : np.array + The bounding box with 8 corners, shape: (8, 3) + """ + + bbx = np.array([[extent[0], -extent[1], -extent[2]], + [extent[0], extent[1], -extent[2]], + [-extent[0], extent[1], -extent[2]], + [-extent[0], -extent[1], -extent[2]], + [extent[0], -extent[1], extent[2]], + [extent[0], extent[1], extent[2]], + [-extent[0], extent[1], extent[2]], + [-extent[0], -extent[1], extent[2]]]) + + return bbx + + +def project_world_objects(object_dict, + output_dict, + lidar_pose, + lidar_range, + order, + enlarge_z=False): + """ + Project the objects under world coordinates into another coordinate + based on the provided extrinsic. + + Parameters + ---------- + object_dict : dict + The dictionary contains all objects surrounding a certain cav. + + output_dict : dict + key: object id, value: object bbx (xyzlwhyaw). + + lidar_pose : list + (6, ), lidar pose under world coordinate, [x, y, z, roll, yaw, pitch]. + + lidar_range : list + [minx, miny, minz, maxx, maxy, maxz] + + order : str + 'lwh' or 'hwl' + """ + for object_id, object_content in object_dict.items(): + location = object_content['location'] + rotation = object_content['angle'] + center = [0,0,0] if 'center' not in object_content else object_content['center'] + extent = object_content['extent'] + + object_pose = [location[0] + center[0], + location[1] + center[1], + location[2] + center[2], + rotation[0], rotation[1], rotation[2]] + + + object2lidar = x1_to_x2(object_pose, lidar_pose) + + # shape (3, 8) + bbx = create_bbx(extent).T + # bounding box under ego coordinate shape (4, 8) + bbx = np.r_[bbx, [np.ones(bbx.shape[1])]] + + # project the 8 corners to world coordinate + bbx_lidar = np.dot(object2lidar, bbx).T + bbx_lidar = np.expand_dims(bbx_lidar[:, :3], 0) + bbx_lidar = corner_to_center(bbx_lidar, order=order) + + if enlarge_z: + lidar_range_z_larger = copy.deepcopy(lidar_range) + lidar_range_z_larger[2] -= 10 + lidar_range_z_larger[5] += 10 + lidar_range = lidar_range_z_larger + + bbx_lidar = mask_boxes_outside_range_numpy(bbx_lidar, + lidar_range, + order) + + if bbx_lidar.shape[0] > 0: + output_dict.update({object_id: bbx_lidar}) + + +def project_world_objects_v2x(object_dict, + output_dict, + reference_lidar_pose, + lidar_range, + order, + lidar_np): + """ + Project the objects under world coordinates into another coordinate + based on the provided extrinsic. + + Parameters + ---------- + object_dict : + gt boxes: numpy.ndarray (N,10) + [x,y,z,dx,dy,dz,w,a,b,c], dxdydz=lwh + object_ids: numpy.ndarray (N,) + + output_dict : dict + key: object id, value: object bbx (xyzlwhyaw). + + reference_lidar_pose : list + (6, ), lidar pose under world coordinate, [x, y, z, roll, yaw, pitch]. + + lidar_range : list + [minx, miny, minz, maxx, maxy, maxz] + + order : str + 'lwh' or 'hwl' + + lidar_np: np.ndarray + point cloud in ego coord. 
Used to determine if any lidar point hits the box + + + output_dict: [x,y,z, lwh or hwl, yaw] + """ + from icecream import ic + gt_boxes = object_dict['gt_boxes'] + object_ids = object_dict['object_ids'] + for i, object_content in enumerate(gt_boxes): + x,y,z,dx,dy,dz,w,a,b,c = object_content + + q = Quaternion([w,a,b,c]) + T_world_object = q.transformation_matrix + T_world_object[:3,3] = object_content[:3] + + T_world_lidar = x_to_world(reference_lidar_pose) + + object2lidar = np.linalg.solve(T_world_lidar, T_world_object) # T_lidar_object + + + # shape (3, 8). + # or we can use the create_bbx funcion. + x_corners = dx / 2 * np.array([ 1, 1, -1, -1, 1, 1, -1, -1]) # (8,) + y_corners = dy / 2 * np.array([-1, 1, 1, -1, -1, 1, 1, -1]) + z_corners = dz / 2 * np.array([-1, -1, -1, -1, 1, 1, 1, 1]) + + bbx = np.vstack((x_corners, y_corners, z_corners)) # (3, 8) + + # bounding box under ego coordinate shape (4, 8) + bbx = np.r_[bbx, [np.ones(bbx.shape[1])]] + + # project the 8 corners to world coordinate + bbx_lidar = np.dot(object2lidar, bbx).T # (8, 4) + bbx_lidar = np.expand_dims(bbx_lidar[:, :3], 0) # (1, 8, 3) + bbx_lidar = corner_to_center(bbx_lidar, order=order) + + lidar_range_z_larger = copy.deepcopy(lidar_range) + lidar_range_z_larger[2] -= 1 + lidar_range_z_larger[5] += 1 + + bbx_lidar = mask_boxes_outside_range_numpy(bbx_lidar, + lidar_range_z_larger, + order) + + + if bbx_lidar.shape[0] > 0: + output_dict.update({object_ids[i]: bbx_lidar}) + + +def get_points_in_rotated_box(p, box_corner): + """ + Get points within a rotated bounding box (2D version). + + Parameters + ---------- + p : numpy.array + Points to be tested with shape (N, 2). + box_corner : numpy.array + Corners of bounding box with shape (4, 2). + + Returns + ------- + p_in_box : numpy.array + Points within the box. + + """ + edge1 = box_corner[1, :] - box_corner[0, :] + edge2 = box_corner[3, :] - box_corner[0, :] + p_rel = p - box_corner[0, :].reshape(1, -1) + + l1 = get_projection_length_for_vector_projection(p_rel, edge1) + l2 = get_projection_length_for_vector_projection(p_rel, edge2) + # A point is within the box, if and only after projecting the + # point onto the two edges s.t. p_rel = [edge1, edge2] @ [l1, l2]^T, + # we have 0<=l1<=1 and 0<=l2<=1. + mask = np.logical_and(l1 >= 0, l1 <= 1) + mask = np.logical_and(mask, l2 >= 0) + mask = np.logical_and(mask, l2 <= 1) + p_in_box = p[mask, :] + return p_in_box + + +def get_points_in_rotated_box_3d(p, box_corner): + """ + Get points within a rotated bounding box (3D version). + + Parameters + ---------- + p : numpy.array + Points to be tested with shape (N, 3). + box_corner : numpy.array + Corners of bounding box with shape (8, 3). + + Returns + ------- + p_in_box : numpy.array + Points within the box. + + """ + edge1 = box_corner[1, :] - box_corner[0, :] + edge2 = box_corner[3, :] - box_corner[0, :] + edge3 = box_corner[4, :] - box_corner[0, :] + + p_rel = p - box_corner[0, :].reshape(1, -1) + + l1 = get_projection_length_for_vector_projection(p_rel, edge1) + l2 = get_projection_length_for_vector_projection(p_rel, edge2) + l3 = get_projection_length_for_vector_projection(p_rel, edge3) + # A point is within the box, if and only after projecting the + # point onto the two edges s.t. p_rel = [edge1, edge2] @ [l1, l2]^T, + # we have 0<=l1<=1 and 0<=l2<=1. 
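+    # For the 3D version the same criterion is applied to a third edge
+    # (bottom corner -> top corner), so a point lies inside the box only if
+    # all three normalized projection lengths l1, l2, l3 fall in [0, 1].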
+ mask1 = np.logical_and(l1 >= 0, l1 <= 1) + mask2 = np.logical_and(l2 >= 0, l2 <= 1) + mask3 = np.logical_and(l3 >= 0, l3 <= 1) + + mask = np.logical_and(mask1, mask2) + mask = np.logical_and(mask, mask3) + p_in_box = p[mask, :] + + return p_in_box + + +def get_projection_length_for_vector_projection(a, b): + """ + Get projection length for the Vector projection of a onto b s.t. + a_projected = length * b. (2D version) See + https://en.wikipedia.org/wiki/Vector_projection#Vector_projection_2 + for more details. + + Parameters + ---------- + a : numpy.array + The vectors to be projected with shape (N, 2). + + b : numpy.array + The vector that is projected onto with shape (2). + + Returns + ------- + length : numpy.array + The length of projected a with respect to b. + """ + assert np.sum(b ** 2, axis=-1) > 1e-6 + length = a.dot(b) / np.sum(b ** 2, axis=-1) + return length + + +def nms_rotated(boxes, scores, threshold): + """Performs rorated non-maximum suppression and returns indices of kept + boxes. + + Parameters + ---------- + boxes : torch.tensor + The location preds with shape (N, 4, 2). + + scores : torch.tensor + The predicted confidence score with shape (N,) + + threshold: float + IoU threshold to use for filtering. + + Returns + ------- + An array of index + """ + if boxes.shape[0] == 0: + return np.array([], dtype=np.int32) + boxes = boxes.cpu().detach().numpy() + scores = scores.cpu().detach().numpy() + + polygons = common_utils.convert_format(boxes) + + top = 1000 + # Get indicies of boxes sorted by scores (highest first) + ixs = scores.argsort()[::-1][:top] + + pick = [] + while len(ixs) > 0: + # Pick top box and add its index to the list + i = ixs[0] + pick.append(i) + # Compute IoU of the picked box with the rest + iou = common_utils.compute_iou(polygons[i], polygons[ixs[1:]]) + # Identify boxes with IoU over the threshold. This + # returns indices into ixs[1:], so add 1 to get + # indices into ixs. + remove_ixs = np.where(iou > threshold)[0] + 1 + # Remove indices of the picked and overlapped boxes. + ixs = np.delete(ixs, remove_ixs) + ixs = np.delete(ixs, 0) + + return np.array(pick, dtype=np.int32) + + +def nms_pytorch(boxes: torch.tensor, thresh_iou: float): + """ + Apply non-maximum suppression to avoid detecting too many + overlapping bounding boxes for a given object. + + Parameters + ---------- + boxes : torch.tensor + The location preds along with the class predscores, + Shape: [num_boxes,5]. + thresh_iou : float + (float) The overlap thresh for suppressing unnecessary boxes. 
+ Returns + ------- + A list of index + """ + + # we extract coordinates for every + # prediction box present in P + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + + # we extract the confidence scores as well + scores = boxes[:, 4] + + # calculate area of every block in P + areas = (x2 - x1) * (y2 - y1) + + # sort the prediction boxes in P + # according to their confidence scores + order = scores.argsort() + + # initialise an empty list for + # filtered prediction boxes + keep = [] + + while len(order) > 0: + + # extract the index of the + # prediction with highest score + # we call this prediction S + idx = order[-1] + + # push S in filtered predictions list + keep.append(idx.numpy().item() + if not idx.is_cuda else idx.cpu().detach().numpy().item()) + + # remove S from P + order = order[:-1] + + # sanity check + if len(order) == 0: + break + + # select coordinates of BBoxes according to + # the indices in order + xx1 = torch.index_select(x1, dim=0, index=order) + xx2 = torch.index_select(x2, dim=0, index=order) + yy1 = torch.index_select(y1, dim=0, index=order) + yy2 = torch.index_select(y2, dim=0, index=order) + + # find the coordinates of the intersection boxes + xx1 = torch.max(xx1, x1[idx]) + yy1 = torch.max(yy1, y1[idx]) + xx2 = torch.min(xx2, x2[idx]) + yy2 = torch.min(yy2, y2[idx]) + + # find height and width of the intersection boxes + w = xx2 - xx1 + h = yy2 - yy1 + + # take max with 0.0 to avoid negative w and h + # due to non-overlapping boxes + w = torch.clamp(w, min=0.0) + h = torch.clamp(h, min=0.0) + + # find the intersection area + inter = w * h + + # find the areas of BBoxes according the indices in order + rem_areas = torch.index_select(areas, dim=0, index=order) + + # find the union of every prediction T in P + # with the prediction S + # Note that areas[idx] represents area of S + union = (rem_areas - inter) + areas[idx] + + # find the IoU of every prediction in P with S + IoU = inter / union + + # keep the boxes with IoU less than thresh_iou + mask = IoU < thresh_iou + order = order[mask] + + return keep + + +def remove_large_pred_bbx(bbx_3d): + """ + Remove large bounding box. + + Parameters + ---------- + bbx_3d : torch.Tensor + Predcited 3d bounding box, shape:(N,8,3) + + Returns + ------- + index : torch.Tensor + The keep index. + """ + bbx_x_max = torch.max(bbx_3d[:, :, 0], dim=1)[0] + bbx_x_min = torch.min(bbx_3d[:, :, 0], dim=1)[0] + x_len = bbx_x_max - bbx_x_min + + bbx_y_max = torch.max(bbx_3d[:, :, 1], dim=1)[0] + bbx_y_min = torch.min(bbx_3d[:, :, 1], dim=1)[0] + y_len = bbx_y_max - bbx_y_min + + bbx_z_max = torch.max(bbx_3d[:, :, 1], dim=1)[0] + bbx_z_min = torch.min(bbx_3d[:, :, 1], dim=1)[0] + z_len = bbx_z_max - bbx_z_min + + index = torch.logical_and(x_len <= 6, y_len <= 6) + index = torch.logical_and(index, z_len) + + return index + + +def remove_bbx_abnormal_z(bbx_3d): + """ + Remove bounding box that has negative z axis. + + Parameters + ---------- + bbx_3d : torch.Tensor + Predcited 3d bounding box, shape:(N,8,3) + + Returns + ------- + index : torch.Tensor + The keep index. + """ + bbx_z_min = torch.min(bbx_3d[:, :, 2], dim=1)[0] + bbx_z_max = torch.max(bbx_3d[:, :, 2], dim=1)[0] + # NOTE gjliu: (-3, 5) -> (-100, 100) + index = torch.logical_and(bbx_z_min >= -100, bbx_z_max <= 100) + + return index + + +def project_points_by_matrix_torch(points, transformation_matrix): + """ + Project the points to another coordinate system based on the + transformation matrix. 
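+
+    Unlike the earlier definition of the same name (which is shadowed by this
+    one and therefore never used), this version accepts either np.ndarray or
+    torch.Tensor input and returns the projected points in the same type.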
+ + Parameters + ---------- + points : torch.Tensor + 3D points, (N, 3) + transformation_matrix : torch.Tensor + Transformation matrix, (4, 4) + Returns + ------- + projected_points : torch.Tensor + The projected points, (N, 3) + """ + points, is_numpy = \ + common_utils.check_numpy_to_torch(points) + transformation_matrix, _ = \ + common_utils.check_numpy_to_torch(transformation_matrix) + + # convert to homogeneous coordinates via padding 1 at the last dimension. + # (N, 4) + points_homogeneous = F.pad(points, (0, 1), mode="constant", value=1) + # (N, 4) + projected_points = torch.einsum("ik, jk->ij", points_homogeneous, + transformation_matrix) + + return projected_points[:, :3] if not is_numpy \ + else projected_points[:, :3].numpy() + + +def box_encode( + boxes, + anchors, + encode_angle_to_vector=False, + encode_angle_with_residual=False, + smooth_dim=False, + norm_velo=False +): + """box encode for VoxelNet + Args: + boxes ([N, 7] Tensor): normal boxes: x, y, z, w, l, h, r. + anchors ([N, 7] Tensor): anchors. + """ + + box_ndim = anchors.shape[-1] + + if box_ndim == 7: + xa, ya, za, wa, la, ha, ra = torch.split(anchors, 1, dim=-1) + xg, yg, zg, wg, lg, hg, rg = torch.split(boxes, 1, dim=-1) + else: + xa, ya, za, wa, la, ha, vxa, vya, ra = torch.split(anchors, 1, dim=-1) + xg, yg, zg, wg, lg, hg, vxg, vyg, rg = torch.split(boxes, 1, dim=-1) + + diagonal = torch.sqrt(la ** 2 + wa ** 2) + xt = (xg - xa) / diagonal + yt = (yg - ya) / diagonal + zt = (zg - za) / ha + + if smooth_dim: + lt = lg / la - 1 + wt = wg / wa - 1 + ht = hg / ha - 1 + else: + lt = torch.log(lg / la) + wt = torch.log(wg / wa) + ht = torch.log(hg / ha) + + ret = [xt, yt, zt, wt, lt, ht] + + if box_ndim > 7: + if norm_velo: + vxt = (vxg - vxa) / diagonal + vyt = (vyg - vya) / diagonal + else: + vxt = vxg - vxa + vyt = vyg - vya + ret.extend([vxt, vyt]) + + if encode_angle_to_vector: + rgx = torch.cos(rg) + rgy = torch.sin(rg) + if encode_angle_with_residual: + rax = torch.cos(ra) + ray = torch.sin(ra) + rtx = rgx - rax + rty = rgy - ray + ret.extend([rtx, rty]) + else: + ret.extend([rgx, rgy]) + else: + rt = rg - ra + ret.append(rt) + + return torch.cat(ret, dim=-1) + + +def box_decode( + box_encodings, + anchors, + encode_angle_to_vector=False, + encode_angle_with_residual=False, + bin_loss=False, + smooth_dim=False, + norm_velo=False, +): + """box decode for VoxelNet in lidar + Args: + boxes ([N, 7] Tensor): normal boxes: x, y, z, w, l, h, r + anchors ([N, 7] Tensor): anchors + """ + box_ndim = anchors.shape[-1] + + if box_ndim == 9: # False + xa, ya, za, wa, la, ha, vxa, vya, ra = torch.split(anchors, 1, dim=-1) + if encode_angle_to_vector: + xt, yt, zt, wt, lt, ht, vxt, vyt, rtx, rty = torch.split(box_encodings, 1, dim=-1) + else: + xt, yt, zt, wt, lt, ht, vxt, vyt, rt = torch.split(box_encodings, 1, dim=-1) + + elif box_ndim == 7: + xa, ya, za, wa, la, ha, ra = torch.split(anchors, 1, dim=-1) + if encode_angle_to_vector: # False + xt, yt, zt, wt, lt, ht, rtx, rty = torch.split(box_encodings, 1, dim=-1) + else: + xt, yt, zt, wt, lt, ht, rt = torch.split(box_encodings, 1, dim=-1) + + diagonal = torch.sqrt(la ** 2 + wa ** 2) + xg = xt * diagonal + xa + yg = yt * diagonal + ya + zg = zt * ha + za + + ret = [xg, yg, zg] + + if smooth_dim: # False + lg = (lt + 1) * la + wg = (wt + 1) * wa + hg = (ht + 1) * ha + else: + lg = torch.exp(lt) * la + wg = torch.exp(wt) * wa + hg = torch.exp(ht) * ha + ret.extend([wg, lg, hg]) + + if encode_angle_to_vector: # False + if encode_angle_with_residual: + rax = torch.cos(ra) + ray 
= torch.sin(ra) + rgx = rtx + rax + rgy = rty + ray + rg = torch.atan2(rgy, rgx) + else: + rg = torch.atan2(rty, rtx) + else: + rg = rt + ra + + if box_ndim > 7: # False + if norm_velo: + vxg = vxt * diagonal + vxa + vyg = vyt * diagonal + vya + else: + vxg = vxt + vxa + vyg = vyt + vya + ret.extend([vxg, vyg]) + + ret.append(rg) + + return torch.cat(ret, dim=-1) + + +def project_world_objects_dairv2x(object_list, + output_dict, + lidar_pose, + lidar_range, + order): + """ + Project the objects under world coordinates into another coordinate + based on the provided extrinsic. + + Parameters + ---------- + object_list : list + The list contains all objects surrounding a certain cav. + + output_dict : dict + key: object id, value: object bbx (xyzlwhyaw). + + lidar_pose : list + (6, ), lidar pose under world coordinate, [x, y, z, roll, yaw, pitch]. + + lidar_range : list + [minx, miny, minz, maxx, maxy, maxz] + + order : str + 'lwh' or 'hwl' + """ + i = 0 + + for object_content in object_list: + object_id = i + i = i + 1 + lidar_to_world = x_to_world(lidar_pose) # T_world_lidar + world_to_lidar = np.linalg.inv(lidar_to_world) + + corners_world = np.array(object_content['world_8_points']) # [8,3] + corners_world_homo = np.pad(corners_world, ((0,0), (0,1)), constant_values=1) # [8, 4] + corners_lidar = (world_to_lidar @ corners_world_homo.T).T + + lidar_range_z_larger = copy.deepcopy(lidar_range) + lidar_range_z_larger[2] -= 1 + lidar_range_z_larger[5] += 1 + + bbx_lidar = corners_lidar + bbx_lidar = np.expand_dims(bbx_lidar[:, :3], 0) # [1, 8, 3] + bbx_lidar = corner_to_center(bbx_lidar, order=order) + bbx_lidar = mask_boxes_outside_range_numpy(bbx_lidar, lidar_range_z_larger, order) + if bbx_lidar.shape[0] > 0: + output_dict.update({object_id: bbx_lidar}) + + +def load_single_objects_dairv2x(object_list, + output_dict, + lidar_range, + order): + """ + + Parameters + ---------- + object_list : list + The list contains all objects surrounding a certain cav. + + output_dict : dict + key: object id, value: object bbx (xyzlwhyaw). 
+ + lidar_range : list + [minx, miny, minz, maxx, maxy, maxz] + + order : str + 'lwh' or 'hwl' + """ + + i = 0 + for object_content in object_list: + object_id = i + if 'rotation' not in object_content: + print(object_content) + x = object_content['3d_location']['x'] + y = object_content['3d_location']['y'] + z = object_content['3d_location']['z'] + l = object_content['3d_dimensions']['l'] + h = object_content['3d_dimensions']['h'] + w = object_content['3d_dimensions']['w'] + rotation = object_content['rotation'] + + if isinstance(x, str): # in camera label, xyz are str + x = eval(x) + y = eval(y) + z = eval(z) + + if l==0 or h ==0 or w==0: + continue + i = i + 1 + + lidar_range_z_larger = copy.deepcopy(lidar_range) + lidar_range_z_larger[2] -= 1 + lidar_range_z_larger[5] += 1 + + bbx_lidar = [x,y,z,h,w,l,rotation] if order=="hwl" else [x,y,z,l,w,h,rotation] # suppose order is in ['hwl', 'lwh'] + bbx_lidar = np.array(bbx_lidar).reshape(1,-1) # [1,7] + + bbx_lidar = mask_boxes_outside_range_numpy(bbx_lidar, lidar_range_z_larger, order) + if bbx_lidar.shape[0] > 0: + if object_content['type'] == "Car" or \ + object_content['type'] == "Van" or \ + object_content['type'] == "Truck" or \ + object_content['type'] == "Bus": + output_dict.update({object_id: bbx_lidar}) + + + + +def load_single_objects_dairv2x_hetero(object_list, + output_dict, + lidar_range, + trans_mat, + order): + """ + + Parameters + ---------- + object_list : list + The list contains all objects surrounding a certain cav. + + output_dict : dict + key: object id, value: object bbx (xyzlwhyaw). + + lidar_range : list + [minx, miny, minz, maxx, maxy, maxz] + + order : str + 'lwh' or 'hwl' + """ + + i = 0 + for object_content in object_list: + object_id = i + x = object_content['3d_location']['x'] + y = object_content['3d_location']['y'] + z = object_content['3d_location']['z'] + l = object_content['3d_dimensions']['l'] + h = object_content['3d_dimensions']['h'] + w = object_content['3d_dimensions']['w'] + rotation = object_content['rotation'] + + if isinstance(x, str): # in camera label, xyz are str + x = eval(x) + y = eval(y) + z = eval(z) + + if l==0 or h ==0 or w==0: + continue + i = i + 1 + + lidar_range_z_larger = copy.deepcopy(lidar_range) + lidar_range_z_larger[2] -= 1 + lidar_range_z_larger[5] += 1 + + bbx_lidar = [x,y,z,h,w,l,rotation] if order=="hwl" else [x,y,z,l,w,h,rotation] # suppose order is in ['hwl', 'lwh'] + bbx_lidar = np.array(bbx_lidar).reshape(1,-1) # [1,7] + bbx_lidar_ego = corner_to_center( + project_box3d(boxes_to_corners_3d(bbx_lidar, order), trans_mat) , order=order) + bbx_lidar_ego = mask_boxes_outside_range_numpy(bbx_lidar_ego, lidar_range_z_larger, order) + + if bbx_lidar_ego.shape[0] > 0: + if object_content['type'] == "Car" or \ + object_content['type'] == "Van" or \ + object_content['type'] == "Truck" or \ + object_content['type'] == "Bus": + output_dict.update({object_id: bbx_lidar_ego}) + + + +def box_is_visible(bbx_lidar, visibility_map): + """ + fitler bbx_lidar by visibility map. + + Parameters: + + (0,0)------------px + | ^ x | + | | | + | o---> y | + | | + | | + py-----------------(256,256) + + bbx_lidar : np.ndarray + (1, 7), x, y, z, dx, dy, dz, yaw. dx,dy,dz follows order. + + visibility_map : np.ndarray + (256, 256). Non zero is visible. 
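+
+    A small worked example of the rasterization performed below (0.39 m per
+    pixel, ego at pixel (127, 127), +x up, +y right): a box centre at
+    (x, y) = (10.0, -5.0) gives py = 127 - int(10.0 / 0.39) = 102 and
+    px = 127 + int(-5.0 / 0.39) = 115, so the box counts as visible iff
+    visibility_map[102, 115] > 0.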
+ """ + + x, y = bbx_lidar[0,:2] + + # rasterize x and y + py = 127 - int(x/0.39) + px = 127 + int(y/0.39) + + if py < 0 or py >= 256 or px < 0 or px >= 256: + return False + + return visibility_map[py, px] > 0 + + +def project_world_visible_objects(object_dict, + output_dict, + lidar_pose, + lidar_range, + order, + visibility_map, + enlarge_z = False): + """ + It's used by CameraDataset. Filtered by visibility map. + + Project the objects under world coordinates into another coordinate + based on the provided extrinsic. + + Parameters + ---------- + object_dict : dict + The dictionary contains all objects surrounding a certain cav. + + output_dict : dict + key: object id, value: object bbx (xyzlwhyaw). + + lidar_pose : list + (6, ), lidar pose under world coordinate, [x, y, z, roll, yaw, pitch]. + + lidar_range : list + [minx, miny, minz, maxx, maxy, maxz] + + order : str + 'lwh' or 'hwl' + + visibility_map : np.ndarray + for OPV2V, its 256*256 resolution. 0.39m per pixel. heading up. + """ + for object_id, object_content in object_dict.items(): + location = object_content['location'] + rotation = object_content['angle'] + center = [0,0,0] if 'center' not in object_content else object_content['center'] + extent = object_content['extent'] + + object_pose = [location[0] + center[0], + location[1] + center[1], + location[2] + center[2], + rotation[0], rotation[1], rotation[2]] + + + object2lidar = x1_to_x2(object_pose, lidar_pose) + + # shape (3, 8) + bbx = create_bbx(extent).T + # bounding box under ego coordinate shape (4, 8) + bbx = np.r_[bbx, [np.ones(bbx.shape[1])]] + + # project the 8 corners to world coordinate + bbx_lidar = np.dot(object2lidar, bbx).T + bbx_lidar = np.expand_dims(bbx_lidar[:, :3], 0) + bbx_lidar = corner_to_center(bbx_lidar, order=order) + if enlarge_z: + lidar_range_z_larger = copy.deepcopy(lidar_range) + lidar_range_z_larger[2] -= 10 + lidar_range_z_larger[5] += 10 + lidar_range = lidar_range_z_larger + + bbx_lidar = mask_boxes_outside_range_numpy(bbx_lidar, + lidar_range, + order) + + if bbx_lidar.shape[0] > 0 and box_is_visible(bbx_lidar, visibility_map): + output_dict.update({object_id: bbx_lidar}) + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/camera_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/camera_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..cd7d365b88a6b73a7d9229f5945c77e216e4b6de --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/camera_utils.py @@ -0,0 +1,334 @@ +from PIL import Image +import numpy as np +import torch +import torchvision +import cv2 +import math +from shapely.geometry import Point, MultiPoint + +def load_camera_data(camera_files, preload=True): + """ + Args: + camera_files: list, + store camera path + shape : tuple + (width, height), resize the image, and overcoming the lazy loading. 
+ Returns: + camera_data_list: list, + list of Image, RGB order + """ + camera_data_list = [] + for camera_file in camera_files: + camera_data = Image.open(camera_file) + if preload: + camera_data = camera_data.copy() + camera_data_list.append(camera_data) + return camera_data_list + + +def sample_augmentation(data_aug_conf, is_train): + """ + https://github.com/nv-tlabs/lift-splat-shoot/blob/d74598cb51101e2143097ab270726a561f81f8fd/src/data.py#L96 + """ + H, W = data_aug_conf['H'], data_aug_conf['W'] + fH, fW = data_aug_conf['final_dim'] + if is_train: + resize = np.random.uniform(*data_aug_conf['resize_lim']) + resize_dims = (int(W*resize), int(H*resize)) + newW, newH = resize_dims + crop_h = int((1 - np.random.uniform(*data_aug_conf['bot_pct_lim']))*newH) - fH + crop_w = int(np.random.uniform(0, max(0, newW - fW))) + crop = (crop_w, crop_h, crop_w + fW, crop_h + fH) # [x_start, y_start, x_end, y_end] + flip = False + if data_aug_conf['rand_flip'] and np.random.choice([0, 1]): + flip = True + rotate = np.random.uniform(*data_aug_conf['rot_lim']) + else: + resize = max(fH/H, fW/W) + resize_dims = (int(W*resize), int(H*resize)) + newW, newH = resize_dims + crop_h = int((1 - np.mean(data_aug_conf['bot_pct_lim']))*newH) - fH + crop_w = int(max(0, newW - fW) / 2) + crop = (crop_w, crop_h, crop_w + fW, crop_h + fH) + flip = False + rotate = 0 + return resize, resize_dims, crop, flip, rotate + + +def img_transform(imgs, post_rot, post_tran, + resize, resize_dims, crop, + flip, rotate): + imgs_output = [] + for img in imgs: + # adjust image + img = img.resize(resize_dims) + img = img.crop(crop) + if flip: + img = img.transpose(method=Image.FLIP_LEFT_RIGHT) + img = img.rotate(rotate) + imgs_output.append(img) + + + # post-homography transformation + post_rot *= resize + post_tran -= torch.Tensor(crop[:2]) + + if flip: + A = torch.Tensor([[-1, 0], [0, 1]]) + b = torch.Tensor([crop[2] - crop[0], 0]) + post_rot = A.matmul(post_rot) + post_tran = A.matmul(post_tran) + b + + A = get_rot(rotate/180*np.pi) + b = torch.Tensor([crop[2] - crop[0], crop[3] - crop[1]]) / 2 # [x_start, y_start, x_end, y_end] + b = A.matmul(-b) + b + post_rot = A.matmul(post_rot) + post_tran = A.matmul(post_tran) + b + + return imgs_output, post_rot, post_tran + +def get_rot(h): + return torch.Tensor([ + [np.cos(h), np.sin(h)], + [-np.sin(h), np.cos(h)], + ]) + +class NormalizeInverse(torchvision.transforms.Normalize): + # https://discuss.pytorch.org/t/simple-way-to-inverse-transform-normalization/4821/8 + def __init__(self, mean, std): + mean = torch.as_tensor(mean) + std = torch.as_tensor(std) + std_inv = 1 / (std + 1e-7) + mean_inv = -mean * std_inv + super().__init__(mean=mean_inv, std=std_inv) + + def __call__(self, tensor): + return super().__call__(tensor.clone()) + + +denormalize_img = torchvision.transforms.Compose(( + NormalizeInverse(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + torchvision.transforms.ToPILImage(), + )) + + +normalize_img = torchvision.transforms.Compose(( + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), +)) + +img_to_tensor = torchvision.transforms.ToTensor() # [0,255] -> [0,1] + + +def gen_dx_bx(xbound, ybound, zbound): + dx = torch.Tensor([row[2] for row in [xbound, ybound, zbound]]) + bx = torch.Tensor([row[0] + row[2]/2.0 for row in [xbound, ybound, zbound]]) + nx = torch.LongTensor([(row[1] - row[0]) / row[2] for row in [xbound, ybound, zbound]]) + + return dx, bx, nx + + +def bin_depths(depth_map, 
mode, depth_min, depth_max, num_bins, target=True): + """ + Converts depth map into bin indices + Args: + depth_map [torch.Tensor(H, W)]: Depth Map + mode [string]: Discretiziation mode (See https://arxiv.org/pdf/2005.13423.pdf for more details) + UD: Uniform discretiziation + LID: Linear increasing discretiziation + SID: Spacing increasing discretiziation + depth_min [float]: Minimum depth value + depth_max [float]: Maximum depth value + num_bins [int]: Number of depth bins + target [bool]: Whether the depth bins indices will be used for a target tensor in loss comparison + Returns: + indices [torch.Tensor(H, W)]: Depth bin indices + """ + if mode == "UD": + bin_size = (depth_max - depth_min) / num_bins + indices = ((depth_map - depth_min) / bin_size) + elif mode == "LID": + bin_size = 2 * (depth_max - depth_min) / (num_bins * (1 + num_bins)) + indices = -0.5 + 0.5 * torch.sqrt(1 + 8 * (depth_map - depth_min) / bin_size) + elif mode == "SID": + indices = num_bins * (torch.log(1 + depth_map) - math.log(1 + depth_min)) / \ + (math.log(1 + depth_max) - math.log(1 + depth_min)) + else: + raise NotImplementedError + + if target: + # Remove indicies outside of bounds + # mask = (indices < 0) | (indices > num_bins) | (~torch.isfinite(indices)) + # indices[mask] = num_bins + indices[indices < 0] = 0 + indices[indices >= num_bins] = num_bins - 1 + indices[~torch.isfinite(indices)] = num_bins - 1 + + # Convert to integer + indices = indices.type(torch.int64) + return indices, None + else: + # mask indices outside of bounds + mask = (indices < 0) | (indices >= num_bins) | (~torch.isfinite(indices)) + indices[indices < 0] = 0 + indices[indices >= num_bins] = num_bins - 1 + indices[~torch.isfinite(indices)] = num_bins - 1 + + # Convert to integer + indices = indices.type(torch.int64) + return indices, ~mask + +def depth_discretization(depth_min, depth_max, num_bins, mode): + if mode == "UD": + bin_size = (depth_max - depth_min) / num_bins + depth_discre = depth_min + bin_size * np.arange(num_bins) + elif mode == "LID": + bin_size = 2 * (depth_max - depth_min) / (num_bins * (1 + num_bins)) + depth_discre = depth_min + bin_size * (np.arange(num_bins) * np.arange(1, 1+num_bins)) / 2 + else: + raise NotImplementedError + return depth_discre + +def indices_to_depth(indices, depth_min, depth_max, num_bins, mode): + if mode == "UD": + bin_size = (depth_max - depth_min) / num_bins + depth = indices * bin_size + depth_min + elif mode == "LID": + bin_size = 2 * (depth_max - depth_min) / (num_bins * (1 + num_bins)) + depth = depth_min + bin_size * (indices * (indices+1)) / 2 + else: + raise NotImplementedError + return depth + +def cumsum_trick(x, geom_feats, ranks): + x = x.cumsum(0) + kept = torch.ones(x.shape[0], device=x.device, dtype=torch.bool) + kept[:-1] = (ranks[1:] != ranks[:-1]) + + x, geom_feats = x[kept], geom_feats[kept] + x = torch.cat((x[:1], x[1:] - x[:-1])) + + return x, geom_feats + + +class QuickCumsum(torch.autograd.Function): + @staticmethod + def forward(ctx, x, geom_feats, ranks): + x = x.cumsum(0) + kept = torch.ones(x.shape[0], device=x.device, dtype=torch.bool) + kept[:-1] = (ranks[1:] != ranks[:-1]) + + x, geom_feats = x[kept], geom_feats[kept] + x = torch.cat((x[:1], x[1:] - x[:-1])) + + # save kept for backward + ctx.save_for_backward(kept) + + # no gradient for geom_feats + ctx.mark_non_differentiable(geom_feats) + + return x, geom_feats + + @staticmethod + def backward(ctx, gradx, gradgeom): + kept, = ctx.saved_tensors + back = torch.cumsum(kept, 0) + back[kept] -= 1 + + val = 
gradx[back] + + return val, None, None + +def coord_3d_to_2d(gt_box3d, int_matrix, ext_matrix, image_H=600, image_W=800, image=None, idx=None): + """ + Projects XYZ points onto the canvas and returns the projected canvas + coordinates. + + Args: + gt_box3d : np.ndarray + shape (N, 8, 3). point coord in world (LiDAR) coordinate. + int_matrix : np.ndarray + shape (4, 4) + ext_matrix : np.ndarray + shape (4, 4), T_wc, transform point in camera coord to world coord. + + Returns: + gt_box2d : np.ndarray + shape (N, 8, 2). pixel coord (u, v) in the image. You may want to flip them for image data indexing. + gt_box2d_mask : np.ndarray (bool) + shape (N,). If false, this box is out of image boundary + fg_mask : np.ndarray + shape (image_H, image_W), 1 means foreground, 0 means background + """ + N = gt_box3d.shape[0] + xyz = gt_box3d.reshape(-1, 3) # (N*8, 3) + + xyz_hom = np.concatenate( + [xyz, np.ones((xyz.shape[0], 1), dtype=np.float32)], axis=1) + + ext_matrix = np.linalg.inv(ext_matrix)[:3,:4] + img_pts = (int_matrix @ ext_matrix @ xyz_hom.T).T + + depth = img_pts[:, 2] + uv = img_pts[:, :2] / depth[:, None] + uv_int = uv.round().astype(np.int32) # [N*8, 2] + + + # o--------> u + # | + # | + # | + # v v + + + valid_mask1 = ((uv_int[:, 0] >= 0) & (uv_int[:, 0] < image_W) & + (uv_int[:, 1] >= 0) & (uv_int[:, 1] < image_H)).reshape(N, 8) + + valid_mask2 = ((depth > 0.5) & (depth < 100)).reshape(N, 8) + gt_box2d_mask = valid_mask1.any(axis=1) & valid_mask2.all(axis=1) # [N, ] + + gt_box2d = uv_int.reshape(N, 8, 2) # [N, 8, 2] + gt_box2d_u = np.clip(gt_box2d[:,:,0], 0, image_W-1) + gt_box2d_v = np.clip(gt_box2d[:,:,1], 0, image_H-1) + gt_box2d = np.stack((gt_box2d_u, gt_box2d_v), axis=-1) # [N, 8, 2] + + # create fg/bg mask + fg_mask = np.zeros((image_H, image_W)) + for gt_box in gt_box2d[gt_box2d_mask]: + u_min = gt_box[:,0].min() + v_min = gt_box[:,1].min() + u_max = gt_box[:,0].max() + v_max = gt_box[:,1].max() + fg_mask[v_min:v_max, u_min:u_max] = 1 + # poly = MultiPoint(gt_box).convex_hull + # cv2.fillConvexPoly(fg_mask, np.array(list(zip(*poly.exterior.coords.xy)), dtype=np.int32), 1) + + DEBUG = False + if DEBUG: + from matplotlib import pyplot as plt + plt.imshow(image) + for i in range(N): + if gt_box2d_mask[i]: + coord2d = gt_box2d[i] + for start, end in [(0, 1), (1, 2), (2, 3), (3, 0), + (0, 4), (1, 5), (2, 6), (3, 7), + (4, 5), (5, 6), (6, 7), (7, 4)]: + plt.plot(coord2d[[start,end]][:,0], coord2d[[start,end]][:,1], marker="o", c='g') + plt.savefig(f"/GPFS/rhome/yifanlu/OpenCOOD/vis_result/dairv2x_lss_vehonly/image_gt_box2d_{idx}.png", dpi=300) + plt.clf() + plt.imshow(fg_mask) + plt.savefig(f"/GPFS/rhome/yifanlu/OpenCOOD/vis_result/dairv2x_lss_vehonly/image_gt_box2d_{idx}_mask.png", dpi=300) + plt.clf() + + + return gt_box2d, gt_box2d_mask, fg_mask + + +def load_intrinsic_DAIR_V2X(int_dict): + # cam_D : [5, ], what'is this... 
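+    # cam_D is presumably the plumb-bob distortion coefficients
+    # [k1, k2, p1, p2, k3]; it is read below but not used -- only the 3x3
+    # intrinsic matrix reshaped from cam_K is returned.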
+ # cam_K : [9, ] + cam_D = int_dict['cam_D'] + cam_K = int_dict['cam_K'] + return np.array(cam_K).reshape(3,3) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/cleanup_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/cleanup_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..b9582e6042a071b5aae4a2f140f213d42957243e --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/cleanup_utils.py @@ -0,0 +1,20 @@ +import glob +import os +import sys + +def clean_all_numeric_checkpoint(path): + """ + remove all intermediate checkpoint except bestval + + path: str, + a path to log directory + """ + file_list = glob.glob(os.path.join(path, "net_epoch[0-9]*.pth")) + for file in file_list: + os.remove(file) + + +if __name__ == "__main__": + path = sys.argv[1] + assert os.path.isdir(path) + clean_all_numeric_checkpoint(path) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/common_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/common_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..96fe1cfd4218b3d81af9e11879657f4094c24396 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/common_utils.py @@ -0,0 +1,330 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , Hao Xiang , +# License: TDG-Attribution-NonCommercial-NoDistrib + + +""" +Common utilities +""" + +import numpy as np +import torch +from shapely.geometry import Polygon +import json +import pickle +from collections import OrderedDict + +def update_dict(d1,d2): + ''' + credit: https://github.com/yutu-75/update_dict/blob/main/update_dict/update_dict.py + + :param d1: Default nested dictionary,默认嵌套字典; + :param d2: Updated dictionary 需要更新的字典; + :return d1: + Return a dict merged from default and custom + # >>> recursive_update('a', 'b') + Traceback (most recent call last): + ... + TypeError: Params of update_dict should be dicts + # >>> update_dict({'a':{"b":{"c":{"d"}}},"e":{"e1":{"e5":'qwq'}},"e5": {},"ss":"1111"}, + {"e5":'www',"ss":"ssss",'c':{},'ss1':'ss'}) + {'a': {'b': {'c': {}}}, 'e': {'e1': {'e5': 'www'}}, 'e5': 'www', 'ss': 'ssss' + # >>> update_dict({'a':{"b":{"c":{"d":'c'}}},"e":{"e1":{"e5":'qwq'}},"e5": {},"ss":"1111"},{"d":'www'}) + {'a': {'b': {'c': {'d': 'www'}}}, 'e': {'e1': {'e5': 'qwq'}}, 'e5': {}, 'ss': '1111'} + # >>> update_dict({'a': {'c': 1, 'd': {}}, 'b': 4}, {'a': 2}) + {'a': 2, 'b': 4} + ''' + + if not isinstance(d1, dict) or not isinstance(d2, dict): + raise TypeError('Params of update_dict should be dicts') + for i in d1: + if d2.get(i, None) is not None: + d1[i] = d2[i] + if isinstance(d1[i], dict): + update_dict(d1[i],d2) + return d1 + + +def merge_features_to_dict(processed_feature_list, merge=None): + """ + Merge the preprocessed features from different cavs to the same + dictionary. + + Parameters + ---------- + processed_feature_list : list + A list of dictionary containing all processed features from + different cavs. + merge : "stack" or "cat". used for images + + Returns + ------- + merged_feature_dict: dict + key: feature names, value: list of features. 
+ """ + + if len(processed_feature_list) == 0: + return None + + merged_feature_dict = OrderedDict() + + for i in range(len(processed_feature_list)): + for feature_name, feature in processed_feature_list[i].items(): + if feature_name not in merged_feature_dict: + merged_feature_dict[feature_name] = [] + if isinstance(feature, list): + merged_feature_dict[feature_name] += feature + else: + merged_feature_dict[feature_name].append(feature) # merged_feature_dict['coords'] = [f1,f2,f3,f4] + + # stack them + # it usually happens when merging cavs images -> v.shape = [N, Ncam, C, H, W] + # cat them + # it usually happens when merging batches cav images -> v is a list [(N1+N2+...Nn, Ncam, C, H, W))] + if merge=='stack': + for feature_name, features in merged_feature_dict.items(): + merged_feature_dict[feature_name] = torch.stack(features, dim=0) + elif merge=='cat': + for feature_name, features in merged_feature_dict.items(): + merged_feature_dict[feature_name] = torch.cat(features, dim=0) + + return merged_feature_dict + +def load_pkl_files(pkl_path): + with open(pkl_path, 'rb') as f: + data = pickle.load(f) + return data + +def read_json(file_path): + with open(file_path, 'r') as f: + data = json.load(f) + + return data + +def limit_period(val, offset=0.5, period=2*np.pi): + """ + continous part: + [0 - period * offset, period - period * offset) + """ + # 首先,numpy格式数据转换为torch格式 + val, is_numpy = check_numpy_to_torch(val) + # 将方位角限制在[-pi, pi] + ans = val - torch.floor(val / period + offset) * period + return ans.numpy() if is_numpy else ans + + +def check_numpy_to_torch(x): + if isinstance(x, np.ndarray): + return torch.from_numpy(x).float(), True + return x, False + +def check_torch_to_numpy(x): + if isinstance(x, torch.tensor): + return x.cpu().numpy(), True + return x, False + + +def check_contain_nan(x): + if isinstance(x, dict): + return any(check_contain_nan(v) for k, v in x.items()) + if isinstance(x, list): + return any(check_contain_nan(itm) for itm in x) + if isinstance(x, int) or isinstance(x, float): + return False + if isinstance(x, np.ndarray): + return np.any(np.isnan(x)) + return torch.any(x.isnan()).detach().cpu().item() + + +def rotate_points_along_z(points, angle): + """ + Args: + points: (B, N, 3 + C) + angle: (B), radians, angle along z-axis, angle increases x ==> y + Returns: + + """ + points, is_numpy = check_numpy_to_torch(points) + angle, _ = check_numpy_to_torch(angle) + + cosa = torch.cos(angle) + sina = torch.sin(angle) + zeros = angle.new_zeros(points.shape[0]) + ones = angle.new_ones(points.shape[0]) + rot_matrix = torch.stack(( + cosa, sina, zeros, + -sina, cosa, zeros, + zeros, zeros, ones + ), dim=1).view(-1, 3, 3).float() + points_rot = torch.matmul(points[:, :, 0:3].float(), rot_matrix) + points_rot = torch.cat((points_rot, points[:, :, 3:]), dim=-1) + return points_rot.numpy() if is_numpy else points_rot + + +def rotate_points_along_z_2d(points, angle): + """ + Rorate the points along z-axis. + Parameters + ---------- + points : torch.Tensor / np.ndarray + (N, 2). 
+ angle : torch.Tensor / np.ndarray + (N,) + + Returns + ------- + points_rot : torch.Tensor / np.ndarray + Rorated points with shape (N, 2) + + """ + points, is_numpy = check_numpy_to_torch(points) + angle, _ = check_numpy_to_torch(angle) + cosa = torch.cos(angle) + sina = torch.sin(angle) + # (N, 2, 2) + rot_matrix = torch.stack((cosa, sina, -sina, cosa), dim=1).view(-1, 2, + 2).float() + points_rot = torch.einsum("ik, ikj->ij", points.float(), rot_matrix) + return points_rot.numpy() if is_numpy else points_rot + + +def remove_ego_from_objects(objects, ego_id): + """ + Avoid adding ego vehicle to the object dictionary. + + Parameters + ---------- + objects : dict + The dictionary contained all objects. + + ego_id : int + Ego id. + """ + if ego_id in objects: + del objects[ego_id] + + +def retrieve_ego_id(base_data_dict): + """ + Retrieve the ego vehicle id from sample(origin format). + + Parameters + ---------- + base_data_dict : dict + Data sample in origin format. + + Returns + ------- + ego_id : str + The id of ego vehicle. + """ + ego_id = None + + for cav_id, cav_content in base_data_dict.items(): + if cav_content['ego']: + ego_id = cav_id + break + return ego_id + + +def compute_iou(box, boxes): + """ + Compute iou between box and boxes list + Parameters + ---------- + box : shapely.geometry.Polygon + Bounding box Polygon. + + boxes : list + List of shapely.geometry.Polygon. + + Returns + ------- + iou : np.ndarray + Array of iou between box and boxes. + + """ + # Calculate intersection areas + if np.any(np.array([box.union(b).area for b in boxes])==0): + print('debug') + iou = [box.intersection(b).area / box.union(b).area for b in boxes] + + return np.array(iou, dtype=np.float32) + + +def convert_format(boxes_array): + """ + Convert boxes array to shapely.geometry.Polygon format. + Parameters + ---------- + boxes_array : np.ndarray + (N, 4, 2) or (N, 8, 3). + + Returns + ------- + list of converted shapely.geometry.Polygon object. + + """ + polygons = [Polygon([(box[i, 0], box[i, 1]) for i in range(4)]) for box in + boxes_array] + return np.array(polygons) + + +def torch_tensor_to_numpy(torch_tensor): + """ + Convert a torch tensor to numpy. + + Parameters + ---------- + torch_tensor : torch.Tensor + + Returns + ------- + A numpy array. 
+ """ + return torch_tensor.numpy() if not torch_tensor.is_cuda else \ + torch_tensor.cpu().detach().numpy() + + +def get_voxel_centers(voxel_coords, + downsample_times, + voxel_size, + point_cloud_range): + """ + Args: + voxel_coords: (N, 3) + downsample_times: + voxel_size: + point_cloud_range: + + Returns: + + """ + assert voxel_coords.shape[1] == 3 + voxel_centers = voxel_coords[:, [2, 1, 0]].float() # (xyz) + voxel_size = torch.tensor(voxel_size, device=voxel_centers.device).float() * downsample_times + pc_range = torch.tensor(point_cloud_range[0:3], device=voxel_centers.device).float() + voxel_centers = (voxel_centers + 0.5) * voxel_size + pc_range + return voxel_centers + +def scatter_point_inds(indices, point_inds, shape): + ret = -1 * torch.ones(*shape, dtype=point_inds.dtype, device=point_inds.device) # 初始化结果 (8, 21, 800, 704) + ndim = indices.shape[-1] # 获取坐标维度 4 + flattened_indices = indices.view(-1, ndim) # 将坐标展平 (204916, 4) + # 以下两步是经典操作 + slices = [flattened_indices[:, i] for i in range(ndim)] # 分成4个list + ret[slices] = point_inds # 将voxel的索引写入对应位置 + return ret + +def generate_voxel2pinds(sparse_tensor): + """ + 计算有效voxel在原始空间shape中的索引 + """ + device = sparse_tensor.indices.device # 获取device + batch_size = sparse_tensor.batch_size # 获取batch_size + spatial_shape = sparse_tensor.spatial_shape # 获取空间形状 (21, 800, 704) + indices = sparse_tensor.indices.long() # 获取索引 + point_indices = torch.arange(indices.shape[0], device=device, dtype=torch.int32) # 生成索引 (204916,) + output_shape = [batch_size] + list(spatial_shape) # 计算输出形状 (8, 21, 800, 704) + v2pinds_tensor = scatter_point_inds(indices, point_indices, output_shape) + return v2pinds_tensor diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/draco_compression.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/draco_compression.py new file mode 100644 index 0000000000000000000000000000000000000000..00a0fa8bce138148ca2e177c5280ad76c64916d4 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/draco_compression.py @@ -0,0 +1,69 @@ +""" +To use this script, draco [https://github.com/google/draco.git] should be installed. +1. during test, keypoints coordinats and features should be saved as .ply file using + the funcion save_ply. +2. Compress and anylize the CPM size using function draco_compression. 
+""" +import random, os, re +import numpy as np +import torch +from glob import glob +import subprocess + +draco = "/media/hdd/yuan/draco/build_dir/draco_encoder" + + +def save_ply(path, batch_coords, batch_features): + # path = "/media/hdd/yuan/OpenCOOD/opencood/logs/fpvrcnn_intermediate_fusion/cpms/" + dirname = "{:06d}".format(random.randint(0, 999999)) + os.mkdir(path + dirname) + for bi, (coords, features) in enumerate(zip(batch_coords[1:], + batch_features[1:])): + header = "ply\n" \ + "format ascii 1.0\n" \ + f"element vertex {len(coords)}\n" \ + "property float x\n" \ + "property float y\n" \ + "property float z\n" + header = header + "".join([f"property float feat{i}\n" for i in range(32)]) + "end_header" + data = torch.cat([coords, features], dim=1).detach().cpu().numpy() + np.savetxt(path + dirname + f"/{bi + 1}.ply", data, + delimiter=' ', header=header, comments='') + + +def draco_compression(ply_path): + files = glob(os.path.join(ply_path, '*/*.ply')) + cpm_sizes = list(map(draco_compression_one, files)) + return cpm_sizes + + +def draco_compression_one(file): + out_file = file.replace('ply', 'drc') + std_out = subprocess.getoutput(f"{draco} -point_cloud -i {file} -o {out_file}") + size_str = re.findall('[0-9]+ bytes', std_out) + if len(size_str)<1: + print("Compression failed:", file) + cpm_size = 0 + else: + cpm_size = int(size_str[0].split(' ')[0]) + + return cpm_size + + +def cal_avg_num_kpts(ply_path): + files = glob(os.path.join(ply_path, '*/*.ply')) + + def read_vertex_num(file): + with open(file, 'r') as f: + size_str = re.findall('element vertex [0-9]+', f.read())[0] + return float(size_str.split(' ')[-1]) * 4 * 32 / 1024 + + sizes = list(map(read_vertex_num, files)) + + return sizes + + +if __name__=="__main__": + cpm_sizes = cal_avg_num_kpts("/media/hdd/yuan/OpenCOOD/opencood/logs/fpvrcnn_intermediate_fusion/cpms") + # cpm_sizes = draco_compression("/media/hdd/yuan/OpenCOOD/opencood/logs/fpvrcnn_intermediate_fusion/cpms") + print(np.array(cpm_sizes).mean()) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/eval_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/eval_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..5361dae5532af81d84f4595ebb6cb501048630bb --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/eval_utils.py @@ -0,0 +1,253 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import os + +import numpy as np +import torch + +from opencood.utils import common_utils +from opencood.hypes_yaml import yaml_utils + + +def voc_ap(rec, prec): + """ + VOC 2010 Average Precision. + """ + rec.insert(0, 0.0) + rec.append(1.0) + mrec = rec[:] + + prec.insert(0, 0.0) + prec.append(0.0) + mpre = prec[:] + + for i in range(len(mpre) - 2, -1, -1): + mpre[i] = max(mpre[i], mpre[i + 1]) + + i_list = [] + for i in range(1, len(mrec)): + if mrec[i] != mrec[i - 1]: + i_list.append(i) + + ap = 0.0 + for i in i_list: + ap += ((mrec[i] - mrec[i - 1]) * mpre[i]) + return ap, mrec, mpre + + +def caluclate_tp_fp(det_boxes, det_score, gt_boxes, result_stat, iou_thresh): + """ + Calculate the true positive and false positive numbers of the current + frames. + Parameters + ---------- + det_boxes : torch.Tensor + The detection bounding box, shape (N, 8, 3) or (N, 4, 2). + det_score :torch.Tensor + The confidence score for each preditect bounding box. + gt_boxes : torch.Tensor + The groundtruth bounding box. 
+ result_stat: dict + A dictionary contains fp, tp and gt number. + iou_thresh : float + The iou thresh. + """ + # fp, tp and gt in the current frame + fp = [] + tp = [] + gt = gt_boxes.shape[0] + if det_boxes is not None: + # convert bounding boxes to numpy array + det_boxes = common_utils.torch_tensor_to_numpy(det_boxes) + det_score = common_utils.torch_tensor_to_numpy(det_score) + gt_boxes = common_utils.torch_tensor_to_numpy(gt_boxes) + + # sort the prediction bounding box by score + score_order_descend = np.argsort(-det_score) + det_score = det_score[score_order_descend] # from high to low + det_polygon_list = list(common_utils.convert_format(det_boxes)) + gt_polygon_list = list(common_utils.convert_format(gt_boxes)) + + # match prediction and gt bounding box, in confidence descending order + for i in range(score_order_descend.shape[0]): + det_polygon = det_polygon_list[score_order_descend[i]] + ious = common_utils.compute_iou(det_polygon, gt_polygon_list) + + if len(gt_polygon_list) == 0 or np.max(ious) < iou_thresh: + fp.append(1) + tp.append(0) + continue + + fp.append(0) + tp.append(1) + + gt_index = np.argmax(ious) + gt_polygon_list.pop(gt_index) + result_stat[iou_thresh]['score'] += det_score.tolist() + result_stat[iou_thresh]['fp'] += fp + result_stat[iou_thresh]['tp'] += tp + result_stat[iou_thresh]['gt'] += gt + +def caluclate_tp_fp_multiclass(det_boxes_all, det_score_all, gt_boxes_all, result_stat_all, iou_thresh): + """ + Calculate the true positive and false positive numbers of the current + frames. + Parameters + ---------- + det_boxes : torch.Tensor + The detection bounding box, shape (N, 8, 3) or (N, 4, 2). + det_score :torch.Tensor + The confidence score for each preditect bounding box. + gt_boxes : torch.Tensor + The groundtruth bounding box. + result_stat: dict + A dictionary contains fp, tp and gt number. + iou_thresh : float + The iou thresh. + """ + + class_list = [0,1,3] + for c in range(3): + det_boxes = det_boxes_all[c] + det_score = det_score_all[c] + gt_boxes = gt_boxes_all[c] + result_stat = result_stat_all[class_list[c]] + + if gt_boxes is None: + continue + + # fp, tp and gt in the current frame + fp = [] + tp = [] + gt = gt_boxes.shape[0] + if det_boxes is not None: + # convert bounding boxes to numpy array + det_boxes = common_utils.torch_tensor_to_numpy(det_boxes) + det_score = common_utils.torch_tensor_to_numpy(det_score) + gt_boxes = common_utils.torch_tensor_to_numpy(gt_boxes) + + # sort the prediction bounding box by score + score_order_descend = np.argsort(-det_score) + det_score = det_score[score_order_descend] # from high to low + det_polygon_list = list(common_utils.convert_format(det_boxes)) + gt_polygon_list = list(common_utils.convert_format(gt_boxes)) + + # match prediction and gt bounding box, in confidence descending order + for i in range(score_order_descend.shape[0]): + det_polygon = det_polygon_list[score_order_descend[i]] + ious = common_utils.compute_iou(det_polygon, gt_polygon_list) + + if len(gt_polygon_list) == 0 or np.max(ious) < iou_thresh: + fp.append(1) + tp.append(0) + continue + + fp.append(0) + tp.append(1) + + gt_index = np.argmax(ious) + gt_polygon_list.pop(gt_index) + result_stat[iou_thresh]['score'] += det_score.tolist() + result_stat[iou_thresh]['fp'] += fp + result_stat[iou_thresh]['tp'] += tp + result_stat[iou_thresh]['gt'] += gt + +def calculate_ap(result_stat, iou): + """ + Calculate the average precision and recall, and save them into a txt. 
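+
+    The fp/tp lists accumulated by caluclate_tp_fp are sorted by confidence
+    and turned into cumulative counts, from which rec = tp_cum / gt and
+    prec = tp_cum / (tp_cum + fp_cum) are computed and passed to voc_ap for
+    the VOC-2010 interpolation.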
+ Parameters + ---------- + result_stat : dict + A dictionary contains fp, tp and gt number. + iou : float + """ + iou_5 = result_stat[iou] + + fp = np.array(iou_5['fp']) + tp = np.array(iou_5['tp']) + score = np.array(iou_5['score']) + assert len(fp) == len(tp) and len(tp) == len(score) + + sorted_index = np.argsort(-score) + fp = fp[sorted_index].tolist() + tp = tp[sorted_index].tolist() + + gt_total = iou_5['gt'] + + cumsum = 0 + for idx, val in enumerate(fp): + fp[idx] += cumsum + cumsum += val + + cumsum = 0 + for idx, val in enumerate(tp): + tp[idx] += cumsum + cumsum += val + + rec = tp[:] + for idx, val in enumerate(tp): + rec[idx] = float(tp[idx]) / gt_total + + prec = tp[:] + for idx, val in enumerate(tp): + prec[idx] = float(tp[idx]) / (fp[idx] + tp[idx]) + + ap, mrec, mprec = voc_ap(rec[:], prec[:]) + + return ap, mrec, mprec + + +def eval_final_results_multiclass(result_stat_dict, save_path, infer_info=None): + all_class_results = {} + for tpe in result_stat_dict.keys(): + result_stat = result_stat_dict[tpe] + dump_dict = {} + ap_30, mrec_30, mpre_30 = calculate_ap(result_stat, 0.30) + ap_50, mrec_50, mpre_50 = calculate_ap(result_stat, 0.50) + ap_70, mrec_70, mpre_70 = calculate_ap(result_stat, 0.70) + dump_dict.update({'ap30': ap_30, + 'ap50': ap_50, + 'ap70': ap_70, + 'mpre_50': mpre_50, + 'mrec_50': mrec_50, + 'mpre_70': mpre_70, + 'mrec_70': mrec_70, + }) + print('class_{}:\n'.format(tpe),'The Average Precision at IOU 0.3 is %.2f, ' + 'The Average Precision at IOU 0.5 is %.2f, ' + 'The Average Precision at IOU 0.7 is %.2f' % (ap_30, ap_50, ap_70), '\n') + all_class_results[tpe] = dump_dict + if infer_info is None: + yaml_utils.save_yaml(all_class_results, os.path.join(save_path, 'eval.yaml')) + else: + yaml_utils.save_yaml(all_class_results, os.path.join(save_path, f'eval_{infer_info}.yaml')) + return all_class_results, ap_30, ap_50, ap_70 + +def eval_final_results(result_stat, save_path, infer_info=None): + dump_dict = {} + + ap_30, mrec_30, mpre_30 = calculate_ap(result_stat, 0.30) + ap_50, mrec_50, mpre_50 = calculate_ap(result_stat, 0.50) + ap_70, mrec_70, mpre_70 = calculate_ap(result_stat, 0.70) + + dump_dict.update({'ap30': ap_30, + 'ap_50': ap_50, + 'ap_70': ap_70, + 'mpre_50': mpre_50, + 'mrec_50': mrec_50, + 'mpre_70': mpre_70, + 'mrec_70': mrec_70, + }) + if infer_info is None: + yaml_utils.save_yaml(dump_dict, os.path.join(save_path, 'eval.yaml')) + else: + yaml_utils.save_yaml(dump_dict, os.path.join(save_path, f'eval_{infer_info}.yaml')) + + print('The Average Precision at IOU 0.3 is %.2f, ' + 'The Average Precision at IOU 0.5 is %.2f, ' + 'The Average Precision at IOU 0.7 is %.2f' % (ap_30, ap_50, ap_70)) + + return ap_30, ap_50, ap_70 \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/heter_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/heter_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..240c36db0c4be2615d4d8e964f92e208b167e86b --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/heter_utils.py @@ -0,0 +1,155 @@ +""" +Agent Selection Module for Heterogeneous Collaboration. + +Maybe later can use data augment, one sample with different selection setting. 
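+
+The assignment files written by assign_modality / assign_modality_4 below map
+each scenario to a per-CAV modality tag, roughly (scenario and CAV ids are
+illustrative only):
+
+    {
+        "2021_08_18_19_48_05": {"641": "m1", "650": "m3", "659": "m2"},
+        ...
+    }
+
+Adaptor consumes such a file as `modality_assignment`, together with
+`mapping_dict`, to pick which CAV may serve as ego at evaluation time and to
+assign modalities to non-ego CAVs during training.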
+""" +import numpy as np +import random +import os +from collections import OrderedDict +import json + +class Adaptor: + def __init__(self, + ego_modality, + model_modality_list, + modality_assignment, + lidar_channels_dict, + mapping_dict, + cav_preference, + train): + self.ego_modality = ego_modality + self.model_modality_list = model_modality_list + self.modality_assignment = modality_assignment + self.lidar_channels_dict = lidar_channels_dict + self.mapping_dict = mapping_dict + if cav_preference is None: + cav_preference = dict.fromkeys(model_modality_list, 1/len(model_modality_list)) + self.cav_preferece = cav_preference # training, probability for setting non-ego cav modality + self.train = train + + + def reorder_cav_list(self, cav_list, scenario_name): + """ + When evaluation, make the cav that could be ego modality after mapping be the first. + + This can check the training effect of aligner. + + work in basedataset -> reinitialize + """ + if self.train: + # shuffle the cav list + random.shuffle(cav_list) + return cav_list + + assignment = self.modality_assignment[scenario_name] + if assignment[cav_list[0]] not in self.ego_modality: + ego_cav = None + for cav_id, modality in assignment.items(): + if self.mapping_dict[modality] in self.ego_modality: # after mapping the modality is ego + ego_cav = cav_id + break + + if ego_cav is None: + return cav_list + + other_cav = sorted(list(assignment.keys())) + other_cav.remove(ego_cav) + cav_list = [ego_cav] + other_cav + + return cav_list + + def reassign_cav_modality(self, modality_name, idx_in_cav_list): + """ + work in basedataset -> reinitialize + """ + if self.train: + # always assign the ego_modality to idx 0 in cav_list + if idx_in_cav_list == 0: + return np.random.choice(self.ego_modality.split("&")) + return random.choices(list(self.cav_preferece.keys()), weights=self.cav_preferece.values())[0] + else: + return self.mapping_dict[modality_name] + + def unmatched_modality(self, cav_modality): + """ + work in + intermediate_heter_fusion_dataset -> __getitem__ + late_heter_fusion_dataset -> get_item_test + + Returns: + True/False. 
If the input modality is in the model_modality_list + """ + return cav_modality not in self.model_modality_list + + def switch_lidar_channels(self, cav_modality, lidar_file_path): + """ + Currently only support OPV2V + """ + if self.lidar_channels_dict.get(cav_modality, None) == 32: + return lidar_file_path.replace("OPV2V","OPV2V_Hetero").replace(".pcd", "_32.pcd") + if self.lidar_channels_dict.get(cav_modality, None) == 16: + return lidar_file_path.replace("OPV2V","OPV2V_Hetero").replace(".pcd", "_16.pcd") + return lidar_file_path + + +def assign_modality(root_dir="dataset/OPV2V", output_path="opencood/logs/heter_modality_assign/opv2v.json"): + np.random.seed(303) + splits = ['train', 'test', 'validate'] + scenario_cav_modality_dict = OrderedDict() + + for split in splits: + split_path = os.path.join(root_dir, split) + scenario_folders = sorted([os.path.join(split_path, x) + for x in os.listdir(split_path) if + os.path.isdir(os.path.join(split_path, x))]) + + for scenario_folder in scenario_folders: + scenario_name = scenario_folder.split('/')[-1] + scenario_cav_modality_dict[scenario_name] = OrderedDict() + + cav_list = sorted([x for x in os.listdir(scenario_folder) \ + if os.path.isdir(os.path.join(scenario_folder, x))]) + + # randomly exclude one agent to be M3 + M3_agent_idx = np.random.randint(len(cav_list)) + + for j, cav_id in enumerate(cav_list): + + if j == M3_agent_idx: + scenario_cav_modality_dict[scenario_name][cav_id] = "m3" # M3 modality + else: + scenario_cav_modality_dict[scenario_name][cav_id] = 'm'+str(np.random.randint(1,3)) # can be M1 or M2 mdoality + + with open(output_path, "w") as f: + json.dump(scenario_cav_modality_dict, f, indent=4, sort_keys=True) + + +def assign_modality_4(root_dir="dataset/OPV2V", output_path="opencood/logs/heter_modality_assign/opv2v_4modality.json"): + np.random.seed(303) + splits = ['train', 'test', 'validate'] + scenario_cav_modality_dict = OrderedDict() + + for split in splits: + split_path = os.path.join(root_dir, split) + scenario_folders = sorted([os.path.join(split_path, x) + for x in os.listdir(split_path) if + os.path.isdir(os.path.join(split_path, x))]) + + for scenario_folder in scenario_folders: + scenario_name = scenario_folder.split('/')[-1] + scenario_cav_modality_dict[scenario_name] = OrderedDict() + + cav_list = sorted([x for x in os.listdir(scenario_folder) \ + if os.path.isdir(os.path.join(scenario_folder, x))]) + + perm = np.random.permutation(4) + 1 + for j, cav_id in enumerate(cav_list): + scenario_cav_modality_dict[scenario_name][cav_id] = 'm'+str(perm[j%4]) # m1 or m2 or m3 or m4 + + + with open(output_path, "w") as f: + json.dump(scenario_cav_modality_dict, f, indent=4, sort_keys=True) + +if __name__ == "__main__": + assign_modality_4() \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/img2hdf5.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/img2hdf5.py new file mode 100644 index 0000000000000000000000000000000000000000..c68311218e0a60cea0b438e102da74a5ccf12a1d --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/img2hdf5.py @@ -0,0 +1,246 @@ +import os +from multiprocessing import Process +import numpy as np +from tqdm import tqdm +from PIL import Image +import h5py +import sys + + +def load_camera_data(camera_files, preload=True): + """ + Args: + camera_files: list, + store camera path + shape : tuple + (width, height), resize the image, and overcoming the lazy loading. 
+ Returns: + camera_data_list: list, + list of Image, RGB order + """ + camera_data_list = [] + for camera_file in camera_files: + camera_data = Image.open(camera_file) + if preload: + camera_data = camera_data.copy() + camera_data_list.append(camera_data) + return camera_data_list + + +def load_camera_files(cav_path, timestamp, name): + """ + Retrieve the paths to all camera files. + + Parameters + ---------- + cav_path : str + The full file path of current cav. + + timestamp : str + Current timestamp + + Returns + ------- + camera_files : list + The list containing all camera png file paths. + """ + camera0_file = os.path.join(cav_path, + timestamp + f'_{name}0.png') + camera1_file = os.path.join(cav_path, + timestamp + f'_{name}1.png') + camera2_file = os.path.join(cav_path, + timestamp + f'_{name}2.png') + camera3_file = os.path.join(cav_path, + timestamp + f'_{name}3.png') + + return [camera0_file, camera1_file, camera2_file, camera3_file] + + +def load_depth_files(cav_path, timestamp, name): + """ + Retrieve the paths to all camera files. + + Parameters + ---------- + cav_path : str + The full file path of current cav. + + timestamp : str + Current timestamp + + Returns + ------- + camera_files : list + The list containing all camera png file paths. + """ + camera0_file = os.path.join(cav_path, + timestamp + f'_{name}0.png').replace("OPV2V", "OPV2V_Hetero") + camera1_file = os.path.join(cav_path, + timestamp + f'_{name}1.png').replace("OPV2V", "OPV2V_Hetero") + camera2_file = os.path.join(cav_path, + timestamp + f'_{name}2.png').replace("OPV2V", "OPV2V_Hetero") + camera3_file = os.path.join(cav_path, + timestamp + f'_{name}3.png').replace("OPV2V", "OPV2V_Hetero") + + return [camera0_file, camera1_file, camera2_file, camera3_file] + +def parallel_transform(scenario_folders): + print("subprocess...") + for scenario_folder in scenario_folders: + cav_list = sorted(os.listdir(scenario_folder)) + + assert len(cav_list) > 0 + + # loop over all CAV data + for (j, cav_id) in enumerate(cav_list): + cav_path = os.path.join(scenario_folder, cav_id) + if not os.path.isdir(cav_path): + continue + + yaml_files = \ + sorted([os.path.join(cav_path, x) + for x in os.listdir(cav_path) if + x.endswith('.yaml')]) + timestamps = [] + + # extract timestamp + for file in yaml_files: + res = file.split('/')[-1] + timestamp = res.replace('.yaml', '') + timestamps.append(timestamp) + + for timestamp in timestamps: + if os.path.exists(os.path.join(cav_path, timestamp+"_imgs.hdf5")): + continue + camera_files = load_camera_files(cav_path, timestamp, name="camera") + depth_files = load_depth_files(cav_path, timestamp, name="depth") + + if not os.path.exists(depth_files[0]): + # record the scene + print(cav_path) + continue + try: + tmp_data = Image.open(depth_files[0]) + tmp_data = tmp_data.copy() + except: + print(cav_path) + continue + + camera_data = load_camera_data(camera_files, True) + depth_data = load_camera_data(depth_files, True) + print(os.path.join(cav_path, timestamp+"_imgs.hdf5")) + with h5py.File(os.path.join(cav_path, timestamp+"_imgs.hdf5"), "w") as f: + for i in range(4): + f.create_dataset(f"camera{i}", data=camera_data[i]) + for i in range(4): + f.create_dataset(f"depth{i}", data=depth_data[i]) + +def parallel_check(scenario_folders): + print("subprocess...") + for scenario_folder in scenario_folders: + cav_list = sorted(os.listdir(scenario_folder)) + + assert len(cav_list) > 0 + + # loop over all CAV data + for (j, cav_id) in enumerate(cav_list): + cav_path = os.path.join(scenario_folder, 
cav_id) + if not os.path.isdir(cav_path): + continue + + yaml_files = \ + sorted([os.path.join(cav_path, x) + for x in os.listdir(cav_path) if + x.endswith('.yaml')]) + timestamps = [] + + # extract timestamp + for file in yaml_files: + res = file.split('/')[-1] + timestamp = res.replace('.yaml', '') + timestamps.append(timestamp) + + for timestamp in timestamps: + if os.path.exists(os.path.join(cav_path, timestamp+"_imgs.hdf5")): + continue + camera_files = load_camera_files(cav_path, timestamp, name="camera") + depth_files = load_depth_files(cav_path, timestamp, name="depth") + + if not os.path.exists(depth_files[0]): + # record the scene + print(depth_files[0]) + # break + try: + tmp_data = Image.open(depth_files[0]) + tmp_data = tmp_data.copy() + except: + print(cav_path) + break + + + +def parallel_cleaup(scenario_folders): + print("subprocess...") + for scenario_folder in tqdm(scenario_folders): + cav_list = sorted(os.listdir(scenario_folder)) + + assert len(cav_list) > 0 + + # loop over all CAV data + for (j, cav_id) in enumerate(cav_list): + cav_path = os.path.join(scenario_folder, cav_id) + if not os.path.isdir(cav_path): + continue + + yaml_files = \ + sorted([os.path.join(cav_path, x) + for x in os.listdir(cav_path) if + x.endswith('.yaml')]) + timestamps = [] + + # extract timestamp + for file in yaml_files: + res = file.split('/')[-1] + timestamp = res.replace('.yaml', '') + timestamps.append(timestamp) + + for timestamp in timestamps: + if os.path.exists(os.path.join(cav_path, timestamp+"_imgs.hdf5")): + print(os.path.join(cav_path, timestamp+"_imgs.hdf5")) + os.remove(os.path.join(cav_path, timestamp+"_imgs.hdf5")) + +if __name__=="__main__": + + MP_NUM = 8 + MACHINE_NUM = 1 + + if MACHINE_NUM != 1: + machine_idx = eval(sys.argv[1]) # 0,1,2,3 + + split_folders = [f"/GPFS/rhome/yifanlu/workspace/OpenCOODv2/dataset/OPV2V/{split}" for split in ['train', 'validate', 'test']] + scenario_folders = [] + print(split_folders) + + for root_dir in split_folders: + scenario_folders += sorted([os.path.join(root_dir, x) + for x in os.listdir(root_dir) if + os.path.isdir(os.path.join(root_dir, x))]) + + """ + single machine + """ + if MACHINE_NUM == 1: + mp_split = np.array_split(scenario_folders, MP_NUM) + mp_split = [x.tolist() for x in mp_split] + + for i in range(MP_NUM): + p = Process(target=parallel_check, args=(mp_split[i],)) + p.start() + + if MACHINE_NUM > 1: + mp_split = np.array_split(scenario_folders, MP_NUM * MACHINE_NUM) + mp_split = [x.tolist() for x in mp_split] + + for i in range(machine_idx*MP_NUM, (machine_idx+1)*MP_NUM): + p = Process(target=parallel_transform, args=(mp_split[i],)) + p.start() \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/keypoint_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/keypoint_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..3cba263a80779edce6e81a9d26583580bc4a6f0b --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/keypoint_utils.py @@ -0,0 +1,374 @@ +import open3d as o3d +import numpy as np +import cv2 +from matplotlib import pyplot as plt +from opencood.utils.subsampling_utils import get_random_subset +from multiprocessing import Process + +vis = False + +def mask_points_by_range(points, limit_range, return_mask=False): + if len(limit_range) == 6: + mask = (points[:, 0] > limit_range[0]) & \ + (points[:, 0] < limit_range[3]) & \ + (points[:, 1] > limit_range[1]) & \ + (points[:, 1] < limit_range[4]) & \ + 
(points[:, 2] > limit_range[2]) & \ + (points[:, 2] < limit_range[5]) + elif len(limit_range) == 4: + mask = (points[:, 0] > limit_range[0]) & \ + (points[:, 0] < limit_range[2]) & \ + (points[:, 1] > limit_range[1]) & \ + (points[:, 1] < limit_range[3]) + + points_mask = points[mask] + + if return_mask: + return points_mask, mask + else: + return points_mask + +def project_bev(pcd_np, lidar_range, voxel_size): + """ project pcd to bev + Args: + pcd_np: np.ndarray, (N, 3) + + lidar_range: list + range for bev, [x_min, y_min, z_min, x_max, y_max, z_max] + + Return + bev: np.array, (H, W), + H = (y_max - y_min) / voxel_size + W = (x_max - x_min) / voxel_size + + pcd_np_with_idx: np.ndarray, (N_, 4) + last index show it belongs to which grid + """ + [x_min, y_min, z_min, x_max, y_max, z_max] = lidar_range + + pcd_crop_np, mask = mask_points_by_range(pcd_np, lidar_range, return_mask=True) + + pcd_np_with_idx = np.zeros((pcd_np.shape[0], 4)) + pcd_np_with_idx[:,:3] = pcd_np + + H = round((y_max - y_min) / voxel_size) + W = round((x_max - x_min) / voxel_size) + # print(f"BEV map with shape ({H}, {W}).") + + bev = np.zeros((H, W), dtype=np.uint8) + for i, (x,y,z) in enumerate(pcd_np): + y_idx = int((y - y_min) / voxel_size) + x_idx = int((x - x_min) / voxel_size) + if mask[i]: + bev[y_idx, x_idx] = 255 + pcd_np_with_idx[i][3] = y_idx * W + x_idx + + if vis: + plt.imshow(bev) + plt.show() + + return bev, pcd_np_with_idx + +def line_detection(bev_img): + """ + Should we really need detect line? + Is edge enough to use? + """ + edges = cv2.Canny(bev_img, 100, 200) + if vis: + plt.imshow(edges) + plt.show() + + rho = 1 # distance resolution in pixels of the Hough grid + theta = np.pi / 180 # angular resolution in radians of the Hough grid + threshold = 25 # minimum number of votes (intersections in Hough grid cell) + min_line_length = 20 # minimum number of pixels making up a line + max_line_gap = 20 # maximum gap in pixels between connectable line segments + + line_image = np.copy(bev_img) * 0 # creating a blank to draw lines on + + # Run Hough on edge detected image + # Output "lines" is an array containing endpoints of detected line segments + lines = cv2.HoughLinesP(edges, rho, theta, threshold, np.array([]), + min_line_length, max_line_gap) + + for line in lines: + for x1,y1,x2,y2 in line: + cv2.line(line_image,(x1,y1),(x2,y2),(255),1) + + if vis: + plt.imshow(line_image) + plt.show() + + return line_image + + +def get_point_in_voxels(pcd_np, rows, cols, lidar_range, voxel_size, pcd_with_idx): + """ use indice in image to filter point cloud, then sample within it. 
+ Args: + pcd_np: [N, 3] + rows: [M,] non zero index -> row + cols: [M,] non zero index -> col + pcd_with_idx: [N, 4] + Returns: + points_select: [N_, 3] + """ + [x_min, y_min, z_min, x_max, y_max, z_max] = lidar_range + H = round((y_max - y_min) / voxel_size) + W = round((x_max - x_min) / voxel_size) + + M = rows.shape[0] + points_select = np.zeros((0,4)) + + for i in range(M): + # voxel_range = [x_min + voxel_size * cols[i], + # y_min + voxel_size * rows[i], + # x_min + voxel_size * (cols[i] + 1), + # y_min + voxel_size * (rows[i] + 1)] + # points_in_voxel = mask_points_by_range(pcd_np, voxel_range) + + # if not points_in_voxel.any(): + # continue + + points_in_voxel = pcd_with_idx[pcd_with_idx[:,3]==(rows[i]*W + cols[i])] + if not points_in_voxel.any(): + continue + points_select = np.concatenate((points_select, points_in_voxel), axis=0) + + points_select = points_select[:,:3] + + return points_select + + +def get_keypoints(pcd_all_np, pcd_select_np, n_samples, mode = 'farthest'): + if pcd_select_np.shape[0] >= n_samples: + keypoints = get_random_subset(pcd_select_np, n_samples, mode) + else: + keypoints = get_random_subset(pcd_all_np, n_samples - pcd_select_np.shape[0], mode) + keypoints = np.concatenate((keypoints, pcd_select_np), axis=0) + + return keypoints + +def bev_sample(pcd_np, lidar_range, n_samples, mode, voxel_size=0.2, all_samples=False): + """ + Args: + pcd_np: + [N, 3] or [N, 4] + lidar_range: + list len = 4 or len = 6, please use this to remove ground + all_samples: + if True, not use n_samples to subsampling + Returns: + keypoints: np.ndarray + [n_samples, 3] + """ + + pcd_np = pcd_np[:,:3] + print(1) + bev_img, pcd_with_idx = project_bev(pcd_np, lidar_range, voxel_size) + print(2) + lines = line_detection(bev_img) + rows, cols = np.nonzero(lines) + print(3) + points_select = get_point_in_voxels(pcd_np, rows, cols, lidar_range, voxel_size, pcd_with_idx) + print(4) + + if all_samples: + keypoints = points_select + else: + keypoints = get_keypoints(pcd_np, points_select, n_samples, mode) + + print(keypoints.shape) + + return keypoints + +def seq_generate(): + dirs = ["/GPFS/rhome/yifanlu/workspace/dataset/OPV2V/train", + "/GPFS/rhome/yifanlu/workspace/dataset/OPV2V/validate" + "/GPFS/rhome/yifanlu/workspace/dataset/OPV2V/test"] + + + kp_store_path = '/GPFS/rhome/yifanlu/workspace/OpenCOOD/keypoints_file/bev_keypoints' + lidar_range = [-140, -80, -1.5, 140, 80, 1] + n_samples = 1500 + + import os + import opencood.utils.pcd_utils as pcd_utils + + for root_dir in dirs: + scenario_folders = sorted([os.path.join(root_dir, x) + for x in os.listdir(root_dir) if + os.path.isdir(os.path.join(root_dir, x))]) + scenario_folders_name = sorted([x + for x in os.listdir(root_dir) if + os.path.isdir(os.path.join(root_dir, x))]) + # Structure: {scenario_id : {cav_1 : {timestamp1 : {yaml: path, + # lidar: path, cameras:list of path}}}} + + # loop over all scenarios + for (i, scenario_folder) in enumerate(scenario_folders): + # at least 1 cav should show up + cav_list = sorted([x for x in os.listdir(scenario_folder) + if os.path.isdir( + os.path.join(scenario_folder, x))]) + assert len(cav_list) > 0 + + # loop over all CAV data + for (j, cav_id) in enumerate(cav_list): + + # save all yaml files to the dictionary + cav_path = os.path.join(scenario_folder, cav_id) + + yaml_files = \ + sorted([os.path.join(cav_path, x) + for x in os.listdir(cav_path) if + x.endswith('.yaml')]) + timestamps = [] + + # extract timestamp + for file in yaml_files: + res = file.split('/')[-1] + + timestamp = 
res.replace('.yaml', '') + timestamps.append(timestamp) + + + for timestamp in timestamps: + + yaml_file = os.path.join(cav_path, + timestamp + '.yaml') + lidar_file = os.path.join(cav_path, + timestamp + '.pcd') + + # when init the dataset, it read over all pcd files. + # it maybe slow, but no need to perform keypoint sampling for each time.\ + kp_path = f"{kp_store_path}/{scenario_folders_name[i]}/{cav_id}/{timestamp}.npy" + kp_dir = kp_path.rsplit('/',1)[0] # before filename + + if not os.path.exists(kp_dir): + os.makedirs(kp_dir) + + if not os.path.exists(kp_path): + pcd_np = pcd_utils.pcd_to_np(lidar_file) + kp_file = bev_sample(pcd_np, + lidar_range, + n_samples, + mode='uniform', + all_samples=True) + + np.save(kp_path, kp_file) + + +def parallel_generate(scenario_folder, scenario_folder_name): + + kp_store_path = '/GPFS/rhome/yifanlu/workspace/OpenCOOD/keypoints_file/bev_keypoints' + lidar_range = [-140, -80, -1.5, 140, 80, 1] + + cav_list = sorted([x for x in os.listdir(scenario_folder) + if os.path.isdir( + os.path.join(scenario_folder, x))]) + assert len(cav_list) > 0 + print(cav_list) + + # loop over all CAV data + for (j, cav_id) in enumerate(cav_list): + print(cav_id) + # save all yaml files to the dictionary + cav_path = os.path.join(scenario_folder, cav_id) + + yaml_files = \ + sorted([os.path.join(cav_path, x) + for x in os.listdir(cav_path) if + x.endswith('.yaml')]) + timestamps = [] + + # extract timestamp + for file in yaml_files: + res = file.split('/')[-1] + + timestamp = res.replace('.yaml', '') + timestamps.append(timestamp) + + + for timestamp in timestamps: + + yaml_file = os.path.join(cav_path, + timestamp + '.yaml') + lidar_file = os.path.join(cav_path, + timestamp + '.pcd') + + # when init the dataset, it read over all pcd files. 
+ # it maybe slow, but no need to perform keypoint sampling for each time.\ + target = [250,500,750,1000,1250,1500,2000,2500] + kp_paths = [f"{kp_store_path}/{scenario_folder_name}/{cav_id}/{timestamp}.npy"] + kp_paths += [f"{kp_store_path}_{n_samples}/{scenario_folder_name}/{cav_id}/{timestamp}.npy" for n_samples in target] + flag = True + for kp_path in kp_paths: + if not os.path.exists(kp_path): + flag = False + if flag: + continue + + + pcd_np = pcd_utils.pcd_to_np(lidar_file)[:,:3] + + all_keypoint = bev_sample(pcd_np, + lidar_range, + np.inf, + mode='uniform', + all_samples=True) + + kp_path = f"{kp_store_path}/{scenario_folder_name}/{cav_id}/{timestamp}.npy" + kp_dir = kp_path.rsplit('/',1)[0] # before filename + if not os.path.exists(kp_dir): + os.makedirs(kp_dir) + + if not os.path.exists(kp_path): + np.save(kp_path, all_keypoint) + print(f"saving to {kp_path}") + + + for n_samples in target: + kp_path = f"{kp_store_path}_{n_samples}/{scenario_folder_name}/{cav_id}/{timestamp}.npy" + kp_dir = kp_path.rsplit('/',1)[0] # before filename + + if not os.path.exists(kp_dir): + os.makedirs(kp_dir) + + select_keypoint = get_keypoints(pcd_np, all_keypoint, n_samples) + + if not os.path.exists(kp_path): + np.save(kp_path, select_keypoint) + print(f"saving to {kp_path}") + + + + +if __name__=="__main__": + dirs = ["/GPFS/rhome/yifanlu/workspace/dataset/OPV2V/train", + "/GPFS/rhome/yifanlu/workspace/dataset/OPV2V/validate", + "/GPFS/rhome/yifanlu/workspace/dataset/OPV2V/test"] + + import os + import opencood.utils.pcd_utils as pcd_utils + + scenario_folders = [] + scenario_folders_name = [] + + for root_dir in dirs: + scenario_folders += sorted([os.path.join(root_dir, x) + for x in os.listdir(root_dir) if + os.path.isdir(os.path.join(root_dir, x))]) + scenario_folders_name += sorted([x + for x in os.listdir(root_dir) if + os.path.isdir(os.path.join(root_dir, x))]) + + + + scenario_folders = ['/GPFS/rhome/yifanlu/workspace/OpenCOOD/dataset_link/validate/2021_08_21_17_30_41'] + scenario_folders_name = ['2021_08_21_17_30_41'] + num = len(scenario_folders) + + for i in range(num): + p = Process(target=parallel_generate, args=(scenario_folders[i],scenario_folders_name[i])) + p.start() \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/max_consensus.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/max_consensus.py new file mode 100644 index 0000000000000000000000000000000000000000..294afda55f7f642b0056fcee46b759325fe01955 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/max_consensus.py @@ -0,0 +1,131 @@ +import numpy as np +from sklearn.neighbors import NearestNeighbors + + +def max_consunsus_hierarchical(pointsl, pointsr, loc_l, loc_r, resolution=None, radius=1, point_labels=None, label_weights=None, **kwargs): + max_err = kwargs['search_range']# np.array([1, 1, 6]) + min_cons = kwargs['min_cons'] + min_match_acc_points = kwargs['min_match_acc_points'] + pointsl_out, pointsr_out, T, tf_local, cons, matched_pointsl, matched_pointsr = max_consensus2(pointsl, pointsr, -max_err, max_err, + resolution, radius, loc_l, loc_r, + point_labels=point_labels, label_weights=label_weights) + + if matched_pointsl is not None and len(matched_pointsl) > min_match_acc_points: + T, tf = estimate_tf_2d(matched_pointsl, matched_pointsr, pointsl, pointsr_out) + tf_local = tf + tf_local[:2] = tf_local[:2] = tf_local[:2] - loc_r[0, :2] + loc_l[0, :2] + pointsr_homo = np.concatenate([pointsr, np.ones((len(pointsr), 1))], 
axis=1).T + pointsr_out = (T @ pointsr_homo).T + else: + return None, None, None + + if cons < min_cons: + return None, None, None + return T, tf_local, pointsr_out + + +def max_consensus2(pointsl, pointsr, xyr_min, xyr_max, resolotion, radius, loc_l=None, loc_r=None, point_labels=None, label_weights=None): + tf_matrices, tf_params, tf_params_local = construct_tfs(xyr_min, xyr_max, resolotion, loc_l, loc_r) + rotl, _, _ = construct_tfs(xyr_min[2:], xyr_max[2:], resolotion[2:]) + pointr_homo = np.concatenate([pointsr, np.ones((len(pointsr), 1))], axis=1).T + # pointl_homo = np.concatenate([pointsl, np.ones((len(pointsl), 1))], axis=1).T + pointr_transformed = np.einsum('...ij, ...jk', tf_matrices, np.tile(pointr_homo,(len(tf_matrices), 1, 1))).transpose(0, 2, 1) + pointr_transformed_s = pointr_transformed.reshape(-1, 3)[:, :2] + cur_cons = 0 + pointl_out = pointsl + pointr_out = pointsr + match_T, match_tf_local, matched_pointsl, matched_pointsr = None, None, None, None + # r1 = 0 + for R in rotl[:, :2, :2]: + pointl_transformed = np.einsum('ij, jk', R, pointsl.T).T + nbrs = NearestNeighbors(n_neighbors=1, radius=radius, algorithm='auto').fit(pointl_transformed) + distances, indices = nbrs.kneighbors(pointr_transformed_s) + mask = (distances < radius) + lbll, lblr = point_labels + plus = (np.logical_and(lbll[indices] > 2, mask)).reshape(len(tf_matrices), len(pointsr)) + mask = mask.reshape(len(tf_matrices), len(pointsr)) + pointr_consensus = mask.sum(axis=1) + plus.sum(axis=1) * label_weights[-1] + best_match = np.argmax(pointr_consensus) + match_consensus = pointr_consensus[best_match] + if match_consensus > cur_cons: + pointr_out = pointr_transformed[best_match] + match_T = tf_matrices[best_match] + match_tf_local = tf_params_local[best_match] + accurate_points_mask = plus[best_match] + selected_indices = indices.reshape(len(tf_matrices), len(pointsr))[best_match][accurate_points_mask] + matched_pointsl = pointsl[selected_indices] + matched_pointsr = pointsr[accurate_points_mask] + # r1 = np.arctan2(R[1, 0], R[0, 0]) + pointl_out = pointl_transformed + cur_cons = match_consensus + return pointl_out, pointr_out, match_T, match_tf_local, cur_cons, matched_pointsl, matched_pointsr + + +def max_consensus1(pointsl, pointsr, xyr_min, xyr_max, resolotion, radius, loc_l=None, loc_r=None, point_labels=None, label_weights=None): + tf_matrices, tf_params, tf_params_local = construct_tfs(xyr_min, xyr_max, resolotion, loc_l, loc_r) + pointr_homo = np.concatenate([pointsr, np.ones((len(pointsr), 1))], axis=1).T + pointr_transformed = np.einsum('...ij, ...jk', tf_matrices, np.tile(pointr_homo,(len(tf_matrices), 1, 1))).transpose(0, 2, 1) + pointr_transformed_s = pointr_transformed.reshape(-1, 3)[:, :2] + + nbrs = NearestNeighbors(n_neighbors=1, radius=radius, algorithm='auto').fit(pointsl) + distances, indices = nbrs.kneighbors(pointr_transformed_s) + mask = (distances < radius) + lbll, lblr = point_labels + plus = (np.logical_and(lbll[indices] > 2, mask)).reshape(len(tf_matrices), len(pointsr)) + mask = mask.reshape(len(tf_matrices), len(pointsr)) + pointr_consensus = mask.sum(axis=1) + plus.sum(axis=1) * label_weights[-1] + best_match = np.argmax(pointr_consensus) + match_consensus = pointr_consensus[best_match] + pointr_out = pointr_transformed[best_match] + match_tf = tf_params[best_match] + match_T = tf_matrices[best_match] + match_tf_local = tf_params_local[best_match] + accurate_points_mask = plus[best_match] + selected_indices = indices.reshape(len(tf_matrices), 
len(pointsr))[best_match][accurate_points_mask] + matched_pointsl = pointsl[selected_indices] + matched_pointsr = pointsr[accurate_points_mask] + return pointr_out, match_T, match_tf_local, match_consensus, matched_pointsl, matched_pointsr + + +def construct_tfs(xyr_min, xyr_max, resolution, loc_l=None, loc_r=None): + input = [np.arange(xyr_min[i], xyr_max[i], resolution[i]) for i in range(len(xyr_min))] + grid = np.meshgrid(*input) + grid = [a.reshape(-1) for a in grid] + tf_parames_local = np.stack(grid, axis=1) + tf_parames_local[:, -1] = tf_parames_local[:, -1] / 180 * np.pi + tf_parames = np.copy(tf_parames_local) + if loc_r is not None: + tf_parames[:, :-1] = tf_parames_local[:, :2] + loc_r[:, :2] - loc_l[:, :2] + sina = np.sin(tf_parames[:, -1]) + cosa = np.cos(tf_parames[:, -1]) + zeros = np.zeros(len(tf_parames), dtype=sina.dtype) + ones = np.ones(len(tf_parames), dtype=sina.dtype) + x = tf_parames[:, 0] if len(xyr_min)>1 else zeros + y = tf_parames[:, 1] if len(xyr_min)>1 else zeros + tfs = np.array([[cosa, -sina, x], + [sina, cosa, y], + [zeros, zeros, ones]]).transpose(2, 0, 1) + return tfs, tf_parames, tf_parames_local + + +def estimate_tf_2d(pointsr, pointsl, pointsl_all, pointsr_all): + # 1 reduce by the center of mass + l_mean = pointsl.mean(axis=0) + r_mean = pointsr.mean(axis=0) + l_reduced = pointsl - l_mean + r_reduced = pointsr - r_mean + # 2 compute the rotation + Sxx = (l_reduced[:, 0] * r_reduced[:, 0]).sum() + Syy = (l_reduced[:, 1] * r_reduced[:, 1]).sum() + Sxy = (l_reduced[:, 0] * r_reduced[:, 1]).sum() + Syx = (l_reduced[:, 1] * r_reduced[:, 0]).sum() + theta = np.arctan2(Sxy - Syx, Sxx + Syy) # / np.pi * 180 + sa = np.sin(theta) + ca = np.cos(theta) + T = np.array([[ca, -sa, 0], + [sa, ca, 0], + [0, 0, 1]]) + t = r_mean.reshape(2, 1) - T[:2, :2] @ l_mean.reshape(2, 1) + # T = T.T + T[:2, 2:] = t + return T, np.array([*t.squeeze(), theta]) diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/model_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/model_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..c2fa36e16f2def23ba38ad6cd38f66db96467b9e --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/model_utils.py @@ -0,0 +1,322 @@ +import torch +import torch.nn as nn +from collections import OrderedDict + +def fix_bn(m): + classname = m.__class__.__name__ + if classname.find('BatchNorm') != -1: + m.eval() +def unfix_bn(m): + classname = m.__class__.__name__ + if classname.find('BatchNorm') != -1: + m.train() + +def has_trainable_params(module: torch.nn.Module) -> bool: + any_require_grad = any(p.requires_grad for p in module.parameters()) + any_bn_in_train_mode = any(m.training for m in module.modules() if isinstance(m, (torch.nn.BatchNorm1d, torch.nn.BatchNorm2d, torch.nn.BatchNorm3d))) + return any_require_grad or any_bn_in_train_mode + +def has_untrainable_params(module: torch.nn.Module) -> bool: + any_not_require_grad = any((not p.requires_grad) for p in module.parameters()) + any_bn_in_eval_mode = any((not m.training) for m in module.modules() if isinstance(m, (torch.nn.BatchNorm1d, torch.nn.BatchNorm2d, torch.nn.BatchNorm3d))) + return any_not_require_grad or any_bn_in_eval_mode + +def check_trainable_module(model): + appeared_module_list = [] + has_trainable_list = [] + has_untrainable_list = [] + for name, module in model.named_modules(): + if any([name.startswith(appeared_module_name) for appeared_module_name in appeared_module_list]) or name=='': # the whole model 
has name '' + continue + appeared_module_list.append(name) + + if has_trainable_params(module): + has_trainable_list.append(name) + if has_untrainable_params(module): + has_untrainable_list.append(name) + + print("=========Those modules have trainable component=========") + print(*has_trainable_list,sep='\n',end='\n\n') + print("=========Those modules have untrainable component=========") + print(*has_untrainable_list,sep='\n',end='\n\n') + + +def load_model_dict(model, pretrained_dict): + """ load pretrained state dict, keys may not match with model + + Args: + model: nn.Module + + pretrained_dict: collections.OrderedDict + + """ + # 1. filter out unnecessary keys + model_dict = model.state_dict() + pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} + # 2. overwrite entries in the existing state dict + model_dict.update(pretrained_dict) + # 3. load the new state dict + model.load_state_dict(model_dict) + return model + + +def weight_init(m): + if isinstance(m, nn.Linear): + nn.init.xavier_normal_(m.weight.data, gain=0.1) + if hasattr(m.bias, 'data'): + nn.init.constant_(m.bias.data, 0) + + elif isinstance(m, nn.Conv2d): + nn.init.xavier_normal_(m.weight, gain=0.1) + # if hasattr(m, 'bias'): + # nn.init.constant_(m.bias, 0) + + # elif isinstance(m, nn.BatchNorm2d): + # nn.init.xavier_normal_(m.weight, gain=0.05) + # nn.init.constant_(m.bias, 0) + +def rename_model_dict_keys(pretrained_dict_path, rename_dict): + """ load pretrained state dict, keys may not match with model + + Args: + model: nn.Module + + pretrained_dict: collections.OrderedDict + + """ + pretrained_dict = torch.load(pretrained_dict_path) + # 1. filter out unnecessary keys + for oldname, newname in rename_dict.items(): + if oldname.endswith("*"): + _oldnames = list(pretrained_dict.keys()) + _oldnames = [x for x in _oldnames if x.startswith(oldname[:-1])] + for _oldname in _oldnames: + if newname != "": + _newname = _oldname.replace(oldname[:-1], newname[:-1]) + pretrained_dict[_newname] = pretrained_dict[_oldname] + pretrained_dict.pop(_oldname) + else: + if newname != "": + pretrained_dict[newname] = pretrained_dict[oldname] + pretrained_dict.pop(oldname) + torch.save(pretrained_dict, pretrained_dict_path) + + +def compose_model(model1, keyname1, model2, keyname2, output_model): + pretrained_dict1 = torch.load(model1) + pretrained_dict2 = torch.load(model2) + + new_dict = OrderedDict() + for keyname in keyname1: + if keyname.endswith("*"): + _oldnames = list(pretrained_dict1.keys()) + _oldnames = [x for x in _oldnames if x.startswith(keyname[:-1])] + for _oldname in _oldnames: + new_dict[_oldname] = pretrained_dict1[_oldname] + + for keyname in keyname2: + if keyname.endswith("*"): + _oldnames = list(pretrained_dict2.keys()) + _oldnames = [x for x in _oldnames if x.startswith(keyname[:-1])] + for _oldname in _oldnames: + new_dict[_oldname] = pretrained_dict2[_oldname] + + torch.save(new_dict, output_model) + + +def switch_model_dict_keys(pretrained_dict_path, switch_dict): + """ load pretrained state dict, keys may not match with model + + Args: + model: nn.Module + + pretrained_dict: collections.OrderedDict + + switch_dict: {"cls_head_lidar": "cls_head_camera"} + """ + pretrained_dict = torch.load(pretrained_dict_path) + # 1. 
filter out unnecessary keys + for key1, key2 in switch_dict.items(): + all_model_keys = list(pretrained_dict.keys()) + all_key1_weight = [x for x in all_model_keys if x.startswith(key1)] + for key1_weight_name in all_key1_weight: + key2_weight_name = key1_weight_name.replace(key1, key2) + + pretrained_dict[key1_weight_name], pretrained_dict[key2_weight_name] = \ + pretrained_dict[key2_weight_name], pretrained_dict[key1_weight_name] + + torch.save(pretrained_dict, pretrained_dict_path) + + +def rename_m3_to_m4(pretrain_dict): + new_dict = OrderedDict() + for oldname, v in pretrain_dict.items(): + if 'm3.' in oldname: + print(oldname) + newname = oldname.replace("m3.","m4.") + new_dict[newname] = pretrain_dict[oldname] + else: + new_dict[oldname] = pretrain_dict[oldname] + return new_dict + +def create_m1m2m3m4_inter_model(m1m2m3_heter_model, m1m2m4_heter_model): + final_model = OrderedDict() + for k, v in m1m2m3_heter_model.items(): + if k not in final_model: + final_model[k] = v + + for k, v in m1m2m4_heter_model.items(): + if k not in final_model: + final_model[k] = v + + return final_model + +def create_m1m2m3m4_model(m1_late_model, m2_late_model, m3_late_model, m4_late_model): + final_model = OrderedDict() + for k, v in m1_late_model.items(): + if k not in final_model: + final_model[k] = v + else: + print(k, 'is already added.') + print('m1 finish') + + for k, v in m2_late_model.items(): + if k not in final_model: + final_model[k] = v + else: + print(k, 'is already added.') + print('m2 finish') + + for k, v in m3_late_model.items(): + if k not in final_model: + final_model[k] = v + else: + print(k, 'is already added.') + print('m3 finish') + + for k, v in m4_late_model.items(): + if k not in final_model: + final_model[k] = v + else: + print(k, 'is already added.') + print('m4 finish') + + return final_model + +def make_final_inter_model(m1m2m3_heter_model_path, + m1m2m3tom4_heter_model_path): + m1m2m3_heter_model = torch.load(m1m2m3_heter_model_path) + + m1m2m3tom4_heter_model = torch.load(m1m2m3tom4_heter_model_path) + m1m2m4_heter_model = rename_m3_to_m4(m1m2m3tom4_heter_model) + + + final_model = create_m1m2m3m4_inter_model(m1m2m3_heter_model, + m1m2m4_heter_model) + return final_model + + +def make_m1m2m3m4_model(m1_model_path, + m2_model_path, + m3_model_path, + m4_model_path, + out_path): + + m1_model = torch.load(m1_model_path, map_location='cpu') + m2_model = torch.load(m2_model_path, map_location='cpu') + m3_model = torch.load(m3_model_path, map_location='cpu') + m4_model = torch.load(m4_model_path, map_location='cpu') + + + final_model = create_m1m2m3m4_model(m1_model, + m2_model, + m3_model, + m4_model) + torch.save(final_model, out_path) + +def single_model_to_inter_model(model_path, output_path, modality_name='m1'): + pretrain_dict = torch.load(model_path) + newdict = OrderedDict() + for k,v in pretrain_dict.items(): + if k.startswith('shrink') or k.startswith('shrink') or k.startswith('cls_head') \ + or k.startswith('reg_head') or k.startswith('dir_head'): + newdict[k.replace(f"_{modality_name}", '')] = v + elif k.startswith('layer'): + newdict[k.replace(f"layers_{modality_name}", 'backbone')] = v + else: + newdict[k] = v + torch.save(newdict, output_path) + + +if __name__ == "__main__": + + make_m1m2m3m4_model( + 'opencood/logs/FedHCP_opv2v_m1_pointpillars_140.8_40_align_to_m3_singlesup/net_epoch_bestval_at29.pth', + 'opencood/logs/FedHCP_opv2v_m2_LSSeff_140.8_40_align_to_m3_singlesup_warp/net_epoch_bestval_at21.pth', + 
'opencood/logs/FedHCP_opv2v_m4_LSSres_140.8_40_align_to_m3_singlesup_warp/net_epoch_bestval_at25.pth', + 'opencood/logs/FedHCP_opv2v_m4_LSSres_140.8_40_align_to_m3_singlesup_warp/net_epoch_bestval_at25.pth', + 'opencood/logs/FedHCP_final_m3base_new/net_epoch1.pth' + ) + + + # make_m1m2m3m4_model( + # '/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/FedHCP_PRETRAIN/single_modality_intermediate/m1_pointpillar_msmax_epoch25.pth', + # '/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/FedHCP_PRETRAIN/single_modality/m2_LSSeff_epoch21.pth', + # '/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/FedHCP_PRETRAIN/single_modality/m3_SECOND32_epoch29.pth', + # '/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/FedHCP_PRETRAIN/single_modality/m4_LSSres_epoch33.pth', + # '/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/FedHCP_PRETRAIN/align_to_m1_4_modality/m1_pointpillar_col_m2_LSSeff_m3_SECOND32_m4_LSSres.pth' + # ) + + # m3_model = torch.load('/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/A_opv2v_heter_pretrain/m4_pretrain/LSSres_net_epoch_bestval_at13.pth') + # m4_model = rename_m3_to_m4(m3_model) + # torch.save(m3_model, "/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/A_opv2v_heter_pretrain/m4_pretrain/LSSres_net_epoch_bestval_at13_m4.pth") + + # finel_model = make_final_inter_model(m1m2m3_heter_model_path='/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/m1_pointpillars_m2_lsseff_m3_SECOND_sharedhead_convnext_block3/net_epoch_bestval_at19.pth', + # m1m2m3tom4_heter_model_path='/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/m1_pointpillars_m2_lsseff_m3_lssres_sharedhead_convnext_block3/net_epoch_bestval_at9.pth') + + # torch.save(finel_model, "/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/m1_pointpillars_m2_lsseff_m3_SECOND_m4_lssres/net_epoch1.pth") + + # final_model = make_final_late_model() + # torch.save(final_model, "/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/m1_pointpillars_m2_lsseff_m3_SECOND_m4_lssres_late/net_epoch1.pth") + + # dict_path = "/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/A_v2xset_heter_lidar_and_camera_pretrain_switch_layer2_layer3_shrink_head/net_epoch1.pth" + # switch_dict = {"lidar_backbone.resnet.layer1": "camera_backbone.resnet.layer1", + # "lidar_backbone.renset.layer2": "camera_backbone.resnet.layer2", + # "cls_head_lidar": "cls_head_camera", + # "reg_head_lidar": "reg_head_camera", + # "dir_head_lidar": "dir_head_camera", + # "shrink_lidar":"shrink_camera"} + # switch_model_dict_keys(dict_path, switch_dict) + + # dict_path = "/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/A_v2xset_heter_camera_pretrain_8x_64/net_epoch1.pth" + # rename_dict = {"camera_encoder.*": "", + # "camera_backbone.*": "", + # "shrink_camera.*": "", + # "cls_head_camera.*": "", + # "reg_head_camera.*": "", + # "dir_head_camera.*": "",} + # rename_model_dict_keys(dict_path, rename_dict) + + # dict_path = "/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/v2xset_heter_late_fusion/net_epoch_bestval_at28.pth" + # rename_dict = {"camencode.*": "camera_encoder.camencode.*", + # "bevencode.*": "camera_encoder.bevencode.*", + # "head.cls_head.*": "cls_head_camera.*", + # "head.reg_head.*": "reg_head_camera.*", + # "head.dir_head.*": "dir_head_camera.*", + # "shrink_conv.*": "shrink_camera.*"} + # rename_model_dict_keys(dict_path, rename_dict) + + # model1 = "/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/A_opv2v_heter_pretrain/m1m2_pretrain_for_late_fusion/m1_pointpillars_m2_lsseff.pth" # 
lidar + # model2 = "/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/A_opv2v_heter_pretrain/m3_pretrain/LSSres_net_epoch_bestval_at13.pth" # cam + # output_model = "/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/A_opv2v_heter_pretrain/m1m2m3_pretrain_for_late_fusion/m1_pointpillars_m2_lsseff_m3_lssres.pth" + # keyname1 = ['*',] + # keyname2 = ['*',] + # compose_model(model1, keyname1, model2, keyname2, output_model) + + # dict_path = "/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/v2xset_heter_late_fusion/net_epoch1.pth" + # rename_dict = {"camera_encoder.*": "", + # "head.cls_head_camera.*": "", + # "head.reg_head_camera.*": "", + # "head.dir_head_camera.*": "", + # "shrink_camera.*": ""} + # rename_model_dict_keys(dict_path, rename_dict) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/occ_render.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/occ_render.py new file mode 100644 index 0000000000000000000000000000000000000000..2135cc7263f6ce6646a3785e56b69c6880fecfad --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/occ_render.py @@ -0,0 +1,147 @@ +""" +Functions to render occupancy map from bounding boxes +""" + + +import os +import copy +import re +import io +import logging +import json +import numpy as np +import torch +import carla +import cv2 +import math +import datetime +import pathlib +import torch.utils.data as data +from torchvision import transforms +from PIL import Image +from skimage.measure import block_reduce +import time +import matplotlib.pyplot as plt +import torch.nn.functional as F +import pygame + +def box2occ(infer_result): + + det_range = [36, 12, 12, 12, 0.25] + + attrib_list = ['pred_box_tensor', 'pred_score', 'gt_box_tensor'] + for attrib in attrib_list: + if isinstance(infer_result[attrib], list): + infer_result_tensor = [] + for i in range(len(infer_result[attrib])): + if infer_result[attrib][i] is not None: + infer_result_tensor.append(infer_result[attrib][i]) + if len(infer_result_tensor)>0: + infer_result[attrib] = torch.cat(infer_result_tensor, dim=0) + else: + infer_result[attrib] = None + + ### filte out ego box + if not infer_result['pred_box_tensor'] is None: + if len(infer_result['pred_box_tensor']) > 0: + tmp = infer_result['pred_box_tensor'][:,:,0].clone() + infer_result['pred_box_tensor'][:,:,0]=infer_result['pred_box_tensor'][:,:,1] + infer_result['pred_box_tensor'][:,:,1] = tmp + # measurements = car_data_raw[0]['measurements'] + num_object = infer_result['pred_box_tensor'].shape[0] + # if num_object > 0: + object_list = [] + # transform from lidar pose to ego pose + for i in range(num_object): + transformed_box = infer_result['pred_box_tensor'][i].cpu().numpy() + transformed_box[:,1] += 1.3 + + + location_box = np.mean(transformed_box[:4,:2], 0) + if np.linalg.norm(location_box) < 1.4: + continue + object_list.append(torch.from_numpy(transformed_box)) + if len(object_list) > 0: + processed_pred_box = torch.stack(object_list, dim=0) + else: + processed_pred_box = infer_result['pred_box_tensor'][:0] + else: + processed_pred_box = [] # infer_result['pred_box_tensor'] + + ### turn boxes into occupancy map + if len(processed_pred_box) > 0: + occ_map = turn_traffic_into_map(processed_pred_box[:,:4,:2].cpu(), det_range) + else: + occ_map = turn_traffic_into_map(processed_pred_box, det_range) + + # # N, K, H, W, C=7 + # occ_map = turn_traffic_into_map(pred_traffic, self.det_range) + occ_map_shape = occ_map.shape + occ_map = 
torch.from_numpy(occ_map).cuda().contiguous().view((-1, 1) + occ_map_shape[1:])
+
+    return occ_map
+
+def transform_2d_points(xyz, r1, t1_x, t1_y, r2, t2_x, t2_y):
+    """
+    Build a rotation matrix and take the dot product.
+    """
+    # z value to 1 for rotation
+    xy1 = xyz.copy()
+    xy1[:, 2] = 1
+
+    c, s = np.cos(r1), np.sin(r1)
+    # rotate by angle r1: transform from the r1 vehicle frame to the world frame
+    r1_to_world = np.matrix([[c, -s, t1_x], [s, c, t1_y], [0, 0, 1]])
+
+    # np.dot converts to a matrix, so we explicitly change it back to an array
+    world = np.asarray(r1_to_world @ xy1.T)
+
+    c, s = np.cos(r2), np.sin(r2)
+    r2_to_world = np.matrix([[c, -s, t2_x], [s, c, t2_y], [0, 0, 1]])
+    # world frame -> r2 frame
+    # if r1==r2, do nothing
+    world_to_r2 = np.linalg.inv(r2_to_world)
+
+    out = np.asarray(world_to_r2 @ world).T
+    # reset z-coordinate
+    out[:, 2] = xyz[:, 2]
+
+    return out
+
+def turn_traffic_into_map(all_bbox, det_range):
+    data_total = []
+    for idx in range(1):
+
+        if len(all_bbox) == 0:
+            all_bbox = np.zeros((1,4,2))
+        # plt.cla()
+
+        fig = plt.figure(figsize=(6, 12), dpi=16)
+        plt.gca().xaxis.set_major_locator(plt.NullLocator())
+        plt.gca().yaxis.set_major_locator(plt.NullLocator())
+        plt.subplots_adjust(top = 1, bottom = 0, right = 1, left = 0, hspace = 0, wspace = 0)
+        plt.margins(0,0)
+        ax = plt.gca()
+        ax.set_facecolor("black")
+
+        plt.xlim((-det_range[2], det_range[3]))
+        plt.ylim((-det_range[1], det_range[0]))
+
+        for i in range(len(all_bbox)):
+            plt.fill(all_bbox[i,:,0], all_bbox[i,:,1], color = 'white')
+
+        # plt.axis('off')
+        # If we haven't already shown or saved the plot, then we need to
+        # draw the figure first...
+        fig.canvas.draw()
+
+        # Now we can save it to a numpy array.
+        data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
+        data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,))
+        # H=192, W=96, 3
+        data_total.append(data[:, :, 0])
+        # plt.savefig('/GPFS/public/InterFuser/results/cop3/pnp/multiclass_finetune_fusion_none/test.png')
+        plt.close()
+
+    occ_map = np.stack(data_total, axis=0) # B * T_p, H, W
+    return occ_map
\ No newline at end of file
diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/pcd_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/pcd_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..20acff058e8ef0368c783cf3369524c916dc5eb2
--- /dev/null
+++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/pcd_utils.py
@@ -0,0 +1,235 @@
+# -*- coding: utf-8 -*-
+# Author: Runsheng Xu , Hao Xiang ,
+# License: TDG-Attribution-NonCommercial-NoDistrib
+
+
+"""
+Utility functions related to point cloud
+"""
+
+import open3d as o3d
+import numpy as np
+from pypcd import pypcd
+
+def pcd_to_np(pcd_file):
+    """
+    Read pcd and return numpy array.
+
+    Parameters
+    ----------
+    pcd_file : str
+        The pcd file that contains the point cloud.
+
+    Returns
+    -------
+    pcd_np : np.ndarray
+        The lidar data in numpy format, shape:(n, 4)
+
+    """
+    pcd = o3d.io.read_point_cloud(pcd_file)
+
+    xyz = np.asarray(pcd.points)
+    # we save the intensity in the first channel
+    intensity = np.expand_dims(np.asarray(pcd.colors)[:, 0], -1)
+    pcd_np = np.hstack((xyz, intensity))
+
+    return np.asarray(pcd_np, dtype=np.float32)
+
+
+def mask_points_by_range(points, limit_range):
+    """
+    Remove the lidar points out of the boundary.
+
+    Parameters
+    ----------
+    points : np.ndarray
+        Lidar points under lidar sensor coordinate system.
+ + limit_range : list + [x_min, y_min, z_min, x_max, y_max, z_max] + + Returns + ------- + points : np.ndarray + Filtered lidar points. + """ + + mask = (points[:, 0] > limit_range[0]) & (points[:, 0] < limit_range[3])\ + & (points[:, 1] > limit_range[1]) & ( + points[:, 1] < limit_range[4]) \ + & (points[:, 2] > limit_range[2]) & ( + points[:, 2] < limit_range[5]) + + points = points[mask] + + return points + + +def mask_ego_points(points): + """ + Remove the lidar points of the ego vehicle itself. + + Parameters + ---------- + points : np.ndarray + Lidar points under lidar sensor coordinate system. + + Returns + ------- + points : np.ndarray + Filtered lidar points. + """ + mask = (points[:, 0] >= -1.95) & (points[:, 0] <= 2.95) \ + & (points[:, 1] >= -1.1) & (points[:, 1] <= 1.1) + points = points[np.logical_not(mask)] + + return points + +def mask_ego_points_v2(points): + """ + Remove the lidar points of the ego vehicle itself. + + Parameters + ---------- + points : np.ndarray + Lidar points under lidar sensor coordinate system. + + Returns + ------- + points : np.ndarray + Filtered lidar points. + """ + mask = (points[:, 0] >= -2.95) & (points[:, 0] <= 1.95) \ + & (points[:, 1] >= -1.1) & (points[:, 1] <= 1.1) + points = points[np.logical_not(mask)] + + return points + + +def shuffle_points(points): + shuffle_idx = np.random.permutation(points.shape[0]) + points = points[shuffle_idx] + + return points + + +def lidar_project(lidar_data, extrinsic): + """ + Given the extrinsic matrix, project lidar data to another space. + + Parameters + ---------- + lidar_data : np.ndarray + Lidar data, shape: (n, 4) + + extrinsic : np.ndarray + Extrinsic matrix, shape: (4, 4) + + Returns + ------- + projected_lidar : np.ndarray + Projected lida data, shape: (n, 4) + """ + + lidar_xyz = lidar_data[:, :3].T + # (3, n) -> (4, n), homogeneous transformation + lidar_xyz = np.r_[lidar_xyz, [np.ones(lidar_xyz.shape[1])]] + lidar_int = lidar_data[:, 3] + + # transform to ego vehicle space, (3, n) + project_lidar_xyz = np.dot(extrinsic, lidar_xyz)[:3, :] + # (n, 3) + project_lidar_xyz = project_lidar_xyz.T + # concatenate the intensity with xyz, (n, 4) + projected_lidar = np.hstack((project_lidar_xyz, + np.expand_dims(lidar_int, -1))) + + return projected_lidar + + +def projected_lidar_stack(projected_lidar_list): + """ + Stack all projected lidar together. + + Parameters + ---------- + projected_lidar_list : list + The list containing all projected lidar. + + Returns + ------- + stack_lidar : np.ndarray + Stack all projected lidar data together. + """ + stack_lidar = [] + for lidar_data in projected_lidar_list: + stack_lidar.append(lidar_data) + + return np.vstack(stack_lidar) + + +def downsample_lidar(pcd_np, num): + """ + Downsample the lidar points to a certain number. + + Parameters + ---------- + pcd_np : np.ndarray + The lidar points, (n, 4). + + num : int + The downsample target number. + + Returns + ------- + pcd_np : np.ndarray + The downsampled lidar points. + """ + assert pcd_np.shape[0] >= num + + selected_index = np.random.choice((pcd_np.shape[0]), + num, + replace=False) + pcd_np = pcd_np[selected_index] + + return pcd_np + + +def downsample_lidar_minimum(pcd_np_list): + """ + Given a list of pcd, find the minimum number and downsample all + point clouds to the minimum number. + + Parameters + ---------- + pcd_np_list : list + A list of pcd numpy array(n, 4). + Returns + ------- + pcd_np_list : list + Downsampled point clouds. 
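+
+    Examples
+    --------
+    Illustrative only; random arrays stand in for real lidar scans.
+
+    >>> pcd_list = [np.random.rand(1000, 4), np.random.rand(800, 4)]
+    >>> pcd_list = downsample_lidar_minimum(pcd_list)  # both entries now hold 800 points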
+ """ + minimum = np.Inf + + for i in range(len(pcd_np_list)): + num = pcd_np_list[i].shape[0] + minimum = num if minimum > num else minimum + + for (i, pcd_np) in enumerate(pcd_np_list): + pcd_np_list[i] = downsample_lidar(pcd_np, minimum) + + return pcd_np_list + +def read_pcd(pcd_path): + pcd = pypcd.PointCloud.from_path(pcd_path) + time = None + pcd_np_points = np.zeros((pcd.points, 4), dtype=np.float32) + pcd_np_points[:, 0] = np.transpose(pcd.pc_data["x"]) + pcd_np_points[:, 1] = np.transpose(pcd.pc_data["y"]) + pcd_np_points[:, 2] = np.transpose(pcd.pc_data["z"]) + pcd_np_points[:, 3] = np.transpose(pcd.pc_data["intensity"]) / 256.0 + del_index = np.where(np.isnan(pcd_np_points))[0] + pcd_np_points = np.delete(pcd_np_points, del_index, axis=0) + return pcd_np_points, time \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/pose_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/pose_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..9ae569febb73240ff48600fde4f34cac0367e646 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/pose_utils.py @@ -0,0 +1,191 @@ +import numpy as np +import torch +import torch.distributions as dist + +def add_noise_data_dict(data_dict, noise_setting): + """ Update the base data dict. + We retrieve lidar_pose and add_noise to it. + And set a clean pose. + """ + if noise_setting['add_noise']: + for cav_id, cav_content in data_dict.items(): + cav_content['params']['lidar_pose_clean'] = cav_content['params']['lidar_pose'] # 6 dof pose + + if "laplace" in noise_setting['args'].keys() and noise_setting['args']['laplace'] is True: + cav_content['params']['lidar_pose'] = cav_content['params']['lidar_pose'] + \ + generate_noise_laplace( # we just use the same key name + noise_setting['args']['pos_std'], + noise_setting['args']['rot_std'], + noise_setting['args']['pos_mean'], + noise_setting['args']['rot_mean'] + ) + else: + cav_content['params']['lidar_pose'] = cav_content['params']['lidar_pose'] + \ + generate_noise( + noise_setting['args']['pos_std'], + noise_setting['args']['rot_std'], + noise_setting['args']['pos_mean'], + noise_setting['args']['rot_mean'] + ) + + else: + for cav_id, cav_content in data_dict.items(): + cav_content['params']['lidar_pose_clean'] = cav_content['params']['lidar_pose'] # 6 dof pose + + + return data_dict + +def add_noise_data_dict_asymmetric(data_dict, noise_setting): + """ Update the base data dict. + We retrieve lidar_pose and add_noise to it. + And set a clean pose. 
+ This function add pose error noise for agents with asymmetric detection range + """ + if noise_setting['add_noise']: + for cav_id, cav_content in data_dict.items(): + cav_content['params']['lidar_pose_clean'] = cav_content['params']['lidar_pose'] # 6 dof pose + + if "laplace" in noise_setting['args'].keys() and noise_setting['args']['laplace'] is True: + noise = generate_noise_laplace( # we just use the same key name + noise_setting['args']['pos_std'], + noise_setting['args']['rot_std'], + noise_setting['args']['pos_mean'], + noise_setting['args']['rot_mean'] + ) + cav_content['params']['lidar_pose'] = cav_content['params']['lidar_pose'] + noise + cav_content['params']['map_pose'] = cav_content['params']['map_pose'] + noise + else: + noise = generate_noise( + noise_setting['args']['pos_std'], + noise_setting['args']['rot_std'], + noise_setting['args']['pos_mean'], + noise_setting['args']['rot_mean'] + ) + cav_content['params']['lidar_pose'] = cav_content['params']['lidar_pose'] + noise + cav_content['params']['map_pose'] = cav_content['params']['map_pose'] + noise + else: + for cav_id, cav_content in data_dict.items(): + cav_content['params']['lidar_pose_clean'] = cav_content['params']['lidar_pose'] # 6 dof pose + + + return data_dict + + +def generate_noise(pos_std, rot_std, pos_mean=0, rot_mean=0): + """ Add localization error to the 6dof pose + Noise includes position (x,y) and rotation (yaw). + We use gaussian distribution to generate noise. + + Args: + + pos_std : float + std of gaussian dist, in meter + + rot_std : float + std of gaussian dist, in degree + + pos_mean : float + mean of gaussian dist, in meter + + rot_mean : float + mean of gaussian dist, in degree + + Returns: + pose_noise: np.ndarray, [6,] + [x, y, z, roll, yaw, pitch] + """ + + xy = np.random.normal(pos_mean, pos_std, size=(2)) + yaw = np.random.normal(rot_mean, rot_std, size=(1)) + + pose_noise = np.array([xy[0], xy[1], 0, 0, yaw[0], 0]) + + + return pose_noise + + + +def generate_noise_laplace(pos_b, rot_b, pos_mu=0, rot_mu=0): + """ Add localization error to the 6dof pose + Noise includes position (x,y) and rotation (yaw). + We use laplace distribution to generate noise. + + Args: + + pos_b : float + parameter b of laplace dist, in meter + + rot_b : float + parameter b of laplace dist, in degree + + pos_mu : float + mean of laplace dist, in meter + + rot_mu : float + mean of laplace dist, in degree + + Returns: + pose_noise: np.ndarray, [6,] + [x, y, z, roll, yaw, pitch] + """ + + xy = np.random.laplace(pos_mu, pos_b, size=(2)) + yaw = np.random.laplace(rot_mu, rot_b, size=(1)) + + pose_noise = np.array([xy[0], xy[1], 0, 0, yaw[0], 0]) + return pose_noise + + +def generate_noise_torch(pose, pos_std, rot_std, pos_mean=0, rot_mean=0): + """ only used for v2vnet robust. + rotation noise is sampled from von_mises distribution + + Args: + pose : Tensor, [N. 
6] + including [x, y, z, roll, yaw, pitch] + + pos_std : float + std of gaussian dist, in meter + + rot_std : float + std of gaussian dist, in degree + + pos_mean : float + mean of gaussian dist, in meter + + rot_mean : float + mean of gaussian dist, in degree + + Returns: + pose_noisy: Tensor, [N, 6] + noisy pose + """ + + N = pose.shape[0] + noise = torch.zeros_like(pose, device=pose.device) + concentration = (180 / (np.pi * rot_std)) ** 2 + + noise[:, :2] = torch.normal(pos_mean, pos_std, size=(N, 2), device=pose.device) + noise[:, 4] = dist.von_mises.VonMises(loc=rot_mean, concentration=concentration).sample((N,)).to(noise.device) + + + return noise + + +def remove_z_axis(T): + """ remove rotation/translation related to z-axis + Args: + T: np.ndarray + [4, 4] + Returns: + T: np.ndarray + [4, 4] + """ + T[2,3] = 0 # z-trans + T[0,2] = 0 + T[1,2] = 0 + T[2,0] = 0 + T[2,1] = 0 + T[2,2] = 1 + + return T \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/setup.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..09eda12a658db87893c88101aa702cebc6f4deac --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/setup.py @@ -0,0 +1,8 @@ +from distutils.core import setup +from Cython.Build import cythonize +import numpy +setup( + name='box overlaps', + ext_modules=cythonize('opencood/utils/box_overlaps.pyx'), + include_dirs=[numpy.get_include()] +) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/spconv_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/spconv_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..344f519508b38cfc9f86da5fb42291d5e4264046 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/spconv_utils.py @@ -0,0 +1,164 @@ +import spconv +import torch +import numpy as np +from spconv.modules import SparseModule +from opencood.utils.box_utils import project_points_by_matrix_torch +from torch_scatter import scatter +from icecream import ic + +class RemoveDuplicate(SparseModule): + """ + Only keep one when duplicated + """ + def forward(self, x: spconv.SparseConvTensor): + inds = x.indices + spatial_shape = [x.batch_size, *x.spatial_shape] + spatial_stride = [0] * len(spatial_shape) + val = 1 + for i in range(inds.shape[1] - 1, -1, -1): + spatial_stride[i] = val + val *= spatial_shape[i] + indices_index = inds[:, -1].clone() + + for i in range(len(spatial_shape) - 1): + indices_index += spatial_stride[i] * inds[:, i] + + _, unique_inds = torch.unique(indices_index, return_inverse=True) + unique_inds = torch.unique(unique_inds) + new_inds = inds[unique_inds] + new_features = x.features[unique_inds] + res = spconv.SparseConvTensor(new_features, new_inds, x.spatial_shape, + x.batch_size, x.grid) + return res + +class MergeDuplicate(SparseModule): + def __init__(self, reduce="max"): + super().__init__() + self.reduce = reduce + def forward(self, x: spconv.SparseConvTensor): + inds = x.indices + spatial_shape = [x.batch_size, *x.spatial_shape] + spatial_stride = [0] * len(spatial_shape) + val = 1 + for i in range(inds.shape[1] - 1, -1, -1): + spatial_stride[i] = val + val *= spatial_shape[i] + indices_index = inds[:, -1].clone() + + for i in range(len(spatial_shape) - 1): + indices_index += spatial_stride[i] * inds[:, i] + + _, unique_inds = torch.unique(indices_index, return_inverse=True) # [0, 1, 0] + + 
scatter_feature = x.features # [N_point, features] + scatter_indices = unique_inds # [N_point, ] + + out_feature = scatter(scatter_feature, scatter_indices, dim=0, reduce=self.reduce) # [N', num_features] + out_indices = scatter(scatter_indices, scatter_indices, dim=0, reduce="mean") + out_indices = inds[out_indices] # [N', ndim+1] + + res = spconv.SparseConvTensor(out_feature, out_indices, x.spatial_shape, + x.batch_size, x.grid) + return res + + +def fuseSparseTensor(x_list): + """ + Suppose same spatial shape. + Need eliminate same pos tensor later + """ + new_features = torch.cat([x.features for x in x_list], dim=0) + new_indice = torch.cat([x.indices for x in x_list], dim=0) + res = spconv.SparseConvTensor(new_features, new_indice, x_list[0].spatial_shape, + x_list[0].batch_size, x_list[0].grid) + return res + + +class warpSparseTensor(SparseModule): + """ + warp the sparse tensor. + 1. Retrieve the indices + 2. turn indices to grid point + 3. transform grid point + 4. turn back to indices + 5. construct new sparse tensor + Args: + x: SparseTensor, + spatial_shape:(z,y,x) + transformation: torch.Tensor + [4,4] + voxel_size: torch.Tensor + [v_x, v_y, v_z] + range3d: list + [xmin, xmax, ymin, ymax, zmin, zmax] + + """ + def indices_to_point(self, indices, transformation_matrix, voxel_size, range3d): + """ + indices: [batch_id, z, y, x] + """ + indices_xyz = indices[:,[3,2,1]].clone().double() # [x, y, z] + indices_xyz[:,0] += torch.div(range3d[0], voxel_size[0]) + indices_xyz[:,1] += torch.div(range3d[1], voxel_size[1]) + indices_xyz[:,2] += torch.div(range3d[2], voxel_size[2]) + indices_xyz += 0.5 + + points_xyz = indices_xyz * voxel_size # [N_points, 3] + points_xyz_new = project_points_by_matrix_torch(points_xyz, transformation_matrix) + + return points_xyz_new + + def construct_new_tensor(self, x, points_xyz, voxel_size, range3d): + """ + points_new: tensor + [N_points, ndim + 1], first dim is batch id + """ + mask = (points_xyz[:, 0] > range3d[0]) & (points_xyz[:, 0] < range3d[3])\ + & (points_xyz[:, 1] > range3d[1]) & (points_xyz[:, 1] < range3d[4]) \ + & (points_xyz[:, 2] > range3d[2]) & (points_xyz[:, 2] < range3d[5]) + + features_new = x.features[mask] + points_xyz = points_xyz[mask] + new_indices = x.indices[mask].clone() + + new_indices_xyz = torch.div(points_xyz, voxel_size) + + new_indices_xyz[:,0] -= torch.div(range3d[0], voxel_size[0]) + new_indices_xyz[:,1] -= torch.div(range3d[1], voxel_size[1]) + new_indices_xyz[:,2] -= torch.div(range3d[2], voxel_size[2]) + + new_indices[:,1:] = new_indices_xyz[:,[2,1,0]].long() + + return spconv.SparseConvTensor(features_new, new_indices, x.spatial_shape, x.batch_size, x.grid) + + + def forward(self, x, transformation_matrix, voxel_size, range3d): + points_new = self.indices_to_point(x.indices, transformation_matrix, voxel_size, range3d) + return self.construct_new_tensor(x, points_new, voxel_size, range3d) + + + + +def test(): + feature1 = torch.randn(2,8) + feature2 = torch.randn(2,8) + indices1 = torch.Tensor([[0,0,1,2],[0,0,2,3]]) + indices2 = torch.Tensor([[0,0,1,3],[0,0,2,4]]) + spatial_shape = (4,3,5) # z,y,x + batch_size = 1 + + voxel_size = (0.4, 0.4, 1) + pc_range = [-40, -40, -3, 40, 40, 1] + tfm = torch.eye(4) + tfm[1,3] += 2 + warpsp = warpSparseTensor() + sp1 = spconv.SparseConvTensor(feature1, indices1,spatial_shape, batch_size) + sp2 = warpsp(sp1, tfm, voxel_size, pc_range) + ic(sp1.features) + ic(sp1.indices) + ic(sp2.features) + ic(sp2.indices) + + +if __name__ == "__main__": + test() \ No newline at end of file 
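
A brief usage sketch for the pose-noise helpers defined in pose_utils.py above. It is not part of the original scripts: the import path, the std/mean values, and the minimal data_dict layout are illustrative assumptions (only the fields that add_noise_data_dict actually touches are included).

import numpy as np
from opencood.utils.pose_utils import add_noise_data_dict  # assumed import path

# noise_setting layout as consumed by add_noise_data_dict; the values are made up
noise_setting = {
    'add_noise': True,
    'args': {'pos_std': 0.2, 'rot_std': 0.2, 'pos_mean': 0, 'rot_mean': 0}
}

# minimal data_dict: one cav with a 6-DoF lidar pose [x, y, z, roll, yaw, pitch]
data_dict = {
    0: {'params': {'lidar_pose': np.array([10.0, 2.0, 1.9, 0.0, 90.0, 0.0])}}
}

data_dict = add_noise_data_dict(data_dict, noise_setting)
print(data_dict[0]['params']['lidar_pose'])        # perturbed pose used downstream
print(data_dict[0]['params']['lidar_pose_clean'])  # original pose preserved alongside it
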
diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/subsampling_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/subsampling_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..b8b6fd566361abe0d630541cfd58bc995e200842 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/subsampling_utils.py @@ -0,0 +1,254 @@ +""" +https://github.com/AmnonDrory/BestBuddiesRegistration/blob/main/code/bb_pc/utils/subsampling.py +""" + +import numpy as np +import open3d as o3d +import pandas as pd +from copy import deepcopy + +num_features = 3 + +def calc_bin_inds(PC, n_bins, axis, mode): + N = PC.shape[0] + if "adaptive" in mode: + inds = np.round(np.linspace(0, N, n_bins + 1)).astype(int) + s = np.sort(PC[:, axis]) + thresh = s[inds[1:]-1] + else: # "equally_spaced" + thresh = np.linspace(np.min(PC[:,axis]), np.max(PC[:,axis]), n_bins + 1) + thresh = thresh[1:] + + bin_ind = np.zeros(N) + np.nan + for i in range(n_bins): + is_cur = (PC[:, axis] <= thresh[i]) & np.isnan(bin_ind) + bin_ind[is_cur] = i + + assert np.sum(np.isnan(bin_ind)) == 0, "Error: not all samples were assigned to a bin" + + return bin_ind + +def voxelGrid_filter_inner(PC, num_samples, mode): + + if "equal_nbins_per_axis" in mode: + n_bins = int(np.ceil(num_samples ** (1. / 3))) + n_bins_x = n_bins + n_bins_y = n_bins + n_bins_z = n_bins + else: + span = [] + for axis in range(3): + span.append( np.max(PC[:,axis])-np.min(PC[:,axis]) ) + normalized_num_samples = num_samples * (span[0]**2 / (span[1]*span[2])) + n_bins_x = int(np.ceil(normalized_num_samples ** (1. / 3))) + n_bins_y = int(np.ceil(n_bins_x * (span[1]/span[0]))) + n_bins_z = int(np.ceil(n_bins_x * (span[2] / span[0]))) + assert (n_bins_x * n_bins_y * n_bins_z) >= num_samples, "Error" + x_bin_inds = calc_bin_inds(PC, n_bins_x, 0, mode) + y_bin_inds = calc_bin_inds(PC, n_bins_y, 1, mode) + z_bin_inds = calc_bin_inds(PC, n_bins_z, 2, mode) + + data = np.hstack([x_bin_inds.reshape([-1,1]), + y_bin_inds.reshape([-1,1]), + z_bin_inds.reshape([-1,1]), + PC]) + + df = pd.DataFrame(data, columns=['x_ind', 'y_ind', 'z_ind', 'x', 'y', 'z']) + newPC = np.array(df.groupby(['x_ind', 'y_ind', 'z_ind']).mean()) + + return newPC + +def voxelGrid_filter(PC, num_requested_samples, mode): + """ + Sub-sample a point cloud by defining a grid of voxels, and returning the average point in each one. + + :param PC: Nx3 array, point cloud, each row is a sample + :param num_samples: numbver of requested samples + :param mode: list of strings, can contain any of the following: + "exact_number" - return exactly num_requested_samples, otherwise may return more than requested number (but never less) + "equal_nbins_per_axis" - same number of bins for each axis (x,y,z). Otherwise the bins are cube shaped, and usually a different number of bins fits in each of the dimensions. + "adaptive" - smaller bins where there is more data. Otherwise, all bins are the same size. 
+ :return: newPC - a point cloud with approximately num_requested_samples + """ + num_samples = num_requested_samples + N = PC.shape[0] + done = False + MAX_ATTEMPTS = 40 + ACCELERATION_FACTOR = 2 + MAX_DIVERGENCE_TIME = 4 + TOLERANCE = 0.05 + rel_history = [] + newPC_history = [] + while not done: + newPC = voxelGrid_filter_inner(PC, num_samples, mode) + new_N = newPC.shape[0] + newPC_history.append(newPC) + relative_error_in_size = (new_N/float(num_requested_samples)) -1 + rel_history.append(relative_error_in_size) + if (relative_error_in_size < 0) or (relative_error_in_size > TOLERANCE): + best_ind = np.argmin(np.abs(rel_history)) + if (len(rel_history) - best_ind > MAX_DIVERGENCE_TIME) and (np.max(rel_history) > 0): + done = True + else: + num_samples = int(np.ceil(num_samples*float(num_requested_samples)/new_N)) + if (np.max(rel_history) < 0): + num_samples = int(ACCELERATION_FACTOR*num_samples) + + else: + done = True + + if len(rel_history) >= MAX_ATTEMPTS: + done = True + + if len(rel_history) >= MAX_ATTEMPTS: + assert False, "voxelGrid_filter could not supply required number of samples" + print("Error: voxelGrid_filter could not supply required number of samples, recovering") + best_ind = np.argmax(rel_history) + return newPC_history[best_ind] + + rel_history_above_only = np.array(rel_history) + rel_history_above_only[rel_history_above_only<0] = np.inf + best_ind_above = np.argmin(rel_history_above_only) + + newPC = newPC_history[best_ind_above] + if 'exact_number' in mode: + p = np.random.permutation(newPC.shape[0]) + inds = p[:num_requested_samples] + newPC = newPC[inds,:] + + return newPC + +def voxel_filter(pcd, N): + # pcd is of open3d point cloud class + if "numpy" in str(type(pcd)): + tmp = o3d.geometry.PointCloud() + tmp.points = o3d.utility.Vector3dVector(pcd) + pcd = tmp + K = np.shape(pcd.points)[0] + vs = 1e-3 + while K>N: + pcd = o3d.geometry.voxel_down_sample(pcd, voxel_size=vs) + vs *= 2 + K = np.shape(pcd.points)[0] + return pcd + +def calc_distances(p0, points): + return ((p0 - points) ** 2).sum(axis=1) + +def fps_from_given_pc(pts, K, given_pc): + """ + copied from https://github.com/orendv/learning_to_sample/blob/master/reconstruction/src/sample_net_point_net_ae.py + :param self: + :param pts: + :param K: + :param given_pc: + :return: + """ + farthest_pts = np.zeros((K, 3)) + t = given_pc.shape[0] + farthest_pts[0:t,:] = given_pc + + distances = calc_distances(farthest_pts[0], pts) + for i in range(1, t): + distances = np.minimum(distances, calc_distances(farthest_pts[i,:], pts)) + + for i in range(t, K): + farthest_pts[i,:] = pts[np.argmax(distances),:] + distances = np.minimum(distances, calc_distances(farthest_pts[i,:], pts)) + return farthest_pts + +def get_random_subset(PC, num_samples, mode="farthest", submode=None, allow_overask=False): + """ + Subsample a point cloud, using either of various methods + + :param PC: + :param num_samples: + :param mode: + :param n_bins: + :param submode: Relevant for the "r_normalized" and "r_squared_normalized" methods. 
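+ :param allow_overask: if True, return the full point cloud instead of asserting when more samples are requested than there are points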
+ :return: + """ + if num_samples > PC.shape[0]: + if allow_overask: + return PC + else: + assert False, "Error: requesting more samples than there are" + + if PC.shape[0] == num_samples: + result = PC + if mode == "uniform": + inds = np.random.permutation(PC.shape[0])[:num_samples] + result = PC[inds, :] + elif mode == "farthest": + first_ind = np.random.permutation(PC.shape[0])[0] + result = fps_from_given_pc(PC, num_samples, PC[first_ind:(first_ind+1), :]) + elif "voxel" in mode: + if submode is None: + submode = ["equal_nbins_per_axis"] + + # The voxelGrid subsampling algorithm has no randomality. + # we force it to have some by rendomly removing a small subset of the points + + keep_fraction = 0.9 + num_keep = int(PC.shape[0]*keep_fraction) + if num_samples < num_keep: + PC = get_random_subset(PC, num_keep, mode="uniform") + result = voxelGrid_filter(PC, num_samples, submode) + + else: + assert False, "unknown mode" + + return result + +def subsample_fraction(PC, fraction): + N = PC.shape[0] + subset_size = int(np.round(N * fraction)) + inds = np.random.permutation(N)[:subset_size] + return PC[inds,:] + + +def keep_closest(PC, max_dist): + R = np.sqrt(np.sum(PC ** 2, axis=1)) + return PC[R <= max_dist, :] + + +def fit_plane(PC): + xy1 = deepcopy(PC) + xy1[:, 2] = 1 + z = PC[:, 2] + abc, _, _, _ = np.linalg.lstsq(xy1, z, rcond=None) + return abc + + +def is_on_plane(PC, abc, thickness): + all_xy1 = deepcopy(PC) + all_xy1[:, 2] = 1 + predicted_road_z = np.matmul(all_xy1, abc.reshape([-1, 1])).flatten() + res = np.abs(PC[:, 2] - predicted_road_z) <= thickness + return res + +def remove_road(PC): + mode = "plane" # "constant_height" + local_PC = keep_closest(PC, 10) + count, bin_edges = np.histogram(local_PC[:, 2], 100) + bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:]) + ind_of_most_frequent = np.argmax(count) + road_z = bin_centers[ind_of_most_frequent] + road_thickness = 0.5 # meters + if mode == "constant_height": + is_road = np.abs(PC[:, 2] - road_z) <= road_thickness + elif mode == "plane": + raw_is_road = np.abs(local_PC[:, 2] - road_z) <= road_thickness + raw_road_points = local_PC[raw_is_road, :] + xy1 = deepcopy(raw_road_points) + xy1[:, 2] = 1 + z = raw_road_points[:, 2] + abc, _, _, _ = np.linalg.lstsq(xy1, z, rcond=None) + all_xy1 = deepcopy(PC) + all_xy1[:, 2] = 1 + predicted_road_z = np.matmul(all_xy1, abc.reshape([-1, 1])).flatten() + is_road = np.abs(PC[:, 2] - predicted_road_z) <= road_thickness + else: + assert False, "unknown mode" + + return PC[~is_road, :] \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/transformation_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/transformation_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..c574dbe6f48f5e0d70abfcd5207e39f6097c5293 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/transformation_utils.py @@ -0,0 +1,548 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , Hao Xiang , +# License: TDG-Attribution-NonCommercial-NoDistrib + + +""" +Transformation utils +""" + +from re import X +import numpy as np +import torch +from icecream import ic +from pyquaternion import Quaternion +from opencood.utils.common_utils import check_numpy_to_torch + +def regroup(x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + +def get_pairwise_transformation(base_data_dict, max_cav, proj_first): + """ + Get pair-wise 
transformation matrix accross different agents. + + Parameters + ---------- + base_data_dict : dict + Key : cav id, item: transformation matrix to ego, lidar points. + + max_cav : int + The maximum number of cav, default 5 + + Return + ------ + pairwise_t_matrix : np.array + The pairwise transformation matrix across each cav. + shape: (L, L, 4, 4), L is the max cav number in a scene + pairwise_t_matrix[i, j] is Tji, i_to_j + """ + pairwise_t_matrix = np.tile(np.eye(4), (max_cav, max_cav, 1, 1)) # (L, L, 4, 4) + + if proj_first: + # if lidar projected to ego first, then the pairwise matrix + # becomes identity + # no need to warp again in fusion time. + + # pairwise_t_matrix[:, :] = np.identity(4) + return pairwise_t_matrix + else: + t_list = [] + + # save all transformation matrix in a list in order first. + for cav_id, cav_content in base_data_dict.items(): + lidar_pose = cav_content['params']['lidar_pose'] + t_list.append(x_to_world(lidar_pose)) # Twx + + for i in range(len(t_list)): + for j in range(len(t_list)): + # identity matrix to self + if i != j: + # i->j: TiPi=TjPj, Tj^(-1)TiPi = Pj + # t_matrix = np.dot(np.linalg.inv(t_list[j]), t_list[i]) + t_matrix = np.linalg.solve(t_list[j], t_list[i]) # Tjw*Twi = Tji + pairwise_t_matrix[i, j] = t_matrix + + return pairwise_t_matrix + +def get_pairwise_transformation_asymmetric(base_data_dict, max_cav, proj_first): + """ + Get pair-wise transformation matrix accross different agents with detection range being asymmetric. + + Parameters + ---------- + base_data_dict : dict + Key : cav id, item: transformation matrix to ego, lidar points. + + max_cav : int + The maximum number of cav, default 5 + + Return + ------ + pairwise_t_matrix : np.array + The pairwise transformation matrix across each cav. + shape: (L, L, 4, 4), L is the max cav number in a scene + pairwise_t_matrix[i, j] is Tji, i_to_j + """ + pairwise_t_matrix = np.tile(np.eye(4), (max_cav, max_cav, 1, 1)) # (L, L, 4, 4) + + if proj_first: + # if lidar projected to ego first, then the pairwise matrix + # becomes identity + # no need to warp again in fusion time. + + # pairwise_t_matrix[:, :] = np.identity(4) + return pairwise_t_matrix + else: + t_list = [] + # save all transformation matrix in a list in order first. + for cav_id, cav_content in base_data_dict.items(): + lidar_pose = cav_content['params']['map_pose'] + t_list.append(x_to_world(lidar_pose)) # Twx + + for i in range(len(t_list)): + for j in range(len(t_list)): + # identity matrix to self + if i != j: + # i->j: TiPi=TjPj, Tj^(-1)TiPi = Pj + # t_matrix = np.dot(np.linalg.inv(t_list[j]), t_list[i]) + t_matrix = np.linalg.solve(t_list[j], t_list[i]) # Tjw*Twi = Tji + pairwise_t_matrix[i, j] = t_matrix + + return pairwise_t_matrix + +def normalize_pairwise_tfm(pairwise_t_matrix, H, W, discrete_ratio, downsample_rate=1): + """ + normalize the pairwise transformation matrix to affine matrix need by torch.nn.functional.affine_grid() + + pairwise_t_matrix: torch.tensor + [B, L, L, 4, 4], B batchsize, L max_cav + H: num. + Feature map height + W: num. + Feature map width + discrete_ratio * downsample_rate: num. 
+ One pixel on the feature map corresponds to the actual physical distance + """ + + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (downsample_rate * discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (downsample_rate * discrete_ratio * H) * 2 + + return pairwise_t_matrix + +def pose_to_tfm(pose): + """ Transform batch of pose to tfm + Args: + pose: torch.Tensor or np.ndarray + [N, 3], x, y, yaw, in degree + [N, 6], x, y, z, roll, yaw, pitch, in degree + + roll and pitch follows carla coordinate + Returns: + tfm: torch.Tensor + [N, 4, 4] + """ + + pose_tensor, is_np = check_numpy_to_torch(pose) + pose = pose_tensor + + + if pose.shape[1] == 3: + N = pose.shape[0] + x = pose[:,0] + y = pose[:,1] + yaw = pose[:,2] + + tfm = torch.eye(4, device=pose.device).view(1,4,4).repeat(N,1,1) + tfm[:,0,0] = torch.cos(torch.deg2rad(yaw)) + tfm[:,0,1] = - torch.sin(torch.deg2rad(yaw)) + tfm[:,1,0] = torch.sin(torch.deg2rad(yaw)) + tfm[:,1,1] = torch.cos(torch.deg2rad(yaw)) + tfm[:,0,3] = x + tfm[:,1,3] = y + + elif pose.shape[1] == 6: + N = pose.shape[0] + x = pose[:,0] + y = pose[:,1] + z = pose[:,2] + roll = pose[:,3] + yaw = pose[:,4] + pitch = pose[:,5] + + c_y = torch.cos(torch.deg2rad(yaw)) + s_y = torch.sin(torch.deg2rad(yaw)) + c_r = torch.cos(torch.deg2rad(roll)) + s_r = torch.sin(torch.deg2rad(roll)) + c_p = torch.cos(torch.deg2rad(pitch)) + s_p = torch.sin(torch.deg2rad(pitch)) + + tfm = torch.eye(4, device=pose.device).view(1,4,4).repeat(N,1,1) + + # translation matrix + tfm[:, 0, 3] = x + tfm[:, 1, 3] = y + tfm[:, 2, 3] = z + + # rotation matrix + tfm[:, 0, 0] = c_p * c_y + tfm[:, 0, 1] = c_y * s_p * s_r - s_y * c_r + tfm[:, 0, 2] = -c_y * s_p * c_r - s_y * s_r + tfm[:, 1, 0] = s_y * c_p + tfm[:, 1, 1] = s_y * s_p * s_r + c_y * c_r + tfm[:, 1, 2] = -s_y * s_p * c_r + c_y * s_r + tfm[:, 2, 0] = s_p + tfm[:, 2, 1] = -c_p * s_r + tfm[:, 2, 2] = c_p * c_r + + if is_np: + tfm = tfm.numpy() + + return tfm + + + + +def tfm_to_pose(tfm: np.ndarray): + """ + turn transformation matrix to [x, y, z, roll, yaw, pitch] + we use radians format. + tfm is pose in transformation format, and XYZ order, i.e. roll-pitch-yaw + """ + # There forumlas are designed from x_to_world, but equal to the one below. 
+ yaw = np.degrees(np.arctan2(tfm[1,0], tfm[0,0])) # clockwise in carla + roll = np.degrees(np.arctan2(-tfm[2,1], tfm[2,2])) # but counter-clockwise in carla + pitch = np.degrees(np.arctan2(tfm[2,0], ((tfm[2,1]**2 + tfm[2,2]**2) ** 0.5)) ) # but counter-clockwise in carla + + + # These formulas are designed for consistent axis orientation + # yaw = np.degrees(np.arctan2(tfm[1,0], tfm[0,0])) # clockwise in carla + # roll = np.degrees(np.arctan2(tfm[2,1], tfm[2,2])) # but counter-clockwise in carla + # pitch = np.degrees(np.arctan2(-tfm[2,0], ((tfm[2,1]**2 + tfm[2,2]**2) ** 0.5)) ) # but counter-clockwise in carla + + # roll = - roll + # pitch = - pitch + + x, y, z = tfm[:3,3] + return([x, y, z, roll, yaw, pitch]) + +def tfm_to_xycs_torch(tfm: torch.Tensor): + """ + similar to tfm_to_pose_torch, + return x/y/cos(yaw)/sin(yaw) + """ + x = tfm[:,0,3] + y = tfm[:,1,3] + + cos = tfm[:,0,0] + sin = tfm[:,1,0] + + pose = torch.stack([x,y,cos,sin]).T # (N, 4) + + return pose + +def xycs_to_tfm_torch(xycs: torch.Tensor): + """ + Args: xycs + [N, 4] + """ + N = xycs.shape[0] + tfm = torch.eye(4, device=xycs.device).view(1,4,4).repeat(N,1,1) + + x, y, cos, sin = xycs[:,0], xycs[:,1], xycs[:,2], xycs[:,3] + + tfm[:,0,0] = cos + tfm[:,0,1] = - sin + tfm[:,1,0] = sin + tfm[:,1,1] = cos + tfm[:,0,3] = x + tfm[:,1,3] = y + + return tfm + +def tfm_to_pose_torch(tfm: torch.Tensor, dof: int): + """ + turn transformation matrix to [x, y, z, roll, yaw, pitch] + we use degree format. + tfm is pose in transformation format, and XYZ order, i.e. roll-pitch-yaw + + Args: + tfm: [N, 4, 4] + dof: 3 or 6 + Returns: + 6dof pose: [N, 6] + """ + + # There forumlas are designed from x_to_world, but equal to the one below. + yaw = torch.rad2deg(torch.atan2(tfm[:,1,0], tfm[:,0,0])) # clockwise in carla + roll = torch.rad2deg(torch.atan2(-tfm[:,2,1], tfm[:,2,2])) # but counter-clockwise in carla + pitch = torch.rad2deg(torch.atan2(tfm[:,2,0], (tfm[:,2,1]**2 + tfm[:,2,2]**2) ** 0.5)) # but counter-clockwise in carla + + # These formulas are designed for consistent axis orientation + # yaw = torch.rad2deg(torch.atan2(tfm[:,1,0], tfm[:,0,0])) # clockwise in carla + # roll = torch.rad2deg(torch.atan2(tfm[:,2,1], tfm[:,2,2])) # but counter-clockwise in carla + # pitch = torch.rad2deg(torch.atan2(-tfm[:,2,0], (tfm[:,2,1]**2 + tfm[:,2,2]**2) ** 0.5)) # but counter-clockwise in carla + + # roll = - roll + # pitch = - pitch + + x = tfm[:,0,3] + y = tfm[:,1,3] + z = tfm[:,2,3] + + if dof == 6: + pose = torch.stack([x,y,z,roll,yaw,pitch]).T # (N, 6) + elif dof == 3: + pose = torch.stack([x,y,yaw]).T + else: + raise("Only support returning 3dof/6dof pose.") + + return pose + + +def x_to_world(pose): + """ + The transformation matrix from x-coordinate system to carla world system + Also is the pose in world coordinate: T_world_x + + Parameters + ---------- + pose : list + [x, y, z, roll, yaw, pitch], degree + + Returns + ------- + matrix : np.ndarray + The transformation matrix. 
+ """ + x, y, z, roll, yaw, pitch = pose[:] + + # used for rotation matrix + c_y = np.cos(np.radians(yaw)) + s_y = np.sin(np.radians(yaw)) + c_r = np.cos(np.radians(roll)) + s_r = np.sin(np.radians(roll)) + c_p = np.cos(np.radians(pitch)) + s_p = np.sin(np.radians(pitch)) + + matrix = np.identity(4) + + # translation matrix + matrix[0, 3] = x + matrix[1, 3] = y + matrix[2, 3] = z + + # rotation matrix + matrix[0, 0] = c_p * c_y + matrix[0, 1] = c_y * s_p * s_r - s_y * c_r + matrix[0, 2] = -c_y * s_p * c_r - s_y * s_r + matrix[1, 0] = s_y * c_p + matrix[1, 1] = s_y * s_p * s_r + c_y * c_r + matrix[1, 2] = -s_y * s_p * c_r + c_y * s_r + matrix[2, 0] = s_p + matrix[2, 1] = -c_p * s_r + matrix[2, 2] = c_p * c_r + + return matrix + + +def x1_to_x2(x1, x2): + """ + Transformation matrix from x1 to x2. T_x2_x1 + + Parameters + ---------- + x1 : list + The pose of x1 under world coordinates. + x2 : list + The pose of x2 under world coordinates. + + yaw, pitch, roll in degree + + Returns + ------- + transformation_matrix : np.ndarray + The transformation matrix. + + """ + x1_to_world = x_to_world(x1) # wP = x1_to_world * 1P, so x1_to_world is Tw1 + x2_to_world = x_to_world(x2) # Tw2 + world_to_x2 = np.linalg.inv(x2_to_world) # T2w + + transformation_matrix = np.dot(world_to_x2, x1_to_world) # T2w * Tw1 = T21 + return transformation_matrix + + +def dist_to_continuous(p_dist, displacement_dist, res, downsample_rate): + """ + Convert points discretized format to continuous space for BEV representation. + Parameters + ---------- + p_dist : numpy.array + Points in discretized coorindates. + + displacement_dist : numpy.array + Discretized coordinates of bottom left origin. + + res : float + Discretization resolution. + + downsample_rate : int + Dowmsamping rate. + + Returns + ------- + p_continuous : numpy.array + Points in continuous coorindates. + + """ + p_dist = np.copy(p_dist) + p_dist = p_dist + displacement_dist + p_continuous = p_dist * res * downsample_rate + return p_continuous + + +def get_pairwise_transformation_torch(lidar_poses, max_cav, record_len, dof): + """ + Get pair-wise transformation matrix accross different agents. + Designed for batch data + + Parameters + ---------- + lidar_poses : tensor, [N, 3] or [N, 6] + 3 or 6 dof pose of lidar. + + max_cav : int + The maximum number of cav, default 5 + + record: list + shape (B) + + dof: int, 3 or 6 + + Return + ------ + pairwise_t_matrix : np.array + The pairwise transformation matrix across each cav. + shape: (B, L, L, 4, 4), L is the max cav number in a scene + pairwise_t_matrix[i, j] is Tji, i_to_j + """ + def regroup(x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + B = len(record_len) + lidar_poses_list = regroup(lidar_poses, record_len) + + pairwise_t_matrix = torch.eye(4, device=lidar_poses.device).view(1,1,1,4,4).repeat(B, max_cav, max_cav, 1, 1) # (B, L, L, 4, 4) + # save all transformation matrix in a list in order first. + for b in range(B): + lidar_poses = lidar_poses_list[b] # [N_cav, 3] or [N_cav, 6]. 
+ t_list = pose_to_tfm(lidar_poses) # Twx, [N_cav, 4, 4] + + for i in range(len(t_list)): + for j in range(len(t_list)): + # identity matrix to self + if i != j: + # i->j: TiPi=TjPj, Tj^(-1)TiPi = Pj + # t_matrix = np.dot(np.linalg.inv(t_list[j]), t_list[i]) + t_matrix = torch.linalg.solve(t_list[j], t_list[i]) # Tjw*Twi = Tji + pairwise_t_matrix[b][i, j] = t_matrix + + return pairwise_t_matrix + + +def get_relative_transformation(lidar_poses): + """ + Args: + lidar_pose: np.ndarray + [N, dof], lidar pose in world coordinate + N is the agent number, dof is 3/6. + + [x, y, z, roll, yaw, pitch], degree + + Returns: + relative transformation, in ego's coordinate + """ + N = lidar_poses.shape[0] + dof = lidar_poses.shape[1] + + if dof == 3: + full_lidar_poses = np.zeros((N, 6)) + full_lidar_poses[:,[0,1,4]] = lidar_poses + lidar_poses = full_lidar_poses + + relative_t_matrix = np.eye(4).reshape(1,4,4).repeat(N, axis=0) # [N, 4, 4] + for i in range(1, N): + relative_t_matrix[i] = x1_to_x2(lidar_poses[i], lidar_poses[0]) + + return relative_t_matrix + + + +def muilt_coord(rotationA2B, translationA2B, rotationB2C, translationB2C): + rotationA2B = np.array(rotationA2B).reshape(3, 3) + rotationB2C = np.array(rotationB2C).reshape(3, 3) + rotation = np.dot(rotationB2C, rotationA2B) + translationA2B = np.array(translationA2B).reshape(3, 1) + translationB2C = np.array(translationB2C).reshape(3, 1) + translation = np.dot(rotationB2C, translationA2B) + translationB2C + + return rotation, translation + + +def veh_side_rot_and_trans_to_trasnformation_matrix(lidar_to_novatel_json_file,novatel_to_world_json_file): + matrix = np.empty([4,4]) + rotationA2B = lidar_to_novatel_json_file["transform"]["rotation"] + translationA2B = lidar_to_novatel_json_file["transform"]["translation"] + rotationB2C = novatel_to_world_json_file["rotation"] + translationB2C = novatel_to_world_json_file["translation"] + rotation,translation = muilt_coord(rotationA2B, translationA2B, rotationB2C, translationB2C) + matrix[0:3, 0:3] = rotation + matrix[:, 3][0:3] = np.array(translation)[:, 0] + matrix[3, 0:3] = 0 + matrix[3, 3] = 1 + + return matrix + +def inf_side_rot_and_trans_to_trasnformation_matrix(json_file,system_error_offset): + matrix = np.empty([4,4]) + matrix[0:3, 0:3] = json_file["rotation"] + translation = np.array(json_file["translation"]) + translation[0][0] = translation[0][0] + system_error_offset["delta_x"] + translation[1][0] = translation[1][0] + system_error_offset["delta_y"] #为啥有[1][0]??? 
--> translation是(3,1)的 + matrix[:, 3][0:3] = translation[:, 0] + matrix[3, 0:3] = 0 + matrix[3, 3] = 1 + + return matrix + +def rot_and_trans_to_trasnformation_matrix(json_file): + matrix = np.empty([4,4]) + matrix[0:3, 0:3] = json_file["rotation"] + matrix[:, 3][0:3] = np.array(json_file["translation"])[:, 0] + matrix[3, 0:3] = 0 + matrix[3, 3] = 1 + + return matrix + + +def test(): + random_pose = np.random.randn(6) + tfm = x_to_world(random_pose) + pose_result = tfm_to_pose(tfm) + tfm2 = x_to_world(pose_result) + + print(random_pose) + print(pose_result) + print() + print(tfm) + print(tfm2) + +if __name__ == "__main__": + test() \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/waypoint2map.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/waypoint2map.py new file mode 100644 index 0000000000000000000000000000000000000000..f22e74b06ad9222f76cbc0d9febd72dec731f827 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/waypoint2map.py @@ -0,0 +1,164 @@ +''' +Functions: Transform waypoints to bev maps. +''' + +import numpy as np +import torch.nn.functional as F +import torch + +def global2grid(waypoints, grid_coord=[96,288,1/2,3/4], det_range=[-36,-12,-10,36,12,10]): + X, Y, r_x, r_y = grid_coord + center_y, center_x = Y * r_y, X * r_x + waypoints *= grid_coord[1]/(det_range[3]-det_range[0]) + waypoints[:,:,0] = waypoints[:,:,0] + center_x + waypoints[:,:,1] = waypoints[:,:,1] + center_y + return waypoints + +def waypoints2map(waypoints, grid_coord=[192,96,3/4,1/2]): + Y, X, r_y, r_x = grid_coord + B, N, _ = waypoints.shape # [B, N, 2] + bev_map = np.zeros([B, Y, X]) + grids = global2grid(waypoints) + grids = np.array(grids, dtype=np.uint8) + batch_idx = np.repeat(np.arange(B),N) + x_idx = grids[:,:,0].flatten() + y_idx = grids[:,:,1].flatten() + valid_mask = (y_idx > (-1)) * (y_idx < Y) * (x_idx > (-1)) * (x_idx < X) + valid_idx = np.where(valid_mask*1)[0] + bev_map[batch_idx[valid_idx], y_idx[valid_idx], x_idx[valid_idx]] = 1 + # print(bev_map.sum()) + # print(len(valid_idx)) + return bev_map + +def gradcam_resize(bev_map, scale=50): + ''' + bev_map: [B,Y,X] torch.tensor + ''' + bev_map = torch.Tensor(bev_map) + bev_map = bev_map.unsqueeze(1) + bev_map_expand = F.max_pool2d(bev_map, scale, stride=1, padding=(scale-1)//2) + return bev_map_expand.squeeze(1) + + +def gaussian_2d(shape, sigma=1): + """Generate gaussian map. + + Args: + shape (list[int]): Shape of the map. + sigma (float): Sigma to generate gaussian map. + Defaults to 1. + + Returns: + np.ndarray: Generated gaussian map. + """ + m, n = [(ss - 1.) / 2. for ss in shape] + y, x = np.ogrid[-m:m + 1, -n:n + 1] + + h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) + h[h < np.finfo(h.dtype).eps * h.max()] = 0 + return h + + +def gaussian_radius(det_size, min_overlap=0.5): + """Get radius of gaussian. + + Args: + det_size (tuple[torch.Tensor]): Size of the detection result. + min_overlap (float): Gaussian_overlap. Defaults to 0.5. + + Returns: + torch.Tensor: Computed radius. 
+ """ + height, width = det_size + + a1 = 1 + b1 = (height + width) + c1 = width * height * (1 - min_overlap) / (1 + min_overlap) + sq1 = np.sqrt(b1**2 - 4 * a1 * c1) + r1 = (b1 + sq1) / (2 * a1) + + a2 = 4 + b2 = 2 * (height + width) + c2 = (1 - min_overlap) * width * height + sq2 = np.sqrt(b2**2 - 4 * a2 * c2) + r2 = (b2 + sq2) / (2 * a2) + + a3 = 4 * min_overlap + b3 = -2 * min_overlap * (height + width) + c3 = (min_overlap - 1) * width * height + sq3 = np.sqrt(b3**2 - 4 * a3 * c3) + r3 = (b3 + sq3) / (2 * a3) + return min(r1, r2, r3) + + +def draw_gaussian(heatmap, center, radius, ratio=5, k=1): + """Get gaussian masked heatmap. + + Args: + heatmap (torch.Tensor): Heatmap to be masked. + center (torch.Tensor): Center coord of the heatmap. + radius (int): Radius of gausian. + K (int): Multiple of masked_gaussian. Defaults to 1. + + Returns: + torch.Tensor: Masked heatmap. + """ + diameter = 2 * radius + 1 + gaussian = gaussian_2d((diameter, diameter), sigma=diameter/ratio ) + + # x, y = int(center[0]), int(center[1]) + x, y = int(center[1]), int(center[0]) + + height, width = heatmap.shape[0:2] + + left, right = min(x, radius), min(width - x, radius + 1) + top, bottom = min(y, radius), min(height - y, radius + 1) + + masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] + masked_gaussian = gaussian[radius - top:radius + bottom, + radius - left:radius + right] + + if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: + # torch.max(masked_heatmap, masked_gaussian * k, out=masked_heatmap) + np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) + # masked_heatmap = np.max([masked_heatmap[None,], (masked_gaussian * k)[None,]], axis=0)[0] + # heatmap[y - top:y + bottom, x - left:x + right] = masked_heatmap + return heatmap + +def draw_heatmap(heatmap, x, y, radius=50, sigma=5): + feature_map_size = heatmap.shape + + # throw out not in range objects to avoid out of array + # area when creating the heatmap + if not (0 <= x < feature_map_size[0] + and 0 <= y < feature_map_size[1]): + return heatmap + + heatmap = draw_gaussian(heatmap, (x,y), radius, sigma) + return heatmap + + +def waypoints2map_radius(waypoints, radius=40, sigma_reverse=5, grid_coord=[96,288,1/2,3/4], det_range=[-36,-12,-10,36,12,10]): + + waypoints[:,:,1] *= -1 + X, Y, r_x, r_y = grid_coord + B, N, _ = waypoints.shape # [B, N, 2] + bev_map = np.zeros([B, X, Y]) + grids = global2grid(waypoints, grid_coord=grid_coord, det_range=det_range) + # grids = np.array(grids, dtype=np.uint8) + batch_idx = np.repeat(np.arange(B),N) + x_idx = grids[:,:,0].flatten() + y_idx = grids[:,:,1].flatten() + valid_mask = (y_idx > (-1)) * (y_idx < Y) * (x_idx > (-1)) * (x_idx < X) + valid_idx = np.where(valid_mask*1)[0] + + radius *= grid_coord[0]/96*24/(det_range[4]-det_range[1]) + radius = int(radius) + + for i in valid_idx: + b = batch_idx[i] + x = x_idx[i] + y = y_idx[i] + bev_map[b] = draw_heatmap(bev_map[b], x, y, radius, sigma_reverse) + + return bev_map