xiangbog commited on
Commit
d1125e4
·
1 Parent(s): bc63ebe

Upload model checkpoint

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. v2xverse_late_multiclass_2025_01_28_08_49_56/config.yaml +213 -0
  2. v2xverse_late_multiclass_2025_01_28_08_49_56/events.out.tfevents.1738072197.poliwag.engin.umich.edu +3 -0
  3. v2xverse_late_multiclass_2025_01_28_08_49_56/net_epoch_bestval_at14.pth +3 -0
  4. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/__init__.py +0 -0
  5. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/__pycache__/__init__.cpython-37.pyc +0 -0
  6. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__init__.py +0 -0
  7. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__pycache__/__init__.cpython-37.pyc +0 -0
  8. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__pycache__/augment_utils.cpython-37.pyc +0 -0
  9. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__pycache__/data_augmentor.cpython-37.pyc +0 -0
  10. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/augment_utils.py +88 -0
  11. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/data_augmentor.py +120 -0
  12. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__init__.py +35 -0
  13. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/__init__.cpython-37.pyc +0 -0
  14. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/early_fusion_dataset.cpython-37.pyc +0 -0
  15. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/early_multiclass_fusion_dataset.cpython-37.pyc +0 -0
  16. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_2stage_fusion_dataset.cpython-37.pyc +0 -0
  17. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_fusion_dataset.cpython-37.pyc +0 -0
  18. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_heter_fusion_dataset.cpython-37.pyc +0 -0
  19. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_multiclass_fusion_dataset.cpython-37.pyc +0 -0
  20. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/late_fusion_dataset.cpython-37.pyc +0 -0
  21. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/late_heter_fusion_dataset.cpython-37.pyc +0 -0
  22. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/late_multiclass_fusion_dataset.cpython-37.pyc +0 -0
  23. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/dairv2x_basedataset.cpython-37.pyc +0 -0
  24. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/opv2v_basedataset.cpython-37.pyc +0 -0
  25. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/v2xset_basedataset.cpython-37.pyc +0 -0
  26. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/v2xsim_basedataset.cpython-37.pyc +0 -0
  27. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/v2xverse_basedataset.cpython-37.pyc +0 -0
  28. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/dairv2x_basedataset.py +285 -0
  29. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/opv2v_basedataset.py +479 -0
  30. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/v2xset_basedataset.py +24 -0
  31. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/v2xsim_basedataset.py +238 -0
  32. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/v2xverse_basedataset.py +1118 -0
  33. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/early_fusion_dataset.py +414 -0
  34. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/early_multiclass_fusion_dataset.py +899 -0
  35. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_2stage_fusion_dataset.py +603 -0
  36. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_fusion_dataset.py +679 -0
  37. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_heter_fusion_dataset.py +752 -0
  38. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_multiclass_fusion_dataset.py +892 -0
  39. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_fusion_dataset.py +564 -0
  40. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_heter_fusion_dataset.py +565 -0
  41. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_multi_fusion_dataset.py +631 -0
  42. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_multiclass_fusion_dataset.py +1233 -0
  43. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__init__.py +27 -0
  44. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/__init__.cpython-37.pyc +0 -0
  45. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/base_postprocessor.cpython-37.pyc +0 -0
  46. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/bev_postprocessor.cpython-37.pyc +0 -0
  47. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/ciassd_postprocessor.cpython-37.pyc +0 -0
  48. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/fpvrcnn_postprocessor.cpython-37.pyc +0 -0
  49. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/uncertainty_voxel_postprocessor.cpython-37.pyc +0 -0
  50. v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/voxel_postprocessor.cpython-37.pyc +0 -0
v2xverse_late_multiclass_2025_01_28_08_49_56/config.yaml ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ comm_range: 200
2
+ data_augment:
3
+ - ALONG_AXIS_LIST:
4
+ - x
5
+ NAME: random_world_flip
6
+ - NAME: random_world_rotation
7
+ WORLD_ROT_ANGLE:
8
+ - -0.78539816
9
+ - 0.78539816
10
+ - NAME: random_world_scaling
11
+ WORLD_SCALE_RANGE:
12
+ - 0.95
13
+ - 1.05
14
+ fusion:
15
+ args:
16
+ clip_pc: false
17
+ proj_first: false
18
+ core_method: intermediatemulticlass
19
+ dataset: v2xverse
20
+ input_source:
21
+ - lidar
22
+ label_type: lidar
23
+ loss:
24
+ args:
25
+ cls_weight: 5.0
26
+ code_weights:
27
+ - 1.0
28
+ - 1.0
29
+ - 1.0
30
+ - 1.0
31
+ - 1.0
32
+ - 1.0
33
+ - 5.0
34
+ - 5.0
35
+ loc_weight: 1.0
36
+ target_assigner_config:
37
+ box_coder: ResidualCoder
38
+ cav_lidar_range: &id004
39
+ - -36
40
+ - -12
41
+ - -22
42
+ - 36
43
+ - 12
44
+ - 14
45
+ gaussian_overlap: 0.1
46
+ max_objs: 40
47
+ min_radius: 2
48
+ out_size_factor: 2
49
+ voxel_size: &id001
50
+ - 0.125
51
+ - 0.125
52
+ - 36
53
+ core_method: center_point_loss_multiclass
54
+ lr_scheduler:
55
+ core_method: multistep
56
+ gamma: 0.1
57
+ step_size:
58
+ - 8
59
+ - 15
60
+ model:
61
+ args:
62
+ anchor_number: 3
63
+ att:
64
+ feat_dim: 64
65
+ base_bev_backbone:
66
+ compression: 0
67
+ layer_nums: &id002
68
+ - 3
69
+ - 4
70
+ - 5
71
+ layer_strides:
72
+ - 2
73
+ - 2
74
+ - 2
75
+ num_filters: &id003
76
+ - 64
77
+ - 128
78
+ - 256
79
+ num_upsample_filter:
80
+ - 128
81
+ - 128
82
+ - 128
83
+ resnet: true
84
+ upsample_strides:
85
+ - 1
86
+ - 2
87
+ - 4
88
+ voxel_size: *id001
89
+ fusion_args:
90
+ agg_operator:
91
+ feature_dim: 256
92
+ mode: MAX
93
+ downsample_rate: 2
94
+ dropout_rate: 0
95
+ in_channels: 256
96
+ layer_nums: *id002
97
+ multi_scale: false
98
+ n_head: 8
99
+ num_filters: *id003
100
+ only_attention: true
101
+ voxel_size: *id001
102
+ fusion_method: max
103
+ lidar_range: *id004
104
+ max_cav: 5
105
+ multi_class: true
106
+ out_size_factor: 2
107
+ pillar_vfe:
108
+ num_filters:
109
+ - 64
110
+ use_absolute_xyz: true
111
+ use_norm: true
112
+ with_distance: false
113
+ point_pillar_scatter:
114
+ grid_size: !!python/object/apply:numpy.core.multiarray._reconstruct
115
+ args:
116
+ - !!python/name:numpy.ndarray ''
117
+ - !!python/tuple
118
+ - 0
119
+ - !!binary |
120
+ Yg==
121
+ state: !!python/tuple
122
+ - 1
123
+ - !!python/tuple
124
+ - 3
125
+ - !!python/object/apply:numpy.dtype
126
+ args:
127
+ - i8
128
+ - 0
129
+ - 1
130
+ state: !!python/tuple
131
+ - 3
132
+ - <
133
+ - null
134
+ - null
135
+ - null
136
+ - -1
137
+ - -1
138
+ - 0
139
+ - false
140
+ - !!binary |
141
+ QAIAAAAAAADAAAAAAAAAAAEAAAAAAAAA
142
+ num_features: 64
143
+ shrink_header:
144
+ dim:
145
+ - 128
146
+ input_dim: 384
147
+ kernal_size:
148
+ - 3
149
+ padding:
150
+ - 1
151
+ stride:
152
+ - 1
153
+ supervise_fusion: false
154
+ supervise_single: true
155
+ voxel_size: *id001
156
+ core_method: point_pillar_single_multiclass
157
+ name: v2xverse_late_multiclass
158
+ noise_setting: !!python/object/apply:collections.OrderedDict
159
+ - - - add_noise
160
+ - false
161
+ optimizer:
162
+ args:
163
+ eps: 1.0e-10
164
+ weight_decay: 0.0001
165
+ core_method: Adam
166
+ lr: 0.002
167
+ postprocess:
168
+ anchor_args:
169
+ D: 1
170
+ H: 192
171
+ W: 576
172
+ cav_lidar_range: *id004
173
+ feature_stride: 2
174
+ h: 1.56
175
+ l: 3.9
176
+ num: 1
177
+ r: &id005
178
+ - 0
179
+ vd: 36
180
+ vh: 0.125
181
+ vw: 0.125
182
+ w: 1.6
183
+ core_method: VoxelPostprocessor
184
+ dir_args:
185
+ anchor_yaw: *id005
186
+ dir_offset: 0.7853
187
+ num_bins: 1
188
+ gt_range: *id004
189
+ max_num: 100
190
+ nms_thresh: 0.15
191
+ order: hwl
192
+ target_args:
193
+ neg_threshold: 0.45
194
+ pos_threshold: 0.6
195
+ score_threshold: 0.2
196
+ preprocess:
197
+ args:
198
+ max_points_per_voxel: 32
199
+ max_voxel_test: 70000
200
+ max_voxel_train: 32000
201
+ voxel_size: *id001
202
+ cav_lidar_range: *id004
203
+ core_method: SpVoxelPreprocessor
204
+ root_dir: external_paths/data_root
205
+ test_dir: external_paths/data_root
206
+ train_params:
207
+ batch_size: 4
208
+ epoches: 40
209
+ eval_freq: 1
210
+ max_cav: 5
211
+ save_freq: 1
212
+ validate_dir: external_paths/data_root
213
+ yaml_parser: load_point_pillar_params
v2xverse_late_multiclass_2025_01_28_08_49_56/events.out.tfevents.1738072197.poliwag.engin.umich.edu ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac3b8a28e7fba347631b57fb22d403037b9f1fa244f0b566d60222d5c9bf5756
3
+ size 498679515
v2xverse_late_multiclass_2025_01_28_08_49_56/net_epoch_bestval_at14.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba3fef03956eb6da6eb9721db6baf142f81f85ac84cd95324c1e37065d387b50
3
+ size 32820345
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/__init__.py ADDED
File without changes
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (158 Bytes). View file
 
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__init__.py ADDED
File without changes
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (168 Bytes). View file
 
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__pycache__/augment_utils.cpython-37.pyc ADDED
Binary file (2.44 kB). View file
 
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__pycache__/data_augmentor.cpython-37.pyc ADDED
Binary file (2.96 kB). View file
 
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/augment_utils.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ # Author: OpenPCDet
3
+
4
+ import numpy as np
5
+
6
+ from opencood.utils import common_utils
7
+
8
+
9
+ def random_flip_along_x(gt_boxes, points):
10
+ """
11
+ Args:
12
+ gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
13
+ points: (M, 3 + C)
14
+ Returns:
15
+ """
16
+ enable = np.random.choice([False, True], replace=False, p=[0.5, 0.5])
17
+ if enable:
18
+ gt_boxes[:, 1] = -gt_boxes[:, 1]
19
+ gt_boxes[:, 6] = -gt_boxes[:, 6]
20
+ points[:, 1] = -points[:, 1]
21
+
22
+ if gt_boxes.shape[1] > 7:
23
+ gt_boxes[:, 8] = -gt_boxes[:, 8]
24
+
25
+ return gt_boxes, points
26
+
27
+
28
+ def random_flip_along_y(gt_boxes, points):
29
+ """
30
+ Args:
31
+ gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
32
+ points: (M, 3 + C)
33
+ Returns:
34
+ """
35
+ enable = np.random.choice([False, True], replace=False, p=[0.5, 0.5])
36
+ if enable:
37
+ gt_boxes[:, 0] = -gt_boxes[:, 0]
38
+ gt_boxes[:, 6] = -(gt_boxes[:, 6] + np.pi)
39
+ points[:, 0] = -points[:, 0]
40
+
41
+ if gt_boxes.shape[1] > 7:
42
+ gt_boxes[:, 7] = -gt_boxes[:, 7]
43
+
44
+ return gt_boxes, points
45
+
46
+
47
+ def global_rotation(gt_boxes, points, rot_range):
48
+ """
49
+ Args:
50
+ gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
51
+ points: (M, 3 + C),
52
+ rot_range: [min, max]
53
+ Returns:
54
+ """
55
+ noise_rotation = np.random.uniform(rot_range[0],
56
+ rot_range[1])
57
+ points = common_utils.rotate_points_along_z(points[np.newaxis, :, :],
58
+ np.array([noise_rotation]))[0]
59
+
60
+ gt_boxes[:, 0:3] = \
61
+ common_utils.rotate_points_along_z(gt_boxes[np.newaxis, :, 0:3],
62
+ np.array([noise_rotation]))[0]
63
+ gt_boxes[:, 6] += noise_rotation
64
+
65
+ if gt_boxes.shape[1] > 7:
66
+ gt_boxes[:, 7:9] = common_utils.rotate_points_along_z(
67
+ np.hstack((gt_boxes[:, 7:9], np.zeros((gt_boxes.shape[0], 1))))[
68
+ np.newaxis, :, :],
69
+ np.array([noise_rotation]))[0][:, 0:2]
70
+
71
+ return gt_boxes, points
72
+
73
+
74
+ def global_scaling(gt_boxes, points, scale_range):
75
+ """
76
+ Args:
77
+ gt_boxes: (N, 7), [x, y, z, dx, dy, dz, heading]
78
+ points: (M, 3 + C),
79
+ scale_range: [min, max]
80
+ Returns:
81
+ """
82
+ if scale_range[1] - scale_range[0] < 1e-3:
83
+ return gt_boxes, points
84
+ noise_scale = np.random.uniform(scale_range[0], scale_range[1])
85
+ points[:, :3] *= noise_scale
86
+ gt_boxes[:, :6] *= noise_scale
87
+
88
+ return gt_boxes, points
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/data_augmentor.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Class for data augmentation
4
+ """
5
+ # Author: Runsheng Xu <[email protected]>
6
+ # License: TDG-Attribution-NonCommercial-NoDistrib
7
+
8
+ from functools import partial
9
+
10
+ import numpy as np
11
+
12
+ from opencood.data_utils.augmentor import augment_utils
13
+
14
+
15
+ class DataAugmentor(object):
16
+ """
17
+ Data Augmentor.
18
+
19
+ Parameters
20
+ ----------
21
+ augment_config : list
22
+ A list of augmentation configuration.
23
+
24
+ Attributes
25
+ ----------
26
+ data_augmentor_queue : list
27
+ The list of data augmented functions.
28
+ """
29
+
30
+ def __init__(self, augment_config, train=True):
31
+ self.data_augmentor_queue = []
32
+ self.train = train
33
+
34
+ for cur_cfg in augment_config:
35
+ cur_augmentor = getattr(self, cur_cfg['NAME'])(config=cur_cfg)
36
+ self.data_augmentor_queue.append(cur_augmentor)
37
+
38
+ def random_world_flip(self, data_dict=None, config=None):
39
+ if data_dict is None:
40
+ return partial(self.random_world_flip, config=config)
41
+
42
+ gt_boxes, gt_mask, points = data_dict['object_bbx_center'], \
43
+ data_dict['object_bbx_mask'], \
44
+ data_dict['lidar_np']
45
+ gt_boxes_valid = gt_boxes[gt_mask == 1]
46
+
47
+ for cur_axis in config['ALONG_AXIS_LIST']:
48
+ assert cur_axis in ['x', 'y']
49
+ gt_boxes_valid, points = getattr(augment_utils,
50
+ 'random_flip_along_%s' % cur_axis)(
51
+ gt_boxes_valid, points,
52
+ )
53
+
54
+ gt_boxes[:gt_boxes_valid.shape[0], :] = gt_boxes_valid
55
+
56
+ data_dict['object_bbx_center'] = gt_boxes
57
+ data_dict['object_bbx_mask'] = gt_mask
58
+ data_dict['lidar_np'] = points
59
+
60
+ return data_dict
61
+
62
+ def random_world_rotation(self, data_dict=None, config=None):
63
+ if data_dict is None:
64
+ return partial(self.random_world_rotation, config=config)
65
+
66
+ rot_range = config['WORLD_ROT_ANGLE']
67
+ if not isinstance(rot_range, list):
68
+ rot_range = [-rot_range, rot_range]
69
+
70
+ gt_boxes, gt_mask, points = data_dict['object_bbx_center'], \
71
+ data_dict['object_bbx_mask'], \
72
+ data_dict['lidar_np']
73
+ gt_boxes_valid = gt_boxes[gt_mask == 1]
74
+ gt_boxes_valid, points = augment_utils.global_rotation(
75
+ gt_boxes_valid, points, rot_range=rot_range
76
+ )
77
+ gt_boxes[:gt_boxes_valid.shape[0], :] = gt_boxes_valid
78
+
79
+ data_dict['object_bbx_center'] = gt_boxes
80
+ data_dict['object_bbx_mask'] = gt_mask
81
+ data_dict['lidar_np'] = points
82
+
83
+ return data_dict
84
+
85
+ def random_world_scaling(self, data_dict=None, config=None):
86
+ if data_dict is None:
87
+ return partial(self.random_world_scaling, config=config)
88
+
89
+ gt_boxes, gt_mask, points = data_dict['object_bbx_center'], \
90
+ data_dict['object_bbx_mask'], \
91
+ data_dict['lidar_np']
92
+ gt_boxes_valid = gt_boxes[gt_mask == 1]
93
+
94
+ gt_boxes_valid, points = augment_utils.global_scaling(
95
+ gt_boxes_valid, points, config['WORLD_SCALE_RANGE']
96
+ )
97
+ gt_boxes[:gt_boxes_valid.shape[0], :] = gt_boxes_valid
98
+
99
+ data_dict['object_bbx_center'] = gt_boxes
100
+ data_dict['object_bbx_mask'] = gt_mask
101
+ data_dict['lidar_np'] = points
102
+
103
+ return data_dict
104
+
105
+ def forward(self, data_dict):
106
+ """
107
+ Args:
108
+ data_dict:
109
+ points: (N, 3 + C_in)
110
+ gt_boxes: optional, (N, 7) [x, y, z, dx, dy, dz, heading]
111
+ gt_names: optional, (N), string
112
+ ...
113
+
114
+ Returns:
115
+ """
116
+ if self.train:
117
+ for cur_augmentor in self.data_augmentor_queue:
118
+ data_dict = cur_augmentor(data_dict=data_dict)
119
+
120
+ return data_dict
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__init__.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from opencood.data_utils.datasets.late_fusion_dataset import getLateFusionDataset
2
+ from opencood.data_utils.datasets.late_heter_fusion_dataset import getLateheterFusionDataset
3
+ from opencood.data_utils.datasets.late_multiclass_fusion_dataset import getLatemulticlassFusionDataset
4
+ from opencood.data_utils.datasets.early_fusion_dataset import getEarlyFusionDataset
5
+ from opencood.data_utils.datasets.intermediate_fusion_dataset import getIntermediateFusionDataset
6
+ from opencood.data_utils.datasets.intermediate_multiclass_fusion_dataset import getIntermediatemulticlassFusionDataset
7
+ from opencood.data_utils.datasets.intermediate_2stage_fusion_dataset import getIntermediate2stageFusionDataset
8
+ from opencood.data_utils.datasets.intermediate_heter_fusion_dataset import getIntermediateheterFusionDataset
9
+ from opencood.data_utils.datasets.basedataset.opv2v_basedataset import OPV2VBaseDataset
10
+ from opencood.data_utils.datasets.basedataset.v2xsim_basedataset import V2XSIMBaseDataset
11
+ from opencood.data_utils.datasets.basedataset.dairv2x_basedataset import DAIRV2XBaseDataset
12
+ from opencood.data_utils.datasets.basedataset.v2xset_basedataset import V2XSETBaseDataset
13
+ from opencood.data_utils.datasets.basedataset.v2xverse_basedataset import V2XVERSEBaseDataset
14
+ from opencood.data_utils.datasets.late_multiclass_fusion_dataset import getLatemulticlassFusionDataset
15
+ from opencood.data_utils.datasets.early_multiclass_fusion_dataset import getEarlymulticlassFusionDataset
16
+
17
+ def build_dataset(dataset_cfg, visualize=False, train=True):
18
+ fusion_name = dataset_cfg['fusion']['core_method']
19
+ dataset_name = dataset_cfg['fusion']['dataset']
20
+
21
+ assert fusion_name in ['late', 'lateheter', 'intermediate', 'intermediate2stage', 'intermediateheter', 'intermediatemulticlass', 'early', 'latemulticlass', 'earlymulticlass']
22
+ assert dataset_name in ['opv2v', 'v2xsim', 'dairv2x', 'v2xset', 'v2xverse']
23
+
24
+ fusion_dataset_func = "get" + fusion_name.capitalize() + "FusionDataset"
25
+ fusion_dataset_func = eval(fusion_dataset_func)
26
+ base_dataset_cls = dataset_name.upper() + "BaseDataset"
27
+ base_dataset_cls = eval(base_dataset_cls)
28
+
29
+ dataset = fusion_dataset_func(base_dataset_cls)(
30
+ params=dataset_cfg,
31
+ visualize=visualize,
32
+ train=train
33
+ )
34
+
35
+ return dataset
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (2.34 kB). View file
 
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/early_fusion_dataset.cpython-37.pyc ADDED
Binary file (9.46 kB). View file
 
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/early_multiclass_fusion_dataset.cpython-37.pyc ADDED
Binary file (19 kB). View file
 
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_2stage_fusion_dataset.cpython-37.pyc ADDED
Binary file (12.6 kB). View file
 
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_fusion_dataset.cpython-37.pyc ADDED
Binary file (14.6 kB). View file
 
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_heter_fusion_dataset.cpython-37.pyc ADDED
Binary file (16.2 kB). View file
 
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_multiclass_fusion_dataset.cpython-37.pyc ADDED
Binary file (19 kB). View file
 
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/late_fusion_dataset.cpython-37.pyc ADDED
Binary file (11.9 kB). View file
 
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/late_heter_fusion_dataset.cpython-37.pyc ADDED
Binary file (12.8 kB). View file
 
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/late_multiclass_fusion_dataset.cpython-37.pyc ADDED
Binary file (24 kB). View file
 
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/dairv2x_basedataset.cpython-37.pyc ADDED
Binary file (9.18 kB). View file
 
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/opv2v_basedataset.cpython-37.pyc ADDED
Binary file (12.3 kB). View file
 
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/v2xset_basedataset.cpython-37.pyc ADDED
Binary file (1.38 kB). View file
 
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/v2xsim_basedataset.cpython-37.pyc ADDED
Binary file (6.29 kB). View file
 
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/v2xverse_basedataset.cpython-37.pyc ADDED
Binary file (31.3 kB). View file
 
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/dairv2x_basedataset.py ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from collections import OrderedDict
3
+ import cv2
4
+ import h5py
5
+ import torch
6
+ import numpy as np
7
+ from functools import partial
8
+ from torch.utils.data import Dataset
9
+ from PIL import Image
10
+ import random
11
+ import opencood.utils.pcd_utils as pcd_utils
12
+ from opencood.data_utils.augmentor.data_augmentor import DataAugmentor
13
+ from opencood.hypes_yaml.yaml_utils import load_yaml
14
+ from opencood.utils.pcd_utils import downsample_lidar_minimum
15
+ from opencood.utils.camera_utils import load_camera_data, load_intrinsic_DAIR_V2X
16
+ from opencood.utils.common_utils import read_json
17
+ from opencood.utils.transformation_utils import tfm_to_pose, rot_and_trans_to_trasnformation_matrix
18
+ from opencood.utils.transformation_utils import veh_side_rot_and_trans_to_trasnformation_matrix
19
+ from opencood.utils.transformation_utils import inf_side_rot_and_trans_to_trasnformation_matrix
20
+ from opencood.data_utils.pre_processor import build_preprocessor
21
+ from opencood.data_utils.post_processor import build_postprocessor
22
+
23
+ class DAIRV2XBaseDataset(Dataset):
24
+ def __init__(self, params, visualize, train=True):
25
+ self.params = params
26
+ self.visualize = visualize
27
+ self.train = train
28
+
29
+ self.pre_processor = build_preprocessor(params["preprocess"], train)
30
+ self.post_processor = build_postprocessor(params["postprocess"], train)
31
+ self.post_processor.generate_gt_bbx = self.post_processor.generate_gt_bbx_by_iou
32
+ if 'data_augment' in params: # late and early
33
+ self.data_augmentor = DataAugmentor(params['data_augment'], train)
34
+ else: # intermediate
35
+ self.data_augmentor = None
36
+
37
+ if 'clip_pc' in params['fusion']['args'] and params['fusion']['args']['clip_pc']:
38
+ self.clip_pc = True
39
+ else:
40
+ self.clip_pc = False
41
+
42
+ if 'train_params' not in params or 'max_cav' not in params['train_params']:
43
+ self.max_cav = 2
44
+ else:
45
+ self.max_cav = params['train_params']['max_cav']
46
+
47
+ self.load_lidar_file = True if 'lidar' in params['input_source'] or self.visualize else False
48
+ self.load_camera_file = True if 'camera' in params['input_source'] else False
49
+ self.load_depth_file = True if 'depth' in params['input_source'] else False
50
+
51
+ assert self.load_depth_file is False
52
+
53
+ self.label_type = params['label_type'] # 'lidar' or 'camera'
54
+ self.generate_object_center = self.generate_object_center_lidar if self.label_type == "lidar" \
55
+ else self.generate_object_center_camera
56
+
57
+ if self.load_camera_file:
58
+ self.data_aug_conf = params["fusion"]["args"]["data_aug_conf"]
59
+
60
+ if self.train:
61
+ split_dir = params['root_dir']
62
+ else:
63
+ split_dir = params['validate_dir']
64
+
65
+ self.root_dir = params['data_dir']
66
+
67
+ self.split_info = read_json(split_dir)
68
+ co_datainfo = read_json(os.path.join(self.root_dir, 'cooperative/data_info.json'))
69
+ self.co_data = OrderedDict()
70
+ for frame_info in co_datainfo:
71
+ veh_frame_id = frame_info['vehicle_image_path'].split("/")[-1].replace(".jpg", "")
72
+ self.co_data[veh_frame_id] = frame_info
73
+
74
+ if "noise_setting" not in self.params:
75
+ self.params['noise_setting'] = OrderedDict()
76
+ self.params['noise_setting']['add_noise'] = False
77
+
78
+ def reinitialize(self):
79
+ pass
80
+
81
+ def retrieve_base_data(self, idx):
82
+ """
83
+ Given the index, return the corresponding data.
84
+ NOTICE!
85
+ It is different from Intermediate Fusion and Early Fusion
86
+ Label is not cooperative and loaded for both veh side and inf side.
87
+ Parameters
88
+ ----------
89
+ idx : int
90
+ Index given by dataloader.
91
+ Returns
92
+ -------
93
+ data : dict
94
+ The dictionary contains loaded yaml params and lidar data for
95
+ each cav.
96
+ """
97
+ veh_frame_id = self.split_info[idx]
98
+ frame_info = self.co_data[veh_frame_id]
99
+ system_error_offset = frame_info["system_error_offset"]
100
+ data = OrderedDict()
101
+
102
+ data[0] = OrderedDict()
103
+ data[0]['ego'] = True
104
+ data[1] = OrderedDict()
105
+ data[1]['ego'] = False
106
+
107
+ data[0]['params'] = OrderedDict()
108
+ data[1]['params'] = OrderedDict()
109
+
110
+ # pose of agent
111
+ lidar_to_novatel = read_json(os.path.join(self.root_dir,'vehicle-side/calib/lidar_to_novatel/'+str(veh_frame_id)+'.json'))
112
+ novatel_to_world = read_json(os.path.join(self.root_dir,'vehicle-side/calib/novatel_to_world/'+str(veh_frame_id)+'.json'))
113
+ transformation_matrix = veh_side_rot_and_trans_to_trasnformation_matrix(lidar_to_novatel, novatel_to_world)
114
+ data[0]['params']['lidar_pose'] = tfm_to_pose(transformation_matrix)
115
+
116
+ inf_frame_id = frame_info['infrastructure_image_path'].split("/")[-1].replace(".jpg", "")
117
+ virtuallidar_to_world = read_json(os.path.join(self.root_dir,'infrastructure-side/calib/virtuallidar_to_world/'+str(inf_frame_id)+'.json'))
118
+ transformation_matrix = inf_side_rot_and_trans_to_trasnformation_matrix(virtuallidar_to_world, system_error_offset)
119
+ data[1]['params']['lidar_pose'] = tfm_to_pose(transformation_matrix)
120
+
121
+ data[0]['params']['vehicles_front'] = read_json(os.path.join(self.root_dir,frame_info['cooperative_label_path'].replace("label_world", "label_world_backup")))
122
+ data[0]['params']['vehicles_all'] = read_json(os.path.join(self.root_dir,frame_info['cooperative_label_path']))
123
+
124
+ data[1]['params']['vehicles_front'] = [] # we only load cooperative label in vehicle side
125
+ data[1]['params']['vehicles_all'] = [] # we only load cooperative label in vehicle side
126
+
127
+ if self.load_camera_file:
128
+ data[0]['camera_data'] = load_camera_data([os.path.join(self.root_dir, frame_info["vehicle_image_path"])])
129
+ data[0]['params']['camera0'] = OrderedDict()
130
+ data[0]['params']['camera0']['extrinsic'] = rot_and_trans_to_trasnformation_matrix( \
131
+ read_json(os.path.join(self.root_dir, 'vehicle-side/calib/lidar_to_camera/'+str(veh_frame_id)+'.json')))
132
+ data[0]['params']['camera0']['intrinsic'] = load_intrinsic_DAIR_V2X( \
133
+ read_json(os.path.join(self.root_dir, 'vehicle-side/calib/camera_intrinsic/'+str(veh_frame_id)+'.json')))
134
+
135
+ data[1]['camera_data']= load_camera_data([os.path.join(self.root_dir,frame_info["infrastructure_image_path"])])
136
+ data[1]['params']['camera0'] = OrderedDict()
137
+ data[1]['params']['camera0']['extrinsic'] = rot_and_trans_to_trasnformation_matrix( \
138
+ read_json(os.path.join(self.root_dir, 'infrastructure-side/calib/virtuallidar_to_camera/'+str(inf_frame_id)+'.json')))
139
+ data[1]['params']['camera0']['intrinsic'] = load_intrinsic_DAIR_V2X( \
140
+ read_json(os.path.join(self.root_dir, 'infrastructure-side/calib/camera_intrinsic/'+str(inf_frame_id)+'.json')))
141
+
142
+
143
+ if self.load_lidar_file or self.visualize:
144
+ data[0]['lidar_np'], _ = pcd_utils.read_pcd(os.path.join(self.root_dir,frame_info["vehicle_pointcloud_path"]))
145
+ data[1]['lidar_np'], _ = pcd_utils.read_pcd(os.path.join(self.root_dir,frame_info["infrastructure_pointcloud_path"]))
146
+
147
+
148
+ # Label for single side
149
+ data[0]['params']['vehicles_single_front'] = read_json(os.path.join(self.root_dir, \
150
+ 'vehicle-side/label/lidar_backup/{}.json'.format(veh_frame_id)))
151
+ data[0]['params']['vehicles_single_all'] = read_json(os.path.join(self.root_dir, \
152
+ 'vehicle-side/label/lidar/{}.json'.format(veh_frame_id)))
153
+ data[1]['params']['vehicles_single_front'] = read_json(os.path.join(self.root_dir, \
154
+ 'infrastructure-side/label/virtuallidar/{}.json'.format(inf_frame_id)))
155
+ data[1]['params']['vehicles_single_all'] = read_json(os.path.join(self.root_dir, \
156
+ 'infrastructure-side/label/virtuallidar/{}.json'.format(inf_frame_id)))
157
+
158
+ if getattr(self, "heterogeneous", False):
159
+ self.generate_object_center_lidar = \
160
+ partial(self.generate_object_center_single_hetero, modality='lidar')
161
+ self.generate_object_center_camera = \
162
+ partial(self.generate_object_center_single_hetero, modality='camera')
163
+
164
+ # by default
165
+ data[0]['modality_name'] = 'm1'
166
+ data[1]['modality_name'] = 'm2'
167
+ # veh cam inf lidar
168
+ data[0]['modality_name'] = 'm2'
169
+ data[1]['modality_name'] = 'm1'
170
+
171
+ if self.train: # randomly choose LiDAR or Camera to be Ego
172
+ p = np.random.rand()
173
+ if p > 0.5:
174
+ data[0], data[1] = data[1], data[0]
175
+ data[0]['ego'] = True
176
+ data[1]['ego'] = False
177
+ else:
178
+ # evaluate, the agent of ego modality should be ego
179
+ if self.adaptor.mapping_dict[data[0]['modality_name']] not in self.ego_modality and \
180
+ self.adaptor.mapping_dict[data[1]['modality_name']] in self.ego_modality:
181
+ data[0], data[1] = data[1], data[0]
182
+ data[0]['ego'] = True
183
+ data[1]['ego'] = False
184
+
185
+ data[0]['modality_name'] = self.adaptor.reassign_cav_modality(data[0]['modality_name'], 0)
186
+ data[1]['modality_name'] = self.adaptor.reassign_cav_modality(data[1]['modality_name'], 1)
187
+
188
+
189
+ return data
190
+
191
+
192
+ def __len__(self):
193
+ return len(self.split_info)
194
+
195
+ def __getitem__(self, idx):
196
+ pass
197
+
198
+
199
+ def generate_object_center_lidar(self,
200
+ cav_contents,
201
+ reference_lidar_pose):
202
+ """
203
+ reference lidar 's coordinate
204
+ """
205
+ for cav_content in cav_contents:
206
+ cav_content['params']['vehicles'] = cav_content['params']['vehicles_all']
207
+ return self.post_processor.generate_object_center_dairv2x(cav_contents,
208
+ reference_lidar_pose)
209
+
210
+ def generate_object_center_camera(self,
211
+ cav_contents,
212
+ reference_lidar_pose):
213
+ """
214
+ reference lidar 's coordinate
215
+ """
216
+ for cav_content in cav_contents:
217
+ cav_content['params']['vehicles'] = cav_content['params']['vehicles_front']
218
+ return self.post_processor.generate_object_center_dairv2x(cav_contents,
219
+ reference_lidar_pose)
220
+
221
+ ### Add new func for single side
222
+ def generate_object_center_single(self,
223
+ cav_contents,
224
+ reference_lidar_pose,
225
+ **kwargs):
226
+ """
227
+ veh or inf 's coordinate.
228
+
229
+ reference_lidar_pose is of no use.
230
+ """
231
+ suffix = "_single"
232
+ for cav_content in cav_contents:
233
+ cav_content['params']['vehicles_single'] = \
234
+ cav_content['params']['vehicles_single_front'] if self.label_type == 'camera' else \
235
+ cav_content['params']['vehicles_single_all']
236
+ return self.post_processor.generate_object_center_dairv2x_single(cav_contents, suffix)
237
+
238
+ ### Add for heterogeneous, transforming the single label from self coord. to ego coord.
239
+ def generate_object_center_single_hetero(self,
240
+ cav_contents,
241
+ reference_lidar_pose,
242
+ modality):
243
+ """
244
+ loading the object from single agent.
245
+
246
+ The same as *generate_object_center_single*, but it will transform the object to reference(ego) coordinate,
247
+ using reference_lidar_pose.
248
+ """
249
+ suffix = "_single"
250
+ for cav_content in cav_contents:
251
+ cav_content['params']['vehicles_single'] = \
252
+ cav_content['params']['vehicles_single_front'] if modality == 'camera' else \
253
+ cav_content['params']['vehicles_single_all']
254
+ return self.post_processor.generate_object_center_dairv2x_single_hetero(cav_contents, reference_lidar_pose, suffix)
255
+
256
+
257
+ def get_ext_int(self, params, camera_id):
258
+ lidar_to_camera = params["camera%d" % camera_id]['extrinsic'].astype(np.float32) # R_cw
259
+ camera_to_lidar = np.linalg.inv(lidar_to_camera) # R_wc
260
+ camera_intrinsic = params["camera%d" % camera_id]['intrinsic'].astype(np.float32
261
+ )
262
+ return camera_to_lidar, camera_intrinsic
263
+
264
+ def augment(self, lidar_np, object_bbx_center, object_bbx_mask):
265
+ """
266
+ Given the raw point cloud, augment by flipping and rotation.
267
+ Parameters
268
+ ----------
269
+ lidar_np : np.ndarray
270
+ (n, 4) shape
271
+ object_bbx_center : np.ndarray
272
+ (n, 7) shape to represent bbx's x, y, z, h, w, l, yaw
273
+ object_bbx_mask : np.ndarray
274
+ Indicate which elements in object_bbx_center are padded.
275
+ """
276
+ tmp_dict = {'lidar_np': lidar_np,
277
+ 'object_bbx_center': object_bbx_center,
278
+ 'object_bbx_mask': object_bbx_mask}
279
+ tmp_dict = self.data_augmentor.forward(tmp_dict)
280
+
281
+ lidar_np = tmp_dict['lidar_np']
282
+ object_bbx_center = tmp_dict['object_bbx_center']
283
+ object_bbx_mask = tmp_dict['object_bbx_mask']
284
+
285
+ return lidar_np, object_bbx_center, object_bbx_mask
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/opv2v_basedataset.py ADDED
@@ -0,0 +1,479 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ from collections import OrderedDict
4
+ import cv2
5
+ import h5py
6
+ import torch
7
+ import numpy as np
8
+ from torch.utils.data import Dataset
9
+ from PIL import Image
10
+ import json
11
+ import random
12
+ import opencood.utils.pcd_utils as pcd_utils
13
+ from opencood.data_utils.augmentor.data_augmentor import DataAugmentor
14
+ from opencood.hypes_yaml.yaml_utils import load_yaml
15
+ from opencood.utils.camera_utils import load_camera_data
16
+ from opencood.utils.transformation_utils import x1_to_x2
17
+ from opencood.data_utils.pre_processor import build_preprocessor
18
+ from opencood.data_utils.post_processor import build_postprocessor
19
+
20
+ class OPV2VBaseDataset(Dataset):
21
+ def __init__(self, params, visualize, train=True):
22
+ self.params = params
23
+ self.visualize = visualize
24
+ self.train = train
25
+
26
+ self.pre_processor = build_preprocessor(params["preprocess"], train)
27
+ self.post_processor = build_postprocessor(params["postprocess"], train)
28
+ if 'data_augment' in params: # late and early
29
+ self.data_augmentor = DataAugmentor(params['data_augment'], train)
30
+ else: # intermediate
31
+ self.data_augmentor = None
32
+
33
+ if self.train:
34
+ root_dir = params['root_dir']
35
+ else:
36
+ root_dir = params['validate_dir']
37
+ self.root_dir = root_dir
38
+
39
+ print("Dataset dir:", root_dir)
40
+
41
+ if 'train_params' not in params or \
42
+ 'max_cav' not in params['train_params']:
43
+ self.max_cav = 5
44
+ else:
45
+ self.max_cav = params['train_params']['max_cav']
46
+
47
+ self.load_lidar_file = True if 'lidar' in params['input_source'] or self.visualize else False
48
+ self.load_camera_file = True if 'camera' in params['input_source'] else False
49
+ self.load_depth_file = True if 'depth' in params['input_source'] else False
50
+
51
+ self.label_type = params['label_type'] # 'lidar' or 'camera'
52
+ self.generate_object_center = self.generate_object_center_lidar if self.label_type == "lidar" \
53
+ else self.generate_object_center_camera
54
+ self.generate_object_center_single = self.generate_object_center # will it follows 'self.generate_object_center' when 'self.generate_object_center' change?
55
+
56
+ if self.load_camera_file:
57
+ self.data_aug_conf = params["fusion"]["args"]["data_aug_conf"]
58
+
59
+ # by default, we load lidar, camera and metadata. But users may
60
+ # define additional inputs/tasks
61
+ self.add_data_extension = \
62
+ params['add_data_extension'] if 'add_data_extension' \
63
+ in params else []
64
+
65
+ if "noise_setting" not in self.params:
66
+ self.params['noise_setting'] = OrderedDict()
67
+ self.params['noise_setting']['add_noise'] = False
68
+
69
+ # first load all paths of different scenarios
70
+ scenario_folders = sorted([os.path.join(root_dir, x)
71
+ for x in os.listdir(root_dir) if
72
+ os.path.isdir(os.path.join(root_dir, x))])
73
+
74
+ self.scenario_folders = scenario_folders
75
+
76
+ self.reinitialize()
77
+
78
+
79
+ def reinitialize(self):
80
+ # Structure: {scenario_id : {cav_1 : {timestamp1 : {yaml: path,
81
+ # lidar: path, cameras:list of path}}}}
82
+ self.scenario_database = OrderedDict()
83
+ self.len_record = []
84
+
85
+ # loop over all scenarios
86
+ for (i, scenario_folder) in enumerate(self.scenario_folders):
87
+ self.scenario_database.update({i: OrderedDict()})
88
+
89
+ # at least 1 cav should show up
90
+ if self.train:
91
+ cav_list = [x for x in os.listdir(scenario_folder)
92
+ if os.path.isdir(
93
+ os.path.join(scenario_folder, x))]
94
+ # cav_list = sorted(cav_list)
95
+ random.shuffle(cav_list)
96
+ else:
97
+ cav_list = sorted([x for x in os.listdir(scenario_folder)
98
+ if os.path.isdir(
99
+ os.path.join(scenario_folder, x))])
100
+ assert len(cav_list) > 0
101
+
102
+ """
103
+ roadside unit data's id is always negative, so here we want to
104
+ make sure they will be in the end of the list as they shouldn't
105
+ be ego vehicle.
106
+ """
107
+ if int(cav_list[0]) < 0:
108
+ cav_list = cav_list[1:] + [cav_list[0]]
109
+
110
+ """
111
+ make the first cav to be ego modality
112
+ """
113
+ if getattr(self, "heterogeneous", False):
114
+ scenario_name = scenario_folder.split("/")[-1]
115
+ cav_list = self.adaptor.reorder_cav_list(cav_list, scenario_name)
116
+
117
+
118
+ # loop over all CAV data
119
+ for (j, cav_id) in enumerate(cav_list):
120
+ if j > self.max_cav - 1:
121
+ print('too many cavs reinitialize')
122
+ break
123
+ self.scenario_database[i][cav_id] = OrderedDict()
124
+
125
+ # save all yaml files to the dictionary
126
+ cav_path = os.path.join(scenario_folder, cav_id)
127
+
128
+ yaml_files = \
129
+ sorted([os.path.join(cav_path, x)
130
+ for x in os.listdir(cav_path) if
131
+ x.endswith('.yaml') and 'additional' not in x])
132
+
133
+ # this timestamp is not ready
134
+ yaml_files = [x for x in yaml_files if not ("2021_08_20_21_10_24" in x and "000265" in x)]
135
+
136
+ timestamps = self.extract_timestamps(yaml_files)
137
+
138
+ for timestamp in timestamps:
139
+ self.scenario_database[i][cav_id][timestamp] = \
140
+ OrderedDict()
141
+ yaml_file = os.path.join(cav_path,
142
+ timestamp + '.yaml')
143
+ lidar_file = os.path.join(cav_path,
144
+ timestamp + '.pcd')
145
+ camera_files = self.find_camera_files(cav_path,
146
+ timestamp)
147
+ depth_files = self.find_camera_files(cav_path,
148
+ timestamp, sensor="depth")
149
+
150
+ self.scenario_database[i][cav_id][timestamp]['yaml'] = \
151
+ yaml_file
152
+ self.scenario_database[i][cav_id][timestamp]['lidar'] = \
153
+ lidar_file
154
+ self.scenario_database[i][cav_id][timestamp]['cameras'] = \
155
+ camera_files
156
+ self.scenario_database[i][cav_id][timestamp]['depths'] = \
157
+ depth_files
158
+
159
+ if getattr(self, "heterogeneous", False):
160
+ scenario_name = scenario_folder.split("/")[-1]
161
+
162
+ cav_modality = self.adaptor.reassign_cav_modality(self.modality_assignment[scenario_name][cav_id] , j)
163
+
164
+ self.scenario_database[i][cav_id][timestamp]['modality_name'] = cav_modality
165
+
166
+ self.scenario_database[i][cav_id][timestamp]['lidar'] = \
167
+ self.adaptor.switch_lidar_channels(cav_modality, lidar_file)
168
+
169
+
170
+ # load extra data
171
+ for file_extension in self.add_data_extension:
172
+ file_name = \
173
+ os.path.join(cav_path,
174
+ timestamp + '_' + file_extension)
175
+
176
+ self.scenario_database[i][cav_id][timestamp][
177
+ file_extension] = file_name
178
+
179
+ # Assume all cavs will have the same timestamps length. Thus
180
+ # we only need to calculate for the first vehicle in the
181
+ # scene.
182
+ if j == 0:
183
+ # we regard the agent with the minimum id as the ego
184
+ self.scenario_database[i][cav_id]['ego'] = True
185
+ if not self.len_record:
186
+ self.len_record.append(len(timestamps))
187
+ else:
188
+ prev_last = self.len_record[-1]
189
+ self.len_record.append(prev_last + len(timestamps))
190
+ else:
191
+ self.scenario_database[i][cav_id]['ego'] = False
192
+
193
+
194
+ def retrieve_base_data(self, idx):
195
+ """
196
+ Given the index, return the corresponding data.
197
+
198
+ Parameters
199
+ ----------
200
+ idx : int
201
+ Index given by dataloader.
202
+
203
+ Returns
204
+ -------
205
+ data : dict
206
+ The dictionary contains loaded yaml params and lidar data for
207
+ each cav.
208
+ """
209
+ # we loop the accumulated length list to see get the scenario index
210
+ scenario_index = 0
211
+ for i, ele in enumerate(self.len_record):
212
+ if idx < ele:
213
+ scenario_index = i
214
+ break
215
+ scenario_database = self.scenario_database[scenario_index]
216
+
217
+ # check the timestamp index
218
+ timestamp_index = idx if scenario_index == 0 else \
219
+ idx - self.len_record[scenario_index - 1]
220
+ # retrieve the corresponding timestamp key
221
+ timestamp_key = self.return_timestamp_key(scenario_database,
222
+ timestamp_index)
223
+ data = OrderedDict()
224
+ # load files for all CAVs
225
+ for cav_id, cav_content in scenario_database.items():
226
+ data[cav_id] = OrderedDict()
227
+ data[cav_id]['ego'] = cav_content['ego']
228
+
229
+ # load param file: json is faster than yaml
230
+ json_file = cav_content[timestamp_key]['yaml'].replace("yaml", "json")
231
+ if os.path.exists(json_file):
232
+ with open(json_file, "r") as f:
233
+ data[cav_id]['params'] = json.load(f)
234
+ else:
235
+ data[cav_id]['params'] = \
236
+ load_yaml(cav_content[timestamp_key]['yaml'])
237
+
238
+ # load camera file: hdf5 is faster than png
239
+ hdf5_file = cav_content[timestamp_key]['cameras'][0].replace("camera0.png", "imgs.hdf5")
240
+
241
+ if os.path.exists(hdf5_file):
242
+ with h5py.File(hdf5_file, "r") as f:
243
+ data[cav_id]['camera_data'] = []
244
+ data[cav_id]['depth_data'] = []
245
+ for i in range(4):
246
+ data[cav_id]['camera_data'].append(Image.fromarray(f[f'camera{i}'][()]))
247
+ data[cav_id]['depth_data'].append(Image.fromarray(f[f'depth{i}'][()]))
248
+ else:
249
+ if self.load_camera_file:
250
+ data[cav_id]['camera_data'] = \
251
+ load_camera_data(cav_content[timestamp_key]['cameras'])
252
+ if self.load_depth_file:
253
+ data[cav_id]['depth_data'] = \
254
+ load_camera_data(cav_content[timestamp_key]['depths'])
255
+
256
+ # load lidar file
257
+ if self.load_lidar_file or self.visualize:
258
+ data[cav_id]['lidar_np'] = \
259
+ pcd_utils.pcd_to_np(cav_content[timestamp_key]['lidar'])
260
+
261
+ if getattr(self, "heterogeneous", False):
262
+ data[cav_id]['modality_name'] = cav_content[timestamp_key]['modality_name']
263
+
264
+ for file_extension in self.add_data_extension:
265
+ # if not find in the current directory
266
+ # go to additional folder
267
+ if not os.path.exists(cav_content[timestamp_key][file_extension]):
268
+ cav_content[timestamp_key][file_extension] = cav_content[timestamp_key][file_extension].replace("train","additional/train")
269
+ cav_content[timestamp_key][file_extension] = cav_content[timestamp_key][file_extension].replace("validate","additional/validate")
270
+ cav_content[timestamp_key][file_extension] = cav_content[timestamp_key][file_extension].replace("test","additional/test")
271
+
272
+ if '.yaml' in file_extension:
273
+ data[cav_id][file_extension] = \
274
+ load_yaml(cav_content[timestamp_key][file_extension])
275
+ else:
276
+ data[cav_id][file_extension] = \
277
+ cv2.imread(cav_content[timestamp_key][file_extension])
278
+
279
+
280
+ return data
281
+
282
+ def __len__(self):
283
+ return self.len_record[-1]
284
+
285
+ def __getitem__(self, idx):
286
+ """
287
+ Abstract method, needs to be define by the children class.
288
+ """
289
+ pass
290
+
291
+ @staticmethod
292
+ def extract_timestamps(yaml_files):
293
+ """
294
+ Given the list of the yaml files, extract the mocked timestamps.
295
+
296
+ Parameters
297
+ ----------
298
+ yaml_files : list
299
+ The full path of all yaml files of ego vehicle
300
+
301
+ Returns
302
+ -------
303
+ timestamps : list
304
+ The list containing timestamps only.
305
+ """
306
+ timestamps = []
307
+
308
+ for file in yaml_files:
309
+ res = file.split('/')[-1]
310
+
311
+ timestamp = res.replace('.yaml', '')
312
+ timestamps.append(timestamp)
313
+
314
+ return timestamps
315
+
316
+ @staticmethod
317
+ def return_timestamp_key(scenario_database, timestamp_index):
318
+ """
319
+ Given the timestamp index, return the correct timestamp key, e.g.
320
+ 2 --> '000078'.
321
+
322
+ Parameters
323
+ ----------
324
+ scenario_database : OrderedDict
325
+ The dictionary contains all contents in the current scenario.
326
+
327
+ timestamp_index : int
328
+ The index for timestamp.
329
+
330
+ Returns
331
+ -------
332
+ timestamp_key : str
333
+ The timestamp key saved in the cav dictionary.
334
+ """
335
+ # get all timestamp keys
336
+ timestamp_keys = list(scenario_database.items())[0][1]
337
+ # retrieve the correct index
338
+ timestamp_key = list(timestamp_keys.items())[timestamp_index][0]
339
+
340
+ return timestamp_key
341
+
342
+ @staticmethod
343
+ def find_camera_files(cav_path, timestamp, sensor="camera"):
344
+ """
345
+ Retrieve the paths to all camera files.
346
+
347
+ Parameters
348
+ ----------
349
+ cav_path : str
350
+ The full file path of current cav.
351
+
352
+ timestamp : str
353
+ Current timestamp
354
+
355
+ sensor : str
356
+ "camera" or "depth"
357
+
358
+ Returns
359
+ -------
360
+ camera_files : list
361
+ The list containing all camera png file paths.
362
+ """
363
+ camera0_file = os.path.join(cav_path,
364
+ timestamp + f'_{sensor}0.png')
365
+ camera1_file = os.path.join(cav_path,
366
+ timestamp + f'_{sensor}1.png')
367
+ camera2_file = os.path.join(cav_path,
368
+ timestamp + f'_{sensor}2.png')
369
+ camera3_file = os.path.join(cav_path,
370
+ timestamp + f'_{sensor}3.png')
371
+ return [camera0_file, camera1_file, camera2_file, camera3_file]
372
+
373
+
374
+ def augment(self, lidar_np, object_bbx_center, object_bbx_mask):
375
+ """
376
+ Given the raw point cloud, augment by flipping and rotation.
377
+
378
+ Parameters
379
+ ----------
380
+ lidar_np : np.ndarray
381
+ (n, 4) shape
382
+
383
+ object_bbx_center : np.ndarray
384
+ (n, 7) shape to represent bbx's x, y, z, h, w, l, yaw
385
+
386
+ object_bbx_mask : np.ndarray
387
+ Indicate which elements in object_bbx_center are padded.
388
+ """
389
+ tmp_dict = {'lidar_np': lidar_np,
390
+ 'object_bbx_center': object_bbx_center,
391
+ 'object_bbx_mask': object_bbx_mask}
392
+ tmp_dict = self.data_augmentor.forward(tmp_dict)
393
+
394
+ lidar_np = tmp_dict['lidar_np']
395
+ object_bbx_center = tmp_dict['object_bbx_center']
396
+ object_bbx_mask = tmp_dict['object_bbx_mask']
397
+
398
+ return lidar_np, object_bbx_center, object_bbx_mask
399
+
400
+
401
+ def generate_object_center_lidar(self,
402
+ cav_contents,
403
+ reference_lidar_pose):
404
+ """
405
+ Retrieve all objects in a format of (n, 7), where 7 represents
406
+ x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw.
407
+ The object_bbx_center is in ego coordinate.
408
+
409
+ Notice: it is a wrap of postprocessor
410
+
411
+ Parameters
412
+ ----------
413
+ cav_contents : list
414
+ List of dictionary, save all cavs' information.
415
+ in fact it is used in get_item_single_car, so the list length is 1
416
+
417
+ reference_lidar_pose : list
418
+ The final target lidar pose with length 6.
419
+
420
+ Returns
421
+ -------
422
+ object_np : np.ndarray
423
+ Shape is (max_num, 7).
424
+ mask : np.ndarray
425
+ Shape is (max_num,).
426
+ object_ids : list
427
+ Length is number of bbx in current sample.
428
+ """
429
+ return self.post_processor.generate_object_center(cav_contents,
430
+ reference_lidar_pose)
431
+
432
+ def generate_object_center_camera(self,
433
+ cav_contents,
434
+ reference_lidar_pose):
435
+ """
436
+ Retrieve all objects in a format of (n, 7), where 7 represents
437
+ x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw.
438
+ The object_bbx_center is in ego coordinate.
439
+
440
+ Notice: it is a wrap of postprocessor
441
+
442
+ Parameters
443
+ ----------
444
+ cav_contents : list
445
+ List of dictionary, save all cavs' information.
446
+ in fact it is used in get_item_single_car, so the list length is 1
447
+
448
+ reference_lidar_pose : list
449
+ The final target lidar pose with length 6.
450
+
451
+ visibility_map : np.ndarray
452
+ for OPV2V, its 256*256 resolution. 0.39m per pixel. heading up.
453
+
454
+ Returns
455
+ -------
456
+ object_np : np.ndarray
457
+ Shape is (max_num, 7).
458
+ mask : np.ndarray
459
+ Shape is (max_num,).
460
+ object_ids : list
461
+ Length is number of bbx in current sample.
462
+ """
463
+ return self.post_processor.generate_visible_object_center(
464
+ cav_contents, reference_lidar_pose
465
+ )
466
+
467
+ def get_ext_int(self, params, camera_id):
468
+ camera_coords = np.array(params["camera%d" % camera_id]["cords"]).astype(
469
+ np.float32)
470
+ camera_to_lidar = x1_to_x2(
471
+ camera_coords, params["lidar_pose_clean"]
472
+ ).astype(np.float32) # T_LiDAR_camera
473
+ camera_to_lidar = camera_to_lidar @ np.array(
474
+ [[0, 0, 1, 0], [1, 0, 0, 0], [0, -1, 0, 0], [0, 0, 0, 1]],
475
+ dtype=np.float32) # UE4 coord to opencv coord
476
+ camera_intrinsic = np.array(params["camera%d" % camera_id]["intrinsic"]).astype(
477
+ np.float32
478
+ )
479
+ return camera_to_lidar, camera_intrinsic
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/v2xset_basedataset.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from opencood.data_utils.datasets.basedataset.opv2v_basedataset import OPV2VBaseDataset
2
+
3
+ # All the same as OPV2V
4
+ class V2XSETBaseDataset(OPV2VBaseDataset):
5
+ def __init__(self, params, visulize, train=True):
6
+ super().__init__(params, visulize, train)
7
+
8
+ if self.load_camera_file is True: # '2021_09_09_13_20_58'. This scenario has only 3 camera files?
9
+ scenario_folders_new = [x for x in self.scenario_folders if '2021_09_09_13_20_58' not in x]
10
+ self.scenario_folders = scenario_folders_new
11
+ self.reinitialize()
12
+
13
+
14
+ def generate_object_center_camera(self,
15
+ cav_contents,
16
+ reference_lidar_pose):
17
+ """
18
+ Since V2XSet has not release bev_visiblity map, we can only filter object by range.
19
+
20
+ Suppose the detection range of camera is within 50m
21
+ """
22
+ return self.post_processor.generate_object_center_v2xset_camera(
23
+ cav_contents, reference_lidar_pose
24
+ )
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/v2xsim_basedataset.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Author: Yangheng Zhao <[email protected]>
2
+ import os
3
+ import pickle
4
+ from collections import OrderedDict
5
+ from typing import Dict
6
+ from abc import abstractmethod
7
+ import numpy as np
8
+ import torch
9
+ from torch.utils.data import Dataset
10
+
11
+ from opencood.data_utils.augmentor.data_augmentor import DataAugmentor
12
+ from opencood.utils.common_utils import read_json
13
+ from opencood.utils.transformation_utils import tfm_to_pose
14
+ from opencood.data_utils.pre_processor import build_preprocessor
15
+ from opencood.data_utils.post_processor import build_postprocessor
16
+
17
+ class V2XSIMBaseDataset(Dataset):
18
+ """
19
+ First version.
20
+ Load V2X-sim 2.0 using yifan lu's pickle file.
21
+ Only supports LiDAR data.
22
+ """
23
+
24
+ def __init__(self,
25
+ params: Dict,
26
+ visualize: bool = False,
27
+ train: bool = True):
28
+ self.params = params
29
+ self.visualize = visualize
30
+ self.train = train
31
+
32
+ self.pre_processor = build_preprocessor(params["preprocess"], train)
33
+ self.post_processor = build_postprocessor(params["postprocess"], train)
34
+ if 'data_augment' in params: # late and early
35
+ self.data_augmentor = DataAugmentor(params['data_augment'], train)
36
+ else: # intermediate
37
+ self.data_augmentor = None
38
+
39
+ if self.train:
40
+ root_dir = params['root_dir']
41
+ else:
42
+ root_dir = params['validate_dir']
43
+ self.root_dir = root_dir
44
+
45
+ print("Dataset dir:", root_dir)
46
+
47
+ if 'train_params' not in params or \
48
+ 'max_cav' not in params['train_params']:
49
+ self.max_cav = 5
50
+ else:
51
+ self.max_cav = params['train_params']['max_cav']
52
+
53
+ self.load_lidar_file = True if 'lidar' in params['input_source'] or self.visualize else False
54
+ self.load_camera_file = True if 'camera' in params['input_source'] else False
55
+ self.load_depth_file = True if 'depth' in params['input_source'] else False
56
+
57
+ self.label_type = params['label_type'] # 'lidar' or 'camera'
58
+ assert self.label_type in ['lidar', 'camera']
59
+
60
+ self.generate_object_center = self.generate_object_center_lidar if self.label_type == "lidar" \
61
+ else self.generate_object_center_camera
62
+ self.generate_object_center_single = self.generate_object_center
63
+
64
+ self.add_data_extension = \
65
+ params['add_data_extension'] if 'add_data_extension' \
66
+ in params else []
67
+
68
+ if "noise_setting" not in self.params:
69
+ self.params['noise_setting'] = OrderedDict()
70
+ self.params['noise_setting']['add_noise'] = False
71
+
72
+ with open(self.root_dir, 'rb') as f:
73
+ dataset_info = pickle.load(f)
74
+ self.dataset_info_pkl = dataset_info
75
+
76
+ # TODO param: one as ego or all as ego?
77
+ self.ego_mode = 'one' # "all"
78
+
79
+ self.reinitialize()
80
+
81
+ def reinitialize(self):
82
+ self.scene_database = OrderedDict()
83
+ if self.ego_mode == 'one':
84
+ self.len_record = len(self.dataset_info_pkl)
85
+ else:
86
+ raise NotImplementedError(self.ego_mode)
87
+
88
+ for i, scene_info in enumerate(self.dataset_info_pkl):
89
+ self.scene_database.update({i: OrderedDict()})
90
+ cav_num = scene_info['agent_num']
91
+ assert cav_num > 0
92
+
93
+ if self.train:
94
+ cav_ids = 1 + np.random.permutation(cav_num)
95
+ else:
96
+ cav_ids = list(range(1, cav_num + 1))
97
+
98
+
99
+ for j, cav_id in enumerate(cav_ids):
100
+ if j > self.max_cav - 1:
101
+ print('Too many CAVs; truncating to max_cav during reinitialize.')
102
+ break
103
+
104
+ self.scene_database[i][cav_id] = OrderedDict()
105
+
106
+ self.scene_database[i][cav_id]['ego'] = j==0
107
+
108
+ self.scene_database[i][cav_id]['lidar'] = scene_info[f'lidar_path_{cav_id}']
109
+ # need to delete this line if running in /GPFS
110
+ self.scene_database[i][cav_id]['lidar'] = \
111
+ self.scene_database[i][cav_id]['lidar'].replace("/GPFS/rhome/yifanlu/workspace/dataset/v2xsim2-complete", "dataset/V2X-Sim-2.0")
112
+
113
+ self.scene_database[i][cav_id]['params'] = OrderedDict()
114
+ self.scene_database[i][cav_id][
115
+ 'params']['lidar_pose'] = tfm_to_pose(
116
+ scene_info[f"lidar_pose_{cav_id}"]
117
+ ) # [x, y, z, roll, pitch, yaw]
118
+ self.scene_database[i][cav_id]['params'][
119
+ 'vehicles'] = scene_info[f'labels_{cav_id}'][
120
+ 'gt_boxes_global']
121
+ self.scene_database[i][cav_id]['params'][
122
+ 'object_ids'] = scene_info[f'labels_{cav_id}'][
123
+ 'gt_object_ids'].tolist()
124
+
125
+ def __len__(self) -> int:
126
+ return self.len_record
127
+
128
+ @abstractmethod
129
+ def __getitem__(self, index):
130
+ pass
131
+
132
+ def retrieve_base_data(self, idx):
133
+ """
134
+ Given the index, return the corresponding data.
135
+
136
+ Parameters
137
+ ----------
138
+ idx : int
139
+ Index given by dataloader.
140
+
141
+ Returns
142
+ -------
143
+ data : dict
144
+ The dictionary contains loaded yaml params and lidar data for
145
+ each cav.
146
+ """
147
+
148
+ data = OrderedDict()
149
+ # {
150
+ # 'cav_id0':{
151
+ # 'ego': bool,
152
+ # 'params': {
153
+ # 'lidar_pose': [x, y, z, roll, pitch, yaw],
154
+ # 'vehicles':{
155
+ # 'id': {'angle', 'center', 'extent', 'location'},
156
+ # ...
157
+ # }
158
+ # }, # contains the agent pose information and the object information
159
+ # 'camera_data':,
160
+ # 'depth_data':,
161
+ # 'lidar_np':,
162
+ # ...
163
+ # }
164
+ # 'cav_id1': ,
165
+ # ...
166
+ # }
167
+ scene = self.scene_database[idx]
168
+ for cav_id, cav_content in scene.items():
169
+ data[f'{cav_id}'] = OrderedDict()
170
+ data[f'{cav_id}']['ego'] = cav_content['ego']
171
+
172
+ data[f'{cav_id}']['params'] = cav_content['params']
173
+
174
+ # load the corresponding data into the dictionary
175
+ nbr_dims = 4 # x,y,z,intensity
176
+ scan = np.fromfile(cav_content['lidar'], dtype='float32')
177
+ points = scan.reshape((-1, 5))[:, :nbr_dims]
178
+ data[f'{cav_id}']['lidar_np'] = points
179
+
180
+ return data
181
+
182
+ def generate_object_center_lidar(self, cav_contents, reference_lidar_pose):
183
+ """
184
+ Retrieve all objects in a format of (n, 7), where 7 represents
185
+ x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw.
186
+
187
+ Notice: it is a wrapper of the postprocessor function.
188
+
189
+ Parameters
190
+ ----------
191
+ cav_contents : list
192
+ List of dictionaries that store all CAVs' information.
193
+ In fact it is used in get_item_single_car, so the list length is 1.
194
+
195
+ reference_lidar_pose : list
196
+ The final target lidar pose with length 6.
197
+
198
+ Returns
199
+ -------
200
+ object_np : np.ndarray
201
+ Shape is (max_num, 7).
202
+ mask : np.ndarray
203
+ Shape is (max_num,).
204
+ object_ids : list
205
+ Length is number of bbx in current sample.
206
+ """
207
+
208
+ return self.post_processor.generate_object_center_v2x(
209
+ cav_contents, reference_lidar_pose)
210
+
211
+ def generate_object_center_camera(self, cav_contents, reference_lidar_pose):
212
+ raise NotImplementedError()
213
+
214
+ def augment(self, lidar_np, object_bbx_center, object_bbx_mask):
215
+ """
216
+ Given the raw point cloud, augment by flipping and rotation.
217
+
218
+ Parameters
219
+ ----------
220
+ lidar_np : np.ndarray
221
+ (n, 4) shape
222
+
223
+ object_bbx_center : np.ndarray
224
+ (n, 7) shape to represent bbx's x, y, z, h, w, l, yaw
225
+
226
+ object_bbx_mask : np.ndarray
227
+ Indicate which elements in object_bbx_center are padded.
228
+ """
229
+ tmp_dict = {'lidar_np': lidar_np,
230
+ 'object_bbx_center': object_bbx_center,
231
+ 'object_bbx_mask': object_bbx_mask}
232
+ tmp_dict = self.data_augmentor.forward(tmp_dict)
233
+
234
+ lidar_np = tmp_dict['lidar_np']
235
+ object_bbx_center = tmp_dict['object_bbx_center']
236
+ object_bbx_mask = tmp_dict['object_bbx_mask']
237
+
238
+ return lidar_np, object_bbx_center, object_bbx_mask
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/v2xverse_basedataset.py ADDED
@@ -0,0 +1,1118 @@
1
+
2
+ import os
3
+ from collections import OrderedDict
4
+ import cv2
5
+ import h5py
6
+ import torch
7
+ import torchvision
8
+ import numpy as np
9
+ from torch.utils.data import Dataset
10
+ from PIL import Image
11
+ import json
12
+ import random
13
+ import re
14
+ import math
15
+
16
+ import logging
17
+ _logger = logging.getLogger(__name__)
18
+
19
+ import opencood.utils.pcd_utils as pcd_utils
20
+ from opencood.data_utils.augmentor.data_augmentor import DataAugmentor
21
+ from opencood.hypes_yaml.yaml_utils import load_yaml
22
+ from opencood.utils.camera_utils import load_camera_data
23
+ from opencood.utils.transformation_utils import x1_to_x2
24
+ from opencood.data_utils.pre_processor import build_preprocessor
25
+ from opencood.data_utils.post_processor import build_postprocessor
26
+
27
+
28
+ class V2XVERSEBaseDataset(Dataset):
29
+ def __init__(self, params, visualize, train=True):
30
+ self.params = params
31
+ self.visualize = visualize
32
+ self.train = train
33
+
34
+ self.pre_processor = build_preprocessor(params["preprocess"], train)
35
+ self.post_processor = build_postprocessor(params["postprocess"], train)
36
+ self.data_augmentor = DataAugmentor(params['data_augment'],
37
+ train)
38
+
39
+ self.frame_gap = params.get('frame_gap',200)
40
+ self.time_delay = params.get('time_delay',0)
41
+
42
+ if 'target_assigner_config' in self.params['loss']['args']:
43
+ self.det_range = self.params['loss']['args']['target_assigner_config']['cav_lidar_range'] # [-36, -36, -22, 36, 36, 14]
44
+ else:
45
+ self.det_range = [-36, -36, -22, 36, 36, 14]
46
+
47
+ if self.time_delay % self.frame_gap != 0:
48
+ print("Time delay of v2xverse dataset should be a multiple of frame_gap !")
49
+ self.frame_delay = int(self.time_delay / self.frame_gap)
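+ # Example (hypothetical values): time_delay = 400 with the default frame_gap = 200
+ # gives frame_delay = 2, i.e. collaborators' data is read from two frames earlier.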
50
+ print(f'*** time_delay = {self.time_delay} ***')
51
+
52
+ self.test_flag = False
53
+ if self.train:
54
+ root_dir = params['root_dir']
55
+ towns = [1,2,3,4,6]
56
+ elif not visualize:
57
+ root_dir = params['validate_dir']
58
+ towns = [7,10] # [6,7,8,9,10]
59
+ else:
60
+ root_dir = params['test_dir']
61
+ towns = [5]
62
+ self.test_flag = True
63
+ self.root_dir = root_dir
64
+ self.clock = 0
65
+
66
+ print("Dataset dir:", root_dir)
67
+
68
+ if 'train_params' not in params or \
69
+ 'max_cav' not in params['train_params']:
70
+ self.max_cav = 5
71
+ else:
72
+ self.max_cav = params['train_params']['max_cav']
73
+
74
+ self.load_lidar_file = True if 'lidar' in params['input_source'] or self.visualize else False
75
+ self.load_camera_file = True if 'camera' in params['input_source'] else False
76
+ self.load_depth_file = True if 'depth' in params['input_source'] else False
77
+
78
+ self.label_type = params['label_type'] # 'lidar' or 'camera'
79
+ self.generate_object_center = self.generate_object_center_lidar if self.label_type == "lidar" \
80
+ else self.generate_object_center_camera
81
+ self.generate_object_center_single = self.generate_object_center # will it follow 'self.generate_object_center' when 'self.generate_object_center' changes?
82
+
83
+ if self.load_camera_file:
84
+ self.data_aug_conf = params["fusion"]["args"]["data_aug_conf"]
85
+
86
+ # by default, we load lidar, camera and metadata. But users may
87
+ # define additional inputs/tasks
88
+ self.add_data_extension = \
89
+ params['add_data_extension'] if 'add_data_extension' \
90
+ in params else []
91
+
92
+ if "noise_setting" not in self.params:
93
+ self.params['noise_setting'] = OrderedDict()
94
+ self.params['noise_setting']['add_noise'] = False
95
+
96
+ if root_dir is None:
97
+ print('Not loading from an existing dataset!')
98
+ return
99
+ if not os.path.exists(root_dir):
100
+ print('Dataset path does not exist!')
101
+ return
102
+
103
+ # first load all paths of different scenarios
104
+ scenario_folders = sorted([os.path.join(root_dir, x)
105
+ for x in os.listdir(root_dir) if
106
+ os.path.isdir(os.path.join(root_dir, x))])
107
+ self.scenario_folders = scenario_folders
108
+
109
+ #################################
110
+ ## v2xverse data load
111
+ #################################
112
+
113
+ self.rsu_change_frame = 25
114
+ self.route_frames = []
115
+
116
+ data_index_name = 'dataset_index.txt'
117
+ if 'index_file' in self.params:
118
+ data_index_name = self.params['index_file'] + '.txt'
119
+ print('data_index_name:', data_index_name)
120
+ dataset_indexs = self._load_text(data_index_name).split('\n')
121
+
122
+ filter_file = None
123
+ if 'filte_danger' in self.params:
124
+ if os.path.exists(os.path.join(self.root_dir,self.params['filte_danger'])):
125
+ filter_file = self._load_json(self.params['filte_danger'])
126
+
127
+ weathers = [0,1,2,3,4,5,6,7,8,9,10]
128
+ pattern = re.compile(r'weather-(\d+).*town(\d\d)')
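+ # Illustrative (hypothetical) folder name: 'weather-3_..._town05' would yield weather = 3, town = 5.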
129
+ for line in dataset_indexs:
130
+ if len(line.split()) != 3:
131
+ continue
132
+ path, frames, egos = line.split()
133
+ route_path = os.path.join(self.root_dir, path)
134
+ frames = int(frames)
135
+ res = pattern.findall(path)
136
+ if len(res) != 1:
137
+ continue
138
+ weather = int(res[0][0])
139
+ town = int(res[0][1])
140
+ if weather not in weathers or town not in towns:
141
+ continue
142
+
143
+ files = os.listdir(route_path)
144
+ ego_files = [file for file in files if file.startswith('ego')]
145
+ rsu_files = [file for file in files if file.startswith('rsu')]
146
+
147
+ # recompute rsu change frames
148
+ file_len_list = []
149
+ if len(rsu_files) > 0:
150
+ for rsu_file in ['rsu_1000', 'rsu_1001']:
151
+ if rsu_file in rsu_files:
152
+ rsu_frame_len = len(os.listdir(os.path.join(route_path,rsu_file,'measurements')))
153
+ file_len_list.append(rsu_frame_len)
154
+ self.rsu_change_frame = max(file_len_list) + 1
155
+
156
+ for j, file in enumerate(ego_files):
157
+ ego_path = os.path.join(path, file)
158
+ others_list = ego_files[:j]+ego_files[j+1:]
159
+ others_path_list = []
160
+ for others in others_list:
161
+ others_path_list.append(os.path.join(path, others))
162
+
163
+ for i in range(frames):
164
+ # reduce the ratio of frames not at junction
165
+ if filter_file is not None:
166
+ danger_frame_flag = False
167
+ for route_id in filter_file:
168
+ if route_path.endswith(filter_file[route_id]['sub_path']):
169
+ for junction_range in filter_file[route_id]['selected_frames'][file]:
170
+ if i > junction_range[0] and i < junction_range[1]+15:
171
+ danger_frame_flag = True
172
+ if (not danger_frame_flag):
173
+ continue
174
+ scene_dict = {}
175
+ scene_dict['ego'] = ego_path
176
+ scene_dict['other_egos'] = others_path_list
177
+ scene_dict['num_car'] = len(ego_files)
178
+ scene_dict['rsu'] = []
179
+ # order of rsu
180
+ if i%self.rsu_change_frame != 0 and len(rsu_files)>0:
181
+ order = int(i/self.rsu_change_frame)+1 # int(i/10)+1
182
+ rsu_path = 'rsu_{}00{}'.format(order, ego_path[-1])
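+ # Illustrative example: with rsu_change_frame = 25, frame 30 of an ego folder ending in '1'
+ # resolves to order = 2 and rsu_path = 'rsu_2001'.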
183
+ if True: # os.path.exists(os.path.join(route_path, rsu_path,'measurements','{}.json'.format(str(i).zfill(4)))):
184
+ scene_dict['rsu'].append(os.path.join(path, rsu_path))
185
+
186
+ self.route_frames.append((scene_dict, i)) # (scene_dict, i)
187
+ self.label_mode = self.params.get('label_mode', 'v2xverse')
188
+ self.first_det = False
189
+ print("Sub route dir nums: %d" % len(self.route_frames))
190
+
191
+ def _load_text(self, path):
192
+ text = open(os.path.join(self.root_dir,path), 'r').read()
193
+ return text
194
+
195
+ def _load_image(self, path):
196
+ trans_totensor = torchvision.transforms.ToTensor()
197
+ trans_toPIL = torchvision.transforms.ToPILImage()
198
+ try:
199
+ img = Image.open(os.path.join(self.root_dir,path))
200
+ img_tensor = trans_totensor(img)
201
+ img_PIL = trans_toPIL(img_tensor)
202
+ except Exception as e:
203
+ _logger.info(path)
204
+ n = path[-8:-4]
205
+ new_path = path[:-8] + "%04d.jpg" % (int(n) - 1)
206
+ img = Image.open(os.path.join(self.root_dir,new_path))
207
+ img_tensor = trans_totensor(img)
208
+ img_PIL = trans_toPIL(img_tensor)
209
+ return img_PIL
210
+
211
+ def _load_json(self, path):
212
+ try:
213
+ json_value = json.load(open(os.path.join(self.root_dir,path)))
214
+ except Exception as e:
215
+ _logger.info(path)
216
+ n = path[-9:-5]
217
+ new_path = path[:-9] + "%04d.json" % (int(n) - 1)
218
+ json_value = json.load(open(os.path.join(self.root_dir,new_path)))
219
+ return json_value
220
+
221
+ def _load_npy(self, path):
222
+ try:
223
+ array = np.load(os.path.join(self.root_dir,path), allow_pickle=True)
224
+ except Exception as e:
225
+ _logger.info(path)
226
+ n = path[-8:-4]
227
+ new_path = path[:-8] + "%04d.npy" % (int(n) - 1)
228
+ array = np.load(os.path.join(self.root_dir,new_path), allow_pickle=True)
229
+ return array
230
+
231
+ def get_one_record(self, route_dir, frame_id, agent='ego', visible_actors=None, tpe='all', extra_source=None):
232
+ '''
233
+ Parameters
234
+ ----------
235
+ scene_dict: str, index given by dataloader.
236
+ frame_id: int, frame id.
237
+
238
+ Returns
239
+ -------
240
+ data:
241
+ structure: dict{
242
+ ####################
243
+ # input to the model
244
+ ####################
245
+ 'agent': 'ego' or 'other_ego', # whether it is the ego car
246
+ 'rgb_[direction]': torch.Tensor, # direction in [left, right, center], shape (3, 128, 128)
247
+ 'rgb': torch.Tensor, front rgb image , # shape (3, 224, 224)
248
+ 'measurements': torch.Tensor, size [7]: the first 6 dims are the one-hot vector of the command, and the last dim is the car speed
249
+ 'command': int, 0-5, discrete command signal 0:left, 1:right, 2:straight,
250
+ # 3: lane follow, 4:lane change left, 5: lane change right
251
+ 'pose': np.array, shape(3,), lidar pose[gps_x, gps_y, theta]
252
+ 'detmap_pose': pose for density map
253
+ 'target_point': torch.Tensor, size[2], (x,y) coordinate in the left hand coordinate system,
254
+ where X-axis towards right side of the car
255
+ 'lidar': np.ndarray, # shape (3, 224, 224), 2D projection of lidar, range x:[-28m, 28m], y:[-28m,28m]
256
+ in the right hand coordinate system with X-axis towards left of car
257
+ ####################
258
+ # target of model
259
+ ####################
260
+ 'img_traffic': not yet used in model
261
+ 'command_waypoints': torch.Tensor, size[10,2], 10 (x,y) coordinates in the same coordinate system with target point
262
+ 'is_junction': int, 0 or 1, 1 means the car is at junction
263
+ 'traffic_light_state': int, 0 or 1
264
+ 'det_data': np.array, (400,7), flattened density map, 7 feature dims corresponds to
265
+ [prob_obj, box bias_X, box bias_Y, box_orientation, l, w, speed]
266
+ 'img_traj': not yet used in model
267
+ 'stop_sign': int, 0 or 1, existence of a stop sign
268
+ },
269
+ '''
270
+
271
+ output_record = OrderedDict()
272
+
273
+ if agent == 'ego':
274
+ output_record['ego'] = True
275
+ else:
276
+ output_record['ego'] = False
277
+
278
+ BEV = None
279
+
280
+ if route_dir is not None:
281
+ measurements = self._load_json(os.path.join(route_dir, "measurements", "%04d.json" % frame_id))
282
+ actors_data = self._load_json(os.path.join(route_dir, "actors_data", "%04d.json" % frame_id))
283
+ elif extra_source is not None:
284
+ if 'actors_data' in extra_source:
285
+ actors_data = extra_source['actors_data']
286
+ else:
287
+ actors_data = {}
288
+ measurements = extra_source['measurements']
289
+
290
+ ego_loc = np.array([measurements['x'], measurements['y']])
291
+ output_record['params'] = {}
292
+
293
+ cam_list = ['front','right','left','rear']
294
+ cam_angle_list = [0, 60, -60, 180]
295
+ for cam_id in range(4):
296
+ output_record['params']['camera{}'.format(cam_id)] = {}
297
+ output_record['params']['camera{}'.format(cam_id)]['cords'] = [measurements['x'], measurements['y'], 1.0,\
298
+ 0,measurements['theta']/np.pi*180+cam_angle_list[cam_id],0]
299
+ output_record['params']['camera{}'.format(cam_id)]['extrinsic'] = measurements['camera_{}_extrinsics'.format(cam_list[cam_id])]
300
+ output_record['params']['camera{}'.format(cam_id)]['intrinsic'] = measurements['camera_{}_intrinsics'.format(cam_list[cam_id])]
301
+
302
+ if 'speed' in measurements:
303
+ output_record['params']['ego_speed'] = measurements['speed']*3.6
304
+ else:
305
+ output_record['params']['ego_speed'] = 0
306
+
307
+ output_record['params']['lidar_pose'] = \
308
+ [measurements['lidar_pose_x'], measurements['lidar_pose_y'], 0, \
309
+ 0,measurements['theta']/np.pi*180-90,0]
310
+ self.distance_to_map_center = (self.det_range[3]-self.det_range[0])/2+self.det_range[0]
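+ # Offset from the lidar to the density-map center along the heading; with the default
+ # symmetric range [-36, -36, -22, 36, 36, 14] this evaluates to 0, so the map is centered on the lidar.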
311
+ output_record['params']['map_pose'] = \
312
+ [measurements['lidar_pose_x'] + self.distance_to_map_center*np.cos(measurements["theta"]-np.pi/2),
313
+ measurements['lidar_pose_y'] + self.distance_to_map_center*np.sin(measurements["theta"]-np.pi/2), 0, \
314
+ 0,measurements['theta']/np.pi*180-90,0]
315
+ detmap_pose_x = measurements['lidar_pose_x'] + self.distance_to_map_center*np.cos(measurements["theta"]-np.pi/2)
316
+ detmap_pose_y = measurements['lidar_pose_y'] + self.distance_to_map_center*np.sin(measurements["theta"]-np.pi/2)
317
+ detmap_theta = measurements["theta"] + np.pi/2
318
+ output_record['detmap_pose'] = np.array([-detmap_pose_y, detmap_pose_x, detmap_theta])
319
+ output_record['params']['lidar_pose_clean'] = output_record['params']['lidar_pose']
320
+ output_record['params']['plan_trajectory'] = []
321
+ output_record['params']['true_ego_pos'] = \
322
+ [measurements['lidar_pose_x'], measurements['lidar_pose_y'], 0, \
323
+ 0,measurements['theta']/np.pi*180,0]
324
+ output_record['params']['predicted_ego_pos'] = \
325
+ [measurements['lidar_pose_x'], measurements['lidar_pose_y'], 0, \
326
+ 0,measurements['theta']/np.pi*180,0]
327
+
328
+ if tpe == 'all':
329
+ if route_dir is not None:
330
+ lidar = self._load_npy(os.path.join(route_dir, "lidar", "%04d.npy" % frame_id))
331
+ output_record['rgb_front'] = self._load_image(os.path.join(route_dir, "rgb_front", "%04d.jpg" % frame_id))
332
+ output_record['rgb_left'] = self._load_image(os.path.join(route_dir, "rgb_left", "%04d.jpg" % frame_id))
333
+ output_record['rgb_right'] = self._load_image(os.path.join(route_dir, "rgb_right", "%04d.jpg" % frame_id))
334
+ output_record['rgb_rear'] = self._load_image(os.path.join(route_dir, "rgb_rear", "%04d.jpg" % frame_id))
335
+ if agent != 'rsu':
336
+ BEV = self._load_image(os.path.join(route_dir, "birdview", "%04d.jpg" % frame_id))
337
+ elif extra_source is not None:
338
+ lidar = extra_source['lidar']
339
+ if 'rgb_front' in extra_source:
340
+ output_record['rgb_front'] = extra_source['rgb_front']
341
+ output_record['rgb_left'] = extra_source['rgb_left']
342
+ output_record['rgb_right'] = extra_source['rgb_right']
343
+ output_record['rgb_rear'] = extra_source['rgb_rear']
344
+ else:
345
+ output_record['rgb_front'] = None
346
+ output_record['rgb_left'] = None
347
+ output_record['rgb_right'] = None
348
+ output_record['rgb_rear'] = None
349
+ BEV = None
350
+
351
+ output_record['lidar_np'] = lidar
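+ # The raw lidar is rotated into the right-handed frame used by the dataset
+ # (new x = old y, new y = -old x) and shifted in z by the sensor height
+ # (assuming measurements['lidar_pose_z'] is the lidar height above ground).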
352
+ lidar_transformed = np.zeros((output_record['lidar_np'].shape))
353
+ lidar_transformed[:,0] = output_record['lidar_np'][:,1]
354
+ lidar_transformed[:,1] = -output_record['lidar_np'][:,0]
355
+ lidar_transformed[:,2:] = output_record['lidar_np'][:,2:]
356
+ output_record['lidar_np'] = lidar_transformed.astype(np.float32)
357
+ output_record['lidar_np'][:, 2] += measurements['lidar_pose_z']
358
+
359
+ if visible_actors is not None:
360
+ actors_data = self.filter_actors_data_according_to_visible(actors_data, visible_actors)
361
+
362
+ ################ LSS debug TODO: clean up this function #####################
363
+ if not self.first_det:
364
+ import copy
365
+ if True: # agent=='rsu':
366
+ measurements["affected_light_id"] = -1
367
+ measurements["is_vehicle_present"] = []
368
+ measurements["is_bike_present"] = []
369
+ measurements["is_junction_vehicle_present"] = []
370
+ measurements["is_pedestrian_present"] = []
371
+ measurements["future_waypoints"] = []
372
+ cop3_range = [36,12,12,12, 0.25]
373
+ heatmap = generate_heatmap_multiclass(
374
+ copy.deepcopy(measurements), copy.deepcopy(actors_data), max_distance=36
375
+ )
376
+ self.det_data = (
377
+ generate_det_data_multiclass(
378
+ heatmap, copy.deepcopy(measurements), copy.deepcopy(actors_data), cop3_range
379
+ )
380
+ .reshape(3, int((cop3_range[0]+cop3_range[1])/cop3_range[4]
381
+ *(cop3_range[2]+cop3_range[3])/cop3_range[4]), -1) #(2, H*W,7)
382
+ .astype(np.float32)
383
+ )
384
+ self.first_det = True
385
+ if self.label_mode == 'cop3':
386
+ self.first_det = False
387
+ output_record['det_data'] = self.det_data
388
+ ##############################################################
389
+ if agent == 'rsu' :
390
+ for actor_id in actors_data.keys():
391
+ if actors_data[actor_id]['tpe'] == 0:
392
+ box = actors_data[actor_id]['box']
393
+ if abs(box[0]-0.8214) < 0.01 and abs(box[1]-0.18625) < 0.01 :
394
+ actors_data[actor_id]['tpe'] = 3
395
+
396
+ output_record['params']['vehicles'] = {}
397
+ for actor_id in actors_data.keys():
398
+
399
+ ######################
400
+ ## debug
401
+ ######################
402
+ # if agent == 'ego':
403
+ # continue
404
+
405
+ if tpe in [0, 1, 3]:
406
+ if actors_data[actor_id]['tpe'] != tpe:
407
+ continue
408
+
409
+ # exclude ego car
410
+ loc_actor = np.array(actors_data[actor_id]['loc'][0:2])
411
+ dis = np.linalg.norm(ego_loc - loc_actor)
412
+ if dis < 0.1:
413
+ continue
414
+
415
+ if not ('box' in actors_data[actor_id].keys() and 'ori' in actors_data[actor_id].keys() and 'loc' in actors_data[actor_id].keys()):
416
+ continue
417
+ output_record['params']['vehicles'][actor_id] = {}
418
+ output_record['params']['vehicles'][actor_id]['tpe'] = actors_data[actor_id]['tpe']
419
+ yaw = math.degrees(math.atan(actors_data[actor_id]['ori'][1]/actors_data[actor_id]['ori'][0]))
420
+ pitch = math.degrees(math.asin(actors_data[actor_id]['ori'][2]))
421
+ output_record['params']['vehicles'][actor_id]['angle'] = [0,yaw,pitch]
422
+ output_record['params']['vehicles'][actor_id]['center'] = [0,0,actors_data[actor_id]['box'][2]]
423
+ output_record['params']['vehicles'][actor_id]['extent'] = actors_data[actor_id]['box']
424
+ output_record['params']['vehicles'][actor_id]['location'] = [actors_data[actor_id]['loc'][0],actors_data[actor_id]['loc'][1],0]
425
+ output_record['params']['vehicles'][actor_id]['speed'] = 3.6 * math.sqrt(actors_data[actor_id]['vel'][0]**2+actors_data[actor_id]['vel'][1]**2 )
426
+
427
+ direction_list = ['front','left','right','rear']
428
+ theta_list = [0,-60,60,180]
429
+ dis_list = [0,0,0,-2.6]
430
+ camera_data_list = []
431
+ for i, direction in enumerate(direction_list):
432
+ if 'rgb_{}'.format(direction) in output_record:
433
+ camera_data_list.append(output_record['rgb_{}'.format(direction)])
434
+ dis_to_lidar = dis_list[i]
435
+ output_record['params']['camera{}'.format(i)]['cords'] = \
436
+ [measurements['x'] + dis_to_lidar*np.sin(measurements['theta']), measurements['y'] - dis_to_lidar*np.cos(measurements['theta']), 2.3,\
437
+ 0,measurements['theta']/np.pi*180 - 90 + theta_list[i],0]
438
+ output_record['params']['camera{}'.format(i)]['extrinsic'] = measurements['camera_{}_extrinsics'.format(direction_list[i])]
439
+ output_record['params']['camera{}'.format(i)]['intrinsic'] = measurements['camera_{}_intrinsics'.format(direction_list[i])]
440
+ output_record['camera_data'] = camera_data_list
441
+ bev_visibility_np = 255*np.ones((256,256,3), dtype=np.uint8)
442
+ output_record['bev_visibility.png'] = bev_visibility_np
443
+
444
+ if agent != 'rsu':
445
+ output_record['BEV'] = BEV
446
+ else:
447
+ output_record['BEV'] = None
448
+ return output_record
449
+
450
+ def filter_actors_data_according_to_visible(self, actors_data, visible_actors):
451
+ to_del_id = []
452
+ for actors_id in actors_data.keys():
453
+ if actors_id in visible_actors:
454
+ continue
455
+ to_del_id.append(actors_id)
456
+ for actors_id in to_del_id:
457
+ del actors_data[actors_id]
458
+ return actors_data
459
+
460
+ def get_visible_actors_one_term(self, route_dir, frame_id):
461
+ cur_visible_actors = []
462
+ actors_data = self._load_json(os.path.join(route_dir, "actors_data", "%04d.json" % frame_id))
463
+
464
+ for actors_id in actors_data:
465
+ if actors_data[actors_id]['tpe']==2:
466
+ continue
467
+ if not 'lidar_visible' in actors_data[actors_id]:
468
+ cur_visible_actors.append(actors_id)
469
+ print('Missing lidar_visible field!')
470
+ continue
471
+ if actors_data[actors_id]['lidar_visible']==1:
472
+ cur_visible_actors.append(actors_id)
473
+ return cur_visible_actors
474
+
475
+ def get_visible_actors(self, scene_dict, frame_id):
476
+ visible_actors = {} # id only
477
+ if self.test_flag:
478
+ visible_actors['car_0'] = None
479
+ for i, route_dir in enumerate(scene_dict['other_egos']):
480
+ visible_actors['car_{}'.format(i+1)] = None
481
+ for i, rsu_dir in enumerate(scene_dict['rsu']):
482
+ visible_actors['rsu_{}'.format(i)] = None
483
+ else:
484
+ visible_actors['car_0'] = self.get_visible_actors_one_term(scene_dict['ego'], frame_id)
485
+ if self.params['train_params']['max_cav'] > 1:
486
+ for i, route_dir in enumerate(scene_dict['other_egos']):
487
+ visible_actors['car_{}'.format(i+1)] = self.get_visible_actors_one_term(route_dir, frame_id)
488
+ for i, rsu_dir in enumerate(scene_dict['rsu']):
489
+ visible_actors['rsu_{}'.format(i)] = self.get_visible_actors_one_term(rsu_dir, frame_id)
490
+ for keys in visible_actors:
491
+ visible_actors[keys] = list(set(visible_actors[keys]))
492
+ return visible_actors
493
+
494
+ def retrieve_base_data(self, idx, tpe='all', extra_source=None, data_dir=None):
495
+ if extra_source is None:
496
+ if data_dir is not None:
497
+ scene_dict, frame_id = data_dir
498
+ else:
499
+ scene_dict, frame_id = self.route_frames[idx]
500
+ frame_id_latency = frame_id - self.frame_delay
501
+ visible_actors = None
502
+ visible_actors = self.get_visible_actors(scene_dict, frame_id)
503
+ data = OrderedDict()
504
+ data['car_0'] = self.get_one_record(scene_dict['ego'], frame_id , agent='ego', visible_actors=visible_actors['car_0'], tpe=tpe)
505
+ if self.params['train_params']['max_cav'] > 1:
506
+ for i, route_dir in enumerate(scene_dict['other_egos']):
507
+ try:
508
+ data['car_{}'.format(i+1)] = self.get_one_record(route_dir, frame_id_latency , agent='other_ego', visible_actors=visible_actors['car_{}'.format(i+1)], tpe=tpe)
509
+ except:
510
+ print('load other ego failed')
511
+ continue
512
+ if self.params['train_params']['max_cav'] > 2:
513
+ for i, rsu_dir in enumerate(scene_dict['rsu']):
514
+ try:
515
+ data['rsu_{}'.format(i)] = self.get_one_record(rsu_dir, frame_id_latency, agent='rsu', visible_actors=visible_actors['rsu_{}'.format(i)], tpe=tpe)
516
+ except:
517
+ print('load rsu failed')
518
+ continue
519
+ else:
520
+ data = OrderedDict()
521
+ scene_dict = None
522
+ frame_id = None
523
+ data['car_0'] = self.get_one_record(route_dir=None, frame_id=None , agent='ego', visible_actors=None, tpe=tpe, extra_source=extra_source['car_data'][0])
524
+ if self.params['train_params']['max_cav'] > 1:
525
+ if len(extra_source['car_data']) > 1:
526
+ for i in range(len(extra_source['car_data'])-1):
527
+ data['car_{}'.format(i+1)] = self.get_one_record(route_dir=None, frame_id=None , agent='other_ego', visible_actors=None, tpe=tpe, extra_source=extra_source['car_data'][i+1])
528
+ for i in range(len(extra_source['rsu_data'])):
529
+ data['rsu_{}'.format(i)] = self.get_one_record(route_dir=None, frame_id=None , agent='rsu', visible_actors=None, tpe=tpe, extra_source=extra_source['rsu_data'][i])
530
+ data['car_0']['scene_dict'] = scene_dict
531
+ data['car_0']['frame_id'] = frame_id
532
+ return data
533
+
534
+
535
+ def __len__(self):
536
+ return len(self.route_frames)
537
+
538
+ def __getitem__(self, idx):
539
+ """
540
+ Abstract method, needs to be defined by the child class.
541
+ """
542
+ pass
543
+
544
+ @staticmethod
545
+ def extract_timestamps(yaml_files):
546
+ """
547
+ Given the list of the yaml files, extract the mocked timestamps.
548
+
549
+ Parameters
550
+ ----------
551
+ yaml_files : list
552
+ The full path of all yaml files of ego vehicle
553
+
554
+ Returns
555
+ -------
556
+ timestamps : list
557
+ The list containing timestamps only.
558
+ """
559
+ timestamps = []
560
+
561
+ for file in yaml_files:
562
+ res = file.split('/')[-1]
563
+
564
+ timestamp = res.replace('.yaml', '')
565
+ timestamps.append(timestamp)
566
+
567
+ return timestamps
568
+
569
+ @staticmethod
570
+ def return_timestamp_key(scenario_database, timestamp_index):
571
+ """
572
+ Given the timestamp index, return the correct timestamp key, e.g.
573
+ 2 --> '000078'.
574
+
575
+ Parameters
576
+ ----------
577
+ scenario_database : OrderedDict
578
+ The dictionary contains all contents in the current scenario.
579
+
580
+ timestamp_index : int
581
+ The index for timestamp.
582
+
583
+ Returns
584
+ -------
585
+ timestamp_key : str
586
+ The timestamp key saved in the cav dictionary.
587
+ """
588
+ # get all timestamp keys
589
+ timestamp_keys = list(scenario_database.items())[0][1]
590
+ # retrieve the correct index
591
+ timestamp_key = list(timestamp_keys.items())[timestamp_index][0]
592
+
593
+ return timestamp_key
594
+
595
+ @staticmethod
596
+ def find_camera_files(cav_path, timestamp, sensor="camera"):
597
+ """
598
+ Retrieve the paths to all camera files.
599
+
600
+ Parameters
601
+ ----------
602
+ cav_path : str
603
+ The full file path of current cav.
604
+
605
+ timestamp : str
606
+ Current timestamp
607
+
608
+ sensor : str
609
+ "camera" or "depth"
610
+
611
+ Returns
612
+ -------
613
+ camera_files : list
614
+ The list containing all camera png file paths.
615
+ """
616
+ camera0_file = os.path.join(cav_path,
617
+ timestamp + f'_{sensor}0.png')
618
+ camera1_file = os.path.join(cav_path,
619
+ timestamp + f'_{sensor}1.png')
620
+ camera2_file = os.path.join(cav_path,
621
+ timestamp + f'_{sensor}2.png')
622
+ camera3_file = os.path.join(cav_path,
623
+ timestamp + f'_{sensor}3.png')
624
+ return [camera0_file, camera1_file, camera2_file, camera3_file]
625
+
626
+
627
+ def augment(self, lidar_np, object_bbx_center, object_bbx_mask):
628
+ """
629
+ Given the raw point cloud, augment by flipping and rotation.
630
+
631
+ Parameters
632
+ ----------
633
+ lidar_np : np.ndarray
634
+ (n, 4) shape
635
+
636
+ object_bbx_center : np.ndarray
637
+ (n, 7) shape to represent bbx's x, y, z, h, w, l, yaw
638
+
639
+ object_bbx_mask : np.ndarray
640
+ Indicate which elements in object_bbx_center are padded.
641
+ """
642
+ tmp_dict = {'lidar_np': lidar_np,
643
+ 'object_bbx_center': object_bbx_center,
644
+ 'object_bbx_mask': object_bbx_mask}
645
+ tmp_dict = self.data_augmentor.forward(tmp_dict)
646
+
647
+ lidar_np = tmp_dict['lidar_np']
648
+ object_bbx_center = tmp_dict['object_bbx_center']
649
+ object_bbx_mask = tmp_dict['object_bbx_mask']
650
+
651
+ return lidar_np, object_bbx_center, object_bbx_mask
652
+
653
+
654
+ def generate_object_center_lidar(self,
655
+ cav_contents,
656
+ reference_lidar_pose):
657
+ """
658
+ Retrieve all objects in a format of (n, 7), where 7 represents
659
+ x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw.
660
+ The object_bbx_center is in ego coordinate.
661
+
662
+ Notice: it is a wrapper of the postprocessor.
663
+
664
+ Parameters
665
+ ----------
666
+ cav_contents : list
667
+ List of dictionary, save all cavs' information.
668
+ in fact it is used in get_item_single_car, so the list length is 1
669
+
670
+ reference_lidar_pose : list
671
+ The final target lidar pose with length 6.
672
+
673
+ Returns
674
+ -------
675
+ object_np : np.ndarray
676
+ Shape is (max_num, 7).
677
+ mask : np.ndarray
678
+ Shape is (max_num,).
679
+ object_ids : list
680
+ Length is number of bbx in current sample.
681
+ """
682
+ return self.post_processor.generate_object_center(cav_contents,
683
+ reference_lidar_pose)
684
+
685
+ def generate_object_center_camera(self,
686
+ cav_contents,
687
+ reference_lidar_pose):
688
+ """
689
+ Retrieve all objects in a format of (n, 7), where 7 represents
690
+ x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw.
691
+ The object_bbx_center is in ego coordinate.
692
+
693
+ Notice: it is a wrapper of the postprocessor.
694
+
695
+ Parameters
696
+ ----------
697
+ cav_contents : list
698
+ List of dictionary, save all cavs' information.
699
+ in fact it is used in get_item_single_car, so the list length is 1
700
+
701
+ reference_lidar_pose : list
702
+ The final target lidar pose with length 6.
703
+
704
+ visibility_map : np.ndarray
705
+ for OPV2V, its 256*256 resolution. 0.39m per pixel. heading up.
706
+
707
+ Returns
708
+ -------
709
+ object_np : np.ndarray
710
+ Shape is (max_num, 7).
711
+ mask : np.ndarray
712
+ Shape is (max_num,).
713
+ object_ids : list
714
+ Length is number of bbx in current sample.
715
+ """
716
+ return self.post_processor.generate_visible_object_center(
717
+ cav_contents, reference_lidar_pose
718
+ )
719
+
720
+ def get_ext_int(self, params, camera_id):
721
+ if self.params['extrinsic'] == 1:
722
+ return self.get_ext_int_1(params, camera_id)
723
+ elif self.params['extrinsic'] == 2:
724
+ return self.get_ext_int_2(params, camera_id)
725
+ def get_ext_int_1(self, params, camera_id):
726
+ camera_coords = np.array(params["camera%d" % camera_id]["cords"]).astype(
727
+ np.float32)
728
+ camera_to_lidar = x1_to_x2(
729
+ camera_coords, params["lidar_pose_clean"]
730
+ ).astype(np.float32) # T_LiDAR_camera
731
+ camera_to_lidar = camera_to_lidar @ np.array(
732
+ [[0, 0, 1, 0], [1, 0, 0, 0], [0, -1, 0, 0], [0, 0, 0, 1]],
733
+ dtype=np.float32) # UE4 coord to opencv coord
734
+ camera_intrinsic = np.array(params["camera%d" % camera_id]["intrinsic"]).astype(
735
+ np.float32
736
+ )
737
+ return camera_to_lidar, camera_intrinsic
738
+ def get_ext_int_2(self, params, camera_id):
739
+ camera_extrinsic = np.array(params["camera%d" % camera_id]["extrinsic"]).astype(
740
+ np.float32)
741
+ camera_extrinsic = camera_extrinsic @ np.array(
742
+ [[0, 0, 1, 0], [1, 0, 0, 0], [0, -1, 0, 0], [0, 0, 0, 1]],
743
+ dtype=np.float32) # UE4 coord to opencv coord
744
+ camera_intrinsic = np.array(params["camera%d" % camera_id]["intrinsic"]).astype(
745
+ np.float32
746
+ )
747
+ return camera_extrinsic, camera_intrinsic
748
+ VALUES = [255]
749
+ EXTENT = [0]
750
+ def generate_heatmap_multiclass(measurements, actors_data, max_distance=30, pixels_per_meter=8):
751
+ actors_data_multiclass = {
752
+ 0: {}, 1: {}, 2:{}, 3:{}
753
+ }
754
+ for _id in actors_data.keys():
755
+ actors_data_multiclass[actors_data[_id]['tpe']][_id] = actors_data[_id]
756
+ heatmap_0 = generate_heatmap(measurements, actors_data_multiclass[0], max_distance, pixels_per_meter)
757
+ heatmap_1 = generate_heatmap(measurements, actors_data_multiclass[1], max_distance, pixels_per_meter)
758
+ # heatmap_2 = generate_heatmap(measurements, actors_data_multiclass[2], max_distance, pixels_per_meter) # traffic light, not used
759
+ heatmap_3 = generate_heatmap(measurements, actors_data_multiclass[3], max_distance, pixels_per_meter)
760
+ return {0: heatmap_0, 1: heatmap_1, 3: heatmap_3}
761
+
762
+ def get_yaw_angle(forward_vector):
763
+ forward_vector = forward_vector / np.linalg.norm(forward_vector)
764
+ yaw = math.acos(forward_vector[0])
765
+ if forward_vector[1] < 0:
766
+ yaw = 2 * np.pi - yaw
767
+ return yaw
768
+
769
+ def generate_heatmap(measurements, actors_data, max_distance=30, pixels_per_meter=8):
770
+ img_size = max_distance * pixels_per_meter * 2
771
+ img = np.zeros((img_size, img_size, 3), np.int32)
772
+ ego_x = measurements["lidar_pose_x"]
773
+ ego_y = measurements["lidar_pose_y"]
774
+ ego_theta = measurements["theta"]
775
+ R = np.array(
776
+ [
777
+ [np.cos(ego_theta), -np.sin(ego_theta)],
778
+ [np.sin(ego_theta), np.cos(ego_theta)],
779
+ ]
780
+ )
781
+ ego_id = None
782
+ for _id in actors_data:
783
+ color = np.array([1, 1, 1])
784
+ if actors_data[_id]["tpe"] == 2:
785
+ if int(_id) == int(measurements["affected_light_id"]):
786
+ if actors_data[_id]["sta"] == 0:
787
+ color = np.array([1, 1, 1])
788
+ else:
789
+ color = np.array([0, 0, 0])
790
+ yaw = get_yaw_angle(actors_data[_id]["ori"])
791
+ TR = np.array([[np.cos(yaw), np.sin(yaw)], [-np.sin(yaw), np.cos(yaw)]])
792
+ actors_data[_id]["loc"] = np.array(
793
+ actors_data[_id]["loc"][:2]
794
+ ) + TR.T.dot(np.array(actors_data[_id]["taigger_loc"])[:2])
795
+ actors_data[_id]["ori"] = np.array(actors_data[_id]["ori"])
796
+ actors_data[_id]["box"] = np.array(actors_data[_id]["trigger_box"]) * 2
797
+ else:
798
+ continue
799
+ raw_loc = actors_data[_id]["loc"]
800
+ if (raw_loc[0] - ego_x) ** 2 + (raw_loc[1] - ego_y) ** 2 <= 2:
801
+ ego_id = _id
802
+ color = np.array([0, 1, 1])
803
+ new_loc = R.T.dot(np.array([raw_loc[0] - ego_x, raw_loc[1] - ego_y]))
804
+ actors_data[_id]["loc"] = np.array(new_loc)
805
+ raw_ori = actors_data[_id]["ori"]
806
+ new_ori = R.T.dot(np.array([raw_ori[0], raw_ori[1]]))
807
+ actors_data[_id]["ori"] = np.array(new_ori)
808
+ actors_data[_id]["box"] = np.array(actors_data[_id]["box"])
809
+ if int(_id) in measurements["is_vehicle_present"]:
810
+ color = np.array([1, 1, 1])
811
+ elif int(_id) in measurements["is_bike_present"]:
812
+ color = np.array([1, 1, 1])
813
+ elif int(_id) in measurements["is_junction_vehicle_present"]:
814
+ color = np.array([1, 1, 1])
815
+ elif int(_id) in measurements["is_pedestrian_present"]:
816
+ color = np.array([1, 1, 1])
817
+ actors_data[_id]["color"] = color
818
+
819
+ if ego_id is not None and ego_id in actors_data:
820
+ del actors_data[ego_id] # Do not show ego car
821
+ for _id in actors_data:
822
+ if actors_data[_id]["tpe"] == 2:
823
+ continue # FIXME do not add traffic light
824
+ if int(_id) != int(measurements["affected_light_id"]):
825
+ continue
826
+ if actors_data[_id]["sta"] != 0:
827
+ continue
828
+ act_img = np.zeros((img_size, img_size, 3), np.uint8)
829
+ loc = actors_data[_id]["loc"][:2]
830
+ ori = actors_data[_id]["ori"][:2]
831
+ box = actors_data[_id]["box"]
832
+ if box[0] < 1.5:
833
+ box = box * 1.5 # FIXME enlarge the size of pedestrian and bike
834
+ color = actors_data[_id]["color"]
835
+ for i in range(len(VALUES)):
836
+ act_img = add_rect(
837
+ act_img,
838
+ loc,
839
+ ori,
840
+ box + EXTENT[i],
841
+ VALUES[i],
842
+ pixels_per_meter,
843
+ max_distance,
844
+ color,
845
+ )
846
+ act_img = np.clip(act_img, 0, 255)
847
+ img = img + act_img
848
+ img = np.clip(img, 0, 255)
849
+ img = img.astype(np.uint8)
850
+ img = img[:, :, 0]
851
+ return img
852
+
853
+ def add_rect(img, loc, ori, box, value, pixels_per_meter, max_distance, color):
854
+ img_size = max_distance * pixels_per_meter * 2
855
+ vet_ori = np.array([-ori[1], ori[0]])
856
+ hor_offset = box[0] * ori
857
+ vet_offset = box[1] * vet_ori
858
+ left_up = (loc + hor_offset + vet_offset + max_distance) * pixels_per_meter
859
+ left_down = (loc + hor_offset - vet_offset + max_distance) * pixels_per_meter
860
+ right_up = (loc - hor_offset + vet_offset + max_distance) * pixels_per_meter
861
+ right_down = (loc - hor_offset - vet_offset + max_distance) * pixels_per_meter
862
+ left_up = np.around(left_up).astype(np.int32)
863
+ left_down = np.around(left_down).astype(np.int32)
864
+ right_down = np.around(right_down).astype(np.int32)
865
+ right_up = np.around(right_up).astype(np.int32)
866
+ left_up = list(left_up)
867
+ left_down = list(left_down)
868
+ right_up = list(right_up)
869
+ right_down = list(right_down)
870
+ color = [int(x) for x in value * color]
871
+ cv2.fillConvexPoly(img, np.array([left_up, left_down, right_down, right_up]), color)
872
+ return img
873
+
874
+ def generate_det_data_multiclass(
875
+ heatmap, measurements, actors_data, det_range=[30,10,10,10, 0.8]
876
+ ):
877
+ actors_data_multiclass = {
878
+ 0: {}, 1: {}, 2: {}, 3:{}
879
+ }
880
+ for _id in actors_data.keys():
881
+ actors_data_multiclass[actors_data[_id]['tpe']][_id] = actors_data[_id]
882
+ det_data = []
883
+ for _class in range(4):
884
+ if _class != 2:
885
+ det_data.append(generate_det_data(heatmap[_class], measurements, actors_data_multiclass[_class], det_range))
886
+
887
+ return np.array(det_data)
888
+
889
+ from skimage.measure import block_reduce
890
+
891
+ def generate_det_data(
892
+ heatmap, measurements, actors_data, det_range=[30,10,10,10, 0.8]
893
+ ):
894
+ res = det_range[4]
895
+ max_distance = max(det_range)
896
+ traffic_heatmap = block_reduce(heatmap, block_size=(int(8*res), int(8*res)), func=np.mean)
897
+ traffic_heatmap = np.clip(traffic_heatmap, 0.0, 255.0)
898
+ traffic_heatmap = traffic_heatmap[:int((det_range[0]+det_range[1])/res), int((max_distance-det_range[2])/res):int((max_distance+det_range[3])/res)]
899
+ det_data = np.zeros((int((det_range[0]+det_range[1])/res), int((det_range[2]+det_range[3])/res), 7)) # (50,25,7)
900
+ vertical, horizontal = det_data.shape[:2]
901
+
902
+ ego_x = measurements["lidar_pose_x"]
903
+ ego_y = measurements["lidar_pose_y"]
904
+ ego_theta = measurements["theta"]
905
+ R = np.array(
906
+ [
907
+ [np.cos(ego_theta), -np.sin(ego_theta)],
908
+ [np.sin(ego_theta), np.cos(ego_theta)],
909
+ ]
910
+ )
911
+ need_deleted_ids = []
912
+ for _id in actors_data:
913
+ raw_loc = actors_data[_id]["loc"]
914
+ new_loc = R.T.dot(np.array([raw_loc[0] - ego_x, raw_loc[1] - ego_y]))
915
+ new_loc[1] = -new_loc[1]
916
+ actors_data[_id]["loc"] = np.array(new_loc)
917
+ raw_ori = actors_data[_id]["ori"]
918
+ new_ori = R.T.dot(np.array([raw_ori[0], raw_ori[1]]))
919
+ dis = new_loc[0] ** 2 + new_loc[1] ** 2
920
+ if (
921
+ dis <= 2
922
+ or dis >= (max_distance) ** 2 * 2
923
+ or "box" not in actors_data[_id]
924
+ or actors_data[_id]['tpe'] == 2
925
+ ):
926
+ need_deleted_ids.append(_id)
927
+ continue
928
+ actors_data[_id]["ori"] = np.array(new_ori)
929
+ actors_data[_id]["box"] = np.array(actors_data[_id]["box"])
930
+
931
+ for _id in need_deleted_ids:
932
+ del actors_data[_id]
933
+
934
+ for i in range(vertical): # 50
935
+ for j in range(horizontal): # 25
936
+ if traffic_heatmap[i][j] < 0.05 * 255.0:
937
+ continue
938
+ center_x, center_y = convert_grid_to_xy(i, j, det_range)
939
+ min_dis = 1000
940
+ min_id = None
941
+ for _id in actors_data:
942
+ loc = actors_data[_id]["loc"][:2]
943
+ ori = actors_data[_id]["ori"][:2]
944
+ box = actors_data[_id]["box"]
945
+ dis = (loc[0] - center_x) ** 2 + (loc[1] - center_y) ** 2
946
+ if dis < min_dis:
947
+ min_dis = dis
948
+ min_id = _id
949
+
950
+ if min_id is None:
951
+ continue
952
+
953
+ loc = actors_data[min_id]["loc"][:2]
954
+ ori = actors_data[min_id]["ori"][:2]
955
+ box = actors_data[min_id]["box"]
956
+ theta = (get_yaw_angle(ori) / np.pi + 2) % 2
957
+ speed = np.linalg.norm(actors_data[min_id]["vel"])
958
+
959
+ # prob = np.power(0.5 / max(0.5, np.sqrt(min_dis)), 0.5)
960
+
961
+ det_data[i][j] = np.array(
962
+ [
963
+ 0,
964
+ (loc[0] - center_x) * 3.0,
965
+ (loc[1] - center_y) * 3.0,
966
+ theta / 2.0,
967
+ box[0] / 7.0,
968
+ box[1] / 4.0,
969
+ 0,
970
+ ]
971
+ )
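+ # Cell features follow the [prob_obj, dx, dy, orientation, l, w, speed] layout;
+ # offsets are scaled by 3.0, orientation by 1/2 (theta is in units of pi, range [0, 2)),
+ # and the box extent by 1/7 and 1/4.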
972
+
973
+ heatmap = np.zeros((int((det_range[0]+det_range[1])/res), int((det_range[2]+det_range[3])/res))) # (50,25)
974
+ for _id in actors_data:
975
+ loc = actors_data[_id]["loc"][:2]
976
+ ori = actors_data[_id]["ori"][:2]
977
+ box = actors_data[_id]["box"]
978
+ try:
979
+ x,y = loc
980
+ i,j = convert_xy_to_grid(x,y,det_range)
981
+ i = int(np.around(i))
982
+ j = int(np.around(j))
983
+
984
+ if i < vertical and i > 0 and j > 0 and j < horizontal:
985
+ det_data[i][j][-1] = 1.0
986
+
987
+ ################## Gaussian Heatmap #####################
988
+ w, h = box[:2]/det_range[4]
989
+ heatmap = draw_heatmap(heatmap, h, w, j, i)
990
+ #########################################################
991
+
992
+ # theta = (get_yaw_angle(ori) / np.pi + 2) % 2
993
+ # center_x, center_y = convert_grid_to_xy(i, j, det_range)
994
+
995
+ # det_data[i][j] = np.array(
996
+ # [
997
+ # 0,
998
+ # (loc[0] - center_x) * 3.0,
999
+ # (loc[1] - center_y) * 3.0,
1000
+ # theta / 2.0,
1001
+ # box[0] / 7.0,
1002
+ # box[1] / 4.0,
1003
+ # 0,
1004
+ # ]
1005
+ # )
1006
+
1007
+ except:
1008
+ print('actor data error, skip!')
1009
+ det_data[:,:,0] = heatmap
1010
+ return det_data
1011
+
1012
+ def convert_grid_to_xy(i, j, det_range):
1013
+ x = det_range[4]*(j + 0.5) - det_range[2]
1014
+ y = det_range[0] - det_range[4]*(i+0.5)
1015
+ return x, y
1016
+
1017
+ def convert_xy_to_grid(x, y, det_range):
1018
+ j = (x + det_range[2]) / det_range[4] - 0.5
1019
+ i = (det_range[0] - y) / det_range[4] - 0.5
1020
+ return i, j
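+ # Round-trip check with det_range = [36, 12, 12, 12, 0.25]: convert_grid_to_xy(0, 0) gives
+ # (x, y) = (-11.875, 35.875), and convert_xy_to_grid(-11.875, 35.875) returns (0.0, 0.0).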
1021
+
1022
+ def draw_heatmap(heatmap, h, w, x, y):
1023
+ feature_map_size = heatmap.shape
1024
+ radius = gaussian_radius(
1025
+ (h, w),
1026
+ min_overlap=0.1)
1027
+ radius = max(2, int(radius))
1028
+
1029
+ # throw out not in range objects to avoid out of array
1030
+ # area when creating the heatmap
1031
+ if not (0 <= y < feature_map_size[0]
1032
+ and 0 <= x < feature_map_size[1]):
1033
+ return heatmap
1034
+
1035
+ heatmap = draw_gaussian(heatmap, (x,y), radius)
1036
+ return heatmap
1037
+
1038
+ def draw_gaussian(heatmap, center, radius, k=1):
1039
+ """Get gaussian masked heatmap.
1040
+
1041
+ Args:
1042
+ heatmap (np.ndarray): Heatmap to be masked.
1043
+ center (tuple): Center coord (x, y) on the heatmap.
1044
+ radius (int): Radius of the gaussian.
1045
+ k (int): Multiple of masked_gaussian. Defaults to 1.
1046
+
1047
+ Returns:
1048
+ np.ndarray: Masked heatmap.
1049
+ """
1050
+ diameter = 2 * radius + 1
1051
+ gaussian = gaussian_2d((diameter, diameter), sigma=diameter / 6)
1052
+
1053
+ x, y = int(center[0]), int(center[1])
1054
+
1055
+ height, width = heatmap.shape[0:2]
1056
+
1057
+ left, right = min(x, radius), min(width - x, radius + 1)
1058
+ top, bottom = min(y, radius), min(height - y, radius + 1)
1059
+
1060
+ masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
1061
+ masked_gaussian = gaussian[radius - top:radius + bottom,
1062
+ radius - left:radius + right]
1063
+
1064
+ if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
1065
+ # torch.max(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
1066
+ np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
1067
+ # masked_heatmap = np.max([masked_heatmap[None,], (masked_gaussian * k)[None,]], axis=0)[0]
1068
+ # heatmap[y - top:y + bottom, x - left:x + right] = masked_heatmap
1069
+ return heatmap
1070
+
1071
+ def gaussian_2d(shape, sigma=1):
1072
+ """Generate gaussian map.
1073
+
1074
+ Args:
1075
+ shape (list[int]): Shape of the map.
1076
+ sigma (float): Sigma to generate gaussian map.
1077
+ Defaults to 1.
1078
+
1079
+ Returns:
1080
+ np.ndarray: Generated gaussian map.
1081
+ """
1082
+ m, n = [(ss - 1.) / 2. for ss in shape]
1083
+ y, x = np.ogrid[-m:m + 1, -n:n + 1]
1084
+
1085
+ h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
1086
+ h[h < np.finfo(h.dtype).eps * h.max()] = 0
1087
+ return h
1088
+
1089
+ def gaussian_radius(det_size, min_overlap=0.5):
1090
+ """Get radius of gaussian.
1091
+
1092
+ Args:
1093
+ det_size (tuple[float]): Size (height, width) of the detection box.
1094
+ min_overlap (float): Gaussian_overlap. Defaults to 0.5.
1095
+
1096
+ Returns:
1097
+ float: Computed radius.
1098
+ """
1099
+ height, width = det_size
1100
+
1101
+ a1 = 1
1102
+ b1 = (height + width)
1103
+ c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
1104
+ sq1 = np.sqrt(b1**2 - 4 * a1 * c1)
1105
+ r1 = (b1 + sq1) / (2 * a1)
1106
+
1107
+ a2 = 4
1108
+ b2 = 2 * (height + width)
1109
+ c2 = (1 - min_overlap) * width * height
1110
+ sq2 = np.sqrt(b2**2 - 4 * a2 * c2)
1111
+ r2 = (b2 + sq2) / (2 * a2)
1112
+
1113
+ a3 = 4 * min_overlap
1114
+ b3 = -2 * min_overlap * (height + width)
1115
+ c3 = (min_overlap - 1) * width * height
1116
+ sq3 = np.sqrt(b3**2 - 4 * a3 * c3)
1117
+ r3 = (b3 + sq3) / (2 * a3)
1118
+ return min(r1, r2, r3)
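+ # The three candidate radii correspond to the three corner-overlap cases used in
+ # CornerNet/CenterNet-style heatmap target assignment; taking the minimum keeps the
+ # requested min_overlap satisfied in every case.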
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/early_fusion_dataset.py ADDED
@@ -0,0 +1,414 @@
1
+ # early fusion dataset
2
+ import torch
3
+ import numpy as np
4
+ from opencood.utils.pcd_utils import downsample_lidar_minimum
5
+ import math
6
+ from collections import OrderedDict
7
+
8
+ from opencood.utils import box_utils
9
+ from opencood.utils.common_utils import merge_features_to_dict
10
+ from opencood.data_utils.post_processor import build_postprocessor
11
+ from opencood.data_utils.pre_processor import build_preprocessor
12
+ from opencood.hypes_yaml.yaml_utils import load_yaml
13
+ from opencood.utils.pcd_utils import \
14
+ mask_points_by_range, mask_ego_points, shuffle_points, \
15
+ downsample_lidar_minimum
16
+ from opencood.utils.transformation_utils import x1_to_x2
17
+
18
+
19
+ def getEarlyFusionDataset(cls):
20
+ class EarlyFusionDataset(cls):
21
+ """
22
+ This dataset is used for early fusion, where each CAV transmits the raw
23
+ point cloud to the ego vehicle.
24
+ """
25
+ def __init__(self, params, visualize, train=True):
26
+ super(EarlyFusionDataset, self).__init__(params, visualize, train)
27
+ self.supervise_single = True if ('supervise_single' in params['model']['args'] and params['model']['args']['supervise_single']) \
28
+ else False
29
+ assert self.supervise_single is False
30
+ self.proj_first = False if 'proj_first' not in params['fusion']['args']\
31
+ else params['fusion']['args']['proj_first']
32
+ self.anchor_box = self.post_processor.generate_anchor_box()
33
+ self.anchor_box_torch = torch.from_numpy(self.anchor_box)
34
+
35
+ self.heterogeneous = False
36
+ if 'heter' in params:
37
+ self.heterogeneous = True
38
+
39
+ def __getitem__(self, idx):
40
+ base_data_dict = self.retrieve_base_data(idx)
41
+
42
+ processed_data_dict = OrderedDict()
43
+ processed_data_dict['ego'] = {}
44
+
45
+ ego_id = -1
46
+ ego_lidar_pose = []
47
+
48
+ # first find the ego vehicle's lidar pose
49
+ for cav_id, cav_content in base_data_dict.items():
50
+ if cav_content['ego']:
51
+ ego_id = cav_id
52
+ ego_lidar_pose = cav_content['params']['lidar_pose']
53
+ break
54
+
55
+ assert ego_id != -1
56
+ assert len(ego_lidar_pose) > 0
57
+
58
+ projected_lidar_stack = []
59
+ object_stack = []
60
+ object_id_stack = []
61
+
62
+ # loop over all CAVs to process information
63
+ for cav_id, selected_cav_base in base_data_dict.items():
64
+ # check if the cav is within the communication range with ego
65
+ distance = \
66
+ math.sqrt((selected_cav_base['params']['lidar_pose'][0] -
67
+ ego_lidar_pose[0]) ** 2 + (
68
+ selected_cav_base['params'][
69
+ 'lidar_pose'][1] - ego_lidar_pose[
70
+ 1]) ** 2)
71
+ if distance > self.params['comm_range']:
72
+ continue
73
+
74
+ selected_cav_processed = self.get_item_single_car(
75
+ selected_cav_base,
76
+ ego_lidar_pose)
77
+ # all these lidar and object coordinates are projected to ego
78
+ # already.
79
+ projected_lidar_stack.append(
80
+ selected_cav_processed['projected_lidar'])
81
+ object_stack.append(selected_cav_processed['object_bbx_center'])
82
+ object_id_stack += selected_cav_processed['object_ids']
83
+
84
+ # exclude all repetitive objects
85
+ unique_indices = \
86
+ [object_id_stack.index(x) for x in set(object_id_stack)]
87
+ object_stack = np.vstack(object_stack)
88
+ object_stack = object_stack[unique_indices]
89
+
90
+ # make sure bounding boxes across all frames have the same number
91
+ object_bbx_center = \
92
+ np.zeros((self.params['postprocess']['max_num'], 7))
93
+ mask = np.zeros(self.params['postprocess']['max_num'])
94
+ object_bbx_center[:object_stack.shape[0], :] = object_stack
95
+ mask[:object_stack.shape[0]] = 1
96
+
97
+ # convert list to numpy array, (N, 4)
98
+ projected_lidar_stack = np.vstack(projected_lidar_stack)
99
+
100
+ # data augmentation
101
+ projected_lidar_stack, object_bbx_center, mask = \
102
+ self.augment(projected_lidar_stack, object_bbx_center, mask)
103
+
104
+ # filter the stacked lidar points by the configured range
105
+ projected_lidar_stack = mask_points_by_range(projected_lidar_stack,
106
+ self.params['preprocess'][
107
+ 'cav_lidar_range'])
108
+ # augmentation may push some of the bbx out of range, so filter them
109
+ object_bbx_center_valid = object_bbx_center[mask == 1]
110
+ object_bbx_center_valid, range_mask = \
111
+ box_utils.mask_boxes_outside_range_numpy(object_bbx_center_valid,
112
+ self.params['preprocess'][
113
+ 'cav_lidar_range'],
114
+ self.params['postprocess'][
115
+ 'order'],
116
+ return_mask=True
117
+ )
118
+ mask[object_bbx_center_valid.shape[0]:] = 0
119
+ object_bbx_center[:object_bbx_center_valid.shape[0]] = \
120
+ object_bbx_center_valid
121
+ object_bbx_center[object_bbx_center_valid.shape[0]:] = 0
122
+ unique_indices = list(np.array(unique_indices)[range_mask])
123
+
124
+ # pre-process the lidar to voxel/bev/downsampled lidar
125
+ lidar_dict = self.pre_processor.preprocess(projected_lidar_stack)
126
+
127
+ # generate the anchor boxes
128
+ anchor_box = self.post_processor.generate_anchor_box()
129
+
130
+ # generate targets label
131
+ label_dict = \
132
+ self.post_processor.generate_label(
133
+ gt_box_center=object_bbx_center,
134
+ anchors=anchor_box,
135
+ mask=mask)
136
+
137
+ processed_data_dict['ego'].update(
138
+ {'object_bbx_center': object_bbx_center,
139
+ 'object_bbx_mask': mask,
140
+ 'object_ids': [object_id_stack[i] for i in unique_indices],
141
+ 'anchor_box': anchor_box,
142
+ 'processed_lidar': lidar_dict,
143
+ 'label_dict': label_dict})
144
+
145
+ if self.visualize:
146
+ processed_data_dict['ego'].update({'origin_lidar':
147
+ projected_lidar_stack})
148
+
149
+ return processed_data_dict
150
+
151
+ def get_item_single_car(self, selected_cav_base, ego_pose):
152
+ """
153
+ Project the lidar and bbx to ego space first, and then do clipping.
154
+
155
+ Parameters
156
+ ----------
157
+ selected_cav_base : dict
158
+ The dictionary contains a single CAV's raw information.
159
+ ego_pose : list
160
+ The ego vehicle lidar pose under world coordinate.
161
+
162
+ Returns
163
+ -------
164
+ selected_cav_processed : dict
165
+ The dictionary contains the cav's processed information.
166
+ """
167
+ selected_cav_processed = {}
168
+
169
+ # calculate the transformation matrix
170
+ transformation_matrix = \
171
+ x1_to_x2(selected_cav_base['params']['lidar_pose'],
172
+ ego_pose)
173
+
174
+ # retrieve objects under ego coordinates
175
+ object_bbx_center, object_bbx_mask, object_ids = \
176
+ self.generate_object_center([selected_cav_base],
177
+ ego_pose)
178
+
179
+ # filter lidar
180
+ lidar_np = selected_cav_base['lidar_np']
181
+ lidar_np = shuffle_points(lidar_np)
182
+ # remove points that hit itself
183
+ lidar_np = mask_ego_points(lidar_np)
184
+ # project the lidar to ego space
185
+ lidar_np[:, :3] = \
186
+ box_utils.project_points_by_matrix_torch(lidar_np[:, :3],
187
+ transformation_matrix)
188
+
189
+ selected_cav_processed.update(
190
+ {'object_bbx_center': object_bbx_center[object_bbx_mask == 1],
191
+ 'object_ids': object_ids,
192
+ 'projected_lidar': lidar_np})
193
+
194
+ return selected_cav_processed
195
+
196
+ def collate_batch_test(self, batch):
197
+ """
198
+ Customized collate function for pytorch dataloader during testing
199
+ for the early fusion dataset.
200
+
201
+ Parameters
202
+ ----------
203
+ batch : dict
204
+
205
+ Returns
206
+ -------
207
+ batch : dict
208
+ Reformatted batch.
209
+ """
210
+ # currently, we only support batch size of 1 during testing
211
+ assert len(batch) <= 1, "Batch size 1 is required during testing!"
212
+ batch = batch[0] # only ego
213
+
214
+ output_dict = {}
215
+
216
+ for cav_id, cav_content in batch.items():
217
+ output_dict.update({cav_id: {}})
218
+ # shape: (1, max_num, 7)
219
+ object_bbx_center = \
220
+ torch.from_numpy(np.array([cav_content['object_bbx_center']]))
221
+ object_bbx_mask = \
222
+ torch.from_numpy(np.array([cav_content['object_bbx_mask']]))
223
+ object_ids = cav_content['object_ids']
224
+
225
+ # the anchor box is usually the same for all bounding boxes, thus
226
+ # we don't need the batch dimension.
227
+ if cav_content['anchor_box'] is not None:
228
+ output_dict[cav_id].update({'anchor_box':
229
+ torch.from_numpy(np.array(
230
+ cav_content[
231
+ 'anchor_box']))})
232
+ if self.visualize:
233
+ origin_lidar = [cav_content['origin_lidar']]
234
+
235
+ # processed lidar dictionary
236
+ processed_lidar_torch_dict = \
237
+ self.pre_processor.collate_batch(
238
+ [cav_content['processed_lidar']])
239
+ # label dictionary
240
+ label_torch_dict = \
241
+ self.post_processor.collate_batch([cav_content['label_dict']])
242
+
243
+ # save the transformation matrix (4, 4) to ego vehicle
244
+ transformation_matrix_torch = \
245
+ torch.from_numpy(np.identity(4)).float()
246
+ transformation_matrix_clean_torch = \
247
+ torch.from_numpy(np.identity(4)).float()
248
+
249
+ output_dict[cav_id].update({'object_bbx_center': object_bbx_center,
250
+ 'object_bbx_mask': object_bbx_mask,
251
+ 'processed_lidar': processed_lidar_torch_dict,
252
+ 'label_dict': label_torch_dict,
253
+ 'object_ids': object_ids,
254
+ 'transformation_matrix': transformation_matrix_torch,
255
+ 'transformation_matrix_clean': transformation_matrix_clean_torch})
256
+
257
+ if self.visualize:
258
+ origin_lidar = \
259
+ np.array(
260
+ downsample_lidar_minimum(pcd_np_list=origin_lidar))
261
+ origin_lidar = torch.from_numpy(origin_lidar)
262
+ output_dict[cav_id].update({'origin_lidar': origin_lidar})
263
+
264
+ return output_dict
265
+
266
+ def collate_batch_train(self, batch):
267
+ # Intermediate fusion is different from the other two
268
+ output_dict = {'ego': {}}
269
+
270
+ object_bbx_center = []
271
+ object_bbx_mask = []
272
+ object_ids = []
273
+ processed_lidar_list = []
274
+ image_inputs_list = []
275
+ # used to record different scenario
276
+ label_dict_list = []
277
+ origin_lidar = []
278
+
279
+ # heterogeneous
280
+ lidar_agent_list = []
281
+
282
+ # pairwise transformation matrix
283
+ pairwise_t_matrix_list = []
284
+
285
+ ### 2022.10.10 single gt ####
286
+ if self.supervise_single:
287
+ pos_equal_one_single = []
288
+ neg_equal_one_single = []
289
+ targets_single = []
290
+
291
+ for i in range(len(batch)):
292
+ ego_dict = batch[i]['ego']
293
+ object_bbx_center.append(ego_dict['object_bbx_center'])
294
+ object_bbx_mask.append(ego_dict['object_bbx_mask'])
295
+ object_ids.append(ego_dict['object_ids'])
296
+ if self.load_lidar_file:
297
+ processed_lidar_list.append(ego_dict['processed_lidar'])
298
+ if self.load_camera_file:
299
+ image_inputs_list.append(ego_dict['image_inputs']) # different cav_num, ego_dict['image_inputs'] is dict.
300
+
301
+ label_dict_list.append(ego_dict['label_dict'])
302
+
303
+ if self.visualize:
304
+ origin_lidar.append(ego_dict['origin_lidar'])
305
+
306
+ ### 2022.10.10 single gt ####
307
+ if self.supervise_single:
308
+ pos_equal_one_single.append(ego_dict['single_label_dict_torch']['pos_equal_one'])
309
+ neg_equal_one_single.append(ego_dict['single_label_dict_torch']['neg_equal_one'])
310
+ targets_single.append(ego_dict['single_label_dict_torch']['targets'])
311
+
312
+ # heterogeneous
313
+ if self.heterogeneous:
314
+ lidar_agent_list.append(ego_dict['lidar_agent'])
315
+
316
+ # convert to numpy, (B, max_num, 7)
317
+ object_bbx_center = torch.from_numpy(np.array(object_bbx_center))
318
+ object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask))
319
+
320
+ if self.load_lidar_file:
321
+ merged_feature_dict = merge_features_to_dict(processed_lidar_list)
322
+
323
+ if self.heterogeneous:
324
+ lidar_agent = np.concatenate(lidar_agent_list)
325
+ lidar_agent_idx = lidar_agent.nonzero()[0].tolist()
326
+ for k, v in merged_feature_dict.items(): # 'voxel_features' 'voxel_num_points' 'voxel_coords'
327
+ merged_feature_dict[k] = [v[index] for index in lidar_agent_idx]
328
+
329
+ if not self.heterogeneous or (self.heterogeneous and sum(lidar_agent) != 0):
330
+ processed_lidar_torch_dict = \
331
+ self.pre_processor.collate_batch(merged_feature_dict)
332
+ output_dict['ego'].update({'processed_lidar': processed_lidar_torch_dict})
333
+
334
+ if self.load_camera_file:
335
+ merged_image_inputs_dict = merge_features_to_dict(image_inputs_list, merge='cat')
336
+
337
+ if self.heterogeneous:
338
+ camera_agent = 1 - lidar_agent
339
+ camera_agent_idx = camera_agent.nonzero()[0].tolist()
340
+ if sum(camera_agent) != 0:
341
+ for k, v in merged_image_inputs_dict.items(): # 'imgs' 'rots' 'trans' ...
342
+ merged_image_inputs_dict[k] = torch.stack([v[index] for index in camera_agent_idx])
343
+
344
+ if not self.heterogeneous or (self.heterogeneous and sum(camera_agent) != 0):
345
+ output_dict['ego'].update({'image_inputs': merged_image_inputs_dict})
346
+
347
+ label_torch_dict = \
348
+ self.post_processor.collate_batch(label_dict_list)
349
+
350
+ # for centerpoint
351
+ label_torch_dict.update({'object_bbx_center': object_bbx_center,
352
+ 'object_bbx_mask': object_bbx_mask})
353
+
354
+ # (B, max_cav)
355
+ pairwise_t_matrix = torch.from_numpy(np.array(pairwise_t_matrix_list))
356
+
357
+ # add pairwise_t_matrix to label dict
358
+
359
+ # object id is only used during inference, where batch size is 1.
360
+ # so here we only get the first element.
361
+ output_dict['ego'].update({'object_bbx_center': object_bbx_center,
362
+ 'object_bbx_mask': object_bbx_mask,
363
+ 'label_dict': label_torch_dict,
364
+ 'object_ids': object_ids[0]})
365
+
366
+
367
+ if self.visualize:
368
+ origin_lidar = \
369
+ np.array(downsample_lidar_minimum(pcd_np_list=origin_lidar))
370
+ origin_lidar = torch.from_numpy(origin_lidar)
371
+ output_dict['ego'].update({'origin_lidar': origin_lidar})
372
+
373
+ if self.supervise_single:
374
+ output_dict['ego'].update({
375
+ "label_dict_single" :
376
+ {"pos_equal_one": torch.cat(pos_equal_one_single, dim=0),
377
+ "neg_equal_one": torch.cat(neg_equal_one_single, dim=0),
378
+ "targets": torch.cat(targets_single, dim=0)}
379
+ })
380
+
381
+ if self.heterogeneous:
382
+ output_dict['ego'].update({
383
+ "lidar_agent_record": torch.from_numpy(np.concatenate(lidar_agent_list)) # [0,1,1,0,1...]
384
+ })
385
+
386
+ return output_dict
387
+
388
+ def post_process(self, data_dict, output_dict):
389
+ """
390
+ Process the outputs of the model to 2D/3D bounding box.
391
+
392
+ Parameters
393
+ ----------
394
+ data_dict : dict
395
+ The dictionary containing the origin input data of model.
396
+
397
+ output_dict :dict
398
+ The dictionary containing the output of the model.
399
+
400
+ Returns
401
+ -------
402
+ pred_box_tensor : torch.Tensor
403
+ The tensor of prediction bounding box after NMS.
404
+ gt_box_tensor : torch.Tensor
405
+ The tensor of gt bounding box.
406
+ """
407
+ pred_box_tensor, pred_score = \
408
+ self.post_processor.post_process(data_dict, output_dict)
409
+ gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict)
410
+
411
+ return pred_box_tensor, pred_score, gt_box_tensor
412
+
413
+ return EarlyFusionDataset
414
+
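
Note: like the other dataset files in this upload, early_fusion_dataset.py uses a class-factory pattern: getEarlyFusionDataset(cls) returns an EarlyFusionDataset subclass of whichever base dataset class is passed in, so the same fusion logic can be layered on top of different base datasets. Below is a minimal, self-contained sketch of that pattern; the names (get_logging_dataset, ListDataset) are illustrative only and not part of this repository.

def get_logging_dataset(cls):
    # mirrors the getXFusionDataset(cls) factories above: build a subclass
    # of the supplied base class and return the new class object
    class LoggingDataset(cls):
        def __getitem__(self, idx):
            item = super().__getitem__(idx)
            print(f"loaded sample {idx}")
            return item
    return LoggingDataset

class ListDataset:
    def __init__(self, data):
        self.data = data
    def __len__(self):
        return len(self.data)
    def __getitem__(self, idx):
        return self.data[idx]

dataset_cls = get_logging_dataset(ListDataset)  # analogous to getEarlyFusionDataset(BaseDataset)
ds = dataset_cls([10, 20, 30])
assert ds[1] == 20                              # prints "loaded sample 1"
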
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/early_multiclass_fusion_dataset.py ADDED
@@ -0,0 +1,899 @@
1
+ # early multiclass fusion dataset
2
+ import random
3
+ import math
4
+ from collections import OrderedDict
5
+ import numpy as np
6
+ import torch
7
+ import copy
8
+ from icecream import ic
9
+ from PIL import Image
10
+ import pickle as pkl
11
+ from opencood.utils import box_utils as box_utils
12
+ from opencood.data_utils.pre_processor import build_preprocessor
13
+ from opencood.data_utils.post_processor import build_postprocessor
14
+ from opencood.utils.camera_utils import (
15
+ sample_augmentation,
16
+ img_transform,
17
+ normalize_img,
18
+ img_to_tensor,
19
+ )
20
+ # from opencood.utils.heter_utils import AgentSelector
21
+ from opencood.utils.common_utils import merge_features_to_dict
22
+ from opencood.utils.transformation_utils import x1_to_x2, x_to_world, get_pairwise_transformation
23
+ from opencood.utils.pose_utils import add_noise_data_dict, add_noise_data_dict_asymmetric
24
+ from opencood.utils.pcd_utils import (
25
+ mask_points_by_range,
26
+ mask_ego_points,
27
+ mask_ego_points_v2,
28
+ shuffle_points,
29
+ downsample_lidar_minimum,
30
+ )
31
+ from opencood.utils.common_utils import read_json
32
+
33
+
34
+ def getEarlymulticlassFusionDataset(cls):
35
+ """
36
+ cls: the Basedataset.
37
+ """
38
+ class EarlymulticlassFusionDataset(cls):
39
+ def __init__(self, params, visualize, train=True):
40
+ super().__init__(params, visualize, train)
41
+ # supervise single
42
+ self.supervise_single = True if ('supervise_single' in params['model']['args'] and params['model']['args']['supervise_single']) \
43
+ else False
44
+ self.proj_first = False if 'proj_first' not in params['fusion']['args']\
45
+ else params['fusion']['args']['proj_first']
46
+
47
+ self.anchor_box = self.post_processor.generate_anchor_box()
48
+ self.anchor_box_torch = torch.from_numpy(self.anchor_box)
49
+
50
+ self.heterogeneous = False
51
+ if 'heter' in params:
52
+ self.heterogeneous = True
53
+ self.selector = AgentSelector(params['heter'], self.max_cav)
54
+ self.kd_flag = params.get('kd_flag', False)
55
+ self.box_align = False
56
+ if "box_align" in params:
57
+ self.box_align = True
58
+ self.stage1_result_path = params['box_align']['train_result'] if train else params['box_align']['val_result']
59
+ self.stage1_result = read_json(self.stage1_result_path)
60
+ self.box_align_args = params['box_align']['args']
61
+ self.multiclass = params['model']['args']['multi_class']
62
+ self.online_eval_only = False
63
+
64
+ def get_item_single_car(self, selected_cav_base, ego_cav_base, base_data_dict, tpe='all', cav_id='car_0', online_eval=False):
65
+ """
66
+ Process a single CAV's information for the train/test pipeline.
67
+
68
+
69
+ Parameters
70
+ ----------
71
+ selected_cav_base : dict
72
+ The dictionary contains a single CAV's raw information.
73
+ including 'params', 'camera_data'
74
+ ego_pose : list, length 6
75
+ The ego vehicle lidar pose under world coordinate.
76
+ ego_pose_clean : list, length 6
77
+ only used for gt box generation
78
+
79
+ Returns
80
+ -------
81
+ selected_cav_processed : dict
82
+ The dictionary contains the cav's processed information.
83
+ """
84
+ selected_cav_processed = {}
85
+ ego_pose, ego_pose_clean = ego_cav_base['params']['lidar_pose'], ego_cav_base['params']['lidar_pose_clean']
86
+ selected_pose, selected_pose_clean = selected_cav_base['params']['lidar_pose'], selected_cav_base['params']['lidar_pose_clean']
87
+
88
+ # calculate the transformation matrix
89
+ transformation_matrix = \
90
+ x1_to_x2(selected_cav_base['params']['lidar_pose'],
91
+ ego_pose) # T_ego_cav
92
+ transformation_matrix_clean = \
93
+ x1_to_x2(selected_cav_base['params']['lidar_pose_clean'],
94
+ ego_pose_clean)
95
+
96
+ # lidar
97
+ if tpe == 'all':
98
+ if self.load_lidar_file or self.visualize:
99
+ # process lidar
100
+ lidar_np = selected_cav_base['lidar_np']
101
+ lidar_np = shuffle_points(lidar_np)
102
+ # remove points that hit itself
103
+ if not cav_id.startswith('rsu'):
104
+ lidar_np = mask_ego_points_v2(lidar_np)
105
+ # project the lidar to ego space
106
+ # x,y,z in ego space
107
+
108
+ project_lidar_bank = []
109
+ lidar_bank = []
110
+ for agent_id in base_data_dict:
111
+ collab_cav_base = base_data_dict[agent_id]
112
+ collab_lidar_np = collab_cav_base['lidar_np']
113
+ collab_lidar_np = shuffle_points(collab_lidar_np)
114
+ # remove points that hit itself
115
+ if not agent_id.startswith('rsu'):
116
+ collab_lidar_np = mask_ego_points_v2(collab_lidar_np)
117
+ # project the lidar to ego space
118
+ # x,y,z in ego space
119
+
120
+ # calculate the transformation matrix
121
+ transformation_matrix_for_selected = \
122
+ x1_to_x2(collab_cav_base['params']['lidar_pose'],
123
+ selected_pose) # T_ego_cav
124
+
125
+ projected_collab_lidar = \
126
+ box_utils.project_points_by_matrix_torch(collab_lidar_np[:, :3],
127
+ transformation_matrix_for_selected)
128
+ project_lidar_bank.append(projected_collab_lidar)
129
+ lidar_bank.append(collab_lidar_np)
130
+
131
+ projected_lidar = np.concatenate(project_lidar_bank, axis=0)
132
+ lidar_np = np.concatenate(lidar_bank, axis=0)
133
+
134
+ # if self.proj_first:
135
+ lidar_np[:, :3] = projected_lidar
136
+ if self.visualize:
137
+ # filter lidar
138
+ if not selected_cav_base['ego']:
139
+ projected_lidar *= 0
140
+ selected_cav_processed.update({'projected_lidar': projected_lidar})
141
+
142
+ if self.kd_flag:
143
+ lidar_proj_np = copy.deepcopy(lidar_np)
144
+ lidar_proj_np[:,:3] = projected_lidar
145
+
146
+ selected_cav_processed.update({'projected_lidar': lidar_proj_np})
147
+
148
+ processed_lidar = self.pre_processor.preprocess(lidar_np)
149
+ selected_cav_processed.update({'processed_features': processed_lidar})
150
+
151
+ if not online_eval:
152
+ # generate targets label single GT, note the reference pose is itself.
153
+ object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center(
154
+ [selected_cav_base], selected_cav_base['params']['lidar_pose']
155
+ )
156
+
157
+ label_dict = {}
158
+ if tpe == 'all':
159
+ # unused label
160
+ if False:
161
+ label_dict = self.post_processor.generate_label(
162
+ gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask
163
+ )
164
+ selected_cav_processed.update({
165
+ "single_label_dict": label_dict,
166
+ "single_object_bbx_center": object_bbx_center,
167
+ "single_object_bbx_mask": object_bbx_mask})
168
+
169
+ if tpe == 'all':
170
+ # camera
171
+ if self.load_camera_file:
172
+ camera_data_list = selected_cav_base["camera_data"]
173
+
174
+ params = selected_cav_base["params"]
175
+ imgs = []
176
+ rots = []
177
+ trans = []
178
+ intrins = []
179
+ extrinsics = []
180
+ post_rots = []
181
+ post_trans = []
182
+
183
+ for idx, img in enumerate(camera_data_list):
184
+ camera_to_lidar, camera_intrinsic = self.get_ext_int(params, idx)
185
+
186
+ intrin = torch.from_numpy(camera_intrinsic)
187
+ rot = torch.from_numpy(
188
+ camera_to_lidar[:3, :3]
189
+ ) # R_wc, we consider world-coord is the lidar-coord
190
+ tran = torch.from_numpy(camera_to_lidar[:3, 3]) # T_wc
191
+
192
+ post_rot = torch.eye(2)
193
+ post_tran = torch.zeros(2)
194
+
195
+ img_src = [img]
196
+
197
+ # depth
198
+ if self.load_depth_file:
199
+ depth_img = selected_cav_base["depth_data"][idx]
200
+ img_src.append(depth_img)
201
+ else:
202
+ depth_img = None
203
+
204
+ # data augmentation
205
+ resize, resize_dims, crop, flip, rotate = sample_augmentation(
206
+ self.data_aug_conf, self.train
207
+ )
208
+ img_src, post_rot2, post_tran2 = img_transform(
209
+ img_src,
210
+ post_rot,
211
+ post_tran,
212
+ resize=resize,
213
+ resize_dims=resize_dims,
214
+ crop=crop,
215
+ flip=flip,
216
+ rotate=rotate,
217
+ )
218
+ # for convenience, make augmentation matrices 3x3
219
+ post_tran = torch.zeros(3)
220
+ post_rot = torch.eye(3)
221
+ post_tran[:2] = post_tran2
222
+ post_rot[:2, :2] = post_rot2
223
+
224
+ # decouple RGB and Depth
225
+
226
+ img_src[0] = normalize_img(img_src[0])
227
+ if self.load_depth_file:
228
+ img_src[1] = img_to_tensor(img_src[1]) * 255
229
+
230
+ imgs.append(torch.cat(img_src, dim=0))
231
+ intrins.append(intrin)
232
+ extrinsics.append(torch.from_numpy(camera_to_lidar))
233
+ rots.append(rot)
234
+ trans.append(tran)
235
+ post_rots.append(post_rot)
236
+ post_trans.append(post_tran)
237
+
238
+ selected_cav_processed.update(
239
+ {
240
+ "image_inputs":
241
+ {
242
+ "imgs": torch.stack(imgs), # [Ncam, 3or4, H, W]
243
+ "intrins": torch.stack(intrins),
244
+ "extrinsics": torch.stack(extrinsics),
245
+ "rots": torch.stack(rots),
246
+ "trans": torch.stack(trans),
247
+ "post_rots": torch.stack(post_rots),
248
+ "post_trans": torch.stack(post_trans),
249
+ }
250
+ }
251
+ )
252
+
253
+ # anchor box
254
+ selected_cav_processed.update({"anchor_box": self.anchor_box})
255
+
256
+
257
+ if not online_eval:
258
+ # note the reference pose ego
259
+ object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center([selected_cav_base],
260
+ ego_pose_clean)
261
+ selected_cav_processed.update(
262
+ {
263
+ "object_bbx_center": object_bbx_center[object_bbx_mask == 1],
264
+ "object_bbx_mask": object_bbx_mask,
265
+ "object_ids": object_ids,
266
+ }
267
+ )
268
+
269
+ selected_cav_processed.update(
270
+ {
271
+ 'transformation_matrix': transformation_matrix,
272
+ 'transformation_matrix_clean': transformation_matrix_clean
273
+ }
274
+ )
275
+
276
+ return selected_cav_processed
277
+
278
+ def __getitem__(self, idx, extra_source=None, data_dir=None):
279
+
280
+ if data_dir is not None:
281
+ extra_source=1
282
+
283
+ object_bbx_center_list = []
284
+ object_bbx_mask_list = []
285
+ object_id_dict = {}
286
+
287
+ object_bbx_center_list_single = []
288
+ object_bbx_mask_list_single = []
289
+
290
+
291
+ output_dict = {}
292
+ for tpe in ['all', 0, 1, 3]:
293
+ output_single_class = self.__getitem_single_class__(idx, tpe, extra_source, data_dir)
294
+ output_dict[tpe] = output_single_class
295
+ if tpe == 'all' and extra_source==None:
296
+ continue
297
+ elif tpe == 'all' and extra_source!=None:
298
+ break
299
+ object_bbx_center_list.append(output_single_class['ego']['object_bbx_center'])
300
+ object_bbx_mask_list.append(output_single_class['ego']['object_bbx_mask'])
301
+ if self.supervise_single:
302
+ object_bbx_center_list_single.append(output_single_class['ego']['single_object_bbx_center_torch'])
303
+ object_bbx_mask_list_single.append(output_single_class['ego']['single_object_bbx_mask_torch'])
304
+
305
+ object_id_dict[tpe] = output_single_class['ego']['object_ids']
306
+
307
+ if self.multiclass and extra_source==None:
308
+ output_dict['all']['ego']['object_bbx_center'] = np.stack(object_bbx_center_list, axis=0)
309
+ output_dict['all']['ego']['object_bbx_mask'] = np.stack(object_bbx_mask_list, axis=0)
310
+ if self.supervise_single:
311
+ output_dict['all']['ego']['single_object_bbx_center_torch'] = torch.stack(object_bbx_center_list_single, axis=1)
312
+ output_dict['all']['ego']['single_object_bbx_mask_torch'] = torch.stack(object_bbx_mask_list_single, axis=1)
313
+
314
+ output_dict['all']['ego']['object_ids'] = object_id_dict
315
+ # print('finish get item')
316
+ return output_dict['all']
317
+
318
+ def __getitem_single_class__(self, idx, tpe=None, extra_source=None, data_dir=None):
319
+
320
+ if extra_source is None and data_dir is None:
321
+ base_data_dict = self.retrieve_base_data(idx, tpe) ## {id:{'ego':True/False, 'params': {'lidar_pose','speed','vehicles','ego_pos',...}, 'lidar_np': array (N,4)}}
322
+ elif data_dir is not None:
323
+ base_data_dict = self.retrieve_base_data(idx=None, tpe=tpe, data_dir=data_dir)
324
+ elif extra_source is not None:
325
+ base_data_dict = self.retrieve_base_data(idx=None, tpe=tpe, extra_source=extra_source)
326
+
327
+ # base_data_dict = add_noise_data_dict(base_data_dict,self.params['noise_setting'])
328
+ base_data_dict = add_noise_data_dict_asymmetric(base_data_dict,self.params['noise_setting'])
329
+ processed_data_dict = OrderedDict()
330
+ processed_data_dict['ego'] = {}
331
+
332
+ ego_id = -1
333
+ ego_lidar_pose = []
334
+ ego_cav_base = None
335
+
336
+ # first find the ego vehicle's lidar pose
337
+ for cav_id, cav_content in base_data_dict.items():
338
+ if cav_content['ego']:
339
+ ego_id = cav_id
340
+ ego_lidar_pose = cav_content['params']['lidar_pose']
341
+ ego_cav_base = cav_content
342
+ break
343
+
344
+ assert cav_id == list(base_data_dict.keys())[
345
+ 0], "The first element in the OrderedDict must be ego"
346
+ assert ego_id != -1
347
+ assert len(ego_lidar_pose) > 0
348
+
349
+ agents_image_inputs = []
350
+ processed_features = []
351
+ object_stack = []
352
+ object_id_stack = []
353
+ single_label_list = []
354
+ single_object_bbx_center_list = []
355
+ single_object_bbx_mask_list = []
356
+ too_far = []
357
+ lidar_pose_list = []
358
+ lidar_pose_clean_list = []
359
+ cav_id_list = []
360
+ projected_lidar_clean_list = [] # disconet
361
+
362
+ if self.visualize or self.kd_flag:
363
+ projected_lidar_stack = []
364
+
365
+ # loop over all CAVs to process information
366
+ for cav_id, selected_cav_base in base_data_dict.items():
367
+ # check if the cav is within the communication range with ego
368
+ distance = \
369
+ math.sqrt((selected_cav_base['params']['lidar_pose'][0] -
370
+ ego_lidar_pose[0]) ** 2 + (
371
+ selected_cav_base['params'][
372
+ 'lidar_pose'][1] - ego_lidar_pose[
373
+ 1]) ** 2)
374
+
375
+ # if distance is too far, we will just skip this agent
376
+ if distance > self.params['comm_range']:
377
+ too_far.append(cav_id)
378
+ continue
379
+
380
+ lidar_pose_clean_list.append(selected_cav_base['params']['lidar_pose_clean'])
381
+ lidar_pose_list.append(selected_cav_base['params']['lidar_pose']) # 6dof pose
382
+ cav_id_list.append(cav_id)
383
+
384
+ for cav_id in too_far:
385
+ base_data_dict.pop(cav_id)
386
+
387
+ if self.box_align and str(idx) in self.stage1_result.keys(): # False
388
+ from opencood.models.sub_modules.box_align_v2 import box_alignment_relative_sample_np
389
+ stage1_content = self.stage1_result[str(idx)]
390
+ if stage1_content is not None:
391
+ all_agent_id_list = stage1_content['cav_id_list'] # include those out of range
392
+ all_agent_corners_list = stage1_content['pred_corner3d_np_list']
393
+ all_agent_uncertainty_list = stage1_content['uncertainty_np_list']
394
+
395
+ cur_agent_id_list = cav_id_list
396
+ cur_agent_pose = [base_data_dict[cav_id]['params']['lidar_pose'] for cav_id in cav_id_list]
397
+ cur_agnet_pose = np.array(cur_agent_pose)
398
+ cur_agent_in_all_agent = [all_agent_id_list.index(cur_agent) for cur_agent in cur_agent_id_list] # indexing current agent in `all_agent_id_list`
399
+
400
+ pred_corners_list = [np.array(all_agent_corners_list[cur_in_all_ind], dtype=np.float64)
401
+ for cur_in_all_ind in cur_agent_in_all_agent]
402
+ uncertainty_list = [np.array(all_agent_uncertainty_list[cur_in_all_ind], dtype=np.float64)
403
+ for cur_in_all_ind in cur_agent_in_all_agent]
404
+
405
+ if sum([len(pred_corners) for pred_corners in pred_corners_list]) != 0:
406
+ refined_pose = box_alignment_relative_sample_np(pred_corners_list,
407
+ cur_agnet_pose,
408
+ uncertainty_list=uncertainty_list,
409
+ **self.box_align_args)
410
+ cur_agnet_pose[:,[0,1,4]] = refined_pose
411
+
412
+ for i, cav_id in enumerate(cav_id_list):
413
+ lidar_pose_list[i] = cur_agnet_pose[i].tolist()
414
+ base_data_dict[cav_id]['params']['lidar_pose'] = cur_agnet_pose[i].tolist()
415
+
416
+ pairwise_t_matrix = \
417
+ get_pairwise_transformation(base_data_dict,
418
+ self.max_cav,
419
+ self.proj_first)
420
+
421
+ lidar_poses = np.array(lidar_pose_list).reshape(-1, 6) # [N_cav, 6]
422
+ lidar_poses_clean = np.array(lidar_pose_clean_list).reshape(-1, 6) # [N_cav, 6]
423
+
424
+ # merge preprocessed features from different cavs into the same dict
425
+ cav_num = len(cav_id_list)
426
+
427
+ # heterogeneous
428
+ if self.heterogeneous:
429
+ lidar_agent, camera_agent = self.selector.select_agent(idx)
430
+ lidar_agent = lidar_agent[:cav_num]
431
+ processed_data_dict['ego'].update({"lidar_agent": lidar_agent})
432
+
433
+ for _i, cav_id in enumerate(cav_id_list):
434
+ selected_cav_base = base_data_dict[cav_id]
435
+
436
+ # dynamic object center generator! for heterogeneous input
437
+ if (not self.visualize) and self.heterogeneous and lidar_agent[_i]:
438
+ self.generate_object_center = self.generate_object_center_lidar
439
+ elif (not self.visualize) and self.heterogeneous and (not lidar_agent[_i]):
440
+ self.generate_object_center = self.generate_object_center_camera
441
+
442
+ selected_cav_processed = self.get_item_single_car(
443
+ selected_cav_base,
444
+ ego_cav_base,
445
+ base_data_dict,
446
+ tpe,
447
+ cav_id,
448
+ extra_source!=None)
449
+
450
+ if extra_source==None:
451
+ object_stack.append(selected_cav_processed['object_bbx_center'])
452
+ object_id_stack += selected_cav_processed['object_ids']
453
+ if tpe == 'all':
454
+ if self.load_lidar_file:
455
+ processed_features.append(
456
+ selected_cav_processed['processed_features'])
457
+ if self.load_camera_file:
458
+ agents_image_inputs.append(
459
+ selected_cav_processed['image_inputs'])
460
+
461
+ if self.visualize or self.kd_flag:
462
+ projected_lidar_stack.append(
463
+ selected_cav_processed['projected_lidar'])
464
+
465
+ if self.supervise_single and extra_source==None:
466
+ single_label_list.append(selected_cav_processed['single_label_dict'])
467
+ single_object_bbx_center_list.append(selected_cav_processed['single_object_bbx_center'])
468
+ single_object_bbx_mask_list.append(selected_cav_processed['single_object_bbx_mask'])
469
+
470
+ # generate single view GT label
471
+ if self.supervise_single and extra_source==None:
472
+ single_label_dicts = {}
473
+ if tpe == 'all':
474
+ # unused label
475
+ if False:
476
+ single_label_dicts = self.post_processor.collate_batch(single_label_list)
477
+ single_object_bbx_center = torch.from_numpy(np.array(single_object_bbx_center_list))
478
+ single_object_bbx_mask = torch.from_numpy(np.array(single_object_bbx_mask_list))
479
+ processed_data_dict['ego'].update({
480
+ "single_label_dict_torch": single_label_dicts,
481
+ "single_object_bbx_center_torch": single_object_bbx_center,
482
+ "single_object_bbx_mask_torch": single_object_bbx_mask,
483
+ })
484
+
485
+ if self.kd_flag:
486
+ stack_lidar_np = np.vstack(projected_lidar_stack)
487
+ stack_lidar_np = mask_points_by_range(stack_lidar_np,
488
+ self.params['preprocess'][
489
+ 'cav_lidar_range'])
490
+ stack_feature_processed = self.pre_processor.preprocess(stack_lidar_np)
491
+ processed_data_dict['ego'].update({'teacher_processed_lidar':
492
+ stack_feature_processed})
493
+
494
+ if extra_source is None:
495
+ # exclude all repetitive objects
496
+ unique_indices = \
497
+ [object_id_stack.index(x) for x in set(object_id_stack)]
498
+ object_stack = np.vstack(object_stack)
499
+ object_stack = object_stack[unique_indices]
500
+
501
+ # make sure bounding boxes across all frames have the same number
502
+ object_bbx_center = \
503
+ np.zeros((self.params['postprocess']['max_num'], 7))
504
+ mask = np.zeros(self.params['postprocess']['max_num'])
505
+ object_bbx_center[:object_stack.shape[0], :] = object_stack
506
+ mask[:object_stack.shape[0]] = 1
507
+
508
+ processed_data_dict['ego'].update(
509
+ {'object_bbx_center': object_bbx_center, # (100,7)
510
+ 'object_bbx_mask': mask, # (100,)
511
+ 'object_ids': [object_id_stack[i] for i in unique_indices],
512
+ }
513
+ )
514
+
515
+
516
+ # generate targets label
517
+ label_dict = {}
518
+ if tpe == 'all':
519
+ # unused label
520
+ if False:
521
+ label_dict = \
522
+ self.post_processor.generate_label(
523
+ gt_box_center=object_bbx_center,
524
+ anchors=self.anchor_box,
525
+ mask=mask)
526
+
527
+ processed_data_dict['ego'].update(
528
+ {
529
+ 'anchor_box': self.anchor_box,
530
+ 'label_dict': label_dict,
531
+ 'cav_num': cav_num,
532
+ 'pairwise_t_matrix': pairwise_t_matrix,
533
+ 'lidar_poses_clean': lidar_poses_clean,
534
+ 'lidar_poses': lidar_poses})
535
+
536
+ if tpe == 'all':
537
+ if self.load_lidar_file:
538
+ merged_feature_dict = merge_features_to_dict(processed_features)
539
+ processed_data_dict['ego'].update({'processed_lidar': merged_feature_dict})
540
+ if self.load_camera_file:
541
+ merged_image_inputs_dict = merge_features_to_dict(agents_image_inputs, merge='stack')
542
+ processed_data_dict['ego'].update({'image_inputs': merged_image_inputs_dict})
543
+
544
+ if self.visualize:
545
+ processed_data_dict['ego'].update({'origin_lidar':
546
+ # projected_lidar_stack})
547
+ np.vstack(
548
+ projected_lidar_stack)})
549
+ processed_data_dict['ego'].update({'lidar_len': [len(projected_lidar_stack[i]) for i in range(len(projected_lidar_stack))]})
550
+
551
+
552
+ processed_data_dict['ego'].update({'sample_idx': idx,
553
+ 'cav_id_list': cav_id_list})
554
+
555
+ img_front_list = []
556
+ img_left_list = []
557
+ img_right_list = []
558
+ BEV_list = []
559
+
560
+ if self.visualize:
561
+ for car_id in base_data_dict:
562
+ if not base_data_dict[car_id]['ego']:
563
+ continue
564
+ if 'rgb_front' in base_data_dict[car_id] and 'rgb_left' in base_data_dict[car_id] and 'rgb_right' in base_data_dict[car_id] and 'BEV' in base_data_dict[car_id] :
565
+ img_front_list.append(base_data_dict[car_id]['rgb_front'])
566
+ img_left_list.append(base_data_dict[car_id]['rgb_left'])
567
+ img_right_list.append(base_data_dict[car_id]['rgb_right'])
568
+ BEV_list.append(base_data_dict[car_id]['BEV'])
569
+ processed_data_dict['ego'].update({'img_front': img_front_list,
570
+ 'img_left': img_left_list,
571
+ 'img_right': img_right_list,
572
+ 'BEV': BEV_list})
573
+ processed_data_dict['ego'].update({'scene_dict': base_data_dict['car_0']['scene_dict'],
574
+ 'frame_id': base_data_dict['car_0']['frame_id'],
575
+ })
576
+
577
+
578
+ return processed_data_dict
579
+
580
+
581
+ def collate_batch_train(self, batch, online_eval_only=False):
582
+ # Intermediate fusion is different from the other two
583
+ output_dict = {'ego': {}}
584
+
585
+ object_bbx_center = []
586
+ object_bbx_mask = []
587
+ object_ids = []
588
+ processed_lidar_list = []
589
+ image_inputs_list = []
590
+ # used to record different scenario
591
+ record_len = []
592
+ label_dict_list = []
593
+ lidar_pose_list = []
594
+ origin_lidar = []
595
+ lidar_len = []
596
+ lidar_pose_clean_list = []
597
+
598
+ # heterogeneous
599
+ lidar_agent_list = []
600
+
601
+ # pairwise transformation matrix
602
+ pairwise_t_matrix_list = []
603
+
604
+ # disconet
605
+ teacher_processed_lidar_list = []
606
+
607
+ # image
608
+ img_front = []
609
+ img_left = []
610
+ img_right = []
611
+ BEV = []
612
+
613
+ dict_list = []
614
+
615
+ ### 2022.10.10 single gt ####
616
+ if self.supervise_single:
617
+ pos_equal_one_single = []
618
+ neg_equal_one_single = []
619
+ targets_single = []
620
+ object_bbx_center_single = []
621
+ object_bbx_mask_single = []
622
+
623
+ for i in range(len(batch)):
624
+ ego_dict = batch[i]['ego']
625
+ if not online_eval_only:
626
+ object_bbx_center.append(ego_dict['object_bbx_center'])
627
+ object_bbx_mask.append(ego_dict['object_bbx_mask'])
628
+ object_ids.append(ego_dict['object_ids'])
629
+ else:
630
+ object_ids.append(None)
631
+ lidar_pose_list.append(ego_dict['lidar_poses']) # ego_dict['lidar_pose'] is np.ndarray [N,6]
632
+ lidar_pose_clean_list.append(ego_dict['lidar_poses_clean'])
633
+ if self.load_lidar_file:
634
+ processed_lidar_list.append(ego_dict['processed_lidar'])
635
+ if self.load_camera_file:
636
+ image_inputs_list.append(ego_dict['image_inputs']) # different cav_num, ego_dict['image_inputs'] is dict.
637
+
638
+ record_len.append(ego_dict['cav_num'])
639
+ label_dict_list.append(ego_dict['label_dict'])
640
+ pairwise_t_matrix_list.append(ego_dict['pairwise_t_matrix'])
641
+
642
+ dict_list.append([ego_dict['scene_dict'], ego_dict['frame_id']])
643
+
644
+ if self.visualize:
645
+ origin_lidar.append(ego_dict['origin_lidar'])
646
+ lidar_len.append(ego_dict['lidar_len'])
647
+ if len(ego_dict['img_front']) > 0 and len(ego_dict['img_right']) > 0 and len(ego_dict['img_left']) > 0 and len(ego_dict['BEV']) > 0:
648
+ img_front.append(ego_dict['img_front'][0])
649
+ img_left.append(ego_dict['img_left'][0])
650
+ img_right.append(ego_dict['img_right'][0])
651
+ BEV.append(ego_dict['BEV'][0])
652
+
653
+
654
+ if self.kd_flag:
655
+ teacher_processed_lidar_list.append(ego_dict['teacher_processed_lidar'])
656
+
657
+ ### 2022.10.10 single gt ####
658
+ if self.supervise_single and not online_eval_only:
659
+ # unused label
660
+ if False:
661
+ pos_equal_one_single.append(ego_dict['single_label_dict_torch']['pos_equal_one'])
662
+ neg_equal_one_single.append(ego_dict['single_label_dict_torch']['neg_equal_one'])
663
+ targets_single.append(ego_dict['single_label_dict_torch']['targets'])
664
+ object_bbx_center_single.append(ego_dict['single_object_bbx_center_torch'])
665
+ object_bbx_mask_single.append(ego_dict['single_object_bbx_mask_torch'])
666
+
667
+ # heterogeneous
668
+ if self.heterogeneous:
669
+ lidar_agent_list.append(ego_dict['lidar_agent'])
670
+
671
+ # convert to numpy, (B, max_num, 7)
672
+ if not online_eval_only:
673
+ object_bbx_center = torch.from_numpy(np.array(object_bbx_center))
674
+ object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask))
675
+ else:
676
+ object_bbx_center = None
677
+ object_bbx_mask = None
678
+
679
+ if self.load_lidar_file:
680
+ merged_feature_dict = merge_features_to_dict(processed_lidar_list)
681
+
682
+ if self.heterogeneous:
683
+ lidar_agent = np.concatenate(lidar_agent_list)
684
+ lidar_agent_idx = lidar_agent.nonzero()[0].tolist()
685
+ for k, v in merged_feature_dict.items(): # 'voxel_features' 'voxel_num_points' 'voxel_coords'
686
+ merged_feature_dict[k] = [v[index] for index in lidar_agent_idx]
687
+
688
+ if not self.heterogeneous or (self.heterogeneous and sum(lidar_agent) != 0):
689
+ processed_lidar_torch_dict = \
690
+ self.pre_processor.collate_batch(merged_feature_dict)
691
+ output_dict['ego'].update({'processed_lidar': processed_lidar_torch_dict})
692
+
693
+ if self.load_camera_file:
694
+ merged_image_inputs_dict = merge_features_to_dict(image_inputs_list, merge='cat')
695
+
696
+ if self.heterogeneous:
697
+ lidar_agent = np.concatenate(lidar_agent_list)
698
+ camera_agent = 1 - lidar_agent
699
+ camera_agent_idx = camera_agent.nonzero()[0].tolist()
700
+ if sum(camera_agent) != 0:
701
+ for k, v in merged_image_inputs_dict.items(): # 'imgs' 'rots' 'trans' ...
702
+ merged_image_inputs_dict[k] = torch.stack([v[index] for index in camera_agent_idx])
703
+
704
+ if not self.heterogeneous or (self.heterogeneous and sum(camera_agent) != 0):
705
+ output_dict['ego'].update({'image_inputs': merged_image_inputs_dict})
706
+
707
+ record_len = torch.from_numpy(np.array(record_len, dtype=int))
708
+ lidar_pose = torch.from_numpy(np.concatenate(lidar_pose_list, axis=0))
709
+ lidar_pose_clean = torch.from_numpy(np.concatenate(lidar_pose_clean_list, axis=0))
710
+
711
+ # unused label
712
+ label_torch_dict = {}
713
+ if False:
714
+ label_torch_dict = \
715
+ self.post_processor.collate_batch(label_dict_list)
716
+ # for centerpoint
717
+ label_torch_dict.update({'object_bbx_center': object_bbx_center,
718
+ 'object_bbx_mask': object_bbx_mask})
719
+
720
+ # (B, max_cav)
721
+ pairwise_t_matrix = torch.from_numpy(np.array(pairwise_t_matrix_list))
722
+
723
+ # add pairwise_t_matrix to label dict
724
+ label_torch_dict['pairwise_t_matrix'] = pairwise_t_matrix
725
+ label_torch_dict['record_len'] = record_len
726
+
727
+
728
+ # object id is only used during inference, where batch size is 1.
729
+ # so here we only get the first element.
730
+ output_dict['ego'].update({'object_bbx_center': object_bbx_center,
731
+ 'object_bbx_mask': object_bbx_mask,
732
+ 'record_len': record_len,
733
+ 'label_dict': label_torch_dict,
734
+ 'object_ids': object_ids[0],
735
+ 'pairwise_t_matrix': pairwise_t_matrix,
736
+ 'lidar_pose_clean': lidar_pose_clean,
737
+ 'lidar_pose': lidar_pose,
738
+ 'anchor_box': self.anchor_box_torch})
739
+
740
+
741
+ output_dict['ego'].update({'dict_list': dict_list})
742
+
743
+ if self.visualize:
744
+ origin_lidar = torch.from_numpy(np.array(origin_lidar))
745
+ output_dict['ego'].update({'origin_lidar': origin_lidar})
746
+ lidar_len = np.array(lidar_len)
747
+ output_dict['ego'].update({'lidar_len': lidar_len})
748
+ output_dict['ego'].update({'img_front': img_front})
749
+ output_dict['ego'].update({'img_right': img_right})
750
+ output_dict['ego'].update({'img_left': img_left})
751
+ output_dict['ego'].update({'BEV': BEV})
752
+
753
+ if self.kd_flag:
754
+ teacher_processed_lidar_torch_dict = \
755
+ self.pre_processor.collate_batch(teacher_processed_lidar_list)
756
+ output_dict['ego'].update({'teacher_processed_lidar':teacher_processed_lidar_torch_dict})
757
+
758
+
759
+ if self.supervise_single and not online_eval_only:
760
+ output_dict['ego'].update({
761
+ "label_dict_single":{
762
+ # "pos_equal_one": torch.cat(pos_equal_one_single, dim=0),
763
+ # "neg_equal_one": torch.cat(neg_equal_one_single, dim=0),
764
+ # "targets": torch.cat(targets_single, dim=0),
765
+ # for centerpoint
766
+ "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0),
767
+ "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0)
768
+ },
769
+ "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0),
770
+ "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0)
771
+ })
772
+
773
+ if self.heterogeneous:
774
+ output_dict['ego'].update({
775
+ "lidar_agent_record": torch.from_numpy(np.concatenate(lidar_agent_list)) # [0,1,1,0,1...]
776
+ })
777
+
778
+ return output_dict
779
+
780
+ def collate_batch_test(self, batch, online_eval_only=False):
781
+
782
+ self.online_eval_only = online_eval_only
783
+
784
+ assert len(batch) <= 1, "Batch size 1 is required during testing!"
785
+ output_dict = self.collate_batch_train(batch, online_eval_only)
786
+ if output_dict is None:
787
+ return None
788
+
789
+ # check if anchor box in the batch
790
+ if batch[0]['ego']['anchor_box'] is not None:
791
+ output_dict['ego'].update({'anchor_box':
792
+ self.anchor_box_torch})
793
+
794
+ # save the transformation matrix (4, 4) to ego vehicle
795
+ # transformation is only used in post process (no use.)
796
+ # we all predict boxes in ego coord.
797
+ transformation_matrix_torch = \
798
+ torch.from_numpy(np.identity(4)).float()
799
+ transformation_matrix_clean_torch = \
800
+ torch.from_numpy(np.identity(4)).float()
801
+
802
+ output_dict['ego'].update({'transformation_matrix':
803
+ transformation_matrix_torch,
804
+ 'transformation_matrix_clean':
805
+ transformation_matrix_clean_torch,})
806
+
807
+ output_dict['ego'].update({
808
+ "sample_idx": batch[0]['ego']['sample_idx'],
809
+ "cav_id_list": batch[0]['ego']['cav_id_list']
810
+ })
811
+
812
+ return output_dict
813
+
814
+
815
+ def post_process(self, data_dict, output_dict):
816
+ """
817
+ Process the outputs of the model to 2D/3D bounding box.
818
+
819
+ Parameters
820
+ ----------
821
+ data_dict : dict
822
+ The dictionary containing the origin input data of model.
823
+
824
+ output_dict :dict
825
+ The dictionary containing the output of the model.
826
+
827
+ Returns
828
+ -------
829
+ pred_box_tensor : torch.Tensor
830
+ The tensor of prediction bounding box after NMS.
831
+ gt_box_tensor : torch.Tensor
832
+ The tensor of gt bounding box.
833
+ """
834
+ pred_box_tensor, pred_score = \
835
+ self.post_processor.post_process(data_dict, output_dict)
836
+ gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict)
837
+
838
+ return pred_box_tensor, pred_score, gt_box_tensor
839
+
840
+ def post_process_multiclass(self, data_dict, output_dict, online_eval_only=False):
841
+ """
842
+ Process the outputs of the model to 2D/3D bounding box.
843
+
844
+ Parameters
845
+ ----------
846
+ data_dict : dict
847
+ The dictionary containing the origin input data of model.
848
+
849
+ output_dict :dict
850
+ The dictionary containing the output of the model.
851
+
852
+ Returns
853
+ -------
854
+ pred_box_tensor : torch.Tensor
855
+ The tensor of prediction bounding box after NMS.
856
+ gt_box_tensor : torch.Tensor
857
+ The tensor of gt bounding box.
858
+ """
859
+
860
+ if online_eval_only == False:
861
+ online_eval_only = self.online_eval_only
862
+
863
+ num_class = output_dict['ego']['cls_preds'].shape[1]
864
+
865
+
866
+ pred_box_tensor_list = []
867
+ pred_score_list = []
868
+ gt_box_tensor_list = []
869
+
870
+ num_list = [0,1,3]
871
+
872
+ for i in range(num_class):
873
+ data_dict_single = copy.deepcopy(data_dict)
874
+ output_dict_single = copy.deepcopy(output_dict)
875
+ if not online_eval_only:
876
+ data_dict_single['ego']['object_bbx_center'] = data_dict['ego']['object_bbx_center'][:,i,:,:]
877
+ data_dict_single['ego']['object_bbx_mask'] = data_dict['ego']['object_bbx_mask'][:,i,:]
878
+ data_dict_single['ego']['object_ids'] = data_dict['ego']['object_ids'][num_list[i]]
879
+
880
+ output_dict_single['ego']['cls_preds'] = output_dict['ego']['cls_preds'][:,i:i+1,:,:]
881
+ output_dict_single['ego']['reg_preds'] = output_dict['ego']['reg_preds_multiclass'][:,i,:,:]
882
+
883
+ pred_box_tensor, pred_score = \
884
+ self.post_processor.post_process(data_dict_single, output_dict_single)
885
+
886
+ if not online_eval_only:
887
+ gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict_single)
888
+ else:
889
+ gt_box_tensor = None
890
+
891
+ pred_box_tensor_list.append(pred_box_tensor)
892
+ pred_score_list.append(pred_score)
893
+ gt_box_tensor_list.append(gt_box_tensor)
894
+
895
+ return pred_box_tensor_list, pred_score_list, gt_box_tensor_list
896
+
897
+ return EarlymulticlassFusionDataset
898
+
899
+
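
Note: post_process_multiclass above evaluates each class channel independently. It deep-copies the batch, slices cls_preds to a single channel and reg_preds_multiclass to the matching regression map, and maps class index i to its ground-truth group via num_list = [0, 1, 3]. The sketch below isolates just that slicing; the tensor shapes are assumptions chosen for illustration and are not taken from this checkpoint's config.

import torch

num_class = 3
cls_preds = torch.rand(1, num_class, 100, 252)            # (B, C, H, W) classification maps
reg_preds_multiclass = torch.rand(1, num_class, 100, 7)   # (B, C, N, 7) per-class regressions

per_class_outputs = []
for i in range(num_class):
    per_class_outputs.append({
        # keep the channel dim so a single-class post-processor still sees (B, 1, H, W)
        'cls_preds': cls_preds[:, i:i + 1, :, :],
        'reg_preds': reg_preds_multiclass[:, i, :, :],
    })

assert per_class_outputs[0]['cls_preds'].shape == (1, 1, 100, 252)
assert per_class_outputs[2]['reg_preds'].shape == (1, 100, 7)
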
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_2stage_fusion_dataset.py ADDED
@@ -0,0 +1,603 @@
1
+ # intermediate 2-stage fusion dataset
2
+ import random
3
+ import math
4
+ from collections import OrderedDict
5
+ import numpy as np
6
+ import torch
7
+ import copy
8
+ from icecream import ic
9
+ from PIL import Image
10
+ import pickle as pkl
11
+ from opencood.utils import box_utils as box_utils
12
+ from opencood.data_utils.pre_processor import build_preprocessor
13
+ from opencood.data_utils.post_processor import build_postprocessor
14
+ from opencood.utils.camera_utils import (
15
+ sample_augmentation,
16
+ img_transform,
17
+ normalize_img,
18
+ img_to_tensor,
19
+ )
20
+ from opencood.utils.common_utils import merge_features_to_dict
21
+ from opencood.utils.transformation_utils import x1_to_x2, x_to_world, get_pairwise_transformation
22
+ from opencood.utils.pose_utils import add_noise_data_dict
23
+ from opencood.utils.pcd_utils import (
24
+ mask_points_by_range,
25
+ mask_ego_points,
26
+ shuffle_points,
27
+ downsample_lidar_minimum,
28
+ )
29
+
30
+ def getIntermediate2stageFusionDataset(cls):
31
+ """
32
+ cls: the Basedataset.
33
+ """
34
+ class Intermediate2stageFusionDataset(cls):
35
+ def __init__(self, params, visualize, train=True):
36
+ super().__init__(params, visualize, train)
37
+ # intermediate and supervise single
38
+ self.supervise_single = True if ('supervise_single' in params['model']['args'] and params['model']['args']['supervise_single']) \
39
+ else False
40
+ # it is asserted to be False, but by default single labels are still loaded for 1-stage training.
41
+ assert self.supervise_single is False
42
+
43
+ self.proj_first = False if 'proj_first' not in params['fusion']['args']\
44
+ else params['fusion']['args']['proj_first']
45
+
46
+ self.anchor_box = self.post_processor.generate_anchor_box()
47
+ self.anchor_box_torch = torch.from_numpy(self.anchor_box)
48
+
49
+ self.heterogeneous = False
50
+ if 'heter' in params:
51
+ self.heterogeneous = True
52
+
53
+ def get_item_single_car(self, selected_cav_base, ego_cav_base):
54
+ """
55
+ Process a single CAV's information for the train/test pipeline.
56
+
57
+
58
+ Parameters
59
+ ----------
60
+ selected_cav_base : dict
61
+ The dictionary contains a single CAV's raw information.
62
+ including 'params', 'camera_data'
63
+ ego_cav_base : dict
64
+ The ego CAV's base information; its 'lidar_pose' and
65
+ 'lidar_pose_clean' entries are used as the reference poses,
66
+ the clean pose only for gt box generation.
67
+
68
+ Returns
69
+ -------
70
+ selected_cav_processed : dict
71
+ The dictionary contains the cav's processed information.
72
+ """
73
+ selected_cav_processed = {}
74
+ ego_pose, ego_pose_clean = ego_cav_base['params']['lidar_pose'], ego_cav_base['params']['lidar_pose_clean']
75
+
76
+ # calculate the transformation matrix
77
+ transformation_matrix = \
78
+ x1_to_x2(selected_cav_base['params']['lidar_pose'],
79
+ ego_pose) # T_ego_cav
80
+ transformation_matrix_clean = \
81
+ x1_to_x2(selected_cav_base['params']['lidar_pose_clean'],
82
+ ego_pose_clean)
83
+
84
+ # lidar
85
+ if self.load_lidar_file or self.visualize:
86
+ # process lidar
87
+ lidar_np = selected_cav_base['lidar_np']
88
+ lidar_np = shuffle_points(lidar_np)
89
+ # remove points that hit itself
90
+ lidar_np = mask_ego_points(lidar_np)
91
+
92
+ # no projected lidar
93
+ no_project_lidar = copy.deepcopy(lidar_np)
94
+
95
+ # project the lidar to ego space
96
+ # x,y,z in ego space
97
+ projected_lidar = \
98
+ box_utils.project_points_by_matrix_torch(lidar_np[:, :3],
99
+ transformation_matrix)
100
+ if self.proj_first:
101
+ lidar_np[:, :3] = projected_lidar
102
+
103
+ if self.visualize:
104
+ # filter lidar
105
+ selected_cav_processed.update({'projected_lidar': projected_lidar})
106
+
107
+ processed_lidar = self.pre_processor.preprocess(lidar_np)
108
+ selected_cav_processed.update({'projected_lidar': projected_lidar,
109
+ 'no_projected_lidar': no_project_lidar,
110
+ 'processed_features': processed_lidar})
111
+
112
+ # generate targets label single GT, note the reference pose is itself.
113
+ object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center(
114
+ [selected_cav_base], selected_cav_base['params']['lidar_pose']
115
+ )
116
+ label_dict = self.post_processor.generate_label(
117
+ gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask
118
+ )
119
+ selected_cav_processed.update({"object_bbx_center_no_coop": object_bbx_center[object_bbx_mask==1],
120
+ "single_label_dict": label_dict})
121
+
122
+ # camera
123
+ if self.load_camera_file:
124
+ camera_data_list = selected_cav_base["camera_data"]
125
+
126
+ params = selected_cav_base["params"]
127
+ imgs = []
128
+ rots = []
129
+ trans = []
130
+ intrins = []
131
+ post_rots = []
132
+ post_trans = []
133
+
134
+ for idx, img in enumerate(camera_data_list):
135
+ camera_to_lidar, camera_intrinsic = self.get_ext_int(params, idx)
136
+
137
+ intrin = torch.from_numpy(camera_intrinsic)
138
+ rot = torch.from_numpy(
139
+ camera_to_lidar[:3, :3]
140
+ ) # R_wc, we consider world-coord is the lidar-coord
141
+ tran = torch.from_numpy(camera_to_lidar[:3, 3]) # T_wc
142
+
143
+ post_rot = torch.eye(2)
144
+ post_tran = torch.zeros(2)
145
+
146
+ img_src = [img]
147
+
148
+ # depth
149
+ if self.load_depth_file:
150
+ depth_img = selected_cav_base["depth_data"][idx]
151
+ img_src.append(depth_img)
152
+ else:
153
+ depth_img = None
154
+
155
+ # data augmentation
156
+ resize, resize_dims, crop, flip, rotate = sample_augmentation(
157
+ self.data_aug_conf, self.train
158
+ )
159
+ img_src, post_rot2, post_tran2 = img_transform(
160
+ img_src,
161
+ post_rot,
162
+ post_tran,
163
+ resize=resize,
164
+ resize_dims=resize_dims,
165
+ crop=crop,
166
+ flip=flip,
167
+ rotate=rotate,
168
+ )
169
+ # for convenience, make augmentation matrices 3x3
170
+ post_tran = torch.zeros(3)
171
+ post_rot = torch.eye(3)
172
+ post_tran[:2] = post_tran2
173
+ post_rot[:2, :2] = post_rot2
174
+
175
+ # decouple RGB and Depth
176
+
177
+ img_src[0] = normalize_img(img_src[0])
178
+ if self.load_depth_file:
179
+ img_src[1] = img_to_tensor(img_src[1]) * 255
180
+
181
+ imgs.append(torch.cat(img_src, dim=0))
182
+ intrins.append(intrin)
183
+ rots.append(rot)
184
+ trans.append(tran)
185
+ post_rots.append(post_rot)
186
+ post_trans.append(post_tran)
187
+
188
+ selected_cav_processed.update(
189
+ {
190
+ "image_inputs":
191
+ {
192
+ "imgs": torch.stack(imgs), # [Ncam, 3or4, H, W]
193
+ "intrins": torch.stack(intrins),
194
+ "rots": torch.stack(rots),
195
+ "trans": torch.stack(trans),
196
+ "post_rots": torch.stack(post_rots),
197
+ "post_trans": torch.stack(post_trans),
198
+ }
199
+ }
200
+ )
201
+
202
+ # anchor box
203
+ selected_cav_processed.update({"anchor_box": self.anchor_box})
204
+
205
+ # note the reference pose ego
206
+ object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center([selected_cav_base],
207
+ ego_pose_clean)
208
+
209
+ selected_cav_processed.update(
210
+ {
211
+ "object_bbx_center": object_bbx_center[object_bbx_mask == 1],
212
+ "object_bbx_mask": object_bbx_mask,
213
+ "object_ids": object_ids,
214
+ 'transformation_matrix': transformation_matrix,
215
+ 'transformation_matrix_clean': transformation_matrix_clean
216
+ }
217
+ )
218
+
219
+
220
+ return selected_cav_processed
221
+
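The projection logic above reduces to composing two 6-DoF poses into T_ego_cav and applying it to homogeneous points. A minimal numpy sketch of that idea, assuming an [x, y, z, roll, yaw, pitch] pose layout and keeping only yaw for brevity (the project's x1_to_x2 handles all three angles):

import numpy as np

def pose_to_world(pose):
    # simplified: translation plus yaw-only rotation (degrees)
    x, y, z, yaw = pose[0], pose[1], pose[2], np.radians(pose[4])
    T = np.eye(4)
    T[:3, :3] = [[np.cos(yaw), -np.sin(yaw), 0.0],
                 [np.sin(yaw),  np.cos(yaw), 0.0],
                 [0.0,          0.0,         1.0]]
    T[:3, 3] = [x, y, z]
    return T

def cav_to_ego(cav_pose, ego_pose):
    # T_ego_cav = inv(T_world_ego) @ T_world_cav
    return np.linalg.inv(pose_to_world(ego_pose)) @ pose_to_world(cav_pose)

def project_points(points_xyz, T):
    # apply a 4x4 transform to (N, 3) points via homogeneous coordinates
    homo = np.hstack([points_xyz, np.ones((len(points_xyz), 1))])
    return (homo @ T.T)[:, :3]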
222
+ def __getitem__(self, idx):
223
+ base_data_dict = self.retrieve_base_data(idx)
224
+ base_data_dict = add_noise_data_dict(base_data_dict,self.params['noise_setting'])
225
+
226
+ processed_data_dict = OrderedDict()
227
+ processed_data_dict['ego'] = {}
228
+
229
+ ego_id = -1
230
+ ego_lidar_pose = []
231
+ ego_cav_base = None
232
+
233
+ # first find the ego vehicle's lidar pose
234
+ for cav_id, cav_content in base_data_dict.items():
235
+ if cav_content['ego']:
236
+ ego_id = cav_id
237
+ ego_lidar_pose = cav_content['params']['lidar_pose']
238
+ ego_cav_base = cav_content
239
+ break
240
+
241
+ assert cav_id == list(base_data_dict.keys())[
242
+ 0], "The first element in the OrderedDict must be ego"
243
+ assert ego_id != -1
244
+ assert len(ego_lidar_pose) > 0
245
+
246
+ agents_image_inputs = []
247
+ processed_features = []
248
+ object_stack = []
249
+ object_id_stack = []
250
+ single_label_list = []
251
+ too_far = []
252
+ lidar_pose_list = []
253
+ lidar_pose_clean_list = []
254
+ cav_id_list = []
255
+
256
+ projected_lidar_stack = []
257
+ no_projected_lidar_stack = []
258
+
259
+ vsa_lidar_stack = []
260
+
261
+ if self.visualize:
262
+ projected_lidar_stack = []
263
+
264
+ # loop over all CAVs to process information
265
+ for cav_id, selected_cav_base in base_data_dict.items():
266
+ # check if the cav is within the communication range with ego
267
+ distance = \
268
+ math.sqrt((selected_cav_base['params']['lidar_pose'][0] -
269
+ ego_lidar_pose[0]) ** 2 + (
270
+ selected_cav_base['params'][
271
+ 'lidar_pose'][1] - ego_lidar_pose[
272
+ 1]) ** 2)
273
+
274
+ # if distance is too far, we will just skip this agent
275
+ if distance > self.params['comm_range']:
276
+ too_far.append(cav_id)
277
+ continue
278
+
279
+ lidar_pose_clean_list.append(selected_cav_base['params']['lidar_pose_clean'])
280
+ lidar_pose_list.append(selected_cav_base['params']['lidar_pose']) # 6dof pose
281
+ cav_id_list.append(cav_id)
282
+
283
+ for cav_id in too_far:
284
+ base_data_dict.pop(cav_id)
285
+
286
+
287
+ pairwise_t_matrix = \
288
+ get_pairwise_transformation(base_data_dict,
289
+ self.max_cav,
290
+ self.proj_first)
291
+
292
+ lidar_poses = np.array(lidar_pose_list).reshape(-1, 6) # [N_cav, 6]
293
+ lidar_poses_clean = np.array(lidar_pose_clean_list).reshape(-1, 6) # [N_cav, 6]
294
+
295
+ # merge preprocessed features from different cavs into the same dict
296
+ cav_num = len(cav_id_list)
297
+
298
+ # heterogeneous
299
+ if self.heterogeneous:
300
+ lidar_agent, camera_agent = self.selector.select_agent(idx)
301
+ lidar_agent = lidar_agent[:cav_num]
302
+ processed_data_dict['ego'].update({"lidar_agent": lidar_agent})
303
+
304
+
305
+ for _i, cav_id in enumerate(cav_id_list):
306
+ selected_cav_base = base_data_dict[cav_id]
307
+
308
+ # dynamically select the object-center generator for heterogeneous input
309
+ if (not self.visualize) and self.heterogeneous and lidar_agent[_i]:
310
+ self.generate_object_center = self.generate_object_center_lidar
311
+ elif (not self.visualize) and self.heterogeneous and (not lidar_agent[_i]):
312
+ self.generate_object_center = self.generate_object_center_camera
313
+
314
+ selected_cav_processed = self.get_item_single_car(
315
+ selected_cav_base,
316
+ ego_cav_base)
317
+
318
+ object_stack.append(selected_cav_processed['object_bbx_center'])
319
+ object_id_stack += selected_cav_processed['object_ids']
320
+
321
+ if self.load_lidar_file:
322
+ processed_features.append(
323
+ selected_cav_processed['processed_features'])
324
+ if self.proj_first:
325
+ vsa_lidar_stack.append(selected_cav_processed['projected_lidar'])
326
+ else:
327
+ vsa_lidar_stack.append(selected_cav_processed['no_projected_lidar'])
328
+
329
+ if self.load_camera_file:
330
+ agents_image_inputs.append(
331
+ selected_cav_processed['image_inputs'])
332
+
333
+ if self.visualize:
334
+ projected_lidar_stack.append(
335
+ selected_cav_processed['projected_lidar'])
336
+
337
+ single_label_list.append(selected_cav_processed['single_label_dict'])
338
+
339
+ # generate single-view (no-coop) labels
340
+ label_dict_no_coop = single_label_list # [{cav1_label}, {cav2_label}...]
341
+
342
+
343
+ # exclude all repetitive objects
344
+ unique_indices = \
345
+ [object_id_stack.index(x) for x in set(object_id_stack)]
346
+ object_stack = np.vstack(object_stack)
347
+ object_stack = object_stack[unique_indices]
348
+
349
+ # make sure bounding boxes across all frames have the same number
350
+ object_bbx_center = \
351
+ np.zeros((self.params['postprocess']['max_num'], 7))
352
+ mask = np.zeros(self.params['postprocess']['max_num'])
353
+ object_bbx_center[:object_stack.shape[0], :] = object_stack
354
+ mask[:object_stack.shape[0]] = 1
355
+
356
+ if self.load_lidar_file:
357
+ merged_feature_dict = merge_features_to_dict(processed_features)
358
+ processed_data_dict['ego'].update({'processed_lidar': merged_feature_dict,
359
+ 'vsa_lidar': vsa_lidar_stack})
360
+ if self.load_camera_file:
361
+ merged_image_inputs_dict = merge_features_to_dict(agents_image_inputs, merge='stack')
362
+ processed_data_dict['ego'].update({'image_inputs': merged_image_inputs_dict})
363
+
364
+ # generate targets label
365
+ label_dict_coop = \
366
+ self.post_processor.generate_label(
367
+ gt_box_center=object_bbx_center,
368
+ anchors=self.anchor_box,
369
+ mask=mask)
370
+
371
+ label_dict = {
372
+ 'stage1': label_dict_no_coop, # list
373
+ 'stage2': label_dict_coop # dict
374
+ }
375
+
376
+ processed_data_dict['ego'].update(
377
+ {'object_bbx_center': object_bbx_center,
378
+ 'object_bbx_mask': mask,
379
+ 'object_ids': [object_id_stack[i] for i in unique_indices],
380
+ 'anchor_box': self.anchor_box,
381
+ 'label_dict': label_dict,
382
+ 'cav_num': cav_num,
383
+ 'pairwise_t_matrix': pairwise_t_matrix,
384
+ 'lidar_poses_clean': lidar_poses_clean,
385
+ 'lidar_poses': lidar_poses})
386
+
387
+
388
+ if self.visualize:
389
+ processed_data_dict['ego'].update({'origin_lidar':
390
+ np.vstack(
391
+ projected_lidar_stack)})
392
+
393
+
394
+ processed_data_dict['ego'].update({'sample_idx': idx,
395
+ 'cav_id_list': cav_id_list})
396
+
397
+ return processed_data_dict
398
+
399
+
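For orientation, the label dictionary assembled per sample above roughly follows the schematic below: 'stage1' keeps one single-view (no-coop) label dict per CAV, while 'stage2' holds the cooperative label over the merged boxes. The placeholder strings stand in for the anchor-based tensors produced by the post-processor, whose exact shapes depend on the anchor configuration.

sample_label_dict = {
    'stage1': [                       # one entry per CAV (len == cav_num)
        {'pos_equal_one': '...', 'neg_equal_one': '...', 'targets': '...'},  # ego
        {'pos_equal_one': '...', 'neg_equal_one': '...', 'targets': '...'},  # other CAV
    ],
    'stage2': {'pos_equal_one': '...', 'neg_equal_one': '...', 'targets': '...'},
}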
400
+ def collate_batch_train(self, batch):
401
+ # Intermediate fusion is different from the other two
402
+ output_dict = {'ego': {}}
403
+
404
+ object_bbx_center = []
405
+ object_bbx_mask = []
406
+ object_ids = []
407
+ processed_lidar_list = []
408
+ image_inputs_list = []
409
+ # used to record the number of CAVs in each sample
410
+ record_len = []
411
+ label_dict_no_coop_batch_list = []
412
+ label_dict_list = []
413
+ lidar_pose_list = []
414
+ origin_lidar = []
415
+ vsa_lidar = []
416
+ lidar_pose_clean_list = []
417
+
418
+ # pairwise transformation matrix
419
+ pairwise_t_matrix_list = []
420
+
421
+ # heterogeneous
422
+ lidar_agent_list = []
423
+
424
+ for i in range(len(batch)):
425
+ ego_dict = batch[i]['ego']
426
+ object_bbx_center.append(ego_dict['object_bbx_center'])
427
+ object_bbx_mask.append(ego_dict['object_bbx_mask'])
428
+ object_ids.append(ego_dict['object_ids'])
429
+ lidar_pose_list.append(ego_dict['lidar_poses']) # ego_dict['lidar_pose'] is np.ndarray [N,6]
430
+ lidar_pose_clean_list.append(ego_dict['lidar_poses_clean'])
431
+ if self.load_lidar_file:
432
+ processed_lidar_list.append(ego_dict['processed_lidar'])
433
+ vsa_lidar.append(ego_dict['vsa_lidar'])
434
+ if self.load_camera_file:
435
+ image_inputs_list.append(ego_dict['image_inputs']) # different cav_num, ego_dict['image_inputs'] is dict.
436
+
437
+ record_len.append(ego_dict['cav_num'])
438
+ label_dict_no_coop_batch_list.append(ego_dict['label_dict']['stage1'])
439
+ label_dict_list.append(ego_dict['label_dict']['stage2'])
440
+
441
+ pairwise_t_matrix_list.append(ego_dict['pairwise_t_matrix'])
442
+
443
+ if self.visualize:
444
+ origin_lidar.append(ego_dict['origin_lidar'])
445
+
446
+ # heterogeneous
447
+ if self.heterogeneous:
448
+ lidar_agent_list.append(ego_dict['lidar_agent'])
449
+
450
+
451
+ # convert to numpy, (B, max_num, 7)
452
+ object_bbx_center = torch.from_numpy(np.array(object_bbx_center))
453
+ object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask))
454
+
455
+ # example: {'voxel_features':[np.array([1,2,3]]),
456
+ # np.array([3,5,6]), ...]}
457
+ if self.load_lidar_file:
458
+ merged_feature_dict = merge_features_to_dict(processed_lidar_list)
459
+ # [sum(record_len), C, H, W]
460
+ if self.heterogeneous:
461
+ lidar_agent = np.concatenate(lidar_agent_list)
462
+ lidar_agent_idx = lidar_agent.nonzero()[0].tolist()
463
+ for k, v in merged_feature_dict.items(): # 'voxel_features' 'voxel_num_points' 'voxel_coords'
464
+ merged_feature_dict[k] = [v[index] for index in lidar_agent_idx]
465
+
466
+ if not self.heterogeneous or (self.heterogeneous and sum(lidar_agent) != 0):
467
+ processed_lidar_torch_dict = \
468
+ self.pre_processor.collate_batch(merged_feature_dict)
469
+ output_dict['ego'].update({'processed_lidar': processed_lidar_torch_dict})
470
+
471
+ if self.load_camera_file:
472
+ merged_image_inputs_dict = merge_features_to_dict(image_inputs_list, merge='cat')
473
+
474
+ if self.heterogeneous:
475
+ lidar_agent = np.concatenate(lidar_agent_list)
476
+ camera_agent = 1 - lidar_agent
477
+ camera_agent_idx = camera_agent.nonzero()[0].tolist()
478
+ if sum(camera_agent) != 0:
479
+ for k, v in merged_image_inputs_dict.items(): # 'imgs' 'rots' 'trans' ...
480
+ merged_image_inputs_dict[k] = torch.stack([v[index] for index in camera_agent_idx])
481
+
482
+ if not self.heterogeneous or (self.heterogeneous and sum(camera_agent) != 0):
483
+ output_dict['ego'].update({'image_inputs': merged_image_inputs_dict})
484
+
485
+ record_len = torch.from_numpy(np.array(record_len, dtype=int))
486
+ lidar_pose = torch.from_numpy(np.concatenate(lidar_pose_list, axis=0))
487
+ lidar_pose_clean = torch.from_numpy(np.concatenate(lidar_pose_clean_list, axis=0))
488
+ label_dict_no_coop_cavs_batch_list = [label_dict for label_dict_cavs_list in
489
+ label_dict_no_coop_batch_list for label_dict in
490
+ label_dict_cavs_list]
491
+ label_no_coop_torch_dict = \
492
+ self.post_processor.collate_batch(label_dict_no_coop_cavs_batch_list)
493
+
494
+ label_torch_dict = \
495
+ self.post_processor.collate_batch(label_dict_list)
496
+
497
+ # (B, max_cav)
498
+ pairwise_t_matrix = torch.from_numpy(np.array(pairwise_t_matrix_list))
499
+
500
+ # add pairwise_t_matrix to label dict
501
+ label_torch_dict['pairwise_t_matrix'] = pairwise_t_matrix
502
+ label_torch_dict['record_len'] = record_len
503
+
504
+ # object id is only used during inference, where batch size is 1.
505
+ # so here we only get the first element.
506
+ output_dict['ego'].update({ 'object_bbx_center': object_bbx_center,
507
+ 'object_bbx_mask': object_bbx_mask,
508
+ 'record_len': record_len,
509
+ 'label_dict': {
510
+ 'stage1': label_no_coop_torch_dict,
511
+ 'stage2': label_torch_dict,
512
+ },
513
+ 'object_ids': object_ids[0],
514
+ 'pairwise_t_matrix': pairwise_t_matrix,
515
+ 'lidar_pose_clean': lidar_pose_clean,
516
+ 'lidar_pose': lidar_pose,
517
+ 'proj_first': self.proj_first,
518
+ 'anchor_box': self.anchor_box_torch})
519
+
520
+ if self.load_lidar_file:
521
+ coords = []
522
+ idx = 0
523
+ for b in range(len(batch)):
524
+ for points in vsa_lidar[b]:
525
+ assert len(points) != 0
526
+ coor_pad = np.pad(points, ((0, 0), (1, 0)),
527
+ mode="constant", constant_values=idx)
528
+ coords.append(coor_pad)
529
+ idx += 1
530
+ origin_lidar_for_vsa = np.concatenate(coords, axis=0)
531
+ origin_lidar_for_vsa = torch.from_numpy(origin_lidar_for_vsa)
532
+ output_dict['ego'].update({'origin_lidar_for_vsa': origin_lidar_for_vsa})
533
+
534
+ if self.visualize:
535
+ origin_lidar = \
536
+ np.array(downsample_lidar_minimum(pcd_np_list=origin_lidar))
537
+ origin_lidar = torch.from_numpy(origin_lidar)
538
+ output_dict['ego'].update({'origin_lidar': origin_lidar})
539
+
540
+ if self.heterogeneous:
541
+ output_dict['ego'].update({
542
+ "lidar_agent_record": torch.from_numpy(np.concatenate(lidar_agent_list)) # [0,1,1,0,1...]
543
+ })
544
+
545
+ return output_dict
546
+
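The loop above that builds 'origin_lidar_for_vsa' relies on a small indexing trick: each CAV's point array is padded with one leading column holding a running index before concatenation, so the points remain separable afterwards. A toy numpy illustration:

import numpy as np

points_cav0 = np.random.rand(5, 4)   # x, y, z, intensity
points_cav1 = np.random.rand(3, 4)

coords = []
for idx, pts in enumerate([points_cav0, points_cav1]):
    # prepend one column filled with the running CAV/batch index
    coords.append(np.pad(pts, ((0, 0), (1, 0)), mode="constant", constant_values=idx))

stacked = np.concatenate(coords, axis=0)
print(stacked.shape)      # (8, 5)
print(stacked[:, 0])      # [0. 0. 0. 0. 0. 1. 1. 1.]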
547
+ def collate_batch_test(self, batch):
548
+ assert len(batch) <= 1, "Batch size 1 is required during testing!"
549
+ output_dict = self.collate_batch_train(batch)
550
+ if output_dict is None:
551
+ return None
552
+
553
+ # attach the anchor box to the batch
554
+ output_dict['ego'].update({'anchor_box': self.anchor_box_torch})
555
+
556
+ # save the transformation matrix (4, 4) to ego vehicle
557
+ # the transformation is only used in post-processing and is effectively
558
+ # an identity, since all boxes are predicted in the ego coordinate frame.
559
+ transformation_matrix_torch = \
560
+ torch.from_numpy(np.identity(4)).float()
561
+ transformation_matrix_clean_torch = \
562
+ torch.from_numpy(np.identity(4)).float()
563
+
564
+ output_dict['ego'].update({'transformation_matrix':
565
+ transformation_matrix_torch,
566
+ 'transformation_matrix_clean':
567
+ transformation_matrix_clean_torch,})
568
+
569
+ output_dict['ego'].update({
570
+ "sample_idx": batch[0]['ego']['sample_idx'],
571
+ "cav_id_list": batch[0]['ego']['cav_id_list']
572
+ })
573
+
574
+ return output_dict
575
+
576
+
577
+ def post_process(self, data_dict, output_dict):
578
+ """
579
+ Process the outputs of the model to 2D/3D bounding box.
580
+
581
+ Parameters
582
+ ----------
583
+ data_dict : dict
584
+ The dictionary containing the origin input data of model.
585
+
586
+ output_dict :dict
587
+ The dictionary containing the output of the model.
588
+
589
+ Returns
590
+ -------
591
+ pred_box_tensor : torch.Tensor
592
+ The tensor of prediction bounding box after NMS.
593
+ gt_box_tensor : torch.Tensor
594
+ The tensor of gt bounding box.
595
+ """
596
+ pred_box_tensor, pred_score = \
597
+ self.post_processor.post_process(data_dict, output_dict)
598
+ gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict)
599
+
600
+ return pred_box_tensor, pred_score, gt_box_tensor
601
+
602
+
603
+ return Intermediate2stageFusionDataset
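A rough usage sketch of the factory pattern used by these files: getIntermediate2stageFusionDataset(cls) specializes a base dataset class, and the resulting object supplies its own collate functions to a DataLoader. BaseDatasetCls and params below are placeholders for one of the project's base dataset classes and its YAML-derived config.

from torch.utils.data import DataLoader

# BaseDatasetCls and params are stand-ins; substitute the real base dataset
# class and the loaded config dictionary.
DatasetCls = getIntermediate2stageFusionDataset(BaseDatasetCls)
train_set = DatasetCls(params, visualize=False, train=True)

train_loader = DataLoader(train_set, batch_size=4, shuffle=True,
                          collate_fn=train_set.collate_batch_train)
test_loader = DataLoader(train_set, batch_size=1, shuffle=False,  # test collate asserts batch size 1
                         collate_fn=train_set.collate_batch_test)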
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_fusion_dataset.py ADDED
@@ -0,0 +1,679 @@
1
+ # intermediate fusion dataset
2
+ import random
3
+ import math
4
+ from collections import OrderedDict
5
+ import numpy as np
6
+ import torch
7
+ import copy
8
+ from icecream import ic
9
+ from PIL import Image
10
+ import pickle as pkl
11
+ from opencood.utils import box_utils as box_utils
12
+ from opencood.data_utils.pre_processor import build_preprocessor
13
+ from opencood.data_utils.post_processor import build_postprocessor
14
+ from opencood.utils.camera_utils import (
15
+ sample_augmentation,
16
+ img_transform,
17
+ normalize_img,
18
+ img_to_tensor,
19
+ )
20
+ from opencood.utils.common_utils import merge_features_to_dict
21
+ from opencood.utils.transformation_utils import x1_to_x2, x_to_world, get_pairwise_transformation
22
+ from opencood.utils.pose_utils import add_noise_data_dict
23
+ from opencood.utils.pcd_utils import (
24
+ mask_points_by_range,
25
+ mask_ego_points,
26
+ shuffle_points,
27
+ downsample_lidar_minimum,
28
+ )
29
+ from opencood.utils.common_utils import read_json
30
+
31
+
32
+ def getIntermediateFusionDataset(cls):
33
+ """
34
+ cls: the Basedataset.
35
+ """
36
+ class IntermediateFusionDataset(cls):
37
+ def __init__(self, params, visualize, train=True):
38
+ super().__init__(params, visualize, train)
39
+ # intermediate and supervise single
40
+ self.supervise_single = True if ('supervise_single' in params['model']['args'] and params['model']['args']['supervise_single']) \
41
+ else False
42
+ self.proj_first = False if 'proj_first' not in params['fusion']['args']\
43
+ else params['fusion']['args']['proj_first']
44
+
45
+ self.anchor_box = self.post_processor.generate_anchor_box()
46
+ self.anchor_box_torch = torch.from_numpy(self.anchor_box)
47
+
48
+ self.heterogeneous = False
49
+ if 'heter' in params:
50
+ self.heterogeneous = True
51
+
52
+ self.kd_flag = params.get('kd_flag', False)
53
+
54
+ self.box_align = False
55
+ if "box_align" in params:
56
+ self.box_align = True
57
+ self.stage1_result_path = params['box_align']['train_result'] if train else params['box_align']['val_result']
58
+ self.stage1_result = read_json(self.stage1_result_path)
59
+ self.box_align_args = params['box_align']['args']
60
+
61
+
62
+
63
+
64
+ def get_item_single_car(self, selected_cav_base, ego_cav_base):
65
+ """
66
+ Process a single CAV's information for the train/test pipeline.
67
+
68
+
69
+ Parameters
70
+ ----------
71
+ selected_cav_base : dict
72
+ The dictionary contains a single CAV's raw information.
73
+ including 'params', 'camera_data'
74
+ ego_pose : list, length 6
75
+ The ego vehicle lidar pose under world coordinate.
76
+ ego_pose_clean : list, length 6
77
+ only used for gt box generation
78
+
79
+ Returns
80
+ -------
81
+ selected_cav_processed : dict
82
+ The dictionary contains the cav's processed information.
83
+ """
84
+ selected_cav_processed = {}
85
+ ego_pose, ego_pose_clean = ego_cav_base['params']['lidar_pose'], ego_cav_base['params']['lidar_pose_clean']
86
+
87
+ # calculate the transformation matrix
88
+ transformation_matrix = \
89
+ x1_to_x2(selected_cav_base['params']['lidar_pose'],
90
+ ego_pose) # T_ego_cav
91
+ transformation_matrix_clean = \
92
+ x1_to_x2(selected_cav_base['params']['lidar_pose_clean'],
93
+ ego_pose_clean)
94
+
95
+ # lidar
96
+ if self.load_lidar_file or self.visualize:
97
+ # process lidar
98
+ lidar_np = selected_cav_base['lidar_np']
99
+ lidar_np = shuffle_points(lidar_np)
100
+ # remove points that hit itself
101
+ lidar_np = mask_ego_points(lidar_np)
102
+ # project the lidar to ego space
103
+ # x,y,z in ego space
104
+ projected_lidar = \
105
+ box_utils.project_points_by_matrix_torch(lidar_np[:, :3],
106
+ transformation_matrix)
107
+ if self.proj_first:
108
+ lidar_np[:, :3] = projected_lidar
109
+
110
+ if self.visualize:
111
+ # filter lidar
112
+ selected_cav_processed.update({'projected_lidar': projected_lidar})
113
+
114
+ if self.kd_flag:
115
+ lidar_proj_np = copy.deepcopy(lidar_np)
116
+ lidar_proj_np[:,:3] = projected_lidar
117
+
118
+ selected_cav_processed.update({'projected_lidar': lidar_proj_np})
119
+
120
+ processed_lidar = self.pre_processor.preprocess(lidar_np)
121
+ selected_cav_processed.update({'processed_features': processed_lidar})
122
+
123
+ # generate single-view GT target labels; note the reference pose is the CAV itself.
124
+ object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center(
125
+ [selected_cav_base], selected_cav_base['params']['lidar_pose']
126
+ )
127
+ label_dict = self.post_processor.generate_label(
128
+ gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask
129
+ )
130
+ selected_cav_processed.update({
131
+ "single_label_dict": label_dict,
132
+ "single_object_bbx_center": object_bbx_center,
133
+ "single_object_bbx_mask": object_bbx_mask})
134
+
135
+ # camera
136
+ if self.load_camera_file:
137
+ camera_data_list = selected_cav_base["camera_data"]
138
+
139
+ params = selected_cav_base["params"]
140
+ imgs = []
141
+ rots = []
142
+ trans = []
143
+ intrins = []
144
+ extrinsics = []
145
+ post_rots = []
146
+ post_trans = []
147
+
148
+ for idx, img in enumerate(camera_data_list):
149
+ camera_to_lidar, camera_intrinsic = self.get_ext_int(params, idx)
150
+
151
+ intrin = torch.from_numpy(camera_intrinsic)
152
+ rot = torch.from_numpy(
153
+ camera_to_lidar[:3, :3]
154
+ ) # R_wc; we treat the lidar coordinate frame as the world frame
155
+ tran = torch.from_numpy(camera_to_lidar[:3, 3]) # T_wc
156
+
157
+ post_rot = torch.eye(2)
158
+ post_tran = torch.zeros(2)
159
+
160
+ img_src = [img]
161
+
162
+ # depth
163
+ if self.load_depth_file:
164
+ depth_img = selected_cav_base["depth_data"][idx]
165
+ img_src.append(depth_img)
166
+ else:
167
+ depth_img = None
168
+
169
+ # data augmentation
170
+ resize, resize_dims, crop, flip, rotate = sample_augmentation(
171
+ self.data_aug_conf, self.train
172
+ )
173
+ img_src, post_rot2, post_tran2 = img_transform(
174
+ img_src,
175
+ post_rot,
176
+ post_tran,
177
+ resize=resize,
178
+ resize_dims=resize_dims,
179
+ crop=crop,
180
+ flip=flip,
181
+ rotate=rotate,
182
+ )
183
+ # for convenience, make augmentation matrices 3x3
184
+ post_tran = torch.zeros(3)
185
+ post_rot = torch.eye(3)
186
+ post_tran[:2] = post_tran2
187
+ post_rot[:2, :2] = post_rot2
188
+
189
+ # decouple RGB and Depth
190
+
191
+ img_src[0] = normalize_img(img_src[0])
192
+ if self.load_depth_file:
193
+ img_src[1] = img_to_tensor(img_src[1]) * 255
194
+
195
+ imgs.append(torch.cat(img_src, dim=0))
196
+ intrins.append(intrin)
197
+ extrinsics.append(torch.from_numpy(camera_to_lidar))
198
+ rots.append(rot)
199
+ trans.append(tran)
200
+ post_rots.append(post_rot)
201
+ post_trans.append(post_tran)
202
+
203
+
204
+ selected_cav_processed.update(
205
+ {
206
+ "image_inputs":
207
+ {
208
+ "imgs": torch.stack(imgs), # [Ncam, 3or4, H, W]
209
+ "intrins": torch.stack(intrins),
210
+ "extrinsics": torch.stack(extrinsics),
211
+ "rots": torch.stack(rots),
212
+ "trans": torch.stack(trans),
213
+ "post_rots": torch.stack(post_rots),
214
+ "post_trans": torch.stack(post_trans),
215
+ }
216
+ }
217
+ )
218
+
219
+ # anchor box
220
+ selected_cav_processed.update({"anchor_box": self.anchor_box})
221
+
222
+ # note the reference pose is the ego pose
223
+ object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center([selected_cav_base],
224
+ ego_pose_clean)
225
+
226
+ selected_cav_processed.update(
227
+ {
228
+ "object_bbx_center": object_bbx_center[object_bbx_mask == 1],
229
+ "object_bbx_mask": object_bbx_mask,
230
+ "object_ids": object_ids,
231
+ 'transformation_matrix': transformation_matrix,
232
+ 'transformation_matrix_clean': transformation_matrix_clean
233
+ }
234
+ )
235
+
236
+
237
+ return selected_cav_processed
238
+
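The "make augmentation matrices 3x3" step above simply zero-pads the 2D post-augmentation rotation/scale and translation returned by img_transform so they can be stacked alongside 3D quantities. A small torch sketch, with made-up numbers:

import torch

post_rot2 = torch.eye(2) * 0.5           # e.g. a 0.5x resize
post_tran2 = torch.tensor([10.0, -4.0])  # e.g. a crop offset in pixels

post_rot = torch.eye(3)
post_tran = torch.zeros(3)
post_rot[:2, :2] = post_rot2
post_tran[:2] = post_tran2
# a pixel (u, v) then maps to post_rot[:2, :2] @ (u, v) + post_tran[:2]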
239
+ def __getitem__(self, idx):
240
+ base_data_dict = self.retrieve_base_data(idx)
241
+ base_data_dict = add_noise_data_dict(base_data_dict,self.params['noise_setting'])
242
+
243
+ processed_data_dict = OrderedDict()
244
+ processed_data_dict['ego'] = {}
245
+
246
+ ego_id = -1
247
+ ego_lidar_pose = []
248
+ ego_cav_base = None
249
+
250
+ # first find the ego vehicle's lidar pose
251
+ for cav_id, cav_content in base_data_dict.items():
252
+ if cav_content['ego']:
253
+ ego_id = cav_id
254
+ ego_lidar_pose = cav_content['params']['lidar_pose']
255
+ ego_cav_base = cav_content
256
+ break
257
+
258
+ assert cav_id == list(base_data_dict.keys())[
259
+ 0], "The first element in the OrderedDict must be ego"
260
+ assert ego_id != -1
261
+ assert len(ego_lidar_pose) > 0
262
+
263
+ agents_image_inputs = []
264
+ processed_features = []
265
+ object_stack = []
266
+ object_id_stack = []
267
+ single_label_list = []
268
+ single_object_bbx_center_list = []
269
+ single_object_bbx_mask_list = []
270
+ too_far = []
271
+ lidar_pose_list = []
272
+ lidar_pose_clean_list = []
273
+ cav_id_list = []
274
+ projected_lidar_clean_list = [] # DiscoNet
275
+
276
+ if self.visualize or self.kd_flag:
277
+ projected_lidar_stack = []
278
+
279
+ # loop over all CAVs to process information
280
+ for cav_id, selected_cav_base in base_data_dict.items():
281
+ # check if the cav is within the communication range with ego
282
+ distance = \
283
+ math.sqrt((selected_cav_base['params']['lidar_pose'][0] -
284
+ ego_lidar_pose[0]) ** 2 + (
285
+ selected_cav_base['params'][
286
+ 'lidar_pose'][1] - ego_lidar_pose[
287
+ 1]) ** 2)
288
+
289
+ # if distance is too far, we will just skip this agent
290
+ if distance > self.params['comm_range']:
291
+ too_far.append(cav_id)
292
+ continue
293
+
294
+ lidar_pose_clean_list.append(selected_cav_base['params']['lidar_pose_clean'])
295
+ lidar_pose_list.append(selected_cav_base['params']['lidar_pose']) # 6dof pose
296
+ cav_id_list.append(cav_id)
297
+
298
+ for cav_id in too_far:
299
+ base_data_dict.pop(cav_id)
300
+
301
+ ########## Updated by Yifan Lu 2022.1.26 ############
302
+ # box align to correct pose.
303
+ # stage1_content contains all agent. Even out of comm range.
304
+ if self.box_align and str(idx) in self.stage1_result.keys():
305
+ from opencood.models.sub_modules.box_align_v2 import box_alignment_relative_sample_np
306
+ stage1_content = self.stage1_result[str(idx)]
307
+ if stage1_content is not None:
308
+ all_agent_id_list = stage1_content['cav_id_list'] # include those out of range
309
+ all_agent_corners_list = stage1_content['pred_corner3d_np_list']
310
+ all_agent_uncertainty_list = stage1_content['uncertainty_np_list']
311
+
312
+ cur_agent_id_list = cav_id_list
313
+ cur_agent_pose = [base_data_dict[cav_id]['params']['lidar_pose'] for cav_id in cav_id_list]
314
+ cur_agnet_pose = np.array(cur_agent_pose)
315
+ cur_agent_in_all_agent = [all_agent_id_list.index(cur_agent) for cur_agent in cur_agent_id_list] # indexing current agent in `all_agent_id_list`
316
+
317
+ pred_corners_list = [np.array(all_agent_corners_list[cur_in_all_ind], dtype=np.float64)
318
+ for cur_in_all_ind in cur_agent_in_all_agent]
319
+ uncertainty_list = [np.array(all_agent_uncertainty_list[cur_in_all_ind], dtype=np.float64)
320
+ for cur_in_all_ind in cur_agent_in_all_agent]
321
+
322
+ if sum([len(pred_corners) for pred_corners in pred_corners_list]) != 0:
323
+ refined_pose = box_alignment_relative_sample_np(pred_corners_list,
324
+ cur_agnet_pose,
325
+ uncertainty_list=uncertainty_list,
326
+ **self.box_align_args)
327
+ cur_agnet_pose[:,[0,1,4]] = refined_pose
328
+
329
+ for i, cav_id in enumerate(cav_id_list):
330
+ lidar_pose_list[i] = cur_agnet_pose[i].tolist()
331
+ base_data_dict[cav_id]['params']['lidar_pose'] = cur_agnet_pose[i].tolist()
332
+
333
+
334
+
335
+ pairwise_t_matrix = \
336
+ get_pairwise_transformation(base_data_dict,
337
+ self.max_cav,
338
+ self.proj_first)
339
+
340
+ lidar_poses = np.array(lidar_pose_list).reshape(-1, 6) # [N_cav, 6]
341
+ lidar_poses_clean = np.array(lidar_pose_clean_list).reshape(-1, 6) # [N_cav, 6]
342
+
343
+ # merge preprocessed features from different cavs into the same dict
344
+ cav_num = len(cav_id_list)
345
+
346
+ # heterogeneous
347
+ if self.heterogeneous:
348
+ lidar_agent, camera_agent = self.selector.select_agent(idx)
349
+ lidar_agent = lidar_agent[:cav_num]
350
+ processed_data_dict['ego'].update({"lidar_agent": lidar_agent})
351
+
352
+ for _i, cav_id in enumerate(cav_id_list):
353
+ selected_cav_base = base_data_dict[cav_id]
354
+
355
+ # dynamically select the object-center generator for heterogeneous input
356
+ if (not self.visualize) and self.heterogeneous and lidar_agent[_i]:
357
+ self.generate_object_center = self.generate_object_center_lidar
358
+ elif (not self.visualize) and self.heterogeneous and (not lidar_agent[_i]):
359
+ self.generate_object_center = self.generate_object_center_camera
360
+
361
+ selected_cav_processed = self.get_item_single_car(
362
+ selected_cav_base,
363
+ ego_cav_base)
364
+
365
+ object_stack.append(selected_cav_processed['object_bbx_center'])
366
+ object_id_stack += selected_cav_processed['object_ids']
367
+ if self.load_lidar_file:
368
+ processed_features.append(
369
+ selected_cav_processed['processed_features'])
370
+ if self.load_camera_file:
371
+ agents_image_inputs.append(
372
+ selected_cav_processed['image_inputs'])
373
+
374
+ if self.visualize or self.kd_flag:
375
+ projected_lidar_stack.append(
376
+ selected_cav_processed['projected_lidar'])
377
+
378
+ if self.supervise_single:
379
+ single_label_list.append(selected_cav_processed['single_label_dict'])
380
+ single_object_bbx_center_list.append(selected_cav_processed['single_object_bbx_center'])
381
+ single_object_bbx_mask_list.append(selected_cav_processed['single_object_bbx_mask'])
382
+
383
+ # generate single view GT label
384
+ if self.supervise_single:
385
+ single_label_dicts = self.post_processor.collate_batch(single_label_list)
386
+ single_object_bbx_center = torch.from_numpy(np.array(single_object_bbx_center_list))
387
+ single_object_bbx_mask = torch.from_numpy(np.array(single_object_bbx_mask_list))
388
+ processed_data_dict['ego'].update({
389
+ "single_label_dict_torch": single_label_dicts,
390
+ "single_object_bbx_center_torch": single_object_bbx_center,
391
+ "single_object_bbx_mask_torch": single_object_bbx_mask,
392
+ })
393
+
394
+ if self.kd_flag:
395
+ stack_lidar_np = np.vstack(projected_lidar_stack)
396
+ stack_lidar_np = mask_points_by_range(stack_lidar_np,
397
+ self.params['preprocess'][
398
+ 'cav_lidar_range'])
399
+ stack_feature_processed = self.pre_processor.preprocess(stack_lidar_np)
400
+ processed_data_dict['ego'].update({'teacher_processed_lidar':
401
+ stack_feature_processed})
402
+
403
+
404
+ # exclude all repetitive objects
405
+ unique_indices = \
406
+ [object_id_stack.index(x) for x in set(object_id_stack)]
407
+ object_stack = np.vstack(object_stack)
408
+ object_stack = object_stack[unique_indices]
409
+
410
+ # make sure bounding boxes across all frames have the same number
411
+ object_bbx_center = \
412
+ np.zeros((self.params['postprocess']['max_num'], 7))
413
+ mask = np.zeros(self.params['postprocess']['max_num'])
414
+ object_bbx_center[:object_stack.shape[0], :] = object_stack
415
+ mask[:object_stack.shape[0]] = 1
416
+
417
+ if self.load_lidar_file:
418
+ merged_feature_dict = merge_features_to_dict(processed_features)
419
+ processed_data_dict['ego'].update({'processed_lidar': merged_feature_dict})
420
+ if self.load_camera_file:
421
+ merged_image_inputs_dict = merge_features_to_dict(agents_image_inputs, merge='stack')
422
+ processed_data_dict['ego'].update({'image_inputs': merged_image_inputs_dict})
423
+
424
+
425
+ # generate targets label
426
+ label_dict = \
427
+ self.post_processor.generate_label(
428
+ gt_box_center=object_bbx_center,
429
+ anchors=self.anchor_box,
430
+ mask=mask)
431
+
432
+ processed_data_dict['ego'].update(
433
+ {'object_bbx_center': object_bbx_center,
434
+ 'object_bbx_mask': mask,
435
+ 'object_ids': [object_id_stack[i] for i in unique_indices],
436
+ 'anchor_box': self.anchor_box,
437
+ 'label_dict': label_dict,
438
+ 'cav_num': cav_num,
439
+ 'pairwise_t_matrix': pairwise_t_matrix,
440
+ 'lidar_poses_clean': lidar_poses_clean,
441
+ 'lidar_poses': lidar_poses})
442
+
443
+
444
+ if self.visualize:
445
+ processed_data_dict['ego'].update({'origin_lidar':
446
+ np.vstack(
447
+ projected_lidar_stack)})
448
+
449
+
450
+ processed_data_dict['ego'].update({'sample_idx': idx,
451
+ 'cav_id_list': cav_id_list})
452
+
453
+ return processed_data_dict
454
+
455
+
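The pairwise_t_matrix built above by get_pairwise_transformation can be pictured as a (max_cav, max_cav, 4, 4) tensor whose entry T[i, j] maps agent j's frame into agent i's frame, padded with identities for absent agents. A simplified numpy sketch with yaw-only poses (the real implementation works from the full 6-DoF poses):

import numpy as np

def yaw_pose_to_world(x, y, yaw_deg):
    yaw = np.radians(yaw_deg)
    T = np.eye(4)
    T[:2, :2] = [[np.cos(yaw), -np.sin(yaw)], [np.sin(yaw), np.cos(yaw)]]
    T[:2, 3] = [x, y]
    return T

max_cav = 5
poses = [(0.0, 0.0, 0.0), (12.0, 3.0, 30.0)]          # two agents in range
pairwise = np.tile(np.eye(4), (max_cav, max_cav, 1, 1))
world = [yaw_pose_to_world(*p) for p in poses]
for i in range(len(poses)):
    for j in range(len(poses)):
        pairwise[i, j] = np.linalg.inv(world[i]) @ world[j]  # T_i_j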
456
+ def collate_batch_train(self, batch):
457
+ # Intermediate fusion is different from the other two
458
+ output_dict = {'ego': {}}
459
+
460
+ object_bbx_center = []
461
+ object_bbx_mask = []
462
+ object_ids = []
463
+ processed_lidar_list = []
464
+ image_inputs_list = []
465
+ # used to record the number of CAVs in each sample
466
+ record_len = []
467
+ label_dict_list = []
468
+ lidar_pose_list = []
469
+ origin_lidar = []
470
+ lidar_pose_clean_list = []
471
+
472
+ # heterogeneous
473
+ lidar_agent_list = []
474
+
475
+ # pairwise transformation matrix
476
+ pairwise_t_matrix_list = []
477
+
478
+ # DiscoNet
479
+ teacher_processed_lidar_list = []
480
+
481
+ ### 2022.10.10 single gt ####
482
+ if self.supervise_single:
483
+ pos_equal_one_single = []
484
+ neg_equal_one_single = []
485
+ targets_single = []
486
+ object_bbx_center_single = []
487
+ object_bbx_mask_single = []
488
+
489
+ for i in range(len(batch)):
490
+ ego_dict = batch[i]['ego']
491
+ object_bbx_center.append(ego_dict['object_bbx_center'])
492
+ object_bbx_mask.append(ego_dict['object_bbx_mask'])
493
+ object_ids.append(ego_dict['object_ids'])
494
+ lidar_pose_list.append(ego_dict['lidar_poses']) # ego_dict['lidar_pose'] is np.ndarray [N,6]
495
+ lidar_pose_clean_list.append(ego_dict['lidar_poses_clean'])
496
+ if self.load_lidar_file:
497
+ processed_lidar_list.append(ego_dict['processed_lidar'])
498
+ if self.load_camera_file:
499
+ image_inputs_list.append(ego_dict['image_inputs']) # different cav_num, ego_dict['image_inputs'] is dict.
500
+
501
+ record_len.append(ego_dict['cav_num'])
502
+ label_dict_list.append(ego_dict['label_dict'])
503
+ pairwise_t_matrix_list.append(ego_dict['pairwise_t_matrix'])
504
+
505
+ if self.visualize:
506
+ origin_lidar.append(ego_dict['origin_lidar'])
507
+
508
+ if self.kd_flag:
509
+ teacher_processed_lidar_list.append(ego_dict['teacher_processed_lidar'])
510
+
511
+ ### 2022.10.10 single gt ####
512
+ if self.supervise_single:
513
+ pos_equal_one_single.append(ego_dict['single_label_dict_torch']['pos_equal_one'])
514
+ neg_equal_one_single.append(ego_dict['single_label_dict_torch']['neg_equal_one'])
515
+ targets_single.append(ego_dict['single_label_dict_torch']['targets'])
516
+ object_bbx_center_single.append(ego_dict['single_object_bbx_center_torch'])
517
+ object_bbx_mask_single.append(ego_dict['single_object_bbx_mask_torch'])
518
+
519
+ # heterogeneous
520
+ if self.heterogeneous:
521
+ lidar_agent_list.append(ego_dict['lidar_agent'])
522
+
523
+ # convert to numpy, (B, max_num, 7)
524
+ object_bbx_center = torch.from_numpy(np.array(object_bbx_center))
525
+ object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask))
526
+
527
+ if self.load_lidar_file:
528
+ merged_feature_dict = merge_features_to_dict(processed_lidar_list)
529
+
530
+ if self.heterogeneous:
531
+ lidar_agent = np.concatenate(lidar_agent_list)
532
+ lidar_agent_idx = lidar_agent.nonzero()[0].tolist()
533
+ for k, v in merged_feature_dict.items(): # 'voxel_features' 'voxel_num_points' 'voxel_coords'
534
+ merged_feature_dict[k] = [v[index] for index in lidar_agent_idx]
535
+
536
+ if not self.heterogeneous or (self.heterogeneous and sum(lidar_agent) != 0):
537
+ processed_lidar_torch_dict = \
538
+ self.pre_processor.collate_batch(merged_feature_dict)
539
+ output_dict['ego'].update({'processed_lidar': processed_lidar_torch_dict})
540
+
541
+ if self.load_camera_file:
542
+ merged_image_inputs_dict = merge_features_to_dict(image_inputs_list, merge='cat')
543
+
544
+ if self.heterogeneous:
545
+ lidar_agent = np.concatenate(lidar_agent_list)
546
+ camera_agent = 1 - lidar_agent
547
+ camera_agent_idx = camera_agent.nonzero()[0].tolist()
548
+ if sum(camera_agent) != 0:
549
+ for k, v in merged_image_inputs_dict.items(): # 'imgs' 'rots' 'trans' ...
550
+ merged_image_inputs_dict[k] = torch.stack([v[index] for index in camera_agent_idx])
551
+
552
+ if not self.heterogeneous or (self.heterogeneous and sum(camera_agent) != 0):
553
+ output_dict['ego'].update({'image_inputs': merged_image_inputs_dict})
554
+
555
+ record_len = torch.from_numpy(np.array(record_len, dtype=int))
556
+ lidar_pose = torch.from_numpy(np.concatenate(lidar_pose_list, axis=0))
557
+ lidar_pose_clean = torch.from_numpy(np.concatenate(lidar_pose_clean_list, axis=0))
558
+ label_torch_dict = \
559
+ self.post_processor.collate_batch(label_dict_list)
560
+
561
+ # for centerpoint
562
+ label_torch_dict.update({'object_bbx_center': object_bbx_center,
563
+ 'object_bbx_mask': object_bbx_mask})
564
+
565
+ # (B, max_cav)
566
+ pairwise_t_matrix = torch.from_numpy(np.array(pairwise_t_matrix_list))
567
+
568
+ # add pairwise_t_matrix to label dict
569
+ label_torch_dict['pairwise_t_matrix'] = pairwise_t_matrix
570
+ label_torch_dict['record_len'] = record_len
571
+
572
+
573
+ # object id is only used during inference, where batch size is 1.
574
+ # so here we only get the first element.
575
+ output_dict['ego'].update({'object_bbx_center': object_bbx_center,
576
+ 'object_bbx_mask': object_bbx_mask,
577
+ 'record_len': record_len,
578
+ 'label_dict': label_torch_dict,
579
+ 'object_ids': object_ids[0],
580
+ 'pairwise_t_matrix': pairwise_t_matrix,
581
+ 'lidar_pose_clean': lidar_pose_clean,
582
+ 'lidar_pose': lidar_pose,
583
+ 'anchor_box': self.anchor_box_torch})
584
+
585
+
586
+ if self.visualize:
587
+ origin_lidar = \
588
+ np.array(downsample_lidar_minimum(pcd_np_list=origin_lidar))
589
+ origin_lidar = torch.from_numpy(origin_lidar)
590
+ output_dict['ego'].update({'origin_lidar': origin_lidar})
591
+
592
+ if self.kd_flag:
593
+ teacher_processed_lidar_torch_dict = \
594
+ self.pre_processor.collate_batch(teacher_processed_lidar_list)
595
+ output_dict['ego'].update({'teacher_processed_lidar':teacher_processed_lidar_torch_dict})
596
+
597
+
598
+ if self.supervise_single:
599
+ output_dict['ego'].update({
600
+ "label_dict_single":{
601
+ "pos_equal_one": torch.cat(pos_equal_one_single, dim=0),
602
+ "neg_equal_one": torch.cat(neg_equal_one_single, dim=0),
603
+ "targets": torch.cat(targets_single, dim=0),
604
+ # for centerpoint
605
+ "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0),
606
+ "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0)
607
+ },
608
+ "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0),
609
+ "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0)
610
+ })
611
+
612
+ if self.heterogeneous:
613
+ output_dict['ego'].update({
614
+ "lidar_agent_record": torch.from_numpy(np.concatenate(lidar_agent_list)) # [0,1,1,0,1...]
615
+ })
616
+
617
+ return output_dict
618
+
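The heterogeneous branches above split the per-agent feature lists by modality: a 0/1 lidar_agent vector selects which agents feed the lidar backbone, and its complement selects the camera inputs. A toy illustration of that masking:

import numpy as np
import torch

lidar_agent = np.array([1, 0, 1, 0])                    # per-agent modality flags
per_agent_feat = [torch.full((2, 2), float(i)) for i in range(4)]

lidar_idx = lidar_agent.nonzero()[0].tolist()           # [0, 2]
camera_idx = (1 - lidar_agent).nonzero()[0].tolist()    # [1, 3]

lidar_feats = [per_agent_feat[i] for i in lidar_idx]
camera_feats = torch.stack([per_agent_feat[i] for i in camera_idx])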
619
+ def collate_batch_test(self, batch):
620
+ assert len(batch) <= 1, "Batch size 1 is required during testing!"
621
+ output_dict = self.collate_batch_train(batch)
622
+ if output_dict is None:
623
+ return None
624
+
625
+ # check if anchor box in the batch
626
+ if batch[0]['ego']['anchor_box'] is not None:
627
+ output_dict['ego'].update({'anchor_box':
628
+ self.anchor_box_torch})
629
+
630
+ # save the transformation matrix (4, 4) to ego vehicle
631
+ # the transformation is only used in post-processing and is effectively
632
+ # an identity, since all boxes are predicted in the ego coordinate frame.
633
+ transformation_matrix_torch = \
634
+ torch.from_numpy(np.identity(4)).float()
635
+ transformation_matrix_clean_torch = \
636
+ torch.from_numpy(np.identity(4)).float()
637
+
638
+ output_dict['ego'].update({'transformation_matrix':
639
+ transformation_matrix_torch,
640
+ 'transformation_matrix_clean':
641
+ transformation_matrix_clean_torch,})
642
+
643
+ output_dict['ego'].update({
644
+ "sample_idx": batch[0]['ego']['sample_idx'],
645
+ "cav_id_list": batch[0]['ego']['cav_id_list']
646
+ })
647
+
648
+ return output_dict
649
+
650
+
651
+ def post_process(self, data_dict, output_dict):
652
+ """
653
+ Process the outputs of the model to 2D/3D bounding box.
654
+
655
+ Parameters
656
+ ----------
657
+ data_dict : dict
658
+ The dictionary containing the origin input data of model.
659
+
660
+ output_dict :dict
661
+ The dictionary containing the output of the model.
662
+
663
+ Returns
664
+ -------
665
+ pred_box_tensor : torch.Tensor
666
+ The tensor of prediction bounding box after NMS.
667
+ gt_box_tensor : torch.Tensor
668
+ The tensor of gt bounding box.
669
+ """
670
+ pred_box_tensor, pred_score = \
671
+ self.post_processor.post_process(data_dict, output_dict)
672
+ gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict)
673
+
674
+ return pred_box_tensor, pred_score, gt_box_tensor
675
+
676
+
677
+ return IntermediateFusionDataset
678
+
679
+
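When kd_flag is set, __getitem__ above builds a "teacher" input by stacking every CAV's ego-projected points, cropping them to the configured lidar range, and voxelizing the result once. The range mask below is a simplified stand-in for mask_points_by_range, just to make the shape of the operation concrete:

import numpy as np

def mask_points_by_range_simple(points, limits):
    x_min, y_min, z_min, x_max, y_max, z_max = limits
    keep = ((points[:, 0] >= x_min) & (points[:, 0] <= x_max) &
            (points[:, 1] >= y_min) & (points[:, 1] <= y_max) &
            (points[:, 2] >= z_min) & (points[:, 2] <= z_max))
    return points[keep]

cav_points = [np.random.rand(100, 4) * 10, np.random.rand(80, 4) * 10]
stacked = np.vstack(cav_points)                      # all CAVs in the ego frame
teacher_points = mask_points_by_range_simple(stacked, (-2, -2, -2, 8, 8, 8))
# teacher_points would then go through the same pre_processor.preprocess(...)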
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_heter_fusion_dataset.py ADDED
@@ -0,0 +1,752 @@
1
+ '''
2
+ intermediate heter fusion dataset
3
+
4
+ Note that for the DAIR-V2X dataset,
5
+ each agent should retrieve its own objects and merge them by IoU,
6
+ instead of using the cooperative label.
7
+ '''
8
+
9
+ import random
10
+ import math
11
+ from collections import OrderedDict
12
+ import numpy as np
13
+ import torch
14
+ import copy
15
+ from icecream import ic
16
+ from PIL import Image
17
+ import pickle as pkl
18
+ from opencood.utils import box_utils as box_utils
19
+ from opencood.data_utils.pre_processor import build_preprocessor
20
+ from opencood.data_utils.post_processor import build_postprocessor
21
+ from opencood.utils.camera_utils import (
22
+ sample_augmentation,
23
+ img_transform,
24
+ normalize_img,
25
+ img_to_tensor,
26
+ )
27
+ from opencood.utils.common_utils import merge_features_to_dict, compute_iou, convert_format
28
+ from opencood.utils.transformation_utils import x1_to_x2, x_to_world, get_pairwise_transformation
29
+ from opencood.utils.pose_utils import add_noise_data_dict
30
+ from opencood.data_utils.pre_processor import build_preprocessor
31
+ from opencood.utils.pcd_utils import (
32
+ mask_points_by_range,
33
+ mask_ego_points,
34
+ shuffle_points,
35
+ downsample_lidar_minimum,
36
+ )
37
+ from opencood.utils.common_utils import read_json
38
+ from opencood.utils.heter_utils import Adaptor
39
+
40
+
41
+ def getIntermediateheterFusionDataset(cls):
42
+ """
43
+ cls: the Basedataset.
44
+ """
45
+ class IntermediateheterFusionDataset(cls):
46
+ def __init__(self, params, visualize, train=True):
47
+ super().__init__(params, visualize, train)
48
+ # intermediate and supervise single
49
+ self.supervise_single = True if ('supervise_single' in params['model']['args'] and params['model']['args']['supervise_single']) \
50
+ else False
51
+ self.proj_first = False if 'proj_first' not in params['fusion']['args']\
52
+ else params['fusion']['args']['proj_first']
53
+
54
+ self.anchor_box = self.post_processor.generate_anchor_box()
55
+ self.anchor_box_torch = torch.from_numpy(self.anchor_box)
56
+
57
+ self.heterogeneous = True
58
+ self.modality_assignment = read_json(params['heter']['assignment_path'])
59
+ self.ego_modality = params['heter']['ego_modality'] # "m1" or "m1&m2" or "m3"
60
+
61
+ self.modality_name_list = list(params['heter']['modality_setting'].keys())
62
+ self.sensor_type_dict = OrderedDict()
63
+
64
+ lidar_channels_dict = params['heter'].get('lidar_channels_dict', OrderedDict())
65
+ mapping_dict = params['heter']['mapping_dict']
66
+ cav_preference = params['heter'].get("cav_preference", None)
67
+
68
+ self.adaptor = Adaptor(self.ego_modality,
69
+ self.modality_name_list,
70
+ self.modality_assignment,
71
+ lidar_channels_dict,
72
+ mapping_dict,
73
+ cav_preference,
74
+ train)
75
+
76
+ for modality_name, modal_setting in params['heter']['modality_setting'].items():
77
+ self.sensor_type_dict[modality_name] = modal_setting['sensor_type']
78
+ if modal_setting['sensor_type'] == 'lidar':
79
+ setattr(self, f"pre_processor_{modality_name}", build_preprocessor(modal_setting['preprocess'], train))
80
+
81
+ elif modal_setting['sensor_type'] == 'camera':
82
+ setattr(self, f"data_aug_conf_{modality_name}", modal_setting['data_aug_conf'])
83
+
84
+ else:
85
+ raise("Not support this type of sensor")
86
+
87
+ self.reinitialize()
88
+
89
+
90
+ self.kd_flag = params.get('kd_flag', False)
91
+
92
+ self.box_align = False
93
+ if "box_align" in params:
94
+ self.box_align = True
95
+ self.stage1_result_path = params['box_align']['train_result'] if train else params['box_align']['val_result']
96
+ self.stage1_result = read_json(self.stage1_result_path)
97
+ self.box_align_args = params['box_align']['args']
98
+
99
+
100
+
101
+ def get_item_single_car(self, selected_cav_base, ego_cav_base):
102
+ """
103
+ Process a single CAV's information for the train/test pipeline.
104
+
105
+
106
+ Parameters
107
+ ----------
108
+ selected_cav_base : dict
109
+ The dictionary contains a single CAV's raw information.
110
+ including 'params', 'camera_data'
111
+ ego_pose : list, length 6
112
+ The ego vehicle lidar pose under world coordinate.
113
+ ego_pose_clean : list, length 6
114
+ only used for gt box generation
115
+
116
+ Returns
117
+ -------
118
+ selected_cav_processed : dict
119
+ The dictionary contains the cav's processed information.
120
+ """
121
+ selected_cav_processed = {}
122
+ ego_pose, ego_pose_clean = ego_cav_base['params']['lidar_pose'], ego_cav_base['params']['lidar_pose_clean']
123
+
124
+ # calculate the transformation matrix
125
+ transformation_matrix = \
126
+ x1_to_x2(selected_cav_base['params']['lidar_pose'],
127
+ ego_pose) # T_ego_cav
128
+ transformation_matrix_clean = \
129
+ x1_to_x2(selected_cav_base['params']['lidar_pose_clean'],
130
+ ego_pose_clean)
131
+
132
+ modality_name = selected_cav_base['modality_name']
133
+ sensor_type = self.sensor_type_dict[modality_name]
134
+
135
+ # lidar
136
+ if sensor_type == "lidar" or self.visualize:
137
+ # process lidar
138
+ lidar_np = selected_cav_base['lidar_np']
139
+ lidar_np = shuffle_points(lidar_np)
140
+ # remove points that hit itself
141
+ lidar_np = mask_ego_points(lidar_np)
142
+ # project the lidar to ego space
143
+ # x,y,z in ego space
144
+ projected_lidar = \
145
+ box_utils.project_points_by_matrix_torch(lidar_np[:, :3],
146
+ transformation_matrix)
147
+ if self.proj_first:
148
+ lidar_np[:, :3] = projected_lidar
149
+
150
+ if self.visualize:
151
+ # filter lidar
152
+ selected_cav_processed.update({'projected_lidar': projected_lidar})
153
+
154
+ if self.kd_flag:
155
+ lidar_proj_np = copy.deepcopy(lidar_np)
156
+ lidar_proj_np[:,:3] = projected_lidar
157
+
158
+ selected_cav_processed.update({'projected_lidar': lidar_proj_np})
159
+
160
+ if sensor_type == "lidar":
161
+ processed_lidar = eval(f"self.pre_processor_{modality_name}").preprocess(lidar_np)
162
+ selected_cav_processed.update({f'processed_features_{modality_name}': processed_lidar})
163
+
164
+ # generate targets label single GT, note the reference pose is itself.
165
+ object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center(
166
+ [selected_cav_base], selected_cav_base['params']['lidar_pose']
167
+ )
168
+ label_dict = self.post_processor.generate_label(
169
+ gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask
170
+ )
171
+ selected_cav_processed.update({
172
+ "single_label_dict": label_dict,
173
+ "single_object_bbx_center": object_bbx_center,
174
+ "single_object_bbx_mask": object_bbx_mask})
175
+
176
+ # camera
177
+ if sensor_type == "camera":
178
+ camera_data_list = selected_cav_base["camera_data"]
179
+ params = selected_cav_base["params"]
180
+ imgs = []
181
+ rots = []
182
+ trans = []
183
+ intrins = []
184
+ extrinsics = []
185
+ post_rots = []
186
+ post_trans = []
187
+
188
+ for idx, img in enumerate(camera_data_list):
189
+ camera_to_lidar, camera_intrinsic = self.get_ext_int(params, idx)
190
+
191
+ intrin = torch.from_numpy(camera_intrinsic)
192
+ rot = torch.from_numpy(
193
+ camera_to_lidar[:3, :3]
194
+ ) # R_wc, we consider world-coord is the lidar-coord
195
+ tran = torch.from_numpy(camera_to_lidar[:3, 3]) # T_wc
196
+
197
+ post_rot = torch.eye(2)
198
+ post_tran = torch.zeros(2)
199
+
200
+ img_src = [img]
201
+
202
+ # depth
203
+ if self.load_depth_file:
204
+ depth_img = selected_cav_base["depth_data"][idx]
205
+ img_src.append(depth_img)
206
+ else:
207
+ depth_img = None
208
+
209
+ # data augmentation
210
+ resize, resize_dims, crop, flip, rotate = sample_augmentation(
211
+ eval(f"self.data_aug_conf_{modality_name}"), self.train
212
+ )
213
+ img_src, post_rot2, post_tran2 = img_transform(
214
+ img_src,
215
+ post_rot,
216
+ post_tran,
217
+ resize=resize,
218
+ resize_dims=resize_dims,
219
+ crop=crop,
220
+ flip=flip,
221
+ rotate=rotate,
222
+ )
223
+ # for convenience, make augmentation matrices 3x3
224
+ post_tran = torch.zeros(3)
225
+ post_rot = torch.eye(3)
226
+ post_tran[:2] = post_tran2
227
+ post_rot[:2, :2] = post_rot2
228
+
229
+ # decouple RGB and Depth
230
+
231
+ img_src[0] = normalize_img(img_src[0])
232
+ if self.load_depth_file:
233
+ img_src[1] = img_to_tensor(img_src[1]) * 255
234
+
235
+ imgs.append(torch.cat(img_src, dim=0))
236
+ intrins.append(intrin)
237
+ extrinsics.append(torch.from_numpy(camera_to_lidar))
238
+ rots.append(rot)
239
+ trans.append(tran)
240
+ post_rots.append(post_rot)
241
+ post_trans.append(post_tran)
242
+
243
+
244
+ selected_cav_processed.update(
245
+ {
246
+ f"image_inputs_{modality_name}":
247
+ {
248
+ "imgs": torch.stack(imgs), # [Ncam, 3or4, H, W]
249
+ "intrins": torch.stack(intrins),
250
+ "extrinsics": torch.stack(extrinsics),
251
+ "rots": torch.stack(rots),
252
+ "trans": torch.stack(trans),
253
+ "post_rots": torch.stack(post_rots),
254
+ "post_trans": torch.stack(post_trans),
255
+ }
256
+ }
257
+ )
258
+
259
+ # anchor box
260
+ selected_cav_processed.update({"anchor_box": self.anchor_box})
261
+
262
+ # note the reference pose ego
263
+ object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center([selected_cav_base],
264
+ ego_pose_clean)
265
+
266
+ selected_cav_processed.update(
267
+ {
268
+ "object_bbx_center": object_bbx_center[object_bbx_mask == 1],
269
+ "object_bbx_mask": object_bbx_mask,
270
+ "object_ids": object_ids,
271
+ 'transformation_matrix': transformation_matrix,
272
+ 'transformation_matrix_clean': transformation_matrix_clean
273
+ }
274
+ )
275
+
276
+
277
+ return selected_cav_processed
278
+
279
+ def __getitem__(self, idx):
280
+ base_data_dict = self.retrieve_base_data(idx)
281
+ base_data_dict = add_noise_data_dict(base_data_dict,self.params['noise_setting'])
282
+
283
+ processed_data_dict = OrderedDict()
284
+ processed_data_dict['ego'] = {}
285
+
286
+ ego_id = -1
287
+ ego_lidar_pose = []
288
+ ego_cav_base = None
289
+
290
+ # first find the ego vehicle's lidar pose
291
+ for cav_id, cav_content in base_data_dict.items():
292
+ if cav_content['ego']:
293
+ ego_id = cav_id
294
+ ego_lidar_pose = cav_content['params']['lidar_pose']
295
+ ego_cav_base = cav_content
296
+ break
297
+
298
+ assert cav_id == list(base_data_dict.keys())[
299
+ 0], "The first element in the OrderedDict must be ego"
300
+ assert ego_id != -1
301
+ assert len(ego_lidar_pose) > 0
302
+
303
+
304
+ input_list_m1 = [] # can contain lidar or camera
305
+ input_list_m2 = []
306
+ input_list_m3 = []
307
+ input_list_m4 = []
308
+
309
+ agent_modality_list = []
310
+ object_stack = []
311
+ object_id_stack = []
312
+ single_label_list = []
313
+ single_object_bbx_center_list = []
314
+ single_object_bbx_mask_list = []
315
+ exclude_agent = []
316
+ lidar_pose_list = []
317
+ lidar_pose_clean_list = []
318
+ cav_id_list = []
319
+ projected_lidar_clean_list = [] # disconet
320
+
321
+ if self.visualize or self.kd_flag:
322
+ projected_lidar_stack = []
323
+
324
+ # loop over all CAVs to process information
325
+ for cav_id, selected_cav_base in base_data_dict.items():
326
+ # check if the cav is within the communication range with ego
327
+ distance = \
328
+ math.sqrt((selected_cav_base['params']['lidar_pose'][0] -
329
+ ego_lidar_pose[0]) ** 2 + (
330
+ selected_cav_base['params'][
331
+ 'lidar_pose'][1] - ego_lidar_pose[
332
+ 1]) ** 2)
333
+
334
+ # if distance is too far, we will just skip this agent
335
+ if distance > self.params['comm_range']:
336
+ exclude_agent.append(cav_id)
337
+ continue
338
+
339
+ # if modality not match
340
+ if self.adaptor.unmatched_modality(selected_cav_base['modality_name']):
341
+ exclude_agent.append(cav_id)
342
+ continue
343
+
344
+ lidar_pose_clean_list.append(selected_cav_base['params']['lidar_pose_clean'])
345
+ lidar_pose_list.append(selected_cav_base['params']['lidar_pose']) # 6dof pose
346
+ cav_id_list.append(cav_id)
347
+
348
+ if len(cav_id_list) == 0:
349
+ return None
350
+
351
+ for cav_id in exclude_agent:
352
+ base_data_dict.pop(cav_id)
353
+
354
+ ########## Updated by Yifan Lu 2022.1.26 ############
355
+ # box align to correct pose.
356
+ # stage1_content contains all agent. Even out of comm range.
357
+ if self.box_align and str(idx) in self.stage1_result.keys():
358
+ from opencood.models.sub_modules.box_align_v2 import box_alignment_relative_sample_np
359
+ stage1_content = self.stage1_result[str(idx)]
360
+ if stage1_content is not None:
361
+ all_agent_id_list = stage1_content['cav_id_list'] # include those out of range
362
+ all_agent_corners_list = stage1_content['pred_corner3d_np_list']
363
+ all_agent_uncertainty_list = stage1_content['uncertainty_np_list']
364
+
365
+ cur_agent_id_list = cav_id_list
366
+ cur_agent_pose = [base_data_dict[cav_id]['params']['lidar_pose'] for cav_id in cav_id_list]
367
+ cur_agnet_pose = np.array(cur_agent_pose)
368
+ cur_agent_in_all_agent = [all_agent_id_list.index(cur_agent) for cur_agent in cur_agent_id_list] # indexing current agent in `all_agent_id_list`
369
+
370
+ pred_corners_list = [np.array(all_agent_corners_list[cur_in_all_ind], dtype=np.float64)
371
+ for cur_in_all_ind in cur_agent_in_all_agent]
372
+ uncertainty_list = [np.array(all_agent_uncertainty_list[cur_in_all_ind], dtype=np.float64)
373
+ for cur_in_all_ind in cur_agent_in_all_agent]
374
+
375
+ if sum([len(pred_corners) for pred_corners in pred_corners_list]) != 0:
376
+ refined_pose = box_alignment_relative_sample_np(pred_corners_list,
377
+ cur_agnet_pose,
378
+ uncertainty_list=uncertainty_list,
379
+ **self.box_align_args)
380
+ cur_agnet_pose[:,[0,1,4]] = refined_pose
381
+
382
+ for i, cav_id in enumerate(cav_id_list):
383
+ lidar_pose_list[i] = cur_agnet_pose[i].tolist()
384
+ base_data_dict[cav_id]['params']['lidar_pose'] = cur_agnet_pose[i].tolist()
385
+
386
+
387
+
388
+ pairwise_t_matrix = \
389
+ get_pairwise_transformation(base_data_dict,
390
+ self.max_cav,
391
+ self.proj_first)
392
+
393
+ lidar_poses = np.array(lidar_pose_list).reshape(-1, 6) # [N_cav, 6]
394
+ lidar_poses_clean = np.array(lidar_pose_clean_list).reshape(-1, 6) # [N_cav, 6]
395
+
396
+ # merge preprocessed features from different cavs into the same dict
397
+ cav_num = len(cav_id_list)
398
+
399
+ for _i, cav_id in enumerate(cav_id_list):
400
+ selected_cav_base = base_data_dict[cav_id]
401
+ modality_name = selected_cav_base['modality_name']
402
+ sensor_type = self.sensor_type_dict[selected_cav_base['modality_name']]
403
+
404
+ # dynamic object center generator! for heterogeneous input
405
+ if not self.visualize:
406
+ self.generate_object_center = eval(f"self.generate_object_center_{sensor_type}")
407
+ # need discussion. In test phase, use lidar label.
408
+ else:
409
+ self.generate_object_center = self.generate_object_center_lidar
410
+
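+ # the eval() call dispatches to generate_object_center_lidar or
+ # generate_object_center_camera, so GT boxes are produced with the label
+ # source that matches each agent's sensor modality.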
411
+ selected_cav_processed = self.get_item_single_car(
412
+ selected_cav_base,
413
+ ego_cav_base)
414
+
415
+ object_stack.append(selected_cav_processed['object_bbx_center'])
416
+ object_id_stack += selected_cav_processed['object_ids']
417
+
418
+
419
+ if sensor_type == "lidar":
420
+ eval(f"input_list_{modality_name}").append(selected_cav_processed[f"processed_features_{modality_name}"])
421
+ elif sensor_type == "camera":
422
+ eval(f"input_list_{modality_name}").append(selected_cav_processed[f"image_inputs_{modality_name}"])
423
+ else:
424
+ raise ValueError(f"Unknown sensor type: {sensor_type}")
425
+
426
+ agent_modality_list.append(modality_name)
427
+
428
+ if self.visualize or self.kd_flag:
429
+ projected_lidar_stack.append(
430
+ selected_cav_processed['projected_lidar'])
431
+
432
+ if self.supervise_single or self.heterogeneous:
433
+ single_label_list.append(selected_cav_processed['single_label_dict'])
434
+ single_object_bbx_center_list.append(selected_cav_processed['single_object_bbx_center'])
435
+ single_object_bbx_mask_list.append(selected_cav_processed['single_object_bbx_mask'])
436
+
437
+ # generate single view GT label
438
+ if self.supervise_single or self.heterogeneous:
439
+ single_label_dicts = self.post_processor.collate_batch(single_label_list)
440
+ single_object_bbx_center = torch.from_numpy(np.array(single_object_bbx_center_list))
441
+ single_object_bbx_mask = torch.from_numpy(np.array(single_object_bbx_mask_list))
442
+ processed_data_dict['ego'].update({
443
+ "single_label_dict_torch": single_label_dicts,
444
+ "single_object_bbx_center_torch": single_object_bbx_center,
445
+ "single_object_bbx_mask_torch": single_object_bbx_mask,
446
+ })
447
+
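+ # the ego-projected point clouds of all agents are stacked, range-masked and
+ # run through the pre-processor once more to build the "teacher" input used
+ # for knowledge distillation (the DiscoNet-style kd_flag path).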
448
+ if self.kd_flag:
449
+ stack_lidar_np = np.vstack(projected_lidar_stack)
450
+ stack_lidar_np = mask_points_by_range(stack_lidar_np,
451
+ self.params['preprocess'][
452
+ 'cav_lidar_range'])
453
+ stack_feature_processed = self.pre_processor.preprocess(stack_lidar_np)
454
+ processed_data_dict['ego'].update({'teacher_processed_lidar':
455
+ stack_feature_processed})
456
+
457
+
458
+ # exclude all repetitive objects, DAIR-V2X
459
+ if self.params['fusion']['dataset'] == 'dairv2x':
460
+ if len(object_stack) == 1:
461
+ object_stack = object_stack[0]
462
+ else:
463
+ ego_boxes_np = object_stack[0]
464
+ cav_boxes_np = object_stack[1]
465
+ order = self.params['postprocess']['order']
466
+ ego_corners_np = box_utils.boxes_to_corners_3d(ego_boxes_np, order)
467
+ cav_corners_np = box_utils.boxes_to_corners_3d(cav_boxes_np, order)
468
+ ego_polygon_list = list(convert_format(ego_corners_np))
469
+ cav_polygon_list = list(convert_format(cav_corners_np))
470
+ iou_thresh = 0.05
471
+
472
+
473
+ gt_boxes_from_cav = []
474
+ for i in range(len(cav_polygon_list)):
475
+ cav_polygon = cav_polygon_list[i]
476
+ ious = compute_iou(cav_polygon, ego_polygon_list)
477
+ if (ious > iou_thresh).any():
478
+ continue
479
+ gt_boxes_from_cav.append(cav_boxes_np[i])
480
+
481
+ if len(gt_boxes_from_cav):
482
+ object_stack_from_cav = np.stack(gt_boxes_from_cav)
483
+ object_stack = np.vstack([ego_boxes_np, object_stack_from_cav])
484
+ else:
485
+ object_stack = ego_boxes_np
486
+
487
+ unique_indices = np.arange(object_stack.shape[0])
488
+ object_id_stack = np.arange(object_stack.shape[0])
489
+ else:
490
+ # exclude all repetitive objects, OPV2V-H
491
+ unique_indices = \
492
+ [object_id_stack.index(x) for x in set(object_id_stack)]
493
+ object_stack = np.vstack(object_stack)
494
+ object_stack = object_stack[unique_indices]
495
+
496
+ # make sure the number of bounding boxes is the same across all frames (pad to max_num)
497
+ object_bbx_center = \
498
+ np.zeros((self.params['postprocess']['max_num'], 7))
499
+ mask = np.zeros(self.params['postprocess']['max_num'])
500
+ object_bbx_center[:object_stack.shape[0], :] = object_stack
501
+ mask[:object_stack.shape[0]] = 1
502
+
503
+ for modality_name in self.modality_name_list:
504
+ if self.sensor_type_dict[modality_name] == "lidar":
505
+ merged_feature_dict = merge_features_to_dict(eval(f"input_list_{modality_name}"))
506
+ processed_data_dict['ego'].update({f'input_{modality_name}': merged_feature_dict}) # maybe None
507
+ elif self.sensor_type_dict[modality_name] == "camera":
508
+ merged_image_inputs_dict = merge_features_to_dict(eval(f"input_list_{modality_name}"), merge='stack')
509
+ processed_data_dict['ego'].update({f'input_{modality_name}': merged_image_inputs_dict}) # maybe None
510
+
511
+ processed_data_dict['ego'].update({'agent_modality_list': agent_modality_list})
512
+
513
+ # generate targets label
514
+ label_dict = \
515
+ self.post_processor.generate_label(
516
+ gt_box_center=object_bbx_center,
517
+ anchors=self.anchor_box,
518
+ mask=mask)
519
+
520
+ processed_data_dict['ego'].update(
521
+ {'object_bbx_center': object_bbx_center,
522
+ 'object_bbx_mask': mask,
523
+ 'object_ids': [object_id_stack[i] for i in unique_indices],
524
+ 'anchor_box': self.anchor_box,
525
+ 'label_dict': label_dict,
526
+ 'cav_num': cav_num,
527
+ 'pairwise_t_matrix': pairwise_t_matrix,
528
+ 'lidar_poses_clean': lidar_poses_clean,
529
+ 'lidar_poses': lidar_poses})
530
+
531
+
532
+ if self.visualize:
533
+ processed_data_dict['ego'].update({'origin_lidar':
534
+ np.vstack(
535
+ projected_lidar_stack)})
536
+
537
+
538
+ processed_data_dict['ego'].update({'sample_idx': idx,
539
+ 'cav_id_list': cav_id_list})
540
+
541
+ return processed_data_dict
542
+
543
+
544
+ def collate_batch_train(self, batch):
545
+ # Intermediate fusion is different from the other two
546
+ output_dict = {'ego': {}}
547
+
548
+ object_bbx_center = []
549
+ object_bbx_mask = []
550
+ object_ids = []
551
+ inputs_list_m1 = []
552
+ inputs_list_m2 = []
553
+ inputs_list_m3 = []
554
+ inputs_list_m4 = []
555
+ agent_modality_list = []
556
+ # used to record the number of agents in each scenario
557
+ record_len = []
558
+ label_dict_list = []
559
+ lidar_pose_list = []
560
+ origin_lidar = []
561
+ lidar_pose_clean_list = []
562
+
563
+ # pairwise transformation matrix
564
+ pairwise_t_matrix_list = []
565
+
566
+ # disconet
567
+ teacher_processed_lidar_list = []
568
+
569
+ ### 2022.10.10 single gt ####
570
+ if self.supervise_single or self.heterogeneous:
571
+ pos_equal_one_single = []
572
+ neg_equal_one_single = []
573
+ targets_single = []
574
+ object_bbx_center_single = []
575
+ object_bbx_mask_single = []
576
+
577
+ for i in range(len(batch)):
578
+ ego_dict = batch[i]['ego']
579
+ object_bbx_center.append(ego_dict['object_bbx_center'])
580
+ object_bbx_mask.append(ego_dict['object_bbx_mask'])
581
+ object_ids.append(ego_dict['object_ids'])
582
+ lidar_pose_list.append(ego_dict['lidar_poses']) # ego_dict['lidar_pose'] is np.ndarray [N,6]
583
+ lidar_pose_clean_list.append(ego_dict['lidar_poses_clean'])
584
+
585
+ for modality_name in self.modality_name_list:
586
+ if ego_dict[f'input_{modality_name}'] is not None:
587
+ eval(f"inputs_list_{modality_name}").append(ego_dict[f'input_{modality_name}']) # OrderedDict() if empty?
588
+
589
+ agent_modality_list.extend(ego_dict['agent_modality_list'])
590
+
591
+ record_len.append(ego_dict['cav_num'])
592
+ label_dict_list.append(ego_dict['label_dict'])
593
+ pairwise_t_matrix_list.append(ego_dict['pairwise_t_matrix'])
594
+
595
+ if self.visualize:
596
+ origin_lidar.append(ego_dict['origin_lidar'])
597
+
598
+ if self.kd_flag:
599
+ teacher_processed_lidar_list.append(ego_dict['teacher_processed_lidar'])
600
+
601
+ ### 2022.10.10 single gt ####
602
+ if self.supervise_single or self.heterogeneous:
603
+ pos_equal_one_single.append(ego_dict['single_label_dict_torch']['pos_equal_one'])
604
+ neg_equal_one_single.append(ego_dict['single_label_dict_torch']['neg_equal_one'])
605
+ targets_single.append(ego_dict['single_label_dict_torch']['targets'])
606
+ object_bbx_center_single.append(ego_dict['single_object_bbx_center_torch'])
607
+ object_bbx_mask_single.append(ego_dict['single_object_bbx_mask_torch'])
608
+
609
+
610
+ # convert to numpy, (B, max_num, 7)
611
+ object_bbx_center = torch.from_numpy(np.array(object_bbx_center))
612
+ object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask))
613
+
614
+
615
+ # 2023.2.5
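+ # each modality is collated separately: lidar features go through the matching
+ # self.pre_processor_<modality_name>.collate_batch (resolved via eval), while
+ # camera inputs are concatenated along the agent dimension.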
616
+ for modality_name in self.modality_name_list:
617
+ if len(eval(f"inputs_list_{modality_name}")) != 0:
618
+ if self.sensor_type_dict[modality_name] == "lidar":
619
+ merged_feature_dict = merge_features_to_dict(eval(f"inputs_list_{modality_name}"))
620
+ processed_lidar_torch_dict = eval(f"self.pre_processor_{modality_name}").collate_batch(merged_feature_dict)
621
+ output_dict['ego'].update({f'inputs_{modality_name}': processed_lidar_torch_dict})
622
+
623
+ elif self.sensor_type_dict[modality_name] == "camera":
624
+ merged_image_inputs_dict = merge_features_to_dict(eval(f"inputs_list_{modality_name}"), merge='cat')
625
+ output_dict['ego'].update({f'inputs_{modality_name}': merged_image_inputs_dict})
626
+
627
+
628
+ output_dict['ego'].update({"agent_modality_list": agent_modality_list})
629
+
630
+ record_len = torch.from_numpy(np.array(record_len, dtype=int))
631
+ lidar_pose = torch.from_numpy(np.concatenate(lidar_pose_list, axis=0))
632
+ lidar_pose_clean = torch.from_numpy(np.concatenate(lidar_pose_clean_list, axis=0))
633
+ label_torch_dict = \
634
+ self.post_processor.collate_batch(label_dict_list)
635
+
636
+ # for centerpoint
637
+ label_torch_dict.update({'object_bbx_center': object_bbx_center,
638
+ 'object_bbx_mask': object_bbx_mask})
639
+
640
+ # (B, max_cav)
641
+ pairwise_t_matrix = torch.from_numpy(np.array(pairwise_t_matrix_list))
642
+
643
+ # add pairwise_t_matrix to label dict
644
+ label_torch_dict['pairwise_t_matrix'] = pairwise_t_matrix
645
+ label_torch_dict['record_len'] = record_len
646
+
647
+
648
+ # object id is only used during inference, where batch size is 1.
649
+ # so here we only get the first element.
650
+ output_dict['ego'].update({'object_bbx_center': object_bbx_center,
651
+ 'object_bbx_mask': object_bbx_mask,
652
+ 'record_len': record_len,
653
+ 'label_dict': label_torch_dict,
654
+ 'object_ids': object_ids[0],
655
+ 'pairwise_t_matrix': pairwise_t_matrix,
656
+ 'lidar_pose_clean': lidar_pose_clean,
657
+ 'lidar_pose': lidar_pose,
658
+ 'anchor_box': self.anchor_box_torch})
659
+
660
+
661
+ if self.visualize:
662
+ origin_lidar = \
663
+ np.array(downsample_lidar_minimum(pcd_np_list=origin_lidar))
664
+ origin_lidar = torch.from_numpy(origin_lidar)
665
+ output_dict['ego'].update({'origin_lidar': origin_lidar})
666
+
667
+ if self.kd_flag:
668
+ teacher_processed_lidar_torch_dict = \
669
+ self.pre_processor.collate_batch(teacher_processed_lidar_list)
670
+ output_dict['ego'].update({'teacher_processed_lidar':teacher_processed_lidar_torch_dict})
671
+
672
+
673
+ if self.supervise_single or self.heterogeneous:
674
+ output_dict['ego'].update({
675
+ "label_dict_single":{
676
+ "pos_equal_one": torch.cat(pos_equal_one_single, dim=0),
677
+ "neg_equal_one": torch.cat(neg_equal_one_single, dim=0),
678
+ "targets": torch.cat(targets_single, dim=0),
679
+ # for centerpoint
680
+ "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0),
681
+ "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0)
682
+ },
683
+ "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0),
684
+ "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0)
685
+ })
686
+
687
+ return output_dict
688
+
689
+ def collate_batch_test(self, batch):
690
+ assert len(batch) <= 1, "Batch size 1 is required during testing!"
691
+ if batch[0] is None:
692
+ return None
693
+ output_dict = self.collate_batch_train(batch)
694
+ if output_dict is None:
695
+ return None
696
+
697
+ # check if the anchor box is in the batch
698
+ if batch[0]['ego']['anchor_box'] is not None:
699
+ output_dict['ego'].update({'anchor_box':
700
+ self.anchor_box_torch})
701
+
702
+ # save the transformation matrix (4, 4) to ego vehicle
703
+ # the transformation is only used in post-processing (effectively unused,
704
+ # since all boxes are already predicted in ego coordinates).
705
+ transformation_matrix_torch = \
706
+ torch.from_numpy(np.identity(4)).float()
707
+ transformation_matrix_clean_torch = \
708
+ torch.from_numpy(np.identity(4)).float()
709
+
710
+ output_dict['ego'].update({'transformation_matrix':
711
+ transformation_matrix_torch,
712
+ 'transformation_matrix_clean':
713
+ transformation_matrix_clean_torch,})
714
+
715
+ output_dict['ego'].update({
716
+ "sample_idx": batch[0]['ego']['sample_idx'],
717
+ "cav_id_list": batch[0]['ego']['cav_id_list'],
718
+ "agent_modality_list": batch[0]['ego']['agent_modality_list']
719
+ })
720
+
721
+ return output_dict
722
+
723
+
724
+ def post_process(self, data_dict, output_dict):
725
+ """
726
+ Process the outputs of the model into 2D/3D bounding boxes.
727
+
728
+ Parameters
729
+ ----------
730
+ data_dict : dict
731
+ The dictionary containing the original input data of the model.
732
+
733
+ output_dict : dict
734
+ The dictionary containing the output of the model.
735
+
736
+ Returns
737
+ -------
738
+ pred_box_tensor : torch.Tensor
739
+ The tensor of prediction bounding box after NMS.
740
+ gt_box_tensor : torch.Tensor
741
+ The tensor of gt bounding box.
742
+ """
743
+ pred_box_tensor, pred_score = \
744
+ self.post_processor.post_process(data_dict, output_dict)
745
+ gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict)
746
+
747
+ return pred_box_tensor, pred_score, gt_box_tensor
748
+
749
+
750
+ return IntermediateheterFusionDataset
751
+
752
+
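+ # Minimal usage sketch (not part of the original file; the base dataset class
+ # and the factory name are assumptions following the get<...>FusionDataset
+ # pattern used elsewhere in this upload):
+ #     HeterDataset = getIntermediateheterFusionDataset(SomeBaseDataset)
+ #     dataset = HeterDataset(params, visualize=False, train=True)
+ #     batch = dataset.collate_batch_train([dataset[0]])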
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_multiclass_fusion_dataset.py ADDED
@@ -0,0 +1,892 @@
1
+ # intermediate fusion dataset
2
+ import random
3
+ import math
4
+ from collections import OrderedDict
5
+ import numpy as np
6
+ import torch
7
+ import copy
8
+ from icecream import ic
9
+ from PIL import Image
10
+ import pickle as pkl
11
+ from opencood.utils import box_utils as box_utils
12
+ from opencood.data_utils.pre_processor import build_preprocessor
13
+ from opencood.data_utils.post_processor import build_postprocessor
14
+ from opencood.utils.camera_utils import (
15
+ sample_augmentation,
16
+ img_transform,
17
+ normalize_img,
18
+ img_to_tensor,
19
+ )
20
+ # from opencood.utils.heter_utils import AgentSelector
21
+ from opencood.utils.common_utils import merge_features_to_dict
22
+ from opencood.utils.transformation_utils import x1_to_x2, x_to_world, get_pairwise_transformation, get_pairwise_transformation_asymmetric
23
+ from opencood.utils.pose_utils import add_noise_data_dict, add_noise_data_dict_asymmetric
24
+ from opencood.utils.pcd_utils import (
25
+ mask_points_by_range,
26
+ mask_ego_points,
27
+ mask_ego_points_v2,
28
+ shuffle_points,
29
+ downsample_lidar_minimum,
30
+ )
31
+ from opencood.utils.common_utils import read_json
32
+
33
+
34
+ def getIntermediatemulticlassFusionDataset(cls):
35
+ """
36
+ cls: the Basedataset.
37
+ """
38
+ class IntermediatemulticlassFusionDataset(cls):
39
+ def __init__(self, params, visualize, train=True):
40
+ super().__init__(params, visualize, train)
41
+ # intermediate and supervise single
42
+ self.supervise_single = True if ('supervise_single' in params['model']['args'] and params['model']['args']['supervise_single']) \
43
+ else False
44
+ self.proj_first = False if 'proj_first' not in params['fusion']['args']\
45
+ else params['fusion']['args']['proj_first']
46
+
47
+ self.anchor_box = self.post_processor.generate_anchor_box()
48
+ self.anchor_box_torch = torch.from_numpy(self.anchor_box)
49
+
50
+ self.heterogeneous = False
51
+ if 'heter' in params:
52
+ self.heterogeneous = True
53
+ self.selector = AgentSelector(params['heter'], self.max_cav)
54
+
55
+ self.kd_flag = params.get('kd_flag', False)
56
+
57
+ self.box_align = False
58
+ if "box_align" in params:
59
+ self.box_align = True
60
+ self.stage1_result_path = params['box_align']['train_result'] if train else params['box_align']['val_result']
61
+ self.stage1_result = read_json(self.stage1_result_path)
62
+ self.box_align_args = params['box_align']['args']
63
+
64
+ self.multiclass = params['model']['args']['multi_class']
65
+ self.online_eval_only = False
66
+
67
+ def get_item_single_car(self, selected_cav_base, ego_cav_base, tpe='all', cav_id='car_0', online_eval=False):
68
+ """
69
+ Process a single CAV's information for the train/test pipeline.
70
+
71
+
72
+ Parameters
73
+ ----------
74
+ selected_cav_base : dict
75
+ The dictionary contains a single CAV's raw information.
76
+ including 'params', 'camera_data'
77
+ ego_pose : list, length 6
78
+ The ego vehicle lidar pose under world coordinate.
79
+ ego_pose_clean : list, length 6
80
+ only used for gt box generation
81
+
82
+ Returns
83
+ -------
84
+ selected_cav_processed : dict
85
+ The dictionary contains the cav's processed information.
86
+ """
87
+ selected_cav_processed = {}
88
+ ego_pose, ego_pose_clean = ego_cav_base['params']['lidar_pose'], ego_cav_base['params']['lidar_pose_clean']
89
+
90
+ # calculate the transformation matrix
91
+ transformation_matrix = \
92
+ x1_to_x2(selected_cav_base['params']['lidar_pose'],
93
+ ego_pose) # T_ego_cav
94
+ transformation_matrix_clean = \
95
+ x1_to_x2(selected_cav_base['params']['lidar_pose_clean'],
96
+ ego_pose_clean)
97
+
98
+ # lidar
99
+ if tpe == 'all':
100
+ if self.load_lidar_file or self.visualize:
101
+ # process lidar
102
+ lidar_np = selected_cav_base['lidar_np']
103
+ lidar_np = shuffle_points(lidar_np)
104
+ # remove points that hit the vehicle itself
105
+ if not cav_id.startswith('rsu'):
106
+ lidar_np = mask_ego_points_v2(lidar_np)
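+ # roadside units (cav_id starting with 'rsu') keep all points, presumably
+ # because the self-hit mask only makes sense for vehicle-mounted lidars.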
107
+ # project the lidar to ego space
108
+ # x,y,z in ego space
109
+ projected_lidar = \
110
+ box_utils.project_points_by_matrix_torch(lidar_np[:, :3],
111
+ transformation_matrix)
112
+ if self.proj_first:
113
+ lidar_np[:, :3] = projected_lidar
114
+
115
+ if self.visualize:
116
+ # filter lidar
117
+ selected_cav_processed.update({'projected_lidar': projected_lidar})
118
+
119
+ if self.kd_flag:
120
+ lidar_proj_np = copy.deepcopy(lidar_np)
121
+ lidar_proj_np[:,:3] = projected_lidar
122
+
123
+ selected_cav_processed.update({'projected_lidar': lidar_proj_np})
124
+
125
+ processed_lidar = self.pre_processor.preprocess(lidar_np)
126
+ selected_cav_processed.update({'processed_features': processed_lidar})
127
+
128
+ if True: # not online_eval:
129
+ # generate targets label single GT, note the reference pose is itself.
130
+ object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center(
131
+ [selected_cav_base], selected_cav_base['params']['lidar_pose']
132
+ )
133
+ label_dict = {}
134
+ if tpe == 'all':
135
+ # unused label
136
+ if False:
137
+ label_dict = self.post_processor.generate_label(
138
+ gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask
139
+ )
140
+ selected_cav_processed.update({
141
+ "single_label_dict": label_dict,
142
+ "single_object_bbx_center": object_bbx_center,
143
+ "single_object_bbx_mask": object_bbx_mask})
144
+
145
+ if tpe == 'all':
146
+ # camera
147
+ if self.load_camera_file:
148
+ camera_data_list = selected_cav_base["camera_data"]
149
+
150
+ params = selected_cav_base["params"]
151
+ imgs = []
152
+ rots = []
153
+ trans = []
154
+ intrins = []
155
+ extrinsics = []
156
+ post_rots = []
157
+ post_trans = []
158
+
159
+ for idx, img in enumerate(camera_data_list):
160
+ camera_to_lidar, camera_intrinsic = self.get_ext_int(params, idx)
161
+
162
+ intrin = torch.from_numpy(camera_intrinsic)
163
+ rot = torch.from_numpy(
164
+ camera_to_lidar[:3, :3]
165
+ ) # R_wc, we consider world-coord is the lidar-coord
166
+ tran = torch.from_numpy(camera_to_lidar[:3, 3]) # T_wc
167
+
168
+ post_rot = torch.eye(2)
169
+ post_tran = torch.zeros(2)
170
+
171
+ img_src = [img]
172
+
173
+ # depth
174
+ if self.load_depth_file:
175
+ depth_img = selected_cav_base["depth_data"][idx]
176
+ img_src.append(depth_img)
177
+ else:
178
+ depth_img = None
179
+
180
+ # data augmentation
181
+ resize, resize_dims, crop, flip, rotate = sample_augmentation(
182
+ self.data_aug_conf, self.train
183
+ )
184
+ img_src, post_rot2, post_tran2 = img_transform(
185
+ img_src,
186
+ post_rot,
187
+ post_tran,
188
+ resize=resize,
189
+ resize_dims=resize_dims,
190
+ crop=crop,
191
+ flip=flip,
192
+ rotate=rotate,
193
+ )
194
+ # for convenience, make augmentation matrices 3x3
195
+ post_tran = torch.zeros(3)
196
+ post_rot = torch.eye(3)
197
+ post_tran[:2] = post_tran2
198
+ post_rot[:2, :2] = post_rot2
199
+
200
+ # decouple RGB and Depth
201
+
202
+ img_src[0] = normalize_img(img_src[0])
203
+ if self.load_depth_file:
204
+ img_src[1] = img_to_tensor(img_src[1]) * 255
205
+
206
+ imgs.append(torch.cat(img_src, dim=0))
207
+ intrins.append(intrin)
208
+ extrinsics.append(torch.from_numpy(camera_to_lidar))
209
+ rots.append(rot)
210
+ trans.append(tran)
211
+ post_rots.append(post_rot)
212
+ post_trans.append(post_tran)
213
+
214
+
215
+ selected_cav_processed.update(
216
+ {
217
+ "image_inputs":
218
+ {
219
+ "imgs": torch.stack(imgs), # [Ncam, 3or4, H, W]
220
+ "intrins": torch.stack(intrins),
221
+ "extrinsics": torch.stack(extrinsics),
222
+ "rots": torch.stack(rots),
223
+ "trans": torch.stack(trans),
224
+ "post_rots": torch.stack(post_rots),
225
+ "post_trans": torch.stack(post_trans),
226
+ }
227
+ }
228
+ )
229
+
230
+ # anchor box
231
+ selected_cav_processed.update({"anchor_box": self.anchor_box})
232
+
233
+ if True: # not online_eval:
234
+ # note the reference pose ego
235
+ object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center([selected_cav_base],
236
+ ego_pose_clean)
237
+ selected_cav_processed.update(
238
+ {
239
+ "object_bbx_center": object_bbx_center[object_bbx_mask == 1],
240
+ "object_bbx_mask": object_bbx_mask,
241
+ "object_ids": object_ids,
242
+ }
243
+ )
244
+ selected_cav_processed.update(
245
+ {
246
+ 'transformation_matrix': transformation_matrix,
247
+ 'transformation_matrix_clean': transformation_matrix_clean
248
+ }
249
+ )
250
+
251
+
252
+ return selected_cav_processed
253
+
254
+ def __getitem__(self, idx, extra_source=None, data_dir=None, plan_without_perception_gt=True):
255
+ if (data_dir is not None) and (plan_without_perception_gt):
256
+ extra_source=1
257
+ object_bbx_center_list = []
258
+ object_bbx_mask_list = []
259
+ object_id_dict = {}
260
+
261
+ object_bbx_center_list_single = []
262
+ object_bbx_mask_list_single = []
263
+
264
+
265
+ output_dict = {}
266
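+ # one pass with tpe='all' builds the fused sensor inputs; the passes with
+ # tpe 0, 1 and 3 only collect per-class GT boxes, which are stacked into
+ # multi-class tensors below (the exact class-id mapping depends on the base
+ # dataset config and is an assumption here).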
+ for tpe in ['all', 0, 1, 3]:
267
+ output_single_class = self.__getitem_single_class__(idx, tpe, extra_source, data_dir)
268
+ output_dict[tpe] = output_single_class
269
+ if tpe == 'all':
270
+ continue
271
+ elif tpe == 'all' and extra_source!=None:
272
+ break
273
+ object_bbx_center_list.append(output_single_class['ego']['object_bbx_center'])
274
+ object_bbx_mask_list.append(output_single_class['ego']['object_bbx_mask'])
275
+ if self.supervise_single:
276
+ object_bbx_center_list_single.append(output_single_class['ego']['single_object_bbx_center_torch'])
277
+ object_bbx_mask_list_single.append(output_single_class['ego']['single_object_bbx_mask_torch'])
278
+
279
+ object_id_dict[tpe] = output_single_class['ego']['object_ids']
280
+
281
+ if True: # self.multiclass and extra_source==None:
282
+ output_dict['all']['ego']['object_bbx_center'] = np.stack(object_bbx_center_list, axis=0)
283
+ output_dict['all']['ego']['object_bbx_mask'] = np.stack(object_bbx_mask_list, axis=0)
284
+ if self.supervise_single:
285
+ output_dict['all']['ego']['single_object_bbx_center_torch'] = torch.stack(object_bbx_center_list_single, axis=1)
286
+ output_dict['all']['ego']['single_object_bbx_mask_torch'] = torch.stack(object_bbx_mask_list_single, axis=1)
287
+
288
+ output_dict['all']['ego']['object_ids'] = object_id_dict
289
+ # print('finish get item')
290
+ return output_dict['all']
291
+
292
+ def __getitem_single_class__(self, idx, tpe=None, extra_source=None, data_dir=None):
293
+
294
+ if extra_source is None and data_dir is None:
295
+ base_data_dict = self.retrieve_base_data(idx, tpe)
296
+ elif data_dir is not None:
297
+ base_data_dict = self.retrieve_base_data(idx=None, tpe=tpe, data_dir=data_dir)
298
+ elif extra_source is not None:
299
+ base_data_dict = self.retrieve_base_data(idx=None, tpe=tpe, extra_source=extra_source)
300
+
301
+ base_data_dict = add_noise_data_dict_asymmetric(base_data_dict,self.params['noise_setting'])
302
+ processed_data_dict = OrderedDict()
303
+ processed_data_dict['ego'] = {}
304
+
305
+ ego_id = -1
306
+ ego_lidar_pose = []
307
+ ego_cav_base = None
308
+
309
+ # first find the ego vehicle's lidar pose
310
+ for cav_id, cav_content in base_data_dict.items():
311
+ if cav_content['ego']:
312
+ ego_id = cav_id
313
+ ego_lidar_pose = cav_content['params']['lidar_pose']
314
+ ego_cav_base = cav_content
315
+ break
316
+
317
+ assert cav_id == list(base_data_dict.keys())[
318
+ 0], "The first element in the OrderedDict must be ego"
319
+ assert ego_id != -1
320
+ assert len(ego_lidar_pose) > 0
321
+
322
+ agents_image_inputs = []
323
+ processed_features = []
324
+ object_stack = []
325
+ object_id_stack = []
326
+ single_label_list = []
327
+ single_object_bbx_center_list = []
328
+ single_object_bbx_mask_list = []
329
+ too_far = []
330
+ lidar_pose_list = []
331
+ lidar_pose_clean_list = []
332
+ cav_id_list = []
333
+ projected_lidar_clean_list = [] # disconet
334
+
335
+ if self.visualize or self.kd_flag:
336
+ projected_lidar_stack = []
337
+
338
+ # loop over all CAVs to process information
339
+ for cav_id, selected_cav_base in base_data_dict.items():
340
+ # check if the cav is within the communication range with ego
341
+ distance = \
342
+ math.sqrt((selected_cav_base['params']['lidar_pose'][0] -
343
+ ego_lidar_pose[0]) ** 2 + (
344
+ selected_cav_base['params'][
345
+ 'lidar_pose'][1] - ego_lidar_pose[
346
+ 1]) ** 2)
347
+
348
+ # if distance is too far, we will just skip this agent
349
+ if distance > self.params['comm_range']:
350
+ too_far.append(cav_id)
351
+ continue
352
+
353
+ lidar_pose_clean_list.append(selected_cav_base['params']['lidar_pose_clean'])
354
+ lidar_pose_list.append(selected_cav_base['params']['lidar_pose']) # 6dof pose
355
+ cav_id_list.append(cav_id)
356
+
357
+ for cav_id in too_far:
358
+ base_data_dict.pop(cav_id)
359
+
360
+ ########## Updated by Yifan Lu 2022.1.26 ############
361
+ # box align to correct pose.
362
+ # stage1_content contains all agent. Even out of comm range.
363
+ if self.box_align and str(idx) in self.stage1_result.keys(): # False
364
+ from opencood.models.sub_modules.box_align_v2 import box_alignment_relative_sample_np
365
+ stage1_content = self.stage1_result[str(idx)]
366
+ if stage1_content is not None:
367
+ all_agent_id_list = stage1_content['cav_id_list'] # include those out of range
368
+ all_agent_corners_list = stage1_content['pred_corner3d_np_list']
369
+ all_agent_uncertainty_list = stage1_content['uncertainty_np_list']
370
+
371
+ cur_agent_id_list = cav_id_list
372
+ cur_agent_pose = [base_data_dict[cav_id]['params']['lidar_pose'] for cav_id in cav_id_list]
373
+ cur_agnet_pose = np.array(cur_agent_pose)
374
+ cur_agent_in_all_agent = [all_agent_id_list.index(cur_agent) for cur_agent in cur_agent_id_list] # indexing current agent in `all_agent_id_list`
375
+
376
+ pred_corners_list = [np.array(all_agent_corners_list[cur_in_all_ind], dtype=np.float64)
377
+ for cur_in_all_ind in cur_agent_in_all_agent]
378
+ uncertainty_list = [np.array(all_agent_uncertainty_list[cur_in_all_ind], dtype=np.float64)
379
+ for cur_in_all_ind in cur_agent_in_all_agent]
380
+
381
+ if sum([len(pred_corners) for pred_corners in pred_corners_list]) != 0:
382
+ refined_pose = box_alignment_relative_sample_np(pred_corners_list,
383
+ cur_agnet_pose,
384
+ uncertainty_list=uncertainty_list,
385
+ **self.box_align_args)
386
+ cur_agnet_pose[:,[0,1,4]] = refined_pose
387
+
388
+ for i, cav_id in enumerate(cav_id_list):
389
+ lidar_pose_list[i] = cur_agnet_pose[i].tolist()
390
+ base_data_dict[cav_id]['params']['lidar_pose'] = cur_agnet_pose[i].tolist()
391
+
392
+
393
+
394
+ pairwise_t_matrix = \
395
+ get_pairwise_transformation_asymmetric(base_data_dict,
396
+ self.max_cav,
397
+ self.proj_first)
398
+
399
+ lidar_poses = np.array(lidar_pose_list).reshape(-1, 6) # [N_cav, 6]
400
+ lidar_poses_clean = np.array(lidar_pose_clean_list).reshape(-1, 6) # [N_cav, 6]
401
+
402
+ # merge preprocessed features from different cavs into the same dict
403
+ cav_num = len(cav_id_list)
404
+
405
+ # heterogeneous
406
+ if self.heterogeneous:
407
+ lidar_agent, camera_agent = self.selector.select_agent(idx)
408
+ lidar_agent = lidar_agent[:cav_num]
409
+ processed_data_dict['ego'].update({"lidar_agent": lidar_agent})
410
+
411
+ for _i, cav_id in enumerate(cav_id_list):
412
+ selected_cav_base = base_data_dict[cav_id]
413
+
414
+ # dynamic object center generator! for heterogeneous input
415
+ if (not self.visualize) and self.heterogeneous and lidar_agent[_i]:
416
+ self.generate_object_center = self.generate_object_center_lidar
417
+ elif (not self.visualize) and self.heterogeneous and (not lidar_agent[_i]):
418
+ self.generate_object_center = self.generate_object_center_camera
419
+
420
+ selected_cav_processed = self.get_item_single_car(
421
+ selected_cav_base,
422
+ ego_cav_base,
423
+ tpe,
424
+ cav_id,
425
+ extra_source!=None)
426
+
427
+ if True: #extra_source==None:
428
+ object_stack.append(selected_cav_processed['object_bbx_center'])
429
+ object_id_stack += selected_cav_processed['object_ids']
430
+ if tpe == 'all':
431
+ if self.load_lidar_file:
432
+ processed_features.append(
433
+ selected_cav_processed['processed_features'])
434
+ if self.load_camera_file:
435
+ agents_image_inputs.append(
436
+ selected_cav_processed['image_inputs'])
437
+
438
+ if self.visualize or self.kd_flag:
439
+ projected_lidar_stack.append(
440
+ selected_cav_processed['projected_lidar'])
441
+
442
+ if True: #self.supervise_single and extra_source==None:
443
+ single_label_list.append(selected_cav_processed['single_label_dict'])
444
+ single_object_bbx_center_list.append(selected_cav_processed['single_object_bbx_center'])
445
+ single_object_bbx_mask_list.append(selected_cav_processed['single_object_bbx_mask'])
446
+
447
+ # generate single view GT label
448
+ if True: # self.supervise_single and extra_source==None:
449
+ single_label_dicts = {}
450
+ if tpe == 'all':
451
+ # unused label
452
+ if False:
453
+ single_label_dicts = self.post_processor.collate_batch(single_label_list)
454
+ single_object_bbx_center = torch.from_numpy(np.array(single_object_bbx_center_list))
455
+ single_object_bbx_mask = torch.from_numpy(np.array(single_object_bbx_mask_list))
456
+ processed_data_dict['ego'].update({
457
+ "single_label_dict_torch": single_label_dicts,
458
+ "single_object_bbx_center_torch": single_object_bbx_center,
459
+ "single_object_bbx_mask_torch": single_object_bbx_mask,
460
+ })
461
+
462
+ if self.kd_flag:
463
+ stack_lidar_np = np.vstack(projected_lidar_stack)
464
+ stack_lidar_np = mask_points_by_range(stack_lidar_np,
465
+ self.params['preprocess'][
466
+ 'cav_lidar_range'])
467
+ stack_feature_processed = self.pre_processor.preprocess(stack_lidar_np)
468
+ processed_data_dict['ego'].update({'teacher_processed_lidar':
469
+ stack_feature_processed})
470
+
471
+ if True: # extra_source is None:
472
+ # exclude all repetitive objects
473
+ unique_indices = \
474
+ [object_id_stack.index(x) for x in set(object_id_stack)]
475
+ object_stack = np.vstack(object_stack)
476
+ object_stack = object_stack[unique_indices]
477
+
478
+ # make sure the number of bounding boxes is the same across all frames (pad to max_num)
479
+ object_bbx_center = \
480
+ np.zeros((self.params['postprocess']['max_num'], 7))
481
+ mask = np.zeros(self.params['postprocess']['max_num'])
482
+ object_bbx_center[:object_stack.shape[0], :] = object_stack
483
+ mask[:object_stack.shape[0]] = 1
484
+
485
+ processed_data_dict['ego'].update(
486
+ {'object_bbx_center': object_bbx_center, # (100,7)
487
+ 'object_bbx_mask': mask, # (100,)
488
+ 'object_ids': [object_id_stack[i] for i in unique_indices],
489
+ }
490
+ )
491
+
492
+ # generate targets label
493
+ label_dict = {}
494
+ if tpe == 'all':
495
+ # unused label
496
+ if False:
497
+ label_dict = \
498
+ self.post_processor.generate_label(
499
+ gt_box_center=object_bbx_center,
500
+ anchors=self.anchor_box,
501
+ mask=mask)
502
+
503
+ processed_data_dict['ego'].update(
504
+ {
505
+ 'anchor_box': self.anchor_box,
506
+ 'label_dict': label_dict,
507
+ 'cav_num': cav_num,
508
+ 'pairwise_t_matrix': pairwise_t_matrix,
509
+ 'lidar_poses_clean': lidar_poses_clean,
510
+ 'lidar_poses': lidar_poses})
511
+
512
+ if tpe == 'all':
513
+ if self.load_lidar_file:
514
+ merged_feature_dict = merge_features_to_dict(processed_features)
515
+ processed_data_dict['ego'].update({'processed_lidar': merged_feature_dict})
516
+ if self.load_camera_file:
517
+ merged_image_inputs_dict = merge_features_to_dict(agents_image_inputs, merge='stack')
518
+ processed_data_dict['ego'].update({'image_inputs': merged_image_inputs_dict})
519
+
520
+ if self.visualize:
521
+ processed_data_dict['ego'].update({'origin_lidar':
522
+ # projected_lidar_stack})
523
+ np.vstack(
524
+ projected_lidar_stack)})
525
+ processed_data_dict['ego'].update({'lidar_len': [len(projected_lidar_stack[i]) for i in range(len(projected_lidar_stack))]})
526
+
527
+
528
+ processed_data_dict['ego'].update({'sample_idx': idx,
529
+ 'cav_id_list': cav_id_list})
530
+
531
+ img_front_list = []
532
+ img_left_list = []
533
+ img_right_list = []
534
+ BEV_list = []
535
+
536
+ if self.visualize:
537
+ for car_id in base_data_dict:
538
+ if not base_data_dict[car_id]['ego']:
539
+ continue
540
+ if 'rgb_front' in base_data_dict[car_id] and 'rgb_left' in base_data_dict[car_id] and 'rgb_right' in base_data_dict[car_id] and 'BEV' in base_data_dict[car_id] :
541
+ img_front_list.append(base_data_dict[car_id]['rgb_front'])
542
+ img_left_list.append(base_data_dict[car_id]['rgb_left'])
543
+ img_right_list.append(base_data_dict[car_id]['rgb_right'])
544
+ BEV_list.append(base_data_dict[car_id]['BEV'])
545
+ processed_data_dict['ego'].update({'img_front': img_front_list,
546
+ 'img_left': img_left_list,
547
+ 'img_right': img_right_list,
548
+ 'BEV': BEV_list})
549
+ processed_data_dict['ego'].update({'scene_dict': base_data_dict['car_0']['scene_dict'],
550
+ 'frame_id': base_data_dict['car_0']['frame_id'],
551
+ })
552
+
553
+
554
+ # TODO: LSS debug
555
+ processed_data_dict['ego'].update({"det_data": base_data_dict['car_0']['det_data']})
556
+ detmap_pose_list = []
557
+ for car_id in base_data_dict:
558
+ detmap_pose_list.append(base_data_dict[car_id]['detmap_pose'])
559
+ detmap_pose_list = torch.from_numpy(np.array(detmap_pose_list))
560
+ processed_data_dict['ego'].update({"detmap_pose": detmap_pose_list})
561
+ ##
562
+
563
+ return processed_data_dict
564
+
565
+
566
+ def collate_batch_train(self, batch, online_eval_only=False):
567
+ # Intermediate fusion is different from the other two
568
+ output_dict = {'ego': {}}
569
+
570
+ object_bbx_center = []
571
+ object_bbx_mask = []
572
+ object_ids = []
573
+ processed_lidar_list = []
574
+ image_inputs_list = []
575
+ # used to record different scenario
576
+ record_len = []
577
+ label_dict_list = []
578
+ lidar_pose_list = []
579
+ origin_lidar = []
580
+ lidar_len = []
581
+ lidar_pose_clean_list = []
582
+
583
+ # heterogeneous
584
+ lidar_agent_list = []
585
+
586
+ # pairwise transformation matrix
587
+ pairwise_t_matrix_list = []
588
+
589
+ # disconet
590
+ teacher_processed_lidar_list = []
591
+
592
+ # image
593
+ img_front = []
594
+ img_left = []
595
+ img_right = []
596
+ BEV = []
597
+
598
+ dict_list = []
599
+
600
+ # TODO: LSS debug
601
+ det_data = []
602
+ detmap_pose = []
603
+
604
+ ### 2022.10.10 single gt ####
605
+ if self.supervise_single:
606
+ pos_equal_one_single = []
607
+ neg_equal_one_single = []
608
+ targets_single = []
609
+ object_bbx_center_single = []
610
+ object_bbx_mask_single = []
611
+
612
+ for i in range(len(batch)):
613
+ ego_dict = batch[i]['ego']
614
+ det_data.append(torch.from_numpy(ego_dict['det_data']).unsqueeze(0))
615
+ detmap_pose.append(ego_dict['detmap_pose'])
616
+ if not online_eval_only:
617
+ object_bbx_center.append(ego_dict['object_bbx_center'])
618
+ object_bbx_mask.append(ego_dict['object_bbx_mask'])
619
+ object_ids.append(ego_dict['object_ids'])
620
+ else:
621
+ object_ids.append(None)
622
+ lidar_pose_list.append(ego_dict['lidar_poses']) # ego_dict['lidar_pose'] is np.ndarray [N,6]
623
+ lidar_pose_clean_list.append(ego_dict['lidar_poses_clean'])
624
+ if self.load_lidar_file:
625
+ processed_lidar_list.append(ego_dict['processed_lidar'])
626
+ if self.load_camera_file:
627
+ image_inputs_list.append(ego_dict['image_inputs']) # different cav_num, ego_dict['image_inputs'] is dict.
628
+
629
+ record_len.append(ego_dict['cav_num'])
630
+ label_dict_list.append(ego_dict['label_dict'])
631
+ pairwise_t_matrix_list.append(ego_dict['pairwise_t_matrix'])
632
+
633
+ dict_list.append([ego_dict['scene_dict'], ego_dict['frame_id']])
634
+
635
+ if self.visualize:
636
+ origin_lidar.append(ego_dict['origin_lidar'])
637
+ lidar_len.append(ego_dict['lidar_len'])
638
+ if len(ego_dict['img_front']) > 0 and len(ego_dict['img_right']) > 0 and len(ego_dict['img_left']) > 0 and len(ego_dict['BEV']) > 0:
639
+ img_front.append(ego_dict['img_front'][0])
640
+ img_left.append(ego_dict['img_left'][0])
641
+ img_right.append(ego_dict['img_right'][0])
642
+ BEV.append(ego_dict['BEV'][0])
643
+
644
+
645
+ if self.kd_flag:
646
+ teacher_processed_lidar_list.append(ego_dict['teacher_processed_lidar'])
647
+
648
+ ### 2022.10.10 single gt ####
649
+ if self.supervise_single and not online_eval_only:
650
+ # unused label
651
+ if False:
652
+ pos_equal_one_single.append(ego_dict['single_label_dict_torch']['pos_equal_one'])
653
+ neg_equal_one_single.append(ego_dict['single_label_dict_torch']['neg_equal_one'])
654
+ targets_single.append(ego_dict['single_label_dict_torch']['targets'])
655
+ object_bbx_center_single.append(ego_dict['single_object_bbx_center_torch'])
656
+ object_bbx_mask_single.append(ego_dict['single_object_bbx_mask_torch'])
657
+
658
+ # heterogeneous
659
+ if self.heterogeneous:
660
+ lidar_agent_list.append(ego_dict['lidar_agent'])
661
+
662
+ # convert to numpy, (B, max_num, 7)
663
+ if not online_eval_only:
664
+ object_bbx_center = torch.from_numpy(np.array(object_bbx_center))
665
+ object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask))
666
+ else:
667
+ object_bbx_center = None
668
+ object_bbx_mask = None
669
+
670
+ if self.load_lidar_file:
671
+ merged_feature_dict = merge_features_to_dict(processed_lidar_list)
672
+
673
+ if self.heterogeneous:
674
+ lidar_agent = np.concatenate(lidar_agent_list)
675
+ lidar_agent_idx = lidar_agent.nonzero()[0].tolist()
676
+ for k, v in merged_feature_dict.items(): # 'voxel_features' 'voxel_num_points' 'voxel_coords'
677
+ merged_feature_dict[k] = [v[index] for index in lidar_agent_idx]
678
+
679
+ if not self.heterogeneous or (self.heterogeneous and sum(lidar_agent) != 0):
680
+ processed_lidar_torch_dict = \
681
+ self.pre_processor.collate_batch(merged_feature_dict)
682
+ output_dict['ego'].update({'processed_lidar': processed_lidar_torch_dict})
683
+
684
+ if self.load_camera_file:
685
+ merged_image_inputs_dict = merge_features_to_dict(image_inputs_list, merge='cat')
686
+
687
+ if self.heterogeneous:
688
+ lidar_agent = np.concatenate(lidar_agent_list)
689
+ camera_agent = 1 - lidar_agent
690
+ camera_agent_idx = camera_agent.nonzero()[0].tolist()
691
+ if sum(camera_agent) != 0:
692
+ for k, v in merged_image_inputs_dict.items(): # 'imgs' 'rots' 'trans' ...
693
+ merged_image_inputs_dict[k] = torch.stack([v[index] for index in camera_agent_idx])
694
+
695
+ if not self.heterogeneous or (self.heterogeneous and sum(camera_agent) != 0):
696
+ output_dict['ego'].update({'image_inputs': merged_image_inputs_dict})
697
+
698
+ record_len = torch.from_numpy(np.array(record_len, dtype=int))
699
+ lidar_pose = torch.from_numpy(np.concatenate(lidar_pose_list, axis=0))
700
+ lidar_pose_clean = torch.from_numpy(np.concatenate(lidar_pose_clean_list, axis=0))
701
+ # unused label
702
+ label_torch_dict = {}
703
+ if False:
704
+ label_torch_dict = \
705
+ self.post_processor.collate_batch(label_dict_list)
706
+
707
+ # for centerpoint
708
+ label_torch_dict.update({'object_bbx_center': object_bbx_center,
709
+ 'object_bbx_mask': object_bbx_mask})
710
+
711
+ # (B, max_cav)
712
+ pairwise_t_matrix = torch.from_numpy(np.array(pairwise_t_matrix_list))
713
+
714
+ # add pairwise_t_matrix to label dict
715
+ label_torch_dict['pairwise_t_matrix'] = pairwise_t_matrix
716
+ label_torch_dict['record_len'] = record_len
717
+
718
+
719
+ # object id is only used during inference, where batch size is 1.
720
+ # so here we only get the first element.
721
+ output_dict['ego'].update({'object_bbx_center': object_bbx_center,
722
+ 'object_bbx_mask': object_bbx_mask,
723
+ 'record_len': record_len,
724
+ 'label_dict': label_torch_dict,
725
+ 'object_ids': object_ids[0],
726
+ 'pairwise_t_matrix': pairwise_t_matrix,
727
+ 'lidar_pose_clean': lidar_pose_clean,
728
+ 'lidar_pose': lidar_pose,
729
+ 'anchor_box': self.anchor_box_torch})
730
+
731
+
732
+ output_dict['ego'].update({'dict_list': dict_list})
733
+
734
+ if self.visualize:
735
+ origin_lidar = torch.from_numpy(np.array(origin_lidar))
736
+ output_dict['ego'].update({'origin_lidar': origin_lidar})
737
+ lidar_len = np.array(lidar_len)
738
+ output_dict['ego'].update({'lidar_len': lidar_len})
739
+ output_dict['ego'].update({'img_front': img_front})
740
+ output_dict['ego'].update({'img_right': img_right})
741
+ output_dict['ego'].update({'img_left': img_left})
742
+ output_dict['ego'].update({'BEV': BEV})
743
+
744
+ if self.kd_flag:
745
+ teacher_processed_lidar_torch_dict = \
746
+ self.pre_processor.collate_batch(teacher_processed_lidar_list)
747
+ output_dict['ego'].update({'teacher_processed_lidar':teacher_processed_lidar_torch_dict})
748
+
749
+
750
+ if self.supervise_single and not online_eval_only:
751
+ output_dict['ego'].update({
752
+ "label_dict_single":{
753
+ # for centerpoint
754
+ "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0),
755
+ "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0)
756
+ },
757
+ "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0),
758
+ "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0)
759
+ })
760
+
761
+ if self.heterogeneous:
762
+ output_dict['ego'].update({
763
+ "lidar_agent_record": torch.from_numpy(np.concatenate(lidar_agent_list)) # [0,1,1,0,1...]
764
+ })
765
+
766
+ # TODO: LSS debug
767
+ det_data = torch.cat(det_data, dim=0)
768
+ detmap_pose = torch.cat(detmap_pose, dim=0)
769
+ output_dict['ego'].update({'detmap_pose': detmap_pose})
770
+
771
+ output_dict['ego']['label_dict'].update({
772
+ 'det_data': det_data})
773
+ return output_dict
774
+
775
+ def collate_batch_test(self, batch, online_eval_only=False):
776
+
777
+ self.online_eval_only = online_eval_only
778
+ assert len(batch) <= 1, "Batch size 1 is required during testing!"
779
+ output_dict = self.collate_batch_train(batch, online_eval_only)
780
+ if output_dict is None:
781
+ return None
782
+
783
+ # check if anchor box in the batch
784
+ if batch[0]['ego']['anchor_box'] is not None:
785
+ output_dict['ego'].update({'anchor_box':
786
+ self.anchor_box_torch})
787
+
788
+ # save the transformation matrix (4, 4) to ego vehicle
789
+ # transformation is only used in post process (no use.)
790
+ # we all predict boxes in ego coord.
791
+ transformation_matrix_torch = \
792
+ torch.from_numpy(np.identity(4)).float()
793
+ transformation_matrix_clean_torch = \
794
+ torch.from_numpy(np.identity(4)).float()
795
+
796
+ output_dict['ego'].update({'transformation_matrix':
797
+ transformation_matrix_torch,
798
+ 'transformation_matrix_clean':
799
+ transformation_matrix_clean_torch,})
800
+
801
+ output_dict['ego'].update({
802
+ "sample_idx": batch[0]['ego']['sample_idx'],
803
+ "cav_id_list": batch[0]['ego']['cav_id_list']
804
+ })
805
+
806
+ return output_dict
807
+
808
+
809
+ def post_process(self, data_dict, output_dict):
810
+ """
811
+ Process the outputs of the model to 2D/3D bounding box.
812
+
813
+ Parameters
814
+ ----------
815
+ data_dict : dict
816
+ The dictionary containing the origin input data of model.
817
+
818
+ output_dict :dict
819
+ The dictionary containing the output of the model.
820
+
821
+ Returns
822
+ -------
823
+ pred_box_tensor : torch.Tensor
824
+ The tensor of prediction bounding box after NMS.
825
+ gt_box_tensor : torch.Tensor
826
+ The tensor of gt bounding box.
827
+ """
828
+ pred_box_tensor, pred_score = \
829
+ self.post_processor.post_process(data_dict, output_dict)
830
+ gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict)
831
+
832
+ return pred_box_tensor, pred_score, gt_box_tensor
833
+
834
+ def post_process_multiclass(self, data_dict, output_dict, online_eval_only=False):
835
+ """
836
+ Process the outputs of the model to 2D/3D bounding box.
837
+
838
+ Parameters
839
+ ----------
840
+ data_dict : dict
841
+ The dictionary containing the origin input data of model.
842
+
843
+ output_dict :dict
844
+ The dictionary containing the output of the model.
845
+
846
+ Returns
847
+ -------
848
+ pred_box_tensor : torch.Tensor
849
+ The tensor of prediction bounding box after NMS.
850
+ gt_box_tensor : torch.Tensor
851
+ The tensor of gt bounding box.
852
+ """
853
+
854
+ if not online_eval_only:
855
+ online_eval_only = self.online_eval_only
856
+
857
+ num_class = output_dict['ego']['cls_preds'].shape[1]
858
+
859
+
860
+ pred_box_tensor_list = []
861
+ pred_score_list = []
862
+ gt_box_tensor_list = []
863
+
864
+ num_list = [0,1,3]
865
+
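+ # assumption: these indices map the detection classes back to the type ids
+ # stored in object_ids; the exact mapping is defined by the dataset config.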
866
+ for i in range(num_class):
867
+ data_dict_single = copy.deepcopy(data_dict)
868
+ output_dict_single = copy.deepcopy(output_dict)
869
+ if not online_eval_only:
870
+ data_dict_single['ego']['object_bbx_center'] = data_dict['ego']['object_bbx_center'][:,i,:,:]
871
+ data_dict_single['ego']['object_bbx_mask'] = data_dict['ego']['object_bbx_mask'][:,i,:]
872
+ data_dict_single['ego']['object_ids'] = data_dict['ego']['object_ids'][num_list[i]]
873
+
874
+ output_dict_single['ego']['cls_preds'] = output_dict['ego']['cls_preds'][:,i:i+1,:,:]
875
+ output_dict_single['ego']['reg_preds'] = output_dict['ego']['reg_preds_multiclass'][:,i,:,:]
876
+
877
+ pred_box_tensor, pred_score = \
878
+ self.post_processor.post_process(data_dict_single, output_dict_single)
879
+ if not online_eval_only:
880
+ gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict_single)
881
+ else:
882
+ gt_box_tensor = None
883
+
884
+ pred_box_tensor_list.append(pred_box_tensor)
885
+ pred_score_list.append(pred_score)
886
+ gt_box_tensor_list.append(gt_box_tensor)
887
+
888
+ return pred_box_tensor_list, pred_score_list, gt_box_tensor_list
889
+
890
+ return IntermediatemulticlassFusionDataset
891
+
892
+
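+ # Minimal usage sketch (not part of the original file; the base dataset class
+ # and params object are assumptions taken from the usual OpenCOOD setup):
+ #     MulticlassDataset = getIntermediatemulticlassFusionDataset(SomeBaseDataset)
+ #     dataset = MulticlassDataset(params, visualize=False, train=True)
+ #     batch = dataset.collate_batch_test([dataset[0]], online_eval_only=False)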
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_fusion_dataset.py ADDED
@@ -0,0 +1,564 @@
1
+ # late fusion dataset
2
+ import random
3
+ import math
4
+ from collections import OrderedDict
5
+ import cv2
6
+ import numpy as np
7
+ import torch
8
+ import copy
9
+ from icecream import ic
10
+ from PIL import Image
11
+ import pickle as pkl
12
+ from opencood.utils import box_utils as box_utils
13
+ from opencood.data_utils.pre_processor import build_preprocessor
14
+ from opencood.data_utils.post_processor import build_postprocessor
15
+ from opencood.utils.camera_utils import (
16
+ sample_augmentation,
17
+ img_transform,
18
+ normalize_img,
19
+ img_to_tensor,
20
+ )
21
+ from opencood.data_utils.augmentor.data_augmentor import DataAugmentor
22
+ from opencood.utils.transformation_utils import x1_to_x2
23
+ from opencood.utils.pose_utils import add_noise_data_dict
24
+ from opencood.utils.pcd_utils import (
25
+ mask_points_by_range,
26
+ mask_ego_points,
27
+ shuffle_points,
28
+ downsample_lidar_minimum,
29
+ )
30
+
31
+
32
+ def getLateFusionDataset(cls):
33
+ """
34
+ cls: the Basedataset.
35
+ """
36
+ class LateFusionDataset(cls):
37
+ def __init__(self, params, visualize, train=True):
38
+ super().__init__(params, visualize, train)
39
+ self.anchor_box = self.post_processor.generate_anchor_box()
40
+ self.anchor_box_torch = torch.from_numpy(self.anchor_box)
41
+
42
+ self.heterogeneous = False
43
+ if 'heter' in params:
44
+ self.heterogeneous = True
45
+
46
+ def __getitem__(self, idx):
47
+ base_data_dict = self.retrieve_base_data(idx)
48
+ if self.train:
49
+ reformat_data_dict = self.get_item_train(base_data_dict)
50
+ else:
51
+ reformat_data_dict = self.get_item_test(base_data_dict, idx)
52
+
53
+ return reformat_data_dict
54
+
55
+ def get_item_train(self, base_data_dict):
56
+ processed_data_dict = OrderedDict()
57
+ base_data_dict = add_noise_data_dict(
58
+ base_data_dict, self.params["noise_setting"]
59
+ )
60
+ # during training, we return a random cav's data
61
+ # only one vehicle is in processed_data_dict
62
+ if not self.visualize:
63
+ selected_cav_id, selected_cav_base = random.choice(
64
+ list(base_data_dict.items())
65
+ )
66
+ else:
67
+ selected_cav_id, selected_cav_base = list(base_data_dict.items())[0]
68
+
69
+ selected_cav_processed = self.get_item_single_car(selected_cav_base)
70
+ processed_data_dict.update({"ego": selected_cav_processed})
71
+
72
+ return processed_data_dict
73
+
74
+
75
+ def get_item_test(self, base_data_dict, idx):
76
+ """
77
+ processed_data_dict.keys() = ['ego', "650", "659", ...]
78
+ """
79
+ base_data_dict = add_noise_data_dict(base_data_dict,self.params['noise_setting'])
80
+
81
+ processed_data_dict = OrderedDict()
82
+ ego_id = -1
83
+ ego_lidar_pose = []
84
+ cav_id_list = []
85
+ lidar_pose_list = []
86
+
87
+ # first find the ego vehicle's lidar pose
88
+ for cav_id, cav_content in base_data_dict.items():
89
+ if cav_content['ego']:
90
+ ego_id = cav_id
91
+ ego_lidar_pose = cav_content['params']['lidar_pose']
92
+ ego_lidar_pose_clean = cav_content['params']['lidar_pose_clean']
93
+ break
94
+
95
+ assert ego_id != -1
96
+ assert len(ego_lidar_pose) > 0
97
+
98
+ # loop over all CAVs to process information
99
+ for cav_id, selected_cav_base in base_data_dict.items():
100
+ distance = math.sqrt(
101
+     (selected_cav_base['params']['lidar_pose'][0] - ego_lidar_pose[0]) ** 2 +
102
+     (selected_cav_base['params']['lidar_pose'][1] - ego_lidar_pose[1]) ** 2)
106
+ if distance > self.params['comm_range']:
107
+ continue
108
+ cav_id_list.append(cav_id)
109
+ lidar_pose_list.append(selected_cav_base['params']['lidar_pose'])
110
+
111
+ cav_id_list_newname = []
112
+ for cav_id in cav_id_list:
113
+ selected_cav_base = base_data_dict[cav_id]
114
+ # find the transformation matrix from current cav to ego.
115
+ cav_lidar_pose = selected_cav_base['params']['lidar_pose']
116
+ transformation_matrix = x1_to_x2(cav_lidar_pose, ego_lidar_pose)
117
+ cav_lidar_pose_clean = selected_cav_base['params']['lidar_pose_clean']
118
+ transformation_matrix_clean = x1_to_x2(cav_lidar_pose_clean, ego_lidar_pose_clean)
119
+
120
+ selected_cav_processed = \
121
+ self.get_item_single_car(selected_cav_base)
122
+ selected_cav_processed.update({'transformation_matrix': transformation_matrix,
123
+ 'transformation_matrix_clean': transformation_matrix_clean})
124
+ update_cav = "ego" if cav_id == ego_id else cav_id
125
+ processed_data_dict.update({update_cav: selected_cav_processed})
126
+ cav_id_list_newname.append(update_cav)
127
+
128
+ # heterogeneous
129
+ if self.heterogeneous:
130
+ processed_data_dict['ego']['idx'] = idx
131
+ processed_data_dict['ego']['cav_list'] = cav_id_list_newname
132
+
133
+ return processed_data_dict
134
+
135
+
136
+ def get_item_single_car(self, selected_cav_base):
137
+ """
138
+ Process a single CAV's information for the train/test pipeline.
139
+
140
+
141
+ Parameters
142
+ ----------
143
+ selected_cav_base : dict
144
+ The dictionary contains a single CAV's raw information.
145
+ including 'params', 'camera_data'
146
+
147
+ Returns
148
+ -------
149
+ selected_cav_processed : dict
150
+ The dictionary contains the cav's processed information.
151
+ """
152
+ selected_cav_processed = {}
153
+
154
+ # label
155
+ object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center_single(
156
+ [selected_cav_base], selected_cav_base["params"]["lidar_pose_clean"]
157
+ )
158
+
159
+ # lidar
160
+ if self.load_lidar_file or self.visualize:
161
+ lidar_np = selected_cav_base['lidar_np']
162
+ lidar_np = shuffle_points(lidar_np)
163
+ lidar_np = mask_points_by_range(lidar_np,
164
+ self.params['preprocess'][
165
+ 'cav_lidar_range'])
166
+ # remove points that hit ego vehicle
167
+ lidar_np = mask_ego_points(lidar_np)
168
+
169
+ # data augmentation; this matters a lot for single-agent training because of the lack of data diversity.
170
+ # it only works for the lidar modality during training.
171
+ if not self.heterogeneous:
172
+ lidar_np, object_bbx_center, object_bbx_mask = \
173
+ self.augment(lidar_np, object_bbx_center, object_bbx_mask)
174
+
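+ # the augmentor typically applies random flipping / rotation / scaling jointly
+ # to the points and boxes; the exact transforms depend on the data_augment config.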
175
+ lidar_dict = self.pre_processor.preprocess(lidar_np)
176
+ selected_cav_processed.update({'processed_lidar': lidar_dict})
177
+
178
+
179
+
180
+
181
+ if self.visualize:
182
+ selected_cav_processed.update({'origin_lidar': lidar_np})
183
+
184
+ # camera
185
+ if self.load_camera_file:
186
+ # adapted from https://github.com/nv-tlabs/lift-splat-shoot/blob/master/src/data.py
187
+ camera_data_list = selected_cav_base["camera_data"]
188
+
189
+ params = selected_cav_base["params"]
190
+ imgs = []
191
+ rots = []
192
+ trans = []
193
+ intrins = []
194
+ extrinsics = [] # cam_to_lidar
195
+ post_rots = []
196
+ post_trans = []
197
+
198
+ for idx, img in enumerate(camera_data_list):
199
+ camera_to_lidar, camera_intrinsic = self.get_ext_int(params, idx)
200
+
201
+ intrin = torch.from_numpy(camera_intrinsic)
202
+ rot = torch.from_numpy(
203
+ camera_to_lidar[:3, :3]
204
+ ) # R_wc, we consider world-coord is the lidar-coord
205
+ tran = torch.from_numpy(camera_to_lidar[:3, 3]) # T_wc
206
+
207
+ post_rot = torch.eye(2)
208
+ post_tran = torch.zeros(2)
209
+
210
+ img_src = [img]
211
+
212
+ # depth
213
+ if self.load_depth_file:
214
+ depth_img = selected_cav_base["depth_data"][idx]
215
+ img_src.append(depth_img)
216
+ else:
217
+ depth_img = None
218
+
219
+ # data augmentation
220
+ resize, resize_dims, crop, flip, rotate = sample_augmentation(
221
+ self.data_aug_conf, self.train
222
+ )
223
+ img_src, post_rot2, post_tran2 = img_transform(
224
+ img_src,
225
+ post_rot,
226
+ post_tran,
227
+ resize=resize,
228
+ resize_dims=resize_dims,
229
+ crop=crop,
230
+ flip=flip,
231
+ rotate=rotate,
232
+ )
233
+ # for convenience, make augmentation matrices 3x3
234
+ post_tran = torch.zeros(3)
235
+ post_rot = torch.eye(3)
236
+ post_tran[:2] = post_tran2
237
+ post_rot[:2, :2] = post_rot2
238
+
239
+ img_src[0] = normalize_img(img_src[0])
240
+ if self.load_depth_file:
241
+ img_src[1] = img_to_tensor(img_src[1]) * 255
242
+
243
+ imgs.append(torch.cat(img_src, dim=0))
244
+ intrins.append(intrin)
245
+ extrinsics.append(torch.from_numpy(camera_to_lidar))
246
+ rots.append(rot)
247
+ trans.append(tran)
248
+ post_rots.append(post_rot)
249
+ post_trans.append(post_tran)
250
+
251
+ selected_cav_processed.update(
252
+ {
253
+ "image_inputs":
254
+ {
255
+ "imgs": torch.stack(imgs), # [N, 3or4, H, W]
256
+ "intrins": torch.stack(intrins),
257
+ "extrinsics": torch.stack(extrinsics),
258
+ "rots": torch.stack(rots),
259
+ "trans": torch.stack(trans),
260
+ "post_rots": torch.stack(post_rots),
261
+ "post_trans": torch.stack(post_trans),
262
+ }
263
+ }
264
+ )
265
+
266
+
267
+ selected_cav_processed.update(
268
+ {
269
+ "object_bbx_center": object_bbx_center,
270
+ "object_bbx_mask": object_bbx_mask,
271
+ "object_ids": object_ids,
272
+ }
273
+ )
274
+
275
+ # generate targets label
276
+ label_dict = self.post_processor.generate_label(
277
+ gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask
278
+ )
279
+ selected_cav_processed.update({"label_dict": label_dict})
280
+
281
+ return selected_cav_processed
282
+
283
+
284
+ def collate_batch_train(self, batch):
285
+ """
286
+ Customized collate function for pytorch dataloader during training
287
+ for early and late fusion dataset.
288
+
289
+ Parameters
290
+ ----------
291
+ batch : dict
292
+
293
+ Returns
294
+ -------
295
+ batch : dict
296
+ Reformatted batch.
297
+ """
298
+ # during training, we only care about ego.
299
+ output_dict = {'ego': {}}
300
+
301
+ object_bbx_center = []
302
+ object_bbx_mask = []
303
+ processed_lidar_list = []
304
+ label_dict_list = []
305
+ origin_lidar = []
306
+
307
+ for i in range(len(batch)):
308
+ ego_dict = batch[i]['ego']
309
+ object_bbx_center.append(ego_dict['object_bbx_center'])
310
+ object_bbx_mask.append(ego_dict['object_bbx_mask'])
311
+ label_dict_list.append(ego_dict['label_dict'])
312
+
313
+ if self.visualize:
314
+ origin_lidar.append(ego_dict['origin_lidar'])
315
+
316
+ # convert to numpy, (B, max_num, 7)
317
+ object_bbx_center = torch.from_numpy(np.array(object_bbx_center))
318
+ object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask))
319
+ label_torch_dict = \
320
+ self.post_processor.collate_batch(label_dict_list)
321
+
322
+ # for centerpoint
323
+ label_torch_dict.update({'object_bbx_center': object_bbx_center,
324
+ 'object_bbx_mask': object_bbx_mask})
325
+
326
+ output_dict['ego'].update({'object_bbx_center': object_bbx_center,
327
+ 'object_bbx_mask': object_bbx_mask,
328
+ 'anchor_box': torch.from_numpy(self.anchor_box),
329
+ 'label_dict': label_torch_dict})
330
+ if self.visualize:
331
+ origin_lidar = \
332
+ np.array(downsample_lidar_minimum(pcd_np_list=origin_lidar))
333
+ origin_lidar = torch.from_numpy(origin_lidar)
334
+ output_dict['ego'].update({'origin_lidar': origin_lidar})
335
+
336
+ if self.load_lidar_file:
337
+ for i in range(len(batch)):
338
+ processed_lidar_list.append(batch[i]['ego']['processed_lidar'])
339
+ processed_lidar_torch_dict = \
340
+ self.pre_processor.collate_batch(processed_lidar_list)
341
+ output_dict['ego'].update({'processed_lidar': processed_lidar_torch_dict})
342
+
343
+ if self.load_camera_file:
344
+ # collate ego camera information
345
+ imgs_batch = []
346
+ rots_batch = []
347
+ trans_batch = []
348
+ intrins_batch = []
349
+ extrinsics_batch = []
350
+ post_trans_batch = []
351
+ post_rots_batch = []
352
+ for i in range(len(batch)):
353
+ ego_dict = batch[i]["ego"]["image_inputs"]
354
+ imgs_batch.append(ego_dict["imgs"])
355
+ rots_batch.append(ego_dict["rots"])
356
+ trans_batch.append(ego_dict["trans"])
357
+ intrins_batch.append(ego_dict["intrins"])
358
+ extrinsics_batch.append(ego_dict["extrinsics"])
359
+ post_trans_batch.append(ego_dict["post_trans"])
360
+ post_rots_batch.append(ego_dict["post_rots"])
361
+
362
+ output_dict["ego"].update({
363
+ "image_inputs":
364
+ {
365
+ "imgs": torch.stack(imgs_batch), # [B, N, C, H, W]
366
+ "rots": torch.stack(rots_batch),
367
+ "trans": torch.stack(trans_batch),
368
+ "intrins": torch.stack(intrins_batch),
369
+ "post_trans": torch.stack(post_trans_batch),
370
+ "post_rots": torch.stack(post_rots_batch),
371
+ }
372
+ }
373
+ )
374
+
375
+
376
+ return output_dict
377
+
378
+ def collate_batch_test(self, batch):
379
+ """
380
+ Customized collate function for pytorch dataloader during testing
381
+ for late fusion dataset.
382
+
383
+ Parameters
384
+ ----------
385
+ batch : dict
386
+
387
+ Returns
388
+ -------
389
+ batch : dict
390
+ Reformatted batch.
391
+ """
392
+ # currently, we only support batch size of 1 during testing
393
+ assert len(batch) <= 1, "Batch size 1 is required during testing!"
394
+ batch = batch[0]
395
+
396
+ output_dict = {}
397
+
398
+ # heterogeneous
399
+ if self.heterogeneous:
400
+ idx = batch['ego']['idx']
401
+ cav_list = batch['ego']['cav_list'] # ['ego', '650' ..]
402
+ cav_num = len(batch)
403
+ lidar_agent, camera_agent = self.selector.select_agent(idx)
404
+ lidar_agent = lidar_agent[:cav_num] # [1,0,0,1,0]
405
+ lidar_agent_idx = lidar_agent.nonzero()[0].tolist()
406
+ lidar_agent_cav_id = [cav_list[index] for index in lidar_agent_idx] # ['ego', ...]
407
+
408
+
409
+ # for late fusion, we also need to stack the lidar for better
410
+ # visualization
411
+ if self.visualize:
412
+ projected_lidar_list = []
413
+ origin_lidar = []
414
+
415
+ for cav_id, cav_content in batch.items():
416
+ output_dict.update({cav_id: {}})
417
+ # shape: (1, max_num, 7)
418
+ object_bbx_center = \
419
+ torch.from_numpy(np.array([cav_content['object_bbx_center']]))
420
+ object_bbx_mask = \
421
+ torch.from_numpy(np.array([cav_content['object_bbx_mask']]))
422
+ object_ids = cav_content['object_ids']
423
+
424
+ # the anchor box is the same for all bounding boxes usually, thus
425
+ # we don't need the batch dimension.
426
+ output_dict[cav_id].update(
427
+ {"anchor_box": self.anchor_box_torch}
428
+ )
429
+
430
+ transformation_matrix = cav_content['transformation_matrix']
431
+ if self.visualize:
432
+ origin_lidar = [cav_content['origin_lidar']]
433
+ if (self.params['only_vis_ego'] is False) or (cav_id=='ego'):
434
+ projected_lidar = copy.deepcopy(cav_content['origin_lidar'])
435
+ projected_lidar[:, :3] = \
436
+ box_utils.project_points_by_matrix_torch(
437
+ projected_lidar[:, :3],
438
+ transformation_matrix)
439
+ projected_lidar_list.append(projected_lidar)
440
+
441
+ if self.load_lidar_file:
442
+ # processed lidar dictionary
443
+ processed_lidar_torch_dict = \
444
+ self.pre_processor.collate_batch(
445
+ [cav_content['processed_lidar']])
446
+ output_dict[cav_id].update({'processed_lidar': processed_lidar_torch_dict})
447
+
448
+ if self.load_camera_file:
449
+ imgs_batch = [cav_content["image_inputs"]["imgs"]]
450
+ rots_batch = [cav_content["image_inputs"]["rots"]]
451
+ trans_batch = [cav_content["image_inputs"]["trans"]]
452
+ intrins_batch = [cav_content["image_inputs"]["intrins"]]
453
+ extrinsics_batch = [cav_content["image_inputs"]["extrinsics"]]
454
+ post_trans_batch = [cav_content["image_inputs"]["post_trans"]]
455
+ post_rots_batch = [cav_content["image_inputs"]["post_rots"]]
456
+
457
+ output_dict[cav_id].update({
458
+ "image_inputs":
459
+ {
460
+ "imgs": torch.stack(imgs_batch),
461
+ "rots": torch.stack(rots_batch),
462
+ "trans": torch.stack(trans_batch),
463
+ "intrins": torch.stack(intrins_batch),
464
+ "extrinsics": torch.stack(extrinsics_batch),
465
+ "post_trans": torch.stack(post_trans_batch),
466
+ "post_rots": torch.stack(post_rots_batch),
467
+ }
468
+ }
469
+ )
470
+
471
+ # heterogeneous
472
+ if self.heterogeneous:
473
+ if cav_id in lidar_agent_cav_id:
474
+ output_dict[cav_id].pop('image_inputs')
475
+ else:
476
+ output_dict[cav_id].pop('processed_lidar')
477
+
478
+ # label dictionary
479
+ label_torch_dict = \
480
+ self.post_processor.collate_batch([cav_content['label_dict']])
481
+
482
+ # for centerpoint
483
+ label_torch_dict.update({'object_bbx_center': object_bbx_center,
484
+ 'object_bbx_mask': object_bbx_mask})
485
+
486
+ # save the transformation matrix (4, 4) to ego vehicle
487
+ transformation_matrix_torch = \
488
+ torch.from_numpy(
489
+ np.array(cav_content['transformation_matrix'])).float()
490
+
491
+ # late fusion training, no noise
492
+ transformation_matrix_clean_torch = \
493
+ torch.from_numpy(
494
+ np.array(cav_content['transformation_matrix_clean'])).float()
495
+
496
+ output_dict[cav_id].update({'object_bbx_center': object_bbx_center,
497
+ 'object_bbx_mask': object_bbx_mask,
498
+ 'label_dict': label_torch_dict,
499
+ 'object_ids': object_ids,
500
+ 'transformation_matrix': transformation_matrix_torch,
501
+ 'transformation_matrix_clean': transformation_matrix_clean_torch})
502
+
503
+ if self.visualize:
504
+ origin_lidar = \
505
+ np.array(
506
+ downsample_lidar_minimum(pcd_np_list=origin_lidar))
507
+ origin_lidar = torch.from_numpy(origin_lidar)
508
+ output_dict[cav_id].update({'origin_lidar': origin_lidar})
509
+
510
+ if self.visualize:
511
+ projected_lidar_stack = [torch.from_numpy(
512
+ np.vstack(projected_lidar_list))]
513
+ output_dict['ego'].update({'origin_lidar': projected_lidar_stack})
514
+ # output_dict['ego'].update({'projected_lidar_list': projected_lidar_list})
515
+
516
+ return output_dict
517
+
518
+
519
+ def post_process(self, data_dict, output_dict):
520
+ """
521
+ Process the outputs of the model to 2D/3D bounding box.
522
+
523
+ Parameters
524
+ ----------
525
+ data_dict : dict
526
+ The dictionary containing the origin input data of model.
527
+
528
+ output_dict :dict
529
+ The dictionary containing the output of the model.
530
+
531
+ Returns
532
+ -------
533
+ pred_box_tensor : torch.Tensor
534
+ The tensor of prediction bounding box after NMS.
535
+ gt_box_tensor : torch.Tensor
536
+ The tensor of gt bounding box.
537
+ """
538
+ pred_box_tensor, pred_score = self.post_processor.post_process(
539
+ data_dict, output_dict
540
+ )
541
+ gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict)
542
+
543
+ return pred_box_tensor, pred_score, gt_box_tensor
544
+
545
+ def post_process_no_fusion(self, data_dict, output_dict_ego):
546
+ data_dict_ego = OrderedDict()
547
+ data_dict_ego["ego"] = data_dict["ego"]
548
+ gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict)
549
+
550
+ pred_box_tensor, pred_score = self.post_processor.post_process(
551
+ data_dict_ego, output_dict_ego
552
+ )
553
+ return pred_box_tensor, pred_score, gt_box_tensor
554
+
555
+ def post_process_no_fusion_uncertainty(self, data_dict, output_dict_ego):
556
+ data_dict_ego = OrderedDict()
557
+ data_dict_ego['ego'] = data_dict['ego']
558
+ gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict)
559
+
560
+ pred_box_tensor, pred_score, uncertainty = \
561
+ self.post_processor.post_process(data_dict_ego, output_dict_ego, return_uncertainty=True)
562
+ return pred_box_tensor, pred_score, gt_box_tensor, uncertainty
563
+
564
+ return LateFusionDataset
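Note: getLateFusionDataset(cls) is a class factory; it builds the concrete dataset type at runtime by subclassing whatever base dataset it receives. Below is a minimal, self-contained sketch of that pattern; the _DummyBase class and the commented-out instantiation are illustrative assumptions, not this repository's actual API.

# Sketch of the class-factory pattern used by getLateFusionDataset (assumed names).
class _DummyBase:                      # stand-in for the project's real base dataset
    def __init__(self, params, visualize, train=True):
        self.params, self.visualize, self.train = params, visualize, train

LateFusionDataset = getLateFusionDataset(_DummyBase)   # concrete class is created here
# dataset = LateFusionDataset(params, visualize=False, train=True)  # params: parsed yaml config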
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_heter_fusion_dataset.py ADDED
@@ -0,0 +1,565 @@
1
+ # late fusion dataset
2
+ import random
3
+ import math
4
+ from collections import OrderedDict
5
+ import cv2
6
+ import numpy as np
7
+ import torch
8
+ import copy
9
+ from icecream import ic
10
+ from PIL import Image
11
+ import pickle as pkl
12
+ from opencood.utils import box_utils as box_utils
13
+ from opencood.data_utils.pre_processor import build_preprocessor
14
+ from opencood.data_utils.post_processor import build_postprocessor
15
+ from opencood.utils.camera_utils import (
16
+ sample_augmentation,
17
+ img_transform,
18
+ normalize_img,
19
+ img_to_tensor,
20
+ )
21
+ from opencood.data_utils.augmentor.data_augmentor import DataAugmentor
22
+ from opencood.utils.transformation_utils import x1_to_x2
23
+ from opencood.utils.pose_utils import add_noise_data_dict
24
+ from opencood.utils.pcd_utils import (
25
+ mask_points_by_range,
26
+ mask_ego_points,
27
+ shuffle_points,
28
+ downsample_lidar_minimum,
29
+ )
30
+ from opencood.utils.common_utils import read_json
31
+ from opencood.utils.common_utils import merge_features_to_dict
32
+ from opencood.utils.heter_utils import Adaptor
33
+
34
+ def getLateheterFusionDataset(cls):
35
+ """
36
+ cls: the BaseDataset.
37
+ """
38
+ class LateheterFusionDataset(cls):
39
+ def __init__(self, params, visualize, train=True):
40
+ super().__init__(params, visualize, train)
41
+ self.anchor_box = self.post_processor.generate_anchor_box()
42
+ self.anchor_box_torch = torch.from_numpy(self.anchor_box)
43
+
44
+ self.heterogeneous = True
45
+ self.modality_assignment = read_json(params['heter']['assignment_path'])
46
+ self.ego_modality = params['heter']['ego_modality'] # "m1" or "m1&m2" or "m3"
47
+
48
+ self.modality_name_list = list(params['heter']['modality_setting'].keys())
49
+ self.sensor_type_dict = OrderedDict()
50
+
51
+ lidar_channels_dict = params['heter'].get('lidar_channels_dict', OrderedDict())
52
+ mapping_dict = params['heter']['mapping_dict']
53
+
54
+ self.adaptor = Adaptor(self.ego_modality,
55
+ self.modality_name_list,
56
+ self.modality_assignment,
57
+ lidar_channels_dict,
58
+ mapping_dict,
59
+ None,
60
+ train)
61
+
62
+ for modality_name, modal_setting in params['heter']['modality_setting'].items():
63
+ self.sensor_type_dict[modality_name] = modal_setting['sensor_type']
64
+ if modal_setting['sensor_type'] == 'lidar':
65
+ setattr(self, f"pre_processor_{modality_name}", build_preprocessor(modal_setting['preprocess'], train))
66
+
67
+ elif modal_setting['sensor_type'] == 'camera':
68
+ setattr(self, f"data_aug_conf_{modality_name}", modal_setting['data_aug_conf'])
69
+
70
+ else:
71
+ raise ValueError(f"Sensor type {modal_setting['sensor_type']} is not supported")
72
+
73
+ self.reinitialize()
74
+
75
+ def __getitem__(self, idx):
76
+ base_data_dict = self.retrieve_base_data(idx)
77
+ if self.train:
78
+ reformat_data_dict = self.get_item_train(base_data_dict)
79
+ else:
80
+ reformat_data_dict = self.get_item_test(base_data_dict, idx)
81
+ return reformat_data_dict
82
+
83
+ def get_item_train(self, base_data_dict):
84
+ processed_data_dict = OrderedDict()
85
+ base_data_dict = add_noise_data_dict(
86
+ base_data_dict, self.params["noise_setting"]
87
+ )
88
+ # during training, we return a random cav's data
89
+ # only one vehicle is in processed_data_dict
90
+ if not self.visualize:
91
+ options = []
92
+ for cav_id, cav_content in base_data_dict.items():
93
+ if cav_content['modality_name'] in self.ego_modality:
94
+ options.append(cav_id)
95
+ selected_cav_base = base_data_dict[random.choice(options)]
96
+ else:
97
+ selected_cav_id, selected_cav_base = list(base_data_dict.items())[0]
98
+
99
+ selected_cav_processed = self.get_item_single_car(selected_cav_base)
100
+ processed_data_dict.update({"ego": selected_cav_processed})
101
+
102
+ return processed_data_dict
103
+
104
+
105
+ def get_item_test(self, base_data_dict, idx):
106
+ """
107
+ processed_data_dict.keys() = ['ego', "650", "659", ...]
108
+ """
109
+ base_data_dict = add_noise_data_dict(base_data_dict,self.params['noise_setting'])
110
+
111
+ processed_data_dict = OrderedDict()
112
+ ego_id = -1
113
+ ego_lidar_pose = []
114
+ cav_id_list = []
115
+ lidar_pose_list = []
116
+
117
+ # first find the ego vehicle's lidar pose
118
+ for cav_id, cav_content in base_data_dict.items():
119
+ if cav_content['ego']:
120
+ ego_id = cav_id
121
+ ego_lidar_pose = cav_content['params']['lidar_pose']
122
+ ego_lidar_pose_clean = cav_content['params']['lidar_pose_clean']
123
+ break
124
+
125
+ assert ego_id != -1
126
+ assert len(ego_lidar_pose) > 0
127
+
128
+ # loop over all CAVs to process information
129
+ for cav_id, selected_cav_base in base_data_dict.items():
130
+ distance = \
131
+ math.sqrt((selected_cav_base['params']['lidar_pose'][0] -
132
+ ego_lidar_pose[0]) ** 2 + (
133
+ selected_cav_base['params'][
134
+ 'lidar_pose'][1] - ego_lidar_pose[
135
+ 1]) ** 2)
136
+ if distance > self.params['comm_range']:
137
+ continue
138
+
139
+ if self.adaptor.unmatched_modality(selected_cav_base['modality_name']):
140
+ continue
141
+
142
+ cav_id_list.append(cav_id)
143
+ lidar_pose_list.append(selected_cav_base['params']['lidar_pose'])
144
+
145
+ cav_id_list_newname = []
146
+ for cav_id in cav_id_list:
147
+ selected_cav_base = base_data_dict[cav_id]
148
+ # find the transformation matrix from current cav to ego.
149
+ cav_lidar_pose = selected_cav_base['params']['lidar_pose']
150
+ transformation_matrix = x1_to_x2(cav_lidar_pose, ego_lidar_pose)
151
+ cav_lidar_pose_clean = selected_cav_base['params']['lidar_pose_clean']
152
+ transformation_matrix_clean = x1_to_x2(cav_lidar_pose_clean, ego_lidar_pose_clean)
153
+
154
+ # In the test phase, lidar labels are always used for a fair comparison. (needs discussion)
155
+ self.label_type = 'lidar' # DAIRV2X
156
+ self.generate_object_center = self.generate_object_center_lidar # OPV2V, V2XSET
157
+
158
+ selected_cav_processed = \
159
+ self.get_item_single_car(selected_cav_base)
160
+ selected_cav_processed.update({'transformation_matrix': transformation_matrix,
161
+ 'transformation_matrix_clean': transformation_matrix_clean})
162
+ update_cav = "ego" if cav_id == ego_id else cav_id
163
+ processed_data_dict.update({update_cav: selected_cav_processed})
164
+ cav_id_list_newname.append(update_cav)
165
+
166
+
167
+ return processed_data_dict
168
+
169
+
170
+ def get_item_single_car(self, selected_cav_base):
171
+ """
172
+ Process a single CAV's information for the train/test pipeline.
173
+
174
+
175
+ Parameters
176
+ ----------
177
+ selected_cav_base : dict
178
+ The dictionary containing a single CAV's raw information,
179
+ including 'params' and 'camera_data'.
180
+
181
+ Returns
182
+ -------
183
+ selected_cav_processed : dict
184
+ The dictionary contains the cav's processed information.
185
+ """
186
+ selected_cav_processed = {}
187
+ modality_name = selected_cav_base['modality_name']
188
+ sensor_type = self.sensor_type_dict[modality_name]
189
+
190
+ # label
191
+ object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center_single(
192
+ [selected_cav_base], selected_cav_base["params"]["lidar_pose_clean"]
193
+ )
194
+
195
+ # lidar
196
+ if sensor_type == "lidar" or self.visualize:
197
+ lidar_np = selected_cav_base['lidar_np']
198
+ lidar_np = shuffle_points(lidar_np)
199
+ lidar_np = mask_points_by_range(lidar_np,
200
+ self.params['preprocess'][
201
+ 'cav_lidar_range'])
202
+ # remove points that hit ego vehicle
203
+ lidar_np = mask_ego_points(lidar_np)
204
+
205
+ # data augmentation; this seems very important for single-agent training because of the lack of data diversity.
206
+ # it is only applied to the lidar modality during training.
207
+ lidar_np, object_bbx_center, object_bbx_mask = \
208
+ self.augment(lidar_np, object_bbx_center, object_bbx_mask)
209
+ if sensor_type == "lidar":
210
+ processed_lidar = eval(f"self.pre_processor_{modality_name}").preprocess(lidar_np)
211
+ selected_cav_processed.update({f'processed_features_{modality_name}': processed_lidar})
212
+
213
+
214
+ if self.visualize:
215
+ selected_cav_processed.update({'origin_lidar': lidar_np})
216
+
217
+ # camera
218
+ if sensor_type == "camera":
219
+ # adapted from https://github.com/nv-tlabs/lift-splat-shoot/blob/master/src/data.py
220
+ camera_data_list = selected_cav_base["camera_data"]
221
+
222
+ params = selected_cav_base["params"]
223
+ imgs = []
224
+ rots = []
225
+ trans = []
226
+ intrins = []
227
+ extrinsics = [] # cam_to_lidar
228
+ post_rots = []
229
+ post_trans = []
230
+
231
+ for idx, img in enumerate(camera_data_list):
232
+ camera_to_lidar, camera_intrinsic = self.get_ext_int(params, idx)
233
+
234
+ intrin = torch.from_numpy(camera_intrinsic)
235
+ rot = torch.from_numpy(
236
+ camera_to_lidar[:3, :3]
237
+ ) # R_wc, we consider world-coord is the lidar-coord
238
+ tran = torch.from_numpy(camera_to_lidar[:3, 3]) # T_wc
239
+
240
+ post_rot = torch.eye(2)
241
+ post_tran = torch.zeros(2)
242
+
243
+ img_src = [img]
244
+
245
+ # depth
246
+ if self.load_depth_file:
247
+ depth_img = selected_cav_base["depth_data"][idx]
248
+ img_src.append(depth_img)
249
+ else:
250
+ depth_img = None
251
+
252
+ # data augmentation
253
+ resize, resize_dims, crop, flip, rotate = sample_augmentation(
254
+ eval(f"self.data_aug_conf_{modality_name}"), self.train
255
+ )
256
+ img_src, post_rot2, post_tran2 = img_transform(
257
+ img_src,
258
+ post_rot,
259
+ post_tran,
260
+ resize=resize,
261
+ resize_dims=resize_dims,
262
+ crop=crop,
263
+ flip=flip,
264
+ rotate=rotate,
265
+ )
266
+ # for convenience, make augmentation matrices 3x3
267
+ post_tran = torch.zeros(3)
268
+ post_rot = torch.eye(3)
269
+ post_tran[:2] = post_tran2
270
+ post_rot[:2, :2] = post_rot2
271
+
272
+ img_src[0] = normalize_img(img_src[0])
273
+ if self.load_depth_file:
274
+ img_src[1] = img_to_tensor(img_src[1]) * 255
275
+
276
+ imgs.append(torch.cat(img_src, dim=0))
277
+ intrins.append(intrin)
278
+ extrinsics.append(torch.from_numpy(camera_to_lidar))
279
+ rots.append(rot)
280
+ trans.append(tran)
281
+ post_rots.append(post_rot)
282
+ post_trans.append(post_tran)
283
+
284
+ selected_cav_processed.update(
285
+ {
286
+ f"image_inputs_{modality_name}":
287
+ {
288
+ "imgs": torch.stack(imgs), # [N, 3or4, H, W]
289
+ "intrins": torch.stack(intrins),
290
+ "extrinsics": torch.stack(extrinsics),
291
+ "rots": torch.stack(rots),
292
+ "trans": torch.stack(trans),
293
+ "post_rots": torch.stack(post_rots),
294
+ "post_trans": torch.stack(post_trans),
295
+ }
296
+ }
297
+ )
298
+
299
+
300
+ selected_cav_processed.update(
301
+ {
302
+ "object_bbx_center": object_bbx_center,
303
+ "object_bbx_mask": object_bbx_mask,
304
+ "object_ids": object_ids,
305
+ "modality_name": modality_name
306
+ }
307
+ )
308
+
309
+ # generate targets label
310
+ label_dict = self.post_processor.generate_label(
311
+ gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask
312
+ )
313
+ selected_cav_processed.update({"label_dict": label_dict})
314
+
315
+ return selected_cav_processed
316
+
317
+
318
+ def collate_batch_train(self, batch):
319
+ """
320
+ Customized collate function for pytorch dataloader during training
321
+ for early and late fusion dataset.
322
+
323
+ Parameters
324
+ ----------
325
+ batch : dict
326
+
327
+ Returns
328
+ -------
329
+ batch : dict
330
+ Reformatted batch.
331
+ """
332
+ # during training, we only care about ego.
333
+ output_dict = {'ego': {}}
334
+
335
+ object_bbx_center = []
336
+ object_bbx_mask = []
337
+ label_dict_list = []
338
+ origin_lidar = []
339
+ inputs_list_m1 = []
340
+ inputs_list_m2 = []
341
+ inputs_list_m3 = []
342
+ inputs_list_m4 = []
343
+ for i in range(len(batch)):
344
+ ego_dict = batch[i]['ego']
345
+ object_bbx_center.append(ego_dict['object_bbx_center'])
346
+ object_bbx_mask.append(ego_dict['object_bbx_mask'])
347
+ label_dict_list.append(ego_dict['label_dict'])
348
+
349
+ if self.visualize:
350
+ origin_lidar.append(ego_dict['origin_lidar'])
351
+
352
+ # convert to numpy, (B, max_num, 7)
353
+ object_bbx_center = torch.from_numpy(np.array(object_bbx_center))
354
+ object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask))
355
+ label_torch_dict = \
356
+ self.post_processor.collate_batch(label_dict_list)
357
+
358
+ # for centerpoint
359
+ label_torch_dict.update({'object_bbx_center': object_bbx_center,
360
+ 'object_bbx_mask': object_bbx_mask})
361
+
362
+ output_dict['ego'].update({'object_bbx_center': object_bbx_center,
363
+ 'object_bbx_mask': object_bbx_mask,
364
+ 'anchor_box': torch.from_numpy(self.anchor_box),
365
+ 'label_dict': label_torch_dict})
366
+ if self.visualize:
367
+ origin_lidar = \
368
+ np.array(downsample_lidar_minimum(pcd_np_list=origin_lidar))
369
+ origin_lidar = torch.from_numpy(origin_lidar)
370
+ output_dict['ego'].update({'origin_lidar': origin_lidar})
371
+
372
+
373
+
374
+
375
+ for modality_name in self.modality_name_list:
376
+ sensor_type = self.sensor_type_dict[modality_name]
377
+ for i in range(len(batch)):
378
+ ego_dict = batch[i]['ego']
379
+ if f'processed_features_{modality_name}' in ego_dict:
380
+ eval(f"inputs_list_{modality_name}").append(ego_dict[f'processed_features_{modality_name}'])
381
+ elif f'image_inputs_{modality_name}' in ego_dict:
382
+ eval(f"inputs_list_{modality_name}").append(ego_dict[f'image_inputs_{modality_name}'])
383
+
384
+ if self.sensor_type_dict[modality_name] == "lidar":
385
+ processed_lidar_torch_dict = eval(f"self.pre_processor_{modality_name}").collate_batch(eval(f"inputs_list_{modality_name}"))
386
+ output_dict['ego'].update({f'inputs_{modality_name}': processed_lidar_torch_dict})
387
+ elif self.sensor_type_dict[modality_name] == "camera":
388
+ merged_image_inputs_dict = merge_features_to_dict(eval(f"inputs_list_{modality_name}"), merge='stack')
389
+ output_dict['ego'].update({f'inputs_{modality_name}': merged_image_inputs_dict})
390
+
391
+ return output_dict
392
+
393
+ def collate_batch_test(self, batch):
394
+ """
395
+ Customized collate function for pytorch dataloader during testing
396
+ for late fusion dataset.
397
+
398
+ Parameters
399
+ ----------
400
+ batch : dict
401
+
402
+ Returns
403
+ -------
404
+ batch : dict
405
+ Reformatted batch.
406
+ """
407
+ # currently, we only support batch size of 1 during testing
408
+ assert len(batch) <= 1, "Batch size 1 is required during testing!"
409
+ batch = batch[0]
410
+
411
+ output_dict = {}
412
+
413
+ # for late fusion, we also need to stack the lidar for better
414
+ # visualization
415
+ if self.visualize:
416
+ projected_lidar_list = []
417
+ origin_lidar = []
418
+
419
+ for cav_id, cav_content in batch.items():
420
+ modality_name = cav_content['modality_name']
421
+ sensor_type = self.sensor_type_dict[modality_name]
422
+
423
+ output_dict.update({cav_id: {}})
424
+ # shape: (1, max_num, 7)
425
+ object_bbx_center = \
426
+ torch.from_numpy(np.array([cav_content['object_bbx_center']]))
427
+ object_bbx_mask = \
428
+ torch.from_numpy(np.array([cav_content['object_bbx_mask']]))
429
+ object_ids = cav_content['object_ids']
430
+
431
+ # the anchor box is the same for all bounding boxes usually, thus
432
+ # we don't need the batch dimension.
433
+ output_dict[cav_id].update(
434
+ {"anchor_box": self.anchor_box_torch}
435
+ )
436
+
437
+ transformation_matrix = cav_content['transformation_matrix']
438
+ if self.visualize:
439
+ origin_lidar = [cav_content['origin_lidar']]
440
+ if (self.params.get('only_vis_ego', True) is False) or (cav_id=='ego'):
441
+ projected_lidar = copy.deepcopy(cav_content['origin_lidar'])
442
+ projected_lidar[:, :3] = \
443
+ box_utils.project_points_by_matrix_torch(
444
+ projected_lidar[:, :3],
445
+ transformation_matrix)
446
+ projected_lidar_list.append(projected_lidar)
447
+
448
+ if sensor_type == "lidar":
449
+ # processed lidar dictionary
450
+ processed_lidar_torch_dict = \
451
+ eval(f"self.pre_processor_{modality_name}").collate_batch([cav_content[f'processed_features_{modality_name}']])
452
+ output_dict[cav_id].update({f'inputs_{modality_name}': processed_lidar_torch_dict})
453
+
454
+ if sensor_type == 'camera':
455
+ imgs_batch = [cav_content[f"image_inputs_{modality_name}"]["imgs"]]
456
+ rots_batch = [cav_content[f"image_inputs_{modality_name}"]["rots"]]
457
+ trans_batch = [cav_content[f"image_inputs_{modality_name}"]["trans"]]
458
+ intrins_batch = [cav_content[f"image_inputs_{modality_name}"]["intrins"]]
459
+ extrinsics_batch = [cav_content[f"image_inputs_{modality_name}"]["extrinsics"]]
460
+ post_trans_batch = [cav_content[f"image_inputs_{modality_name}"]["post_trans"]]
461
+ post_rots_batch = [cav_content[f"image_inputs_{modality_name}"]["post_rots"]]
462
+
463
+ output_dict[cav_id].update({
464
+ f"inputs_{modality_name}":
465
+ {
466
+ "imgs": torch.stack(imgs_batch),
467
+ "rots": torch.stack(rots_batch),
468
+ "trans": torch.stack(trans_batch),
469
+ "intrins": torch.stack(intrins_batch),
470
+ "extrinsics": torch.stack(extrinsics_batch),
471
+ "post_trans": torch.stack(post_trans_batch),
472
+ "post_rots": torch.stack(post_rots_batch),
473
+ }
474
+ }
475
+ )
476
+
477
+
478
+ # label dictionary
479
+ label_torch_dict = \
480
+ self.post_processor.collate_batch([cav_content['label_dict']])
481
+
482
+ # for centerpoint
483
+ label_torch_dict.update({'object_bbx_center': object_bbx_center,
484
+ 'object_bbx_mask': object_bbx_mask})
485
+
486
+ # save the transformation matrix (4, 4) to ego vehicle
487
+ transformation_matrix_torch = \
488
+ torch.from_numpy(
489
+ np.array(cav_content['transformation_matrix'])).float()
490
+
491
+ # late fusion training, no noise
492
+ transformation_matrix_clean_torch = \
493
+ torch.from_numpy(
494
+ np.array(cav_content['transformation_matrix_clean'])).float()
495
+
496
+ output_dict[cav_id].update({'object_bbx_center': object_bbx_center,
497
+ 'object_bbx_mask': object_bbx_mask,
498
+ 'label_dict': label_torch_dict,
499
+ 'object_ids': object_ids,
500
+ 'transformation_matrix': transformation_matrix_torch,
501
+ 'transformation_matrix_clean': transformation_matrix_clean_torch,
502
+ 'modality_name': modality_name})
503
+
504
+ if self.visualize:
505
+ origin_lidar = \
506
+ np.array(
507
+ downsample_lidar_minimum(pcd_np_list=origin_lidar))
508
+ origin_lidar = torch.from_numpy(origin_lidar)
509
+ output_dict[cav_id].update({'origin_lidar': origin_lidar})
510
+
511
+ if self.visualize:
512
+ projected_lidar_stack = [torch.from_numpy(
513
+ np.vstack(projected_lidar_list))]
514
+ output_dict['ego'].update({'origin_lidar': projected_lidar_stack})
515
+ # output_dict['ego'].update({'projected_lidar_list': projected_lidar_list})
516
+
517
+ return output_dict
518
+
519
+
520
+ def post_process(self, data_dict, output_dict):
521
+ """
522
+ Process the outputs of the model to 2D/3D bounding box.
523
+
524
+ Parameters
525
+ ----------
526
+ data_dict : dict
527
+ The dictionary containing the origin input data of model.
528
+
529
+ output_dict :dict
530
+ The dictionary containing the output of the model.
531
+
532
+ Returns
533
+ -------
534
+ pred_box_tensor : torch.Tensor
535
+ The tensor of prediction bounding box after NMS.
536
+ gt_box_tensor : torch.Tensor
537
+ The tensor of gt bounding box.
538
+ """
539
+ pred_box_tensor, pred_score = self.post_processor.post_process(
540
+ data_dict, output_dict
541
+ )
542
+ gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict)
543
+
544
+ return pred_box_tensor, pred_score, gt_box_tensor
545
+
546
+ def post_process_no_fusion(self, data_dict, output_dict_ego):
547
+ data_dict_ego = OrderedDict()
548
+ data_dict_ego["ego"] = data_dict["ego"]
549
+ gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict)
550
+
551
+ pred_box_tensor, pred_score = self.post_processor.post_process(
552
+ data_dict_ego, output_dict_ego
553
+ )
554
+ return pred_box_tensor, pred_score, gt_box_tensor
555
+
556
+ def post_process_no_fusion_uncertainty(self, data_dict, output_dict_ego):
557
+ data_dict_ego = OrderedDict()
558
+ data_dict_ego['ego'] = data_dict['ego']
559
+ gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict)
560
+
561
+ pred_box_tensor, pred_score, uncertainty = \
562
+ self.post_processor.post_process(data_dict_ego, output_dict_ego, return_uncertainty=True)
563
+ return pred_box_tensor, pred_score, gt_box_tensor, uncertainty
564
+
565
+ return LateheterFusionDataset
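The heterogeneous variant above stores one pre-processor and one data_aug_conf per modality via setattr() and later fetches them with eval(f"self.pre_processor_{modality_name}"). A getattr-based accessor, sketched below, retrieves the same attributes without eval; the helper name is an assumption added here for illustration only.

# Hypothetical eval-free accessor for the per-modality attributes created in __init__ above.
def get_modality_preprocessor(self, modality_name):
    # attribute names follow the f"pre_processor_{modality_name}" convention used by setattr()
    return getattr(self, f"pre_processor_{modality_name}")

# usage inside get_item_single_car:
#   processed_lidar = self.get_modality_preprocessor(modality_name).preprocess(lidar_np)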
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_multi_fusion_dataset.py ADDED
@@ -0,0 +1,631 @@
1
+ # late fusion dataset
2
+ import random
3
+ import math
4
+ from collections import OrderedDict
5
+ import cv2
6
+ import numpy as np
7
+ import torch
8
+ import copy
9
+ from icecream import ic
10
+ from PIL import Image
11
+ import pickle as pkl
12
+ from opencood.utils import box_utils as box_utils
13
+ from opencood.data_utils.pre_processor import build_preprocessor
14
+ from opencood.data_utils.post_processor import build_postprocessor
15
+ from opencood.utils.camera_utils import (
16
+ sample_augmentation,
17
+ img_transform,
18
+ normalize_img,
19
+ img_to_tensor,
20
+ )
21
+ from opencood.data_utils.augmentor.data_augmentor import DataAugmentor
22
+ from opencood.utils.transformation_utils import x1_to_x2
23
+ from opencood.utils.pose_utils import add_noise_data_dict
24
+ from opencood.utils.pcd_utils import (
25
+ mask_points_by_range,
26
+ mask_ego_points,
27
+ shuffle_points,
28
+ downsample_lidar_minimum,
29
+ )
30
+
31
+
32
+
33
+ def getLateclassFusionDataset(cls):
34
+ """
35
+ cls: the BaseDataset (or another parent dataset class) that provides basic interfaces such as:
36
+ - retrieve_base_data()
37
+ - generate_object_center_single()
38
+ - self.post_processor
39
+ - self.pre_processor
40
+ - self.selector (if the heterogeneous configuration is used)
41
+ and so on.
42
+ """
43
+ class LateclassFusionDataset(cls):
44
+ def __init__(self, params, visualize, train=True):
45
+ super().__init__(params, visualize, train)
46
+ self.anchor_box = self.post_processor.generate_anchor_box()
47
+ self.anchor_box_torch = torch.from_numpy(self.anchor_box)
48
+
49
+ # whether to enable heterogeneous learning (e.g. some agents use lidar, others use camera)
50
+ self.heterogeneous = False
51
+ if "heter" in params:
52
+ self.heterogeneous = True
53
+
54
+ # whether multi-class detection is enabled
55
+ self.multiclass = params["model"]["args"].get("multi_class", False)
56
+
57
+ # if needed, the list of class IDs for multi-class detection can be given here,
58
+ # e.g. [0, 1, 3] corresponding to car / pedestrian / cyclist
59
+ self.class_list = params.get("class_list", [0, 1, 3])
60
+ # if the project distinguishes classes via [ 'all', 0, 1, 3 ], adjust accordingly
61
+
62
+ # used for visualization
63
+ self.visualize = visualize
64
+ self.train = train
65
+
66
+ def __getitem__(self, idx):
67
+ """
68
+ Training: randomly select one CAV for late-fusion supervision (consistent with LateFusionDataset);
69
+ Testing/validation: keep the information of all CAVs within range.
70
+ """
71
+ base_data_dict = self.retrieve_base_data(idx)
72
+ if self.train:
73
+ reformat_data_dict = self.get_item_train(base_data_dict)
74
+ else:
75
+ reformat_data_dict = self.get_item_test(base_data_dict, idx)
76
+ return reformat_data_dict
77
+
78
+ def get_item_train(self, base_data_dict):
79
+ """
80
+ Training-phase logic: usually only one CAV (with labels) is sampled
81
+ to reduce memory overhead and stay close to single-vehicle training.
82
+ """
83
+ from collections import OrderedDict
84
+ processed_data_dict = OrderedDict()
85
+
86
+ # data perturbation / noise (if configured)
87
+ base_data_dict = self.add_noise_data_if_needed(base_data_dict)
88
+
89
+ # randomly sample a single CAV
90
+ if not self.visualize:
91
+ selected_cav_id, selected_cav_base = random.choice(
92
+ list(base_data_dict.items())
93
+ )
94
+ else:
95
+ # for visualization, the ego vehicle is usually selected
96
+ selected_cav_id, selected_cav_base = list(base_data_dict.items())[0]
97
+
98
+ # process a single vehicle (including multi-class bounding boxes)
99
+ cav_processed = self.get_item_single_car(selected_cav_base)
100
+ processed_data_dict["ego"] = cav_processed
101
+ return processed_data_dict
102
+
103
+ def get_item_test(self, base_data_dict, idx):
104
+ """
105
+ Testing/validation: keep all CAVs within comm_range; each one needs its own late-fusion label.
106
+ """
107
+ from collections import OrderedDict
108
+ import math
109
+
110
+ base_data_dict = self.add_noise_data_if_needed(base_data_dict)
111
+
112
+ processed_data_dict = OrderedDict()
113
+ ego_id, ego_pose = -1, None
114
+ # first locate the ego vehicle
115
+ for cav_id, cav_content in base_data_dict.items():
116
+ if cav_content["ego"]:
117
+ ego_id = cav_id
118
+ ego_pose = cav_content["params"]["lidar_pose"]
119
+ ego_pose_clean = cav_content["params"]["lidar_pose_clean"]
120
+ break
121
+ assert ego_id != -1
122
+
123
+ cav_id_list = []
124
+ for cav_id, cav_content in base_data_dict.items():
125
+ distance = math.sqrt(
126
+ (cav_content["params"]["lidar_pose"][0] - ego_pose[0]) ** 2
127
+ + (cav_content["params"]["lidar_pose"][1] - ego_pose[1]) ** 2
128
+ )
129
+ if distance <= self.params["comm_range"]:
130
+ cav_id_list.append(cav_id)
131
+
132
+ cav_id_list_newname = []
133
+ for cav_id in cav_id_list:
134
+ selected_cav_base = base_data_dict[cav_id]
135
+ transformation_matrix = self.x1_to_x2(
136
+ selected_cav_base["params"]["lidar_pose"], ego_pose
137
+ )
138
+ transformation_matrix_clean = self.x1_to_x2(
139
+ selected_cav_base["params"]["lidar_pose_clean"], ego_pose_clean
140
+ )
141
+ cav_processed = self.get_item_single_car(selected_cav_base)
142
+ cav_processed.update(
143
+ {
144
+ "transformation_matrix": transformation_matrix,
145
+ "transformation_matrix_clean": transformation_matrix_clean,
146
+ }
147
+ )
148
+ # rename the ego vehicle itself to "ego"; keep cav_id for the others
149
+ update_cav_key = "ego" if cav_id == ego_id else cav_id
150
+ processed_data_dict[update_cav_key] = cav_processed
151
+ cav_id_list_newname.append(update_cav_key)
152
+
153
+ # extra information for the heterogeneous setting
154
+ if self.heterogeneous:
155
+ processed_data_dict["ego"]["idx"] = idx
156
+ processed_data_dict["ego"]["cav_list"] = cav_id_list_newname
157
+
158
+ return processed_data_dict
159
+
160
+ def get_item_single_car(self, cav_base):
161
+ """
162
+ Process a single vehicle's information and generate its multi-class labels, lidar data, camera data, etc.
163
+ """
164
+ selected_cav_processed = {}
165
+
166
+ # 1) generate multi-class or single-class object boxes
167
+ # for multi-class, store each class's boxes separately or stack them into [num_class, max_box, 7] at once
168
+ if self.multiclass:
169
+ # example: parse the three classes in class_list = [0, 1, 3] separately
170
+ # the simplest way is to call generate_object_center_single once per class with cav_base["params"]["lidar_pose_clean"]
171
+ # and then stack the results
172
+ all_box_list, all_mask_list, all_ids_list = [], [], []
173
+ for cls_id in self.class_list:
174
+ box_c, mask_c, ids_c = self.generate_object_center_single(
175
+ [cav_base],
176
+ cav_base["params"]["lidar_pose_clean"],
177
+ class_type=cls_id, # generate_object_center_single can filter boxes by class_type
178
+ )
179
+ all_box_list.append(box_c)
180
+ all_mask_list.append(mask_c)
181
+ all_ids_list.append(ids_c)
182
+
183
+ # stack into [num_class, max_box, 7] / [num_class, max_box]
184
+ # note that the max_box returned by generate_object_center_single may differ per class,
185
+ # so zero-pad or slice to the same size (see the existing Late/IntermediateFusion implementations).
186
+ object_bbx_center, object_bbx_mask = self.stack_multiclass_label(
187
+ all_box_list, all_mask_list
188
+ )
189
+ # object_ids can be kept as one list per class, or stored as a single [num_class, ...] structure
190
+ object_ids = all_ids_list # could also be given special handling
191
+ else:
192
+ # single-class case: a single call is enough
193
+ object_bbx_center, object_bbx_mask, object_ids = (
194
+ self.generate_object_center_single(
195
+ [cav_base], cav_base["params"]["lidar_pose_clean"]
196
+ )
197
+ )
198
+
199
+ # 2) lidar (or camera) processing
200
+ # if lidar is needed, voxelize it via self.pre_processor
201
+ if self.load_lidar_file or self.visualize:
202
+ lidar_np = cav_base["lidar_np"]
203
+ # basic processing such as shuffle_points, mask_points_by_range, mask_ego_points
204
+ lidar_np = self.basic_lidar_preprocess(lidar_np)
205
+ # data augmentation (if needed)
206
+ lidar_np, object_bbx_center, object_bbx_mask = self.augment_if_needed(
207
+ lidar_np, object_bbx_center, object_bbx_mask
208
+ )
209
+ # the actual processing, e.g. voxelization / BEV projection
210
+ processed_lidar = self.pre_processor.preprocess(lidar_np)
211
+ selected_cav_processed["processed_lidar"] = processed_lidar
212
+
213
+ if self.visualize:
214
+ selected_cav_processed["origin_lidar"] = lidar_np
215
+
216
+ # 3) camera processing
217
+ if self.load_camera_file:
218
+ # same logic as in LateFusionDataset
219
+ camera_inputs = self.process_camera_data(cav_base)
220
+ selected_cav_processed["image_inputs"] = camera_inputs
221
+
222
+ # 4) store the multi-class boxes
223
+ selected_cav_processed.update(
224
+ {
225
+ "object_bbx_center": object_bbx_center,
226
+ "object_bbx_mask": object_bbx_mask,
227
+ "object_ids": object_ids,
228
+ }
229
+ )
230
+
231
+ # 5) generate labels; multi-class detection also needs multi-class labels
232
+ if self.multiclass:
233
+ # wrap post_processor.generate_label(...) yourself to support multi-class,
234
+ # or call it once per class
235
+ label_dict = self.post_processor.generate_label_multiclass(
236
+ object_bbx_center, # [num_class, max_box, 7]
237
+ self.anchor_box,
238
+ object_bbx_mask, # [num_class, max_box]
239
+ )
240
+ else:
241
+ label_dict = self.post_processor.generate_label(
242
+ object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask
243
+ )
244
+
245
+ selected_cav_processed["label_dict"] = label_dict
246
+ return selected_cav_processed
247
+
248
+ ############################
249
+ # collate_batch related logic #
250
+ ############################
251
+ def collate_batch_train(self, batch):
252
+ """
253
+ Training-set collate:
254
+ since only one CAV is randomly sampled during training, the batch can be concatenated directly.
255
+ For true multi-CAV late-fusion supervision during training, follow the test collate logic instead.
256
+ """
257
+ import torch
258
+ from collections import OrderedDict
259
+ output_dict = {"ego": {}}
260
+
261
+ object_bbx_center_list = []
262
+ object_bbx_mask_list = []
263
+ label_dict_list = []
264
+ origin_lidar_list = []
265
+
266
+ processed_lidar_list = []
267
+
268
+ for item in batch:
269
+ ego_data = item["ego"]
270
+ object_bbx_center_list.append(ego_data["object_bbx_center"])
271
+ object_bbx_mask_list.append(ego_data["object_bbx_mask"])
272
+ label_dict_list.append(ego_data["label_dict"])
273
+
274
+ if self.visualize and "origin_lidar" in ego_data:
275
+ origin_lidar_list.append(ego_data["origin_lidar"])
276
+
277
+ if "processed_lidar" in ego_data:
278
+ processed_lidar_list.append(ego_data["processed_lidar"])
279
+
280
+ # convert to tensors
281
+ object_bbx_center_torch = self.list_to_tensor(object_bbx_center_list)
282
+ object_bbx_mask_torch = self.list_to_tensor(object_bbx_mask_list)
283
+
284
+ # collate the multi-class (or single-class) labels
285
+ label_torch_dict = self.post_processor.collate_batch(label_dict_list)
286
+ # for CenterPoint, object_bbx_center_torch etc. must also be merged into label_torch_dict
287
+ label_torch_dict.update(
288
+ {
289
+ "object_bbx_center": object_bbx_center_torch,
290
+ "object_bbx_mask": object_bbx_mask_torch,
291
+ }
292
+ )
293
+
294
+ output_dict["ego"].update(
295
+ {
296
+ "object_bbx_center": object_bbx_center_torch,
297
+ "object_bbx_mask": object_bbx_mask_torch,
298
+ "anchor_box": torch.from_numpy(self.anchor_box),
299
+ "label_dict": label_torch_dict,
300
+ }
301
+ )
302
+
303
+ # lidar
304
+ if len(processed_lidar_list) > 0:
305
+ processed_lidar_torch_dict = self.pre_processor.collate_batch(
306
+ processed_lidar_list
307
+ )
308
+ output_dict["ego"]["processed_lidar"] = processed_lidar_torch_dict
309
+
310
+ # camera
311
+ if self.load_camera_file:
312
+ # as in LateFusionDataset: stack the camera information of the batch along each dimension
313
+ camera_inputs = self.collate_camera_inputs_train(batch)
314
+ output_dict["ego"]["image_inputs"] = camera_inputs
315
+
316
+ # visualization
317
+ if self.visualize and len(origin_lidar_list) > 0:
318
+ # downsample here if needed
319
+ origin_lidar_torch = self.list_to_tensor(origin_lidar_list)
320
+ output_dict["ego"]["origin_lidar"] = origin_lidar_torch
321
+
322
+ return output_dict
323
+
324
+ def collate_batch_test(self, batch):
325
+ """
326
+ Test-set (or validation-set) collate:
327
+ generally only batch_size=1 is supported (especially with multiple CAVs);
328
+ each CAV is then taken out separately for late-fusion processing.
329
+ """
330
+ assert len(batch) == 1, "Test time batch_size must be 1 for late fusion!"
331
+ batch = batch[0]
332
+
333
+ output_dict = {}
334
+ # heterogeneous
335
+ if self.heterogeneous and "idx" in batch["ego"]:
336
+ idx = batch["ego"]["idx"]
337
+ cav_list = batch["ego"]["cav_list"]
338
+ # choose which CAVs use lidar / camera
339
+ # lidar_agent, camera_agent = self.selector.select_agent(idx)
340
+ # ...
341
+
342
+ # collect and collate
343
+ if self.visualize:
344
+ import copy
345
+ projected_lidar_list = []
346
+
347
+ for cav_id, cav_content in batch.items():
348
+ output_dict[cav_id] = {}
349
+ # reshape object_bbx_center/mask to [1, ...]
350
+ object_bbx_center = self.unsqueeze_to_batch(cav_content["object_bbx_center"])
351
+ object_bbx_mask = self.unsqueeze_to_batch(cav_content["object_bbx_mask"])
352
+
353
+ label_dict = self.post_processor.collate_batch([cav_content["label_dict"]])
354
+ # for CenterPoint, object_bbx_center/mask must be put back into label_dict
355
+ label_dict.update(
356
+ {
357
+ "object_bbx_center": object_bbx_center,
358
+ "object_bbx_mask": object_bbx_mask,
359
+ }
360
+ )
361
+
362
+ # lidar
363
+ if "processed_lidar" in cav_content:
364
+ # processed_lidar of a single CAV only
365
+ processed_lidar_torch = self.pre_processor.collate_batch(
366
+ [cav_content["processed_lidar"]]
367
+ )
368
+ output_dict[cav_id]["processed_lidar"] = processed_lidar_torch
369
+
370
+ # camera
371
+ if self.load_camera_file and "image_inputs" in cav_content:
372
+ # likewise, collate a single CAV only
373
+ cam_torch = self.collate_camera_inputs_test(cav_content)
374
+ output_dict[cav_id]["image_inputs"] = cam_torch
375
+
376
+ # for the heterogeneous setting, keep/drop inputs depending on cav_id
377
+ # if self.heterogeneous:
378
+ # pass
379
+
380
+ # store the transformation matrices
381
+ output_dict[cav_id]["transformation_matrix"] = torch.from_numpy(
382
+ cav_content["transformation_matrix"]
383
+ ).float()
384
+ output_dict[cav_id]["transformation_matrix_clean"] = torch.from_numpy(
385
+ cav_content["transformation_matrix_clean"]
386
+ ).float()
387
+
388
+ # labels and other information
389
+ output_dict[cav_id].update(
390
+ {
391
+ "object_bbx_center": object_bbx_center,
392
+ "object_bbx_mask": object_bbx_mask,
393
+ "label_dict": label_dict,
394
+ "anchor_box": self.anchor_box_torch,
395
+ "object_ids": cav_content["object_ids"],
396
+ }
397
+ )
398
+
399
+ if self.visualize and "origin_lidar" in cav_content:
400
+ output_dict[cav_id]["origin_lidar"] = torch.from_numpy(
401
+ cav_content["origin_lidar"]
402
+ )
403
+
404
+ # if the point clouds of multiple CAVs need to be stacked onto the ego frame for visualization, do it here
405
+ return output_dict
406
+
407
+ ######################################
408
+ # multi-class post-processing example #
409
+ ######################################
410
+ def post_process(self, data_dict, output_dict):
411
+ """
412
+ For multi-class detection, call self.post_process_multiclass;
413
+ otherwise identical to ordinary late fusion.
414
+ """
415
+ if self.multiclass:
416
+ # returns [list of pred_box], [list of score], [list of gt_box], one element per class
417
+ return self.post_process_multiclass(data_dict, output_dict)
418
+ else:
419
+ pred_box, pred_score = self.post_processor.post_process(data_dict, output_dict)
420
+ gt_box = self.post_processor.generate_gt_bbx(data_dict)
421
+ return pred_box, pred_score, gt_box
422
+
423
+ def post_process_multiclass(self, data_dict, output_dict):
424
+ """
425
+ Multi-class post-processing: run NMS (or similar) once per class, then return the results together.
426
+ """
427
+ import copy
428
+
429
+ # num_class = len(self.class_list)
430
+ pred_box_tensor_list = []
431
+ pred_score_list = []
432
+ gt_box_tensor_list = []
433
+
434
+ # post-process each class independently
435
+ for i, cls_id in enumerate(self.class_list):
436
+ # 1) copy out the data belonging to this class only
437
+ data_dict_single, output_dict_single = self.split_single_class(
438
+ data_dict, output_dict, class_index=i
439
+ )
440
+ # 2) run the post-processing
441
+ pred_box_tensor, pred_score = self.post_processor.post_process(
442
+ data_dict_single, output_dict_single
443
+ )
444
+ gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict_single)
445
+
446
+ pred_box_tensor_list.append(pred_box_tensor)
447
+ pred_score_list.append(pred_score)
448
+ gt_box_tensor_list.append(gt_box_tensor)
449
+
450
+ return pred_box_tensor_list, pred_score_list, gt_box_tensor_list
451
+
452
+ ############################################
453
+ # reusable / simplified helper methods below (adapt to the project as needed) #
454
+ ############################################
455
+ def add_noise_data_if_needed(self, base_data_dict):
456
+ """
457
+ Decide whether to apply noise perturbation according to self.params["noise_setting"].
458
+ This directly reuses the existing add_noise_data_dict or add_noise_data_dict_asymmetric.
459
+ """
460
+ from opencood.utils.pose_utils import add_noise_data_dict
461
+ # replace with the asymmetric-noise variant if needed
462
+ return add_noise_data_dict(base_data_dict, self.params["noise_setting"])
463
+
464
+ def basic_lidar_preprocess(self, lidar_np):
465
+ """
466
+ Common point-cloud preprocessing such as range cropping, shuffling and removing ego-vehicle points.
467
+ """
468
+ from opencood.utils.pcd_utils import (
469
+ shuffle_points,
470
+ mask_points_by_range,
471
+ mask_ego_points,
472
+ )
473
+ lidar_np = shuffle_points(lidar_np)
474
+ lidar_np = mask_points_by_range(lidar_np, self.params["preprocess"]["cav_lidar_range"])
475
+ lidar_np = mask_ego_points(lidar_np)
476
+ return lidar_np
477
+
478
+ def augment_if_needed(self, lidar_np, object_bbx_center, object_bbx_mask):
479
+ """
480
+ If self.train and the heterogeneous mode is off, augment the point cloud / labels.
481
+ """
482
+ if self.train and not self.heterogeneous:
483
+ lidar_np, object_bbx_center, object_bbx_mask = self.augment(
484
+ lidar_np, object_bbx_center, object_bbx_mask
485
+ )
486
+ return lidar_np, object_bbx_center, object_bbx_mask
487
+
488
+ def process_camera_data(self, cav_base):
489
+ """
490
+ Augment the camera images according to the parameters (resize, crop, flip, etc.) and return them as a dict.
491
+ See the LateFusionDataset / LSS processing pipeline for reference.
492
+ """
493
+ # simplified placeholder; see get_item_single_car in the original LateFusionDataset for the full camera pipeline
494
+ camera_data_list = cav_base["camera_data"]
495
+ # ... apply augmentation and transforms ...
496
+ camera_inputs = {"imgs": None, "rots": None} # remaining keys elided in this placeholder
497
+ return camera_inputs
498
+
499
+ def collate_camera_inputs_train(self, batch):
500
+ """
501
+ Stack the images of the training batch along each dimension, e.g. [B, N, C, H, W].
502
+ """
503
+ # omitted; see collate_batch_train of LateFusionDataset
504
+ return {}
505
+
506
+ def collate_camera_inputs_test(self, cav_content):
507
+ """
508
+ Only a single CAV is collated at test time.
509
+ """
510
+ # see collate_batch_test of LateFusionDataset
511
+ return {}
512
+
513
+ def stack_multiclass_label(self, box_list, mask_list):
514
+ """
515
+ The input is a list whose elements are (max_box, 7)/(max_box,) arrays;
516
+ they are stacked into [num_class, max_box, 7] / [num_class, max_box].
517
+ If max_box differs between classes, find the maximum first and pad the rest.
518
+ """
519
+ import numpy as np
520
+ num_class = len(box_list)
521
+ max_box_counts = [b.shape[0] for b in box_list]
522
+ M = max(max_box_counts) if max_box_counts else 0
523
+
524
+ # combine the per-class arrays
525
+ box_array = []
526
+ mask_array = []
527
+ for i in range(num_class):
528
+ cur_box = box_list[i]
529
+ cur_mask = mask_list[i]
530
+ pad_size = M - cur_box.shape[0]
531
+ if pad_size > 0:
532
+ # pad with zeros
533
+ cur_box = np.concatenate(
534
+ [cur_box, np.zeros((pad_size, 7), dtype=cur_box.dtype)], axis=0
535
+ )
536
+ cur_mask = np.concatenate(
537
+ [cur_mask, np.zeros(pad_size, dtype=cur_mask.dtype)], axis=0
538
+ )
539
+ box_array.append(cur_box[None, ...]) # [1, M, 7]
540
+ mask_array.append(cur_mask[None, ...]) # [1, M]
541
+
542
+ if len(box_array) == 0:
543
+ # no objects at all
544
+ return np.zeros((0, 0, 7)), np.zeros((0, 0))
545
+
546
+ box_array = np.concatenate(box_array, axis=0) # [num_class, M, 7]
547
+ mask_array = np.concatenate(mask_array, axis=0) # [num_class, M]
548
+ return box_array, mask_array
549
+
550
+ def split_single_class(self, data_dict, output_dict, class_index):
551
+ """
552
+ Used by post_process_multiclass:
553
+ split the multi-class object_bbx_center/mask in data_dict/output_dict
554
+ into the sub-data of class class_index so that NMS can be run separately.
555
+ """
556
+ import copy
557
+ data_dict_single = {"ego": {}}
558
+ output_dict_single = {}
559
+
560
+ # iterate over all CAVs (late fusion)
561
+ for cav_id in data_dict.keys():
562
+ cav_content = data_dict[cav_id]
563
+ cav_output = output_dict[cav_id]
564
+
565
+ # object_bbx_center is assumed to be [num_class, M, 7] and the mask [num_class, M];
566
+ # take the slice belonging to class_index
567
+ single_box_center = cav_content["object_bbx_center"][class_index, ...]
568
+ single_mask = cav_content["object_bbx_mask"][class_index, ...]
569
+ # if object_ids is stored as one list per class, index it with class_index;
570
+ # if everything is merged together, extra bookkeeping is needed
571
+ if isinstance(cav_content["object_ids"], list):
572
+ single_ids = cav_content["object_ids"][class_index]
573
+ else:
574
+ single_ids = cav_content["object_ids"] # or handle according to how they are actually stored
575
+
576
+ # likewise, take the class_index slice of the network outputs cls_preds / reg_preds_multiclass
577
+ # (the exact shapes depend on the network's forward output)
578
+ cls_preds_single = cav_output["cls_preds"][
579
+ :, class_index : class_index + 1, :, :
580
+ ] # e.g. [B,1,H,W]
581
+ reg_preds_single = cav_output["reg_preds_multiclass"][
582
+ :, class_index, :, :
583
+ ] # [B,H,W,Nreg]
584
+
585
+ # build the new data_dict_single / output_dict_single
586
+ data_dict_single[cav_id] = copy.deepcopy(cav_content)
587
+ data_dict_single[cav_id]["object_bbx_center"] = single_box_center[None, ...] # keep a batch dimension
588
+ data_dict_single[cav_id]["object_bbx_mask"] = single_mask[None, ...]
589
+ data_dict_single[cav_id]["object_ids"] = single_ids
590
+
591
+ output_dict_single[cav_id] = copy.deepcopy(cav_output)
592
+ output_dict_single[cav_id]["cls_preds"] = cls_preds_single
593
+ output_dict_single[cav_id]["reg_preds"] = reg_preds_single
594
+
595
+ return data_dict_single, output_dict_single
596
+
597
+ ###################################################
598
+ # utility functions (same as the original LateFusionDataset / intermediate classes) #
599
+ ###################################################
600
+ def x1_to_x2(self, lidar_pose1, lidar_pose2):
601
+ """
602
+ Pose transformation matrix, identical to opencood.utils.transformation_utils.x1_to_x2.
603
+ """
604
+ return x1_to_x2(lidar_pose1, lidar_pose2)
605
+
606
+ def list_to_tensor(self, data_list):
607
+ """
608
+ Simple helper that turns a list of np.array into a torch.Tensor for batch concatenation.
609
+ """
610
+ import numpy as np
611
+ import torch
612
+ if len(data_list) == 0:
613
+ return None
614
+ arr = np.stack(data_list, axis=0)
615
+ return torch.from_numpy(arr)
616
+
617
+ def unsqueeze_to_batch(self, arr):
618
+ """
619
+ If arr is an np.ndarray, reshape it to [1, ...] and convert it to torch.
620
+ """
621
+ import numpy as np
622
+ import torch
623
+ if isinstance(arr, np.ndarray):
624
+ arr = arr[None, ...] # add a leading batch dimension
625
+ arr = torch.from_numpy(arr)
626
+ elif isinstance(arr, torch.Tensor) and arr.dim() == 2:
627
+ # [M,7] -> [1,M,7]
628
+ arr = arr.unsqueeze(0)
629
+ return arr
630
+
631
+ return LateMultiFusionDataset
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_multiclass_fusion_dataset.py ADDED
@@ -0,0 +1,1233 @@
1
+ # late fusion dataset
2
+ import random
3
+ import math
4
+ from collections import OrderedDict
5
+ import cv2
6
+ import numpy as np
7
+ import torch
8
+ import copy
9
+ from icecream import ic
10
+ from PIL import Image
11
+ import pickle as pkl
12
+ from opencood.utils import box_utils as box_utils
13
+ from opencood.data_utils.pre_processor import build_preprocessor
14
+ from opencood.data_utils.post_processor import build_postprocessor
15
+ from opencood.utils.camera_utils import (
16
+ sample_augmentation,
17
+ img_transform,
18
+ normalize_img,
19
+ img_to_tensor,
20
+ )
21
+ from opencood.data_utils.augmentor.data_augmentor import DataAugmentor
22
+ from opencood.utils.transformation_utils import x1_to_x2, x_to_world, get_pairwise_transformation
23
+ from opencood.utils.pose_utils import add_noise_data_dict, add_noise_data_dict_asymmetric
24
+ from opencood.utils.pcd_utils import (
25
+ mask_points_by_range,
26
+ mask_ego_points,
27
+ mask_ego_points_v2,
28
+ shuffle_points,
29
+ downsample_lidar_minimum,
30
+ )
31
+ from opencood.utils.common_utils import merge_features_to_dict
32
+
33
+ def getLatemulticlassFusionDataset(cls):
34
+ """
35
+ cls: the Basedataset.
36
+ """
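+ # Class-factory pattern: the returned class inherits from the supplied base dataset.
+ # Illustrative usage (base class name assumed, not defined in this file):
+ #   LateMulticlassDataset = getLatemulticlassFusionDataset(V2XVerseBaseDataset)
+ #   train_set = LateMulticlassDataset(params, visualize=False, train=True)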
37
+ class LatemulticlassFusionDataset(cls):
38
+ def __init__(self, params, visualize, train=True):
39
+ super().__init__(params, visualize, train)
40
+ self.anchor_box = self.post_processor.generate_anchor_box()
41
+ self.anchor_box_torch = torch.from_numpy(self.anchor_box)
42
+
43
+ self.heterogeneous = False
44
+ if 'heter' in params:
45
+ self.heterogeneous = True
46
+
47
+ self.multiclass = params['model']['args']['multi_class']
48
+
49
+ self.proj_first = False if 'proj_first' not in params['fusion']['args']\
50
+ else params['fusion']['args']['proj_first']
51
+
52
+ # self.proj_first = False
53
+ self.supervise_single = True if ('supervise_single' in params['model']['args'] and params['model']['args']['supervise_single']) \
54
+ else False
55
+ # self.supervise_single = False
56
+ self.online_eval_only = False
57
+
58
+
59
+ def __getitem__(self, idx, extra_source=None, data_dir=None):
60
+
61
+ if data_dir is not None:
62
+ extra_source=1
63
+
64
+ object_bbx_center_list = []
65
+ object_bbx_mask_list = []
66
+ object_id_dict = {}
67
+
68
+ object_bbx_center_list_single = []
69
+ object_bbx_mask_list_single = []
70
+
71
+ gt_object_bbx_center_list = []
72
+ gt_object_bbx_mask_list = []
73
+ gt_object_id_dict = {}
74
+
75
+ gt_object_bbx_center_list_single = []
76
+ gt_object_bbx_mask_list_single = []
77
+
78
+ output_dict = {}
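+ # Only tpe == 'all' builds the shared lidar/camera inputs; the integer ids
+ # 0/1/3 are the per-class label branches, whose concrete semantics come from
+ # the label generator of the underlying base dataset (class mapping assumed there).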
79
+ for tpe in ['all', 0, 1, 3]:
80
+ output_single_class = self.__getitem_single_class__(idx, tpe, extra_source, data_dir)
81
+ output_dict[tpe] = output_single_class
82
+ if tpe == 'all' and extra_source is None:
83
+ continue
84
+ elif tpe == 'all' and extra_source is not None:
85
+ break
86
+ object_bbx_center_list.append(output_single_class['ego']['object_bbx_center'])
87
+ object_bbx_mask_list.append(output_single_class['ego']['object_bbx_mask'])
88
+ object_id_dict[tpe] = output_single_class['ego']['object_ids']
89
+
90
+ gt_object_bbx_center_list.append(output_single_class['ego']['gt_object_bbx_center'])
91
+ gt_object_bbx_mask_list.append(output_single_class['ego']['gt_object_bbx_mask'])
92
+ gt_object_id_dict[tpe] = output_single_class['ego']['gt_object_ids']
93
+
94
+ if self.multiclass and extra_source is None:
95
+ output_dict['all']['ego']['object_bbx_center'] = np.stack(object_bbx_center_list, axis=0)
96
+ output_dict['all']['ego']['object_bbx_mask'] = np.stack(object_bbx_mask_list, axis=0)
97
+ output_dict['all']['ego']['object_ids'] = object_id_dict
98
+
99
+ output_dict['all']['ego']['gt_object_bbx_center'] = np.stack(gt_object_bbx_center_list, axis=0)
100
+ output_dict['all']['ego']['gt_object_bbx_mask'] = np.stack(gt_object_bbx_mask_list, axis=0)
101
+ output_dict['all']['ego']['gt_object_ids'] = gt_object_id_dict
102
+
103
+
104
+ return output_dict['all']
105
+
106
+ def __getitem_single_class__(self, idx, tpe=None, extra_source=None, data_dir=None):
107
+
108
+ if extra_source is None and data_dir is None:
109
+ base_data_dict = self.retrieve_base_data(idx, tpe) ## {id:{'ego':True/False, 'params': {'lidar_pose','speed','vehicles','ego_pos',...}, 'lidar_np': array (N,4)}}
110
+ elif data_dir is not None:
111
+ base_data_dict = self.retrieve_base_data(idx=None, tpe=tpe, data_dir=data_dir)
112
+ elif extra_source is not None:
113
+ base_data_dict = self.retrieve_base_data(idx=None, tpe=tpe, extra_source=extra_source)
114
+
115
+ # base_data_dict = add_noise_data_dict(base_data_dict,self.params['noise_setting'])
116
+ base_data_dict = add_noise_data_dict_asymmetric(base_data_dict,self.params['noise_setting'])
117
+ processed_data_dict = OrderedDict()
118
+ processed_data_dict['ego'] = {}
119
+ ego_id = -1
120
+ ego_lidar_pose = []
121
+ ego_cav_base = None
122
+ cav_id_list = []
123
+ lidar_pose_list = []
124
+ too_far = []
125
+ # first find the ego vehicle's lidar pose
126
+ for cav_id, cav_content in base_data_dict.items():
127
+
128
+ if cav_content['ego']:
129
+ ego_id = cav_id
130
+ ego_lidar_pose = cav_content['params']['lidar_pose']
131
+ ego_lidar_pose_clean = cav_content['params']['lidar_pose_clean']
132
+ ego_cav_base = cav_content
133
+ break
134
+
135
+ assert ego_id != -1
136
+ assert len(ego_lidar_pose) > 0
137
+
138
+ agents_image_inputs = []
139
+ processed_features = []
140
+ object_stack = []
141
+ object_mask_stack = []
142
+ object_id_stack = []
143
+
144
+ gt_object_stack = []
145
+ gt_object_mask_stack = []
146
+ gt_object_id_stack = []
147
+
148
+ single_label_list = []
149
+ single_object_bbx_center_list = []
150
+ single_object_bbx_mask_list = []
151
+ too_far = []
152
+ lidar_pose_list = []
153
+ lidar_pose_clean_list = []
154
+ cav_id_list = []
155
+ projected_lidar_clean_list = [] # disconet
156
+
157
+ if self.visualize:
158
+ projected_lidar_stack = []
159
+
160
+ # loop over all CAVs to process information
161
+ for cav_id, selected_cav_base in base_data_dict.items():
162
+ distance = \
163
+ math.sqrt((selected_cav_base['params']['lidar_pose'][0] -
164
+ ego_lidar_pose[0]) ** 2 + (
165
+ selected_cav_base['params'][
166
+ 'lidar_pose'][1] - ego_lidar_pose[
167
+ 1]) ** 2)
168
+ if distance > self.params['comm_range']:
169
+ too_far.append(cav_id)
170
+ continue
171
+ cav_id_list.append(cav_id)
172
+ lidar_pose_list.append(selected_cav_base['params']['lidar_pose'])
173
+ lidar_pose_clean_list.append(selected_cav_base['params']['lidar_pose_clean'])
174
+
175
+ for cav_id in too_far:
176
+ base_data_dict.pop(cav_id)
177
+
178
+ pairwise_t_matrix = \
179
+ get_pairwise_transformation(base_data_dict,
180
+ self.max_cav,
181
+ self.proj_first)
182
+ cav_num = len(cav_id_list)
183
+ cav_id_list_newname = []
184
+
185
+ lidar_poses = np.array(lidar_pose_list).reshape(-1, 6) # [N_cav, 6]
186
+ lidar_poses_clean = np.array(lidar_pose_clean_list).reshape(-1, 6) # [N_cav, 6]
187
+
188
+ for cav_id in cav_id_list:
189
+ selected_cav_base = base_data_dict[cav_id]
190
+ # find the transformation matrix from current cav to ego.
191
+ cav_lidar_pose = selected_cav_base['params']['lidar_pose']
192
+ transformation_matrix = x1_to_x2(cav_lidar_pose, ego_lidar_pose)
193
+ cav_lidar_pose_clean = selected_cav_base['params']['lidar_pose_clean']
194
+ transformation_matrix_clean = x1_to_x2(cav_lidar_pose_clean, ego_lidar_pose_clean)
195
+
196
+ selected_cav_processed = \
197
+ self.get_item_single_car(selected_cav_base,
198
+ ego_cav_base,
199
+ tpe,
200
+ extra_source!=None)
201
+ selected_cav_processed.update({'transformation_matrix': transformation_matrix,
202
+ 'transformation_matrix_clean': transformation_matrix_clean})
203
+ if extra_source is None:
204
+ object_stack.append(selected_cav_processed['object_bbx_center'])
205
+ object_mask_stack.append(selected_cav_processed['object_bbx_mask'])
206
+ object_id_stack += selected_cav_processed['object_ids']
207
+
208
+
209
+ gt_object_stack.append(selected_cav_processed['gt_object_bbx_center'])
210
+ gt_object_mask_stack.append(selected_cav_processed['gt_object_bbx_mask'])
211
+ gt_object_id_stack += selected_cav_processed['gt_object_ids']
212
+
213
+ if tpe == 'all':
214
+
215
+ if self.load_lidar_file:
216
+ processed_features.append(
217
+ selected_cav_processed['processed_lidar'])
218
+
219
+ if self.load_camera_file:
220
+ agents_image_inputs.append(
221
+ selected_cav_processed['image_inputs'])
222
+
223
+ if self.visualize:
224
+ projected_lidar_stack.append(
225
+ selected_cav_processed['projected_lidar'])
226
+
227
+
228
+ if self.supervise_single and extra_source is None :
229
+ single_label_list.append(selected_cav_processed['single_label_dict'])
230
+ single_object_bbx_center_list.append(selected_cav_processed['single_object_bbx_center'])
231
+ single_object_bbx_mask_list.append(selected_cav_processed['single_object_bbx_mask'])
232
+
233
+ update_cav = "ego" if cav_id == ego_id else cav_id
234
+ processed_data_dict.update({update_cav: selected_cav_processed})
235
+ cav_id_list_newname.append(update_cav)
236
+
237
+ if self.supervise_single and extra_source is None:
238
+ single_label_dicts = {}
239
+ if tpe == 'all':
240
+ # unused label
241
+ if False:
242
+ single_label_dicts = self.post_processor.collate_batch(single_label_list)
243
+ single_object_bbx_center = torch.from_numpy(np.array(single_object_bbx_center_list))
244
+ single_object_bbx_mask = torch.from_numpy(np.array(single_object_bbx_mask_list))
245
+ processed_data_dict['ego'].update({
246
+ "single_label_dict_torch": single_label_dicts,
247
+ "single_object_bbx_center_torch": single_object_bbx_center,
248
+ "single_object_bbx_mask_torch": single_object_bbx_mask,
249
+ })
250
+
251
+ # heterogeneous
252
+ if self.heterogeneous:
253
+ processed_data_dict['ego']['idx'] = idx
254
+ processed_data_dict['ego']['cav_list'] = cav_id_list_newname
255
+
256
+ if extra_source is None:
257
+ unique_indices = \
258
+ [object_id_stack.index(x) for x in set(object_id_stack)]
259
+ object_stack = np.vstack(object_stack)
260
+ object_mask_stack = np.concatenate(object_mask_stack)
261
+ object_stack = object_stack[unique_indices]
262
+ object_mask_stack = object_mask_stack[unique_indices]
263
+
264
+ # make sure bounding boxes across all frames have the same number
265
+ object_bbx_center = \
266
+ np.zeros((self.params['postprocess']['max_num'], 7))
267
+ mask = np.zeros(self.params['postprocess']['max_num'])
268
+ object_bbx_center[:object_stack.shape[0], :] = object_stack
269
+ mask[:object_mask_stack.shape[0]] = object_mask_stack
270
+ # mask[:object_mask_stack.shape[0]] = 1
271
+
272
+ gt_unique_indices = \
273
+ [gt_object_id_stack.index(x) for x in set(gt_object_id_stack)]
274
+ gt_object_stack = np.vstack(gt_object_stack)
275
+ gt_object_mask_stack = np.concatenate(gt_object_mask_stack)
276
+ gt_object_stack = gt_object_stack[gt_unique_indices]
277
+ gt_object_mask_stack = gt_object_mask_stack[gt_unique_indices]
278
+
279
+ # make sure bounding boxes across all frames have the same number
280
+ gt_object_bbx_center = \
281
+ np.zeros((self.params['postprocess']['max_num'], 7))
282
+ gt_mask = np.zeros(self.params['postprocess']['max_num'])
283
+ gt_object_bbx_center[:gt_object_stack.shape[0], :] = gt_object_stack
284
+ gt_mask[:gt_object_mask_stack.shape[0]] = gt_object_mask_stack
285
+ # gt_mask[:gt_object_mask_stack.shape[0]] = 1
286
+
287
+ processed_data_dict['ego'].update(
288
+ {'object_bbx_center': object_bbx_center, # (100,7)
289
+ 'object_bbx_mask': mask, # (100,)
290
+ 'object_ids': [object_id_stack[i] for i in unique_indices],
291
+ }
292
+ )
293
+
294
+ # generate targets label
295
+ label_dict = {}
296
+ # if tpe == 'all':
297
+ # unused label
298
+ if extra_source is None:
299
+ label_dict = \
300
+ self.post_processor.generate_label(
301
+ gt_box_center=object_bbx_center,
302
+ anchors=self.anchor_box,
303
+ mask=mask)
304
+ gt_label_dict = \
305
+ self.post_processor.generate_label(
306
+ gt_box_center=gt_object_bbx_center,
307
+ anchors=self.anchor_box,
308
+ mask=gt_mask)
309
+
310
+
311
+ processed_data_dict['ego'].update(
312
+ {'gt_object_bbx_center': gt_object_bbx_center, # (100,7)
313
+ 'gt_object_bbx_mask': gt_mask, # (100,)
314
+ 'gt_object_ids': [gt_object_id_stack[i] for i in gt_unique_indices],
315
+ 'gt_label_dict': gt_label_dict})
316
+
317
+ processed_data_dict['ego'].update(
318
+ {
319
+ 'anchor_box': self.anchor_box,
320
+ 'label_dict': label_dict,
321
+ 'cav_num': cav_num,
322
+ 'pairwise_t_matrix': pairwise_t_matrix,
323
+ 'lidar_poses_clean': lidar_poses_clean,
324
+ 'lidar_poses': lidar_poses})
325
+
326
+ if tpe == 'all':
327
+ if self.load_lidar_file:
328
+ merged_feature_dict = merge_features_to_dict(processed_features)
329
+ processed_data_dict['ego'].update({'processed_lidar': merged_feature_dict})
330
+
331
+ if self.load_camera_file:
332
+ merged_image_inputs_dict = merge_features_to_dict(agents_image_inputs, merge='stack')
333
+ processed_data_dict['ego'].update({'image_inputs': merged_image_inputs_dict})
334
+
335
+ if self.visualize:
336
+ processed_data_dict['ego'].update({'origin_lidar':
337
+ # projected_lidar_stack})
338
+ np.vstack(
339
+ projected_lidar_stack)})
340
+ processed_data_dict['ego'].update({'lidar_len': [len(projected_lidar_stack[i]) for i in range(len(projected_lidar_stack))]})
341
+
342
+
343
+ processed_data_dict['ego'].update({'sample_idx': idx,
344
+ 'cav_id_list': cav_id_list})
345
+
346
+ img_front_list = []
347
+ img_left_list = []
348
+ img_right_list = []
349
+ BEV_list = []
350
+
351
+ if self.visualize:
352
+ for car_id in base_data_dict:
353
+ if not base_data_dict[car_id]['ego'] == True:
354
+ continue
355
+ if 'rgb_front' in base_data_dict[car_id] and 'rgb_left' in base_data_dict[car_id] and 'rgb_right' in base_data_dict[car_id] and 'BEV' in base_data_dict[car_id] :
356
+ img_front_list.append(base_data_dict[car_id]['rgb_front'])
357
+ img_left_list.append(base_data_dict[car_id]['rgb_left'])
358
+ img_right_list.append(base_data_dict[car_id]['rgb_right'])
359
+ BEV_list.append(base_data_dict[car_id]['BEV'])
360
+ processed_data_dict['ego'].update({'img_front': img_front_list,
361
+ 'img_left': img_left_list,
362
+ 'img_right': img_right_list,
363
+ 'BEV': BEV_list})
364
+ processed_data_dict['ego'].update({'scene_dict': base_data_dict['car_0']['scene_dict'],
365
+ 'frame_id': base_data_dict['car_0']['frame_id'],
366
+ })
367
+
368
+ return processed_data_dict
369
+
370
+ def get_item_single_car(self, selected_cav_base, ego_cav_base, tpe, online_eval=False):
371
+ """
372
+ Process a single CAV's information for the train/test pipeline.
373
+
374
+
375
+ Parameters
376
+ ----------
377
+ selected_cav_base : dict
378
+ The dictionary containing a single CAV's raw information,
379
+ including 'params' and 'camera_data'.
380
+
381
+ Returns
382
+ -------
383
+ selected_cav_processed : dict
384
+ The dictionary contains the cav's processed information.
385
+ """
386
+ selected_cav_processed = {}
387
+
388
+ if not online_eval:
389
+ # label
390
+ object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center_single(
391
+ [selected_cav_base], selected_cav_base["params"]["lidar_pose_clean"]
392
+ )
393
+
394
+ ego_pose, ego_pose_clean = ego_cav_base['params']['lidar_pose'], ego_cav_base['params']['lidar_pose_clean']
395
+
396
+
397
+ # calculate the transformation matrix
398
+ transformation_matrix = \
399
+ x1_to_x2(selected_cav_base['params']['lidar_pose'],
400
+ ego_pose) # T_ego_cav
401
+ transformation_matrix_clean = \
402
+ x1_to_x2(selected_cav_base['params']['lidar_pose_clean'],
403
+ ego_pose_clean)
404
+
405
+ # lidar
406
+ if tpe == 'all':
407
+ if self.load_lidar_file or self.visualize:
408
+ lidar_np = selected_cav_base['lidar_np']
409
+ lidar_np = shuffle_points(lidar_np)
410
+ lidar_np = mask_points_by_range(lidar_np,
411
+ self.params['preprocess'][
412
+ 'cav_lidar_range'])
413
+ # remove points that hit ego vehicle
414
+ lidar_np = mask_ego_points_v2(lidar_np)
415
+
416
+ # data augmentation, seems very important for single agent training, because lack of data diversity.
417
+ # only work for lidar modality in training.
418
+ if not self.heterogeneous and not online_eval:
419
+ lidar_np, object_bbx_center, object_bbx_mask = \
420
+ self.augment(lidar_np, object_bbx_center, object_bbx_mask)
421
+
422
+ projected_lidar = \
423
+ box_utils.project_points_by_matrix_torch(lidar_np[:, :3], transformation_matrix)
424
+
425
+ if self.proj_first:
426
+ lidar_np[:, :3] = projected_lidar
427
+
428
+ if self.visualize:
429
+ # filter lidar
430
+ selected_cav_processed.update({'projected_lidar': projected_lidar})
431
+
432
+ lidar_dict = self.pre_processor.preprocess(lidar_np)
433
+ selected_cav_processed.update({'processed_lidar': lidar_dict})
434
+
435
+ if self.visualize:
436
+ selected_cav_processed.update({'origin_lidar': lidar_np})
437
+
438
+ if not online_eval:
439
+ object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center(
440
+ [selected_cav_base], selected_cav_base['params']['lidar_pose']
441
+ )
442
+
443
+ gt_object_bbx_center, gt_object_bbx_mask, gt_object_ids = self.generate_object_center(
444
+ [selected_cav_base], selected_cav_base['params']['lidar_pose']
445
+ )
446
+
447
+ label_dict = self.post_processor.generate_label(
448
+ gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask
449
+ )
450
+
451
+ gt_label_dict = self.post_processor.generate_label(
452
+ gt_box_center=gt_object_bbx_center, anchors=self.anchor_box, mask=gt_object_bbx_mask
453
+ )
454
+
455
+ selected_cav_processed.update({
456
+ "single_label_dict": label_dict,
457
+ "single_object_bbx_center": object_bbx_center,
458
+ "single_object_bbx_mask": object_bbx_mask})
459
+
460
+ # camera
461
+ if tpe == 'all':
462
+ if self.load_camera_file:
463
+ # adapted from https://github.com/nv-tlabs/lift-splat-shoot/blob/master/src/data.py
464
+ camera_data_list = selected_cav_base["camera_data"]
465
+
466
+ params = selected_cav_base["params"]
467
+ imgs = []
468
+ rots = []
469
+ trans = []
470
+ intrins = []
471
+ extrinsics = [] # cam_to_lidar
472
+ post_rots = []
473
+ post_trans = []
474
+
475
+ for idx, img in enumerate(camera_data_list):
476
+ camera_to_lidar, camera_intrinsic = self.get_ext_int(params, idx)
477
+
478
+ intrin = torch.from_numpy(camera_intrinsic)
479
+ rot = torch.from_numpy(
480
+ camera_to_lidar[:3, :3]
481
+ ) # R_wc, we consider world-coord is the lidar-coord
482
+ tran = torch.from_numpy(camera_to_lidar[:3, 3]) # T_wc
483
+
484
+ post_rot = torch.eye(2)
485
+ post_tran = torch.zeros(2)
486
+
487
+ img_src = [img]
488
+
489
+ # depth
490
+ if self.load_depth_file:
491
+ depth_img = selected_cav_base["depth_data"][idx]
492
+ img_src.append(depth_img)
493
+ else:
494
+ depth_img = None
495
+
496
+ # data augmentation
497
+ resize, resize_dims, crop, flip, rotate = sample_augmentation(
498
+ self.data_aug_conf, self.train
499
+ )
500
+ img_src, post_rot2, post_tran2 = img_transform(
501
+ img_src,
502
+ post_rot,
503
+ post_tran,
504
+ resize=resize,
505
+ resize_dims=resize_dims,
506
+ crop=crop,
507
+ flip=flip,
508
+ rotate=rotate,
509
+ )
510
+ # for convenience, make augmentation matrices 3x3
511
+ post_tran = torch.zeros(3)
512
+ post_rot = torch.eye(3)
513
+ post_tran[:2] = post_tran2
514
+ post_rot[:2, :2] = post_rot2
515
+
516
+ img_src[0] = normalize_img(img_src[0])
517
+ if self.load_depth_file:
518
+ img_src[1] = img_to_tensor(img_src[1]) * 255
519
+
520
+ imgs.append(torch.cat(img_src, dim=0))
521
+ intrins.append(intrin)
522
+ extrinsics.append(torch.from_numpy(camera_to_lidar))
523
+ rots.append(rot)
524
+ trans.append(tran)
525
+ post_rots.append(post_rot)
526
+ post_trans.append(post_tran)
527
+
528
+ selected_cav_processed.update(
529
+ {
530
+ "image_inputs":
531
+ {
532
+ "imgs": torch.stack(imgs), # [N, 3or4, H, W]
533
+ "intrins": torch.stack(intrins),
534
+ "extrinsics": torch.stack(extrinsics),
535
+ "rots": torch.stack(rots),
536
+ "trans": torch.stack(trans),
537
+ "post_rots": torch.stack(post_rots),
538
+ "post_trans": torch.stack(post_trans),
539
+ }
540
+ }
541
+ )
542
+
543
+ selected_cav_processed.update({"anchor_box": self.anchor_box})
544
+
545
+ if not online_eval:
546
+ object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center([selected_cav_base],
547
+ ego_pose_clean)
548
+
549
+ gt_object_bbx_center, gt_object_bbx_mask, gt_object_ids = self.generate_object_center([selected_cav_base],
550
+ ego_pose_clean)
551
+ selected_cav_processed.update(
552
+ {
553
+ "object_bbx_center": object_bbx_center,
554
+ "object_bbx_mask": object_bbx_mask,
555
+ "object_ids": object_ids,
556
+ }
557
+ )
558
+
559
+ selected_cav_processed.update(
560
+ {
561
+ "gt_object_bbx_center": gt_object_bbx_center[gt_object_bbx_mask == 1],
562
+ "gt_object_bbx_mask": gt_object_bbx_mask,
563
+ "gt_object_ids": gt_object_ids
564
+ }
565
+ )
566
+
567
+ # generate targets label
568
+ label_dict = self.post_processor.generate_label(
569
+ gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask
570
+ )
571
+ selected_cav_processed.update({"label_dict": label_dict})
572
+
573
+ selected_cav_processed.update(
574
+ {
575
+ 'transformation_matrix': transformation_matrix,
576
+ 'transformation_matrix_clean': transformation_matrix_clean
577
+ }
578
+ )
579
+
580
+ return selected_cav_processed
581
+
582
+
583
+ def collate_batch_train(self, batch, online_eval_only=False):
584
+ """
585
+ Customized collate function for pytorch dataloader during training
586
+ for early and late fusion dataset.
587
+
588
+ Parameters
589
+ ----------
590
+ batch : dict
591
+
592
+ Returns
593
+ -------
594
+ batch : dict
595
+ Reformatted batch.
596
+ """
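+ # Minimal wiring sketch (argument values assumed, not from this file):
+ #   loader = torch.utils.data.DataLoader(train_set, batch_size=4,
+ #                                        collate_fn=train_set.collate_batch_train)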
597
+ # during training, we only care about ego.
598
+ output_dict = {'ego': {}}
599
+
600
+ object_bbx_center = []
601
+ object_bbx_mask = []
602
+ processed_lidar_list = []
603
+ label_dict_list = []
604
+ origin_lidar = []
605
+
606
+ gt_object_bbx_center = []
607
+ gt_object_bbx_mask = []
608
+ gt_object_ids = []
609
+ gt_label_dict_list = []
610
+ record_len = []
611
+
612
+ object_ids = []
613
+ image_inputs_list = []
614
+ # used to record different scenario
615
+ record_len = []
616
+ label_dict_list = []
617
+ lidar_pose_list = []
618
+ origin_lidar = []
619
+ lidar_len = []
620
+ lidar_pose_clean_list = []
621
+
622
+ # heterogeneous
623
+ lidar_agent_list = []
624
+
625
+ # pairwise transformation matrix
626
+ pairwise_t_matrix_list = []
627
+
628
+ # disconet
629
+ teacher_processed_lidar_list = []
630
+
631
+ # image
632
+ img_front = []
633
+ img_left = []
634
+ img_right = []
635
+ BEV = []
636
+
637
+ dict_list = []
638
+
639
+ if self.supervise_single:
640
+ pos_equal_one_single = []
641
+ neg_equal_one_single = []
642
+ targets_single = []
643
+ object_bbx_center_single = []
644
+ object_bbx_mask_single = []
645
+
646
+ for i in range(len(batch)):
647
+ ego_dict = batch[i]['ego']
648
+
649
+ if not online_eval_only:
650
+ object_bbx_center.append(ego_dict['object_bbx_center'])
651
+ object_bbx_mask.append(ego_dict['object_bbx_mask'])
652
+ object_ids.append(ego_dict['object_ids'])
653
+
654
+ gt_object_bbx_center.append(ego_dict['gt_object_bbx_center'])
655
+ gt_object_bbx_mask.append(ego_dict['gt_object_bbx_mask'])
656
+
657
+ gt_object_ids.append(ego_dict['gt_object_ids'])
658
+
659
+ label_dict_list.append(ego_dict['label_dict'])
660
+
661
+ gt_label_dict_list.append(ego_dict['gt_label_dict'])
662
+
663
+ else:
664
+ object_ids.append(None)
665
+ gt_object_ids.append(None)
666
+
667
+ lidar_pose_list.append(ego_dict['lidar_poses']) # ego_dict['lidar_pose'] is np.ndarray [N,6]
668
+ lidar_pose_clean_list.append(ego_dict['lidar_poses_clean'])
669
+
670
+ if self.load_lidar_file:
671
+ processed_lidar_list.append(ego_dict['processed_lidar'])
672
+ if self.load_camera_file:
673
+ image_inputs_list.append(ego_dict['image_inputs']) # different cav_num, ego_dict['image_inputs'] is dict.
674
+
675
+ record_len.append(ego_dict['cav_num'])
676
+ pairwise_t_matrix_list.append(ego_dict['pairwise_t_matrix'])
677
+
678
+ dict_list.append([ego_dict['scene_dict'], ego_dict['frame_id']])
679
+
680
+ if self.visualize:
681
+ origin_lidar.append(ego_dict['origin_lidar'])
682
+ # lidar_len.append(ego_dict['lidar_len'])
683
+ if len(ego_dict['img_front']) > 0 and len(ego_dict['img_right']) > 0 and len(ego_dict['img_left']) > 0 and len(ego_dict['BEV']) > 0:
684
+ img_front.append(ego_dict['img_front'][0])
685
+ img_left.append(ego_dict['img_left'][0])
686
+ img_right.append(ego_dict['img_right'][0])
687
+ BEV.append(ego_dict['BEV'][0])
688
+
689
+ if self.supervise_single and not online_eval_only:
690
+ # unused label
691
+ if False:
692
+ pos_equal_one_single.append(ego_dict['single_label_dict_torch']['pos_equal_one'])
693
+ neg_equal_one_single.append(ego_dict['single_label_dict_torch']['neg_equal_one'])
694
+ targets_single.append(ego_dict['single_label_dict_torch']['targets'])
695
+ object_bbx_center_single.append(ego_dict['single_object_bbx_center_torch'])
696
+ object_bbx_mask_single.append(ego_dict['single_object_bbx_mask_torch'])
697
+
698
+ # heterogeneous
699
+ if self.heterogeneous:
700
+ lidar_agent_list.append(ego_dict['lidar_agent'])
701
+
702
+ # convert to numpy, (B, max_num, 7)
703
+ if not online_eval_only:
704
+ object_bbx_center = torch.from_numpy(np.array(object_bbx_center))
705
+ object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask))
706
+ gt_object_bbx_center = torch.from_numpy(np.array(gt_object_bbx_center))
707
+ gt_object_bbx_mask = torch.from_numpy(np.array(gt_object_bbx_mask))
708
+ else:
709
+ object_bbx_center = None
710
+ object_bbx_mask = None
711
+ gt_object_bbx_center = None
712
+ gt_object_bbx_mask = None
713
+
714
+
715
+ # unused label
716
+ label_torch_dict = {}
717
+ if False:
718
+ label_torch_dict = \
719
+ self.post_processor.collate_batch(label_dict_list)
720
+
721
+ record_len = torch.from_numpy(np.array(record_len, dtype=int))
723
+ pairwise_t_matrix = torch.from_numpy(np.array(pairwise_t_matrix_list))
724
+ label_torch_dict['record_len'] = record_len
725
+ label_torch_dict['pairwise_t_matrix'] = pairwise_t_matrix
726
+ # for centerpoint
727
+ if not online_eval_only:
728
+ label_torch_dict.update({'object_bbx_center': object_bbx_center,
729
+ 'object_bbx_mask': object_bbx_mask})
730
+ output_dict['ego'].update({'object_bbx_center': object_bbx_center,
731
+ 'object_bbx_mask': object_bbx_mask,})
732
+ output_dict['ego'].update({
733
+ 'anchor_box': torch.from_numpy(self.anchor_box),
734
+ 'label_dict': label_torch_dict,
735
+ 'record_len': record_len,
736
+ 'pairwise_t_matrix': pairwise_t_matrix})
737
+ if self.visualize:
738
+ origin_lidar = \
739
+ np.array(downsample_lidar_minimum(pcd_np_list=origin_lidar))
740
+ origin_lidar = torch.from_numpy(origin_lidar)
741
+ output_dict['ego'].update({'origin_lidar': origin_lidar})
742
+
743
+ if self.load_lidar_file:
744
+ merged_feature_dict = merge_features_to_dict(processed_lidar_list)
745
+ if self.heterogeneous:
746
+ lidar_agent = np.concatenate(lidar_agent_list)
747
+ lidar_agent_idx = lidar_agent.nonzero()[0].tolist()
748
+ for k, v in merged_feature_dict.items(): # 'voxel_features' 'voxel_num_points' 'voxel_coords'
749
+ merged_feature_dict[k] = [v[index] for index in lidar_agent_idx]
750
+
751
+ if not self.heterogeneous or (self.heterogeneous and sum(lidar_agent) != 0):
752
+ processed_lidar_torch_dict = \
753
+ self.pre_processor.collate_batch(merged_feature_dict)
754
+ output_dict['ego'].update({'processed_lidar': processed_lidar_torch_dict})
755
+
756
+ if self.load_camera_file:
757
+ # collate ego camera information
758
+ imgs_batch = []
759
+ rots_batch = []
760
+ trans_batch = []
761
+ intrins_batch = []
762
+ extrinsics_batch = []
763
+ post_trans_batch = []
764
+ post_rots_batch = []
765
+ for i in range(len(batch)):
766
+ ego_dict = batch[i]["ego"]["image_inputs"]
767
+ imgs_batch.append(ego_dict["imgs"])
768
+ rots_batch.append(ego_dict["rots"])
769
+ trans_batch.append(ego_dict["trans"])
770
+ intrins_batch.append(ego_dict["intrins"])
771
+ extrinsics_batch.append(ego_dict["extrinsics"])
772
+ post_trans_batch.append(ego_dict["post_trans"])
773
+ post_rots_batch.append(ego_dict["post_rots"])
774
+
775
+ output_dict["ego"].update({
776
+ "image_inputs":
777
+ {
778
+ "imgs": torch.stack(imgs_batch), # [B, N, C, H, W]
779
+ "rots": torch.stack(rots_batch),
780
+ "trans": torch.stack(trans_batch),
781
+ "intrins": torch.stack(intrins_batch),
782
+ "post_trans": torch.stack(post_trans_batch),
783
+ "post_rots": torch.stack(post_rots_batch),
784
+ }
785
+ }
786
+ )
787
+
788
+ merged_image_inputs_dict = merge_features_to_dict(image_inputs_list, merge='cat')
789
+
790
+ if self.heterogeneous:
791
+ lidar_agent = np.concatenate(lidar_agent_list)
792
+ camera_agent = 1 - lidar_agent
793
+ camera_agent_idx = camera_agent.nonzero()[0].tolist()
794
+ if sum(camera_agent) != 0:
795
+ for k, v in merged_image_inputs_dict.items(): # 'imgs' 'rots' 'trans' ...
796
+ merged_image_inputs_dict[k] = torch.stack([v[index] for index in camera_agent_idx])
797
+
798
+ if not self.heterogeneous or (self.heterogeneous and sum(camera_agent) != 0):
799
+ output_dict['ego'].update({'image_inputs': merged_image_inputs_dict})
800
+
801
+ record_len = torch.from_numpy(np.array(record_len, dtype=int))
802
+ pairwise_t_matrix = torch.from_numpy(np.array(pairwise_t_matrix_list))
803
+ label_torch_dict['record_len'] = record_len
804
+ label_torch_dict['pairwise_t_matrix'] = pairwise_t_matrix
805
+ lidar_pose = torch.from_numpy(np.concatenate(lidar_pose_list, axis=0))
806
+ lidar_pose_clean = torch.from_numpy(np.concatenate(lidar_pose_clean_list, axis=0))
807
+
808
+ if not online_eval_only:
809
+ label_torch_dict = \
810
+ self.post_processor.collate_batch(label_dict_list)
811
+
812
+ gt_label_torch_dict = \
813
+ self.post_processor.collate_batch(gt_label_dict_list)
814
+
815
+ # for centerpoint
816
+ label_torch_dict.update({'object_bbx_center': object_bbx_center,
817
+ 'object_bbx_mask': object_bbx_mask})
818
+
819
+ gt_label_torch_dict.update({'gt_object_bbx_center': gt_object_bbx_center,
820
+ 'gt_object_bbx_mask': gt_object_bbx_mask})
821
+ else:
822
+ gt_label_torch_dict = {}
823
+
824
+ gt_label_torch_dict['pairwise_t_matrix'] = pairwise_t_matrix
825
+ gt_label_torch_dict['record_len'] = record_len
826
+
827
+ # object id is only used during inference, where batch size is 1.
828
+ # so here we only get the first element.
829
+ output_dict['ego'].update({'object_bbx_center': object_bbx_center,
830
+ 'object_bbx_mask': object_bbx_mask,
831
+ 'record_len': record_len,
832
+ 'label_dict': label_torch_dict,
833
+ 'object_ids': object_ids[0],
834
+ 'pairwise_t_matrix': pairwise_t_matrix,
835
+ 'lidar_pose_clean': lidar_pose_clean,
836
+ 'lidar_pose': lidar_pose,
837
+ 'anchor_box': self.anchor_box_torch})
838
+
839
+ output_dict['ego'].update({'gt_object_bbx_center': gt_object_bbx_center,
840
+ 'gt_object_bbx_mask': gt_object_bbx_mask,
841
+ 'gt_label_dict': gt_label_torch_dict,
842
+ 'gt_object_ids': gt_object_ids[0]})
843
+
844
+ output_dict['ego'].update({'dict_list': dict_list})
845
+ output_dict['ego'].update({'record_len': record_len,
846
+ 'pairwise_t_matrix': pairwise_t_matrix
847
+ })
848
+
849
+ if self.visualize:
850
+ origin_lidar = torch.from_numpy(np.array(origin_lidar))
851
+ output_dict['ego'].update({'origin_lidar': origin_lidar})
852
+ output_dict['ego'].update({'img_front': img_front})
853
+ output_dict['ego'].update({'img_right': img_right})
854
+ output_dict['ego'].update({'img_left': img_left})
855
+ output_dict['ego'].update({'BEV': BEV})
856
+
857
+ if self.supervise_single and not online_eval_only:
858
+ output_dict['ego'].update({
859
+ "label_dict_single":{
860
+ # "pos_equal_one": torch.cat(pos_equal_one_single, dim=0),
861
+ # "neg_equal_one": torch.cat(neg_equal_one_single, dim=0),
862
+ # "targets": torch.cat(targets_single, dim=0),
863
+ # for centerpoint
864
+ "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0),
865
+ "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0)
866
+ },
867
+ "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0),
868
+ "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0)
869
+ })
870
+
871
+ if self.heterogeneous:
872
+ output_dict['ego'].update({
873
+ "lidar_agent_record": torch.from_numpy(np.concatenate(lidar_agent_list)) # [0,1,1,0,1...]
874
+ })
875
+
876
+
877
+ return output_dict
878
+
879
+ def collate_batch_test(self, batch, online_eval_only=False):
880
+ """
881
+ Customized collate function for pytorch dataloader during testing
882
+ for late fusion dataset.
883
+
884
+ Parameters
885
+ ----------
886
+ batch : dict
887
+
888
+ Returns
889
+ -------
890
+ batch : dict
891
+ Reformatted batch.
892
+ """
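+ # Besides 'ego', one entry per CAV is kept in the output so that late fusion
+ # can decode each agent's detections separately before merging.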
893
+ # currently, we only support batch size of 1 during testing
894
+ assert len(batch) <= 1, "Batch size 1 is required during testing!"
895
+
896
+ self.online_eval_only = online_eval_only
897
+
898
+ output_dict = self.collate_batch_train(batch, online_eval_only)
899
+ if output_dict is None:
900
+ return None
901
+
902
+ batch = batch[0]
903
+
904
+ if batch['ego']['anchor_box'] is not None:
905
+ output_dict['ego'].update({'anchor_box':
906
+ self.anchor_box_torch})
907
+
908
+ record_len = torch.from_numpy(np.array([batch['ego']['cav_num']]))
909
+ pairwise_t_matrix = torch.from_numpy(np.array([batch['ego']['pairwise_t_matrix']]))
910
+
911
+ output_dict['ego'].update({'record_len': record_len,
912
+ 'pairwise_t_matrix': pairwise_t_matrix
913
+ })
914
+
915
+ # heterogeneous
916
+ if self.heterogeneous:
917
+ idx = batch['ego']['idx']
918
+ cav_list = batch['ego']['cav_list'] # ['ego', '650' ..]
919
+ cav_num = len(batch)
920
+ lidar_agent, camera_agent = self.selector.select_agent(idx)
921
+ lidar_agent = lidar_agent[:cav_num] # [1,0,0,1,0]
922
+ lidar_agent_idx = lidar_agent.nonzero()[0].tolist()
923
+ lidar_agent_cav_id = [cav_list[index] for index in lidar_agent_idx] # ['ego', ...]
924
+
925
+
926
+ # for late fusion, we also need to stack the lidar for better
927
+ # visualization
928
+ if self.visualize:
929
+ projected_lidar_list = []
930
+ origin_lidar = []
931
+
932
+ for cav_id, cav_content in batch.items():
933
+ if cav_id != 'ego':
934
+ output_dict.update({cav_id: {}})
935
+ # output_dict.update({cav_id: {}})
936
+
937
+ if not online_eval_only:
938
+ object_bbx_center = \
939
+ torch.from_numpy(np.array([cav_content['object_bbx_center']]))
940
+ object_bbx_mask = \
941
+ torch.from_numpy(np.array([cav_content['object_bbx_mask']]))
942
+ object_ids = cav_content['object_ids']
943
+
944
+ # the anchor box is the same for all bounding boxes usually, thus
945
+ # we don't need the batch dimension.
946
+ output_dict[cav_id].update(
947
+ {"anchor_box": self.anchor_box_torch}
948
+ )
949
+
950
+ transformation_matrix = cav_content['transformation_matrix']
951
+
952
+ if self.visualize:
953
+ origin_lidar = [cav_content['origin_lidar']]
954
+ if (self.params['only_vis_ego'] is False) or (cav_id=='ego'):
955
+ projected_lidar = copy.deepcopy(cav_content['origin_lidar'])
956
+ projected_lidar[:, :3] = \
957
+ box_utils.project_points_by_matrix_torch(
958
+ projected_lidar[:, :3],
959
+ transformation_matrix)
960
+ projected_lidar_list.append(projected_lidar)
961
+
962
+
963
+ if self.load_lidar_file:
964
+ # processed lidar dictionary
965
+ #if 'processed_features' in cav_content.keys():
966
+
967
+ merged_feature_dict = merge_features_to_dict([cav_content['processed_lidar']])
968
+ processed_lidar_torch_dict = \
969
+ self.pre_processor.collate_batch(merged_feature_dict)
970
+ output_dict[cav_id].update({'processed_lidar': processed_lidar_torch_dict})
971
+
972
+ if self.load_camera_file:
973
+ imgs_batch = [cav_content["image_inputs"]["imgs"]]
974
+ rots_batch = [cav_content["image_inputs"]["rots"]]
975
+ trans_batch = [cav_content["image_inputs"]["trans"]]
976
+ intrins_batch = [cav_content["image_inputs"]["intrins"]]
977
+ extrinsics_batch = [cav_content["image_inputs"]["extrinsics"]]
978
+ post_trans_batch = [cav_content["image_inputs"]["post_trans"]]
979
+ post_rots_batch = [cav_content["image_inputs"]["post_rots"]]
980
+
981
+ output_dict[cav_id].update({
982
+ "image_inputs":
983
+ {
984
+ "imgs": torch.stack(imgs_batch),
985
+ "rots": torch.stack(rots_batch),
986
+ "trans": torch.stack(trans_batch),
987
+ "intrins": torch.stack(intrins_batch),
988
+ "extrinsics": torch.stack(extrinsics_batch),
989
+ "post_trans": torch.stack(post_trans_batch),
990
+ "post_rots": torch.stack(post_rots_batch),
991
+ }
992
+ }
993
+ )
994
+
995
+ # heterogeneous
996
+ if self.heterogeneous:
997
+ if cav_id in lidar_agent_cav_id:
998
+ output_dict[cav_id].pop('image_inputs')
999
+ else:
1000
+ output_dict[cav_id].pop('processed_lidar')
1001
+
1002
+ if not online_eval_only:
1003
+ # label dictionary
1004
+ label_torch_dict = \
1005
+ self.post_processor.collate_batch([cav_content['label_dict']])
1006
+
1007
+ # for centerpoint
1008
+ label_torch_dict.update({'object_bbx_center': object_bbx_center,
1009
+ 'object_bbx_mask': object_bbx_mask})
1010
+
1011
+ # save the transformation matrix (4, 4) to ego vehicle
1012
+ transformation_matrix_torch = \
1013
+ torch.from_numpy(
1014
+ np.array(cav_content['transformation_matrix'])).float()
1015
+
1016
+ # late fusion training, no noise
1017
+ transformation_matrix_clean_torch = \
1018
+ torch.from_numpy(
1019
+ np.array(cav_content['transformation_matrix_clean'])).float()
1020
+
1021
+ if not online_eval_only:
1022
+ output_dict[cav_id].update({'object_bbx_center': object_bbx_center,
1023
+ 'object_bbx_mask': object_bbx_mask,
1024
+ 'label_dict': label_torch_dict,
1025
+ # 'record_len': record_len,
1026
+ 'object_ids': object_ids,})
1027
+ output_dict[cav_id].update({
1028
+ 'transformation_matrix': transformation_matrix_torch,
1029
+ 'transformation_matrix_clean': transformation_matrix_clean_torch})
1030
+
1031
+
1032
+ if 'cav_num' in cav_content.keys():
1033
+ record_len = torch.from_numpy(np.array([cav_content['cav_num']]))
1034
+ output_dict[cav_id].update({'record_len': record_len})
1035
+
1036
+ if 'pairwise_t_matrix' in cav_content.keys():
1037
+ pairwise_t_matrix = torch.from_numpy(np.array([cav_content['pairwise_t_matrix']]))
1038
+ output_dict[cav_id].update({'pairwise_t_matrix': pairwise_t_matrix})
1039
+
1040
+
1041
+
1042
+ if self.visualize:
1043
+ origin_lidar = \
1044
+ np.array(
1045
+ downsample_lidar_minimum(pcd_np_list=origin_lidar))
1046
+ origin_lidar = torch.from_numpy(origin_lidar)
1047
+ output_dict[cav_id].update({'origin_lidar': origin_lidar})
1048
+
1049
+ if self.visualize:
1050
+ projected_lidar_stack = [torch.from_numpy(
1051
+ np.vstack(projected_lidar_list))]
1052
+ output_dict['ego'].update({'origin_lidar': projected_lidar_stack})
1053
+
1054
+ output_dict['ego'].update({
1055
+ "sample_idx": batch['ego']['sample_idx'],
1056
+ "cav_id_list": batch['ego']['cav_id_list']
1057
+ })
1058
+ batch_record_len = output_dict['ego']['record_len']
1059
+
1060
+ for cav_id in output_dict.keys():
1061
+ if 'record_len' in output_dict[cav_id].keys():
1062
+ continue
1063
+ output_dict[cav_id].update({'record_len': batch_record_len})
1064
+
1065
+
1066
+ return output_dict
1067
+
1068
+
1069
+ def post_process(self, data_dict, output_dict):
1070
+ """
1071
+ Process the outputs of the model to 2D/3D bounding box.
1072
+
1073
+ Parameters
1074
+ ----------
1075
+ data_dict : dict
1076
+ The dictionary containing the origin input data of model.
1077
+
1078
+ output_dict :dict
1079
+ The dictionary containing the output of the model.
1080
+
1081
+ Returns
1082
+ -------
1083
+ pred_box_tensor : torch.Tensor
1084
+ The tensor of prediction bounding box after NMS.
1085
+ gt_box_tensor : torch.Tensor
1086
+ The tensor of gt bounding box.
1087
+ """
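+ # Single-class path: delegate NMS to the post processor and build GT boxes
+ # from the collated (clean) data; post_process_multiclass below is the
+ # per-class variant used when multi_class is enabled.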
1088
+ pred_box_tensor, pred_score = self.post_processor.post_process(
1089
+ data_dict, output_dict
1090
+ )
1091
+ gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict)
1092
+
1093
+ return pred_box_tensor, pred_score, gt_box_tensor
1094
+
1095
+ def post_process_no_fusion(self, data_dict, output_dict_ego):
1096
+ data_dict_ego = OrderedDict()
1097
+ data_dict_ego["ego"] = data_dict["ego"]
1098
+ gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict)
1099
+
1100
+ pred_box_tensor, pred_score = self.post_processor.post_process(
1101
+ data_dict_ego, output_dict_ego
1102
+ )
1103
+ return pred_box_tensor, pred_score, gt_box_tensor
1104
+
1105
+ def post_process_multiclass(self, data_dict, output_dict, online_eval_only=False):
1106
+ """
1107
+ Process the outputs of the model to 2D/3D bounding box.
1108
+
1109
+ Parameters
1110
+ ----------
1111
+ data_dict : dict
1112
+ The dictionary containing the origin input data of model.
1113
+
1114
+ output_dict :dict
1115
+ The dictionary containing the output of the model.
1116
+
1117
+ Returns
1118
+ -------
1119
+ pred_box_tensor : torch.Tensor
1120
+ The tensor of prediction bounding box after NMS.
1121
+ gt_box_tensor : torch.Tensor
1122
+ The tensor of gt bounding box.
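+ # Per-class decoding: for each class i, cls_preds[:, i:i+1] and the i-th slice
+ # of reg_preds_multiclass are treated as a single-class head and pushed through
+ # the same NMS path as post_process above.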
1123
+ """
1124
+
1125
+ if online_eval_only == False:
1126
+ online_eval_only = self.online_eval_only
1127
+
1128
+ num_class = output_dict['ego']['cls_preds'].shape[1]
1129
+ pred_box_tensor_list = []
1130
+ pred_score_list = []
1131
+ gt_box_tensor_list = []
1132
+
1133
+ num_list = [0,1,3]
1134
+
1135
+ for i in range(num_class):
1136
+ data_dict_single = copy.deepcopy(data_dict)
1137
+ gt_dict_single = {'ego': {}}
1138
+ gt_dict_single['ego'] = copy.deepcopy(data_dict['ego'])
1139
+ output_dict_single = copy.deepcopy(output_dict)
1140
+ if not online_eval_only:
1141
+ data_dict_single['ego']['object_bbx_center'] = data_dict['ego']['object_bbx_center'][:,i,:,:]
1142
+ data_dict_single['ego']['object_bbx_mask'] = data_dict['ego']['object_bbx_mask'][:,i,:]
1143
+ data_dict_single['ego']['object_ids'] = data_dict['ego']['object_ids'][num_list[i]]
1144
+ gt_dict_single['ego']['object_bbx_center'] = data_dict['ego']['gt_object_bbx_center'][:,i,:,:]
1145
+ gt_dict_single['ego']['object_bbx_mask'] = data_dict['ego']['gt_object_bbx_mask'][:,i,:]
1146
+ gt_dict_single['ego']['object_ids'] = data_dict['ego']['gt_object_ids'][num_list[i]]
1147
+
1148
+
1149
+ for cav in output_dict_single.keys():
1150
+ output_dict_single[cav]['cls_preds'] = output_dict[cav]['cls_preds'][:,i:i+1,:,:]
1151
+ output_dict_single[cav]['reg_preds'] = output_dict[cav]['reg_preds_multiclass'][:,i,:,:]
1152
+
1153
+ pred_box_tensor, pred_score = \
1154
+ self.post_processor.post_process(data_dict_single, output_dict_single)
1155
+
1156
+ if not online_eval_only:
1157
+ gt_box_tensor = self.post_processor.generate_gt_bbx(gt_dict_single)
1158
+ else:
1159
+ gt_box_tensor = None
1160
+
1161
+ pred_box_tensor_list.append(pred_box_tensor)
1162
+ pred_score_list.append(pred_score)
1163
+ gt_box_tensor_list.append(gt_box_tensor)
1164
+
1165
+ return pred_box_tensor_list, pred_score_list, gt_box_tensor_list
1166
+
1167
+ def post_process_multiclass_no_fusion(self, data_dict, output_dict_ego, online_eval_only=False):
1168
+ """
1169
+ Process the outputs of the model to 2D/3D bounding box.
1170
+
1171
+ Parameters
1172
+ ----------
1173
+ data_dict : dict
1174
+ The dictionary containing the origin input data of model.
1175
+
1176
+ output_dict :dict
1177
+ The dictionary containing the output of the model.
1178
+
1179
+ Returns
1180
+ -------
1181
+ pred_box_tensor : torch.Tensor
1182
+ The tensor of prediction bounding box after NMS.
1183
+ gt_box_tensor : torch.Tensor
1184
+ The tensor of gt bounding box.
1185
+ """
1186
+
1187
+ online_eval_only = self.online_eval_only
1188
+
1189
+ num_class = data_dict['ego']['object_bbx_center'].shape[1]
1190
+
1191
+
1192
+ pred_box_tensor_list = []
1193
+ pred_score_list = []
1194
+ gt_box_tensor_list = []
1195
+
1196
+ num_list = [0,1,3]
1197
+
1198
+ for i in range(num_class):
1199
+ data_dict_single = copy.deepcopy(data_dict)
1200
+ gt_dict_single = {'ego': {}}
1201
+ gt_dict_single['ego'] = copy.deepcopy(data_dict['ego'])
1202
+ output_dict_single = copy.deepcopy(output_dict_ego)
1203
+ data_dict_single['ego']['object_bbx_center'] = data_dict['ego']['object_bbx_center'][:,i,:,:]
1204
+ data_dict_single['ego']['object_bbx_mask'] = data_dict['ego']['object_bbx_mask'][:,i,:]
1205
+ data_dict_single['ego']['object_ids'] = data_dict['ego']['object_ids'][num_list[i]]
1206
+ gt_dict_single['ego']['object_bbx_center'] = data_dict['ego']['gt_object_bbx_center'][:,i,:,:]
1207
+ gt_dict_single['ego']['object_bbx_mask'] = data_dict['ego']['gt_object_bbx_mask'][:,i,:]
1208
+ gt_dict_single['ego']['object_ids'] = data_dict['ego']['gt_object_ids'][num_list[i]]
1209
+ output_dict_single['ego']['cls_preds'] = output_dict_ego['ego']['cls_preds'][:,i:i+1,:,:]
1210
+ output_dict_single['ego']['reg_preds'] = output_dict_ego['ego']['reg_preds_multiclass'][:,i,:,:]
1211
+ data_dict_single_ego = OrderedDict()
1212
+ data_dict_single_ego["ego"] = data_dict_single["ego"]
1213
+ pred_box_tensor, pred_score = \
1214
+ self.post_processor.post_process(data_dict_single_ego, output_dict_single)
1215
+ gt_box_tensor = self.post_processor.generate_gt_bbx(gt_dict_single)
1216
+
1217
+
1218
+ pred_box_tensor_list.append(pred_box_tensor)
1219
+ pred_score_list.append(pred_score)
1220
+ gt_box_tensor_list.append(gt_box_tensor)
1221
+
1222
+ return pred_box_tensor_list, pred_score_list, gt_box_tensor_list
1223
+
1224
+ def post_process_no_fusion_uncertainty(self, data_dict, output_dict_ego):
1225
+ data_dict_ego = OrderedDict()
1226
+ data_dict_ego['ego'] = data_dict['ego']
1227
+ gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict)
1228
+
1229
+ pred_box_tensor, pred_score, uncertainty = \
1230
+ self.post_processor.post_process(data_dict_ego, output_dict_ego, return_uncertainty=True)
1231
+ return pred_box_tensor, pred_score, gt_box_tensor, uncertainty
1232
+
1233
+ return LatemulticlassFusionDataset
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__init__.py ADDED
@@ -0,0 +1,27 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Author: Runsheng Xu <[email protected]>
3
+ # License: TDG-Attribution-NonCommercial-NoDistrib
4
+
5
+ from opencood.data_utils.post_processor.voxel_postprocessor import VoxelPostprocessor
6
+ from opencood.data_utils.post_processor.bev_postprocessor import BevPostprocessor
7
+ from opencood.data_utils.post_processor.ciassd_postprocessor import CiassdPostprocessor
8
+ from opencood.data_utils.post_processor.fpvrcnn_postprocessor import FpvrcnnPostprocessor
9
+ from opencood.data_utils.post_processor.uncertainty_voxel_postprocessor import UncertaintyVoxelPostprocessor
10
+
11
+ __all__ = {
12
+ 'VoxelPostprocessor': VoxelPostprocessor,
13
+ 'BevPostprocessor': BevPostprocessor,
14
+ 'CiassdPostprocessor': CiassdPostprocessor,
15
+ 'FpvrcnnPostprocessor': FpvrcnnPostprocessor,
16
+ 'UncertaintyVoxelPostprocessor': UncertaintyVoxelPostprocessor,
17
+ }
18
+
19
+
20
+ def build_postprocessor(anchor_cfg, train):
21
+ process_method_name = anchor_cfg['core_method']
22
+ anchor_generator = __all__[process_method_name](
23
+ anchor_params=anchor_cfg,
24
+ train=train
25
+ )
26
+
27
+ return anchor_generator
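+ # Typical call site sketch (config section name assumed from the dataset code):
+ #   post_processor = build_postprocessor(params['postprocess'], train=True)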
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (979 Bytes). View file
 
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/base_postprocessor.cpython-37.pyc ADDED
Binary file (13.8 kB). View file
 
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/bev_postprocessor.cpython-37.pyc ADDED
Binary file (11.6 kB). View file
 
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/ciassd_postprocessor.cpython-37.pyc ADDED
Binary file (4.26 kB). View file
 
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/fpvrcnn_postprocessor.cpython-37.pyc ADDED
Binary file (5.71 kB). View file
 
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/uncertainty_voxel_postprocessor.cpython-37.pyc ADDED
Binary file (5.61 kB). View file
 
v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/voxel_postprocessor.cpython-37.pyc ADDED
Binary file (10.6 kB). View file